Merge refs/heads/drm-latest from master.kernel.org:/pub/scm/linux/kernel/git/airlied...

author Linus Torvalds <torvalds@g5.osdl.org>

Tue, 30 Aug 2005 14:45:15 +0000 (07:45 -0700)

committer Linus Torvalds <torvalds@g5.osdl.org>

Tue, 30 Aug 2005 14:45:15 +0000 (07:45 -0700)
author Linus Torvalds <torvalds@g5.osdl.org>
Tue, 30 Aug 2005 14:45:15 +0000 (07:45 -0700)
committer Linus Torvalds <torvalds@g5.osdl.org>
Tue, 30 Aug 2005 14:45:15 +0000 (07:45 -0700)
diff --git a/CREDITS b/CREDITS

index d97e62524ddcc46ae7e15a3577cf745a03844445..f553f8cfaa6266a54bc4081d448719d73abb5001 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -2380,8 +2380,8 @@ E: tmolina@cablespeed.com
  D: bug fixes, documentation, minor hackery
  
  N: James Morris
-E: jmorris@redhat.com
-W: http://www.intercode.com.au/jmorris/
+E: jmorris@namei.org
+W: http://namei.org/
  D: Netfilter, Linux Security Modules (LSM), SELinux, IPSec,
  D: Crypto API, general networking, miscellaneous.
  S: PO Box 707
@@ -2423,8 +2423,7 @@ S: Toronto, Ontario
  S: Canada
  
  N: Zwane Mwaikambo
-E: zwane@linuxpower.ca
-W: http://function.linuxpower.ca
+E: zwane@arm.linux.org.uk
  D: Various driver hacking
  D: Lowlevel x86 kernel hacking
  D: General debugging
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches

index 6761a7b241a5fafbe77c69d09e23b1dd56781f06..7f43b040311e526e3e3fdf36f1f3a6f7d98f0f18 100644 (file)
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -149,6 +149,11 @@ USB, framebuffer devices, the VFS, the SCSI subsystem, etc.  See the
  MAINTAINERS file for a mailing list that relates specifically to
  your change.
  
+If changes affect userland-kernel interfaces, please send
+the MAN-PAGES maintainer (as listed in the MAINTAINERS file)
+a man-pages patch, or at least a notification of the change,
+so that some information makes its way into the manual pages.
+
  Even if the maintainer did not respond in step #4, make sure to ALWAYS
  copy the maintainer when you change their code.
  
diff --git a/Documentation/acpi-hotkey.txt b/Documentation/acpi-hotkey.txt

index 4c115a7bb8262a95080bbc36355ca77fcd3faa2e..0acdc80c30c2fab156a02238e6146afd6496425f 100644 (file)
--- a/Documentation/acpi-hotkey.txt
+++ b/Documentation/acpi-hotkey.txt
@@ -33,3 +33,6 @@ The result of the execution of this aml method is
  attached to /proc/acpi/hotkey/poll_method, which is dynamically
  created.  Please use command "cat /proc/acpi/hotkey/polling_method" 
  to retrieve it.
+
+Note: Use cmdline "acpi_generic_hotkey" to over-ride
+loading any platform specific drivers.
diff --git a/Documentation/arm/Samsung-S3C24XX/USB-Host.txt b/Documentation/arm/Samsung-S3C24XX/USB-Host.txt

new file mode 100644 (file)

index 0000000..b93b68e
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/USB-Host.txt
@@ -0,0 +1,93 @@
+                       S3C24XX USB Host support
+                       ========================
+
+
+
+Introduction
+------------
+
+  This document details the S3C2410/S3C2440 in-built OHCI USB host support.
+
+Configuration
+-------------
+
+  Enable at least the following kernel options:
+
+  menuconfig:
+
+   Device Drivers  --->
+     USB support  --->
+       <*> Support for Host-side USB
+       <*>   OHCI HCD support
+
+
+  .config:
+    CONFIG_USB
+    CONFIG_USB_OHCI_HCD
+
+
+  Once these options are configured, the standard set of USB device
+  drivers can be configured and used.
+
+
+Board Support
+-------------
+
+  The driver attaches to a platform device, which will need to be
+  added by the board specific support file in linux/arch/arm/mach-s3c2410,
+  such as mach-bast.c or mach-smdk2410.c
+
+  The platform device's platform_data field is only needed if the
+  board implements extra power control or over-current monitoring.
+
+  The OHCI driver does not ensure the state of the S3C2410's MISCCTRL
+  register, so if both ports are to be used for the host, then it is
+  the board support file's responsibility to ensure that the second
+  port is configured to be connected to the OHCI core.
+
+
+Platform Data
+-------------
+
+  See linux/include/asm-arm/arch-s3c2410/usb-control.h for the
+  descriptions of the platform device data. An implementation
+  can be found in linux/arch/arm/mach-s3c2410/usb-simtec.c .
+
+  The `struct s3c2410_hcd_info` contains a pair of functions
+  that get called to enable over-current detection, and to
+  control the port power status.
+
+  The ports are numbered 0 and 1.
+
+  power_control:
+
+    Called to enable or disable the power on the port.
+
+  enable_oc:
+
+    Called to enable or disable the over-current monitoring.
+    This should claim or release the resources being used to
+    check the power condition on the port, such as an IRQ.
+
+  report_oc:
+
+    The OHCI driver fills this field in for the over-current code
+    to call when there is a change to the over-current state on
+    a port. The ports argument is a bitmask of 1 bit per port,
+    with bit X being 1 for an over-current on port X.
+
+    The function s3c2410_usb_report_oc() has been provided to
+    ensure this is called correctly.
+
+  port[x]:
+
+    This struct describes each port, 0 or 1. The platform driver
+    should set the flags field of each port to S3C_HCDFLG_USED if
+    the port is enabled.
+
+
+
+Document Author
+---------------
+
+Ben Dooks, (c) 2005 Simtec Electronics
diff --git a/Documentation/dontdiff b/Documentation/dontdiff

index b974cf595d0185627d53cf65d84e41d1585e5ac5..96bea278bbf61eb9ae6d6cb5657f8092b543a87e 100644 (file)
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -104,6 +104,7 @@ logo_*.c
  logo_*_clut224.c
  logo_*_mono.c
  lxdialog
+mach-types
  mach-types.h
  make_times_h
  map
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt

index 8b1430b4665571d613dd1f85c6a4e4e22d387755..0665cb12bd6650f65f692ab726212deed76422c9 100644 (file)
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -135,3 +135,15 @@ Why:       With the 16-bit PCMCIA subsystem now behaving (almost) like a
         pcmciautils package available at
         http://kernel.org/pub/linux/utils/kernel/pcmcia/
  Who:   Dominik Brodowski <linux@brodo.de>
+
+---------------------------
+
+What:  ip_queue and ip6_queue (old ipv4-only and ipv6-only netfilter queue)
+When:  December 2005
+Why:   This interface has been obsoleted by the new layer3-independent
+       "nfnetlink_queue".  The Kernel interface is compatible, so the old
+       ip[6]tables "QUEUE" targets still work and will transparently handle
+       all packets into nfnetlink queue number 0.  Userspace users will have
+       to link against an API-compatible library on top of libnfnetlink_queue
+       instead of the current 'libipq'.
+Who:   Harald Welte <laforge@netfilter.org>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt

index a998a8c2f95baee78ec3080a244a4ff8fa280fa6..3d5cd7a09b2fc1aa56b6c197ee8d35df7116ec4d 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -159,6 +159,11 @@ running once the system is up.
  
         acpi_fake_ecdt  [HW,ACPI] Workaround failure due to BIOS lacking ECDT
  
+       acpi_generic_hotkey [HW,ACPI]
+                       Allow consolidated generic hotkey driver to
+                       over-ride platform specific driver.
+                       See also Documentation/acpi-hotkey.txt.
+
         ad1816=         [HW,OSS]
                         Format: <io>,<irq>,<dma>,<dma2>
                         See also Documentation/sound/oss/AD1816.
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt

new file mode 100644 (file)

index 0000000..0541fe1
--- /dev/null
+++ b/Documentation/kprobes.txt
@@ -0,0 +1,588 @@
+Title  : Kernel Probes (Kprobes)
+Authors        : Jim Keniston <jkenisto@us.ibm.com>
+       : Prasanna S Panchamukhi <prasanna@in.ibm.com>
+
+CONTENTS
+
+1. Concepts: Kprobes, Jprobes, Return Probes
+2. Architectures Supported
+3. Configuring Kprobes
+4. API Reference
+5. Kprobes Features and Limitations
+6. Probe Overhead
+7. TODO
+8. Kprobes Example
+9. Jprobes Example
+10. Kretprobes Example
+
+1. Concepts: Kprobes, Jprobes, Return Probes
+
+Kprobes enables you to dynamically break into any kernel routine and
+collect debugging and performance information non-disruptively. You
+can trap at almost any kernel code address, specifying a handler
+routine to be invoked when the breakpoint is hit.
+
+There are currently three types of probes: kprobes, jprobes, and
+kretprobes (also called return probes).  A kprobe can be inserted
+on virtually any instruction in the kernel.  A jprobe is inserted at
+the entry to a kernel function, and provides convenient access to the
+function's arguments.  A return probe fires when a specified function
+returns.
+
+In the typical case, Kprobes-based instrumentation is packaged as
+a kernel module.  The module's init function installs ("registers")
+one or more probes, and the exit function unregisters them.  A
+registration function such as register_kprobe() specifies where
+the probe is to be inserted and what handler is to be called when
+the probe is hit.
+
+The next three subsections explain how the different types of
+probes work.  They explain certain things that you'll need to
+know in order to make the best use of Kprobes -- e.g., the
+difference between a pre_handler and a post_handler, and how
+to use the maxactive and nmissed fields of a kretprobe.  But
+if you're in a hurry to start using Kprobes, you can skip ahead
+to section 2.
+
+1.1 How Does a Kprobe Work?
+
+When a kprobe is registered, Kprobes makes a copy of the probed
+instruction and replaces the first byte(s) of the probed instruction
+with a breakpoint instruction (e.g., int3 on i386 and x86_64).
+
+When a CPU hits the breakpoint instruction, a trap occurs, the CPU's
+registers are saved, and control passes to Kprobes via the
+notifier_call_chain mechanism.  Kprobes executes the "pre_handler"
+associated with the kprobe, passing the handler the addresses of the
+kprobe struct and the saved registers.
+
+Next, Kprobes single-steps its copy of the probed instruction.
+(It would be simpler to single-step the actual instruction in place,
+but then Kprobes would have to temporarily remove the breakpoint
+instruction.  This would open a small time window when another CPU
+could sail right past the probepoint.)
+
+After the instruction is single-stepped, Kprobes executes the
+"post_handler," if any, that is associated with the kprobe.
+Execution then continues with the instruction following the probepoint.
+
+1.2 How Does a Jprobe Work?
+
+A jprobe is implemented using a kprobe that is placed on a function's
+entry point.  It employs a simple mirroring principle to allow
+seamless access to the probed function's arguments.  The jprobe
+handler routine should have the same signature (arg list and return
+type) as the function being probed, and must always end by calling
+the Kprobes function jprobe_return().
+
+Here's how it works.  When the probe is hit, Kprobes makes a copy of
+the saved registers and a generous portion of the stack (see below).
+Kprobes then points the saved instruction pointer at the jprobe's
+handler routine, and returns from the trap.  As a result, control
+passes to the handler, which is presented with the same register and
+stack contents as the probed function.  When it is done, the handler
+calls jprobe_return(), which traps again to restore the original stack
+contents and processor state and switch to the probed function.
+
+By convention, the callee owns its arguments, so gcc may produce code
+that unexpectedly modifies that portion of the stack.  This is why
+Kprobes saves a copy of the stack and restores it after the jprobe
+handler has run.  Up to MAX_STACK_SIZE bytes are copied -- e.g.,
+64 bytes on i386.
+
+Note that the probed function's args may be passed on the stack
+or in registers (e.g., for x86_64 or for an i386 fastcall function).
+The jprobe will work in either case, so long as the handler's
+prototype matches that of the probed function.
+
+1.3 How Does a Return Probe Work?
+
+When you call register_kretprobe(), Kprobes establishes a kprobe at
+the entry to the function.  When the probed function is called and this
+probe is hit, Kprobes saves a copy of the return address, and replaces
+the return address with the address of a "trampoline."  The trampoline
+is an arbitrary piece of code -- typically just a nop instruction.
+At boot time, Kprobes registers a kprobe at the trampoline.
+
+When the probed function executes its return instruction, control
+passes to the trampoline and that probe is hit.  Kprobes' trampoline
+handler calls the user-specified handler associated with the kretprobe,
+then sets the saved instruction pointer to the saved return address,
+and that's where execution resumes upon return from the trap.
+
+While the probed function is executing, its return address is
+stored in an object of type kretprobe_instance.  Before calling
+register_kretprobe(), the user sets the maxactive field of the
+kretprobe struct to specify how many instances of the specified
+function can be probed simultaneously.  register_kretprobe()
+pre-allocates the indicated number of kretprobe_instance objects.
+
+For example, if the function is non-recursive and is called with a
+spinlock held, maxactive = 1 should be enough.  If the function is
+non-recursive and can never relinquish the CPU (e.g., via a semaphore
+or preemption), NR_CPUS should be enough.  If maxactive <= 0, it is
+set to a default value.  If CONFIG_PREEMPT is enabled, the default
+is max(10, 2*NR_CPUS).  Otherwise, the default is NR_CPUS.
+
+It's not a disaster if you set maxactive too low; you'll just miss
+some probes.  In the kretprobe struct, the nmissed field is set to
+zero when the return probe is registered, and is incremented every
+time the probed function is entered but there is no kretprobe_instance
+object available for establishing the return probe.
+
+2. Architectures Supported
+
+Kprobes, jprobes, and return probes are implemented on the following
+architectures:
+
+- i386
+- x86_64 (AMD-64, EM64T)
+- ppc64
+- ia64 (Support for probes on certain instruction types is still in progress.)
+- sparc64 (Return probes not yet implemented.)
+
+3. Configuring Kprobes
+
+When configuring the kernel using make menuconfig/xconfig/oldconfig,
+ensure that CONFIG_KPROBES is set to "y".  Under "Kernel hacking",
+look for "Kprobes".  You may have to enable "Kernel debugging"
+(CONFIG_DEBUG_KERNEL) before you can enable Kprobes.
+
+You may also want to ensure that CONFIG_KALLSYMS and perhaps even
+CONFIG_KALLSYMS_ALL are set to "y", since kallsyms_lookup_name()
+is a handy, version-independent way to find a function's address.
+
+If you need to insert a probe in the middle of a function, you may find
+it useful to "Compile the kernel with debug info" (CONFIG_DEBUG_INFO),
+so you can use "objdump -d -l vmlinux" to see the source-to-object
+code mapping.
+
+4. API Reference
+
+The Kprobes API includes a "register" function and an "unregister"
+function for each type of probe.  Here are terse, mini-man-page
+specifications for these functions and the associated probe handlers
+that you'll write.  See the latter half of this document for examples.
+
+4.1 register_kprobe
+
+#include <linux/kprobes.h>
+int register_kprobe(struct kprobe *kp);
+
+Sets a breakpoint at the address kp->addr.  When the breakpoint is
+hit, Kprobes calls kp->pre_handler.  After the probed instruction
+is single-stepped, Kprobes calls kp->post_handler.  If a fault
+occurs during execution of kp->pre_handler or kp->post_handler,
+or during single-stepping of the probed instruction, Kprobes calls
+kp->fault_handler.  Any or all handlers can be NULL.
+
+register_kprobe() returns 0 on success, or a negative errno otherwise.
+
+User's pre-handler (kp->pre_handler):
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+int pre_handler(struct kprobe *p, struct pt_regs *regs);
+
+Called with p pointing to the kprobe associated with the breakpoint,
+and regs pointing to the struct containing the registers saved when
+the breakpoint was hit.  Return 0 here unless you're a Kprobes geek.
+
+User's post-handler (kp->post_handler):
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+void post_handler(struct kprobe *p, struct pt_regs *regs,
+       unsigned long flags);
+
+p and regs are as described for the pre_handler.  flags always seems
+to be zero.
+
+User's fault-handler (kp->fault_handler):
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+int fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr);
+
+p and regs are as described for the pre_handler.  trapnr is the
+architecture-specific trap number associated with the fault (e.g.,
+on i386, 13 for a general protection fault or 14 for a page fault).
+Returns 1 if it successfully handled the exception.
+
+4.2 register_jprobe
+
+#include <linux/kprobes.h>
+int register_jprobe(struct jprobe *jp);
+
+Sets a breakpoint at the address jp->kp.addr, which must be the address
+of the first instruction of a function.  When the breakpoint is hit,
+Kprobes runs the handler whose address is jp->entry.
+
+The handler should have the same arg list and return type as the probed
+function; and just before it returns, it must call jprobe_return().
+(The handler never actually returns, since jprobe_return() returns
+control to Kprobes.)  If the probed function is declared asmlinkage,
+fastcall, or anything else that affects how args are passed, the
+handler's declaration must match.
+
+register_jprobe() returns 0 on success, or a negative errno otherwise.
+
+4.3 register_kretprobe
+
+#include <linux/kprobes.h>
+int register_kretprobe(struct kretprobe *rp);
+
+Establishes a return probe for the function whose address is
+rp->kp.addr.  When that function returns, Kprobes calls rp->handler.
+You must set rp->maxactive appropriately before you call
+register_kretprobe(); see "How Does a Return Probe Work?" for details.
+
+register_kretprobe() returns 0 on success, or a negative errno
+otherwise.
+
+User's return-probe handler (rp->handler):
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+int kretprobe_handler(struct kretprobe_instance *ri, struct pt_regs *regs);
+
+regs is as described for kprobe.pre_handler.  ri points to the
+kretprobe_instance object, of which the following fields may be
+of interest:
+- ret_addr: the return address
+- rp: points to the corresponding kretprobe object
+- task: points to the corresponding task struct
+The handler's return value is currently ignored.
+
+4.4 unregister_*probe
+
+#include <linux/kprobes.h>
+void unregister_kprobe(struct kprobe *kp);
+void unregister_jprobe(struct jprobe *jp);
+void unregister_kretprobe(struct kretprobe *rp);
+
+Removes the specified probe.  The unregister function can be called
+at any time after the probe has been registered.
+
+5. Kprobes Features and Limitations
+
+As of Linux v2.6.12, Kprobes allows multiple probes at the same
+address.  Currently, however, there cannot be multiple jprobes on
+the same function at the same time.
+
+In general, you can install a probe anywhere in the kernel.
+In particular, you can probe interrupt handlers.  Known exceptions
+are discussed in this section.
+
+For obvious reasons, it's a bad idea to install a probe in
+the code that implements Kprobes (mostly kernel/kprobes.c and
+arch/*/kernel/kprobes.c).  A patch in the v2.6.13 timeframe instructs
+Kprobes to reject such requests.
+
+If you install a probe in an inline-able function, Kprobes makes
+no attempt to chase down all inline instances of the function and
+install probes there.  gcc may inline a function without being asked,
+so keep this in mind if you're not seeing the probe hits you expect.
+
+A probe handler can modify the environment of the probed function
+-- e.g., by modifying kernel data structures, or by modifying the
+contents of the pt_regs struct (which are restored to the registers
+upon return from the breakpoint).  So Kprobes can be used, for example,
+to install a bug fix or to inject faults for testing.  Kprobes, of
+course, has no way to distinguish the deliberately injected faults
+from the accidental ones.  Don't drink and probe.
+
+Kprobes makes no attempt to prevent probe handlers from stepping on
+each other -- e.g., probing printk() and then calling printk() from a
+probe handler.  As of Linux v2.6.12, if a probe handler hits a probe,
+that second probe's handlers won't be run in that instance.
+
+In Linux v2.6.12 and previous versions, Kprobes' data structures are
+protected by a single lock that is held during probe registration and
+unregistration and while handlers are run.  Thus, no two handlers
+can run simultaneously.  To improve scalability on SMP systems,
+this restriction will probably be removed soon, in which case
+multiple handlers (or multiple instances of the same handler) may
+run concurrently on different CPUs.  Code your handlers accordingly.
+
+Kprobes does not use semaphores or allocate memory except during
+registration and unregistration.
+
+Probe handlers are run with preemption disabled.  Depending on the
+architecture, handlers may also run with interrupts disabled.  In any
+case, your handler should not yield the CPU (e.g., by attempting to
+acquire a semaphore).
+
+Since a return probe is implemented by replacing the return
+address with the trampoline's address, stack backtraces and calls
+to __builtin_return_address() will typically yield the trampoline's
+address instead of the real return address for kretprobed functions.
+(As far as we can tell, __builtin_return_address() is used only
+for instrumentation and error reporting.)
+
+If the number of times a function is called does not match the
+number of times it returns, registering a return probe on that
+function may produce undesirable results.  We have the do_exit()
+and do_execve() cases covered.  do_fork() is not an issue.  We're
+unaware of other specific cases where this could be a problem.
+
+6. Probe Overhead
+
+On a typical CPU in use in 2005, a kprobe hit takes 0.5 to 1.0
+microseconds to process.  Specifically, a benchmark that hits the same
+probepoint repeatedly, firing a simple handler each time, reports 1-2
+million hits per second, depending on the architecture.  A jprobe or
+return-probe hit typically takes 50-75% longer than a kprobe hit.
+When you have a return probe set on a function, adding a kprobe at
+the entry to that function adds essentially no overhead.
+
+Here are sample overhead figures (in usec) for different architectures.
+k = kprobe; j = jprobe; r = return probe; kr = kprobe + return probe
+on same function; jr = jprobe + return probe on same function
+
+i386: Intel Pentium M, 1495 MHz, 2957.31 bogomips
+k = 0.57 usec; j = 1.00; r = 0.92; kr = 0.99; jr = 1.40
+
+x86_64: AMD Opteron 246, 1994 MHz, 3971.48 bogomips
+k = 0.49 usec; j = 0.76; r = 0.80; kr = 0.82; jr = 1.07
+
+ppc64: POWER5 (gr), 1656 MHz (SMT disabled, 1 virtual CPU per physical CPU)
+k = 0.77 usec; j = 1.31; r = 1.26; kr = 1.45; jr = 1.99
+
+7. TODO
+
+a. SystemTap (http://sourceware.org/systemtap): Work in progress
+to provide a simplified programming interface for probe-based
+instrumentation.
+b. Improved SMP scalability: Currently, work is in progress to handle
+multiple kprobes in parallel.
+c. Kernel return probes for sparc64.
+d. Support for other architectures.
+e. User-space probes.
+
+8. Kprobes Example
+
+Here's a sample kernel module showing the use of kprobes to dump a
+stack trace and selected i386 registers when do_fork() is called.
+----- cut here -----
+/*kprobe-example.c*/
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include <linux/kallsyms.h>
+#include <linux/sched.h>
+
+/*For each probe you need to allocate a kprobe structure*/
+static struct kprobe kp;
+
+/*kprobe pre_handler: called just before the probed instruction is executed*/
+int handler_pre(struct kprobe *p, struct pt_regs *regs)
+{
+       printk("pre_handler: p->addr=0x%p, eip=%lx, eflags=0x%lx\n",
+               p->addr, regs->eip, regs->eflags);
+       dump_stack();
+       return 0;
+}
+
+/*kprobe post_handler: called after the probed instruction is executed*/
+void handler_post(struct kprobe *p, struct pt_regs *regs, unsigned long flags)
+{
+       printk("post_handler: p->addr=0x%p, eflags=0x%lx\n",
+               p->addr, regs->eflags);
+}
+
+/* fault_handler: this is called if an exception is generated for any
+ * instruction within the pre- or post-handler, or when Kprobes
+ * single-steps the probed instruction.
+ */
+int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
+{
+       printk("fault_handler: p->addr=0x%p, trap #%d\n",
+               p->addr, trapnr);
+       /* Return 0 because we don't handle the fault. */
+       return 0;
+}
+
+int init_module(void)
+{
+       int ret;
+       kp.pre_handler = handler_pre;
+       kp.post_handler = handler_post;
+       kp.fault_handler = handler_fault;
+       kp.addr = (kprobe_opcode_t*) kallsyms_lookup_name("do_fork");
+       /* register the kprobe now */
+       if (!kp.addr) {
+               printk("Couldn't find %s to plant kprobe\n", "do_fork");
+               return -1;
+       }
+       if ((ret = register_kprobe(&kp)) < 0) {
+               printk("register_kprobe failed, returned %d\n", ret);
+               return -1;
+       }
+       printk("kprobe registered\n");
+       return 0;
+}
+
+void cleanup_module(void)
+{
+       unregister_kprobe(&kp);
+       printk("kprobe unregistered\n");
+}
+
+MODULE_LICENSE("GPL");
+----- cut here -----
+
+You can build the kernel module, kprobe-example.ko, using the following
+Makefile:
+----- cut here -----
+obj-m := kprobe-example.o
+KDIR := /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+default:
+       $(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules
+clean:
+       rm -f *.mod.c *.ko *.o
+----- cut here -----
+
+$ make
+$ su -
+...
+# insmod kprobe-example.ko
+
+You will see the trace data in /var/log/messages and on the console
+whenever do_fork() is invoked to create a new process.
+
+9. Jprobes Example
+
+Here's a sample kernel module showing the use of jprobes to dump
+the arguments of do_fork().
+----- cut here -----
+/*jprobe-example.c */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/uio.h>
+#include <linux/kprobes.h>
+#include <linux/kallsyms.h>
+
+/*
+ * Jumper probe for do_fork.
+ * Mirror principle enables access to arguments of the probed routine
+ * from the probe handler.
+ */
+
+/* Proxy routine having the same arguments as actual do_fork() routine */
+long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
+             struct pt_regs *regs, unsigned long stack_size,
+             int __user * parent_tidptr, int __user * child_tidptr)
+{
+       printk("jprobe: clone_flags=0x%lx, stack_size=0x%lx, regs=0x%p\n",
+              clone_flags, stack_size, regs);
+       /* Always end with a call to jprobe_return(). */
+       jprobe_return();
+       /*NOTREACHED*/
+       return 0;
+}
+
+static struct jprobe my_jprobe = {
+       .entry = (kprobe_opcode_t *) jdo_fork
+};
+
+int init_module(void)
+{
+       int ret;
+       my_jprobe.kp.addr = (kprobe_opcode_t *) kallsyms_lookup_name("do_fork");
+       if (!my_jprobe.kp.addr) {
+               printk("Couldn't find %s to plant jprobe\n", "do_fork");
+               return -1;
+       }
+
+       if ((ret = register_jprobe(&my_jprobe)) <0) {
+               printk("register_jprobe failed, returned %d\n", ret);
+               return -1;
+       }
+       printk("Planted jprobe at %p, handler addr %p\n",
+              my_jprobe.kp.addr, my_jprobe.entry);
+       return 0;
+}
+
+void cleanup_module(void)
+{
+       unregister_jprobe(&my_jprobe);
+       printk("jprobe unregistered\n");
+}
+
+MODULE_LICENSE("GPL");
+----- cut here -----
+
+Build and insert the kernel module as shown in the above kprobe
+example.  You will see the trace data in /var/log/messages and on
+the console whenever do_fork() is invoked to create a new process.
+(Some messages may be suppressed if syslogd is configured to
+eliminate duplicate messages.)
+
+10. Kretprobes Example
+
+Here's a sample kernel module showing the use of return probes to
+report failed calls to sys_open().
+----- cut here -----
+/*kretprobe-example.c*/
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include <linux/kallsyms.h>
+
+static const char *probed_func = "sys_open";
+
+/* Return-probe handler: If the probed function fails, log the return value. */
+static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+       // Substitute the appropriate register name for your architecture --
+       // e.g., regs->rax for x86_64, regs->gpr[3] for ppc64.
+       int retval = (int) regs->eax;
+       if (retval < 0) {
+               printk("%s returns %d\n", probed_func, retval);
+       }
+       return 0;
+}
+
+static struct kretprobe my_kretprobe = {
+       .handler = ret_handler,
+       /* Probe up to 20 instances concurrently. */
+       .maxactive = 20
+};
+
+int init_module(void)
+{
+       int ret;
+       my_kretprobe.kp.addr =
+               (kprobe_opcode_t *) kallsyms_lookup_name(probed_func);
+       if (!my_kretprobe.kp.addr) {
+               printk("Couldn't find %s to plant return probe\n", probed_func);
+               return -1;
+       }
+       if ((ret = register_kretprobe(&my_kretprobe)) < 0) {
+               printk("register_kretprobe failed, returned %d\n", ret);
+               return -1;
+       }
+       printk("Planted return probe at %p\n", my_kretprobe.kp.addr);
+       return 0;
+}
+
+void cleanup_module(void)
+{
+       unregister_kretprobe(&my_kretprobe);
+       printk("kretprobe unregistered\n");
+       /* nmissed > 0 suggests that maxactive was set too low. */
+       printk("Missed probing %d instances of %s\n",
+               my_kretprobe.nmissed, probed_func);
+}
+
+MODULE_LICENSE("GPL");
+----- cut here -----
+
+Build and insert the kernel module as shown in the above kprobe
+example.  You will see the trace data in /var/log/messages and on the
+console whenever sys_open() returns a negative value.  (Some messages
+may be suppressed if syslogd is configured to eliminate duplicate
+messages.)
+
+For additional information on Kprobes, refer to the following URLs:
+http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe
+http://www.redhat.com/magazine/005mar05/features/kprobes/
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt

index 0bc2ed136a3836ea48f5478252c953646b3d4ade..24d029455baadabc3acc398e3970ff8052e3ab1d 100644 (file)
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -1,5 +1,7 @@
  
-                   Linux Ethernet Bonding Driver HOWTO
+               Linux Ethernet Bonding Driver HOWTO
+
+               Latest update: 21 June 2005
  
  Initial release : Thomas Davis <tadavis at lbl.gov>
  Corrections, HA extensions : 2000/10/03-15 :
@@ -11,15 +13,22 @@ Corrections, HA extensions : 2000/10/03-15 :
  
  Reorganized and updated Feb 2005 by Jay Vosburgh
  
-Note :
-------
+Introduction
+============
+
+       The Linux bonding driver provides a method for aggregating
+multiple network interfaces into a single logical "bonded" interface.
+The behavior of the bonded interfaces depends upon the mode; generally
+speaking, modes provide either hot standby or load balancing services.
+Additionally, link integrity monitoring may be performed.
         
-The bonding driver originally came from Donald Becker's beowulf patches for
-kernel 2.0. It has changed quite a bit since, and the original tools from
-extreme-linux and beowulf sites will not work with this version of the driver.
+       The bonding driver originally came from Donald Becker's
+beowulf patches for kernel 2.0. It has changed quite a bit since, and
+the original tools from extreme-linux and beowulf sites will not work
+with this version of the driver.
  
-For new versions of the driver, patches for older kernels and the updated
-userspace tools, please follow the links at the end of this file.
+       For new versions of the driver, updated userspace tools, and
+who to ask for help, please follow the links at the end of this file.
  
  Table of Contents
  =================
@@ -30,9 +39,13 @@ Table of Contents
  
  3. Configuring Bonding Devices
  3.1    Configuration with sysconfig support
+3.1.1          Using DHCP with sysconfig
+3.1.2          Configuring Multiple Bonds with sysconfig
  3.2    Configuration with initscripts support
+3.2.1          Using DHCP with initscripts
+3.2.2          Configuring Multiple Bonds with initscripts
  3.3    Configuring Bonding Manually
-3.4    Configuring Multiple Bonds
+3.3.1          Configuring Multiple Bonds Manually
  
  5. Querying Bonding Configuration
  5.1    Bonding Configuration
@@ -56,21 +69,30 @@ Table of Contents
  
  11. Promiscuous mode
  
-12. High Availability Information
+12. Configuring Bonding for High Availability
  12.1   High Availability in a Single Switch Topology
-12.1.1         Bonding Mode Selection for Single Switch Topology
-12.1.2         Link Monitoring for Single Switch Topology
  12.2   High Availability in a Multiple Switch Topology
-12.2.1         Bonding Mode Selection for Multiple Switch Topology
-12.2.2         Link Monitoring for Multiple Switch Topology
-12.3   Switch Behavior Issues for High Availability
+12.2.1         HA Bonding Mode Selection for Multiple Switch Topology
+12.2.2         HA Link Monitoring for Multiple Switch Topology
+
+13. Configuring Bonding for Maximum Throughput
+13.1   Maximum Throughput in a Single Switch Topology
+13.1.1         MT Bonding Mode Selection for Single Switch Topology
+13.1.2         MT Link Monitoring for Single Switch Topology
+13.2   Maximum Throughput in a Multiple Switch Topology
+13.2.1         MT Bonding Mode Selection for Multiple Switch Topology
+13.2.2         MT Link Monitoring for Multiple Switch Topology
  
-13. Hardware Specific Considerations
-13.1   IBM BladeCenter
+14. Switch Behavior Issues
+14.1   Link Establishment and Failover Delays
+14.2   Duplicated Incoming Packets
  
-14. Frequently Asked Questions
+15. Hardware Specific Considerations
+15.1   IBM BladeCenter
  
-15. Resources and Links
+16. Frequently Asked Questions
+
+17. Resources and Links
  
  
  1. Bonding Driver Installation
@@ -86,16 +108,10 @@ the following steps:
  1.1 Configure and build the kernel with bonding
  -----------------------------------------------
  
-       The latest version of the bonding driver is available in the
+       The current version of the bonding driver is available in the
  drivers/net/bonding subdirectory of the most recent kernel source
-(which is available on http://kernel.org).
-
-       Prior to the 2.4.11 kernel, the bonding driver was maintained
-largely outside the kernel tree; patches for some earlier kernels are
-available on the bonding sourceforge site, although those patches are
-still several years out of date.  Most users will want to use either
-the most recent kernel from kernel.org or whatever kernel came with
-their distro.
+(which is available on http://kernel.org).  Most users "rolling their
+own" will want to use the most recent kernel from kernel.org.
  
         Configure kernel with "make menuconfig" (or "make xconfig" or
  "make config"), then select "Bonding driver support" in the "Network
@@ -103,8 +119,8 @@ device support" section.  It is recommended that you configure the
  driver as module since it is currently the only way to pass parameters
  to the driver or configure more than one bonding device.
  
-       Build and install the new kernel and modules, then proceed to
-step 2.
+       Build and install the new kernel and modules, then continue
+below to install ifenslave.
  
  1.2 Install ifenslave Control Utility
  -------------------------------------
@@ -147,9 +163,9 @@ default kernel source include directory.
         Options for the bonding driver are supplied as parameters to
  the bonding module at load time.  They may be given as command line
  arguments to the insmod or modprobe command, but are usually specified
-in either the /etc/modprobe.conf configuration file, or in a
-distro-specific configuration file (some of which are detailed in the
-next section).
+in either the /etc/modules.conf or /etc/modprobe.conf configuration
+file, or in a distro-specific configuration file (some of which are
+detailed in the next section).
  
         The available bonding driver parameters are listed below. If a
  parameter is not specified the default value is used.  When initially
@@ -162,34 +178,34 @@ degradation will occur during link failures.  Very few devices do not
  support at least miimon, so there is really no reason not to use it.
  
         Options with textual values will accept either the text name
-       or, for backwards compatibility, the option value.  E.g.,
-       "mode=802.3ad" and "mode=4" set the same mode.
+or, for backwards compatibility, the option value.  E.g.,
+"mode=802.3ad" and "mode=4" set the same mode.
  
         The parameters are as follows:
  
  arp_interval
  
-       Specifies the ARP monitoring frequency in milli-seconds. If
-       ARP monitoring is used in a load-balancing mode (mode 0 or 2),
-       the switch should be configured in a mode that evenly
-       distributes packets across all links - such as round-robin. If
-       the switch is configured to distribute the packets in an XOR
+       Specifies the ARP link monitoring frequency in milliseconds.
+       If ARP monitoring is used in an etherchannel compatible mode
+       (modes 0 and 2), the switch should be configured in a mode
+       that evenly distributes packets across all links. If the
+       switch is configured to distribute the packets in an XOR
         fashion, all replies from the ARP targets will be received on
         the same link which could cause the other team members to
-       fail. ARP monitoring should not be used in conjunction with
-       miimon. A value of 0 disables ARP monitoring. The default
+       fail.  ARP monitoring should not be used in conjunction with
+       miimon.  A value of 0 disables ARP monitoring.  The default
         value is 0.
  
  arp_ip_target
  
-       Specifies the ip addresses to use when arp_interval is > 0.
-       These are the targets of the ARP request sent to determine the
-       health of the link to the targets.  Specify these values in
-       ddd.ddd.ddd.ddd format.  Multiple ip adresses must be
-       seperated by a comma.  At least one IP address must be given
-       for ARP monitoring to function.  The maximum number of targets
-       that can be specified is 16.  The default value is no IP
-       addresses.
+       Specifies the IP addresses to use as ARP monitoring peers when
+       arp_interval is > 0.  These are the targets of the ARP request
+       sent to determine the health of the link to the targets.
+       Specify these values in ddd.ddd.ddd.ddd format.  Multiple IP
+       addresses must be separated by a comma.  At least one IP
+       address must be given for ARP monitoring to function.  The
+       maximum number of targets that can be specified is 16.  The
+       default value is no IP addresses.
  
  downdelay
  
@@ -207,11 +223,13 @@ lacp_rate
         are:
  
         slow or 0
-               Request partner to transmit LACPDUs every 30 seconds (default)
+               Request partner to transmit LACPDUs every 30 seconds
  
         fast or 1
                 Request partner to transmit LACPDUs every 1 second
  
+       The default is slow.
+
  max_bonds
  
         Specifies the number of bonding devices to create for this
@@ -221,10 +239,11 @@ max_bonds
  
  miimon
  
-       Specifies the frequency in milli-seconds that MII link
-       monitoring will occur.  A value of zero disables MII link
-       monitoring.  A value of 100 is a good starting point.  The
-       use_carrier option, below, affects how the link state is
+       Specifies the MII link monitoring frequency in milliseconds.
+       This determines how often the link state of each slave is
+       inspected for link failures.  A value of zero disables MII
+       link monitoring.  A value of 100 is a good starting point.
+       The use_carrier option, below, affects how the link state is
         determined.  See the High Availability section for additional
         information.  The default value is 0.
  
@@ -246,17 +265,31 @@ mode
                 active.  A different slave becomes active if, and only
                 if, the active slave fails.  The bond's MAC address is
                 externally visible on only one port (network adapter)
-               to avoid confusing the switch.  This mode provides
-               fault tolerance.  The primary option affects the
-               behavior of this mode.
+               to avoid confusing the switch.
+
+               In bonding version 2.6.2 or later, when a failover
+               occurs in active-backup mode, bonding will issue one
+               or more gratuitous ARPs on the newly active slave.
+               One gratuitous ARP is issued for the bonding master
+               interface and each VLAN interface configured above
+               it, provided that the interface has at least one IP
+               address configured.  Gratuitous ARPs issued for VLAN
+               interfaces are tagged with the appropriate VLAN id.
+
+               This mode provides fault tolerance.  The primary
+               option, documented below, affects the behavior of this
+               mode.
  
         balance-xor or 2
  
-               XOR policy: Transmit based on [(source MAC address
-               XOR'd with destination MAC address) modulo slave
-               count].  This selects the same slave for each
-               destination MAC address.  This mode provides load
-               balancing and fault tolerance.
+               XOR policy: Transmit based on the selected transmit
+               hash policy.  The default policy is a simple [(source
+               MAC address XOR'd with destination MAC address) modulo
+               slave count].  Alternate transmit policies may be
+               selected via the xmit_hash_policy option, described
+               below.
+
+               This mode provides load balancing and fault tolerance.
  
         broadcast or 3
  
@@ -270,7 +303,17 @@ mode
                 duplex settings.  Utilizes all slaves in the active
                 aggregator according to the 802.3ad specification.
  
-               Pre-requisites:
+               Slave selection for outgoing traffic is done according
+               to the transmit hash policy, which may be changed from
+               the default simple XOR policy via the xmit_hash_policy
+               option, documented below.  Note that not all transmit
+               policies may be 802.3ad compliant, particularly in
+               regards to the packet mis-ordering requirements of
+               section 43.2.4 of the 802.3ad standard.  Differing
+               peer implementations will have varying tolerances for
+               noncompliance.
+
+               Prerequisites:
  
                 1. Ethtool support in the base drivers for retrieving
                 the speed and duplex of each slave.
@@ -333,7 +376,7 @@ mode
  
                 When a link is reconnected or a new slave joins the
                 bond the receive traffic is redistributed among all
-               active slaves in the bond by intiating ARP Replies
+               active slaves in the bond by initiating ARP Replies
                 with the selected mac address to each of the
                 clients. The updelay parameter (detailed below) must
                 be set to a value equal or greater than the switch's
@@ -396,6 +439,60 @@ use_carrier
         0 will use the deprecated MII / ETHTOOL ioctls.  The default
         value is 1.
  
+xmit_hash_policy
+
+       Selects the transmit hash policy to use for slave selection in
+       balance-xor and 802.3ad modes.  Possible values are:
+
+       layer2
+
+               Uses XOR of hardware MAC addresses to generate the
+               hash.  The formula is
+
+               (source MAC XOR destination MAC) modulo slave count
+
+               This algorithm will place all traffic to a particular
+               network peer on the same slave.
+
+               This algorithm is 802.3ad compliant.
+
+       layer3+4
+
+               This policy uses upper layer protocol information,
+               when available, to generate the hash.  This allows for
+               traffic to a particular network peer to span multiple
+               slaves, although a single connection will not span
+               multiple slaves.
+
+               The formula for unfragmented TCP and UDP packets is
+
+               ((source port XOR dest port) XOR
+                        ((source IP XOR dest IP) AND 0xffff))
+                               modulo slave count
+
+               For fragmented TCP or UDP packets and all other IP
+               protocol traffic, the source and destination port
+               information is omitted.  For non-IP traffic, the
+               formula is the same as for the layer2 transmit hash
+               policy.
+
+               This policy is intended to mimic the behavior of
+               certain switches, notably Cisco switches with PFC2 as
+               well as some Foundry and IBM products.
+
+               This algorithm is not fully 802.3ad compliant.  A
+               single TCP or UDP conversation containing both
+               fragmented and unfragmented packets will see packets
+               striped across two interfaces.  This may result in out
+               of order delivery.  Most traffic types will not meet
+               these criteria, as TCP rarely fragments traffic, and
+               most UDP traffic is not involved in extended
+               conversations.  Other implementations of 802.3ad may
+               or may not tolerate this noncompliance.
+
+       The default value is layer2.  This option was added in bonding
+version 2.6.3.  In earlier versions of bonding, this parameter does
+not exist, and the layer2 policy is the only policy.
  
  
  3. Configuring Bonding Devices
@@ -448,8 +545,9 @@ Bonding devices can be managed by hand, however, as follows.
  slave devices.  On SLES 9, this is most easily done by running the
  yast2 sysconfig configuration utility.  The goal is for to create an
  ifcfg-id file for each slave device.  The simplest way to accomplish
-this is to configure the devices for DHCP.  The name of the
-configuration file for each device will be of the form:
+this is to configure the devices for DHCP (this is only to get the
+ifcfg-id file created; see below for some issues with DHCP).  The
+name of the configuration file for each device will be of the form:
  
  ifcfg-id-xx:xx:xx:xx:xx:xx
  
@@ -459,7 +557,7 @@ the device's permanent MAC address.
         Once the set of ifcfg-id-xx:xx:xx:xx:xx:xx files has been
  created, it is necessary to edit the configuration files for the slave
  devices (the MAC addresses correspond to those of the slave devices).
-Before editing, the file will contain muliple lines, and will look
+Before editing, the file will contain multiple lines, and will look
  something like this:
  
  BOOTPROTO='dhcp'
@@ -496,16 +594,11 @@ STARTMODE="onboot"
  BONDING_MASTER="yes"
  BONDING_MODULE_OPTS="mode=active-backup miimon=100"
  BONDING_SLAVE0="eth0"
-BONDING_SLAVE1="eth1"
+BONDING_SLAVE1="bus-pci-0000:06:08.1"
  
         Replace the sample BROADCAST, IPADDR, NETMASK and NETWORK
  values with the appropriate values for your network.
  
-       Note that configuring the bonding device with BOOTPROTO='dhcp'
-does not work; the scripts attempt to obtain the device address from
-DHCP prior to adding any of the slave devices.  Without active slaves,
-the DHCP requests are not sent to the network.
-
         The STARTMODE specifies when the device is brought online.
  The possible values are:
  
@@ -531,9 +624,17 @@ for the bonding mode, link monitoring, and so on here.  Do not include
  the max_bonds bonding parameter; this will confuse the configuration
  system if you have multiple bonding devices.
  
-       Finally, supply one BONDING_SLAVEn="ethX" for each slave,
-where "n" is an increasing value, one for each slave, and "ethX" is
-the name of the slave device (eth0, eth1, etc).
+       Finally, supply one BONDING_SLAVEn="slave device" for each
+slave, where "n" is an increasing value, one for each slave.  The
+"slave device" is either an interface name, e.g., "eth0", or a device
+specifier for the network device.  The interface name is easier to
+find, but the ethN names are subject to change at boot time if, e.g.,
+a device early in the sequence has failed.  The device specifiers
+(bus-pci-0000:06:08.1 in the example above) specify the physical
+network device, and will not change unless the device's bus location
+changes (for example, it is moved from one PCI slot to another).  The
+example above uses one of each type for demonstration purposes; most
+configurations will choose one or the other for all slave devices.
  
         When all configuration files have been modified or created,
  networking must be restarted for the configuration changes to take
@@ -544,7 +645,7 @@ effect.  This can be accomplished via the following:
         Note that the network control script (/sbin/ifdown) will
  remove the bonding module as part of the network shutdown processing,
  so it is not necessary to remove the module by hand if, e.g., the
-module paramters have changed.
+module parameters have changed.
  
         Also, at this writing, YaST/YaST2 will not manage bonding
  devices (they do not show bonding interfaces on its list of network
@@ -559,12 +660,37 @@ format can be found in an example ifcfg template file:
         Note that the template does not document the various BONDING_
  settings described above, but does describe many of the other options.
  
+3.1.1 Using DHCP with sysconfig
+-------------------------------
+
+       Under sysconfig, configuring a device with BOOTPROTO='dhcp'
+will cause it to query DHCP for its IP address information.  At this
+writing, this does not function for bonding devices; the scripts
+attempt to obtain the device address from DHCP prior to adding any of
+the slave devices.  Without active slaves, the DHCP requests are not
+sent to the network.
+
+3.1.2 Configuring Multiple Bonds with sysconfig
+-----------------------------------------------
+
+       The sysconfig network initialization system is capable of
+handling multiple bonding devices.  All that is necessary is for each
+bonding instance to have an appropriately configured ifcfg-bondX file
+(as described above).  Do not specify the "max_bonds" parameter to any
+instance of bonding, as this will confuse sysconfig.  If you require
+multiple bonding devices with identical parameters, create multiple
+ifcfg-bondX files.
+
+       Because the sysconfig scripts supply the bonding module
+options in the ifcfg-bondX file, it is not necessary to add them to
+the system /etc/modules.conf or /etc/modprobe.conf configuration file.
+
  3.2 Configuration with initscripts support
  ------------------------------------------
  
         This section applies to distros using a version of initscripts
  with bonding support, for example, Red Hat Linux 9 or Red Hat
-Enterprise Linux version 3.  On these systems, the network
+Enterprise Linux version 3 or 4.  On these systems, the network
  initialization scripts have some knowledge of bonding, and can be
  configured to control bonding devices.
  
@@ -614,10 +740,11 @@ USERCTL=no
         Be sure to change the networking specific lines (IPADDR,
  NETMASK, NETWORK and BROADCAST) to match your network configuration.
  
-       Finally, it is necessary to edit /etc/modules.conf to load the
-bonding module when the bond0 interface is brought up.  The following
-sample lines in /etc/modules.conf will load the bonding module, and
-select its options:
+       Finally, it is necessary to edit /etc/modules.conf (or
+/etc/modprobe.conf, depending upon your distro) to load the bonding
+module with your desired options when the bond0 interface is brought
+up.  The following lines in /etc/modules.conf (or modprobe.conf) will
+load the bonding module, and select its options:
  
  alias bond0 bonding
  options bond0 mode=balance-alb miimon=100
@@ -629,6 +756,33 @@ options for your configuration.
  will restart the networking subsystem and your bond link should be now
  up and running.
  
+3.2.1 Using DHCP with initscripts
+---------------------------------
+
+       Recent versions of initscripts (the version supplied with
+Fedora Core 3 and Red Hat Enterprise Linux 4 is reported to work) do
+have support for assigning IP information to bonding devices via DHCP.
+
+       To configure bonding for DHCP, configure it as described
+above, except replace the line "BOOTPROTO=none" with "BOOTPROTO=dhcp"
+and add a line consisting of "TYPE=Bonding".  Note that the TYPE value
+is case sensitive.
+
+3.2.2 Configuring Multiple Bonds with initscripts
+-------------------------------------------------
+
+       At this writing, the initscripts package does not directly
+support loading the bonding driver multiple times, so the process for
+doing so is the same as described in the "Configuring Multiple Bonds
+Manually" section, below.
+
+       NOTE: It has been observed that some Red Hat supplied kernels
+are apparently unable to rename modules at load time (the "-obonding1"
+part).  Attempts to pass that option to modprobe will produce an
+"Operation not permitted" error.  This has been reported on some
+Fedora Core kernels, and has been seen on RHEL 4 as well.  On kernels
+exhibiting this problem, it will be impossible to configure multiple
+bonds with differing parameters.
  
  3.3 Configuring Bonding Manually
  --------------------------------
@@ -638,10 +792,11 @@ scripts (the sysconfig or initscripts package) do not have specific
  knowledge of bonding.  One such distro is SuSE Linux Enterprise Server
  version 8.
  
-       The general methodology for these systems is to place the
-bonding module parameters into /etc/modprobe.conf, then add modprobe
-and/or ifenslave commands to the system's global init script.  The
-name of the global init script differs; for sysconfig, it is
+       The general method for these systems is to place the bonding
+module parameters into /etc/modules.conf or /etc/modprobe.conf (as
+appropriate for the installed distro), then add modprobe and/or
+ifenslave commands to the system's global init script.  The name of
+the global init script differs; for sysconfig, it is
  /etc/init.d/boot.local and for initscripts it is /etc/rc.d/rc.local.
  
         For example, if you wanted to make a simple bond of two e100
@@ -649,7 +804,7 @@ devices (presumed to be eth0 and eth1), and have it persist across
  reboots, edit the appropriate file (/etc/init.d/boot.local or
  /etc/rc.d/rc.local), and add the following:
  
-modprobe bonding -obond0 mode=balance-alb miimon=100
+modprobe bonding mode=balance-alb miimon=100
  modprobe e100
  ifconfig bond0 192.168.1.1 netmask 255.255.255.0 up
  ifenslave bond0 eth0
@@ -657,11 +812,7 @@ ifenslave bond0 eth1
  
         Replace the example bonding module parameters and bond0
  network configuration (IP address, netmask, etc) with the appropriate
-values for your configuration.  The above example loads the bonding
-module with the name "bond0," this simplifies the naming if multiple
-bonding modules are loaded (each successive instance of the module is
-given a different name, and the module instance names match the
-bonding interface names).
+values for your configuration.
  
         Unfortunately, this method will not provide support for the
  ifup and ifdown scripts on the bond devices.  To reload the bonding
@@ -684,20 +835,23 @@ appropriate device driver modules.  For our example above, you can do
  the following:
  
  # ifconfig bond0 down
-# rmmod bond0
+# rmmod bonding
  # rmmod e100
  
         Again, for convenience, it may be desirable to create a script
  with these commands.
  
  
-3.4 Configuring Multiple Bonds
-------------------------------
+3.3.1 Configuring Multiple Bonds Manually
+-----------------------------------------
  
         This section contains information on configuring multiple
-bonding devices with differing options.  If you require multiple
-bonding devices, but all with the same options, see the "max_bonds"
-module paramter, documented above.
+bonding devices with differing options for those systems whose network
+initialization scripts lack support for configuring multiple bonds.
+
+       If you require multiple bonding devices, but all with the same
+options, you may wish to use the "max_bonds" module parameter,
+documented above.
  
         To create multiple bonding devices with differing options, it
  is necessary to load the bonding driver multiple times.  Note that
@@ -724,11 +878,16 @@ named "bond0" and creates the bond0 device in balance-rr mode with an
  miimon of 100.  The second instance is named "bond1" and creates the
  bond1 device in balance-alb mode with an miimon of 50.
  
+       In some circumstances (typically with older distributions),
+the above does not work, and the second bonding instance never sees
+its options.  In that case, the second options line can be substituted
+as follows:
+
+install bonding1 /sbin/modprobe bonding -obond1 mode=balance-alb miimon=50
+
         This may be repeated any number of times, specifying a new and
-unique name in place of bond0 or bond1 for each instance.
+unique name in place of bond1 for each subsequent instance.
  
-       When the appropriate module paramters are in place, then
-configure bonding according to the instructions for your distro.
  
  5. Querying Bonding Configuration 
  =================================
@@ -846,8 +1005,8 @@ tagged internally by bonding itself.  As a result, bonding must
  self generated packets.
  
         For reasons of simplicity, and to support the use of adapters
-that can do VLAN hardware acceleration offloding, the bonding
-interface declares itself as fully hardware offloaing capable, it gets
+that can do VLAN hardware acceleration offloading, the bonding
+interface declares itself as fully hardware offloading capable, it gets
  the add_vid/kill_vid notifications to gather the necessary
  information, and it propagates those actions to the slaves.  In case
  of mixed adapter types, hardware accelerated tagged packets that
@@ -880,7 +1039,7 @@ bond interface:
  matches the hardware address of the VLAN interfaces.
  
         Note that changing a VLAN interface's HW address would set the
-underlying device -- i.e. the bonding interface -- to promiscouos
+underlying device -- i.e. the bonding interface -- to promiscuous
  mode, which might not be what you want.
  
  
@@ -923,7 +1082,7 @@ down or have a problem making it unresponsive to ARP requests.  Having
  an additional target (or several) increases the reliability of the ARP
  monitoring.
  
-       Multiple ARP targets must be seperated by commas as follows:
+       Multiple ARP targets must be separated by commas as follows:
  
  # example options for ARP monitoring with three targets
  alias bond0 bonding
@@ -1045,7 +1204,7 @@ install bonding /sbin/modprobe tg3; /sbin/modprobe e1000;
         This will, when loading the bonding module, rather than
  performing the normal action, instead execute the provided command.
  This command loads the device drivers in the order needed, then calls
-modprobe with --ingore-install to cause the normal action to then take
+modprobe with --ignore-install to cause the normal action to then take
  place.  Full documentation on this can be found in the modprobe.conf
  and modprobe manual pages.
  
@@ -1130,14 +1289,14 @@ association.
  common to enable promiscuous mode on the device, so that all traffic
  is seen (instead of seeing only traffic destined for the local host).
  The bonding driver handles promiscuous mode changes to the bonding
-master device (e.g., bond0), and propogates the setting to the slave
+master device (e.g., bond0), and propagates the setting to the slave
  devices.
  
         For the balance-rr, balance-xor, broadcast, and 802.3ad modes,
-the promiscuous mode setting is propogated to all slaves.
+the promiscuous mode setting is propagated to all slaves.
  
         For the active-backup, balance-tlb and balance-alb modes, the
-promiscuous mode setting is propogated only to the active slave.
+promiscuous mode setting is propagated only to the active slave.
  
         For balance-tlb mode, the active slave is the slave currently
  receiving inbound traffic.
@@ -1148,46 +1307,182 @@ sending to peers that are unassigned or if the load is unbalanced.
  
         For the active-backup, balance-tlb and balance-alb modes, when
  the active slave changes (e.g., due to a link failure), the
-promiscuous setting will be propogated to the new active slave.
+promiscuous setting will be propagated to the new active slave.
  
-12. High Availability Information
-=================================
+12. Configuring Bonding for High Availability
+=============================================
  
         High Availability refers to configurations that provide
  maximum network availability by having redundant or backup devices,
-links and switches between the host and the rest of the world.
-
-       There are currently two basic methods for configuring to
-maximize availability. They are dependent on the network topology and
-the primary goal of the configuration, but in general, a configuration
-can be optimized for maximum available bandwidth, or for maximum
-network availability.
+links or switches between the host and the rest of the world.  The
+goal is to provide the maximum availability of network connectivity
+(i.e., the network always works), even though other configurations
+could provide higher throughput.
  
  12.1 High Availability in a Single Switch Topology
  --------------------------------------------------
  
-       If two hosts (or a host and a switch) are directly connected
-via multiple physical links, then there is no network availability
-penalty for optimizing for maximum bandwidth: there is only one switch
-(or peer), so if it fails, you have no alternative access to fail over
-to.
+       If two hosts (or a host and a single switch) are directly
+connected via multiple physical links, then there is no availability
+penalty to optimizing for maximum bandwidth.  In this case, there is
+only one switch (or peer), so if it fails, there is no alternative
+access to fail over to.  Additionally, the bonding load balance modes
+support link monitoring of their members, so if individual links fail,
+the load will be rebalanced across the remaining devices.
+
+       See Section 13, "Configuring Bonding for Maximum Throughput"
+for information on configuring bonding with one peer device.
+
+12.2 High Availability in a Multiple Switch Topology
+----------------------------------------------------
+
+       With multiple switches, the configuration of bonding and the
+network changes dramatically.  In multiple switch topologies, there is
+a trade off between network availability and usable bandwidth.
+
+       Below is a sample network, configured to maximize the
+availability of the network:
  
-Example 1 : host to switch (or other host)
+                |                                     |
+                |port3                           port3|
+          +-----+----+                          +-----+----+
+          |          |port2       ISL      port2|          |
+          | switch A +--------------------------+ switch B |
+          |          |                          |          |
+          +-----+----+                          +-----++---+
+                |port1                           port1|
+                |             +-------+               |
+                +-------------+ host1 +---------------+
+                         eth0 +-------+ eth1
  
-          +----------+                          +----------+
-          |          |eth0                  eth0|  switch  |
-          | Host A   +--------------------------+    or    |
-          |          +--------------------------+  other   |
-          |          |eth1                  eth1|  host    |
-          +----------+                          +----------+
+       In this configuration, there is a link between the two
+switches (ISL, or inter switch link), and multiple ports connecting to
+the outside world ("port3" on each switch).  There is no technical
+reason that this could not be extended to a third switch.
  
+12.2.1 HA Bonding Mode Selection for Multiple Switch Topology
+-------------------------------------------------------------
  
-12.1.1 Bonding Mode Selection for single switch topology
---------------------------------------------------------
+       In a topology such as the example above, the active-backup and
+broadcast modes are the only useful bonding modes when optimizing for
+availability; the other modes require all links to terminate on the
+same peer for them to behave rationally.
+
+active-backup: This is generally the preferred mode, particularly if
+       the switches have an ISL and play together well.  If the
+       network configuration is such that one switch is specifically
+       a backup switch (e.g., has lower capacity, higher cost, etc),
+       then the primary option can be used to ensure that the
+       preferred link is always used when it is available.
+
+broadcast: This mode is really a special purpose mode, and is suitable
+       only for very specific needs.  For example, suppose the two
+       switches are not connected (no ISL), and the networks beyond
+       them are totally independent.  In this case, if it is
+       necessary for some specific one-way traffic to reach both
+       independent networks, then the broadcast mode may be suitable.
+
+12.2.2 HA Link Monitoring Selection for Multiple Switch Topology
+----------------------------------------------------------------
+
+       The choice of link monitoring ultimately depends upon your
+switch.  If the switch can reliably fail ports in response to other
+failures, then either the MII or ARP monitors should work.  For
+example, in the topology above, if the "port3" link fails at the remote
+end, the MII monitor has no direct means to detect this.  The ARP
+monitor could be configured with a target at the remote end of port3,
+thus detecting that failure without switch support.
+
+       In general, however, in a multiple switch topology, the ARP
+monitor can provide a higher level of reliability in detecting end to
+end connectivity failures (which may be caused by the failure of any
+individual component to pass traffic for any reason).  Additionally,
+the ARP monitor should be configured with multiple targets (at least
+one for each switch in the network).  This will ensure that,
+regardless of which switch is active, the ARP monitor has a suitable
+target to query.
+
+
+13. Configuring Bonding for Maximum Throughput
+==============================================
+
+13.1 Maximizing Throughput in a Single Switch Topology
+------------------------------------------------------
+
+       In a single switch configuration, the best method to maximize
+throughput depends upon the application and network environment.  The
+various load balancing modes each have strengths and weaknesses in
+different environments, as detailed below.
+
+       For this discussion, we will break down the topologies into
+two categories.  Depending upon the destination of most traffic, we
+categorize them into either "gatewayed" or "local" configurations.
+
+       In a gatewayed configuration, the "switch" is acting primarily
+as a router, and the majority of traffic passes through this router to
+other networks.  An example would be the following:
+
+
+     +----------+                     +----------+
+     |          |eth0            port1|          | to other networks
+     | Host A   +---------------------+ router   +------------------->
+     |          +---------------------+          | Hosts B and C are out
+     |          |eth1            port2|          | here somewhere
+     +----------+                     +----------+
+
+       The router may be a dedicated router device, or another host
+acting as a gateway.  For our discussion, the important point is that
+the majority of traffic from Host A will pass through the router to
+some other network before reaching its final destination.
+
+       In a gatewayed network configuration, although Host A may
+communicate with many other systems, all of its traffic will be sent
+and received via one other peer on the local network, the router.
+
+       Note that the case of two systems connected directly via
+multiple physical links is, for purposes of configuring bonding, the
+same as a gatewayed configuration.  In that case, it happens that all
+traffic is destined for the "gateway" itself, not some other network
+beyond the gateway.
+
+       In a local configuration, the "switch" is acting primarily as
+a switch, and the majority of traffic passes through this switch to
+reach other stations on the same network.  An example would be the
+following:
+
+    +----------+            +----------+       +--------+
+    |          |eth0   port1|          +-------+ Host B |
+    |  Host A  +------------+  switch  |port3  +--------+
+    |          +------------+          |                  +--------+
+    |          |eth1   port2|          +------------------+ Host C |
+    +----------+            +----------+port4             +--------+
+
+
+       Again, the switch may be a dedicated switch device, or another
+host acting as a gateway.  For our discussion, the important point is
+that the majority of traffic from Host A is destined for other hosts
+on the same local network (Hosts B and C in the above example).
+
+       In summary, in a gatewayed configuration, traffic to and from
+the bonded device will be to the same MAC level peer on the network
+(the gateway itself, i.e., the router), regardless of its final
+destination.  In a local configuration, traffic flows directly to and
+from the final destinations, thus, each destination (Host B, Host C)
+will be addressed directly by their individual MAC addresses.
+
+       This distinction between a gatewayed and a local network
+configuration is important because many of the load balancing modes
+available use the MAC addresses of the local network source and
+destination to make load balancing decisions.  The behavior of each
+mode is described below.
+
+
+13.1.1 MT Bonding Mode Selection for Single Switch Topology
+-----------------------------------------------------------
  
         This configuration is the easiest to set up and to understand,
  although you will have to decide which bonding mode best suits your
-needs.  The tradeoffs for each mode are detailed below:
+needs.  The trade offs for each mode are detailed below:
  
  balance-rr: This mode is the only mode that will permit a single
         TCP/IP connection to stripe traffic across multiple
@@ -1206,6 +1501,23 @@ balance-rr: This mode is the only mode that will permit a single
         interface's worth of throughput, even after adjusting
         tcp_reordering.
  
+       Note that this out of order delivery occurs when both the
+       sending and receiving systems are utilizing a multiple
+       interface bond.  Consider a configuration in which a
+       balance-rr bond feeds into a single higher capacity network
+       channel (e.g., multiple 100Mb/sec ethernets feeding a single
+       gigabit ethernet via an etherchannel capable switch).  In this
+       configuration, traffic sent from the multiple 100Mb devices to
+       a destination connected to the gigabit device will not see
+       packets out of order.  However, traffic sent from the gigabit
+       device to the multiple 100Mb devices may or may not see
+       traffic out of order, depending upon the balance policy of the
+       switch.  Many switches do not support any modes that stripe
+       traffic (instead choosing a port based upon IP or MAC level
+       addresses); for those devices, traffic flowing from the
+       gigabit device to the many 100Mb devices will only utilize one
+       interface.
+
         If you are utilizing protocols other than TCP/IP, UDP for
         example, and your application can tolerate out of order
         delivery, then this mode can allow for single stream datagram
@@ -1220,16 +1532,21 @@ active-backup: There is not much advantage in this network topology to
         connected to the same peer as the primary.  In this case, a
         load balancing mode (with link monitoring) will provide the
         same level of network availability, but with increased
-       available bandwidth.  On the plus side, it does not require
-       any configuration of the switch.
+       available bandwidth.  On the plus side, active-backup mode
+       does not require any configuration of the switch, so it may
+       have value if the hardware available does not support any of
+       the load balance modes.
  
  balance-xor: This mode will limit traffic such that packets destined
         for specific peers will always be sent over the same
         interface.  Since the destination is determined by the MAC
-       addresses involved, this may be desirable if you have a large
-       network with many hosts.  It is likely to be suboptimal if all
-       your traffic is passed through a single router, however.  As
-       with balance-rr, the switch ports need to be configured for
+       addresses involved, this mode works best in a "local" network
+       configuration (as described above), with destinations all on
+       the same local network.  This mode is likely to be suboptimal
+       if all your traffic is passed through a single router (i.e., a
+       "gatewayed" network configuration, as described above).
+
+       As with balance-rr, the switch ports need to be configured for
         "etherchannel" or "trunking."
  
  broadcast: Like active-backup, there is not much advantage to this
@@ -1241,122 +1558,131 @@ broadcast: Like active-backup, there is not much advantage to this
         protocol includes automatic configuration of the aggregates,
         so minimal manual configuration of the switch is needed
         (typically only to designate that some set of devices is
-       usable for 802.3ad).  The 802.3ad standard also mandates that
-       frames be delivered in order (within certain limits), so in
-       general single connections will not see misordering of
+       available for 802.3ad).  The 802.3ad standard also mandates
+       that frames be delivered in order (within certain limits), so
+       in general single connections will not see misordering of
         packets.  The 802.3ad mode does have some drawbacks: the
         standard mandates that all devices in the aggregate operate at
         the same speed and duplex.  Also, as with all bonding load
         balance modes other than balance-rr, no single connection will
         be able to utilize more than a single interface's worth of
-       bandwidth.  Additionally, the linux bonding 802.3ad
-       implementation distributes traffic by peer (using an XOR of
-       MAC addresses), so in general all traffic to a particular
-       destination will use the same interface.  Finally, the 802.3ad
-       mode mandates the use of the MII monitor, therefore, the ARP
-       monitor is not available in this mode.
-
-balance-tlb: This mode is also a good choice for this type of
-       topology.  It has no special switch configuration
-       requirements, and balances outgoing traffic by peer, in a
-       vaguely intelligent manner (not a simple XOR as in balance-xor
-       or 802.3ad mode), so that unlucky MAC addresses will not all
-       "bunch up" on a single interface.  Interfaces may be of
-       differing speeds.  On the down side, in this mode all incoming
-       traffic arrives over a single interface, this mode requires
-       certain ethtool support in the network device driver of the
-       slave interfaces, and the ARP monitor is not available.
-
-balance-alb: This mode is everything that balance-tlb is, and more. It
-       has all of the features (and restrictions) of balance-tlb, and
-       will also balance incoming traffic from peers (as described in
-       the Bonding Module Options section, above).  The only extra
-       down side to this mode is that the network device driver must
-       support changing the hardware address while the device is
-       open.
-
-12.1.2 Link Monitoring for Single Switch Topology
--------------------------------------------------
+       bandwidth.  
+
+       Additionally, the linux bonding 802.3ad implementation
+       distributes traffic by peer (using an XOR of MAC addresses),
+       so in a "gatewayed" configuration, all outgoing traffic will
+       generally use the same device.  Incoming traffic may also end
+       up on a single device, but that is dependent upon the
+       balancing policy of the peer's 802.3ad implementation.  In a
+       "local" configuration, traffic will be distributed across the
+       devices in the bond.
+
+       Finally, the 802.3ad mode mandates the use of the MII monitor,
+       therefore, the ARP monitor is not available in this mode.
+
+balance-tlb: The balance-tlb mode balances outgoing traffic by peer.
+       Since the balancing is done according to MAC address, in a
+       "gatewayed" configuration (as described above), this mode will
+       send all traffic across a single device.  However, in a
+       "local" network configuration, this mode balances multiple
+       local network peers across devices in a vaguely intelligent
+       manner (not a simple XOR as in balance-xor or 802.3ad mode),
+       so that mathematically unlucky MAC addresses (i.e., ones that
+       XOR to the same value) will not all "bunch up" on a single
+       interface.
+
+       Unlike 802.3ad, interfaces may be of differing speeds, and no
+       special switch configuration is required.  On the down side,
+       in this mode all incoming traffic arrives over a single
+       interface, this mode requires certain ethtool support in the
+       network device driver of the slave interfaces, and the ARP
+       monitor is not available.
+
+balance-alb: This mode is everything that balance-tlb is, and more.
+       It has all of the features (and restrictions) of balance-tlb,
+       and will also balance incoming traffic from local network
+       peers (as described in the Bonding Module Options section,
+       above).
+
+       The only additional down side to this mode is that the network
+       device driver must support changing the hardware address while
+       the device is open.
+
+13.1.2 MT Link Monitoring for Single Switch Topology
+----------------------------------------------------
  
         The choice of link monitoring may largely depend upon which
  mode you choose to use.  The more advanced load balancing modes do not
  support the use of the ARP monitor, and are thus restricted to using
-the MII monitor (which does not provide as high a level of assurance
-as the ARP monitor).
-
-
-12.2 High Availability in a Multiple Switch Topology
-----------------------------------------------------
-
-       With multiple switches, the configuration of bonding and the
-network changes dramatically.  In multiple switch topologies, there is
-a tradeoff between network availability and usable bandwidth.
-
-       Below is a sample network, configured to maximize the
-availability of the network:
-
-                |                                     |
-                |port3                           port3|
-          +-----+----+                          +-----+----+
-          |          |port2       ISL      port2|          |
-          | switch A +--------------------------+ switch B |
-          |          |                          |          |
-          +-----+----+                          +-----++---+
-                |port1                           port1|
-                |             +-------+               |
-                +-------------+ host1 +---------------+
-                         eth0 +-------+ eth1
-
-       In this configuration, there is a link between the two
-switches (ISL, or inter switch link), and multiple ports connecting to
-the outside world ("port3" on each switch).  There is no technical
-reason that this could not be extended to a third switch.
-
-12.2.1 Bonding Mode Selection for Multiple Switch Topology
-----------------------------------------------------------
-
-       In a topology such as this, the active-backup and broadcast
-modes are the only useful bonding modes; the other modes require all
-links to terminate on the same peer for them to behave rationally.
-
-active-backup: This is generally the preferred mode, particularly if
-       the switches have an ISL and play together well.  If the
-       network configuration is such that one switch is specifically
-       a backup switch (e.g., has lower capacity, higher cost, etc),
-       then the primary option can be used to insure that the
-       preferred link is always used when it is available.
-
-broadcast: This mode is really a special purpose mode, and is suitable
-       only for very specific needs.  For example, if the two
-       switches are not connected (no ISL), and the networks beyond
-       them are totally independant.  In this case, if it is
-       necessary for some specific one-way traffic to reach both
-       independent networks, then the broadcast mode may be suitable.
-
-12.2.2 Link Monitoring Selection for Multiple Switch Topology
+the MII monitor (which does not provide as high a level of end to end
+assurance as the ARP monitor).
+
+13.2 Maximum Throughput in a Multiple Switch Topology
+-----------------------------------------------------
+
+       Multiple switches may be utilized to optimize for throughput
+when they are configured in parallel as part of an isolated network
+between two or more systems, for example:
+
+                       +-----------+
+                       |  Host A   | 
+                       +-+---+---+-+
+                         |   |   |
+                +--------+   |   +---------+
+                |            |             |
+         +------+---+  +-----+----+  +-----+----+
+         | Switch A |  | Switch B |  | Switch C |
+         +------+---+  +-----+----+  +-----+----+
+                |            |             |
+                +--------+   |   +---------+
+                         |   |   |
+                       +-+---+---+-+
+                       |  Host B   | 
+                       +-----------+
+
+       In this configuration, the switches are isolated from one
+another.  One reason to employ a topology such as this is that, for an
+isolated network with many hosts (a cluster configured for high
+performance, for example), using multiple smaller switches can be more
+cost effective than a single larger switch, e.g., on a network with 24
+hosts, three 24 port switches can be significantly less expensive than
+a single 72 port switch.
+
+       If access beyond the network is required, an individual host
+can be equipped with an additional network device connected to an
+external network; this host then additionally acts as a gateway.
+
+13.2.1 MT Bonding Mode Selection for Multiple Switch Topology
  -------------------------------------------------------------
  
-       The choice of link monitoring ultimately depends upon your
-switch.  If the switch can reliably fail ports in response to other
-failures, then either the MII or ARP monitors should work.  For
-example, in the above example, if the "port3" link fails at the remote
-end, the MII monitor has no direct means to detect this.  The ARP
-monitor could be configured with a target at the remote end of port3,
-thus detecting that failure without switch support.
+       In actual practice, the bonding mode typically employed in
+configurations of this type is balance-rr.  Historically, in this
+network configuration, the usual caveats about out of order packet
+delivery are mitigated by the use of network adapters that do not do
+any kind of packet coalescing (via the use of NAPI, or because the
+device itself does not generate interrupts until some number of
+packets has arrived).  When employed in this fashion, the balance-rr
+mode allows individual connections between two hosts to effectively
+utilize greater than one interface's bandwidth.
  
-       In general, however, in a multiple switch topology, the ARP
-monitor can provide a higher level of reliability in detecting link
-failures.  Additionally, it should be configured with multiple targets
-(at least one for each switch in the network).  This will insure that,
-regardless of which switch is active, the ARP monitor has a suitable
-target to query.
+13.2.2 MT Link Monitoring for Multiple Switch Topology
+------------------------------------------------------
  
+       Again, in actual practice, the MII monitor is most often used
+in this configuration, as performance is given preference over
+availability.  The ARP monitor will function in this topology, but its
+advantages over the MII monitor are mitigated by the volume of probes
+needed as the number of systems involved grows (remember that each
+host in the network is configured with bonding).
  
-12.3 Switch Behavior Issues for High Availability
--------------------------------------------------
+14. Switch Behavior Issues
+==========================
  
-       You may encounter issues with the timing of link up and down
-reporting by the switch.
+14.1 Link Establishment and Failover Delays
+-------------------------------------------
+
+       Some switches exhibit undesirable behavior with regard to the
+timing of link up and down reporting by the switch.
  
         First, when a link comes up, some switches may indicate that
  the link is up (carrier available), but not pass traffic over the
@@ -1370,30 +1696,70 @@ relevant interface(s).
         Second, some switches may "bounce" the link state one or more
  times while a link is changing state.  This occurs most commonly while
  the switch is initializing.  Again, an appropriate updelay value may
-help, but note that if all links are down, then updelay is ignored
-when any link becomes active (the slave closest to completing its
-updelay is chosen).
+help.
  
         Note that when a bonding interface has no active links, the
-driver will immediately reuse the first link that goes up, even if
-updelay parameter was specified.  If there are slave interfaces
-waiting for the updelay timeout to expire, the interface that first
-went into that state will be immediately reused.  This reduces down
-time of the network if the value of updelay has been overestimated.
+driver will immediately reuse the first link that goes up, even if the
+updelay parameter has been specified (the updelay is ignored in this
+case).  If there are slave interfaces waiting for the updelay timeout
+to expire, the interface that first went into that state will be
+immediately reused.  This reduces down time of the network if the
+value of updelay has been overestimated, and since this occurs only in
+cases with no connectivity, there is no additional penalty for
+ignoring the updelay.
  
         In addition to the concerns about switch timings, if your
  switches take a long time to go into backup mode, it may be desirable
  to not activate a backup interface immediately after a link goes down.
  Failover may be delayed via the downdelay bonding module option.
  
-13. Hardware Specific Considerations
+14.2 Duplicated Incoming Packets
+--------------------------------
+
+       It is not uncommon to observe a short burst of duplicated
+traffic when the bonding device is first used, or after it has been
+idle for some period of time.  This is most easily observed by issuing
+a "ping" to some other host on the network, and noticing that the
+output from ping flags duplicates (typically one per slave).
+
+       For example, on a bond in active-backup mode with five slaves
+all connected to one switch, the output may appear as follows:
+
+# ping -n 10.0.4.2
+PING 10.0.4.2 (10.0.4.2) from 10.0.3.10 : 56(84) bytes of data.
+64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.7 ms
+64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
+64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
+64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
+64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
+64 bytes from 10.0.4.2: icmp_seq=2 ttl=64 time=0.216 ms
+64 bytes from 10.0.4.2: icmp_seq=3 ttl=64 time=0.267 ms
+64 bytes from 10.0.4.2: icmp_seq=4 ttl=64 time=0.222 ms
+
+       This is not due to an error in the bonding driver; rather, it
+is a side effect of how many switches update their MAC forwarding
+tables.  Initially, the switch does not associate the MAC address in
+the packet with a particular switch port, and so it may send the
+traffic to all ports until its MAC forwarding table is updated.  Since
+the interfaces attached to the bond may occupy multiple ports on a
+single switch, when the switch (temporarily) floods the traffic to all
+ports, the bond device receives multiple copies of the same packet
+(one per slave device).
+
+       The duplicated packet behavior is switch dependent; some
+switches exhibit this, and some do not.  On switches that display this
+behavior, it can be induced by clearing the MAC forwarding table (on
+most Cisco switches, the privileged command "clear mac address-table
+dynamic" will accomplish this).
+
+15. Hardware Specific Considerations
  ====================================
  
         This section contains additional information for configuring
  bonding on specific hardware platforms, or for interfacing bonding
  with particular switches or other devices.
  
-13.1 IBM BladeCenter
+15.1 IBM BladeCenter
  --------------------
  
         This applies to the JS20 and similar systems.
@@ -1407,12 +1773,12 @@ JS20 network adapter information
  --------------------------------
  
         All JS20s come with two Broadcom Gigabit Ethernet ports
-integrated on the planar.  In the BladeCenter chassis, the eth0 port
-of all JS20 blades is hard wired to I/O Module #1; similarly, all eth1
-ports are wired to I/O Module #2.  An add-on Broadcom daughter card
-can be installed on a JS20 to provide two more Gigabit Ethernet ports.
-These ports, eth2 and eth3, are wired to I/O Modules 3 and 4,
-respectively.
+integrated on the planar (that's "motherboard" in IBM-speak).  In the
+BladeCenter chassis, the eth0 port of all JS20 blades is hard wired to
+I/O Module #1; similarly, all eth1 ports are wired to I/O Module #2.
+An add-on Broadcom daughter card can be installed on a JS20 to provide
+two more Gigabit Ethernet ports.  These ports, eth2 and eth3, are
+wired to I/O Modules 3 and 4, respectively.
  
         Each I/O Module may contain either a switch or a passthrough
  module (which allows ports to be directly connected to an external
@@ -1432,29 +1798,30 @@ BladeCenter networking configuration
  of ways, this discussion will be confined to describing basic
  configurations.
  
-       Normally, Ethernet Switch Modules (ESM) are used in I/O
+       Normally, Ethernet Switch Modules (ESMs) are used in I/O
  modules 1 and 2.  In this configuration, the eth0 and eth1 ports of a
  JS20 will be connected to different internal switches (in the
  respective I/O modules).
  
-       An optical passthru module (OPM) connects the I/O module
-directly to an external switch.  By using OPMs in I/O module #1 and
-#2, the eth0 and eth1 interfaces of a JS20 can be redirected to the
-outside world and connected to a common external switch.
-
-       Depending upon the mix of ESM and OPM modules, the network
-will appear to bonding as either a single switch topology (all OPM
-modules) or as a multiple switch topology (one or more ESM modules,
-zero or more OPM modules).  It is also possible to connect ESM modules
-together, resulting in a configuration much like the example in "High
-Availability in a multiple switch topology."
-
-Requirements for specifc modes
-------------------------------
-
-       The balance-rr mode requires the use of OPM modules for
-devices in the bond, all connected to an common external switch.  That
-switch must be configured for "etherchannel" or "trunking" on the
+       A passthrough module (OPM or CPM: an optical or copper
+passthrough module) connects the I/O module directly to an external
+switch.  By using PMs in I/O module #1 and #2, the eth0 and eth1
+interfaces of a JS20 can be redirected to the outside world and
+connected to a common external switch.
+
+       Depending upon the mix of ESMs and PMs, the network will
+appear to bonding as either a single switch topology (all PMs) or as a
+multiple switch topology (one or more ESMs, zero or more PMs).  It is
+also possible to connect ESMs together, resulting in a configuration
+much like the example in "High Availability in a Multiple Switch
+Topology," above.
+
+Requirements for specific modes
+-------------------------------
+
+       The balance-rr mode requires the use of passthrough modules
+for devices in the bond, all connected to a common external switch.
+That switch must be configured for "etherchannel" or "trunking" on the
  appropriate ports, as is usual for balance-rr.
  
         The balance-alb and balance-tlb modes will function with
@@ -1484,17 +1851,18 @@ connected to the JS20 system.
  Other concerns
  --------------
  
-       The Serial Over LAN link is established over the primary
+       The Serial Over LAN (SoL) link is established over the primary
  ethernet (eth0) only, therefore, any loss of link to eth0 will result
  in losing your SoL connection.  It will not fail over with other
-network traffic.
+network traffic, as the SoL system is beyond the control of the
+bonding driver.
  
         It may be desirable to disable spanning tree on the switch
  (either the internal Ethernet Switch Module, or an external switch) to
-avoid fail-over delays issues when using bonding.
+avoid fail-over delay issues when using bonding.
  
         
-14. Frequently Asked Questions
+15. Frequently Asked Questions
  ==============================
  
  1.  Is it SMP safe?
@@ -1505,8 +1873,8 @@ The new driver was designed to be SMP safe from the start.
  2.  What type of cards will work with it?
  
         Any Ethernet type cards (you can even mix cards - a Intel
-EtherExpress PRO/100 and a 3com 3c905b, for example).  They need not
-be of the same speed.
+EtherExpress PRO/100 and a 3com 3c905b, for example).  For most modes,
+devices need not be of the same speed.
  
  3.  How many bonding devices can I have?
  
@@ -1524,11 +1892,12 @@ system.
  disabled.  The active-backup mode will fail over to a backup link, and
  other modes will ignore the failed link.  The link will continue to be
  monitored, and should it recover, it will rejoin the bond (in whatever
-manner is appropriate for the mode). See the section on High
-Availability for additional information.
+manner is appropriate for the mode). See the sections on High
+Availability and the documentation for each mode for additional
+information.
         
         Link monitoring can be enabled via either the miimon or
-arp_interval paramters (described in the module paramters section,
+arp_interval parameters (described in the module parameters section,
  above).  In general, miimon monitors the carrier state as sensed by
  the underlying network device, and the arp monitor (arp_interval)
  monitors connectivity to another host on the local network.
@@ -1536,7 +1905,7 @@ monitors connectivity to another host on the local network.
         If no link monitoring is configured, the bonding driver will
  be unable to detect link failures, and will assume that all links are
  always available.  This will likely result in lost packets, and a
-resulting degredation of performance.  The precise performance loss
+resulting degradation of performance.  The precise performance loss
  depends upon the bonding mode and network configuration.
  
  6.  Can bonding be used for High Availability?
@@ -1550,12 +1919,12 @@ depends upon the bonding mode and network configuration.
         In the basic balance modes (balance-rr and balance-xor), it
  works with any system that supports etherchannel (also called
  trunking).  Most managed switches currently available have such
-support, and many unmananged switches as well.
+support, and many unmanaged switches as well.
  
         The advanced balance modes (balance-tlb and balance-alb) do
  not have special switch requirements, but do need device drivers that
  support specific features (described in the appropriate section under
-module paramters, above).
+module parameters, above).
  
         In 802.3ad mode, it works with with systems that support IEEE
  802.3ad Dynamic Link Aggregation.  Most managed and many unmanaged
@@ -1565,17 +1934,19 @@ switches currently available support 802.3ad.
  
  8.  Where does a bonding device get its MAC address from?
  
-       If not explicitly configured with ifconfig, the MAC address of
-the bonding device is taken from its first slave device. This MAC
-address is then passed to all following slaves and remains persistent
-(even if the the first slave is removed) until the bonding device is
-brought down or reconfigured.
+       If not explicitly configured (with ifconfig or ip link), the
+MAC address of the bonding device is taken from its first slave
+device.  This MAC address is then passed to all following slaves and
+remains persistent (even if the first slave is removed) until the
+bonding device is brought down or reconfigured.
  
         If you wish to change the MAC address, you can set it with
-ifconfig:
+ifconfig or ip link:
  
  # ifconfig bond0 hw ether 00:11:22:33:44:55
  
+# ip link set bond0 address 66:77:88:99:aa:bb
+
         The MAC address can be also changed by bringing down/up the
  device and then changing its slaves (or their order):
  
@@ -1591,23 +1962,28 @@ from the bond (`ifenslave -d bond0 eth0'). The bonding driver will
  then restore the MAC addresses that the slaves had before they were
  enslaved.
  
-15. Resources and Links
+16. Resources and Links
  =======================
  
  The latest version of the bonding driver can be found in the latest
  version of the linux kernel, found on http://kernel.org
  
+The latest version of this document can be found in either the latest
+kernel source (named Documentation/networking/bonding.txt), or on the
+bonding sourceforge site:
+
+http://www.sourceforge.net/projects/bonding
+
  Discussions regarding the bonding driver take place primarily on the
  bonding-devel mailing list, hosted at sourceforge.net.  If you have
-questions or problems, post them to the list.
+questions or problems, post them to the list.  The list address is:
  
  bonding-devel@lists.sourceforge.net
  
-https://lists.sourceforge.net/lists/listinfo/bonding-devel
-
-There is also a project site on sourceforge.
+       The administrative interface (to subscribe or unsubscribe) can
+be found at:
  
-http://www.sourceforge.net/projects/bonding
+https://lists.sourceforge.net/lists/listinfo/bonding-devel
  
  Donald Becker's Ethernet Drivers and diag programs may be found at :
   - http://www.scyld.com/network/
diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt

new file mode 100644 (file)

index 0000000..29ccae4
--- /dev/null
+++ b/Documentation/networking/phy.txt
@@ -0,0 +1,288 @@
+
+-------
+PHY Abstraction Layer
+(Updated 2005-07-21)
+
+Purpose
+
+ Most network devices consist of a set of registers which provide an interface
+ to a MAC layer, which communicates with the physical connection through a
+ PHY.  The PHY concerns itself with negotiating link parameters with the link
+ partner on the other side of the network connection (typically, an ethernet
+ cable), and provides a register interface to allow drivers to determine what
+ settings were chosen, and to configure what settings are allowed.
+
+ While these devices are distinct from the network devices, and conform to a
+ standard layout for the registers, it has been common practice to integrate
+ the PHY management code with the network driver.  This has resulted in large
+ amounts of redundant code.  Also, on embedded systems with multiple (and
+ sometimes quite different) ethernet controllers connected to the same 
+ management bus, it is difficult to ensure safe use of the bus.
+
+ Since the PHYs are devices, and the management busses through which they are
+ accessed are, in fact, busses, the PHY Abstraction Layer treats them as such.
+ In doing so, it has these goals:
+
+   1) Increase code-reuse
+   2) Increase overall code-maintainability
+   3) Speed development time for new network drivers, and for new systems
+ 
+ Basically, this layer is meant to provide an interface to PHY devices which
+ allows network driver writers to write as little code as possible, while
+ still providing a full feature set.
+
+The MDIO bus
+
+ Most network devices are connected to a PHY by means of a management bus.
+ Different devices use different busses (though some share common interfaces).
+ In order to take advantage of the PAL, each bus interface needs to be
+ registered as a distinct device.
+
+ 1) read and write functions must be implemented.  Their prototypes are:
+
+     int write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
+     int read(struct mii_bus *bus, int mii_id, int regnum);
+
+   mii_id is the address on the bus for the PHY, and regnum is the register
+   number.  These functions are guaranteed not to be called from interrupt
+   time, so it is safe for them to block, waiting for an interrupt to signal
+   the operation is complete.
+ 
+ 2) A reset function is necessary.  This is used to return the bus to an
+   initialized state.
+
+ 3) A probe function is needed.  This function should set up anything the bus
+   driver needs, setup the mii_bus structure, and register with the PAL using
+   mdiobus_register.  Similarly, there's a remove function to undo all of
+   that (use mdiobus_unregister).
+ 
+ 4) Like any driver, the device_driver structure must be configured, and init
+   and exit functions are used to register the driver.
+
+ 5) The bus must also be declared somewhere as a device, and registered.
+
+ As an example for how one driver implemented an mdio bus driver, see
+ drivers/net/gianfar_mii.c and arch/ppc/syslib/mpc85xx_devices.c
+
+Connecting to a PHY
+
+ Sometime during startup, the network driver needs to establish a connection
+ between the PHY device, and the network device.  At this time, the PHY's bus
+ and drivers need to all have been loaded, so it is ready for the connection.
+ At this point, there are several ways to connect to the PHY:
+
+ 1) The PAL handles everything, and only calls the network driver when
+   the link state changes, so it can react.
+
+ 2) The PAL handles everything except interrupts (usually because the
+   controller has the interrupt registers).
+
+ 3) The PAL handles everything, but checks in with the driver every second,
+   allowing the network driver to react first to any changes before the PAL
+   does.
+ 
+ 4) The PAL serves only as a library of functions, with the network device
+   manually calling functions to update status, and configure the PHY.
+
+
+Letting the PHY Abstraction Layer do Everything
+
+ If you choose option 1 (the hope is that every driver can, though the PAL is
+ still useful to drivers that can't), connecting to the PHY is simple:
+
+ First, you need a function to react to changes in the link state.  This
+ function follows this prototype:
+
+   static void adjust_link(struct net_device *dev);
+ 
+ Next, you need to know the device name of the PHY connected to this device. 
+ The name will look something like, "phy0:0", where the first number is the
+ bus id, and the second is the PHY's address on that bus.
+ 
+ Now, to connect, just call this function:
+ 
+   phydev = phy_connect(dev, phy_name, &adjust_link, flags);
+
+ phydev is a pointer to the phy_device structure which represents the PHY.  If
+ phy_connect is successful, it will return the pointer.  dev, here, is the
+ pointer to your net_device.  Once done, this function will have started the
+ PHY's software state machine, and registered for the PHY's interrupt, if it
+ has one.  The phydev structure will be populated with information about the
+ current state, though the PHY will not yet be truly operational at this
+ point.
+
+ flags is a u32 which can optionally contain phy-specific flags.
+ This is useful if the system has put hardware restrictions on
+ the PHY/controller, of which the PHY needs to be aware.
+
+ Now just make sure that phydev->supported and phydev->advertising have any
+ values pruned from them which don't make sense for your controller (a 10/100
+ controller may be connected to a gigabit capable PHY, so you would need to
+ mask off SUPPORTED_1000baseT*).  See include/linux/ethtool.h for definitions
+ for these bitfields. Note that you should not SET any bits, or the PHY may
+ get put into an unsupported state.
+
+ Lastly, once the controller is ready to handle network traffic, you call
+ phy_start(phydev).  This tells the PAL that you are ready, and configures the
+ PHY to connect to the network.  If you want to handle your own interrupts,
+ just set phydev->irq to PHY_IGNORE_INTERRUPT before you call phy_start.
+ Similarly, if you don't want to use interrupts, set phydev->irq to PHY_POLL.
+
+ When you want to disconnect from the network (even if just briefly), you call
+ phy_stop(phydev).
+
+Keeping Close Tabs on the PAL
+
+ It is possible that the PAL's built-in state machine needs a little help to
+ keep your network device and the PHY properly in sync.  If so, you can
+ register a helper function when connecting to the PHY, which will be called
+ every second before the state machine reacts to any changes.  To do this, you
+ need to manually call phy_attach() and phy_prepare_link(), and then call
+ phy_start_machine() with the second argument set to point to your special
+ handler.
+
+ Currently there are no examples of how to use this functionality, and testing
+ on it has been limited because the author does not have any drivers which use
+ it (they all use option 1).  So Caveat Emptor.
+
+Doing it all yourself
+
+ There's a remote chance that the PAL's built-in state machine cannot track
+ the complex interactions between the PHY and your network device.  If this is
+ so, you can simply call phy_attach(), and not call phy_start_machine or
+ phy_prepare_link().  This will mean that phydev->state is entirely yours to
+ handle (phy_start and phy_stop toggle between some of the states, so you
+ might need to avoid them).
+
+ An effort has been made to make sure that useful functionality can be
+ accessed without the state-machine running, and most of these functions are
+ descended from functions which did not interact with a complex state-machine.
+ However, again, no effort has been made so far to test running without the
+ state machine, so tryer beware.
+
+ Here is a brief rundown of the functions:
+
+ int phy_read(struct phy_device *phydev, u16 regnum);
+ int phy_write(struct phy_device *phydev, u16 regnum, u16 val);
+
+   Simple read/write primitives.  They invoke the bus's read/write function
+   pointers.
+
+ void phy_print_status(struct phy_device *phydev);
+ 
+   A convenience function to print out the PHY status neatly.
+
+ int phy_clear_interrupt(struct phy_device *phydev);
+ int phy_config_interrupt(struct phy_device *phydev, u32 interrupts);
+   
+   Clear the PHY's interrupt, and configure which ones are allowed,
+   respectively.  Currently only supports all on, or all off.
+ 
+ int phy_enable_interrupts(struct phy_device *phydev);
+ int phy_disable_interrupts(struct phy_device *phydev);
+
+   Functions which enable/disable PHY interrupts, clearing them
+   before and after, respectively.
+
+ int phy_start_interrupts(struct phy_device *phydev);
+ int phy_stop_interrupts(struct phy_device *phydev);
+
+   Requests the IRQ for the PHY interrupts, then enables them for
+   start, or disables then frees them for stop.
+
+ struct phy_device * phy_attach(struct net_device *dev, const char *phy_id,
+                u32 flags);
+
+   Attaches a network device to a particular PHY, binding the PHY to a generic
+   driver if none was found during bus initialization.  Passes in
+   any phy-specific flags as needed.
+
+ int phy_start_aneg(struct phy_device *phydev);
+   
+   Using variables inside the phydev structure, either configures advertising
+   and resets autonegotiation, or disables autonegotiation, and configures
+   forced settings.
+
+ static inline int phy_read_status(struct phy_device *phydev);
+
+   Fills the phydev structure with up-to-date information about the current
+   settings in the PHY.
+
+ void phy_sanitize_settings(struct phy_device *phydev);
+   
+   Resolves differences between currently desired settings, and
+   supported settings for the given PHY device.  Does not make
+   the changes in the hardware, though.
+
+ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+
+   Ethtool convenience functions.
+
+ int phy_mii_ioctl(struct phy_device *phydev,
+                 struct mii_ioctl_data *mii_data, int cmd);
+
+   The MII ioctl.  Note that this function will completely screw up the state
+   machine if you write registers like BMCR, BMSR, ADVERTISE, etc.  Best to
+   use this only to write registers which are not standard, and don't set off
+   a renegotiation.
+
+
+PHY Device Drivers
+
+ With the PHY Abstraction Layer, adding support for new PHYs is
+ quite easy.  In some cases, no work is required at all!  However,
+ many PHYs require a little hand-holding to get up-and-running.
+
+Generic PHY driver
+
+ If the desired PHY doesn't have any errata, quirks, or special
+ features you want to support, then it may be best to not add
+ support, and let the PHY Abstraction Layer's Generic PHY Driver
+ do all of the work.  
+
+Writing a PHY driver
+
+ If you do need to write a PHY driver, the first thing to do is
+ make sure it can be matched with an appropriate PHY device.
+ This is done during bus initialization by reading the device's
+ UID (stored in registers 2 and 3), then comparing it to each
+ driver's phy_id field by ANDing it with each driver's
+ phy_id_mask field.  Also, it needs a name.  Here's an example:
+
+   static struct phy_driver dm9161_driver = {
+         .phy_id         = 0x0181b880,
+        .name           = "Davicom DM9161E",
+        .phy_id_mask    = 0x0ffffff0,
+        ...
+   }
+
+ Next, you need to specify what features (speed, duplex, autoneg,
+ etc) your PHY device and driver support.  Most PHYs support
+ PHY_BASIC_FEATURES, but you can look in include/mii.h for other
+ features.
+
+ Each driver consists of a number of function pointers:
+
+   config_init: configures PHY into a sane state after a reset.
+     For instance, a Davicom PHY requires descrambling disabled.
+   probe: Does any setup needed by the driver
+   suspend/resume: power management
+   config_aneg: Changes the speed/duplex/negotiation settings
+   read_status: Reads the current speed/duplex/negotiation settings
+   ack_interrupt: Clear a pending interrupt
+   config_intr: Enable or disable interrupts
+   remove: Does any driver take-down
+
+ Of these, only config_aneg and read_status are required to be
+ assigned by the driver code.  The rest are optional.  Also, it is
+ preferred to use the generic phy driver's versions of these two
+ functions if at all possible: genphy_read_status and
+ genphy_config_aneg.  If this is not possible, it is likely that
+ you only need to perform some actions before and after invoking
+ these functions, and so your functions will wrap the generic
+ ones.
+
+ Feel free to look at the Marvell, Cicada, and Davicom drivers in
+ drivers/net/phy/ for examples (the lxt and qsemi drivers have
+ not been tested as of this writing).
diff --git a/Documentation/pci.txt b/Documentation/pci.txt

index 62b1dc5d97e2e90523e8010b93054f81ef3ffe58..76d28d033657aac4158b8db93821553f332d6b11 100644 (file)
--- a/Documentation/pci.txt
+++ b/Documentation/pci.txt
@@ -266,20 +266,6 @@ port an old driver to the new PCI interface.  They are no longer present
  in the kernel as they aren't compatible with hotplug or PCI domains or
  having sane locking.
  
-pcibios_present() and          Since ages, you don't need to test presence
-pci_present()                  of PCI subsystem when trying to talk to it.
-                               If it's not there, the list of PCI devices
-                               is empty and all functions for searching for
-                               devices just return NULL.
-pcibios_(read|write)_*         Superseded by their pci_(read|write)_*
-                               counterparts.
-pcibios_find_*                 Superseded by their pci_get_* counterparts.
-pci_for_each_dev()             Superseded by pci_get_device()
-pci_for_each_dev_reverse()     Superseded by pci_find_device_reverse()
-pci_for_each_bus()             Superseded by pci_find_next_bus()
  pci_find_device()              Superseded by pci_get_device()
  pci_find_subsys()              Superseded by pci_get_subsys()
  pci_find_slot()                        Superseded by pci_get_slot()
-pcibios_find_class()           Superseded by pci_get_class()
-pci_find_class()               Superseded by pci_get_class()
-pci_(read|write)_*_nodev()     Superseded by pci_bus_(read|write)_*()
diff --git a/Documentation/usb/usbmon.txt b/Documentation/usb/usbmon.txt

index f1896ee3bb2abf97f9b09f811a16db6fe9f3d1d2..63cb7edd177ef87e304fb7500596ffaa72c42633 100644 (file)
--- a/Documentation/usb/usbmon.txt
+++ b/Documentation/usb/usbmon.txt
@@ -102,7 +102,7 @@ Here is the list of words, from left to right:
  - URB Status. This field makes no sense for submissions, but is present
    to help scripts with parsing. In error case, it contains the error code.
    In case of a setup packet, it contains a Setup Tag. If scripts read a number
-  in this field, the proceed to read Data Length. Otherwise, they read
+  in this field, they proceed to read Data Length. Otherwise, they read
    the setup packet before reading the Data Length.
  - Setup packet, if present, consists of 5 words: one of each for bmRequestType,
    bRequest, wValue, wIndex, wLength, as specified by the USB Specification 2.0.
diff --git a/Documentation/video4linux/CARDLIST.cx88 b/Documentation/video4linux/CARDLIST.cx88

index 6d44958289de94186ca1418edf8ae27b01dd6d46..03deb0726aa4476b2c141eb8e16bfb49b9c47704 100644 (file)
--- a/Documentation/video4linux/CARDLIST.cx88
+++ b/Documentation/video4linux/CARDLIST.cx88
@@ -29,3 +29,4 @@ card=27 - PixelView PlayTV Ultra Pro (Stereo)
  card=28 - DViCO FusionHDTV 3 Gold-T
  card=29 - ADS Tech Instant TV DVB-T PCI
  card=30 - TerraTec Cinergy 1400 DVB-T
+card=31 - DViCO FusionHDTV 5 Gold
diff --git a/Documentation/video4linux/CARDLIST.tuner b/Documentation/video4linux/CARDLIST.tuner

index d1b9d21ffd89a7b9199845bfce45ed4fdd5586dd..f3302e1b1b9c4a31836612917336af57094befcd 100644 (file)
--- a/Documentation/video4linux/CARDLIST.tuner
+++ b/Documentation/video4linux/CARDLIST.tuner
@@ -62,3 +62,5 @@ tuner=60 - Thomson DDT 7611 (ATSC/NTSC)
  tuner=61 - Tena TNF9533-D/IF/TNF9533-B/DF
  tuner=62 - Philips TEA5767HN FM Radio
  tuner=63 - Philips FMD1216ME MK3 Hybrid Tuner
+tuner=64 - LG TDVS-H062F/TUA6034
+tuner=65 - Ymec TVF66T5-B/DFF
diff --git a/Documentation/video4linux/bttv/Insmod-options b/Documentation/video4linux/bttv/Insmod-options

index 7bb5a50b07796f365007d86702e0bc0a56e7368b..fc94ff235ffac51f1f0079bbe673f47c3a460632 100644 (file)
--- a/Documentation/video4linux/bttv/Insmod-options
+++ b/Documentation/video4linux/bttv/Insmod-options
@@ -44,6 +44,9 @@ bttv.o
                                 push used by bttv.  bttv will disable overlay
                                 by default on this hardware to avoid crashes.
                                 With this insmod option you can override this.
+               no_overlay=1    Disable overlay. It should be used with broken
+                               hardware that doesn't support PCI2PCI direct
+                               transfers.
                 automute=0/1    Automatically mutes the sound if there is
                                 no TV signal, on by default.  You might try
                                 to disable this if you have bad input signal
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt

index 476c0c22fbb7e43788b543380c0aaf82f9c7dba3..678e8f192db2917c741ca0b88ddc97f761a4a8d7 100644 (file)
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -6,6 +6,11 @@ only the AMD64 specific ones are listed here.
  Machine check
  
     mce=off disable machine check
+   mce=bootlog Enable logging of machine checks left over from booting.
+               Disabled by default because some BIOSes leave bogus ones.
+               If your BIOS doesn't do that, it's a good idea to enable it
+               to make sure you log even machine check events that result
+               in a reboot.
  
     nomce (for compatibility with i386): same as mce=off
  
diff --git a/MAINTAINERS b/MAINTAINERS

index ec8433c39deee71023cfd130f739bd62d8e7faff..564a03e61a0c4b2322136ab9d96b3f121391ee83 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -784,7 +784,7 @@ DVB SUBSYSTEM AND DRIVERS
  P:     LinuxTV.org Project
  M:     linux-dvb-maintainer@linuxtv.org
  L:     linux-dvb@linuxtv.org (subscription required)
-W:     http://linuxtv.org/developer/dvb.xml
+W:     http://linuxtv.org/
  S:     Supported
  
  EATA-DMA SCSI DRIVER
@@ -1521,6 +1521,12 @@ P:       Zach Brown
  M:     zab@zabbo.net
  S:     Odd Fixes
  
+MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
+P: Michael Kerrisk
+M: mtk-manpages@gmx.net
+W: ftp://ftp.kernel.org/pub/linux/docs/manpages
+S: Maintained
+
  MARVELL MV64340 ETHERNET DRIVER
  P:     Manish Lachwani
  M:     Manish_Lachwani@pmc-sierra.com
@@ -1652,7 +1658,7 @@ M:        kuznet@ms2.inr.ac.ru
  P:     Pekka Savola (ipv6)
  M:     pekkas@netcore.fi
  P:     James Morris
-M:     jmorris@redhat.com
+M:     jmorris@namei.org
  P:     Hideaki YOSHIFUJI
  M:     yoshfuji@linux-ipv6.org
  P:     Patrick McHardy
@@ -1733,7 +1739,7 @@ S:        Maintained
  
  OPL3-SA2, SA3, and SAx DRIVER
  P:     Zwane Mwaikambo
-M:     zwane@commfireservices.com
+M:     zwane@arm.linux.org.uk
  L:     linux-sound@vger.kernel.org
  S:     Maintained
  
@@ -1819,6 +1825,12 @@ P:       Greg Kroah-Hartman
  M:     greg@kroah.com
  S:     Maintained
  
+PCIE HOTPLUG DRIVER
+P:     Kristen Carlson Accardi
+M:     kristen.c.accardi@intel.com
+L:     pcihpd-discuss@lists.sourceforge.net
+S:     Maintained
+
  PCMCIA SUBSYSTEM
  P:     Linux PCMCIA Team
  L:     http://lists.infradead.org/mailman/listinfo/linux-pcmcia
@@ -1983,7 +1995,7 @@ S:        Maintained
  
  SC1200 WDT DRIVER
  P:     Zwane Mwaikambo
-M:     zwane@commfireservices.com
+M:     zwane@arm.linux.org.uk
  S:     Maintained
  
  SCHEDULER
@@ -2041,7 +2053,7 @@ SELINUX SECURITY MODULE
  P:     Stephen Smalley
  M:     sds@epoch.ncsc.mil
  P:     James Morris
-M:     jmorris@redhat.com
+M:     jmorris@namei.org
  L:     linux-kernel@vger.kernel.org (kernel issues)
  L:     selinux@tycho.nsa.gov (general discussion)
  W:     http://www.nsa.gov/selinux
@@ -2195,6 +2207,12 @@ W:       http://projects.buici.com/arm
  L:     linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
  S:     Maintained
  
+SHPC HOTPLUG DRIVER
+P:     Kristen Carlson Accardi
+M:     kristen.c.accardi@intel.com
+L:     pcihpd-discuss@lists.sourceforge.net
+S:     Maintained
+
  SPARC (sparc32):
  P:     William L. Irwin
  M:     wli@holomorphy.com
diff --git a/Makefile b/Makefile

index 717b9b9192d5f535da43fbc658fed341cbde4c6c..3d84df581cf23c287cff7a816c79afd3ec049c42 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
  VERSION = 2
  PATCHLEVEL = 6
  SUBLEVEL = 13
-EXTRAVERSION =-rc4
-NAME=Woozy Numbat
+EXTRAVERSION =
+NAME=Affluent Albatross
  
  # *DOCUMENTATION*
  # To see a list of typical targets execute "make help"
diff --git a/REPORTING-BUGS b/REPORTING-BUGS

index 2045eaea2d9e13d199d53d795260e5650baf1e7e..224c34741d32d139aec5d5ff110cfd5398565f58 100644 (file)
--- a/REPORTING-BUGS
+++ b/REPORTING-BUGS
@@ -41,18 +41,19 @@ summary from [1.]>" for easy identification by the developers
  [2.] Full description of the problem/report:
  [3.] Keywords (i.e., modules, networking, kernel):
  [4.] Kernel version (from /proc/version):
-[5.] Output of Oops.. message (if applicable) with symbolic information 
+[5.] Most recent kernel version which did not have the bug:
+[6.] Output of Oops.. message (if applicable) with symbolic information
       resolved (see Documentation/oops-tracing.txt)
-[6.] A small shell script or example program which triggers the
+[7.] A small shell script or example program which triggers the
       problem (if possible)
-[7.] Environment
-[7.1.] Software (add the output of the ver_linux script here)
-[7.2.] Processor information (from /proc/cpuinfo):
-[7.3.] Module information (from /proc/modules):
-[7.4.] Loaded driver and hardware information (/proc/ioports, /proc/iomem)
-[7.5.] PCI information ('lspci -vvv' as root)
-[7.6.] SCSI information (from /proc/scsi/scsi)
-[7.7.] Other information that might be relevant to the problem
+[8.] Environment
+[8.1.] Software (add the output of the ver_linux script here)
+[8.2.] Processor information (from /proc/cpuinfo):
+[8.3.] Module information (from /proc/modules):
+[8.4.] Loaded driver and hardware information (/proc/ioports, /proc/iomem)
+[8.5.] PCI information ('lspci -vvv' as root)
+[8.6.] SCSI information (from /proc/scsi/scsi)
+[8.7.] Other information that might be relevant to the problem
         (please look in /proc and include all information that you
         think to be relevant):
  [X.] Other notes, patches, fixes, workarounds:
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig

index 083c5df42d35bea201f08e6875fe038b1596d44c..189d5eababa8d15708e15e354e0eb372d5e7fad1 100644 (file)
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -522,7 +522,7 @@ source "mm/Kconfig"
  
  config NUMA
         bool "NUMA Support (EXPERIMENTAL)"
-       depends on DISCONTIGMEM
+       depends on DISCONTIGMEM && BROKEN
         help
           Say Y to compile the kernel to support NUMA (Non-Uniform Memory
           Access).  This option is for configuring high-end multiprocessor
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c

index 1f36bbd0ed5db64e88005d79be5c08d6be384998..2a8b364c822e9f0e17c9ead5352ab84ad9f1ace6 100644 (file)
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -350,8 +350,24 @@ pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
         region->end = res->end - offset;
  }
  
+void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                            struct pci_bus_region *region)
+{
+       struct pci_controller *hose = (struct pci_controller *)dev->sysdata;
+       unsigned long offset = 0;
+
+       if (res->flags & IORESOURCE_IO)
+               offset = hose->io_space->start;
+       else if (res->flags & IORESOURCE_MEM)
+               offset = hose->mem_space->start;
+
+       res->start = region->start + offset;
+       res->end = region->end + offset;
+}
+
  #ifdef CONFIG_HOTPLUG
  EXPORT_SYMBOL(pcibios_resource_to_bus);
+EXPORT_SYMBOL(pcibios_bus_to_resource);
  #endif
  
  int
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c

index 08fe8071a7f8fde380e1fa7338fef07ab8925ef5..2e45e8604e325454e87681321c862e57b19b1355 100644 (file)
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -566,13 +566,12 @@ handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info,
         if (ka->sa.sa_flags & SA_RESETHAND)
                 ka->sa.sa_handler = SIG_DFL;
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER)) 
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  static inline void
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c

index 8f1e78551b1e39b669bf696284802ad216f2d065..e211aa7404e6152c4668277fdc03872547d8fab1 100644 (file)
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -1036,7 +1036,7 @@ debug_spin_lock(spinlock_t * lock, const char *base_file, int line_no)
         "       br      1b\n"
         ".previous"
         : "=r" (tmp), "=m" (lock->lock), "=r" (stuck)
-       : "1" (lock->lock), "2" (stuck) : "memory");
+       : "m" (lock->lock), "2" (stuck) : "memory");
  
         if (stuck < 0) {
                 printk(KERN_WARNING
@@ -1115,7 +1115,7 @@ void _raw_write_lock(rwlock_t * lock)
         ".previous"
         : "=m" (*(volatile int *)lock), "=&r" (regx), "=&r" (regy),
           "=&r" (stuck_lock), "=&r" (stuck_reader)
-       : "0" (*(volatile int *)lock), "3" (stuck_lock), "4" (stuck_reader) : "memory");
+       : "m" (*(volatile int *)lock), "3" (stuck_lock), "4" (stuck_reader) : "memory");
  
         if (stuck_lock < 0) {
                 printk(KERN_WARNING "write_lock stuck at %p\n", inline_pc);
@@ -1153,7 +1153,7 @@ void _raw_read_lock(rwlock_t * lock)
         "       br      1b\n"
         ".previous"
         : "=m" (*(volatile int *)lock), "=&r" (regx), "=&r" (stuck_lock)
-       : "0" (*(volatile int *)lock), "2" (stuck_lock) : "memory");
+       : "m" (*(volatile int *)lock), "2" (stuck_lock) : "memory");
  
         if (stuck_lock < 0) {
                 printk(KERN_WARNING "read_lock stuck at %p\n", inline_pc);
diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c

index 908eb4af8decfe263e0908297256e7bfec5e73b3..ba788cfdc3c6cb00a31f31d82cd1f545420ff751 100644 (file)
--- a/arch/alpha/oprofile/common.c
+++ b/arch/alpha/oprofile/common.c
@@ -65,7 +65,7 @@ op_axp_setup(void)
         model->reg_setup(&reg, ctr, &sys);
  
         /* Configure the registers on all cpus.  */
-       smp_call_function(model->cpu_setup, &reg, 0, 1);
+       (void)smp_call_function(model->cpu_setup, &reg, 0, 1);
         model->cpu_setup(&reg);
         return 0;
  }
@@ -86,7 +86,7 @@ op_axp_cpu_start(void *dummy)
  static int
  op_axp_start(void)
  {
-       smp_call_function(op_axp_cpu_start, NULL, 0, 1);
+       (void)smp_call_function(op_axp_cpu_start, NULL, 0, 1);
         op_axp_cpu_start(NULL);
         return 0;
  }
@@ -101,7 +101,7 @@ op_axp_cpu_stop(void *dummy)
  static void
  op_axp_stop(void)
  {
-       smp_call_function(op_axp_cpu_stop, NULL, 0, 1);
+       (void)smp_call_function(op_axp_cpu_stop, NULL, 0, 1);
         op_axp_cpu_stop(NULL);
  }
  
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig

index 7bc4a583f4e101a01bfdc92707bf2f25f5048b61..4bf0e8737e1fd514477ec4009ba4e908f189b9a9 100644 (file)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -310,7 +310,7 @@ menu "Kernel Features"
  
  config SMP
         bool "Symmetric Multi-Processing (EXPERIMENTAL)"
-       depends on EXPERIMENTAL #&& n
+       depends on EXPERIMENTAL && BROKEN #&& n
         help
           This enables support for systems with more than one CPU. If you have
           a system with only one CPU, like most personal computers, say N. If
@@ -635,10 +635,6 @@ config PM
           and the Battery Powered Linux mini-HOWTO, available from
           <http://www.tldp.org/docs.html#howto>.
  
-         Note that, even if you say N here, Linux on the x86 architecture
-         will issue the hlt instruction if nothing is to be done, thereby
-         sending the processor to sleep and saving power.
-
  config APM
         tristate "Advanced Power Management Emulation"
         depends on PM
@@ -650,12 +646,6 @@ config APM
           battery status information, and user-space programs will receive
           notification of APM "events" (e.g. battery status change).
  
-         If you select "Y" here, you can disable actual use of the APM
-         BIOS by passing the "apm=off" option to the kernel at boot time.
-
-         Note that the APM support is almost completely disabled for
-         machines with more than one CPU.
-
           In order to use APM, you will need supporting software. For location
           and more information, read <file:Documentation/pm.txt> and the
           Battery Powered Linux mini-HOWTO, available from
@@ -665,39 +655,12 @@ config APM
           manpage ("man 8 hdparm") for that), and it doesn't turn off
           VESA-compliant "green" monitors.
  
-         This driver does not support the TI 4000M TravelMate and the ACER
-         486/DX4/75 because they don't have compliant BIOSes. Many "green"
-         desktop machines also don't have compliant BIOSes, and this driver
-         may cause those machines to panic during the boot phase.
-
           Generally, if you don't have a battery in your machine, there isn't
           much point in using this driver and you should say N. If you get
           random kernel OOPSes or reboots that don't seem to be related to
           anything, try disabling/enabling this option (or disabling/enabling
           APM in your BIOS).
  
-         Some other things you should try when experiencing seemingly random,
-         "weird" problems:
-
-         1) make sure that you have enough swap space and that it is
-         enabled.
-         2) pass the "no-hlt" option to the kernel
-         3) switch on floating point emulation in the kernel and pass
-         the "no387" option to the kernel
-         4) pass the "floppy=nodma" option to the kernel
-         5) pass the "mem=4M" option to the kernel (thereby disabling
-         all but the first 4 MB of RAM)
-         6) make sure that the CPU is not over clocked.
-         7) read the sig11 FAQ at <http://www.bitwizard.nl/sig11/>
-         8) disable the cache from your BIOS settings
-         9) install a fan for the video card or exchange video RAM
-         10) install a better fan for the CPU
-         11) exchange RAM chips
-         12) exchange the motherboard.
-
-         To compile this driver as a module, choose M here: the
-         module will be called apm.
-
  endmenu
  
  source "net/Kconfig"
@@ -752,6 +715,8 @@ source "drivers/hwmon/Kconfig"
  
  source "drivers/misc/Kconfig"
  
+source "drivers/mfd/Kconfig"
+
  source "drivers/media/Kconfig"
  
  source "drivers/video/Kconfig"
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig

index 692af6b5e8ff259d1455e816edf92ec186229a7a..666ba393575b8d7800b5b8a14d4e5ce1466932bc 100644 (file)
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -1,6 +1,9 @@
  config ICST525
         bool
  
+config ARM_GIC
+       bool
+
  config ICST307
         bool
  
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile

index 11f20a43ee3aa81c8c605512b2d5398361478026..a87886564b19e0b9c33798cdae6947114e0e5947 100644 (file)
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -4,6 +4,7 @@
  
  obj-y                          += rtctime.o
  obj-$(CONFIG_ARM_AMBA)         += amba.o
+obj-$(CONFIG_ARM_GIC)          += gic.o
  obj-$(CONFIG_ICST525)          += icst525.o
  obj-$(CONFIG_ICST307)          += icst307.o
  obj-$(CONFIG_SA1111)           += sa1111.o
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c

new file mode 100644 (file)

index 0000000..51dbf54
--- /dev/null
+++ b/arch/arm/common/gic.c
@@ -0,0 +1,166 @@
+/*
+ *  linux/arch/arm/common/gic.c
+ *
+ *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Interrupt architecture for the GIC:
+ *
+ * o There is one Interrupt Distributor, which receives interrupts
+ *   from system devices and sends them to the Interrupt Controllers.
+ *
+ * o There is one CPU Interface per CPU, which sends interrupts sent
+ *   by the Distributor, and interrupts generated locally, to the
+ *   associated CPU.
+ *
+ * Note that IRQs 0-31 are special - they are local to each CPU.
+ * As such, the enable set/clear, pending set/clear and active bit
+ * registers are banked per-cpu for these sources.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/mach/irq.h>
+#include <asm/hardware/gic.h>
+
+static void __iomem *gic_dist_base;
+static void __iomem *gic_cpu_base;
+
+/*
+ * Routines to acknowledge, disable and enable interrupts
+ *
+ * Linux assumes that when we're done with an interrupt we need to
+ * unmask it, in the same way we need to unmask an interrupt when
+ * we first enable it.
+ *
+ * The GIC has a separate notion of "end of interrupt" to re-enable
+ * an interrupt after handling, in order to support hardware
+ * prioritisation.
+ *
+ * We can make the GIC behave in the way that Linux expects by making
+ * our "acknowledge" routine disable the interrupt, then mark it as
+ * complete.
+ */
+static void gic_ack_irq(unsigned int irq)
+{
+       u32 mask = 1 << (irq % 32);
+       writel(mask, gic_dist_base + GIC_DIST_ENABLE_CLEAR + (irq / 32) * 4);
+       writel(irq, gic_cpu_base + GIC_CPU_EOI);
+}
+
+static void gic_mask_irq(unsigned int irq)
+{
+       u32 mask = 1 << (irq % 32);
+       writel(mask, gic_dist_base + GIC_DIST_ENABLE_CLEAR + (irq / 32) * 4);
+}
+
+static void gic_unmask_irq(unsigned int irq)
+{
+       u32 mask = 1 << (irq % 32);
+       writel(mask, gic_dist_base + GIC_DIST_ENABLE_SET + (irq / 32) * 4);
+}
+
+static void gic_set_cpu(struct irqdesc *desc, unsigned int irq, unsigned int cpu)
+{
+       void __iomem *reg = gic_dist_base + GIC_DIST_TARGET + (irq & ~3);
+       unsigned int shift = (irq % 4) * 8;
+       u32 val;
+
+       val = readl(reg) & ~(0xff << shift);
+       val |= 1 << (cpu + shift);
+       writel(val, reg);
+}
+
+static struct irqchip gic_chip = {
+       .ack            = gic_ack_irq,
+       .mask           = gic_mask_irq,
+       .unmask         = gic_unmask_irq,
+#ifdef CONFIG_SMP
+       .set_cpu        = gic_set_cpu,
+#endif
+};
+
+/*
+ * gic_dist_init - program the GIC Interrupt Distributor mapped at @base.
+ *
+ * Stores @base in gic_dist_base, writes 0 to GIC_DIST_CTRL while the
+ * config, target, priority and enable-clear registers are programmed,
+ * installs gic_chip on Linux IRQs 29 and up, then writes 1 to
+ * GIC_DIST_CTRL.  Global interrupts are targeted at the calling CPU.
+ */
+void __init gic_dist_init(void __iomem *base)
+{
+       unsigned int max_irq, i;
+       u32 cpumask = 1 << smp_processor_id();
+
+       /* Copy this CPU's target bit into all four byte lanes of the word. */
+       cpumask |= cpumask << 8;
+       cpumask |= cpumask << 16;
+
+       gic_dist_base = base;
+
+       writel(0, base + GIC_DIST_CTRL);
+
+       /* Find out how many interrupts are supported. */
+       max_irq = readl(base + GIC_DIST_CTR) & 0x1f;
+       max_irq = (max_irq + 1) * 32;
+
+       /*
+        * The GIC only supports up to 1020 interrupt sources, so never
+        * program distributor registers beyond that.  The platform
+        * maximum (NR_IRQS) is applied separately, when registering
+        * with the Linux IRQ subsystem below.
+        */
+       if (max_irq > 1020)
+               max_irq = 1020;
+
+       /* Set all global interrupts to be level triggered, active low. */
+       for (i = 32; i < max_irq; i += 16)
+               writel(0, base + GIC_DIST_CONFIG + i * 4 / 16);
+
+       /* Set all global interrupts to this CPU only. */
+       for (i = 32; i < max_irq; i += 4)
+               writel(cpumask, base + GIC_DIST_TARGET + i * 4 / 4);
+
+       /* Set priority on all interrupts. */
+       for (i = 0; i < max_irq; i += 4)
+               writel(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4);
+
+       /* Disable all interrupts. */
+       for (i = 0; i < max_irq; i += 32)
+               writel(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32);
+
+       /*
+        * Setup the Linux IRQ subsystem, staying inside its NR_IRQS table.
+        */
+       for (i = 29; i < max_irq && i < NR_IRQS; i++) {
+               set_irq_chip(i, &gic_chip);
+               set_irq_handler(i, do_level_IRQ);
+               set_irq_flags(i, IRQF_VALID | IRQF_PROBE);
+       }
+
+       writel(1, base + GIC_DIST_CTRL);
+}
+
+void __cpuinit gic_cpu_init(void __iomem *base)
+{
+       gic_cpu_base = base;
+       writel(0xf0, base + GIC_CPU_PRIMASK);
+       writel(1, base + GIC_CPU_CTRL);
+}
+
+#ifdef CONFIG_SMP
+void gic_raise_softirq(cpumask_t cpumask, unsigned int irq)
+{
+       unsigned long map = *cpus_addr(cpumask);
+
+       writel(map << 16 | irq, gic_dist_base + GIC_DIST_SOFTINT);
+}
+#endif
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c

index ad26e98f1e62343c5f231c5358319c9342bd39cf..c4923fac8dff56bf9a6232b4554c643668fafe7b 100644 (file)
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -447,9 +447,26 @@ pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
         region->end   = res->end - offset;
  }
  
+void __devinit
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                       struct pci_bus_region *region)
+{
+       struct pci_sys_data *root = dev->sysdata;
+       unsigned long offset = 0;
+
+       if (res->flags & IORESOURCE_IO)
+               offset = root->io_offset;
+       if (res->flags & IORESOURCE_MEM)
+               offset = root->mem_offset;
+
+       res->start = region->start + offset;
+       res->end   = region->end + offset;
+}
+
  #ifdef CONFIG_HOTPLUG
  EXPORT_SYMBOL(pcibios_fixup_bus);
  EXPORT_SYMBOL(pcibios_resource_to_bus);
+EXPORT_SYMBOL(pcibios_bus_to_resource);
  #endif
  
  /*
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S

index e5d370c235d747575cf25a6f955c277c92df3957..2b6b4c786e654c125cfa62b750337bc5894b9731 100644 (file)
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -327,6 +327,12 @@ __syscall_start:
  /* 310 */      .long   sys_request_key
                 .long   sys_keyctl
                 .long   sys_semtimedop
+/* vserver */  .long   sys_ni_syscall
+               .long   sys_ioprio_set
+/* 315 */      .long   sys_ioprio_get
+               .long   sys_inotify_init
+               .long   sys_inotify_add_watch
+               .long   sys_inotify_rm_watch
  __syscall_end:
  
                 .rept   NR_syscalls - (__syscall_end - __syscall_start) / 4
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S

index 39a6c1b0b9a32db8f578bab2d9f4156acc5d4054..7152bfbee581ea4fa83769bd323564a6249782f7 100644 (file)
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -533,6 +533,13 @@ ENTRY(__switch_to)
         ldr     r3, [r2, #TI_TP_VALUE]
         stmia   ip!, {r4 - sl, fp, sp, lr}      @ Store most regs on stack
         ldr     r6, [r2, #TI_CPU_DOMAIN]!
+#if __LINUX_ARM_ARCH__ >= 6
+#ifdef CONFIG_CPU_MPCORE
+       clrex
+#else
+       strex   r3, r4, [ip]                    @ Clear exclusive monitor
+#endif
+#endif
  #if defined(CONFIG_CPU_XSCALE) && !defined(CONFIG_IWMMXT)
         mra     r4, r5, acc0
         stmia   ip, {r4, r5}
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c

index 5e435e42dacdef45d87173527d60759aa7af083f..a94d75fef598b90bc1a65d32dc017bfa184c0059 100644 (file)
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -658,11 +658,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
         /*
          * Block the signal if we were unsuccessful.
          */
-       if (ret != 0 || !(ka->sa.sa_flags & SA_NODEFER)) {
+       if (ret != 0) {
                 spin_lock_irq(&tsk->sighand->siglock);
                 sigorsets(&tsk->blocked, &tsk->blocked,
                           &ka->sa.sa_mask);
-               sigaddset(&tsk->blocked, sig);
+               if (!(ka->sa.sa_flags & SA_NODEFER))
+                       sigaddset(&tsk->blocked, sig);
                 recalc_sigpending();
                 spin_unlock_irq(&tsk->sighand->siglock);
         }
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c

index d571c37ac30c1f0b16fd8e81f5542aa35995d4d4..4554c961251c5871e0a05a2c6a74d1f660d0af73 100644 (file)
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -617,7 +617,7 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs)
         notify_die("unknown data abort code", regs, &info, instr, 0);
  }
  
-volatile void __bug(const char *file, int line, void *data)
+void __attribute__((noreturn)) __bug(const char *file, int line, void *data)
  {
         printk(KERN_CRIT"kernel BUG at %s:%d!", file, line);
         if (data)
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h

index 2036ff15bda9b70e0c5f8b6f79538008ebe393e4..64a988c1ad447739ccf7a67d4b4c0ebdb7e3f6d2 100644 (file)
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -1,4 +1,6 @@
-#if __LINUX_ARM_ARCH__ >= 6
+#include <linux/config.h>
+
+#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_CPU_MPCORE)
         .macro  bitop, instr
         mov     r2, #1
         and     r3, r0, #7              @ Get bit offset
diff --git a/arch/arm/mach-ixp4xx/coyote-setup.c b/arch/arm/mach-ixp4xx/coyote-setup.c

index 4ff4393ef0ea681fcd0eca14fdae6bee00f03e35..411ea999619055a7b0e73bd3678ecd9ad8806989 100644 (file)
--- a/arch/arm/mach-ixp4xx/coyote-setup.c
+++ b/arch/arm/mach-ixp4xx/coyote-setup.c
@@ -36,7 +36,7 @@ static struct flash_platform_data coyote_flash_data = {
  
  static struct resource coyote_flash_resource = {
         .start          = COYOTE_FLASH_BASE,
-       .end            = COYOTE_FLASH_BASE + COYOTE_FLASH_SIZE,
+       .end            = COYOTE_FLASH_BASE + COYOTE_FLASH_SIZE - 1,
         .flags          = IORESOURCE_MEM,
  };
  
@@ -61,7 +61,7 @@ static struct plat_serial8250_port coyote_uart_data[] = {
                 .mapbase        = IXP4XX_UART2_BASE_PHYS,
                 .membase        = (char *)IXP4XX_UART2_BASE_VIRT + REG_OFFSET,
                 .irq            = IRQ_IXP4XX_UART2,
-               .flags          = UPF_BOOT_AUTOCONF,
+               .flags          = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
                 .iotype         = UPIO_MEM,
                 .regshift       = 2,
                 .uartclk        = IXP4XX_UART_XTAL,
diff --git a/arch/arm/mach-ixp4xx/gtwx5715-setup.c b/arch/arm/mach-ixp4xx/gtwx5715-setup.c

index 8ba1cd9406e702fbc66f1be2dd9124599d6c1630..333459d6aa464bb5394fd46783e9b6690b6ed4aa 100644 (file)
--- a/arch/arm/mach-ixp4xx/gtwx5715-setup.c
+++ b/arch/arm/mach-ixp4xx/gtwx5715-setup.c
@@ -83,7 +83,7 @@ static struct plat_serial8250_port gtwx5715_uart_platform_data[] = {
         .mapbase        = IXP4XX_UART2_BASE_PHYS,
         .membase        = (char *)IXP4XX_UART2_BASE_VIRT + REG_OFFSET,
         .irq            = IRQ_IXP4XX_UART2,
-       .flags          = UPF_BOOT_AUTOCONF,
+       .flags          = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
         .iotype         = UPIO_MEM,
         .regshift       = 2,
         .uartclk        = IXP4XX_UART_XTAL,
@@ -114,7 +114,7 @@ static struct flash_platform_data gtwx5715_flash_data = {
  
  static struct resource gtwx5715_flash_resource = {
         .start          = GTWX5715_FLASH_BASE,
-       .end            = GTWX5715_FLASH_BASE + GTWX5715_FLASH_SIZE,
+       .end            = GTWX5715_FLASH_BASE + GTWX5715_FLASH_SIZE - 1,
         .flags          = IORESOURCE_MEM,
  };
  
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c

index c2ba759e994611116101bb6719ed84d6bc069227..fa0646c8693b096c7c1ee6b6a16dc3074bff2679 100644 (file)
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -36,7 +36,7 @@ static struct flash_platform_data ixdp425_flash_data = {
  
  static struct resource ixdp425_flash_resource = {
         .start          = IXDP425_FLASH_BASE,
-       .end            = IXDP425_FLASH_BASE + IXDP425_FLASH_SIZE,
+       .end            = IXDP425_FLASH_BASE + IXDP425_FLASH_SIZE - 1,
         .flags          = IORESOURCE_MEM,
  };
  
@@ -82,7 +82,7 @@ static struct plat_serial8250_port ixdp425_uart_data[] = {
                 .mapbase        = IXP4XX_UART1_BASE_PHYS,
                 .membase        = (char *)IXP4XX_UART1_BASE_VIRT + REG_OFFSET,
                 .irq            = IRQ_IXP4XX_UART1,
-               .flags          = UPF_BOOT_AUTOCONF,
+               .flags          = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
                 .iotype         = UPIO_MEM,
                 .regshift       = 2,
                 .uartclk        = IXP4XX_UART_XTAL,
@@ -91,7 +91,7 @@ static struct plat_serial8250_port ixdp425_uart_data[] = {
                 .mapbase        = IXP4XX_UART2_BASE_PHYS,
                 .membase        = (char *)IXP4XX_UART2_BASE_VIRT + REG_OFFSET,
                 .irq            = IRQ_IXP4XX_UART1,
-               .flags          = UPF_BOOT_AUTOCONF,
+               .flags          = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
                 .iotype         = UPIO_MEM,
                 .regshift       = 2,
                 .uartclk        = IXP4XX_UART_XTAL,
diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c

index 1e7f343822d0c5c111d5988bcb0eeeb4126ad7bc..e9182242da95be91ebe9dfc5f389c0a3318d8a5e 100644 (file)
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -30,6 +30,7 @@
   *     28-Jun-2005 BJD  Moved pm functionality out to common code
   *     17-Jul-2005 BJD  Changed to platform device for SuperIO 16550s
   *     25-Jul-2005 BJD  Removed ASIX static mappings
+ *     27-Jul-2005 BJD  Ensure maximum frequency of i2c bus
  */
  
  #include <linux/kernel.h>
@@ -60,6 +61,7 @@
  #include <asm/arch/regs-mem.h>
  #include <asm/arch/regs-lcd.h>
  #include <asm/arch/nand.h>
+#include <asm/arch/iic.h>
  
  #include <linux/mtd/mtd.h>
  #include <linux/mtd/nand.h>
@@ -304,7 +306,7 @@ static void bast_nand_select(struct s3c2410_nand_set *set, int slot)
  }
  
  static struct s3c2410_platform_nand bast_nand_info = {
-       .tacls          = 80,
+       .tacls          = 40,
         .twrph0         = 80,
         .twrph1         = 80,
         .nr_sets        = ARRAY_SIZE(bast_nand_sets),
@@ -385,6 +387,17 @@ static struct platform_device bast_sio = {
         },
  };
  
+/* we have devices on the bus which cannot work much over the
+ * standard 100KHz i2c bus frequency
+*/
+
+static struct s3c2410_platform_i2c bast_i2c_info = {
+       .flags          = 0,
+       .slave_addr     = 0x10,
+       .bus_freq       = 100*1000,
+       .max_freq       = 130*1000,
+};
+
  /* Standard BAST devices */
  
  static struct platform_device *bast_devices[] __initdata = {
@@ -431,6 +444,7 @@ void __init bast_map_io(void)
         s3c24xx_uclk.parent  = &s3c24xx_clkout1;
  
         s3c_device_nand.dev.platform_data = &bast_nand_info;
+       s3c_device_i2c.dev.platform_data = &bast_i2c_info;
  
         s3c24xx_init_io(bast_iodesc, ARRAY_SIZE(bast_iodesc));
         s3c24xx_init_clocks(0);
diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c

index ff2f25409e446baf77b66390dad0928b7d3d773a..0b88993dfd27c514c181f6adc83aeb898aaf28d7 100644 (file)
--- a/arch/arm/mach-s3c2410/s3c2410.c
+++ b/arch/arm/mach-s3c2410/s3c2410.c
@@ -18,6 +18,7 @@
   *     28-Sep-2004 BJD  Updates for new serial port bits
   *     04-Nov-2004 BJD  Updated UART configuration process
   *     10-Jan-2005 BJD  Removed s3c2410_clock_tick_rate
+ *     13-Aug-2005 DA   Removed UART from initial I/O mappings
  */
  
  #include <linux/kernel.h>
@@ -49,10 +50,9 @@ static struct map_desc s3c2410_iodesc[] __initdata = {
         IODESC_ENT(USBHOST),
         IODESC_ENT(CLKPWR),
         IODESC_ENT(LCD),
-       IODESC_ENT(UART),
         IODESC_ENT(TIMER),
         IODESC_ENT(ADC),
-       IODESC_ENT(WATCHDOG)
+       IODESC_ENT(WATCHDOG),
  };
  
  static struct resource s3c_uart0_resource[] = {
diff --git a/arch/arm/mach-s3c2410/usb-simtec.c b/arch/arm/mach-s3c2410/usb-simtec.c

index 7f2b61362976b088a5c8126f4f7915082062b307..f021fd82be52cb7260c91be5c02a8ce0c4d2ef49 100644 (file)
--- a/arch/arm/mach-s3c2410/usb-simtec.c
+++ b/arch/arm/mach-s3c2410/usb-simtec.c
@@ -1,6 +1,6 @@
  /* linux/arch/arm/mach-s3c2410/usb-simtec.c
   *
- * Copyright (c) 2004 Simtec Electronics
+ * Copyright (c) 2004,2005 Simtec Electronics
   *   Ben Dooks <ben@simtec.co.uk>
   *
   * http://www.simtec.co.uk/products/EB2410ITX/
@@ -14,6 +14,8 @@
   * Modifications:
   *     14-Sep-2004 BJD  Created
   *     18-Oct-2004 BJD  Cleanups, and added code to report OC cleared
+ *     09-Aug-2005 BJD  Renamed s3c2410_report_oc to s3c2410_usb_report_oc
+ *     09-Aug-2005 BJD  Ports powered only if both are enabled
  */
  
  #define DEBUG
@@ -47,13 +49,19 @@
   * designed boards.
  */
  
+static unsigned int power_state[2];
+
  static void
  usb_simtec_powercontrol(int port, int to)
  {
         pr_debug("usb_simtec_powercontrol(%d,%d)\n", port, to);
  
-       if (port == 1)
-               s3c2410_gpio_setpin(S3C2410_GPB4, to ? 0:1);
+       power_state[port] = to;
+
+       if (power_state[0] && power_state[1])
+               s3c2410_gpio_setpin(S3C2410_GPB4, 0);
+       else
+               s3c2410_gpio_setpin(S3C2410_GPB4, 1);
  }
  
  static irqreturn_t
@@ -63,10 +71,10 @@ usb_simtec_ocirq(int irq, void *pw, struct pt_regs *regs)
  
         if (s3c2410_gpio_getpin(S3C2410_GPG10) == 0) {
                 pr_debug("usb_simtec: over-current irq (oc detected)\n");
-               s3c2410_report_oc(info, 3);
+               s3c2410_usb_report_oc(info, 3);
         } else {
                 pr_debug("usb_simtec: over-current irq (oc cleared)\n");
-               s3c2410_report_oc(info, 0);
+               s3c2410_usb_report_oc(info, 0);
         }
  
         return IRQ_HANDLED;
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c

index 4d4d303ee3a8003bdbd4e5f48a8a04480fadfd1e..24687f511bf53c9bf74ee4d55a332a61a690f162 100644 (file)
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -35,6 +35,7 @@
  #include <asm/mach/map.h>
  #include <asm/mach/serial_sa1100.h>
  #include <asm/arch/assabet.h>
+#include <asm/arch/mcp.h>
  
  #include "generic.h"
  
@@ -198,6 +199,11 @@ static struct irda_platform_data assabet_irda_data = {
         .set_speed      = assabet_irda_set_speed,
  };
  
+static struct mcp_plat_data assabet_mcp_data = {
+       .mccr0          = MCCR0_ADM,
+       .sclk_rate      = 11981000,
+};
+
  static void __init assabet_init(void)
  {
         /*
@@ -246,6 +252,7 @@ static void __init assabet_init(void)
         sa11x0_set_flash_data(&assabet_flash_data, assabet_flash_resources,
                               ARRAY_SIZE(assabet_flash_resources));
         sa11x0_set_irda_data(&assabet_irda_data);
+       sa11x0_set_mcp_data(&assabet_mcp_data);
  }
  
  /*
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c

index 0aa918e24c3123c26caa928a7683aa4e5d45bf9d..9484be7dc671a0ada6bb834ea480b48b73ff6863 100644 (file)
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -29,6 +29,7 @@
  #include <asm/mach/serial_sa1100.h>
  
  #include <asm/arch/cerf.h>
+#include <asm/arch/mcp.h>
  #include "generic.h"
  
  static struct resource cerfuart2_resources[] = {
@@ -116,10 +117,16 @@ static void __init cerf_map_io(void)
         GPDR |= CERF_GPIO_CF_RESET;
  }
  
+static struct mcp_plat_data cerf_mcp_data = {
+       .mccr0          = MCCR0_ADM,
+       .sclk_rate      = 11981000,
+};
+
  static void __init cerf_init(void)
  {
         platform_add_devices(cerf_devices, ARRAY_SIZE(cerf_devices));
         sa11x0_set_flash_data(&cerf_flash_data, &cerf_flash_resource, 1);
+       sa11x0_set_mcp_data(&cerf_mcp_data);
  }
  
  MACHINE_START(CERF, "Intrinsyc CerfBoard/CerfCube")
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c

index 95ae217be1bc06a485d29764c1507f6d50eac9a9..3f1e358455e51578ca01f16d6cde1a3141fbfb50 100644 (file)
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -221,6 +221,11 @@ static struct platform_device sa11x0mcp_device = {
         .resource       = sa11x0mcp_resources,
  };
  
+void sa11x0_set_mcp_data(struct mcp_plat_data *data)
+{
+       sa11x0mcp_device.dev.platform_data = data;
+}
+
  static struct resource sa11x0ssp_resources[] = {
         [0] = {
                 .start  = 0x80070000,
diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h

index bfe41da9923e35b0b427058485e05749c454f2a1..279e3afa3c393bd8b25b4f01c47abd00ffb40d57 100644 (file)
--- a/arch/arm/mach-sa1100/generic.h
+++ b/arch/arm/mach-sa1100/generic.h
@@ -34,5 +34,8 @@ struct resource;
  extern void sa11x0_set_flash_data(struct flash_platform_data *flash,
                                   struct resource *res, int nr);
  
+struct sa11x0_ssp_plat_ops;
+extern void sa11x0_set_ssp_data(struct sa11x0_ssp_plat_ops *ops);
+
  struct irda_platform_data;
  void sa11x0_set_irda_data(struct irda_platform_data *irda);
diff --git a/arch/arm/mach-sa1100/jornada720.c b/arch/arm/mach-sa1100/jornada720.c

index eee3cbc5ec4f4d3a7bd9e0fcde4fc74e746c0454..2f497112c96a176c0d72b90095c66ffb1d0b5536 100644 (file)
--- a/arch/arm/mach-sa1100/jornada720.c
+++ b/arch/arm/mach-sa1100/jornada720.c
@@ -97,6 +97,7 @@ static void __init jornada720_map_io(void)
  }
  
  MACHINE_START(JORNADA720, "HP Jornada 720")
+       /* Maintainer: Michael Gernoth <michael@gernoth.net> */
         .phys_ram       = 0xc0000000,
         .phys_io        = 0x80000000,
         .io_pg_offst    = ((0xf8000000) >> 18) & 0xfffc,
diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c

index 870b488aeda44323172400c76bfd2e1274fbf9a8..ed6744d480aff0f04e2b1bd6a1f742b1e01d3b23 100644 (file)
--- a/arch/arm/mach-sa1100/lart.c
+++ b/arch/arm/mach-sa1100/lart.c
@@ -13,12 +13,23 @@
  #include <asm/mach/arch.h>
  #include <asm/mach/map.h>
  #include <asm/mach/serial_sa1100.h>
+#include <asm/arch/mcp.h>
  
  #include "generic.h"
  
  
  #warning "include/asm/arch-sa1100/ide.h needs fixing for lart"
  
+static struct mcp_plat_data lart_mcp_data = {
+       .mccr0          = MCCR0_ADM,
+       .sclk_rate      = 11981000,
+};
+
+static void __init lart_init(void)
+{
+       sa11x0_set_mcp_data(&lart_mcp_data);
+}
+
  static struct map_desc lart_io_desc[] __initdata = {
   /* virtual     physical    length      type */
    { 0xe8000000, 0x00000000, 0x00400000, MT_DEVICE }, /* main flash memory */
@@ -47,5 +58,6 @@ MACHINE_START(LART, "LART")
         .boot_params    = 0xc0000100,
         .map_io         = lart_map_io,
         .init_irq       = sa1100_init_irq,
+       .init_machine   = lart_init,
         .timer          = &sa1100_timer,
  MACHINE_END
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c

index 43a00359fcdddf9e8d0186702112ae98659f9921..7482288278d96acf17323a5612aa2791ab771d4c 100644 (file)
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -18,6 +18,7 @@
  #include <asm/mach/flash.h>
  #include <asm/mach/map.h>
  #include <asm/mach/serial_sa1100.h>
+#include <asm/arch/mcp.h>
  #include <asm/arch/shannon.h>
  
  #include "generic.h"
@@ -52,9 +53,15 @@ static struct resource shannon_flash_resource = {
         .flags          = IORESOURCE_MEM,
  };
  
+static struct mcp_plat_data shannon_mcp_data = {
+       .mccr0          = MCCR0_ADM,
+       .sclk_rate      = 11981000,
+};
+
  static void __init shannon_init(void)
  {
         sa11x0_set_flash_data(&shannon_flash_data, &shannon_flash_resource, 1);
+       sa11x0_set_mcp_data(&shannon_mcp_data);
  }
  
  static void __init shannon_map_io(void)
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c

index 77978586b1268d955e2fa58fdaab475a2408ee0c..07f6d5fd7bb0a0108f149d8323073012dfe0a9d2 100644 (file)
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -23,6 +23,7 @@
  #include <asm/mach/flash.h>
  #include <asm/mach/map.h>
  #include <asm/mach/serial_sa1100.h>
+#include <asm/arch/mcp.h>
  #include <asm/arch/simpad.h>
  
  #include <linux/serial_core.h>
@@ -123,6 +124,11 @@ static struct resource simpad_flash_resources [] = {
         }
  };
  
+static struct mcp_plat_data simpad_mcp_data = {
+       .mccr0          = MCCR0_ADM,
+       .sclk_rate      = 11981000,
+};
+
  
  
  static void __init simpad_map_io(void)
@@ -157,6 +163,7 @@ static void __init simpad_map_io(void)
  
         sa11x0_set_flash_data(&simpad_flash_data, simpad_flash_resources,
                               ARRAY_SIZE(simpad_flash_resources));
+       sa11x0_set_mcp_data(&simpad_mcp_data);
  }
  
  static void simpad_power_off(void)
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig

index afbbeb6f46582270b834ffa6e8da166a3d1bf014..db5e47dfc303dce4a49b9ca54db0ab1f0e3673af 100644 (file)
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -384,7 +384,7 @@ config CPU_DCACHE_DISABLE
  
  config CPU_DCACHE_WRITETHROUGH
         bool "Force write through D-cache"
-       depends on (CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020) && !CPU_DISABLE_DCACHE
+       depends on (CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020) && !CPU_DCACHE_DISABLE
         default y if CPU_ARM925T
         help
           Say Y here to use the data cache in writethrough mode. Unless you
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c

index 65bfe84b6d672e8989f0d8141b5781093147cf74..0b6c4db44e08275e4ef15ab74923a581f48dc645 100644 (file)
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -238,9 +238,9 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
         up_read(&mm->mmap_sem);
  
         /*
-        * Handle the "normal" case first
+        * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
          */
-       if (fault > 0)
+       if (fault >= VM_FAULT_MINOR)
                 return 0;
  
         /*
@@ -261,7 +261,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
                 do_exit(SIGKILL);
                 return 0;
  
-       case 0:
+       case VM_FAULT_SIGBUS:
                 /*
                  * We had some memory, but were unable to
                  * successfully fix up this page fault.
diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c

index e33fe4229d056e9ae098249ceb2b3b022cd87196..3c655c54e23131b10cbf33d3d1fb1fe4a81d52be 100644 (file)
--- a/arch/arm/mm/mm-armv.c
+++ b/arch/arm/mm/mm-armv.c
@@ -383,6 +383,7 @@ static void __init build_mem_type_table(void)
  {
         struct cachepolicy *cp;
         unsigned int cr = get_cr();
+       unsigned int user_pgprot;
         int cpu_arch = cpu_architecture();
         int i;
  
@@ -408,6 +409,9 @@ static void __init build_mem_type_table(void)
                 }
         }
  
+       cp = &cache_policies[cachepolicy];
+       user_pgprot = cp->pte;
+
         /*
          * ARMv6 and above have extended page tables.
          */
@@ -426,11 +430,18 @@ static void __init build_mem_type_table(void)
                 mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
                 mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
  
+               /*
+                * Mark the device area as "shared device"
+                */
                 mem_types[MT_DEVICE].prot_pte |= L_PTE_BUFFERABLE;
                 mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
-       }
  
-       cp = &cache_policies[cachepolicy];
+               /*
+                * User pages need to be mapped with the ASID
+                * (iow, non-global)
+                */
+               user_pgprot |= L_PTE_ASID;
+       }
  
         if (cpu_arch >= CPU_ARCH_ARMv5) {
                 mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE;
@@ -448,7 +459,7 @@ static void __init build_mem_type_table(void)
  
         for (i = 0; i < 16; i++) {
                 unsigned long v = pgprot_val(protection_map[i]);
-               v &= (~(PTE_BUFFERABLE|PTE_CACHEABLE)) | cp->pte;
+               v &= (~(PTE_BUFFERABLE|PTE_CACHEABLE)) | user_pgprot;
                 protection_map[i] = __pgprot(v);
         }
  
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S

index 352db98ee2697f7f985973945f5d5fc2909d3263..139a38670c5d07d35d37150adcd6358f64aefb22 100644 (file)
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -105,18 +105,12 @@ ENTRY(cpu_v6_dcache_clean_area)
  ENTRY(cpu_v6_switch_mm)
         mov     r2, #0
         ldr     r1, [r1, #MM_CONTEXT_ID]        @ get mm->context.id
-       mcr     p15, 0, r2, c7, c5, 6           @ flush BTAC/BTB
+       mcr     p15, 0, r2, c7, c5, 6           @ flush BTAC/BTB
         mcr     p15, 0, r2, c7, c10, 4          @ drain write buffer
         mcr     p15, 0, r0, c2, c0, 0           @ set TTB 0
         mcr     p15, 0, r1, c13, c0, 1          @ set context ID
         mov     pc, lr
  
-#define nG     (1 << 11)
-#define APX    (1 << 9)
-#define AP1    (1 << 5)
-#define AP0    (1 << 4)
-#define XN     (1 << 0)
-
  /*
   *     cpu_v6_set_pte(ptep, pte)
   *
@@ -139,24 +133,24 @@ ENTRY(cpu_v6_switch_mm)
  ENTRY(cpu_v6_set_pte)
         str     r1, [r0], #-2048                @ linux version
  
-       bic     r2, r1, #0x00000ff0
+       bic     r2, r1, #0x000007f0
         bic     r2, r2, #0x00000003
-       orr     r2, r2, #AP0 | 2
+       orr     r2, r2, #PTE_EXT_AP0 | 2
  
         tst     r1, #L_PTE_WRITE
         tstne   r1, #L_PTE_DIRTY
-       orreq   r2, r2, #APX
+       orreq   r2, r2, #PTE_EXT_APX
  
         tst     r1, #L_PTE_USER
-       orrne   r2, r2, #AP1 | nG
-       tstne   r2, #APX
-       bicne   r2, r2, #APX | AP0
+       orrne   r2, r2, #PTE_EXT_AP1
+       tstne   r2, #PTE_EXT_APX
+       bicne   r2, r2, #PTE_EXT_APX | PTE_EXT_AP0
  
         tst     r1, #L_PTE_YOUNG
-       biceq   r2, r2, #APX | AP1 | AP0
+       biceq   r2, r2, #PTE_EXT_APX | PTE_EXT_AP_MASK
  
  @      tst     r1, #L_PTE_EXEC
-@      orreq   r2, r2, #XN
+@      orreq   r2, r2, #PTE_EXT_XN
  
         tst     r1, #L_PTE_PRESENT
         moveq   r2, #0
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S

index 2d977b4eeeabf95937be3ad863a550d62e21e40e..b88de2700146e6cd494b774985eeca84df14c256 100644 (file)
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -370,142 +370,6 @@ ENTRY(cpu_xscale_dcache_clean_area)
         bhi     1b
         mov     pc, lr
  
-/* ================================ CACHE LOCKING============================
- *
- * The XScale MicroArchitecture implements support for locking entries into
- * the data and instruction cache.  The following functions implement the core
- * low level instructions needed to accomplish the locking.  The developer's
- * manual states that the code that performs the locking must be in non-cached
- * memory.  To accomplish this, the code in xscale-cache-lock.c copies the
- * following functions from the cache into a non-cached memory region that
- * is allocated through consistent_alloc().
- *
- */
-       .align  5
-/*
- * xscale_icache_lock
- *
- * r0: starting address to lock
- * r1: end address to lock
- */
-ENTRY(xscale_icache_lock)
-
-iLockLoop:
-       bic     r0, r0, #CACHELINESIZE - 1
-       mcr     p15, 0, r0, c9, c1, 0   @ lock into cache
-       cmp     r0, r1                  @ are we done?
-       add     r0, r0, #CACHELINESIZE  @ advance to next cache line
-       bls     iLockLoop
-       mov     pc, lr
-
-/*
- * xscale_icache_unlock
- */
-ENTRY(xscale_icache_unlock)
-       mcr     p15, 0, r0, c9, c1, 1   @ Unlock icache
-       mov     pc, lr
-
-/*
- * xscale_dcache_lock
- *
- * r0: starting address to lock
- * r1: end address to lock
- */
-ENTRY(xscale_dcache_lock)
-       mcr     p15, 0, ip, c7, c10, 4          @ Drain Write (& Fill) Buffer
-       mov     r2, #1
-       mcr     p15, 0, r2, c9, c2, 0   @ Put dcache in lock mode
-       cpwait  ip                      @ Wait for completion
-
-       mrs     r2, cpsr
-       orr     r3, r2, #PSR_F_BIT | PSR_I_BIT
-dLockLoop:
-       msr     cpsr_c, r3
-       mcr     p15, 0, r0, c7, c10, 1  @ Write back line if it is dirty
-       mcr     p15, 0, r0, c7, c6, 1   @ Flush/invalidate line
-       msr     cpsr_c, r2
-       ldr     ip, [r0], #CACHELINESIZE @ Preload 32 bytes into cache from
-                                       @ location [r0]. Post-increment
-                                       @ r3 to next cache line
-       cmp     r0, r1                  @ Are we done?
-       bls     dLockLoop
-
-       mcr     p15, 0, ip, c7, c10, 4          @ Drain Write (& Fill) Buffer
-       mov     r2, #0
-       mcr     p15, 0, r2, c9, c2, 0   @ Get out of lock mode
-       cpwait_ret lr, ip
-
-/*
- * xscale_dcache_unlock
- */
-ENTRY(xscale_dcache_unlock)
-       mcr     p15, 0, ip, c7, c10, 4          @ Drain Write (& Fill) Buffer
-       mcr     p15, 0, ip, c9, c2, 1   @ Unlock cache
-       mov     pc, lr
-
-/*
- * Needed to determine the length of the code that needs to be copied.
- */
-       .align  5
-ENTRY(xscale_cache_dummy)
-       mov     pc, lr
-
-/* ================================ TLB LOCKING==============================
- *
- * The XScale MicroArchitecture implements support for locking entries into
- * the Instruction and Data TLBs.  The following functions provide the
- * low level support for supporting these under Linux.  xscale-lock.c
- * implements some higher level management code.  Most of the following
- * is taken straight out of the Developer's Manual.
- */
-
-/*
- * Lock I-TLB entry
- *
- * r0: Virtual address to translate and lock
- */
-       .align  5
-ENTRY(xscale_itlb_lock)
-       mrs     r2, cpsr
-       orr     r3, r2, #PSR_F_BIT | PSR_I_BIT
-       msr     cpsr_c, r3                      @ Disable interrupts
-       mcr     p15, 0, r0, c8, c5, 1           @ Invalidate I-TLB entry
-       mcr     p15, 0, r0, c10, c4, 0          @ Translate and lock
-       msr     cpsr_c, r2                      @ Restore interrupts
-       cpwait_ret lr, ip
-
-/*
- * Lock D-TLB entry
- *
- * r0: Virtual address to translate and lock
- */
-       .align  5
-ENTRY(xscale_dtlb_lock)
-       mrs     r2, cpsr
-       orr     r3, r2, #PSR_F_BIT | PSR_I_BIT
-       msr     cpsr_c, r3                      @ Disable interrupts
-       mcr     p15, 0, r0, c8, c6, 1           @ Invalidate D-TLB entry
-       mcr     p15, 0, r0, c10, c8, 0          @ Translate and lock
-       msr     cpsr_c, r2                      @ Restore interrupts
-       cpwait_ret lr, ip
-
-/*
- * Unlock all I-TLB entries
- */
-       .align  5
-ENTRY(xscale_itlb_unlock)
-       mcr     p15, 0, ip, c10, c4, 1          @ Unlock I-TLB
-       mcr     p15, 0, ip, c8, c5, 0           @ Invalidate I-TLB
-       cpwait_ret lr, ip
-
-/*
- * Unlock all D-TLB entries
- */
-ENTRY(xscale_dtlb_unlock)
-       mcr     p15, 0, ip, c10, c8, 1          @ Unlock D-TBL
-       mcr     p15, 0, ip, c8, c6, 0           @ Invalidate D-TLB
-       cpwait_ret lr, ip
-
  /* =============================== PageTable ============================== */
  
  #define PTE_CACHE_WRITE_ALLOCATE 0
diff --git a/arch/arm/nwfpe/double_cpdo.c b/arch/arm/nwfpe/double_cpdo.c

index 7ffd8cb9bc9609ced698a093777cc29e7678505b..c51d1386a97c9492786a0ec49ed7947be83d4a28 100644 (file)
--- a/arch/arm/nwfpe/double_cpdo.c
+++ b/arch/arm/nwfpe/double_cpdo.c
@@ -40,17 +40,17 @@ float64 float64_arccos(float64 rFm);
  float64 float64_pow(float64 rFn, float64 rFm);
  float64 float64_pol(float64 rFn, float64 rFm);
  
-static float64 float64_rsf(float64 rFn, float64 rFm)
+static float64 float64_rsf(struct roundingData *roundData, float64 rFn, float64 rFm)
  {
-       return float64_sub(rFm, rFn);
+       return float64_sub(roundData, rFm, rFn);
  }
  
-static float64 float64_rdv(float64 rFn, float64 rFm)
+static float64 float64_rdv(struct roundingData *roundData, float64 rFn, float64 rFm)
  {
-       return float64_div(rFm, rFn);
+       return float64_div(roundData, rFm, rFn);
  }
  
-static float64 (*const dyadic_double[16])(float64 rFn, float64 rFm) = {
+static float64 (*const dyadic_double[16])(struct roundingData*, float64 rFn, float64 rFm) = {
         [ADF_CODE >> 20] = float64_add,
         [MUF_CODE >> 20] = float64_mul,
         [SUF_CODE >> 20] = float64_sub,
@@ -65,12 +65,12 @@ static float64 (*const dyadic_double[16])(float64 rFn, float64 rFm) = {
         [FRD_CODE >> 20] = float64_rdv,
  };
  
-static float64 float64_mvf(float64 rFm)
+static float64 float64_mvf(struct roundingData *roundData,float64 rFm)
  {
         return rFm;
  }
  
-static float64 float64_mnf(float64 rFm)
+static float64 float64_mnf(struct roundingData *roundData,float64 rFm)
  {
         union float64_components u;
  
@@ -84,7 +84,7 @@ static float64 float64_mnf(float64 rFm)
         return u.f64;
  }
  
-static float64 float64_abs(float64 rFm)
+static float64 float64_abs(struct roundingData *roundData,float64 rFm)
  {
         union float64_components u;
  
@@ -98,7 +98,7 @@ static float64 float64_abs(float64 rFm)
         return u.f64;
  }
  
-static float64 (*const monadic_double[16])(float64 rFm) = {
+static float64 (*const monadic_double[16])(struct roundingData *, float64 rFm) = {
         [MVF_CODE >> 20] = float64_mvf,
         [MNF_CODE >> 20] = float64_mnf,
         [ABS_CODE >> 20] = float64_abs,
@@ -108,7 +108,7 @@ static float64 (*const monadic_double[16])(float64 rFm) = {
         [NRM_CODE >> 20] = float64_mvf,
  };
  
-unsigned int DoubleCPDO(const unsigned int opcode, FPREG * rFd)
+unsigned int DoubleCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd)
  {
         FPA11 *fpa11 = GET_FPA11();
         float64 rFm;
@@ -151,13 +151,13 @@ unsigned int DoubleCPDO(const unsigned int opcode, FPREG * rFd)
                 }
  
                 if (dyadic_double[opc_mask_shift]) {
-                       rFd->fDouble = dyadic_double[opc_mask_shift](rFn, rFm);
+                       rFd->fDouble = dyadic_double[opc_mask_shift](roundData, rFn, rFm);
                 } else {
                         return 0;
                 }
         } else {
                 if (monadic_double[opc_mask_shift]) {
-                       rFd->fDouble = monadic_double[opc_mask_shift](rFm);
+                       rFd->fDouble = monadic_double[opc_mask_shift](roundData, rFm);
                 } else {
                         return 0;
                 }
diff --git a/arch/arm/nwfpe/extended_cpdo.c b/arch/arm/nwfpe/extended_cpdo.c

index c39f68a3449e0799a7e0db046c179d01f517b703..65a279ba927ffac47ca5de5e3a38317c8547e8fc 100644 (file)
--- a/arch/arm/nwfpe/extended_cpdo.c
+++ b/arch/arm/nwfpe/extended_cpdo.c
@@ -35,17 +35,17 @@ floatx80 floatx80_arccos(floatx80 rFm);
  floatx80 floatx80_pow(floatx80 rFn, floatx80 rFm);
  floatx80 floatx80_pol(floatx80 rFn, floatx80 rFm);
  
-static floatx80 floatx80_rsf(floatx80 rFn, floatx80 rFm)
+static floatx80 floatx80_rsf(struct roundingData *roundData, floatx80 rFn, floatx80 rFm)
  {
-       return floatx80_sub(rFm, rFn);
+       return floatx80_sub(roundData, rFm, rFn);
  }
  
-static floatx80 floatx80_rdv(floatx80 rFn, floatx80 rFm)
+static floatx80 floatx80_rdv(struct roundingData *roundData, floatx80 rFn, floatx80 rFm)
  {
-       return floatx80_div(rFm, rFn);
+       return floatx80_div(roundData, rFm, rFn);
  }
  
-static floatx80 (*const dyadic_extended[16])(floatx80 rFn, floatx80 rFm) = {
+static floatx80 (*const dyadic_extended[16])(struct roundingData*, floatx80 rFn, floatx80 rFm) = {
         [ADF_CODE >> 20] = floatx80_add,
         [MUF_CODE >> 20] = floatx80_mul,
         [SUF_CODE >> 20] = floatx80_sub,
@@ -60,24 +60,24 @@ static floatx80 (*const dyadic_extended[16])(floatx80 rFn, floatx80 rFm) = {
         [FRD_CODE >> 20] = floatx80_rdv,
  };
  
-static floatx80 floatx80_mvf(floatx80 rFm)
+static floatx80 floatx80_mvf(struct roundingData *roundData, floatx80 rFm)
  {
         return rFm;
  }
  
-static floatx80 floatx80_mnf(floatx80 rFm)
+static floatx80 floatx80_mnf(struct roundingData *roundData, floatx80 rFm)
  {
         rFm.high ^= 0x8000;
         return rFm;
  }
  
-static floatx80 floatx80_abs(floatx80 rFm)
+static floatx80 floatx80_abs(struct roundingData *roundData, floatx80 rFm)
  {
         rFm.high &= 0x7fff;
         return rFm;
  }
  
-static floatx80 (*const monadic_extended[16])(floatx80 rFm) = {
+static floatx80 (*const monadic_extended[16])(struct roundingData*, floatx80 rFm) = {
         [MVF_CODE >> 20] = floatx80_mvf,
         [MNF_CODE >> 20] = floatx80_mnf,
         [ABS_CODE >> 20] = floatx80_abs,
@@ -87,7 +87,7 @@ static floatx80 (*const monadic_extended[16])(floatx80 rFm) = {
         [NRM_CODE >> 20] = floatx80_mvf,
  };
  
-unsigned int ExtendedCPDO(const unsigned int opcode, FPREG * rFd)
+unsigned int ExtendedCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd)
  {
         FPA11 *fpa11 = GET_FPA11();
         floatx80 rFm;
@@ -138,13 +138,13 @@ unsigned int ExtendedCPDO(const unsigned int opcode, FPREG * rFd)
                 }
  
                 if (dyadic_extended[opc_mask_shift]) {
-                       rFd->fExtended = dyadic_extended[opc_mask_shift](rFn, rFm);
+                       rFd->fExtended = dyadic_extended[opc_mask_shift](roundData, rFn, rFm);
                 } else {
                         return 0;
                 }
         } else {
                 if (monadic_extended[opc_mask_shift]) {
-                       rFd->fExtended = monadic_extended[opc_mask_shift](rFm);
+                       rFd->fExtended = monadic_extended[opc_mask_shift](roundData, rFm);
                 } else {
                         return 0;
                 }
diff --git a/arch/arm/nwfpe/fpa11.c b/arch/arm/nwfpe/fpa11.c

index bf61696865ec2c6531007d6eaa6aedd6c6bf8850..7690f731ee8706227acdf3b1f56de14f2b906839 100644 (file)
--- a/arch/arm/nwfpe/fpa11.c
+++ b/arch/arm/nwfpe/fpa11.c
@@ -51,48 +51,42 @@ static void resetFPA11(void)
         fpa11->fpsr = FP_EMULATOR | BIT_AC;
  }
  
-void SetRoundingMode(const unsigned int opcode)
+int8 SetRoundingMode(const unsigned int opcode)
  {
         switch (opcode & MASK_ROUNDING_MODE) {
         default:
         case ROUND_TO_NEAREST:
-               float_rounding_mode = float_round_nearest_even;
-               break;
+               return float_round_nearest_even;
  
         case ROUND_TO_PLUS_INFINITY:
-               float_rounding_mode = float_round_up;
-               break;
+               return float_round_up;
  
         case ROUND_TO_MINUS_INFINITY:
-               float_rounding_mode = float_round_down;
-               break;
+               return float_round_down;
  
         case ROUND_TO_ZERO:
-               float_rounding_mode = float_round_to_zero;
-               break;
+               return float_round_to_zero;
         }
  }
  
-void SetRoundingPrecision(const unsigned int opcode)
+int8 SetRoundingPrecision(const unsigned int opcode)
  {
  #ifdef CONFIG_FPE_NWFPE_XP
         switch (opcode & MASK_ROUNDING_PRECISION) {
         case ROUND_SINGLE:
-               floatx80_rounding_precision = 32;
-               break;
+               return 32;
  
         case ROUND_DOUBLE:
-               floatx80_rounding_precision = 64;
-               break;
+               return 64;
  
         case ROUND_EXTENDED:
-               floatx80_rounding_precision = 80;
-               break;
+               return 80;
  
         default:
-               floatx80_rounding_precision = 80;
+               return 80;
         }
  #endif
+       return 80;
  }
  
  void nwfpe_init_fpa(union fp_state *fp)
@@ -103,8 +97,6 @@ void nwfpe_init_fpa(union fp_state *fp)
  #endif
         memset(fpa11, 0, sizeof(FPA11));
         resetFPA11();
-       SetRoundingMode(ROUND_TO_NEAREST);
-       SetRoundingPrecision(ROUND_EXTENDED);
         fpa11->initflag = 1;
  }
  
diff --git a/arch/arm/nwfpe/fpa11.h b/arch/arm/nwfpe/fpa11.h

index e4a61aea534b4e840c02f969629d997a2465b339..93523ae4b7a1f028b6772de5d8f7455b27400c57 100644 (file)
--- a/arch/arm/nwfpe/fpa11.h
+++ b/arch/arm/nwfpe/fpa11.h
@@ -37,6 +37,13 @@
  /* includes */
  #include "fpsr.h"              /* FP control and status register definitions */
  #include "milieu.h"
+
+struct roundingData {
+    int8 mode;
+    int8 precision;
+    signed char exception;
+};
+
  #include "softfloat.h"
  
  #define                typeNone                0x00
@@ -84,8 +91,8 @@ typedef struct tagFPA11 {
                                    initialised. */
  } FPA11;
  
-extern void SetRoundingMode(const unsigned int);
-extern void SetRoundingPrecision(const unsigned int);
+extern int8 SetRoundingMode(const unsigned int);
+extern int8 SetRoundingPrecision(const unsigned int);
  extern void nwfpe_init_fpa(union fp_state *fp);
  
  #endif
diff --git a/arch/arm/nwfpe/fpa11_cpdo.c b/arch/arm/nwfpe/fpa11_cpdo.c

index 1bea67437b6f2bc5ed859b7aa0db2237e84a7c6e..4a31dfd9406884a90a242e7173bae04cd7f77bf2 100644 (file)
--- a/arch/arm/nwfpe/fpa11_cpdo.c
+++ b/arch/arm/nwfpe/fpa11_cpdo.c
@@ -24,15 +24,16 @@
  #include "fpa11.h"
  #include "fpopcode.h"
  
-unsigned int SingleCPDO(const unsigned int opcode, FPREG * rFd);
-unsigned int DoubleCPDO(const unsigned int opcode, FPREG * rFd);
-unsigned int ExtendedCPDO(const unsigned int opcode, FPREG * rFd);
+unsigned int SingleCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd);
+unsigned int DoubleCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd);
+unsigned int ExtendedCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd);
  
  unsigned int EmulateCPDO(const unsigned int opcode)
  {
         FPA11 *fpa11 = GET_FPA11();
         FPREG *rFd;
         unsigned int nType, nDest, nRc;
+       struct roundingData roundData;
  
         /* Get the destination size.  If not valid let Linux perform
            an invalid instruction trap. */
@@ -40,7 +41,9 @@ unsigned int EmulateCPDO(const unsigned int opcode)
         if (typeNone == nDest)
                 return 0;
  
-       SetRoundingMode(opcode);
+       roundData.mode = SetRoundingMode(opcode);
+       roundData.precision = SetRoundingPrecision(opcode);
+       roundData.exception = 0;
  
         /* Compare the size of the operands in Fn and Fm.
            Choose the largest size and perform operations in that size,
@@ -63,14 +66,14 @@ unsigned int EmulateCPDO(const unsigned int opcode)
  
         switch (nType) {
         case typeSingle:
-               nRc = SingleCPDO(opcode, rFd);
+               nRc = SingleCPDO(&roundData, opcode, rFd);
                 break;
         case typeDouble:
-               nRc = DoubleCPDO(opcode, rFd);
+               nRc = DoubleCPDO(&roundData, opcode, rFd);
                 break;
  #ifdef CONFIG_FPE_NWFPE_XP
         case typeExtended:
-               nRc = ExtendedCPDO(opcode, rFd);
+               nRc = ExtendedCPDO(&roundData, opcode, rFd);
                 break;
  #endif
         default:
@@ -93,9 +96,9 @@ unsigned int EmulateCPDO(const unsigned int opcode)
                         case typeSingle:
                                 {
                                         if (typeDouble == nType)
-                                               rFd->fSingle = float64_to_float32(rFd->fDouble);
+                                               rFd->fSingle = float64_to_float32(&roundData, rFd->fDouble);
                                         else
-                                               rFd->fSingle = floatx80_to_float32(rFd->fExtended);
+                                               rFd->fSingle = floatx80_to_float32(&roundData, rFd->fExtended);
                                 }
                                 break;
  
@@ -104,7 +107,7 @@ unsigned int EmulateCPDO(const unsigned int opcode)
                                         if (typeSingle == nType)
                                                 rFd->fDouble = float32_to_float64(rFd->fSingle);
                                         else
-                                               rFd->fDouble = floatx80_to_float64(rFd->fExtended);
+                                               rFd->fDouble = floatx80_to_float64(&roundData, rFd->fExtended);
                                 }
                                 break;
  
@@ -121,12 +124,15 @@ unsigned int EmulateCPDO(const unsigned int opcode)
  #else
                 if (nDest != nType) {
                         if (nDest == typeSingle)
-                               rFd->fSingle = float64_to_float32(rFd->fDouble);
+                               rFd->fSingle = float64_to_float32(&roundData, rFd->fDouble);
                         else
                                 rFd->fDouble = float32_to_float64(rFd->fSingle);
                 }
  #endif
         }
  
+       if (roundData.exception)
+               float_raise(roundData.exception);
+
         return nRc;
  }
diff --git a/arch/arm/nwfpe/fpa11_cpdt.c b/arch/arm/nwfpe/fpa11_cpdt.c

index 95fb63fa9d181238423e6c5202c7ca3d57844a70..b0db5cbcc3b190575774991ff759d1590461a7d2 100644 (file)
--- a/arch/arm/nwfpe/fpa11_cpdt.c
+++ b/arch/arm/nwfpe/fpa11_cpdt.c
@@ -96,7 +96,7 @@ static inline void loadMultiple(const unsigned int Fn, const unsigned int __user
         }
  }
  
-static inline void storeSingle(const unsigned int Fn, unsigned int __user *pMem)
+static inline void storeSingle(struct roundingData *roundData, const unsigned int Fn, unsigned int __user *pMem)
  {
         FPA11 *fpa11 = GET_FPA11();
         union {
@@ -106,12 +106,12 @@ static inline void storeSingle(const unsigned int Fn, unsigned int __user *pMem)
  
         switch (fpa11->fType[Fn]) {
         case typeDouble:
-               val.f = float64_to_float32(fpa11->fpreg[Fn].fDouble);
+               val.f = float64_to_float32(roundData, fpa11->fpreg[Fn].fDouble);
                 break;
  
  #ifdef CONFIG_FPE_NWFPE_XP
         case typeExtended:
-               val.f = floatx80_to_float32(fpa11->fpreg[Fn].fExtended);
+               val.f = floatx80_to_float32(roundData, fpa11->fpreg[Fn].fExtended);
                 break;
  #endif
  
@@ -122,7 +122,7 @@ static inline void storeSingle(const unsigned int Fn, unsigned int __user *pMem)
         put_user(val.i[0], pMem);
  }
  
-static inline void storeDouble(const unsigned int Fn, unsigned int __user *pMem)
+static inline void storeDouble(struct roundingData *roundData, const unsigned int Fn, unsigned int __user *pMem)
  {
         FPA11 *fpa11 = GET_FPA11();
         union {
@@ -137,7 +137,7 @@ static inline void storeDouble(const unsigned int Fn, unsigned int __user *pMem)
  
  #ifdef CONFIG_FPE_NWFPE_XP
         case typeExtended:
-               val.f = floatx80_to_float64(fpa11->fpreg[Fn].fExtended);
+               val.f = floatx80_to_float64(roundData, fpa11->fpreg[Fn].fExtended);
                 break;
  #endif
  
@@ -259,8 +259,11 @@ unsigned int PerformSTF(const unsigned int opcode)
  {
         unsigned int __user *pBase, *pAddress, *pFinal;
         unsigned int nRc = 1, write_back = WRITE_BACK(opcode);
+       struct roundingData roundData;
  
-       SetRoundingMode(ROUND_TO_NEAREST);
+       roundData.mode = SetRoundingMode(opcode);
+       roundData.precision = SetRoundingPrecision(opcode);
+       roundData.exception = 0;
  
         pBase = (unsigned int __user *) readRegister(getRn(opcode));
         if (REG_PC == getRn(opcode)) {
@@ -281,10 +284,10 @@ unsigned int PerformSTF(const unsigned int opcode)
  
         switch (opcode & MASK_TRANSFER_LENGTH) {
         case TRANSFER_SINGLE:
-               storeSingle(getFd(opcode), pAddress);
+               storeSingle(&roundData, getFd(opcode), pAddress);
                 break;
         case TRANSFER_DOUBLE:
-               storeDouble(getFd(opcode), pAddress);
+               storeDouble(&roundData, getFd(opcode), pAddress);
                 break;
  #ifdef CONFIG_FPE_NWFPE_XP
         case TRANSFER_EXTENDED:
@@ -295,6 +298,9 @@ unsigned int PerformSTF(const unsigned int opcode)
                 nRc = 0;
         }
  
+       if (roundData.exception)
+               float_raise(roundData.exception);
+
         if (write_back)
                 writeRegister(getRn(opcode), (unsigned long) pFinal);
         return nRc;
diff --git a/arch/arm/nwfpe/fpa11_cprt.c b/arch/arm/nwfpe/fpa11_cprt.c

index db01fbc97216829b52db0b5117b88e2457c1253e..adf8d3000540f9c6f774024ff22d720507a2e24f 100644 (file)
--- a/arch/arm/nwfpe/fpa11_cprt.c
+++ b/arch/arm/nwfpe/fpa11_cprt.c
@@ -33,8 +33,6 @@ extern flag floatx80_is_nan(floatx80);
  extern flag float64_is_nan(float64);
  extern flag float32_is_nan(float32);
  
-void SetRoundingMode(const unsigned int opcode);
-
  unsigned int PerformFLT(const unsigned int opcode);
  unsigned int PerformFIX(const unsigned int opcode);
  
@@ -77,14 +75,17 @@ unsigned int EmulateCPRT(const unsigned int opcode)
  unsigned int PerformFLT(const unsigned int opcode)
  {
         FPA11 *fpa11 = GET_FPA11();
-       SetRoundingMode(opcode);
-       SetRoundingPrecision(opcode);
+       struct roundingData roundData;
+
+       roundData.mode = SetRoundingMode(opcode);
+       roundData.precision = SetRoundingPrecision(opcode);
+       roundData.exception = 0;
  
         switch (opcode & MASK_ROUNDING_PRECISION) {
         case ROUND_SINGLE:
                 {
                         fpa11->fType[getFn(opcode)] = typeSingle;
-                       fpa11->fpreg[getFn(opcode)].fSingle = int32_to_float32(readRegister(getRd(opcode)));
+                       fpa11->fpreg[getFn(opcode)].fSingle = int32_to_float32(&roundData, readRegister(getRd(opcode)));
                 }
                 break;
  
@@ -108,6 +109,9 @@ unsigned int PerformFLT(const unsigned int opcode)
                 return 0;
         }
  
+       if (roundData.exception)
+               float_raise(roundData.exception);
+
         return 1;
  }
  
@@ -115,26 +119,29 @@ unsigned int PerformFIX(const unsigned int opcode)
  {
         FPA11 *fpa11 = GET_FPA11();
         unsigned int Fn = getFm(opcode);
+       struct roundingData roundData;
  
-       SetRoundingMode(opcode);
+       roundData.mode = SetRoundingMode(opcode);
+       roundData.precision = SetRoundingPrecision(opcode);
+       roundData.exception = 0;
  
         switch (fpa11->fType[Fn]) {
         case typeSingle:
                 {
-                       writeRegister(getRd(opcode), float32_to_int32(fpa11->fpreg[Fn].fSingle));
+                       writeRegister(getRd(opcode), float32_to_int32(&roundData, fpa11->fpreg[Fn].fSingle));
                 }
                 break;
  
         case typeDouble:
                 {
-                       writeRegister(getRd(opcode), float64_to_int32(fpa11->fpreg[Fn].fDouble));
+                       writeRegister(getRd(opcode), float64_to_int32(&roundData, fpa11->fpreg[Fn].fDouble));
                 }
                 break;
  
  #ifdef CONFIG_FPE_NWFPE_XP
         case typeExtended:
                 {
-                       writeRegister(getRd(opcode), floatx80_to_int32(fpa11->fpreg[Fn].fExtended));
+                       writeRegister(getRd(opcode), floatx80_to_int32(&roundData, fpa11->fpreg[Fn].fExtended));
                 }
                 break;
  #endif
@@ -143,6 +150,9 @@ unsigned int PerformFIX(const unsigned int opcode)
                 return 0;
         }
  
+       if (roundData.exception)
+               float_raise(roundData.exception);
+
         return 1;
  }
  
diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c

index 12885f31d34794dde98be059088669efbc250c89..2dfe1ac42ee8916cc2734d22a671a2f3858ce19d 100644 (file)
--- a/arch/arm/nwfpe/fpmodule.c
+++ b/arch/arm/nwfpe/fpmodule.c
@@ -116,8 +116,6 @@ fpmodule.c to integrate with the NetBSD kernel (I hope!).
  code to access data in user space in some other source files at the 
  moment (grep for get_user / put_user calls).  --philb]
  
-float_exception_flags is a global variable in SoftFloat.
-
  This function is called by the SoftFloat routines to raise a floating
  point exception.  We check the trap enable byte in the FPSR, and raise
  a SIGFPE exception if necessary.  If not the relevant bits in the 
@@ -129,15 +127,14 @@ void float_raise(signed char flags)
         register unsigned int fpsr, cumulativeTraps;
  
  #ifdef CONFIG_DEBUG_USER
-       printk(KERN_DEBUG
-              "NWFPE: %s[%d] takes exception %08x at %p from %08lx\n",
-              current->comm, current->pid, flags,
-              __builtin_return_address(0), GET_USERREG()->ARM_pc);
+       /* Log only when a flag other than inexact is set; inexact alone is far too frequent to log */
+       if (flags & ~BIT_IXC)
+               printk(KERN_DEBUG
+                      "NWFPE: %s[%d] takes exception %08x at %p from %08lx\n",
+                      current->comm, current->pid, flags,
+                      __builtin_return_address(0), GET_USERREG()->ARM_pc);
  #endif
  
-       /* Keep SoftFloat exception flags up to date.  */
-       float_exception_flags |= flags;
-
         /* Read fpsr and initialize the cumulativeTraps.  */
         fpsr = readFPSR();
         cumulativeTraps = 0;
diff --git a/arch/arm/nwfpe/fpopcode.h b/arch/arm/nwfpe/fpopcode.h

index 8035f4faafbfa4ce780649808cd0b8bffffdf361..1777e92a88e69c73db5a0fa67438ff8520a9d56a 100644 (file)
--- a/arch/arm/nwfpe/fpopcode.h
+++ b/arch/arm/nwfpe/fpopcode.h
@@ -370,20 +370,20 @@ TABLE 5
  #define getRoundingMode(opcode)                ((opcode & MASK_ROUNDING_MODE) >> 5)
  
  #ifdef CONFIG_FPE_NWFPE_XP
-static inline const floatx80 getExtendedConstant(const unsigned int nIndex)
+static inline __attribute_pure__ floatx80 getExtendedConstant(const unsigned int nIndex)
  {
         extern const floatx80 floatx80Constant[];
         return floatx80Constant[nIndex];
  }
  #endif
  
-static inline const float64 getDoubleConstant(const unsigned int nIndex)
+static inline __attribute_pure__ float64 getDoubleConstant(const unsigned int nIndex)
  {
         extern const float64 float64Constant[];
         return float64Constant[nIndex];
  }
  
-static inline const float32 getSingleConstant(const unsigned int nIndex)
+static inline __attribute_pure__ float32 getSingleConstant(const unsigned int nIndex)
  {
         extern const float32 float32Constant[];
         return float32Constant[nIndex];
diff --git a/arch/arm/nwfpe/single_cpdo.c b/arch/arm/nwfpe/single_cpdo.c

index 705808e88d9d3041a02b3cb3b86988d810bd02bb..c66981d682cfe89d9bbfb01c8d71a0db20938be5 100644 (file)
--- a/arch/arm/nwfpe/single_cpdo.c
+++ b/arch/arm/nwfpe/single_cpdo.c
@@ -36,17 +36,17 @@ float32 float32_arccos(float32 rFm);
  float32 float32_pow(float32 rFn, float32 rFm);
  float32 float32_pol(float32 rFn, float32 rFm);
  
-static float32 float32_rsf(float32 rFn, float32 rFm)
+static float32 float32_rsf(struct roundingData *roundData, float32 rFn, float32 rFm)
  {
-       return float32_sub(rFm, rFn);
+       return float32_sub(roundData, rFm, rFn);
  }
  
-static float32 float32_rdv(float32 rFn, float32 rFm)
+static float32 float32_rdv(struct roundingData *roundData, float32 rFn, float32 rFm)
  {
-       return float32_div(rFm, rFn);
+       return float32_div(roundData, rFm, rFn);
  }
  
-static float32 (*const dyadic_single[16])(float32 rFn, float32 rFm) = {
+static float32 (*const dyadic_single[16])(struct roundingData *, float32 rFn, float32 rFm) = {
         [ADF_CODE >> 20] = float32_add,
         [MUF_CODE >> 20] = float32_mul,
         [SUF_CODE >> 20] = float32_sub,
@@ -60,22 +60,22 @@ static float32 (*const dyadic_single[16])(float32 rFn, float32 rFm) = {
         [FRD_CODE >> 20] = float32_rdv,
  };
  
-static float32 float32_mvf(float32 rFm)
+static float32 float32_mvf(struct roundingData *roundData, float32 rFm)
  {
         return rFm;
  }
  
-static float32 float32_mnf(float32 rFm)
+static float32 float32_mnf(struct roundingData *roundData, float32 rFm)
  {
         return rFm ^ 0x80000000;
  }
  
-static float32 float32_abs(float32 rFm)
+static float32 float32_abs(struct roundingData *roundData, float32 rFm)
  {
         return rFm & 0x7fffffff;
  }
  
-static float32 (*const monadic_single[16])(float32 rFm) = {
+static float32 (*const monadic_single[16])(struct roundingData*, float32 rFm) = {
         [MVF_CODE >> 20] = float32_mvf,
         [MNF_CODE >> 20] = float32_mnf,
         [ABS_CODE >> 20] = float32_abs,
@@ -85,7 +85,7 @@ static float32 (*const monadic_single[16])(float32 rFm) = {
         [NRM_CODE >> 20] = float32_mvf,
  };
  
-unsigned int SingleCPDO(const unsigned int opcode, FPREG * rFd)
+unsigned int SingleCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd)
  {
         FPA11 *fpa11 = GET_FPA11();
         float32 rFm;
@@ -108,13 +108,13 @@ unsigned int SingleCPDO(const unsigned int opcode, FPREG * rFd)
                 if (fpa11->fType[Fn] == typeSingle &&
                     dyadic_single[opc_mask_shift]) {
                         rFn = fpa11->fpreg[Fn].fSingle;
-                       rFd->fSingle = dyadic_single[opc_mask_shift](rFn, rFm);
+                       rFd->fSingle = dyadic_single[opc_mask_shift](roundData, rFn, rFm);
                 } else {
                         return 0;
                 }
         } else {
                 if (monadic_single[opc_mask_shift]) {
-                       rFd->fSingle = monadic_single[opc_mask_shift](rFm);
+                       rFd->fSingle = monadic_single[opc_mask_shift](roundData, rFm);
                 } else {
                         return 0;
                 }
diff --git a/arch/arm/nwfpe/softfloat.c b/arch/arm/nwfpe/softfloat.c

index e038dd3be9b3c63e019a5be3f77de94f25f4c949..f9f049132a17bffb920acb8df278f5e91b514f7a 100644 (file)
--- a/arch/arm/nwfpe/softfloat.c
+++ b/arch/arm/nwfpe/softfloat.c
@@ -34,16 +34,6 @@ this code that are retained.
  //#include "milieu.h"
  //#include "softfloat.h"
  
-/*
--------------------------------------------------------------------------------
-Floating-point rounding mode, extended double-precision rounding precision,
-and exception flags.
--------------------------------------------------------------------------------
-*/
-int8 float_rounding_mode = float_round_nearest_even;
-int8 floatx80_rounding_precision = 80;
-int8 float_exception_flags;
-
  /*
  -------------------------------------------------------------------------------
  Primitive arithmetic functions, including multi-word arithmetic, and
@@ -77,14 +67,14 @@ input is too large, however, the invalid exception is raised and the largest
  positive or negative integer is returned.
  -------------------------------------------------------------------------------
  */
-static int32 roundAndPackInt32( flag zSign, bits64 absZ )
+static int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ )
  {
      int8 roundingMode;
      flag roundNearestEven;
      int8 roundIncrement, roundBits;
      int32 z;
  
-    roundingMode = float_rounding_mode;
+    roundingMode = roundData->mode;
      roundNearestEven = ( roundingMode == float_round_nearest_even );
      roundIncrement = 0x40;
      if ( ! roundNearestEven ) {
@@ -107,10 +97,10 @@ static int32 roundAndPackInt32( flag zSign, bits64 absZ )
      z = absZ;
      if ( zSign ) z = - z;
      if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
-        float_exception_flags |= float_flag_invalid;
+        roundData->exception |= float_flag_invalid;
          return zSign ? 0x80000000 : 0x7FFFFFFF;
      }
-    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    if ( roundBits ) roundData->exception |= float_flag_inexact;
      return z;
  
  }
@@ -224,14 +214,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for
  Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
  {
      int8 roundingMode;
      flag roundNearestEven;
      int8 roundIncrement, roundBits;
      flag isTiny;
  
-    roundingMode = float_rounding_mode;
+    roundingMode = roundData->mode;
      roundNearestEven = ( roundingMode == float_round_nearest_even );
      roundIncrement = 0x40;
      if ( ! roundNearestEven ) {
@@ -254,7 +244,7 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
               || (    ( zExp == 0xFD )
                    && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
             ) {
-            float_raise( float_flag_overflow | float_flag_inexact );
+            roundData->exception |= float_flag_overflow | float_flag_inexact;
              return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
          }
          if ( zExp < 0 ) {
@@ -265,10 +255,10 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
              shift32RightJamming( zSig, - zExp, &zSig );
              zExp = 0;
              roundBits = zSig & 0x7F;
-            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
          }
      }
-    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    if ( roundBits ) roundData->exception |= float_flag_inexact;
      zSig = ( zSig + roundIncrement )>>7;
      zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
      if ( zSig == 0 ) zExp = 0;
@@ -287,12 +277,12 @@ point exponent.
  -------------------------------------------------------------------------------
  */
  static float32
- normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+ normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
  {
      int8 shiftCount;
  
      shiftCount = countLeadingZeros32( zSig ) - 1;
-    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
+    return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
  
  }
  
@@ -395,14 +385,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for
  Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
  {
      int8 roundingMode;
      flag roundNearestEven;
      int16 roundIncrement, roundBits;
      flag isTiny;
  
-    roundingMode = float_rounding_mode;
+    roundingMode = roundData->mode;
      roundNearestEven = ( roundingMode == float_round_nearest_even );
      roundIncrement = 0x200;
      if ( ! roundNearestEven ) {
@@ -427,7 +417,7 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
             ) {
              //register int lr = __builtin_return_address(0);
              //printk("roundAndPackFloat64 called from 0x%08x\n",lr);
-            float_raise( float_flag_overflow | float_flag_inexact );
+            roundData->exception |= float_flag_overflow | float_flag_inexact;
              return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
          }
          if ( zExp < 0 ) {
@@ -438,10 +428,10 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
              shift64RightJamming( zSig, - zExp, &zSig );
              zExp = 0;
              roundBits = zSig & 0x3FF;
-            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
          }
      }
-    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    if ( roundBits ) roundData->exception |= float_flag_inexact;
      zSig = ( zSig + roundIncrement )>>10;
      zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
      if ( zSig == 0 ) zExp = 0;
@@ -460,12 +450,12 @@ point exponent.
  -------------------------------------------------------------------------------
  */
  static float64
- normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+ normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
  {
      int8 shiftCount;
  
      shiftCount = countLeadingZeros64( zSig ) - 1;
-    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
+    return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
  
  }
  
@@ -572,14 +562,15 @@ Floating-point Arithmetic.
  */
  static floatx80
   roundAndPackFloatx80(
-     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+     struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
   )
  {
-    int8 roundingMode;
+    int8 roundingMode, roundingPrecision;
      flag roundNearestEven, increment, isTiny;
      int64 roundIncrement, roundMask, roundBits;
  
-    roundingMode = float_rounding_mode;
+    roundingMode = roundData->mode;
+    roundingPrecision = roundData->precision;
      roundNearestEven = ( roundingMode == float_round_nearest_even );
      if ( roundingPrecision == 80 ) goto precision80;
      if ( roundingPrecision == 64 ) {
@@ -623,8 +614,8 @@ static floatx80
              shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
              zExp = 0;
              roundBits = zSig0 & roundMask;
-            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
-            if ( roundBits ) float_exception_flags |= float_flag_inexact;
+            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
+            if ( roundBits ) roundData->exception |= float_flag_inexact;
              zSig0 += roundIncrement;
              if ( (sbits64) zSig0 < 0 ) zExp = 1;
              roundIncrement = roundMask + 1;
@@ -635,7 +626,7 @@ static floatx80
              return packFloatx80( zSign, zExp, zSig0 );
          }
      }
-    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    if ( roundBits ) roundData->exception |= float_flag_inexact;
      zSig0 += roundIncrement;
      if ( zSig0 < roundIncrement ) {
          ++zExp;
@@ -672,7 +663,7 @@ static floatx80
             ) {
              roundMask = 0;
   overflow:
-            float_raise( float_flag_overflow | float_flag_inexact );
+            roundData->exception |= float_flag_overflow | float_flag_inexact;
              if (    ( roundingMode == float_round_to_zero )
                   || ( zSign && ( roundingMode == float_round_up ) )
                   || ( ! zSign && ( roundingMode == float_round_down ) )
@@ -689,8 +680,8 @@ static floatx80
                  || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
              shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
              zExp = 0;
-            if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
-            if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+            if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow;
+            if ( zSig1 ) roundData->exception |= float_flag_inexact;
              if ( roundNearestEven ) {
                  increment = ( (sbits64) zSig1 < 0 );
              }
@@ -710,7 +701,7 @@ static floatx80
              return packFloatx80( zSign, zExp, zSig0 );
          }
      }
-    if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+    if ( zSig1 ) roundData->exception |= float_flag_inexact;
      if ( increment ) {
          ++zSig0;
          if ( zSig0 == 0 ) {
@@ -740,7 +731,7 @@ normalized.
  */
  static floatx80
   normalizeRoundAndPackFloatx80(
-     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+     struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
   )
  {
      int8 shiftCount;
@@ -754,7 +745,7 @@ static floatx80
      shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
      zExp -= shiftCount;
      return
-        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
+        roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 );
  
  }
  
@@ -767,14 +758,14 @@ the single-precision floating-point format.  The conversion is performed
  according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float32 int32_to_float32( int32 a )
+float32 int32_to_float32(struct roundingData *roundData, int32 a)
  {
      flag zSign;
  
      if ( a == 0 ) return 0;
      if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
      zSign = ( a < 0 );
-    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
+    return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a );
  
  }
  
@@ -840,7 +831,7 @@ positive integer is returned.  Otherwise, if the conversion overflows, the
  largest integer with the same sign as `a' is returned.
  -------------------------------------------------------------------------------
  */
-int32 float32_to_int32( float32 a )
+int32 float32_to_int32( struct roundingData *roundData, float32 a )
  {
      flag aSign;
      int16 aExp, shiftCount;
@@ -856,7 +847,7 @@ int32 float32_to_int32( float32 a )
      zSig = aSig;
      zSig <<= 32;
      if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
-    return roundAndPackInt32( aSign, zSig );
+    return roundAndPackInt32( roundData, aSign, zSig );
  
  }
  
@@ -889,13 +880,13 @@ int32 float32_to_int32_round_to_zero( float32 a )
          return 0x80000000;
      }
      else if ( aExp <= 0x7E ) {
-        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+        if ( aExp | aSig ) float_raise( float_flag_inexact );
          return 0;
      }
      aSig = ( aSig | 0x00800000 )<<8;
      z = aSig>>( - shiftCount );
      if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
-        float_exception_flags |= float_flag_inexact;
+        float_raise( float_flag_inexact );
      }
      return aSign ? - z : z;
  
@@ -973,7 +964,7 @@ operation is performed according to the IEC/IEEE Standard for Binary
  Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float32 float32_round_to_int( float32 a )
+float32 float32_round_to_int( struct roundingData *roundData, float32 a )
  {
      flag aSign;
      int16 aExp;
@@ -988,11 +979,12 @@ float32 float32_round_to_int( float32 a )
          }
          return a;
      }
+    roundingMode = roundData->mode;
      if ( aExp <= 0x7E ) {
          if ( (bits32) ( a<<1 ) == 0 ) return a;
-        float_exception_flags |= float_flag_inexact;
+        roundData->exception |= float_flag_inexact;
          aSign = extractFloat32Sign( a );
-        switch ( float_rounding_mode ) {
+        switch ( roundingMode ) {
           case float_round_nearest_even:
              if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
                  return packFloat32( aSign, 0x7F, 0 );
@@ -1009,7 +1001,6 @@ float32 float32_round_to_int( float32 a )
      lastBitMask <<= 0x96 - aExp;
      roundBitsMask = lastBitMask - 1;
      z = a;
-    roundingMode = float_rounding_mode;
      if ( roundingMode == float_round_nearest_even ) {
          z += lastBitMask>>1;
          if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
@@ -1020,7 +1011,7 @@ float32 float32_round_to_int( float32 a )
          }
      }
      z &= ~ roundBitsMask;
-    if ( z != a ) float_exception_flags |= float_flag_inexact;
+    if ( z != a ) roundData->exception |= float_flag_inexact;
      return z;
  
  }
@@ -1034,7 +1025,7 @@ addition is performed according to the IEC/IEEE Standard for Binary
  Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
+static float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
  {
      int16 aExp, bExp, zExp;
      bits32 aSig, bSig, zSig;
@@ -1093,7 +1084,7 @@ static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
          ++zExp;
      }
   roundAndPack:
-    return roundAndPackFloat32( zSign, zExp, zSig );
+    return roundAndPackFloat32( roundData, zSign, zExp, zSig );
  
  }
  
@@ -1106,7 +1097,7 @@ result is a NaN.  The subtraction is performed according to the IEC/IEEE
  Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
+static float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
  {
      int16 aExp, bExp, zExp;
      bits32 aSig, bSig, zSig;
@@ -1123,7 +1114,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
      if ( expDiff < 0 ) goto bExpBigger;
      if ( aExp == 0xFF ) {
          if ( aSig | bSig ) return propagateFloat32NaN( a, b );
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
          return float32_default_nan;
      }
      if ( aExp == 0 ) {
@@ -1132,7 +1123,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
      }
      if ( bSig < aSig ) goto aBigger;
      if ( aSig < bSig ) goto bBigger;
-    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
+    return packFloat32( roundData->mode == float_round_down, 0, 0 );
   bExpBigger:
      if ( bExp == 0xFF ) {
          if ( bSig ) return propagateFloat32NaN( a, b );
@@ -1169,7 +1160,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
      zExp = aExp;
   normalizeRoundAndPack:
      --zExp;
-    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
+    return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig );
  
  }
  
@@ -1180,17 +1171,17 @@ and `b'.  The operation is performed according to the IEC/IEEE Standard for
  Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float32 float32_add( float32 a, float32 b )
+float32 float32_add( struct roundingData *roundData, float32 a, float32 b )
  {
      flag aSign, bSign;
  
      aSign = extractFloat32Sign( a );
      bSign = extractFloat32Sign( b );
      if ( aSign == bSign ) {
-        return addFloat32Sigs( a, b, aSign );
+        return addFloat32Sigs( roundData, a, b, aSign );
      }
      else {
-        return subFloat32Sigs( a, b, aSign );
+        return subFloat32Sigs( roundData, a, b, aSign );
      }
  
  }
@@ -1202,17 +1193,17 @@ Returns the result of subtracting the single-precision floating-point values
  for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float32 float32_sub( float32 a, float32 b )
+float32 float32_sub( struct roundingData *roundData, float32 a, float32 b )
  {
      flag aSign, bSign;
  
      aSign = extractFloat32Sign( a );
      bSign = extractFloat32Sign( b );
      if ( aSign == bSign ) {
-        return subFloat32Sigs( a, b, aSign );
+        return subFloat32Sigs( roundData, a, b, aSign );
      }
      else {
-        return addFloat32Sigs( a, b, aSign );
+        return addFloat32Sigs( roundData, a, b, aSign );
      }
  
  }
@@ -1224,7 +1215,7 @@ Returns the result of multiplying the single-precision floating-point values
  for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float32 float32_mul( float32 a, float32 b )
+float32 float32_mul( struct roundingData *roundData, float32 a, float32 b )
  {
      flag aSign, bSign, zSign;
      int16 aExp, bExp, zExp;
@@ -1244,7 +1235,7 @@ float32 float32_mul( float32 a, float32 b )
              return propagateFloat32NaN( a, b );
          }
          if ( ( bExp | bSig ) == 0 ) {
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
              return float32_default_nan;
          }
          return packFloat32( zSign, 0xFF, 0 );
@@ -1252,7 +1243,7 @@ float32 float32_mul( float32 a, float32 b )
      if ( bExp == 0xFF ) {
          if ( bSig ) return propagateFloat32NaN( a, b );
          if ( ( aExp | aSig ) == 0 ) {
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
              return float32_default_nan;
          }
          return packFloat32( zSign, 0xFF, 0 );
@@ -1274,7 +1265,7 @@ float32 float32_mul( float32 a, float32 b )
          zSig <<= 1;
          --zExp;
      }
-    return roundAndPackFloat32( zSign, zExp, zSig );
+    return roundAndPackFloat32( roundData, zSign, zExp, zSig );
  
  }
  
@@ -1285,7 +1276,7 @@ by the corresponding value `b'.  The operation is performed according to the
  IEC/IEEE Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float32 float32_div( float32 a, float32 b )
+float32 float32_div( struct roundingData *roundData, float32 a, float32 b )
  {
      flag aSign, bSign, zSign;
      int16 aExp, bExp, zExp;
@@ -1302,7 +1293,7 @@ float32 float32_div( float32 a, float32 b )
          if ( aSig ) return propagateFloat32NaN( a, b );
          if ( bExp == 0xFF ) {
              if ( bSig ) return propagateFloat32NaN( a, b );
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
              return float32_default_nan;
          }
          return packFloat32( zSign, 0xFF, 0 );
@@ -1314,10 +1305,10 @@ float32 float32_div( float32 a, float32 b )
      if ( bExp == 0 ) {
          if ( bSig == 0 ) {
              if ( ( aExp | aSig ) == 0 ) {
-                float_raise( float_flag_invalid );
+                roundData->exception |= float_flag_invalid;
                  return float32_default_nan;
              }
-            float_raise( float_flag_divbyzero );
+            roundData->exception |= float_flag_divbyzero;
              return packFloat32( zSign, 0xFF, 0 );
          }
          normalizeFloat32Subnormal( bSig, &bExp, &bSig );
@@ -1341,7 +1332,7 @@ float32 float32_div( float32 a, float32 b )
      if ( ( zSig & 0x3F ) == 0 ) {
          zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
      }
-    return roundAndPackFloat32( zSign, zExp, zSig );
+    return roundAndPackFloat32( roundData, zSign, zExp, zSig );
  
  }
  
@@ -1352,7 +1343,7 @@ with respect to the corresponding value `b'.  The operation is performed
  according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float32 float32_rem( float32 a, float32 b )
+float32 float32_rem( struct roundingData *roundData, float32 a, float32 b )
  {
      flag aSign, bSign, zSign;
      int16 aExp, bExp, expDiff;
@@ -1372,7 +1363,7 @@ float32 float32_rem( float32 a, float32 b )
          if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
              return propagateFloat32NaN( a, b );
          }
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
          return float32_default_nan;
      }
      if ( bExp == 0xFF ) {
@@ -1381,7 +1372,7 @@ float32 float32_rem( float32 a, float32 b )
      }
      if ( bExp == 0 ) {
          if ( bSig == 0 ) {
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
              return float32_default_nan;
          }
          normalizeFloat32Subnormal( bSig, &bExp, &bSig );
@@ -1444,7 +1435,7 @@ float32 float32_rem( float32 a, float32 b )
      }
      zSign = ( (sbits32) aSig < 0 );
      if ( zSign ) aSig = - aSig;
-    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
+    return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig );
  
  }
  
@@ -1455,7 +1446,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
  Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float32 float32_sqrt( float32 a )
+float32 float32_sqrt( struct roundingData *roundData, float32 a )
  {
      flag aSign;
      int16 aExp, zExp;
@@ -1468,12 +1459,12 @@ float32 float32_sqrt( float32 a )
      if ( aExp == 0xFF ) {
          if ( aSig ) return propagateFloat32NaN( a, 0 );
          if ( ! aSign ) return a;
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
          return float32_default_nan;
      }
      if ( aSign ) {
          if ( ( aExp | aSig ) == 0 ) return a;
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
          return float32_default_nan;
      }
      if ( aExp == 0 ) {
@@ -1499,7 +1490,7 @@ float32 float32_sqrt( float32 a )
          }
      }
      shift32RightJamming( zSig, 1, &zSig );
-    return roundAndPackFloat32( 0, zExp, zSig );
+    return roundAndPackFloat32( roundData, 0, zExp, zSig );
  
  }
  
@@ -1611,9 +1602,7 @@ flag float32_le_quiet( float32 a, float32 b )
      if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
           || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
         ) {
-        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid );
-        }
+        /* Quiet comparison: any NaN operand (signaling included) yields false without raising an exception */
          return 0;
      }
      aSign = extractFloat32Sign( a );
@@ -1638,9 +1627,7 @@ flag float32_lt_quiet( float32 a, float32 b )
      if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
           || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
         ) {
-        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid );
-        }
+        /* Quiet comparison: any NaN operand (signaling included) yields false without raising an exception */
          return 0;
      }
      aSign = extractFloat32Sign( a );
@@ -1661,7 +1648,7 @@ positive integer is returned.  Otherwise, if the conversion overflows, the
  largest integer with the same sign as `a' is returned.
  -------------------------------------------------------------------------------
  */
-int32 float64_to_int32( float64 a )
+int32 float64_to_int32( struct roundingData *roundData, float64 a )
  {
      flag aSign;
      int16 aExp, shiftCount;
@@ -1674,7 +1661,7 @@ int32 float64_to_int32( float64 a )
      if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
      shiftCount = 0x42C - aExp;
      if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
-    return roundAndPackInt32( aSign, aSig );
+    return roundAndPackInt32( roundData, aSign, aSig );
  
  }
  
@@ -1705,7 +1692,7 @@ int32 float64_to_int32_round_to_zero( float64 a )
          goto invalid;
      }
      else if ( 52 < shiftCount ) {
-        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+        if ( aExp || aSig ) float_raise( float_flag_inexact );
          return 0;
      }
      aSig |= LIT64( 0x0010000000000000 );
@@ -1715,11 +1702,11 @@ int32 float64_to_int32_round_to_zero( float64 a )
      if ( aSign ) z = - z;
      if ( ( z < 0 ) ^ aSign ) {
   invalid:
-        float_exception_flags |= float_flag_invalid;
+        float_raise( float_flag_invalid );
          return aSign ? 0x80000000 : 0x7FFFFFFF;
      }
      if ( ( aSig<<shiftCount ) != savedASig ) {
-        float_exception_flags |= float_flag_inexact;
+        float_raise( float_flag_inexact );
      }
      return z;
  
@@ -1736,7 +1723,7 @@ positive integer is returned.  Otherwise, if the conversion overflows, the
  largest positive integer is returned.
  -------------------------------------------------------------------------------
  */
-int32 float64_to_uint32( float64 a )
+int32 float64_to_uint32( struct roundingData *roundData, float64 a )
  {
      flag aSign;
      int16 aExp, shiftCount;
@@ -1749,7 +1736,7 @@ int32 float64_to_uint32( float64 a )
      if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
      shiftCount = 0x42C - aExp;
      if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
-    return roundAndPackInt32( aSign, aSig );
+    return roundAndPackInt32( roundData, aSign, aSig );
  }
  
  /*
@@ -1778,7 +1765,7 @@ int32 float64_to_uint32_round_to_zero( float64 a )
          goto invalid;
      }
      else if ( 52 < shiftCount ) {
-        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+        if ( aExp || aSig ) float_raise( float_flag_inexact );
          return 0;
      }
      aSig |= LIT64( 0x0010000000000000 );
@@ -1788,11 +1775,11 @@ int32 float64_to_uint32_round_to_zero( float64 a )
      if ( aSign ) z = - z;
      if ( ( z < 0 ) ^ aSign ) {
   invalid:
-        float_exception_flags |= float_flag_invalid;
+        float_raise( float_flag_invalid );
          return aSign ? 0x80000000 : 0x7FFFFFFF;
      }
      if ( ( aSig<<shiftCount ) != savedASig ) {
-        float_exception_flags |= float_flag_inexact;
+        float_raise( float_flag_inexact );
      }
      return z;
  }
@@ -1805,7 +1792,7 @@ performed according to the IEC/IEEE Standard for Binary Floating-point
  Arithmetic.
  -------------------------------------------------------------------------------
  */
-float32 float64_to_float32( float64 a )
+float32 float64_to_float32( struct roundingData *roundData, float64 a )
  {
      flag aSign;
      int16 aExp;
@@ -1825,7 +1812,7 @@ float32 float64_to_float32( float64 a )
          zSig |= 0x40000000;
          aExp -= 0x381;
      }
-    return roundAndPackFloat32( aSign, aExp, zSig );
+    return roundAndPackFloat32( roundData, aSign, aExp, zSig );
  
  }
  
@@ -1872,7 +1859,7 @@ operation is performed according to the IEC/IEEE Standard for Binary
  Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float64 float64_round_to_int( float64 a )
+float64 float64_round_to_int( struct roundingData *roundData, float64 a )
  {
      flag aSign;
      int16 aExp;
@@ -1889,9 +1876,9 @@ float64 float64_round_to_int( float64 a )
      }
      if ( aExp <= 0x3FE ) {
          if ( (bits64) ( a<<1 ) == 0 ) return a;
-        float_exception_flags |= float_flag_inexact;
+        roundData->exception |= float_flag_inexact;
          aSign = extractFloat64Sign( a );
-        switch ( float_rounding_mode ) {
+        switch ( roundData->mode ) {
           case float_round_nearest_even:
              if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
                  return packFloat64( aSign, 0x3FF, 0 );
@@ -1909,7 +1896,7 @@ float64 float64_round_to_int( float64 a )
      lastBitMask <<= 0x433 - aExp;
      roundBitsMask = lastBitMask - 1;
      z = a;
-    roundingMode = float_rounding_mode;
+    roundingMode = roundData->mode;
      if ( roundingMode == float_round_nearest_even ) {
          z += lastBitMask>>1;
          if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
@@ -1920,7 +1907,7 @@ float64 float64_round_to_int( float64 a )
          }
      }
      z &= ~ roundBitsMask;
-    if ( z != a ) float_exception_flags |= float_flag_inexact;
+    if ( z != a ) roundData->exception |= float_flag_inexact;
      return z;
  
  }
@@ -1934,7 +1921,7 @@ addition is performed according to the IEC/IEEE Standard for Binary
  Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
+static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
  {
      int16 aExp, bExp, zExp;
      bits64 aSig, bSig, zSig;
@@ -1993,7 +1980,7 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
          ++zExp;
      }
   roundAndPack:
-    return roundAndPackFloat64( zSign, zExp, zSig );
+    return roundAndPackFloat64( roundData, zSign, zExp, zSig );
  
  }
  
@@ -2006,7 +1993,7 @@ result is a NaN.  The subtraction is performed according to the IEC/IEEE
  Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
+static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
  {
      int16 aExp, bExp, zExp;
      bits64 aSig, bSig, zSig;
@@ -2023,7 +2010,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
      if ( expDiff < 0 ) goto bExpBigger;
      if ( aExp == 0x7FF ) {
          if ( aSig | bSig ) return propagateFloat64NaN( a, b );
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
          return float64_default_nan;
      }
      if ( aExp == 0 ) {
@@ -2032,7 +2019,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
      }
      if ( bSig < aSig ) goto aBigger;
      if ( aSig < bSig ) goto bBigger;
-    return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
+    return packFloat64( roundData->mode == float_round_down, 0, 0 );
   bExpBigger:
      if ( bExp == 0x7FF ) {
          if ( bSig ) return propagateFloat64NaN( a, b );
@@ -2069,7 +2056,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
      zExp = aExp;
   normalizeRoundAndPack:
      --zExp;
-    return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
+    return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig );
  
  }
  
@@ -2080,17 +2067,17 @@ and `b'.  The operation is performed according to the IEC/IEEE Standard for
  Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float64 float64_add( float64 a, float64 b )
+float64 float64_add( struct roundingData *roundData, float64 a, float64 b )
  {
      flag aSign, bSign;
  
      aSign = extractFloat64Sign( a );
      bSign = extractFloat64Sign( b );
      if ( aSign == bSign ) {
-        return addFloat64Sigs( a, b, aSign );
+        return addFloat64Sigs( roundData, a, b, aSign );
      }
      else {
-        return subFloat64Sigs( a, b, aSign );
+        return subFloat64Sigs( roundData, a, b, aSign );
      }
  
  }
@@ -2102,17 +2089,17 @@ Returns the result of subtracting the double-precision floating-point values
  for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float64 float64_sub( float64 a, float64 b )
+float64 float64_sub( struct roundingData *roundData, float64 a, float64 b )
  {
      flag aSign, bSign;
  
      aSign = extractFloat64Sign( a );
      bSign = extractFloat64Sign( b );
      if ( aSign == bSign ) {
-        return subFloat64Sigs( a, b, aSign );
+        return subFloat64Sigs( roundData, a, b, aSign );
      }
      else {
-        return addFloat64Sigs( a, b, aSign );
+        return addFloat64Sigs( roundData, a, b, aSign );
      }
  
  }
@@ -2124,7 +2111,7 @@ Returns the result of multiplying the double-precision floating-point values
  for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float64 float64_mul( float64 a, float64 b )
+float64 float64_mul( struct roundingData *roundData, float64 a, float64 b )
  {
      flag aSign, bSign, zSign;
      int16 aExp, bExp, zExp;
@@ -2142,7 +2129,7 @@ float64 float64_mul( float64 a, float64 b )
              return propagateFloat64NaN( a, b );
          }
          if ( ( bExp | bSig ) == 0 ) {
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
              return float64_default_nan;
          }
          return packFloat64( zSign, 0x7FF, 0 );
@@ -2150,7 +2137,7 @@ float64 float64_mul( float64 a, float64 b )
      if ( bExp == 0x7FF ) {
          if ( bSig ) return propagateFloat64NaN( a, b );
          if ( ( aExp | aSig ) == 0 ) {
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
              return float64_default_nan;
          }
          return packFloat64( zSign, 0x7FF, 0 );
@@ -2172,7 +2159,7 @@ float64 float64_mul( float64 a, float64 b )
          zSig0 <<= 1;
          --zExp;
      }
-    return roundAndPackFloat64( zSign, zExp, zSig0 );
+    return roundAndPackFloat64( roundData, zSign, zExp, zSig0 );
  
  }
  
@@ -2183,7 +2170,7 @@ by the corresponding value `b'.  The operation is performed according to
  the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float64 float64_div( float64 a, float64 b )
+float64 float64_div( struct roundingData *roundData, float64 a, float64 b )
  {
      flag aSign, bSign, zSign;
      int16 aExp, bExp, zExp;
@@ -2202,7 +2189,7 @@ float64 float64_div( float64 a, float64 b )
          if ( aSig ) return propagateFloat64NaN( a, b );
          if ( bExp == 0x7FF ) {
              if ( bSig ) return propagateFloat64NaN( a, b );
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
              return float64_default_nan;
          }
          return packFloat64( zSign, 0x7FF, 0 );
@@ -2214,10 +2201,10 @@ float64 float64_div( float64 a, float64 b )
      if ( bExp == 0 ) {
          if ( bSig == 0 ) {
              if ( ( aExp | aSig ) == 0 ) {
-                float_raise( float_flag_invalid );
+                roundData->exception |= float_flag_invalid;
                  return float64_default_nan;
              }
-            float_raise( float_flag_divbyzero );
+            roundData->exception |= float_flag_divbyzero;
              return packFloat64( zSign, 0x7FF, 0 );
          }
          normalizeFloat64Subnormal( bSig, &bExp, &bSig );
@@ -2243,7 +2230,7 @@ float64 float64_div( float64 a, float64 b )
          }
          zSig |= ( rem1 != 0 );
      }
-    return roundAndPackFloat64( zSign, zExp, zSig );
+    return roundAndPackFloat64( roundData, zSign, zExp, zSig );
  
  }
  
@@ -2254,7 +2241,7 @@ with respect to the corresponding value `b'.  The operation is performed
  according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float64 float64_rem( float64 a, float64 b )
+float64 float64_rem( struct roundingData *roundData, float64 a, float64 b )
  {
      flag aSign, bSign, zSign;
      int16 aExp, bExp, expDiff;
@@ -2272,7 +2259,7 @@ float64 float64_rem( float64 a, float64 b )
          if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
              return propagateFloat64NaN( a, b );
          }
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
          return float64_default_nan;
      }
      if ( bExp == 0x7FF ) {
@@ -2281,7 +2268,7 @@ float64 float64_rem( float64 a, float64 b )
      }
      if ( bExp == 0 ) {
          if ( bSig == 0 ) {
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
              return float64_default_nan;
          }
          normalizeFloat64Subnormal( bSig, &bExp, &bSig );
@@ -2329,7 +2316,7 @@ float64 float64_rem( float64 a, float64 b )
      }
      zSign = ( (sbits64) aSig < 0 );
      if ( zSign ) aSig = - aSig;
-    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
+    return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig );
  
  }
  
@@ -2340,7 +2327,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
  Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float64 float64_sqrt( float64 a )
+float64 float64_sqrt( struct roundingData *roundData, float64 a )
  {
      flag aSign;
      int16 aExp, zExp;
@@ -2354,12 +2341,12 @@ float64 float64_sqrt( float64 a )
      if ( aExp == 0x7FF ) {
          if ( aSig ) return propagateFloat64NaN( a, a );
          if ( ! aSign ) return a;
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
          return float64_default_nan;
      }
      if ( aSign ) {
          if ( ( aExp | aSig ) == 0 ) return a;
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
          return float64_default_nan;
      }
      if ( aExp == 0 ) {
@@ -2390,7 +2377,7 @@ float64 float64_sqrt( float64 a )
          }
      }
      shift64RightJamming( zSig, 1, &zSig );
-    return roundAndPackFloat64( 0, zExp, zSig );
+    return roundAndPackFloat64( roundData, 0, zExp, zSig );
  
  }
  
@@ -2502,9 +2489,7 @@ flag float64_le_quiet( float64 a, float64 b )
      if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
           || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
         ) {
-        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid );
-        }
+        /* Do nothing, even if NaN as we're quiet */
          return 0;
      }
      aSign = extractFloat64Sign( a );
@@ -2529,9 +2514,7 @@ flag float64_lt_quiet( float64 a, float64 b )
      if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
           || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
         ) {
-        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid );
-        }
+        /* Do nothing, even if NaN as we're quiet */
          return 0;
      }
      aSign = extractFloat64Sign( a );
@@ -2554,7 +2537,7 @@ largest positive integer is returned.  Otherwise, if the conversion
  overflows, the largest integer with the same sign as `a' is returned.
  -------------------------------------------------------------------------------
  */
-int32 floatx80_to_int32( floatx80 a )
+int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a )
  {
      flag aSign;
      int32 aExp, shiftCount;
@@ -2567,7 +2550,7 @@ int32 floatx80_to_int32( floatx80 a )
      shiftCount = 0x4037 - aExp;
      if ( shiftCount <= 0 ) shiftCount = 1;
      shift64RightJamming( aSig, shiftCount, &aSig );
-    return roundAndPackInt32( aSign, aSig );
+    return roundAndPackInt32( roundData, aSign, aSig );
  
  }
  
@@ -2598,7 +2581,7 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a )
          goto invalid;
      }
      else if ( 63 < shiftCount ) {
-        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+        if ( aExp || aSig ) float_raise( float_flag_inexact );
          return 0;
      }
      savedASig = aSig;
@@ -2607,11 +2590,11 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a )
      if ( aSign ) z = - z;
      if ( ( z < 0 ) ^ aSign ) {
   invalid:
-        float_exception_flags |= float_flag_invalid;
+        float_raise( float_flag_invalid );
          return aSign ? 0x80000000 : 0x7FFFFFFF;
      }
      if ( ( aSig<<shiftCount ) != savedASig ) {
-        float_exception_flags |= float_flag_inexact;
+        float_raise( float_flag_inexact );
      }
      return z;
  
@@ -2625,7 +2608,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary
  Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float32 floatx80_to_float32( floatx80 a )
+float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a )
  {
      flag aSign;
      int32 aExp;
@@ -2642,7 +2625,7 @@ float32 floatx80_to_float32( floatx80 a )
      }
      shift64RightJamming( aSig, 33, &aSig );
      if ( aExp || aSig ) aExp -= 0x3F81;
-    return roundAndPackFloat32( aSign, aExp, aSig );
+    return roundAndPackFloat32( roundData, aSign, aExp, aSig );
  
  }
  
@@ -2654,7 +2637,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary
  Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-float64 floatx80_to_float64( floatx80 a )
+float64 floatx80_to_float64( struct roundingData *roundData, floatx80 a )
  {
      flag aSign;
      int32 aExp;
@@ -2671,7 +2654,7 @@ float64 floatx80_to_float64( floatx80 a )
      }
      shift64RightJamming( aSig, 1, &zSig );
      if ( aExp || aSig ) aExp -= 0x3C01;
-    return roundAndPackFloat64( aSign, aExp, zSig );
+    return roundAndPackFloat64( roundData, aSign, aExp, zSig );
  
  }
  
@@ -2683,7 +2666,7 @@ value.  The operation is performed according to the IEC/IEEE Standard for
  Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-floatx80 floatx80_round_to_int( floatx80 a )
+floatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a )
  {
      flag aSign;
      int32 aExp;
@@ -2703,9 +2686,9 @@ floatx80 floatx80_round_to_int( floatx80 a )
               && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
              return a;
          }
-        float_exception_flags |= float_flag_inexact;
+        roundData->exception |= float_flag_inexact;
          aSign = extractFloatx80Sign( a );
-        switch ( float_rounding_mode ) {
+        switch ( roundData->mode ) {
           case float_round_nearest_even:
              if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
                 ) {
@@ -2729,7 +2712,7 @@ floatx80 floatx80_round_to_int( floatx80 a )
      lastBitMask <<= 0x403E - aExp;
      roundBitsMask = lastBitMask - 1;
      z = a;
-    roundingMode = float_rounding_mode;
+    roundingMode = roundData->mode;
      if ( roundingMode == float_round_nearest_even ) {
          z.low += lastBitMask>>1;
          if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
@@ -2744,7 +2727,7 @@ floatx80 floatx80_round_to_int( floatx80 a )
          ++z.high;
          z.low = LIT64( 0x8000000000000000 );
      }
-    if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
+    if ( z.low != a.low ) roundData->exception |= float_flag_inexact;
      return z;
  
  }
@@ -2758,7 +2741,7 @@ The addition is performed according to the IEC/IEEE Standard for Binary
  Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+static floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
  {
      int32 aExp, bExp, zExp;
      bits64 aSig, bSig, zSig0, zSig1;
@@ -2814,7 +2797,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
   roundAndPack:
      return
          roundAndPackFloatx80(
-            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+            roundData, zSign, zExp, zSig0, zSig1 );
  
  }
  
@@ -2827,7 +2810,7 @@ result is a NaN.  The subtraction is performed according to the IEC/IEEE
  Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
  {
      int32 aExp, bExp, zExp;
      bits64 aSig, bSig, zSig0, zSig1;
@@ -2845,7 +2828,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
          if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
              return propagateFloatx80NaN( a, b );
          }
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
          z.low = floatx80_default_nan_low;
          z.high = floatx80_default_nan_high;
          return z;
@@ -2857,7 +2840,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
      zSig1 = 0;
      if ( bSig < aSig ) goto aBigger;
      if ( aSig < bSig ) goto bBigger;
-    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
+    return packFloatx80( roundData->mode == float_round_down, 0, 0 );
   bExpBigger:
      if ( bExp == 0x7FFF ) {
          if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
@@ -2883,7 +2866,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
   normalizeRoundAndPack:
      return
          normalizeRoundAndPackFloatx80(
-            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+            roundData, zSign, zExp, zSig0, zSig1 );
  
  }
  
@@ -2894,17 +2877,17 @@ values `a' and `b'.  The operation is performed according to the IEC/IEEE
  Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-floatx80 floatx80_add( floatx80 a, floatx80 b )
+floatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b )
  {
      flag aSign, bSign;
      
      aSign = extractFloatx80Sign( a );
      bSign = extractFloatx80Sign( b );
      if ( aSign == bSign ) {
-        return addFloatx80Sigs( a, b, aSign );
+        return addFloatx80Sigs( roundData, a, b, aSign );
      }
      else {
-        return subFloatx80Sigs( a, b, aSign );
+        return subFloatx80Sigs( roundData, a, b, aSign );
      }
      
  }
@@ -2916,17 +2899,17 @@ point values `a' and `b'.  The operation is performed according to the
  IEC/IEEE Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-floatx80 floatx80_sub( floatx80 a, floatx80 b )
+floatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b )
  {
      flag aSign, bSign;
  
      aSign = extractFloatx80Sign( a );
      bSign = extractFloatx80Sign( b );
      if ( aSign == bSign ) {
-        return subFloatx80Sigs( a, b, aSign );
+        return subFloatx80Sigs( roundData, a, b, aSign );
      }
      else {
-        return addFloatx80Sigs( a, b, aSign );
+        return addFloatx80Sigs( roundData, a, b, aSign );
      }
  
  }
@@ -2938,7 +2921,7 @@ point values `a' and `b'.  The operation is performed according to the
  IEC/IEEE Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-floatx80 floatx80_mul( floatx80 a, floatx80 b )
+floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
  {
      flag aSign, bSign, zSign;
      int32 aExp, bExp, zExp;
@@ -2964,7 +2947,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b )
          if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
          if ( ( aExp | aSig ) == 0 ) {
   invalid:
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
              z.low = floatx80_default_nan_low;
              z.high = floatx80_default_nan_high;
              return z;
@@ -2987,7 +2970,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b )
      }
      return
          roundAndPackFloatx80(
-            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+            roundData, zSign, zExp, zSig0, zSig1 );
  
  }
  
@@ -2998,7 +2981,7 @@ value `a' by the corresponding value `b'.  The operation is performed
  according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-floatx80 floatx80_div( floatx80 a, floatx80 b )
+floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
  {
      flag aSign, bSign, zSign;
      int32 aExp, bExp, zExp;
@@ -3029,12 +3012,12 @@ floatx80 floatx80_div( floatx80 a, floatx80 b )
          if ( bSig == 0 ) {
              if ( ( aExp | aSig ) == 0 ) {
   invalid:
-                float_raise( float_flag_invalid );
+                roundData->exception |= float_flag_invalid;
                  z.low = floatx80_default_nan_low;
                  z.high = floatx80_default_nan_high;
                  return z;
              }
-            float_raise( float_flag_divbyzero );
+            roundData->exception |= float_flag_divbyzero;
              return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
          }
          normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
@@ -3068,7 +3051,7 @@ floatx80 floatx80_div( floatx80 a, floatx80 b )
      }
      return
          roundAndPackFloatx80(
-            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+            roundData, zSign, zExp, zSig0, zSig1 );
  
  }
  
@@ -3079,7 +3062,7 @@ Returns the remainder of the extended double-precision floating-point value
  according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-floatx80 floatx80_rem( floatx80 a, floatx80 b )
+floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
  {
      flag aSign, bSign, zSign;
      int32 aExp, bExp, expDiff;
@@ -3107,7 +3090,7 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b )
      if ( bExp == 0 ) {
          if ( bSig == 0 ) {
   invalid:
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
              z.low = floatx80_default_nan_low;
              z.high = floatx80_default_nan_high;
              return z;
@@ -3164,9 +3147,10 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b )
          aSig1 = alternateASig1;
          zSign = ! zSign;
      }
+
      return
          normalizeRoundAndPackFloatx80(
-            80, zSign, bExp + expDiff, aSig0, aSig1 );
+            roundData, zSign, bExp + expDiff, aSig0, aSig1 );
  
  }
  
@@ -3177,7 +3161,7 @@ value `a'.  The operation is performed according to the IEC/IEEE Standard
  for Binary Floating-point Arithmetic.
  -------------------------------------------------------------------------------
  */
-floatx80 floatx80_sqrt( floatx80 a )
+floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
  {
      flag aSign;
      int32 aExp, zExp;
@@ -3197,7 +3181,7 @@ floatx80 floatx80_sqrt( floatx80 a )
      if ( aSign ) {
          if ( ( aExp | aSig0 ) == 0 ) return a;
   invalid:
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
          z.low = floatx80_default_nan_low;
          z.high = floatx80_default_nan_high;
          return z;
@@ -3242,7 +3226,7 @@ floatx80 floatx80_sqrt( floatx80 a )
      }
      return
          roundAndPackFloatx80(
-            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
+            roundData, 0, zExp, zSig0, zSig1 );
  
  }
  
@@ -3390,10 +3374,7 @@ flag floatx80_le_quiet( floatx80 a, floatx80 b )
           || (    ( extractFloatx80Exp( b ) == 0x7FFF )
                && (bits64) ( extractFloatx80Frac( b )<<1 ) )
         ) {
-        if (    floatx80_is_signaling_nan( a )
-             || floatx80_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid );
-        }
+        /* Do nothing, even if NaN as we're quiet */
          return 0;
      }
      aSign = extractFloatx80Sign( a );
@@ -3427,10 +3408,7 @@ flag floatx80_lt_quiet( floatx80 a, floatx80 b )
           || (    ( extractFloatx80Exp( b ) == 0x7FFF )
                && (bits64) ( extractFloatx80Frac( b )<<1 ) )
         ) {
-        if (    floatx80_is_signaling_nan( a )
-             || floatx80_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid );
-        }
+        /* Do nothing, even if NaN as we're quiet */
          return 0;
      }
      aSign = extractFloatx80Sign( a );
diff --git a/arch/arm/nwfpe/softfloat.h b/arch/arm/nwfpe/softfloat.h

index 1e1743173899268c91d55ef3e06ab5f40661294b..1c8799b9ee4d1399d209f5b4e0ed669b5ed0949d 100644 (file)
--- a/arch/arm/nwfpe/softfloat.h
+++ b/arch/arm/nwfpe/softfloat.h
@@ -74,7 +74,7 @@ enum {
  Software IEC/IEEE floating-point rounding mode.
  -------------------------------------------------------------------------------
  */
-extern signed char float_rounding_mode;
+//extern int8 float_rounding_mode;
  enum {
      float_round_nearest_even = 0,
      float_round_to_zero      = 1,
@@ -86,7 +86,6 @@ enum {
  -------------------------------------------------------------------------------
  Software IEC/IEEE floating-point exception flags.
  -------------------------------------------------------------------------------
-extern signed char float_exception_flags;
  enum {
      float_flag_inexact   =  1,
      float_flag_underflow =  2,
@@ -99,7 +98,6 @@ ScottB: November 4, 1998
  Changed the enumeration to match the bit order in the FPA11.
  */
  
-extern signed char float_exception_flags;
  enum {
      float_flag_invalid   =  1,
      float_flag_divbyzero =  2,
@@ -121,7 +119,7 @@ void float_raise( signed char );
  Software IEC/IEEE integer-to-floating-point conversion routines.
  -------------------------------------------------------------------------------
  */
-float32 int32_to_float32( signed int );
+float32 int32_to_float32( struct roundingData *, signed int );
  float64 int32_to_float64( signed int );
  #ifdef FLOATX80
  floatx80 int32_to_floatx80( signed int );
@@ -132,7 +130,7 @@ floatx80 int32_to_floatx80( signed int );
  Software IEC/IEEE single-precision conversion routines.
  -------------------------------------------------------------------------------
  */
-signed int float32_to_int32( float32 );
+signed int float32_to_int32( struct roundingData *, float32 );
  signed int float32_to_int32_round_to_zero( float32 );
  float64 float32_to_float64( float32 );
  #ifdef FLOATX80
@@ -144,13 +142,13 @@ floatx80 float32_to_floatx80( float32 );
  Software IEC/IEEE single-precision operations.
  -------------------------------------------------------------------------------
  */
-float32 float32_round_to_int( float32 );
-float32 float32_add( float32, float32 );
-float32 float32_sub( float32, float32 );
-float32 float32_mul( float32, float32 );
-float32 float32_div( float32, float32 );
-float32 float32_rem( float32, float32 );
-float32 float32_sqrt( float32 );
+float32 float32_round_to_int( struct roundingData*, float32 );
+float32 float32_add( struct roundingData *, float32, float32 );
+float32 float32_sub( struct roundingData *, float32, float32 );
+float32 float32_mul( struct roundingData *, float32, float32 );
+float32 float32_div( struct roundingData *, float32, float32 );
+float32 float32_rem( struct roundingData *, float32, float32 );
+float32 float32_sqrt( struct roundingData*, float32 );
  char float32_eq( float32, float32 );
  char float32_le( float32, float32 );
  char float32_lt( float32, float32 );
@@ -164,9 +162,9 @@ char float32_is_signaling_nan( float32 );
  Software IEC/IEEE double-precision conversion routines.
  -------------------------------------------------------------------------------
  */
-signed int float64_to_int32( float64 );
+signed int float64_to_int32( struct roundingData *, float64 );
  signed int float64_to_int32_round_to_zero( float64 );
-float32 float64_to_float32( float64 );
+float32 float64_to_float32( struct roundingData *, float64 );
  #ifdef FLOATX80
  floatx80 float64_to_floatx80( float64 );
  #endif
@@ -176,13 +174,13 @@ floatx80 float64_to_floatx80( float64 );
  Software IEC/IEEE double-precision operations.
  -------------------------------------------------------------------------------
  */
-float64 float64_round_to_int( float64 );
-float64 float64_add( float64, float64 );
-float64 float64_sub( float64, float64 );
-float64 float64_mul( float64, float64 );
-float64 float64_div( float64, float64 );
-float64 float64_rem( float64, float64 );
-float64 float64_sqrt( float64 );
+float64 float64_round_to_int( struct roundingData *, float64 );
+float64 float64_add( struct roundingData *, float64, float64 );
+float64 float64_sub( struct roundingData *, float64, float64 );
+float64 float64_mul( struct roundingData *, float64, float64 );
+float64 float64_div( struct roundingData *, float64, float64 );
+float64 float64_rem( struct roundingData *, float64, float64 );
+float64 float64_sqrt( struct roundingData *, float64 );
  char float64_eq( float64, float64 );
  char float64_le( float64, float64 );
  char float64_lt( float64, float64 );
@@ -198,31 +196,23 @@ char float64_is_signaling_nan( float64 );
  Software IEC/IEEE extended double-precision conversion routines.
  -------------------------------------------------------------------------------
  */
-signed int floatx80_to_int32( floatx80 );
+signed int floatx80_to_int32( struct roundingData *, floatx80 );
  signed int floatx80_to_int32_round_to_zero( floatx80 );
-float32 floatx80_to_float32( floatx80 );
-float64 floatx80_to_float64( floatx80 );
-
-/*
--------------------------------------------------------------------------------
-Software IEC/IEEE extended double-precision rounding precision.  Valid
-values are 32, 64, and 80.
--------------------------------------------------------------------------------
-*/
-extern signed char floatx80_rounding_precision;
+float32 floatx80_to_float32( struct roundingData *, floatx80 );
+float64 floatx80_to_float64( struct roundingData *, floatx80 );
  
  /*
  -------------------------------------------------------------------------------
  Software IEC/IEEE extended double-precision operations.
  -------------------------------------------------------------------------------
  */
-floatx80 floatx80_round_to_int( floatx80 );
-floatx80 floatx80_add( floatx80, floatx80 );
-floatx80 floatx80_sub( floatx80, floatx80 );
-floatx80 floatx80_mul( floatx80, floatx80 );
-floatx80 floatx80_div( floatx80, floatx80 );
-floatx80 floatx80_rem( floatx80, floatx80 );
-floatx80 floatx80_sqrt( floatx80 );
+floatx80 floatx80_round_to_int( struct roundingData *, floatx80 );
+floatx80 floatx80_add( struct roundingData *, floatx80, floatx80 );
+floatx80 floatx80_sub( struct roundingData *, floatx80, floatx80 );
+floatx80 floatx80_mul( struct roundingData *, floatx80, floatx80 );
+floatx80 floatx80_div( struct roundingData *, floatx80, floatx80 );
+floatx80 floatx80_rem( struct roundingData *, floatx80, floatx80 );
+floatx80 floatx80_sqrt( struct roundingData *, floatx80 );
  char floatx80_eq( floatx80, floatx80 );
  char floatx80_le( floatx80, floatx80 );
  char floatx80_lt( floatx80, floatx80 );
diff --git a/arch/arm/oprofile/backtrace.c b/arch/arm/oprofile/backtrace.c

index ec58d3e2eb8bef453dd8b7ba17db4d672c3debd9..df35c452a8bf6528c3cf92ee892af205350d7b50 100644 (file)
--- a/arch/arm/oprofile/backtrace.c
+++ b/arch/arm/oprofile/backtrace.c
@@ -115,7 +115,7 @@ static int valid_kernel_stack(struct frame_tail *tail, struct pt_regs *regs)
         return (tailaddr > stack) && (tailaddr < stack_base);
  }
  
-void arm_backtrace(struct pt_regs const *regs, unsigned int depth)
+void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
  {
         struct frame_tail *tail;
         unsigned long last_address = 0;
diff --git a/arch/arm/vfp/vfpdouble.c b/arch/arm/vfp/vfpdouble.c

index b801cd66b6eadaed73befcd8da0363871340a90c..9b367a65cb4d65caec8afedf39b53c2cb99b0159 100644 (file)
--- a/arch/arm/vfp/vfpdouble.c
+++ b/arch/arm/vfp/vfpdouble.c
@@ -770,6 +770,9 @@ vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
                 if ((s64)m_sig < 0) {
                         vdd->sign = vfp_sign_negate(vdd->sign);
                         m_sig = -m_sig;
+               } else if (m_sig == 0) {
+                       vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
+                                     FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
                 }
         } else {
                 m_sig += vdn->significand;
diff --git a/arch/arm26/kernel/signal.c b/arch/arm26/kernel/signal.c

index 356d9809cc0bb5ad4ba011025889061a92d711ea..ce2055bdc9ee82c342e35f63ca98835d66c9715b 100644 (file)
--- a/arch/arm26/kernel/signal.c
+++ b/arch/arm26/kernel/signal.c
@@ -454,14 +454,13 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset,
                 if (ka->sa.sa_flags & SA_ONESHOT)
                         ka->sa.sa_handler = SIG_DFL;
  
-               if (!(ka->sa.sa_flags & SA_NODEFER)) {
-                       spin_lock_irq(&tsk->sighand->siglock);
-                       sigorsets(&tsk->blocked, &tsk->blocked,
-                                 &ka->sa.sa_mask);
+               spin_lock_irq(&tsk->sighand->siglock);
+               sigorsets(&tsk->blocked, &tsk->blocked,
+                         &ka->sa.sa_mask);
+               if (!(ka->sa.sa_flags & SA_NODEFER))
                         sigaddset(&tsk->blocked, sig);
-                       recalc_sigpending();
-                       spin_unlock_irq(&tsk->sighand->siglock);
-               }
+               recalc_sigpending();
+               spin_unlock_irq(&tsk->sighand->siglock);
                 return;
         }
  
diff --git a/arch/arm26/mm/fault.c b/arch/arm26/mm/fault.c

index dacca8bb7744d5e5e44290cd5df963ef5f400b91..bd6f2db608b76ecde7dce3e02c26d70c386e7cc6 100644 (file)
--- a/arch/arm26/mm/fault.c
+++ b/arch/arm26/mm/fault.c
@@ -176,12 +176,12 @@ survive:
          * Handle the "normal" cases first - successful and sigbus
          */
         switch (fault) {
-       case 2:
+       case VM_FAULT_MAJOR:
                 tsk->maj_flt++;
                 return fault;
-       case 1:
+       case VM_FAULT_MINOR:
                 tsk->min_flt++;
-       case 0:
+       case VM_FAULT_SIGBUS:
                 return fault;
         }
  
@@ -226,14 +226,11 @@ int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
         /*
          * Handle the "normal" case first
          */
-       if (fault > 0)
+       switch (fault) {
+       case VM_FAULT_MINOR:
+       case VM_FAULT_MAJOR:
                 return 0;
-
-       /*
-        * We had some memory, but were unable to
-        * successfully fix up this page fault.
-        */
-       if (fault == 0){
+       case VM_FAULT_SIGBUS:
                 goto do_sigbus;
         }
  
diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c

index 85e0032e664ffcf93e489841d4ba19ee3746e42a..693771961f859f788c4eec80bbea21df85ecf1e7 100644 (file)
--- a/arch/cris/arch-v10/kernel/signal.c
+++ b/arch/cris/arch-v10/kernel/signal.c
@@ -517,13 +517,12 @@ handle_signal(int canrestart, unsigned long sig,
         if (ka->sa.sa_flags & SA_ONESHOT)
                 ka->sa.sa_handler = SIG_DFL;
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  /*
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c

index fb4c79d5b76b028840c5cf665e6807b27a86f475..0a3614dab88701b2827e26ce5858c94bf824852e 100644 (file)
--- a/arch/cris/arch-v32/kernel/signal.c
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -568,13 +568,12 @@ handle_signal(int canrestart, unsigned long sig,
         if (ka->sa.sa_flags & SA_ONESHOT)
                 ka->sa.sa_handler = SIG_DFL;
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  /*
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c

index fe1cc36b5aca0d31914c599938dac1994e7ea07b..934c51078ccee4bafca2b68b7752b1745a63ee03 100644 (file)
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -284,13 +284,13 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
          */
  
         switch (handle_mm_fault(mm, vma, address, writeaccess & 1)) {
-       case 1:
+       case VM_FAULT_MINOR:
                 tsk->min_flt++;
                 break;
-       case 2:
+       case VM_FAULT_MAJOR:
                 tsk->maj_flt++;
                 break;
-       case 0:
+       case VM_FAULT_SIGBUS:
                 goto do_sigbus;
         default:
                 goto out_of_memory;
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c

index 36a2dffc8ebd953618a70a4f365b23ce3349ea3f..d4ccc0728dfe4b97ac7065f4b0fe344e72320873 100644 (file)
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -506,13 +506,12 @@ static void handle_signal(unsigned long sig, siginfo_t *info,
         else
                 setup_frame(sig, ka, oldset, regs);
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked, sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  } /* end handle_signal() */
  
  /*****************************************************************************/
diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c

index 41d02ac482335b1d81a3bb349c7df0137d85f417..8b3eb50c510544c57d9a0cbc4093575400ba52cf 100644 (file)
--- a/arch/frv/mm/fault.c
+++ b/arch/frv/mm/fault.c
@@ -163,13 +163,13 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
          * the fault.
          */
         switch (handle_mm_fault(mm, vma, ear0, write)) {
-       case 1:
+       case VM_FAULT_MINOR:
                 current->min_flt++;
                 break;
-       case 2:
+       case VM_FAULT_MAJOR:
                 current->maj_flt++;
                 break;
-       case 0:
+       case VM_FAULT_SIGBUS:
                 goto do_sigbus;
         default:
                 goto out_of_memory;
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c

index 5aab87eae1f941f8373d8ea1db5882ced9b447d0..f13d5e82d4b977065727d88ce45be4f0904c7dbb 100644 (file)
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -488,13 +488,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
         else
                 setup_frame(sig, ka, oldset, regs);
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  /*
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig

index a801d9d486064f9e1c631cc117ae4e81624ce1dd..619d843ba231492dfb7100d32734ea711c391e7c 100644 (file)
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -454,8 +454,9 @@ config HPET_TIMER
           Choose N to continue using the legacy 8254 timer.
  
  config HPET_EMULATE_RTC
-       bool "Provide RTC interrupt"
+       bool
         depends on HPET_TIMER && RTC=y
+       default y
  
  config SMP
         bool "Symmetric multi-processing support"
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c

index bd1dbf3bd223cfd51021b7eec43d89b206a8bb95..a22a866de8f9db44472cc65f933383ce6589591a 100644 (file)
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -726,15 +726,11 @@ __setup("apic=", apic_set_verbosity);
  static int __init detect_init_APIC (void)
  {
         u32 h, l, features;
-       extern void get_cpu_vendor(struct cpuinfo_x86*);
  
         /* Disabled by kernel option? */
         if (enable_local_apic < 0)
                 return -1;
  
-       /* Workaround for us being called before identify_cpu(). */
-       get_cpu_vendor(&boot_cpu_data);
-
         switch (boot_cpu_data.x86_vendor) {
         case X86_VENDOR_AMD:
                 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c

index f57e5ee949435279b5d4bd52efa48161bfcc0f85..fc426380366bcbbd131e7e2d7f508f2f2ca1349c 100644 (file)
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -76,6 +76,12 @@ static void __init init_transmeta(struct cpuinfo_x86 *c)
  #define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV)
          if ( c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686 )
                 c->x86 = 6;
+
+#ifdef CONFIG_SYSCTL
+       /* randomize_va_space slows us down enormously;
+          it probably triggers retranslation of x86->native bytecode */
+       randomize_va_space = 0;
+#endif
  }
  
  static void transmeta_identify(struct cpuinfo_x86 * c)
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c

index da6c46d667cb6fdc33d1b89dcdadfe4a41f0fe98..8c242bb1ef4571685cebeb2c11c0160a92bc9695 100644 (file)
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -195,7 +195,7 @@ static void disable_lapic_nmi_watchdog(void)
                         wrmsr(MSR_P6_EVNTSEL0, 0, 0);
                         break;
                 case 15:
-                       if (boot_cpu_data.x86_model > 0x3)
+                       if (boot_cpu_data.x86_model > 0x4)
                                 break;
  
                         wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
@@ -432,7 +432,7 @@ void setup_apic_nmi_watchdog (void)
                         setup_p6_watchdog();
                         break;
                 case 15:
-                       if (boot_cpu_data.x86_model > 0x3)
+                       if (boot_cpu_data.x86_model > 0x4)
                                 return;
  
                         if (!setup_p4_watchdog())
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c

index 89ef7adc63a4be19611f57b580fddf46259dbd73..140e340569c67711fc83f3f54cf4139d64c701aa 100644 (file)
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -577,10 +577,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
         else
                 ret = setup_frame(sig, ka, oldset, regs);
  
-       if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
+       if (ret) {
                 spin_lock_irq(&current->sighand->siglock);
                 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
-               sigaddset(&current->blocked,sig);
+               if (!(ka->sa.sa_flags & SA_NODEFER))
+                       sigaddset(&current->blocked,sig);
                 recalc_sigpending();
                 spin_unlock_irq(&current->sighand->siglock);
         }
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S

index 468500a7e8949a100fc56dfa187a2ffdb603bef9..9b21a31d4f4ec1b32b4d6c438d236c6332a19759 100644 (file)
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -251,7 +251,7 @@ ENTRY(sys_call_table)
         .long sys_io_submit
         .long sys_io_cancel
         .long sys_fadvise64     /* 250 */
-       .long sys_set_zone_reclaim
+       .long sys_ni_syscall
         .long sys_exit_group
         .long sys_lookup_dcookie
         .long sys_epoll_create
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c

index a61f33d06ea34313b9a36a271d9fd69f2b2034db..cd2d5d5514fe0ffcdf78cadfbabdf5863ede040c 100644 (file)
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -803,15 +803,17 @@ void math_error(void __user *eip)
          */
         cwd = get_fpu_cwd(task);
         swd = get_fpu_swd(task);
-       switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
+       switch (swd & ~cwd & 0x3f) {
                 case 0x000:
                 default:
                         break;
                 case 0x001: /* Invalid Op */
-               case 0x041: /* Stack Fault */
-               case 0x241: /* Stack Fault | Direction */
+                       /*
+                        * swd & 0x240 == 0x040: Stack Underflow
+                        * swd & 0x240 == 0x240: Stack Overflow
+                        * User must clear the SF bit (0x40) if set
+                        */
                         info.si_code = FPE_FLTINV;
-                       /* Should we clear the SF or let user space do it ???? */
                         break;
                 case 0x002: /* Denormalize */
                 case 0x010: /* Underflow */
diff --git a/arch/i386/mach-visws/reboot.c b/arch/i386/mach-visws/reboot.c

index 9e9296676f931dfc2831f40c127933d2f772badf..5d73e042ed0a15470654a28ad1549cb4a9cbedff 100644 (file)
--- a/arch/i386/mach-visws/reboot.c
+++ b/arch/i386/mach-visws/reboot.c
@@ -9,12 +9,15 @@
  void (*pm_power_off)(void);
  EXPORT_SYMBOL(pm_power_off);
  
-void machine_restart(char * __unused)
+void machine_shutdown(void)
  {
  #ifdef CONFIG_SMP
         smp_send_stop();
  #endif
+}
  
+void machine_emergency_restart(void)
+{
         /*
          * Visual Workstations restart after this
          * register is poked on the PIIX4
@@ -22,6 +25,12 @@ void machine_restart(char * __unused)
         outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
  }
  
+void machine_restart(char * __unused)
+{
+       machine_shutdown();
+       machine_emergency_restart();
+}
+
  void machine_power_off(void)
  {
         unsigned short pm_status;
diff --git a/arch/i386/mach-visws/setup.c b/arch/i386/mach-visws/setup.c

index 9f6d2d9b1be7b2100c518a128160136b416dc8d7..26ada6fc0d774f082f5379550ef1e9063f8a0f2d 100644 (file)
--- a/arch/i386/mach-visws/setup.c
+++ b/arch/i386/mach-visws/setup.c
@@ -14,6 +14,8 @@
  #include "cobalt.h"
  #include "piix4.h"
  
+int no_broadcast;
+
  char visws_board_type = -1;
  char visws_board_rev = -1;
  
diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c

index b3eda46e0fe9d4e1a02cfc8e90c9e29b7abd1e94..c6384061328a5d72acaed91f216c26fef86a8c91 100644 (file)
--- a/arch/i386/mach-voyager/voyager_basic.c
+++ b/arch/i386/mach-voyager/voyager_basic.c
@@ -251,6 +251,12 @@ kb_wait(void)
                         break;
  }
  
+void
+machine_shutdown(void)
+{
+       /* Architecture specific shutdown needed before a kexec */
+}
+
  void
  machine_restart(char *cmd)
  {
@@ -278,6 +284,13 @@ machine_restart(char *cmd)
         }
  }
  
+void
+machine_emergency_restart(void)
+{
+       /*for now, just hook this to a warm restart */
+       machine_restart(NULL);
+}
+
  void
  mca_nmi_hook(void)
  {
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c

index c369a8bf7cbec4c5bee790c9ef5a06e41ceab2cd..6711ce3f6916ad3601b33d4ba5a2d694f2bd2989 100644 (file)
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -243,14 +243,6 @@ static unsigned long calculate_numa_remap_pages(void)
                 /* now the roundup is correct, convert to PAGE_SIZE pages */
                 size = size * PTRS_PER_PTE;
  
-               if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) {
-                       /*
-                        * Adjust size if node_end_pfn is not on a proper
-                        * pmd boundary. remap_numa_kva will barf otherwise.
-                        */
-                       size +=  node_end_pfn[nid] & (PTRS_PER_PTE-1);
-               }
-
                 /*
                  * Validate the region we are allocating only contains valid
                  * pages.
@@ -270,6 +262,17 @@ static unsigned long calculate_numa_remap_pages(void)
                 reserve_pages += size;
                 printk("Shrinking node %d from %ld pages to %ld pages\n",
                         nid, node_end_pfn[nid], node_end_pfn[nid] - size);
+
+               if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) {
+                       /*
+                        * Align node_end_pfn[] and node_remap_start_pfn[] to
+                        * pmd boundary. remap_numa_kva will barf otherwise.
+                        */
+                       printk("Shrinking node %d further by %ld pages for proper alignment\n",
+                               nid, node_end_pfn[nid] & (PTRS_PER_PTE-1));
+                       size +=  node_end_pfn[nid] & (PTRS_PER_PTE-1);
+               }
+
                 node_end_pfn[nid] -= size;
                 node_remap_start_pfn[nid] = node_end_pfn[nid];
         }
diff --git a/arch/i386/pci/visws.c b/arch/i386/pci/visws.c

index 314c933b6b8e6ab5b49cf31727ad2defde15321f..6c17433fdf7ddb8b9d5d8dfa3d40626136933bb5 100644 (file)
--- a/arch/i386/pci/visws.c
+++ b/arch/i386/pci/visws.c
@@ -18,8 +18,10 @@
  extern struct pci_raw_ops pci_direct_conf1;
  
  static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
+static void pci_visws_disable_irq(struct pci_dev *dev) { }
  
  int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq;
+void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq;
  
  void __init pcibios_penalize_isa_irq(int irq, int active) {}
  
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig

index cbb3e0cef93afa009b631d5dba79b1cdab790668..80988136f26d7dec0791f62c1df59eca73af35ea 100644 (file)
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -392,15 +392,8 @@ menu "Bus options (PCI, PCMCIA)"
  config PCI
         bool "PCI support"
         help
-         Find out whether you have a PCI motherboard. PCI is the name of a
-         bus system, i.e. the way the CPU talks to the other stuff inside
-         your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
-         VESA. If you have PCI, say Y, otherwise N.
-
-         The PCI-HOWTO, available from
-         <http://www.tldp.org/docs.html#howto>, contains valuable
-         information about which PCI hardware does work under Linux and which
-         doesn't.
+         Real IA-64 machines all have PCI/PCI-X/PCI Express busses.  Say Y
+         here unless you are using a simulator without PCI support.
  
  config PCI_DOMAINS
         bool
diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig

index 04d0b00a2b8c6aed16851e58009136fcaf54bc0b..dccf35c60b941845351262e42e87361993760de3 100644 (file)
--- a/arch/ia64/configs/sn2_defconfig
+++ b/arch/ia64/configs/sn2_defconfig
@@ -1,7 +1,7 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.10
-# Mon Jan 10 13:57:35 2005
+# Linux kernel version: 2.6.13-rc6
+# Tue Aug 16 14:40:41 2005
  #
  
  #
@@ -10,6 +10,7 @@
  CONFIG_EXPERIMENTAL=y
  CONFIG_CLEAN_COMPILE=y
  CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
  
  #
  # General setup
@@ -21,24 +22,26 @@ CONFIG_POSIX_MQUEUE=y
  # CONFIG_BSD_PROCESS_ACCT is not set
  CONFIG_SYSCTL=y
  # CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=20
  CONFIG_HOTPLUG=y
  CONFIG_KOBJECT_UEVENT=y
  # CONFIG_IKCONFIG is not set
+CONFIG_CPUSETS=y
  # CONFIG_EMBEDDED is not set
  CONFIG_KALLSYMS=y
  CONFIG_KALLSYMS_ALL=y
  # CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
  CONFIG_FUTEX=y
  CONFIG_EPOLL=y
-CONFIG_CPUSETS=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
  CONFIG_SHMEM=y
  CONFIG_CC_ALIGN_FUNCTIONS=0
  CONFIG_CC_ALIGN_LABELS=0
  CONFIG_CC_ALIGN_LOOPS=0
  CONFIG_CC_ALIGN_JUMPS=0
  # CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
  
  #
  # Loadable module support
@@ -63,9 +66,12 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y
  CONFIG_TIME_INTERPOLATION=y
  CONFIG_EFI=y
  CONFIG_GENERIC_IOMAP=y
+CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
+CONFIG_IA64_UNCACHED_ALLOCATOR=y
  # CONFIG_IA64_GENERIC is not set
  # CONFIG_IA64_DIG is not set
  # CONFIG_IA64_HP_ZX1 is not set
+# CONFIG_IA64_HP_ZX1_SWIOTLB is not set
  CONFIG_IA64_SGI_SN2=y
  # CONFIG_IA64_HP_SIM is not set
  # CONFIG_ITANIUM is not set
@@ -74,6 +80,10 @@ CONFIG_MCKINLEY=y
  # CONFIG_IA64_PAGE_SIZE_8KB is not set
  CONFIG_IA64_PAGE_SIZE_16KB=y
  # CONFIG_IA64_PAGE_SIZE_64KB is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
  CONFIG_IA64_L1_CACHE_SHIFT=7
  CONFIG_NUMA=y
  CONFIG_VIRTUAL_MEM_MAP=y
@@ -81,11 +91,20 @@ CONFIG_HOLES_IN_ZONE=y
  CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
  # CONFIG_IA64_CYCLONE is not set
  CONFIG_IOSAPIC=y
+CONFIG_IA64_SGI_SN_XP=m
  CONFIG_FORCE_MAX_ZONEORDER=18
  CONFIG_SMP=y
  CONFIG_NR_CPUS=512
  # CONFIG_HOTPLUG_CPU is not set
+CONFIG_SCHED_SMT=y
  CONFIG_PREEMPT=y
+CONFIG_SELECT_MEMORY_MODEL=y
+# CONFIG_FLATMEM_MANUAL is not set
+CONFIG_DISCONTIGMEM_MANUAL=y
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_DISCONTIGMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_NEED_MULTIPLE_NODES=y
  CONFIG_HAVE_DEC_LOCK=y
  CONFIG_IA32_SUPPORT=y
  CONFIG_COMPAT=y
@@ -105,6 +124,7 @@ CONFIG_BINFMT_ELF=y
  #
  # Power management and ACPI
  #
+CONFIG_PM=y
  CONFIG_ACPI=y
  
  #
@@ -114,6 +134,7 @@ CONFIG_ACPI_BOOT=y
  CONFIG_ACPI_INTERPRETER=y
  # CONFIG_ACPI_BUTTON is not set
  CONFIG_ACPI_VIDEO=m
+CONFIG_ACPI_HOTKEY=m
  # CONFIG_ACPI_FAN is not set
  # CONFIG_ACPI_PROCESSOR is not set
  CONFIG_ACPI_NUMA=y
@@ -133,6 +154,7 @@ CONFIG_PCI_DOMAINS=y
  # CONFIG_PCI_MSI is not set
  CONFIG_PCI_LEGACY_PROC=y
  CONFIG_PCI_NAMES=y
+# CONFIG_PCI_DEBUG is not set
  
  #
  # PCI Hotplug Support
@@ -141,7 +163,6 @@ CONFIG_HOTPLUG_PCI=y
  # CONFIG_HOTPLUG_PCI_FAKE is not set
  # CONFIG_HOTPLUG_PCI_ACPI is not set
  # CONFIG_HOTPLUG_PCI_CPCI is not set
-# CONFIG_HOTPLUG_PCI_PCIE is not set
  # CONFIG_HOTPLUG_PCI_SHPC is not set
  CONFIG_HOTPLUG_PCI_SGI=y
  
@@ -151,8 +172,70 @@ CONFIG_HOTPLUG_PCI_SGI=y
  # CONFIG_PCCARD is not set
  
  #
-# PC-card bridges
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_IP_TCPDIAG=y
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+CONFIG_IPV6=m
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_IPV6_TUNNEL is not set
+# CONFIG_NETFILTER is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
  #
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
  
  #
  # Device Drivers
@@ -163,7 +246,7 @@ CONFIG_HOTPLUG_PCI_SGI=y
  #
  CONFIG_STANDALONE=y
  CONFIG_PREVENT_FIRMWARE_BUILD=y
-CONFIG_FW_LOADER=m
+CONFIG_FW_LOADER=y
  # CONFIG_DEBUG_DRIVER is not set
  
  #
@@ -188,6 +271,7 @@ CONFIG_FW_LOADER=m
  # CONFIG_BLK_CPQ_CISS_DA is not set
  # CONFIG_BLK_DEV_DAC960 is not set
  # CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
  CONFIG_BLK_DEV_LOOP=y
  CONFIG_BLK_DEV_CRYPTOLOOP=m
  CONFIG_BLK_DEV_NBD=m
@@ -252,6 +336,7 @@ CONFIG_IDEDMA_PCI_AUTO=y
  # CONFIG_BLK_DEV_HPT366 is not set
  # CONFIG_BLK_DEV_SC1200 is not set
  # CONFIG_BLK_DEV_PIIX is not set
+# CONFIG_BLK_DEV_IT821X is not set
  # CONFIG_BLK_DEV_NS87415 is not set
  # CONFIG_BLK_DEV_PDC202XX_OLD is not set
  # CONFIG_BLK_DEV_PDC202XX_NEW is not set
@@ -282,6 +367,7 @@ CONFIG_CHR_DEV_ST=m
  CONFIG_BLK_DEV_SR=m
  # CONFIG_BLK_DEV_SR_VENDOR is not set
  CONFIG_CHR_DEV_SG=m
+CONFIG_CHR_DEV_SCH=m
  
  #
  # Some SCSI devices (e.g. CD jukebox) support multiple LUNs
@@ -315,24 +401,20 @@ CONFIG_SCSI_SATA=y
  # CONFIG_SCSI_ATA_PIIX is not set
  # CONFIG_SCSI_SATA_NV is not set
  # CONFIG_SCSI_SATA_PROMISE is not set
+# CONFIG_SCSI_SATA_QSTOR is not set
  # CONFIG_SCSI_SATA_SX4 is not set
  # CONFIG_SCSI_SATA_SIL is not set
  # CONFIG_SCSI_SATA_SIS is not set
  # CONFIG_SCSI_SATA_ULI is not set
  # CONFIG_SCSI_SATA_VIA is not set
  CONFIG_SCSI_SATA_VITESSE=y
-# CONFIG_SCSI_BUSLOGIC is not set
  # CONFIG_SCSI_DMX3191D is not set
-# CONFIG_SCSI_EATA is not set
-# CONFIG_SCSI_EATA_PIO is not set
  # CONFIG_SCSI_FUTURE_DOMAIN is not set
-# CONFIG_SCSI_GDTH is not set
  # CONFIG_SCSI_IPS is not set
  # CONFIG_SCSI_INITIO is not set
  # CONFIG_SCSI_INIA100 is not set
  # CONFIG_SCSI_SYM53C8XX_2 is not set
  # CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
  # CONFIG_SCSI_QLOGIC_FC is not set
  CONFIG_SCSI_QLOGIC_1280=y
  # CONFIG_SCSI_QLOGIC_1280_1040 is not set
@@ -342,6 +424,8 @@ CONFIG_SCSI_QLA22XX=y
  CONFIG_SCSI_QLA2300=y
  CONFIG_SCSI_QLA2322=y
  # CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_QLA24XX is not set
+# CONFIG_SCSI_LPFC is not set
  # CONFIG_SCSI_DC395x is not set
  # CONFIG_SCSI_DC390T is not set
  # CONFIG_SCSI_DEBUG is not set
@@ -364,11 +448,15 @@ CONFIG_DM_CRYPT=m
  CONFIG_DM_SNAPSHOT=m
  CONFIG_DM_MIRROR=m
  CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_EMC=m
  
  #
  # Fusion MPT device support
  #
  CONFIG_FUSION=y
+CONFIG_FUSION_SPI=y
+CONFIG_FUSION_FC=y
  CONFIG_FUSION_MAX_SGE=128
  CONFIG_FUSION_CTL=m
  
@@ -383,82 +471,13 @@ CONFIG_FUSION_CTL=m
  # CONFIG_I2O is not set
  
  #
-# Networking support
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-CONFIG_PACKET=y
-CONFIG_PACKET_MMAP=y
-CONFIG_NETLINK_DEV=y
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_IP_MROUTE is not set
-# CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_IP_TCPDIAG=y
-# CONFIG_IP_TCPDIAG_IPV6 is not set
-CONFIG_IPV6=m
-# CONFIG_IPV6_PRIVACY is not set
-# CONFIG_INET6_AH is not set
-# CONFIG_INET6_ESP is not set
-# CONFIG_INET6_IPCOMP is not set
-# CONFIG_INET6_TUNNEL is not set
-# CONFIG_IPV6_TUNNEL is not set
-# CONFIG_NETFILTER is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
-
-#
-# Network testing
+# Network device support
  #
-# CONFIG_NET_PKTGEN is not set
-CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_RX is not set
-# CONFIG_NETPOLL_TRAP is not set
-CONFIG_NET_POLL_CONTROLLER=y
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
  CONFIG_NETDEVICES=y
  # CONFIG_DUMMY is not set
  # CONFIG_BONDING is not set
  # CONFIG_EQUALIZER is not set
  # CONFIG_TUN is not set
-# CONFIG_ETHERTAP is not set
  
  #
  # ARCnet devices
@@ -480,8 +499,10 @@ CONFIG_NETDEVICES=y
  # CONFIG_HAMACHI is not set
  # CONFIG_YELLOWFIN is not set
  # CONFIG_R8169 is not set
+# CONFIG_SKGE is not set
  # CONFIG_SK98LIN is not set
  CONFIG_TIGON3=y
+# CONFIG_BNX2 is not set
  
  #
  # Ethernet (10000 Mbit)
@@ -512,6 +533,10 @@ CONFIG_S2IO=m
  # CONFIG_NET_FC is not set
  # CONFIG_SHAPER is not set
  CONFIG_NETCONSOLE=y
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NET_POLL_CONTROLLER=y
  
  #
  # ISDN subsystem
@@ -540,14 +565,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
  # CONFIG_INPUT_EVDEV is not set
  # CONFIG_INPUT_EVBUG is not set
  
-#
-# Input I/O drivers
-#
-# CONFIG_GAMEPORT is not set
-CONFIG_SOUND_GAMEPORT=y
-# CONFIG_SERIO is not set
-# CONFIG_SERIO_I8042 is not set
-
  #
  # Input Device Drivers
  #
@@ -557,6 +574,12 @@ CONFIG_SOUND_GAMEPORT=y
  # CONFIG_INPUT_TOUCHSCREEN is not set
  # CONFIG_INPUT_MISC is not set
  
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
  #
  # Character devices
  #
@@ -568,9 +591,10 @@ CONFIG_SERIAL_NONSTANDARD=y
  # CONFIG_CYCLADES is not set
  # CONFIG_MOXA_SMARTIO is not set
  # CONFIG_ISI is not set
-# CONFIG_SYNCLINK is not set
  # CONFIG_SYNCLINKMP is not set
  # CONFIG_N_HDLC is not set
+# CONFIG_SPECIALIX is not set
+# CONFIG_SX is not set
  # CONFIG_STALDRV is not set
  CONFIG_SGI_SNSC=y
  CONFIG_SGI_TIOCX=y
@@ -587,6 +611,7 @@ CONFIG_SGI_MBCS=m
  CONFIG_SERIAL_CORE=y
  CONFIG_SERIAL_CORE_CONSOLE=y
  CONFIG_SERIAL_SGI_L1_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
  CONFIG_SERIAL_SGI_IOC4=y
  CONFIG_UNIX98_PTYS=y
  CONFIG_LEGACY_PTYS=y
@@ -615,18 +640,30 @@ CONFIG_EFI_RTC=y
  CONFIG_RAW_DRIVER=m
  # CONFIG_HPET is not set
  CONFIG_MAX_RAW_DEVS=256
+# CONFIG_HANGCHECK_TIMER is not set
  CONFIG_MMTIMER=y
  
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+
  #
  # I2C support
  #
  # CONFIG_I2C is not set
+# CONFIG_I2C_SENSOR is not set
  
  #
  # Dallas's 1-wire bus
  #
  # CONFIG_W1 is not set
  
+#
+# Hardware Monitoring support
+#
+# CONFIG_HWMON is not set
+
  #
  # Misc devices
  #
@@ -660,6 +697,8 @@ CONFIG_DUMMY_CONSOLE=y
  #
  # USB support
  #
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
  CONFIG_USB=m
  # CONFIG_USB_DEBUG is not set
  
@@ -669,9 +708,8 @@ CONFIG_USB=m
  # CONFIG_USB_DEVICEFS is not set
  # CONFIG_USB_BANDWIDTH is not set
  # CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_SUSPEND is not set
  # CONFIG_USB_OTG is not set
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
  
  #
  # USB Host Controller Drivers
@@ -679,7 +717,10 @@ CONFIG_USB_ARCH_HAS_OHCI=y
  CONFIG_USB_EHCI_HCD=m
  # CONFIG_USB_EHCI_SPLIT_ISO is not set
  # CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_ISP116X_HCD is not set
  CONFIG_USB_OHCI_HCD=m
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
  CONFIG_USB_UHCI_HCD=m
  # CONFIG_USB_SL811_HCD is not set
  
@@ -710,12 +751,15 @@ CONFIG_USB_HIDINPUT=y
  # CONFIG_USB_MOUSE is not set
  # CONFIG_USB_AIPTEK is not set
  # CONFIG_USB_WACOM is not set
+# CONFIG_USB_ACECAD is not set
  # CONFIG_USB_KBTAB is not set
  # CONFIG_USB_POWERMATE is not set
  # CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_ITMTOUCH is not set
  # CONFIG_USB_EGALAX is not set
  # CONFIG_USB_XPAD is not set
  # CONFIG_USB_ATI_REMOTE is not set
+# CONFIG_USB_KEYSPAN_REMOTE is not set
  
  #
  # USB Imaging devices
@@ -740,6 +784,7 @@ CONFIG_USB_HIDINPUT=y
  # CONFIG_USB_PEGASUS is not set
  # CONFIG_USB_RTL8150 is not set
  # CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
  
  #
  # USB port drivers
@@ -763,9 +808,12 @@ CONFIG_USB_HIDINPUT=y
  # CONFIG_USB_CYTHERM is not set
  # CONFIG_USB_PHIDGETKIT is not set
  # CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_SISUSBVGA is not set
+# CONFIG_USB_LD is not set
  
  #
-# USB ATM/DSL drivers
+# USB DSL modem support
  #
  
  #
@@ -782,6 +830,7 @@ CONFIG_USB_HIDINPUT=y
  # InfiniBand support
  #
  CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_VERBS=m
  CONFIG_INFINIBAND_MTHCA=m
  # CONFIG_INFINIBAND_MTHCA_DEBUG is not set
  CONFIG_INFINIBAND_IPOIB=m
@@ -799,6 +848,7 @@ CONFIG_EXT2_FS=y
  CONFIG_EXT2_FS_XATTR=y
  CONFIG_EXT2_FS_POSIX_ACL=y
  CONFIG_EXT2_FS_SECURITY=y
+# CONFIG_EXT2_FS_XIP is not set
  CONFIG_EXT3_FS=y
  CONFIG_EXT3_FS_XATTR=y
  CONFIG_EXT3_FS_POSIX_ACL=y
@@ -814,13 +864,19 @@ CONFIG_REISERFS_FS_POSIX_ACL=y
  CONFIG_REISERFS_FS_SECURITY=y
  # CONFIG_JFS_FS is not set
  CONFIG_FS_POSIX_ACL=y
+
+#
+# XFS support
+#
  CONFIG_XFS_FS=y
+CONFIG_XFS_EXPORT=y
  CONFIG_XFS_RT=y
  CONFIG_XFS_QUOTA=y
  # CONFIG_XFS_SECURITY is not set
  CONFIG_XFS_POSIX_ACL=y
  # CONFIG_MINIX_FS is not set
  # CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
  CONFIG_QUOTA=y
  # CONFIG_QFMT_V1 is not set
  # CONFIG_QFMT_V2 is not set
@@ -854,7 +910,6 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
  CONFIG_PROC_FS=y
  CONFIG_PROC_KCORE=y
  CONFIG_SYSFS=y
-# CONFIG_DEVFS_FS is not set
  # CONFIG_DEVPTS_FS_XATTR is not set
  CONFIG_TMPFS=y
  CONFIG_TMPFS_XATTR=y
@@ -885,15 +940,18 @@ CONFIG_RAMFS=y
  #
  CONFIG_NFS_FS=m
  CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
  CONFIG_NFS_V4=y
  CONFIG_NFS_DIRECTIO=y
  CONFIG_NFSD=m
  CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V3_ACL is not set
  CONFIG_NFSD_V4=y
  CONFIG_NFSD_TCP=y
  CONFIG_LOCKD=m
  CONFIG_LOCKD_V4=y
-CONFIG_EXPORTFS=m
+CONFIG_EXPORTFS=y
+CONFIG_NFS_COMMON=y
  CONFIG_SUNRPC=m
  CONFIG_SUNRPC_GSS=m
  CONFIG_RPCSEC_GSS_KRB5=m
@@ -980,6 +1038,9 @@ CONFIG_CRC32=y
  # CONFIG_LIBCRC32C is not set
  CONFIG_ZLIB_INFLATE=m
  CONFIG_ZLIB_DEFLATE=m
+CONFIG_GENERIC_ALLOCATOR=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
  
  #
  # Profiling support
@@ -989,15 +1050,19 @@ CONFIG_ZLIB_DEFLATE=m
  #
  # Kernel hacking
  #
+# CONFIG_PRINTK_TIME is not set
  CONFIG_DEBUG_KERNEL=y
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=20
  # CONFIG_SCHEDSTATS is not set
  # CONFIG_DEBUG_SLAB is not set
+CONFIG_DEBUG_PREEMPT=y
  # CONFIG_DEBUG_SPINLOCK is not set
  # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
  # CONFIG_DEBUG_KOBJECT is not set
  CONFIG_DEBUG_INFO=y
  # CONFIG_DEBUG_FS is not set
+# CONFIG_KPROBES is not set
  CONFIG_IA64_GRANULE_16MB=y
  # CONFIG_IA64_GRANULE_64MB is not set
  # CONFIG_IA64_PRINT_HAZARDS is not set
@@ -1019,11 +1084,12 @@ CONFIG_CRYPTO=y
  CONFIG_CRYPTO_HMAC=y
  # CONFIG_CRYPTO_NULL is not set
  # CONFIG_CRYPTO_MD4 is not set
-CONFIG_CRYPTO_MD5=m
+CONFIG_CRYPTO_MD5=y
  CONFIG_CRYPTO_SHA1=m
  # CONFIG_CRYPTO_SHA256 is not set
  # CONFIG_CRYPTO_SHA512 is not set
  # CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
  CONFIG_CRYPTO_DES=m
  # CONFIG_CRYPTO_BLOWFISH is not set
  # CONFIG_CRYPTO_TWOFISH is not set
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig

index 73454eee26f1968a319a4c91ba312c2cd02c1b9e..c853cfcd2d1123d6ef94271c0d8fab153e3c7c7a 100644 (file)
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -1,7 +1,7 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-rc1-20050629
-# Wed Jun 29 15:28:12 2005
+# Linux kernel version: 2.6.13-rc6-tiger-smp
+# Wed Aug 17 10:19:51 2005
  #
  
  #
@@ -132,6 +132,7 @@ CONFIG_ACPI_BOOT=y
  CONFIG_ACPI_INTERPRETER=y
  CONFIG_ACPI_BUTTON=m
  # CONFIG_ACPI_VIDEO is not set
+# CONFIG_ACPI_HOTKEY is not set
  CONFIG_ACPI_FAN=m
  CONFIG_ACPI_PROCESSOR=m
  # CONFIG_ACPI_HOTPLUG_CPU is not set
@@ -169,6 +170,66 @@ CONFIG_HOTPLUG_PCI_ACPI=m
  #
  # CONFIG_PCCARD is not set
  
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+CONFIG_ARPD=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_IP_TCPDIAG=y
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+# CONFIG_IPV6 is not set
+# CONFIG_NETFILTER is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+
  #
  # Device Drivers
  #
@@ -178,7 +239,7 @@ CONFIG_HOTPLUG_PCI_ACPI=m
  #
  CONFIG_STANDALONE=y
  CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
+CONFIG_FW_LOADER=m
  # CONFIG_DEBUG_DRIVER is not set
  
  #
@@ -348,6 +409,7 @@ CONFIG_SCSI_QLA22XX=m
  CONFIG_SCSI_QLA2300=m
  CONFIG_SCSI_QLA2322=m
  # CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_QLA24XX is not set
  # CONFIG_SCSI_LPFC is not set
  # CONFIG_SCSI_DC395x is not set
  # CONFIG_SCSI_DC390T is not set
@@ -393,72 +455,8 @@ CONFIG_FUSION_CTL=y
  # CONFIG_I2O is not set
  
  #
-# Networking support
-#
-CONFIG_NET=y
-
-#
-# Networking options
+# Network device support
  #
-CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_IP_MROUTE is not set
-CONFIG_ARPD=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_IP_TCPDIAG=y
-# CONFIG_IP_TCPDIAG_IPV6 is not set
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
-# CONFIG_IPV6 is not set
-# CONFIG_NETFILTER is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_RX is not set
-# CONFIG_NETPOLL_TRAP is not set
-CONFIG_NET_POLL_CONTROLLER=y
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
  CONFIG_NETDEVICES=y
  CONFIG_DUMMY=m
  # CONFIG_BONDING is not set
@@ -555,6 +553,10 @@ CONFIG_TIGON3=y
  # CONFIG_NET_FC is not set
  # CONFIG_SHAPER is not set
  CONFIG_NETCONSOLE=y
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NET_POLL_CONTROLLER=y
  
  #
  # ISDN subsystem
@@ -680,6 +682,7 @@ CONFIG_DRM_R128=m
  CONFIG_DRM_RADEON=m
  CONFIG_DRM_MGA=m
  CONFIG_DRM_SIS=m
+# CONFIG_DRM_VIA is not set
  CONFIG_RAW_DRIVER=m
  CONFIG_HPET=y
  # CONFIG_HPET_RTC_IRQ is not set
@@ -696,12 +699,19 @@ CONFIG_MAX_RAW_DEVS=256
  # I2C support
  #
  # CONFIG_I2C is not set
+# CONFIG_I2C_SENSOR is not set
  
  #
  # Dallas's 1-wire bus
  #
  # CONFIG_W1 is not set
  
+#
+# Hardware Monitoring support
+#
+CONFIG_HWMON=y
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
  #
  # Misc devices
  #
@@ -800,6 +810,7 @@ CONFIG_USB_HIDINPUT=y
  # CONFIG_USB_EGALAX is not set
  # CONFIG_USB_XPAD is not set
  # CONFIG_USB_ATI_REMOTE is not set
+# CONFIG_USB_KEYSPAN_REMOTE is not set
  
  #
  # USB Imaging devices
@@ -850,6 +861,7 @@ CONFIG_USB_HIDINPUT=y
  # CONFIG_USB_PHIDGETSERVO is not set
  # CONFIG_USB_IDMOUSE is not set
  # CONFIG_USB_SISUSBVGA is not set
+# CONFIG_USB_LD is not set
  # CONFIG_USB_TEST is not set
  
  #
@@ -910,6 +922,7 @@ CONFIG_XFS_EXPORT=y
  # CONFIG_XFS_POSIX_ACL is not set
  # CONFIG_MINIX_FS is not set
  # CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
  # CONFIG_QUOTA is not set
  CONFIG_DNOTIFY=y
  CONFIG_AUTOFS_FS=y
diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig

index b7755e4436d222e09d396d91bc924efc5a633b6f..88e8867fa8e82f65092a9e5e99e0f437a3f43191 100644 (file)
--- a/arch/ia64/configs/zx1_defconfig
+++ b/arch/ia64/configs/zx1_defconfig
@@ -1,7 +1,7 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-rc1-20050629
-# Wed Jun 29 15:31:11 2005
+# Linux kernel version: 2.6.13-rc6
+# Wed Aug 17 10:02:43 2005
  #
  
  #
@@ -132,6 +132,7 @@ CONFIG_ACPI_BOOT=y
  CONFIG_ACPI_INTERPRETER=y
  CONFIG_ACPI_BUTTON=y
  CONFIG_ACPI_VIDEO=m
+CONFIG_ACPI_HOTKEY=m
  CONFIG_ACPI_FAN=y
  CONFIG_ACPI_PROCESSOR=y
  CONFIG_ACPI_THERMAL=y
@@ -168,6 +169,83 @@ CONFIG_HOTPLUG_PCI_ACPI=y
  #
  # CONFIG_PCCARD is not set
  
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_IP_TCPDIAG is not set
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+
+#
+# IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+
+#
+# IP: Netfilter Configuration
+#
+# CONFIG_IP_NF_CONNTRACK is not set
+# CONFIG_IP_NF_CONNTRACK_MARK is not set
+# CONFIG_IP_NF_QUEUE is not set
+# CONFIG_IP_NF_IPTABLES is not set
+CONFIG_IP_NF_ARPTABLES=y
+# CONFIG_IP_NF_ARPFILTER is not set
+# CONFIG_IP_NF_ARP_MANGLE is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+
  #
  # Device Drivers
  #
@@ -349,6 +427,7 @@ CONFIG_SCSI_QLA2XXX=y
  # CONFIG_SCSI_QLA2300 is not set
  # CONFIG_SCSI_QLA2322 is not set
  # CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_QLA24XX is not set
  # CONFIG_SCSI_LPFC is not set
  # CONFIG_SCSI_DC395x is not set
  # CONFIG_SCSI_DC390T is not set
@@ -362,9 +441,11 @@ CONFIG_SCSI_QLA2XXX=y
  #
  # Fusion MPT device support
  #
-# CONFIG_FUSION is not set
-# CONFIG_FUSION_SPI is not set
-# CONFIG_FUSION_FC is not set
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=y
+CONFIG_FUSION_FC=y
+CONFIG_FUSION_MAX_SGE=128
+CONFIG_FUSION_CTL=m
  
  #
  # IEEE 1394 (FireWire) support
@@ -377,87 +458,8 @@ CONFIG_SCSI_QLA2XXX=y
  # CONFIG_I2O is not set
  
  #
-# Networking support
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_IP_MROUTE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-# CONFIG_IP_TCPDIAG is not set
-# CONFIG_IP_TCPDIAG_IPV6 is not set
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
-
-#
-# IP: Virtual Server Configuration
-#
-# CONFIG_IP_VS is not set
-# CONFIG_IPV6 is not set
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_DEBUG is not set
-
-#
-# IP: Netfilter Configuration
+# Network device support
  #
-# CONFIG_IP_NF_CONNTRACK is not set
-# CONFIG_IP_NF_CONNTRACK_MARK is not set
-# CONFIG_IP_NF_QUEUE is not set
-# CONFIG_IP_NF_IPTABLES is not set
-CONFIG_IP_NF_ARPTABLES=y
-# CONFIG_IP_NF_ARPFILTER is not set
-# CONFIG_IP_NF_ARP_MANGLE is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
  CONFIG_NETDEVICES=y
  CONFIG_DUMMY=y
  # CONFIG_BONDING is not set
@@ -555,6 +557,8 @@ CONFIG_TIGON3=y
  # CONFIG_NET_FC is not set
  # CONFIG_SHAPER is not set
  # CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
  
  #
  # ISDN subsystem
@@ -659,6 +663,7 @@ CONFIG_DRM=y
  CONFIG_DRM_RADEON=y
  # CONFIG_DRM_MGA is not set
  # CONFIG_DRM_SIS is not set
+# CONFIG_DRM_VIA is not set
  # CONFIG_RAW_DRIVER is not set
  # CONFIG_HPET is not set
  # CONFIG_HANGCHECK_TIMER is not set
@@ -706,47 +711,10 @@ CONFIG_I2C_ALGOPCF=y
  # CONFIG_I2C_VIAPRO is not set
  # CONFIG_I2C_VOODOO3 is not set
  # CONFIG_I2C_PCA_ISA is not set
+# CONFIG_I2C_SENSOR is not set
  
  #
-# Hardware Sensors Chip support
-#
-# CONFIG_I2C_SENSOR is not set
-# CONFIG_SENSORS_ADM1021 is not set
-# CONFIG_SENSORS_ADM1025 is not set
-# CONFIG_SENSORS_ADM1026 is not set
-# CONFIG_SENSORS_ADM1031 is not set
-# CONFIG_SENSORS_ADM9240 is not set
-# CONFIG_SENSORS_ASB100 is not set
-# CONFIG_SENSORS_ATXP1 is not set
-# CONFIG_SENSORS_DS1621 is not set
-# CONFIG_SENSORS_FSCHER is not set
-# CONFIG_SENSORS_FSCPOS is not set
-# CONFIG_SENSORS_GL518SM is not set
-# CONFIG_SENSORS_GL520SM is not set
-# CONFIG_SENSORS_IT87 is not set
-# CONFIG_SENSORS_LM63 is not set
-# CONFIG_SENSORS_LM75 is not set
-# CONFIG_SENSORS_LM77 is not set
-# CONFIG_SENSORS_LM78 is not set
-# CONFIG_SENSORS_LM80 is not set
-# CONFIG_SENSORS_LM83 is not set
-# CONFIG_SENSORS_LM85 is not set
-# CONFIG_SENSORS_LM87 is not set
-# CONFIG_SENSORS_LM90 is not set
-# CONFIG_SENSORS_LM92 is not set
-# CONFIG_SENSORS_MAX1619 is not set
-# CONFIG_SENSORS_PC87360 is not set
-# CONFIG_SENSORS_SMSC47B397 is not set
-# CONFIG_SENSORS_SIS5595 is not set
-# CONFIG_SENSORS_SMSC47M1 is not set
-# CONFIG_SENSORS_VIA686A is not set
-# CONFIG_SENSORS_W83781D is not set
-# CONFIG_SENSORS_W83L785TS is not set
-# CONFIG_SENSORS_W83627HF is not set
-# CONFIG_SENSORS_W83627EHF is not set
-
-#
-# Other I2C Chip support
+# Miscellaneous I2C Chip support
  #
  # CONFIG_SENSORS_DS1337 is not set
  # CONFIG_SENSORS_DS1374 is not set
@@ -766,6 +734,11 @@ CONFIG_I2C_ALGOPCF=y
  #
  # CONFIG_W1 is not set
  
+#
+# Hardware Monitoring support
+#
+# CONFIG_HWMON is not set
+
  #
  # Misc devices
  #
@@ -782,7 +755,6 @@ CONFIG_VIDEO_DEV=y
  #
  # Video Adapters
  #
-# CONFIG_TUNER_MULTI_I2C is not set
  # CONFIG_VIDEO_BT848 is not set
  # CONFIG_VIDEO_CPIA is not set
  # CONFIG_VIDEO_SAA5246A is not set
@@ -1025,6 +997,7 @@ CONFIG_USB_HIDDEV=y
  # CONFIG_USB_EGALAX is not set
  # CONFIG_USB_XPAD is not set
  # CONFIG_USB_ATI_REMOTE is not set
+# CONFIG_USB_KEYSPAN_REMOTE is not set
  
  #
  # USB Imaging devices
@@ -1080,6 +1053,7 @@ CONFIG_USB_MON=y
  # CONFIG_USB_PHIDGETSERVO is not set
  # CONFIG_USB_IDMOUSE is not set
  # CONFIG_USB_SISUSBVGA is not set
+# CONFIG_USB_LD is not set
  
  #
  # USB DSL modem support
@@ -1121,6 +1095,7 @@ CONFIG_JBD=y
  CONFIG_FS_MBCACHE=y
  # CONFIG_REISERFS_FS is not set
  # CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
  
  #
  # XFS support
@@ -1128,6 +1103,7 @@ CONFIG_FS_MBCACHE=y
  # CONFIG_XFS_FS is not set
  # CONFIG_MINIX_FS is not set
  # CONFIG_ROMFS_FS is not set
+# CONFIG_INOTIFY is not set
  # CONFIG_QUOTA is not set
  CONFIG_DNOTIFY=y
  CONFIG_AUTOFS_FS=y
diff --git a/arch/ia64/hp/sim/boot/boot_head.S b/arch/ia64/hp/sim/boot/boot_head.S

index 9364199e56322b5614360a240a9bdc4495ded06b..1c8c7e6a9a5ecaba963c20ba69d2e93c76c0994a 100644 (file)
--- a/arch/ia64/hp/sim/boot/boot_head.S
+++ b/arch/ia64/hp/sim/boot/boot_head.S
@@ -22,7 +22,7 @@ GLOBAL_ENTRY(_start)
         .save rp, r0
         .body
         movl gp = __gp
-       movl sp = stack_mem
+       movl sp = stack_mem+16384-16
         bsw.1
         br.call.sptk.many rp=start_bootloader
  END(_start)
diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c

index d65e87b6394fa2c636f9593718f6e84f29c9804c..bbb8efe126b716b0851102f133480abc5729f57b 100644 (file)
--- a/arch/ia64/kernel/domain.c
+++ b/arch/ia64/kernel/domain.c
@@ -341,7 +341,7 @@ next_sg:
  #endif
  
         /* Attach the domains */
-       for_each_online_cpu(i) {
+       for_each_cpu_mask(i, *cpu_map) {
                 struct sched_domain *sd;
  #ifdef CONFIG_SCHED_SMT
                 sd = &per_cpu(cpu_domains, i);
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S

index 66946f3fdac7504dbc66000fdc06ddebd1c9598b..9be53e1ea40431505fb392c9e54a7887e933d810 100644 (file)
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1573,7 +1573,7 @@ sys_call_table:
         data8 sys_keyctl
         data8 sys_ioprio_set
         data8 sys_ioprio_get                    // 1275
-       data8 sys_set_zone_reclaim
+       data8 sys_ni_syscall
         data8 sys_inotify_init
         data8 sys_inotify_add_watch
         data8 sys_inotify_rm_watch
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c

index b8ebb8e427efad319ddecadcee632b9c037eb56a..f1201ac8a11617b012ec9e8a25c1b6cf20863d80 100644 (file)
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -4312,6 +4312,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
         DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n",
                 thread->pfm_context, ctx));
  
+       ret = -EBUSY;
         old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
         if (old != NULL) {
                 DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c

index 66e8406098080343da5fa91e3f1b860d7e6ee5d7..051e050359e490ac4a5e44a90cb6c46849ea77f9 100644 (file)
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -179,7 +179,7 @@ static int can_do_pal_halt = 1;
  
  static int __init nohalt_setup(char * str)
  {
-       pal_halt = 0;
+       pal_halt = can_do_pal_halt = 0;
         return 1;
  }
  __setup("nohalt", nohalt_setup);
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c

index d227fabecd023d741880d4fed93dcec60c012a3f..6f0cc7a6634ee84a00147630e0815c072a3edc4a 100644 (file)
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -143,7 +143,8 @@ struct salinfo_data {
  
  static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];
  
-static spinlock_t data_lock, data_saved_lock;
+static DEFINE_SPINLOCK(data_lock);
+static DEFINE_SPINLOCK(data_saved_lock);
  
  /** salinfo_platform_oemdata - optional callback to decode oemdata from an error
   * record.
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c

index b8a0a7d257a9b3f427561faadb6d98eaf7b4648f..774f34b675cfdb3c688e2831b4fb599d1081b2ae 100644 (file)
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -467,15 +467,12 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse
                 if (!setup_frame(sig, ka, info, oldset, scr))
                         return 0;
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               {
-                       sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
-                       sigaddset(&current->blocked, sig);
-                       recalc_sigpending();
-               }
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
+               sigaddset(&current->blocked, sig);
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
         return 1;
  }
  
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c

index 54d9ed444e4a11343a11674ac9c6d5e53d1b6729..f9472c50ab4298a072fd25cfca6743ff69122b91 100644 (file)
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -380,6 +380,7 @@ void pcibios_bus_to_resource(struct pci_dev *dev,
         res->start = region->start + offset;
         res->end = region->end + offset;
  }
+EXPORT_SYMBOL(pcibios_bus_to_resource);
  
  static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
  {
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c

index a6649baf629a205eb190d16745b92e176337c448..414cdf2e3c965e124223833a1ed036783e4c4392 100644 (file)
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -203,6 +203,7 @@ static void sn_fixup_ionodes(void)
                                 continue;
                         }
  
+                       spin_lock_init(&sn_flush_device_list->sfdl_flush_lock);
                         hubdev->hdi_flush_nasid_list.widget_p[widget] =
                             sn_flush_device_list;
                 }
@@ -322,7 +323,7 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
         struct pci_controller *controller;
         struct pcibus_bussoft *prom_bussoft_ptr;
         struct hubdev_info *hubdev_info;
-       void *provider_soft;
+       void *provider_soft = NULL;
         struct sn_pcibus_provider *provider;
  
         status = sal_get_pcibus_info((u64) segment, (u64) busnum,
@@ -338,7 +339,7 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
         if (bus == NULL) {
                 bus = pci_scan_bus(busnum, &pci_root_ops, controller);
                 if (bus == NULL)
-                       return; /* error, or bus already scanned */
+                       goto error_return; /* error, or bus already scanned */
                 bus->sysdata = NULL;
         }
  
@@ -351,28 +352,30 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
          */
  
         if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES)
-               return;         /* unsupported asic type */
+               goto error_return; /* unsupported asic type */
  
         if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB)
                 goto error_return; /* no further fixup necessary */
  
         provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type];
         if (provider == NULL)
-               return;         /* no provider registerd for this asic */
+               goto error_return; /* no provider registered for this asic */
  
-       provider_soft = NULL;
+       bus->sysdata = controller;
         if (provider->bus_fixup)
                 provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller);
  
-       if (provider_soft == NULL)
-               return;         /* fixup failed or not applicable */
+       if (provider_soft == NULL) {
+               /* fixup failed or not applicable */
+               bus->sysdata = NULL;
+               goto error_return;
+       }
  
         /*
          * Generic bus fixup goes here.  Don't reference prom_bussoft_ptr
          * after this point.
          */
  
-       bus->sysdata = controller;
         PCI_CONTROLLER(bus)->platform_data = provider_soft;
         nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base);
         cnode = nasid_to_cnodeid(nasid);
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig

index 7772951df313b9e9edec01dd2b2cb922b1e43d62..7622d4ec5f08d2ff995b9a05e10fdefb63630fbe 100644 (file)
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -269,7 +269,7 @@ config NR_CPUS
  # Common NUMA Features
  config NUMA
         bool "Numa Memory Allocation Support"
-       depends on SMP
+       depends on SMP && BROKEN
         default n
  
  # turning this on wastes a bunch of space.
@@ -286,6 +286,7 @@ menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
  
  config PCI
         bool "PCI support"
+       depends on BROKEN
         default n
         help
           Find out whether you have a PCI motherboard. PCI is the name of a
diff --git a/arch/m32r/Kconfig.debug b/arch/m32r/Kconfig.debug

index 31039723804f95fa106c6eadc0a0393db8fba4d8..bbf711bab69e904bc9726314bd27b9df59d2c82b 100644 (file)
--- a/arch/m32r/Kconfig.debug
+++ b/arch/m32r/Kconfig.debug
@@ -20,7 +20,7 @@ config DEBUG_STACK_USAGE
  
  config DEBUG_PAGEALLOC
         bool "Page alloc debugging"
-       depends on DEBUG_KERNEL
+       depends on DEBUG_KERNEL && BROKEN
         help
           Unmap pages from the kernel linear mapping after free_pages().
           This results in a large slowdown, but helps to find certain types
diff --git a/arch/m32r/kernel/setup_m32700ut.c b/arch/m32r/kernel/setup_m32700ut.c

index a146b24a556b1aec6074728232347e9d79932bfc..708634b685e44ff925bb2d4eca7af5de8173e329 100644 (file)
--- a/arch/m32r/kernel/setup_m32700ut.c
+++ b/arch/m32r/kernel/setup_m32700ut.c
@@ -30,9 +30,11 @@
  typedef struct {
         unsigned long icucr;  /* ICU Control Register */
  } icu_data_t;
+static icu_data_t icu_data[M32700UT_NUM_CPU_IRQ];
+#else
+icu_data_t icu_data[M32700UT_NUM_CPU_IRQ];
  #endif /* CONFIG_SMP */
  
-static icu_data_t icu_data[M32700UT_NUM_CPU_IRQ];
  
  static void disable_m32700ut_irq(unsigned int irq)
  {
diff --git a/arch/m32r/kernel/setup_opsput.c b/arch/m32r/kernel/setup_opsput.c

index f0301f58bcce31e54e5150141af1d1c1aa9f37f5..d7b7ec6d30f88c3942eec68d520b05b0a66675a7 100644 (file)
--- a/arch/m32r/kernel/setup_opsput.c
+++ b/arch/m32r/kernel/setup_opsput.c
@@ -31,9 +31,11 @@
  typedef struct {
         unsigned long icucr;  /* ICU Control Register */
  } icu_data_t;
+static icu_data_t icu_data[OPSPUT_NUM_CPU_IRQ];
+#else
+icu_data_t icu_data[OPSPUT_NUM_CPU_IRQ];
  #endif /* CONFIG_SMP */
  
-static icu_data_t icu_data[OPSPUT_NUM_CPU_IRQ];
  
  static void disable_opsput_irq(unsigned int irq)
  {
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c

index 5aef7e406ef5f2a0a74e90a334ee4a5ad2817ffd..71763f7a1d1989eb0cda8196694e07043a47ce52 100644 (file)
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -341,13 +341,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
         /* Set up the stack frame */
         setup_rt_frame(sig, ka, info, oldset, regs);
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  /*
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c

index f9a0e723478dd58747945b9e5f7fe7b7b9578932..640d592ea07251207f59f4864cfd0ce9e1ad580d 100644 (file)
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -91,6 +91,7 @@ extern struct {
  
  /* which physical physical ID maps to which logical CPU number */
  static volatile int physid_2_cpu[NR_CPUS];
+#define physid_to_cpu(physid)  physid_2_cpu[physid]
  
  /* which logical CPU number maps to which physical ID */
  volatile int cpu_2_physid[NR_CPUS];
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c

index 3c4707280a5215cd833652067339ac4d80491d4d..8a2b77bc5749b50b01a45ef3c81b6d2c8870fc05 100644 (file)
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -205,8 +205,7 @@ static long last_rtc_update = 0;
   * timer_interrupt() needs to keep up the real-time clock,
   * as well as call the "do_timer()" routine every clocktick
   */
-static inline void
-do_timer_interrupt(int irq, void *dev_id, struct pt_regs * regs)
+irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
  {
  #ifndef CONFIG_SMP
         profile_tick(CPU_PROFILING, regs);
@@ -221,6 +220,7 @@ do_timer_interrupt(int irq, void *dev_id, struct pt_regs * regs)
          * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
          * called as close as possible to 500 ms before the new second starts.
          */
+       write_seqlock(&xtime_lock);
         if ((time_status & STA_UNSYNC) == 0
                 && xtime.tv_sec > last_rtc_update + 660
                 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned)TICK_SIZE) / 2
@@ -231,6 +231,7 @@ do_timer_interrupt(int irq, void *dev_id, struct pt_regs * regs)
                 else    /* do it again in 60 s */
                         last_rtc_update = xtime.tv_sec - 600;
         }
+       write_sequnlock(&xtime_lock);
         /* As we return to user mode fire off the other CPU schedulers..
            this is basically because we don't yet share IRQ's around.
            This message is rigged to be safe on the 386 - basically it's
@@ -238,14 +239,8 @@ do_timer_interrupt(int irq, void *dev_id, struct pt_regs * regs)
  
  #ifdef CONFIG_SMP
         smp_local_timer_interrupt(regs);
+       smp_send_timer();
  #endif
-}
-
-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
-       write_seqlock(&xtime_lock);
-       do_timer_interrupt(irq, NULL, regs);
-       write_sequnlock(&xtime_lock);
  
         return IRQ_HANDLED;
  }
diff --git a/arch/m32r/lib/csum_partial_copy.c b/arch/m32r/lib/csum_partial_copy.c

index c871b4606b07f9ba82f8f8be4944428fc831d6f6..ddb16a83a8ce4621af77840971e24e9868e8bb99 100644 (file)
--- a/arch/m32r/lib/csum_partial_copy.c
+++ b/arch/m32r/lib/csum_partial_copy.c
@@ -58,3 +58,4 @@ csum_partial_copy_from_user (const unsigned char __user *src,
         return csum_partial(dst, len-missing, sum);
  }
  EXPORT_SYMBOL(csum_partial_copy_from_user);
+EXPORT_SYMBOL(csum_partial);
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c

index 1d1a01e54b3fa233cd1f942c3d0b0e6cb6512885..08e727955555ca13b8b8eafcc5272db2edffcb8f 100644 (file)
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -12,12 +12,14 @@
  #include <linux/mmzone.h>
  #include <linux/initrd.h>
  #include <linux/nodemask.h>
+#include <linux/module.h>
  
  #include <asm/setup.h>
  
  extern char _end[];
  
  struct pglist_data *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
  static bootmem_data_t node_bdata[MAX_NUMNODES] __initdata;
  
  pg_data_t m32r_node_data[MAX_NUMNODES];
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c

index ac48b6d2aff6049143546277a191d798005d7c73..aec15270d334b74795fbfdf7ec214434f35a923f 100644 (file)
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -160,13 +160,13 @@ good_area:
         printk("handle_mm_fault returns %d\n",fault);
  #endif
         switch (fault) {
-       case 1:
+       case VM_FAULT_MINOR:
                 current->min_flt++;
                 break;
-       case 2:
+       case VM_FAULT_MAJOR:
                 current->maj_flt++;
                 break;
-       case 0:
+       case VM_FAULT_SIGBUS:
                 goto bus_err;
         default:
                 goto out_of_memory;
diff --git a/arch/m68knommu/kernel/signal.c b/arch/m68knommu/kernel/signal.c

index 30dceb59a462078b6c09b3616dee2ef9ca46c75a..43a2726c0d0a4931070a55c1af124650ed872e58 100644 (file)
--- a/arch/m68knommu/kernel/signal.c
+++ b/arch/m68knommu/kernel/signal.c
@@ -732,13 +732,12 @@ handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info,
         if (ka->sa.sa_flags & SA_ONESHOT)
                 ka->sa.sa_handler = SIG_DFL;
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  /*
diff --git a/arch/mips/kernel/irixsig.c b/arch/mips/kernel/irixsig.c

index 40244782a8e54185d263de24729f83b135150096..4c114ae21793eb8a4fde93da5b8ba529c2222395 100644 (file)
--- a/arch/mips/kernel/irixsig.c
+++ b/arch/mips/kernel/irixsig.c
@@ -155,13 +155,12 @@ static inline void handle_signal(unsigned long sig, siginfo_t *info,
         else
                 setup_irix_frame(ka, regs, sig, oldset);
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  asmlinkage int do_irix_signal(sigset_t *oldset, struct pt_regs *regs)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c

index 65ee15396ffdefc52782ab24bd983baf08e90f30..0209c1dd1429a1c87a1da11a2f2ce1f14f705a30 100644 (file)
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -425,13 +425,12 @@ static inline void handle_signal(unsigned long sig, siginfo_t *info,
                 setup_frame(ka, regs, sig, oldset);
  #endif
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  extern int do_signal32(sigset_t *oldset, struct pt_regs *regs);
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c

index c1a69cf232f937e68cac85eaa8c47aec58d6650b..f6875f023a29fdc34ef22bedbb292f9ccf3db9bc 100644 (file)
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -751,13 +751,12 @@ static inline void handle_signal(unsigned long sig, siginfo_t *info,
         else
                 setup_frame(ka, regs, sig, oldset);
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  int do_signal32(sigset_t *oldset, struct pt_regs *regs)
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c

index 9421bb98ea6398a369bc01e2811f55491ef065fc..55d71c15e1f7cebc25b68c9e9facb4c7fdfb69e4 100644 (file)
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -517,13 +517,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
         if (!setup_rt_frame(sig, ka, info, oldset, regs, in_syscall))
                 return 0;
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
         return 1;
  }
  
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c

index eaa701479f5f1d0fcf6f9be64f839497cd2371b1..0ad945d4c0a4d051220d46638db6a1c720ea25e2 100644 (file)
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -178,17 +178,17 @@ good_area:
          */
  
         switch (handle_mm_fault(mm, vma, address, (acc_type & VM_WRITE) != 0)) {
-             case 1:
+             case VM_FAULT_MINOR:
                 ++current->min_flt;
                 break;
-             case 2:
+             case VM_FAULT_MAJOR:
                 ++current->maj_flt;
                 break;
-             case 0:
+             case VM_FAULT_SIGBUS:
                 /*
-                * We ran out of memory, or some other thing happened
-                * to us that made us unable to handle the page fault
-                * gracefully.
+                * We hit a shared mapping outside of the file, or some
+                * other thing happened to us that made us unable to
+                * handle the page fault gracefully.
                  */
                 goto bad_area;
               default:
diff --git a/arch/ppc/8xx_io/Kconfig b/arch/ppc/8xx_io/Kconfig

index 9e2227ec3b349fdac73ede19bcddd93ac3c6ceb3..57dacf978532f6cbbf43d0a9ebee0dc81a8ef7e5 100644 (file)
--- a/arch/ppc/8xx_io/Kconfig
+++ b/arch/ppc/8xx_io/Kconfig
@@ -69,9 +69,9 @@ config FEC_QS6612
         
  config ENET_BIG_BUFFERS
         bool "Use Big CPM Ethernet Buffers"
-       depends on NET_ETHERNET
+       depends on SCC_ENET || FEC_ENET
         help
-         Allocate large buffers for MPC8xx Etherenet.  Increases throughput
+         Allocate large buffers for MPC8xx Ethernet. Increases throughput
           and decreases the likelihood of dropped packets, but costs memory.
  
  config HTDMSOUND
diff --git a/arch/ppc/8xx_io/commproc.c b/arch/ppc/8xx_io/commproc.c

index 0cc2e7a9cb11fdffdd979f640103549f8745a63e..11726e2a4ec85c2b015d264e7486af6e1f752a8e 100644 (file)
--- a/arch/ppc/8xx_io/commproc.c
+++ b/arch/ppc/8xx_io/commproc.c
@@ -39,8 +39,6 @@
  #include <asm/tlbflush.h>
  #include <asm/rheap.h>
  
-extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep);
-
  static void m8xx_cpm_dpinit(void);
  static uint    host_buffer;    /* One page of host buffer */
  static uint    host_end;       /* end + 1 */
@@ -108,14 +106,11 @@ struct hw_interrupt_type cpm_pic = {
         .end            = cpm_eoi,
  };
  
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-
  void
-m8xx_cpm_reset(uint bootpage)
+m8xx_cpm_reset(void)
  {
         volatile immap_t         *imp;
         volatile cpm8xx_t       *commproc;
-       pte_t *pte;
  
         imp = (immap_t *)IMAP_ADDR;
         commproc = (cpm8xx_t *)&imp->im_cpm;
@@ -143,17 +138,6 @@ m8xx_cpm_reset(uint bootpage)
         /* Reclaim the DP memory for our use. */
         m8xx_cpm_dpinit();
  
-       /* get the PTE for the bootpage */
-       if (!get_pteptr(&init_mm, bootpage, &pte))
-              panic("get_pteptr failed\n");
-                                                                                                                                                                                       
-       /* and make it uncachable */
-       pte_val(*pte) |= _PAGE_NO_CACHE;
-       _tlbie(bootpage);
-
-       host_buffer = bootpage;
-       host_end = host_buffer + PAGE_SIZE;
-
         /* Tell everyone where the comm processor resides.
         */
         cpmp = (cpm8xx_t *)commproc;
@@ -384,8 +368,6 @@ static rh_info_t cpm_dpmem_info;
  
  void m8xx_cpm_dpinit(void)
  {
-       cpm8xx_t *cp = &((immap_t *)IMAP_ADDR)->im_cpm;
-
         spin_lock_init(&cpm_dpmem_lock);
  
         /* Initialize the info header */
diff --git a/arch/ppc/8xx_io/fec.c b/arch/ppc/8xx_io/fec.c

index 0730392dcc2065c65b02e6dd740c337a3d730ef6..62f68d6181c652b3bd8be030463b78e260f89db0 100644 (file)
--- a/arch/ppc/8xx_io/fec.c
+++ b/arch/ppc/8xx_io/fec.c
@@ -173,7 +173,7 @@ struct fec_enet_private {
         uint    phy_status;
         uint    phy_speed;
         phy_info_t      *phy;
-       struct tq_struct phy_task;
+       struct work_struct phy_task;
  
         uint    sequence_done;
  
@@ -199,7 +199,8 @@ static int fec_enet_start_xmit(struct sk_buff *skb, struct net_device *dev);
  #ifdef CONFIG_USE_MDIO
  static void fec_enet_mii(struct net_device *dev);
  #endif /* CONFIG_USE_MDIO */
-static void fec_enet_interrupt(int irq, void * dev_id, struct pt_regs * regs);
+static irqreturn_t fec_enet_interrupt(int irq, void * dev_id,
+                                                       struct pt_regs * regs);
  #ifdef CONFIG_FEC_PACKETHOOK
  static void  fec_enet_tx(struct net_device *dev, __u32 regval);
  static void  fec_enet_rx(struct net_device *dev, __u32 regval);
@@ -471,7 +472,7 @@ fec_timeout(struct net_device *dev)
  /* The interrupt handler.
   * This is called from the MPC core interrupt.
   */
-static void
+static irqreturn_t
  fec_enet_interrupt(int irq, void * dev_id, struct pt_regs * regs)
  {
         struct  net_device *dev = dev_id;
@@ -525,6 +526,7 @@ printk("%s[%d] %s: unexpected FEC_ENET_MII event\n", __FILE__,__LINE__,__FUNCTIO
                 }
  
         }
+       return IRQ_RETVAL(IRQ_HANDLED);
  }
  
  
@@ -1263,8 +1265,9 @@ static void mii_display_status(struct net_device *dev)
         printk(".\n");
  }
  
-static void mii_display_config(struct net_device *dev)
+static void mii_display_config(void *priv)
  {
+       struct net_device *dev = (struct net_device *)priv;
         struct fec_enet_private *fep = dev->priv;
         volatile uint *s = &(fep->phy_status);
  
@@ -1294,8 +1297,9 @@ static void mii_display_config(struct net_device *dev)
         fep->sequence_done = 1;
  }
  
-static void mii_relink(struct net_device *dev)
+static void mii_relink(void *priv)
  {
+       struct net_device *dev = (struct net_device *)priv;
         struct fec_enet_private *fep = dev->priv;
         int duplex;
  
@@ -1323,18 +1327,16 @@ static void mii_queue_relink(uint mii_reg, struct net_device *dev)
  {
         struct fec_enet_private *fep = dev->priv;
  
-       fep->phy_task.routine = (void *)mii_relink;
-       fep->phy_task.data = dev;
-       schedule_task(&fep->phy_task);
+       INIT_WORK(&fep->phy_task, mii_relink, (void *)dev);
+       schedule_work(&fep->phy_task);
  }
  
  static void mii_queue_config(uint mii_reg, struct net_device *dev)
  {
         struct fec_enet_private *fep = dev->priv;
  
-       fep->phy_task.routine = (void *)mii_display_config;
-       fep->phy_task.data = dev;
-       schedule_task(&fep->phy_task);
+       INIT_WORK(&fep->phy_task, mii_display_config, (void *)dev);
+       schedule_work(&fep->phy_task);
  }
  
  
@@ -1403,11 +1405,11 @@ mii_discover_phy(uint mii_reg, struct net_device *dev)
  
  /* This interrupt occurs when the PHY detects a link change.
  */
-static void
+static
  #ifdef CONFIG_RPXCLASSIC
-mii_link_interrupt(void *dev_id)
+void mii_link_interrupt(void *dev_id)
  #else
-mii_link_interrupt(int irq, void * dev_id, struct pt_regs * regs)
+irqreturn_t mii_link_interrupt(int irq, void * dev_id, struct pt_regs * regs)
  #endif
  {
  #ifdef CONFIG_USE_MDIO
@@ -1440,6 +1442,9 @@ mii_link_interrupt(int irq, void * dev_id, struct pt_regs * regs)
  printk("%s[%d] %s: unexpected Link interrupt\n", __FILE__,__LINE__,__FUNCTION__);
  #endif /* CONFIG_USE_MDIO */
  
+#ifndef CONFIG_RPXCLASSIC
+       return IRQ_RETVAL(IRQ_HANDLED);
+#endif /* CONFIG_RPXCLASSIC */
  }
  
  static int
@@ -1575,7 +1580,7 @@ static int __init fec_enet_init(void)
         struct fec_enet_private *fep;
         int i, j, k, err;
         unsigned char   *eap, *iap, *ba;
-       unsigned long   mem_addr;
+       dma_addr_t      mem_addr;
         volatile        cbd_t   *bdp;
         cbd_t           *cbd_base;
         volatile        immap_t *immap;
@@ -1640,7 +1645,8 @@ static int __init fec_enet_init(void)
                 printk("FEC initialization failed.\n");
                 return 1;
         }
-       cbd_base = (cbd_t *)consistent_alloc(GFP_KERNEL, PAGE_SIZE, &mem_addr);
+       cbd_base = (cbd_t *)dma_alloc_coherent(dev->class_dev.dev, PAGE_SIZE,
+                                              &mem_addr, GFP_KERNEL);
  
         /* Set receive and transmit descriptor base.
         */
@@ -1657,7 +1663,10 @@ static int __init fec_enet_init(void)
  
                 /* Allocate a page.
                 */
-               ba = (unsigned char *)consistent_alloc(GFP_KERNEL, PAGE_SIZE, &mem_addr);
+               ba = (unsigned char *)dma_alloc_coherent(dev->class_dev.dev,
+                                                        PAGE_SIZE,
+                                                        &mem_addr,
+                                                        GFP_KERNEL);
                 /* BUG: no check for failure */
  
                 /* Initialize the BD for every fragment in the page.
diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig

index 2c2da9b43b7a062c52d11a8708ac284c9797a7bf..e6fa1d1cc03a210991b6b029cd9c6bb50106037e 100644 (file)
--- a/arch/ppc/Kconfig
+++ b/arch/ppc/Kconfig
@@ -558,6 +558,7 @@ config PPC_MULTIPLATFORM
  
  config APUS
         bool "Amiga-APUS"
+       depends on BROKEN
         help
           Select APUS if configuring for a PowerUP Amiga.
           More information is available at:
@@ -647,6 +648,7 @@ config PAL4
  
  config GEMINI
         bool "Synergy-Gemini"
+       depends on BROKEN
         help
           Select Gemini if configuring for a Synergy Microsystems' Gemini
           series Single Board Computer.  More information is available at:
@@ -909,6 +911,7 @@ config PPCBUG_NVRAM
         default y if PPC_PREP
  
  config SMP
+       depends on PPC_STD_MMU
         bool "Symmetric multi-processing support"
         ---help---
           This enables support for systems with more than one CPU. If you have
@@ -928,7 +931,7 @@ config SMP
  
  config IRQ_ALL_CPUS
         bool "Distribute interrupts on all CPUs by default"
-       depends on SMP
+       depends on SMP && !MV64360
         help
           This option gives the kernel permission to distribute IRQs across
           multiple CPUs.  Saying N here will route all IRQs to the first
@@ -1119,7 +1122,9 @@ config PROC_HARDWARE
  
  source "drivers/zorro/Kconfig"
  
+if !44x || BROKEN
  source kernel/power/Kconfig
+endif
  
  config SECCOMP
         bool "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/ppc/Makefile b/arch/ppc/Makefile

index f9b0d778dd82ee1d0ec03b2273f46511a51cdf4c..d1b6e6dcb50414af2ca4fe38ab3f463bd2a8ded5 100644 (file)
--- a/arch/ppc/Makefile
+++ b/arch/ppc/Makefile
@@ -21,11 +21,13 @@ CC          := $(CC) -m32
  endif
  
  LDFLAGS_vmlinux        := -Ttext $(KERNELLOAD) -Bstatic
-CPPFLAGS       += -Iarch/$(ARCH)
+CPPFLAGS       += -Iarch/$(ARCH) -Iinclude3
  AFLAGS         += -Iarch/$(ARCH)
  CFLAGS         += -Iarch/$(ARCH) -msoft-float -pipe \
                 -ffixed-r2 -mmultiple
  CPP            = $(CC) -E $(CFLAGS)
+# Temporary hack until we have migrated to asm-powerpc
+LINUXINCLUDE    += -Iinclude3
  
  CHECKFLAGS     += -D__powerpc__
  
@@ -101,6 +103,7 @@ endef
  
  archclean:
         $(Q)$(MAKE) $(clean)=arch/ppc/boot
+       $(Q)rm -rf include3
  
  prepare: include/asm-$(ARCH)/offsets.h checkbin
  
@@ -110,6 +113,12 @@ arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \
  include/asm-$(ARCH)/offsets.h: arch/$(ARCH)/kernel/asm-offsets.s
         $(call filechk,gen-asm-offsets)
  
+# Temporary hack until we have migrated to asm-powerpc
+include/asm: include3/asm
+include3/asm:
+       $(Q)if [ ! -d include3 ]; then mkdir -p include3; fi
+       $(Q)ln -fsn $(srctree)/include/asm-powerpc include3/asm
+
  # Use the file '.tmp_gas_check' for binutils tests, as gas won't output
  # to stdout and these checks are run even on install targets.
  TOUT   := .tmp_gas_check
diff --git a/arch/ppc/boot/simple/Makefile b/arch/ppc/boot/simple/Makefile

index 991b4cbb83c8953e14159d64e542f2fa98b5a75e..d4dc4fa79647e856fcd5a853dd0e41106cab9b02 100644 (file)
--- a/arch/ppc/boot/simple/Makefile
+++ b/arch/ppc/boot/simple/Makefile
@@ -61,6 +61,12 @@ zimageinitrd-$(CONFIG_IBM_OPENBIOS)  := zImage.initrd-TREE
           end-$(CONFIG_EMBEDDEDBOOT)    := embedded
          misc-$(CONFIG_EMBEDDEDBOOT)    := misc-embedded.o
  
+      zimage-$(CONFIG_BAMBOO)          := zImage-TREE
+zimageinitrd-$(CONFIG_BAMBOO)          := zImage.initrd-TREE
+         end-$(CONFIG_BAMBOO)          := bamboo
+  entrypoint-$(CONFIG_BAMBOO)          := 0x01000000
+     extra.o-$(CONFIG_BAMBOO)          := pibs.o
+
        zimage-$(CONFIG_EBONY)           := zImage-TREE
  zimageinitrd-$(CONFIG_EBONY)           := zImage.initrd-TREE
           end-$(CONFIG_EBONY)           := ebony
diff --git a/arch/ppc/boot/simple/pibs.c b/arch/ppc/boot/simple/pibs.c

index 1348740e503f8d9abc48a98439fcf6b2a1520d8f..67222d57c3456476f797538f3d90fb968a81fd53 100644 (file)
--- a/arch/ppc/boot/simple/pibs.c
+++ b/arch/ppc/boot/simple/pibs.c
@@ -91,9 +91,11 @@ load_kernel(unsigned long load_addr, int num_words, unsigned long cksum,
  
         mac64 = simple_strtoull((char *)PIBS_MAC_BASE, 0, 16);
         memcpy(hold_residual->bi_enetaddr, (char *)&mac64+2, 6);
-#ifdef CONFIG_440GX
+#if defined(CONFIG_440GX) || defined(CONFIG_440EP)
         mac64 = simple_strtoull((char *)(PIBS_MAC_BASE+PIBS_MAC_OFFSET), 0, 16);
         memcpy(hold_residual->bi_enet1addr, (char *)&mac64+2, 6);
+#endif
+#ifdef CONFIG_440GX
         mac64 = simple_strtoull((char *)(PIBS_MAC_BASE+PIBS_MAC_OFFSET*2), 0, 16);
         memcpy(hold_residual->bi_enet2addr, (char *)&mac64+2, 6);
         mac64 = simple_strtoull((char *)(PIBS_MAC_BASE+PIBS_MAC_OFFSET*3), 0, 16);
diff --git a/arch/ppc/boot/utils/addRamDisk.c b/arch/ppc/boot/utils/addRamDisk.c

deleted file mode 100644 (file)

index 93400df..0000000
--- a/arch/ppc/boot/utils/addRamDisk.c
+++ /dev/null
@@ -1,203 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <netinet/in.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#define ElfHeaderSize  (64 * 1024)
-#define ElfPages  (ElfHeaderSize / 4096)
-#define KERNELBASE (0xc0000000)
-
-void get4k(FILE *file, char *buf )
-{
-    unsigned j;
-    unsigned num = fread(buf, 1, 4096, file);
-    for (  j=num; j<4096; ++j )
-       buf[j] = 0;
-}
-
-void put4k(FILE *file, char *buf )
-{
-    fwrite(buf, 1, 4096, file);
-}
-
-void death(const char *msg, FILE *fdesc, const char *fname)
-{
-    printf(msg);
-    fclose(fdesc);
-    unlink(fname);
-    exit(1);
-}
-
-int main(int argc, char **argv)
-{
-    char inbuf[4096];
-    FILE *ramDisk = NULL;
-    FILE *inputVmlinux = NULL;
-    FILE *outputVmlinux = NULL;
-    unsigned i = 0;
-    u_int32_t ramFileLen = 0;
-    u_int32_t ramLen = 0;
-    u_int32_t roundR = 0;
-    u_int32_t kernelLen = 0;
-    u_int32_t actualKernelLen = 0;
-    u_int32_t round = 0;
-    u_int32_t roundedKernelLen = 0;
-    u_int32_t ramStartOffs = 0;
-    u_int32_t ramPages = 0;
-    u_int32_t roundedKernelPages = 0;
-    u_int32_t hvReleaseData = 0;
-    u_int32_t eyeCatcher = 0xc8a5d9c4;
-    u_int32_t naca = 0;
-    u_int32_t xRamDisk = 0;
-    u_int32_t xRamDiskSize = 0;
-    if ( argc < 2 ) {
-       printf("Name of RAM disk file missing.\n");
-       exit(1);
-    }
-
-    if ( argc < 3 ) {
-       printf("Name of vmlinux file missing.\n");
-       exit(1);
-    }
-
-    if ( argc < 4 ) {
-       printf("Name of vmlinux output file missing.\n");
-       exit(1);
-    }
-
-    ramDisk = fopen(argv[1], "r");
-    if ( ! ramDisk ) {
-       printf("RAM disk file \"%s\" failed to open.\n", argv[1]);
-       exit(1);
-    }
-    inputVmlinux = fopen(argv[2], "r");
-    if ( ! inputVmlinux ) {
-       printf("vmlinux file \"%s\" failed to open.\n", argv[2]);
-       exit(1);
-    }
-    outputVmlinux = fopen(argv[3], "w+");
-    if ( ! outputVmlinux ) {
-       printf("output vmlinux file \"%s\" failed to open.\n", argv[3]);
-       exit(1);
-    }
-    fseek(ramDisk, 0, SEEK_END);
-    ramFileLen = ftell(ramDisk);
-    fseek(ramDisk, 0, SEEK_SET);
-    printf("%s file size = %d\n", argv[1], ramFileLen);
-
-    ramLen = ramFileLen;
-
-    roundR = 4096 - (ramLen % 4096);
-    if ( roundR ) {
-       printf("Rounding RAM disk file up to a multiple of 4096, adding %d\n", roundR);
-       ramLen += roundR;
-    }
-
-    printf("Rounded RAM disk size is %d\n", ramLen);
-    fseek(inputVmlinux, 0, SEEK_END);
-    kernelLen = ftell(inputVmlinux);
-    fseek(inputVmlinux, 0, SEEK_SET);
-    printf("kernel file size = %d\n", kernelLen);
-    if ( kernelLen == 0 ) {
-       printf("You must have a linux kernel specified as argv[2]\n");
-       exit(1);
-    }
-
-    actualKernelLen = kernelLen - ElfHeaderSize;
-
-    printf("actual kernel length (minus ELF header) = %d\n", actualKernelLen);
-
-    round = actualKernelLen % 4096;
-    roundedKernelLen = actualKernelLen;
-    if ( round )
-       roundedKernelLen += (4096 - round);
-
-    printf("actual kernel length rounded up to a 4k multiple = %d\n", roundedKernelLen);
-
-    ramStartOffs = roundedKernelLen;
-    ramPages = ramLen / 4096;
-
-    printf("RAM disk pages to copy = %d\n", ramPages);
-
-    // Copy 64K ELF header
-      for (i=0; i<(ElfPages); ++i) {
-         get4k( inputVmlinux, inbuf );
-         put4k( outputVmlinux, inbuf );
-      }
-
-    roundedKernelPages = roundedKernelLen / 4096;
-
-    fseek(inputVmlinux, ElfHeaderSize, SEEK_SET);
-
-    for ( i=0; i<roundedKernelPages; ++i ) {
-       get4k( inputVmlinux, inbuf );
-       put4k( outputVmlinux, inbuf );
-    }
-
-    for ( i=0; i<ramPages; ++i ) {
-       get4k( ramDisk, inbuf );
-       put4k( outputVmlinux, inbuf );
-    }
-
-    /* Close the input files */
-    fclose(ramDisk);
-    fclose(inputVmlinux);
-    /* And flush the written output file */
-    fflush(outputVmlinux);
-
-    /* fseek to the hvReleaseData pointer */
-    fseek(outputVmlinux, ElfHeaderSize + 0x24, SEEK_SET);
-    if (fread(&hvReleaseData, 4, 1, outputVmlinux) != 1) {
-        death("Could not read hvReleaseData pointer\n", outputVmlinux, argv[3]);
-    }
-    hvReleaseData = ntohl(hvReleaseData); /* Convert to native int */
-    printf("hvReleaseData is at %08x\n", hvReleaseData);
-
-    /* fseek to the hvReleaseData */
-    fseek(outputVmlinux, ElfHeaderSize + hvReleaseData, SEEK_SET);
-    if (fread(inbuf, 0x40, 1, outputVmlinux) != 1) {
-        death("Could not read hvReleaseData\n", outputVmlinux, argv[3]);
-    }
-    /* Check hvReleaseData sanity */
-    if (memcmp(inbuf, &eyeCatcher, 4) != 0) {
-        death("hvReleaseData is invalid\n", outputVmlinux, argv[3]);
-    }
-    /* Get the naca pointer */
-    naca = ntohl(*((u_int32_t *) &inbuf[0x0c])) - KERNELBASE;
-    printf("naca is at %08x\n", naca);
-
-    /* fseek to the naca */
-    fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET);
-    if (fread(inbuf, 0x18, 1, outputVmlinux) != 1) {
-        death("Could not read naca\n", outputVmlinux, argv[3]);
-    }
-    xRamDisk = ntohl(*((u_int32_t *) &inbuf[0x0c]));
-    xRamDiskSize = ntohl(*((u_int32_t *) &inbuf[0x14]));
-    /* Make sure a RAM disk isn't already present */
-    if ((xRamDisk != 0) || (xRamDiskSize != 0)) {
-        death("RAM disk is already attached to this kernel\n", outputVmlinux, argv[3]);
-    }
-    /* Fill in the values */
-    *((u_int32_t *) &inbuf[0x0c]) = htonl(ramStartOffs);
-    *((u_int32_t *) &inbuf[0x14]) = htonl(ramPages);
-
-    /* Write out the new naca */
-    fflush(outputVmlinux);
-    fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET);
-    if (fwrite(inbuf, 0x18, 1, outputVmlinux) != 1) {
-        death("Could not write naca\n", outputVmlinux, argv[3]);
-    }
-    printf("RAM Disk of 0x%x pages size is attached to the kernel at offset 0x%08x\n",
-            ramPages, ramStartOffs);
-
-    /* Done */
-    fclose(outputVmlinux);
-    /* Set permission to executable */
-    chmod(argv[3], S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH);
-
-    return 0;
-}
-
diff --git a/arch/ppc/configs/bamboo_defconfig b/arch/ppc/configs/bamboo_defconfig

new file mode 100644 (file)

index 0000000..0ba4e70
--- /dev/null
+++ b/arch/ppc/configs/bamboo_defconfig
@@ -0,0 +1,943 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.12
+# Tue Jun 28 15:24:25 2005
+#
+CONFIG_MMU=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_HAVE_DEC_LOCK=y
+CONFIG_PPC=y
+CONFIG_PPC32=y
+CONFIG_GENERIC_NVRAM=y
+CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+# CONFIG_HOTPLUG is not set
+CONFIG_KOBJECT_UEVENT=y
+# CONFIG_IKCONFIG is not set
+CONFIG_EMBEDDED=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+
+#
+# Processor
+#
+# CONFIG_6xx is not set
+# CONFIG_40x is not set
+CONFIG_44x=y
+# CONFIG_POWER3 is not set
+# CONFIG_POWER4 is not set
+# CONFIG_8xx is not set
+# CONFIG_E200 is not set
+# CONFIG_E500 is not set
+CONFIG_PPC_FPU=y
+CONFIG_BOOKE=y
+CONFIG_PTE_64BIT=y
+CONFIG_PHYS_64BIT=y
+# CONFIG_MATH_EMULATION is not set
+# CONFIG_KEXEC is not set
+# CONFIG_CPU_FREQ is not set
+CONFIG_4xx=y
+
+#
+# IBM 4xx options
+#
+CONFIG_BAMBOO=y
+# CONFIG_EBONY is not set
+# CONFIG_LUAN is not set
+# CONFIG_OCOTEA is not set
+CONFIG_440EP=y
+CONFIG_440=y
+CONFIG_IBM440EP_ERR42=y
+CONFIG_IBM_OCP=y
+# CONFIG_PPC4xx_DMA is not set
+CONFIG_PPC_GEN550=y
+# CONFIG_PM is not set
+CONFIG_NOT_COHERENT_CACHE=y
+
+#
+# Platform options
+#
+# CONFIG_PC_KEYBOARD is not set
+# CONFIG_SMP is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_HIGHMEM is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="ip=on"
+CONFIG_SECCOMP=y
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options
+#
+CONFIG_PCI=y
+CONFIG_PCI_DOMAINS=y
+# CONFIG_PCI_LEGACY_PROC is not set
+# CONFIG_PCI_NAMES is not set
+# CONFIG_PCI_DEBUG is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# Advanced setup
+#
+# CONFIG_ADVANCED_OPTIONS is not set
+
+#
+# Default settings for advanced configuration options are used
+#
+CONFIG_HIGHMEM_START=0xfe000000
+CONFIG_LOWMEM_SIZE=0x30000000
+CONFIG_KERNEL_START=0xc0000000
+CONFIG_TASK_SIZE=0x80000000
+CONFIG_CONSISTENT_START=0xff100000
+CONFIG_CONSISTENT_SIZE=0x00200000
+CONFIG_BOOT_LOAD=0x01000000
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+# CONFIG_STANDALONE is not set
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_UB is not set
+# CONFIG_BLK_DEV_RAM is not set
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_LBD is not set
+# CONFIG_CDROM_PKTCDVD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+# CONFIG_BLK_DEV_IDECD is not set
+# CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_IDEFLOPPY is not set
+# CONFIG_BLK_DEV_IDESCSI is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_IDEPCI=y
+# CONFIG_IDEPCI_SHARE_IRQ is not set
+# CONFIG_BLK_DEV_OFFBOARD is not set
+# CONFIG_BLK_DEV_GENERIC is not set
+# CONFIG_BLK_DEV_OPTI621 is not set
+# CONFIG_BLK_DEV_SL82C105 is not set
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+# CONFIG_IDEDMA_PCI_AUTO is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+CONFIG_BLK_DEV_CMD64X=y
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT34X is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+# CONFIG_BLK_DEV_PIIX is not set
+# CONFIG_BLK_DEV_IT821X is not set
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+# CONFIG_BLK_DEV_SVWKS is not set
+# CONFIG_BLK_DEV_SIIMAGE is not set
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_IDE_ARM is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+# CONFIG_IDEDMA_AUTO is not set
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+# CONFIG_BLK_DEV_SD is not set
+CONFIG_CHR_DEV_ST=y
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=y
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+CONFIG_SCSI_QLA2XXX=y
+# CONFIG_SCSI_QLA21XX is not set
+# CONFIG_SCSI_QLA22XX is not set
+# CONFIG_SCSI_QLA2300 is not set
+# CONFIG_SCSI_QLA2322 is not set
+# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_LPFC is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+# CONFIG_FUSION_SPI is not set
+# CONFIG_FUSION_FC is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Macintosh device drivers
+#
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+# CONFIG_IP_PNP_DHCP is not set
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_IP_TCPDIAG=y
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+
+#
+# IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+
+#
+# IP: Netfilter Configuration
+#
+# CONFIG_IP_NF_CONNTRACK is not set
+# CONFIG_IP_NF_CONNTRACK_MARK is not set
+# CONFIG_IP_NF_QUEUE is not set
+# CONFIG_IP_NF_IPTABLES is not set
+# CONFIG_IP_NF_ARPTABLES is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_NET_VENDOR_3COM is not set
+
+#
+# Tulip family network device support
+#
+# CONFIG_NET_TULIP is not set
+# CONFIG_HP100 is not set
+CONFIG_IBM_EMAC=y
+# CONFIG_IBM_EMAC_ERRMSG is not set
+CONFIG_IBM_EMAC_RXB=64
+CONFIG_IBM_EMAC_TXB=8
+CONFIG_IBM_EMAC_FGAP=8
+CONFIG_IBM_EMAC_SKBRES=0
+CONFIG_NET_PCI=y
+# CONFIG_PCNET32 is not set
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_DGRS is not set
+CONFIG_EEPRO100=y
+# CONFIG_E100 is not set
+# CONFIG_FEALNX is not set
+CONFIG_NATSEMI=y
+# CONFIG_NE2K_PCI is not set
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
+# CONFIG_VIA_RHINE is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=y
+# CONFIG_E1000_NAPI is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SKGE is not set
+# CONFIG_SK98LIN is not set
+# CONFIG_VIA_VELOCITY is not set
+# CONFIG_TIGON3 is not set
+# CONFIG_BNX2 is not set
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_SERIO_PCIPS2 is not set
+# CONFIG_SERIO_LIBPS2 is not set
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+# CONFIG_SERIAL_8250_MANY_PORTS is not set
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+# CONFIG_SERIAL_8250_RSA is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_NVRAM is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_RAW_DRIVER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB=y
+CONFIG_USB_DEBUG=y
+
+#
+# Miscellaneous USB options
+#
+# CONFIG_USB_DEVICEFS is not set
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+
+#
+# USB Host Controller Drivers
+#
+# CONFIG_USB_EHCI_HCD is not set
+# CONFIG_USB_ISP116X_HCD is not set
+# CONFIG_USB_OHCI_HCD is not set
+# CONFIG_USB_UHCI_HCD is not set
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+#
+# CONFIG_USB_STORAGE is not set
+
+#
+# USB Input Devices
+#
+# CONFIG_USB_HID is not set
+
+#
+# USB HID Boot Protocol drivers
+#
+# CONFIG_USB_KBD is not set
+# CONFIG_USB_MOUSE is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_ACECAD is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_ITMTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+CONFIG_USB_PEGASUS=y
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+
+#
+# USB DSL modem support
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# InfiniBand support
+#
+# CONFIG_INFINIBAND is not set
+
+#
+# SN Devices
+#
+
+#
+# File systems
+#
+# CONFIG_EXT2_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_JBD is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+
+#
+# XFS support
+#
+# CONFIG_XFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVPTS_FS_XATTR is not set
+# CONFIG_TMPFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+# CONFIG_NFS_V3 is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+
+#
+# Native Language Support
+#
+# CONFIG_NLS is not set
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+
+#
+# Profiling support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_FS is not set
+# CONFIG_KGDB is not set
+# CONFIG_XMON is not set
+CONFIG_BDI_SWITCH=y
+# CONFIG_SERIAL_TEXT_DEBUG is not set
+CONFIG_PPC_OCP=y
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
+
+#
+# Hardware crypto devices
+#
diff --git a/arch/ppc/kernel/cputable.c b/arch/ppc/kernel/cputable.c

index 50936cda0af9a723977c7897c44c9aa080d90daa..8a3d74f2531e840a3a31717cbee94e24ab530baf 100644 (file)
--- a/arch/ppc/kernel/cputable.c
+++ b/arch/ppc/kernel/cputable.c
@@ -852,6 +852,26 @@ struct cpu_spec    cpu_specs[] = {
  
  #endif /* CONFIG_40x */
  #ifdef CONFIG_44x
+       {
+               .pvr_mask               = 0xf0000fff,
+               .pvr_value              = 0x40000850,
+               .cpu_name               = "440EP Rev. A",
+               .cpu_features           = CPU_FTR_SPLIT_ID_CACHE |
+                       CPU_FTR_USE_TB,
+               .cpu_user_features      = COMMON_PPC, /* 440EP has an FPU */
+               .icache_bsize           = 32,
+               .dcache_bsize           = 32,
+       },
+       {
+               .pvr_mask               = 0xf0000fff,
+               .pvr_value              = 0x400008d3,
+               .cpu_name               = "440EP Rev. B",
+               .cpu_features           = CPU_FTR_SPLIT_ID_CACHE |
+                       CPU_FTR_USE_TB,
+               .cpu_user_features      = COMMON_PPC, /* 440EP has an FPU */
+               .icache_bsize           = 32,
+               .dcache_bsize           = 32,
+       },
         {       /* 440GP Rev. B */
                 .pvr_mask               = 0xf0000fff,
                 .pvr_value              = 0x40000440,
diff --git a/arch/ppc/kernel/entry.S b/arch/ppc/kernel/entry.S

index d4df68629cc6a750a22e1106018057b4fef9952c..cb83045e2edfb6408cf88a2b4e58f921d7d6ec53 100644 (file)
--- a/arch/ppc/kernel/entry.S
+++ b/arch/ppc/kernel/entry.S
@@ -215,6 +215,7 @@ syscall_dotrace_cont:
         lwzx    r10,r10,r0      /* Fetch system call handler [ptr] */
         mtlr    r10
         addi    r9,r1,STACK_FRAME_OVERHEAD
+       PPC440EP_ERR42
         blrl                    /* Call handler */
         .globl  ret_from_syscall
  ret_from_syscall:
diff --git a/arch/ppc/kernel/head_44x.S b/arch/ppc/kernel/head_44x.S

index 72ee8f33bde465471f9dd90b5341d7cc4d1aea6d..69ff3a9961e8728d061e258c0b7121a0aa71e97c 100644 (file)
--- a/arch/ppc/kernel/head_44x.S
+++ b/arch/ppc/kernel/head_44x.S
@@ -190,7 +190,9 @@ skpinv:     addi    r4,r4,1                         /* Increment */
  
         /* xlat fields */
         lis     r4,UART0_PHYS_IO_BASE@h         /* RPN depends on SoC */
+#ifndef CONFIG_440EP
         ori     r4,r4,0x0001            /* ERPN is 1 for second 4GB page */
+#endif
  
         /* attrib fields */
         li      r5,0
@@ -228,6 +230,16 @@ skpinv:    addi    r4,r4,1                         /* Increment */
         lis     r4,interrupt_base@h     /* IVPR only uses the high 16-bits */
         mtspr   SPRN_IVPR,r4
  
+#ifdef CONFIG_440EP
+       /* Clear DAPUIB flag in CCR0 (enable APU between CPU and FPU) */
+       mfspr   r2,SPRN_CCR0
+       lis     r3,0xffef
+       ori     r3,r3,0xffff
+       and     r2,r2,r3
+       mtspr   SPRN_CCR0,r2
+       isync
+#endif
+
         /*
          * This is where the main kernel code starts.
          */
diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S

index c2fb85b49a11ae1a1f2f319478d96dffbdbee1f7..ce71b4a0158595b6d20791cb72dc0f4d932290f9 100644 (file)
--- a/arch/ppc/kernel/misc.S
+++ b/arch/ppc/kernel/misc.S
@@ -1145,6 +1145,7 @@ _GLOBAL(kernel_thread)
         stwu    r0,-16(r1)
         mtlr    r30             /* fn addr in lr */
         mr      r3,r31          /* load arg and call fn */
+       PPC440EP_ERR42
         blrl
         li      r0,__NR_exit    /* exit if function returns */
         li      r3,0
diff --git a/arch/ppc/kernel/pci.c b/arch/ppc/kernel/pci.c

index 70cfb6ffd877d8a9007abc859d0f4e87d40dd3f5..7b3586a3bf302f22289ab5cf6cbe2e874edeed09 100644 (file)
--- a/arch/ppc/kernel/pci.c
+++ b/arch/ppc/kernel/pci.c
@@ -160,6 +160,21 @@ void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
  }
  EXPORT_SYMBOL(pcibios_resource_to_bus);
  
+void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                            struct pci_bus_region *region)
+{
+       unsigned long offset = 0;       /* used as-is when no branch below matches */
+       struct pci_controller *hose = dev->sysdata;
+       /* Sets res->start/end to region->start/end plus an offset chosen by res->flags. */
+       if (hose && res->flags & IORESOURCE_IO)         /* parses as hose && (flags & IO) */
+               offset = (unsigned long)hose->io_base_virt - isa_io_base;
+       else if (hose && res->flags & IORESOURCE_MEM)
+               offset = hose->pci_mem_offset;
+       res->start = region->start + offset;
+       res->end = region->end + offset;
+}
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+
  /*
   * We need to avoid collisions with `mirrored' VGA ports
   * and other strange ISA hardware, so we always want the
diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c

index d59ad07de8e7d83b3931b90e3777aef95d7df532..e7d40cc6c1b6fb266a5ab2fe49e72f7046193fc1 100644 (file)
--- a/arch/ppc/kernel/ppc_ksyms.c
+++ b/arch/ppc/kernel/ppc_ksyms.c
@@ -324,7 +324,7 @@ EXPORT_SYMBOL(__res);
  
  EXPORT_SYMBOL(next_mmu_context);
  EXPORT_SYMBOL(set_context);
-EXPORT_SYMBOL(handle_mm_fault); /* For MOL */
+EXPORT_SYMBOL_GPL(__handle_mm_fault); /* For MOL */
  EXPORT_SYMBOL(disarm_decr);
  #ifdef CONFIG_PPC_STD_MMU
  extern long mol_trampoline;
diff --git a/arch/ppc/kernel/signal.c b/arch/ppc/kernel/signal.c

index 8aaeb6f4e750249026e257b3e416604eb09c075f..2244bf91e593b1fd1ec13b596b4515e6c829987e 100644 (file)
--- a/arch/ppc/kernel/signal.c
+++ b/arch/ppc/kernel/signal.c
@@ -759,13 +759,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
         else
                 handle_signal(signr, &ka, &info, oldset, regs, newsp);
  
-       if (!(ka.sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka.sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka.sa.sa_mask);
+       if (!(ka.sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked, signr);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  
         return 1;
  }
diff --git a/arch/ppc/platforms/4xx/Kconfig b/arch/ppc/platforms/4xx/Kconfig

index a0612a86455a1d80620f3ea52e764323b2886bcf..805dd98908a3cd4e90164f2e0c1cde80d0c8ef5e 100644 (file)
--- a/arch/ppc/platforms/4xx/Kconfig
+++ b/arch/ppc/platforms/4xx/Kconfig
@@ -3,6 +3,11 @@ config 4xx
         depends on 40x || 44x
         default y
  
+config WANT_EARLY_SERIAL
+       bool
+       select SERIAL_8250
+       default n
+
  menu "IBM 4xx options"
         depends on 4xx
  
@@ -18,6 +23,7 @@ config ASH
  
  config BUBINGA
         bool "Bubinga"
+       select WANT_EARLY_SERIAL
         help
           This option enables support for the IBM 405EP evaluation board.
  
@@ -68,18 +74,27 @@ choice
         depends on 44x
         default EBONY
  
+config BAMBOO
+       bool "Bamboo"
+       select WANT_EARLY_SERIAL
+       help
+         This option enables support for the IBM PPC440EP evaluation board.
+
  config EBONY
         bool "Ebony"
+       select WANT_EARLY_SERIAL
         help
           This option enables support for the IBM PPC440GP evaluation board.
  
  config LUAN
         bool "Luan"
+       select WANT_EARLY_SERIAL
         help
           This option enables support for the IBM PPC440SP evaluation board.
  
  config OCOTEA
         bool "Ocotea"
+       select WANT_EARLY_SERIAL
         help
           This option enables support for the IBM PPC440GX evaluation board.
  
@@ -98,6 +113,12 @@ config NP405H
         depends on ASH
         default y
  
+config 440EP
+       bool
+       depends on BAMBOO
+       select PPC_FPU
+       default y
+
  config 440GP
         bool
         depends on EBONY
@@ -115,7 +136,7 @@ config 440SP
  
  config 440
         bool
-       depends on 440GP || 440SP
+       depends on 440GP || 440SP || 440EP
         default y
  
  config 440A
@@ -123,6 +144,11 @@ config 440A
         depends on 440GX
         default y
  
+config IBM440EP_ERR42
+       bool
+       depends on 440EP
+       default y
+
  # All 405-based cores up until the 405GPR and 405EP have this errata.
  config IBM405_ERR77
         bool
@@ -142,7 +168,7 @@ config BOOKE
  
  config IBM_OCP
         bool
-       depends on ASH || BUBINGA || CPCI405 || EBONY || EP405 || LUAN || OCOTEA || REDWOOD_5 || REDWOOD_6 || SYCAMORE || WALNUT
+       depends on ASH || BAMBOO || BUBINGA || CPCI405 || EBONY || EP405 || LUAN || OCOTEA || REDWOOD_5 || REDWOOD_6 || SYCAMORE || WALNUT
         default y
  
  config XILINX_OCP
@@ -214,10 +240,6 @@ config PPC_GEN550
         depends on 4xx
         default y
  
-config PM
-       bool "Power Management support (EXPERIMENTAL)"
-       depends on 4xx && EXPERIMENTAL
-
  choice
         prompt "TTYS0 device and default console"
         depends on 40x
diff --git a/arch/ppc/platforms/4xx/Makefile b/arch/ppc/platforms/4xx/Makefile

index ea470c6adbb67f7c07f11c99273fcfef0a1e2799..844c3b5066e8b103f487b1cd14e5f8e0da554721 100644 (file)
--- a/arch/ppc/platforms/4xx/Makefile
+++ b/arch/ppc/platforms/4xx/Makefile
@@ -2,6 +2,7 @@
  # Makefile for the PowerPC 4xx linux kernel.
  
  obj-$(CONFIG_ASH)              += ash.o
+obj-$(CONFIG_BAMBOO)           += bamboo.o
  obj-$(CONFIG_CPCI405)          += cpci405.o
  obj-$(CONFIG_EBONY)            += ebony.o
  obj-$(CONFIG_EP405)            += ep405.o
@@ -19,6 +20,7 @@ obj-$(CONFIG_405GP)           += ibm405gp.o
  obj-$(CONFIG_REDWOOD_5)                += ibmstb4.o
  obj-$(CONFIG_NP405H)           += ibmnp405h.o
  obj-$(CONFIG_REDWOOD_6)                += ibmstbx25.o
+obj-$(CONFIG_440EP)            += ibm440ep.o
  obj-$(CONFIG_440GP)            += ibm440gp.o
  obj-$(CONFIG_440GX)            += ibm440gx.o
  obj-$(CONFIG_440SP)            += ibm440sp.o
diff --git a/arch/ppc/platforms/4xx/bamboo.c b/arch/ppc/platforms/4xx/bamboo.c

new file mode 100644 (file)

index 0000000..f116787
--- /dev/null
+++ b/arch/ppc/platforms/4xx/bamboo.c
@@ -0,0 +1,427 @@
+/*
+ * arch/ppc/platforms/4xx/bamboo.c
+ *
+ * Bamboo board specific routines
+ *
+ * Wade Farnsworth <wfarnsworth@mvista.com>
+ * Copyright 2004 MontaVista Software Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/types.h>
+#include <linux/major.h>
+#include <linux/blkdev.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/ide.h>
+#include <linux/initrd.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/tty.h>
+#include <linux/serial.h>
+#include <linux/serial_core.h>
+#include <linux/ethtool.h>
+
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/dma.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ocp.h>
+#include <asm/pci-bridge.h>
+#include <asm/time.h>
+#include <asm/todc.h>
+#include <asm/bootinfo.h>
+#include <asm/ppc4xx_pic.h>
+#include <asm/ppcboot.h>
+
+#include <syslib/gen550.h>
+#include <syslib/ibm440gx_common.h>
+
+/*
+ * This is a horrible kludge, we eventually need to abstract this
+ * generic PHY stuff, so the  standard phy mode defines can be
+ * easily used from arch code.
+ */
+#include "../../../../drivers/net/ibm_emac/ibm_emac_phy.h"
+
+bd_t __res;
+
+static struct ibm44x_clocks clocks __initdata;
+
+/*
+ * Bamboo external IRQ triggering/polarity settings
+ */
+unsigned char ppc4xx_uic_ext_irq_cfg[] __initdata = {
+       (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* IRQ0: Ethernet transceiver */
+       (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* IRQ1: Expansion connector */
+       (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* IRQ2: PCI slot 0 */
+       (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* IRQ3: PCI slot 1 */
+       (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* IRQ4: PCI slot 2 */
+       (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* IRQ5: PCI slot 3 */
+       (IRQ_SENSE_EDGE  | IRQ_POLARITY_NEGATIVE), /* IRQ6: SMI pushbutton */
+       (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* IRQ7: EXT */
+       (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* IRQ8: EXT */
+       (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* IRQ9: EXT */
+};
+
+static void __init
+bamboo_calibrate_decr(void)
+{
+       unsigned int freq;
+
+       if (mfspr(SPRN_CCR1) & CCR1_TCS)
+               freq = BAMBOO_TMRCLK;
+       else
+               freq = clocks.cpu;
+
+       ibm44x_calibrate_decr(freq);
+
+}
+
+static int
+bamboo_show_cpuinfo(struct seq_file *m)
+{
+       seq_printf(m, "vendor\t\t: IBM\n");
+       seq_printf(m, "machine\t\t: PPC440EP EVB (Bamboo)\n");
+
+       return 0;
+}
+
+static inline int
+bamboo_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin)
+{
+       static char pci_irq_table[][4] =
+       /*
+        *      PCI IDSEL/INTPIN->INTLINE
+        *         A   B   C   D
+        */
+       {
+               { 28, 28, 28, 28 },     /* IDSEL 1 - PCI Slot 0 */
+               { 27, 27, 27, 27 },     /* IDSEL 2 - PCI Slot 1 */
+               { 26, 26, 26, 26 },     /* IDSEL 3 - PCI Slot 2 */
+               { 25, 25, 25, 25 },     /* IDSEL 4 - PCI Slot 3 */
+       };
+
+       const long min_idsel = 1, max_idsel = 4, irqs_per_slot = 4;
+       return PCI_IRQ_TABLE_LOOKUP;
+}
+
+static void __init bamboo_set_emacdata(void)
+{
+       unsigned char * selection1_base;
+       struct ocp_def *def;
+       struct ocp_func_emac_data *emacdata;
+       u8 selection1_val;
+       int mode;
+
+       selection1_base = ioremap64(BAMBOO_FPGA_SELECTION1_REG_ADDR, 16);
+       selection1_val = readb(selection1_base);
+       iounmap((void *) selection1_base);
+       if (BAMBOO_SEL_MII(selection1_val))
+               mode = PHY_MODE_MII;
+       else if (BAMBOO_SEL_RMII(selection1_val))
+               mode = PHY_MODE_RMII;
+       else
+               mode = PHY_MODE_SMII;
+
+       /* Set mac_addr and phy mode for each EMAC */
+
+       def = ocp_get_one_device(OCP_VENDOR_IBM, OCP_FUNC_EMAC, 0);
+       emacdata = def->additions;
+       memcpy(emacdata->mac_addr, __res.bi_enetaddr, 6);
+       emacdata->phy_mode = mode;
+
+       def = ocp_get_one_device(OCP_VENDOR_IBM, OCP_FUNC_EMAC, 1);
+       emacdata = def->additions;
+       memcpy(emacdata->mac_addr, __res.bi_enet1addr, 6);
+       emacdata->phy_mode = mode;
+}
+
+static int
+bamboo_exclude_device(unsigned char bus, unsigned char devfn)
+{
+       return (bus == 0 && devfn == 0);
+}
+
+/* Register accessors relative to a pci_reg_base variable that must be in
+ * scope at the point of use; offsets are byte offsets from that base. */
+#define PCI_READW(offset) \
+       (readw((void *)((u32)pci_reg_base + (offset))))
+#define PCI_WRITEW(value, offset) \
+       (writew((value), (void *)((u32)pci_reg_base + (offset))))
+#define PCI_WRITEL(value, offset) \
+       (writel((value), (void *)((u32)pci_reg_base + (offset))))
+
+static void __init
+bamboo_setup_pci(void)
+{
+       void *pci_reg_base;
+       unsigned long memory_size;
+       memory_size = ppc_md.find_end_of_memory();
+
+       pci_reg_base = ioremap64(BAMBOO_PCIL0_BASE, BAMBOO_PCIL0_SIZE);
+
+       /* Enable PCI Mem and Busmaster cycles (PCI_COMMAND_IO is not set) */
+       PCI_WRITEW(PCI_READW(PCI_COMMAND) |
+                  PCI_COMMAND_MEMORY |
+                  PCI_COMMAND_MASTER, PCI_COMMAND);
+
+       /* Disable region zero first; it is re-enabled once programmed */
+       PCI_WRITEL(0, BAMBOO_PCIL0_PMM0MA);
+
+       /* PLB starting addr: 0x00000000A0000000 */
+       PCI_WRITEL(BAMBOO_PCI_PHY_MEM_BASE, BAMBOO_PCIL0_PMM0LA);
+
+       /* PCI start addr, 0xA0000000 (PCI Address) */
+       PCI_WRITEL(BAMBOO_PCI_MEM_BASE, BAMBOO_PCIL0_PMM0PCILA);
+       PCI_WRITEL(0, BAMBOO_PCIL0_PMM0PCIHA);
+
+       /* Enable no pre-fetch, enable region */
+       PCI_WRITEL(((0xffffffff -
+                    (BAMBOO_PCI_UPPER_MEM - BAMBOO_PCI_MEM_BASE)) | 0x01),
+                     BAMBOO_PCIL0_PMM0MA);
+
+       /* Disable region one */
+       PCI_WRITEL(0, BAMBOO_PCIL0_PMM1MA);
+       PCI_WRITEL(0, BAMBOO_PCIL0_PMM1LA);
+       PCI_WRITEL(0, BAMBOO_PCIL0_PMM1PCILA);
+       PCI_WRITEL(0, BAMBOO_PCIL0_PMM1PCIHA);
+       PCI_WRITEL(0, BAMBOO_PCIL0_PMM1MA);
+
+       /* Disable region two */
+       PCI_WRITEL(0, BAMBOO_PCIL0_PMM2MA);
+       PCI_WRITEL(0, BAMBOO_PCIL0_PMM2LA);
+       PCI_WRITEL(0, BAMBOO_PCIL0_PMM2PCILA);
+       PCI_WRITEL(0, BAMBOO_PCIL0_PMM2PCIHA);
+       PCI_WRITEL(0, BAMBOO_PCIL0_PMM2MA);
+
+       /* Now configure the PCI->PLB windows, we only use PTM1
+        *
+        * For inbound flow, set the window size to cover all available
+        * memory. This is required because, if the window is smaller,
+        * Ethernet/PCI device drivers fail: the PCI card cannot access
+        * the memory allocated by the driver.
+        */
+
+       PCI_WRITEL(0, BAMBOO_PCIL0_PTM1MS);     /* disabled region 1 */
+       PCI_WRITEL(0, BAMBOO_PCIL0_PTM1LA);     /* begin of address map */
+
+       memory_size = 1 << fls(memory_size - 1);
+
+       /* Size low + Enabled */
+       PCI_WRITEL((0xffffffff - (memory_size - 1)) | 0x1, BAMBOO_PCIL0_PTM1MS);
+
+       eieio();
+       iounmap(pci_reg_base);
+}
+
+static void __init
+bamboo_setup_hose(void)
+{
+       unsigned int bar_response, bar;
+       struct pci_controller *hose;
+
+       bamboo_setup_pci();
+
+       hose = pcibios_alloc_controller();
+
+       if (!hose)
+               return;
+
+       hose->first_busno = 0;
+       hose->last_busno = 0xff;
+
+       hose->pci_mem_offset = BAMBOO_PCI_MEM_OFFSET;
+
+       pci_init_resource(&hose->io_resource,
+                       BAMBOO_PCI_LOWER_IO,
+                       BAMBOO_PCI_UPPER_IO,
+                       IORESOURCE_IO,
+                       "PCI host bridge");
+
+       pci_init_resource(&hose->mem_resources[0],
+                       BAMBOO_PCI_LOWER_MEM,
+                       BAMBOO_PCI_UPPER_MEM,
+                       IORESOURCE_MEM,
+                       "PCI host bridge");
+
+       ppc_md.pci_exclude_device = bamboo_exclude_device;
+
+       hose->io_space.start = BAMBOO_PCI_LOWER_IO;
+       hose->io_space.end = BAMBOO_PCI_UPPER_IO;
+       hose->mem_space.start = BAMBOO_PCI_LOWER_MEM;
+       hose->mem_space.end = BAMBOO_PCI_UPPER_MEM;
+       isa_io_base =
+               (unsigned long)ioremap64(BAMBOO_PCI_IO_BASE, BAMBOO_PCI_IO_SIZE);
+       hose->io_base_virt = (void *)isa_io_base;
+
+       setup_indirect_pci(hose,
+                       BAMBOO_PCI_CFGA_PLB32,
+                       BAMBOO_PCI_CFGD_PLB32);
+       hose->set_cfg_type = 1;
+
+       /* Zero config bars */
+       for (bar = PCI_BASE_ADDRESS_1; bar <= PCI_BASE_ADDRESS_2; bar += 4) {
+               early_write_config_dword(hose, hose->first_busno,
+                                        PCI_FUNC(hose->first_busno), bar,
+                                        0x00000000);
+               early_read_config_dword(hose, hose->first_busno,
+                                       PCI_FUNC(hose->first_busno), bar,
+                                       &bar_response);
+       }
+
+       hose->last_busno = pciauto_bus_scan(hose, hose->first_busno);
+
+       ppc_md.pci_swizzle = common_swizzle;
+       ppc_md.pci_map_irq = bamboo_map_irq;
+}
+
+TODC_ALLOC();
+
+static void __init
+bamboo_early_serial_map(void)
+{
+       struct uart_port port;
+
+       /* Setup ioremapped serial port access */
+       memset(&port, 0, sizeof(port));
+       port.membase = ioremap64(PPC440EP_UART0_ADDR, 8);
+       port.irq = 0;
+       port.uartclk = clocks.uart0;
+       port.regshift = 0;
+       port.iotype = SERIAL_IO_MEM;
+       port.flags = ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST;
+       port.line = 0;
+
+       if (early_serial_setup(&port) != 0) {
+               printk("Early serial init of port 0 failed\n");
+       }
+
+#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
+       /* Configure debug serial access */
+       gen550_init(0, &port);
+#endif
+
+       port.membase = ioremap64(PPC440EP_UART1_ADDR, 8);
+       port.irq = 1;
+       port.uartclk = clocks.uart1;
+       port.line = 1;
+
+       if (early_serial_setup(&port) != 0) {
+               printk("Early serial init of port 1 failed\n");
+       }
+
+#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
+       /* Configure debug serial access */
+       gen550_init(1, &port);
+#endif
+
+       port.membase = ioremap64(PPC440EP_UART2_ADDR, 8);
+       port.irq = 3;
+       port.uartclk = clocks.uart2;
+       port.line = 2;
+
+       if (early_serial_setup(&port) != 0) {
+               printk("Early serial init of port 2 failed\n");
+       }
+
+#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
+       /* Configure debug serial access */
+       gen550_init(2, &port);
+#endif
+
+       port.membase = ioremap64(PPC440EP_UART3_ADDR, 8);
+       port.irq = 4;
+       port.uartclk = clocks.uart3;
+       port.line = 3;
+
+       if (early_serial_setup(&port) != 0) {
+               printk("Early serial init of port 3 failed\n");
+       }
+}
+
+static void __init
+bamboo_setup_arch(void)
+{
+
+       bamboo_set_emacdata();
+
+       ibm440gx_get_clocks(&clocks, 33333333, 6 * 1843200);
+       ocp_sys_info.opb_bus_freq = clocks.opb;
+
+       /* Setup TODC access */
+       TODC_INIT(TODC_TYPE_DS1743,
+                       0,
+                       0,
+                       ioremap64(BAMBOO_RTC_ADDR, BAMBOO_RTC_SIZE),
+                       8);
+
+       /* init to some ~sane value until calibrate_delay() runs */
+        loops_per_jiffy = 50000000/HZ;
+
+       /* Setup PCI host bridge */
+       bamboo_setup_hose();
+
+#ifdef CONFIG_BLK_DEV_INITRD
+       if (initrd_start)
+               ROOT_DEV = Root_RAM0;
+       else
+#endif
+#ifdef CONFIG_ROOT_NFS
+               ROOT_DEV = Root_NFS;
+#else
+               ROOT_DEV = Root_HDA1;
+#endif
+
+       bamboo_early_serial_map();
+
+       /* Identify the system */
+       printk("IBM Bamboo port (MontaVista Software, Inc. (source@mvista.com))\n");
+}
+
+void __init platform_init(unsigned long r3, unsigned long r4,
+               unsigned long r5, unsigned long r6, unsigned long r7)
+{
+       parse_bootinfo(find_bootinfo());
+
+       /*
+        * If we were passed in a board information, copy it into the
+        * residual data area.
+        */
+       if (r3)
+               __res = *(bd_t *)(r3 + KERNELBASE);
+
+
+       ibm44x_platform_init();
+
+       ppc_md.setup_arch = bamboo_setup_arch;
+       ppc_md.show_cpuinfo = bamboo_show_cpuinfo;
+       ppc_md.get_irq = NULL;          /* Set in ppc4xx_pic_init() */
+
+       ppc_md.calibrate_decr = bamboo_calibrate_decr;
+       ppc_md.time_init = todc_time_init;
+       ppc_md.set_rtc_time = todc_set_rtc_time;
+       ppc_md.get_rtc_time = todc_get_rtc_time;
+
+       ppc_md.nvram_read_val = todc_direct_read_val;
+       ppc_md.nvram_write_val = todc_direct_write_val;
+#ifdef CONFIG_KGDB
+       ppc_md.early_serial_map = bamboo_early_serial_map;
+#endif
+}
+
diff --git a/arch/ppc/platforms/4xx/bamboo.h b/arch/ppc/platforms/4xx/bamboo.h

new file mode 100644 (file)

index 0000000..63d7145
--- /dev/null
+++ b/arch/ppc/platforms/4xx/bamboo.h
@@ -0,0 +1,136 @@
+/*
+ * arch/ppc/platforms/4xx/bamboo.h
+ *
+ * Bamboo board definitions
+ *
+ * Wade Farnsworth <wfarnsworth@mvista.com>
+ *
+ * Copyright 2004 MontaVista Software Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifdef __KERNEL__
+#ifndef __ASM_BAMBOO_H__
+#define __ASM_BAMBOO_H__
+
+#include <linux/config.h>
+#include <platforms/4xx/ibm440ep.h>
+
+/* F/W TLB mapping used in bootloader glue to reset EMAC */
+#define PPC44x_EMAC0_MR0               0x0EF600E00
+
+/* Location of MAC addresses in PIBS image */
+#define PIBS_FLASH_BASE                        0xfff00000
+#define PIBS_MAC_BASE                  (PIBS_FLASH_BASE+0xc0400)
+#define PIBS_MAC_SIZE                  0x200
+#define PIBS_MAC_OFFSET                        0x100
+
+/* Default clock rate */
+#define BAMBOO_TMRCLK                  25000000
+
+/* RTC/NVRAM location */
+#define BAMBOO_RTC_ADDR                        0x080000000ULL
+#define BAMBOO_RTC_SIZE                        0x2000
+
+/* FPGA registers, each followed by bit tests for a value read from it */
+#define BAMBOO_FPGA_ADDR               0x080002000ULL
+
+#define BAMBOO_FPGA_CONFIG2_REG_ADDR   (BAMBOO_FPGA_ADDR + 0x1)
+#define BAMBOO_FULL_DUPLEX_EN(x)       ((x) & 0x08)
+#define BAMBOO_FORCE_100Mbps(x)                ((x) & 0x04)
+#define BAMBOO_AUTONEGOTIATE(x)                ((x) & 0x02)
+
+#define BAMBOO_FPGA_SETTING_REG_ADDR   (BAMBOO_FPGA_ADDR + 0x3)
+#define BAMBOO_BOOT_SMALL_FLASH(x)     (!((x) & 0x80))
+#define BAMBOO_LARGE_FLASH_EN(x)       (!((x) & 0x40))
+#define BAMBOO_BOOT_NAND_FLASH(x)      (!((x) & 0x20))
+
+#define BAMBOO_FPGA_SELECTION1_REG_ADDR (BAMBOO_FPGA_ADDR + 0x4)
+#define BAMBOO_SEL_MII(x)              ((x) & 0x80)
+#define BAMBOO_SEL_RMII(x)             ((x) & 0x40)
+#define BAMBOO_SEL_SMII(x)             ((x) & 0x20)
+
+/* Flash */
+#define BAMBOO_SMALL_FLASH_LOW         0x087f00000ULL
+#define BAMBOO_SMALL_FLASH_HIGH                0x0fff00000ULL
+#define BAMBOO_SMALL_FLASH_SIZE                0x100000
+#define BAMBOO_LARGE_FLASH_LOW         0x087800000ULL
+#define BAMBOO_LARGE_FLASH_HIGH1       0x0ff800000ULL
+#define BAMBOO_LARGE_FLASH_HIGH2       0x0ffc00000ULL
+#define BAMBOO_LARGE_FLASH_SIZE                0x400000
+#define BAMBOO_SRAM_LOW                        0x087f00000ULL
+#define BAMBOO_SRAM_HIGH1              0x0fff00000ULL
+#define BAMBOO_SRAM_HIGH2              0x0ff800000ULL
+#define BAMBOO_SRAM_SIZE               0x100000
+#define BAMBOO_NAND_FLASH_REG_ADDR     0x090000000ULL
+#define BAMBOO_NAND_FLASH_REG_SIZE     0x2000
+
+/*
+ * Serial port defines
+ */
+#define RS_TABLE_SIZE                  4
+
+#define UART0_IO_BASE                  0xEF600300
+#define UART1_IO_BASE                  0xEF600400
+#define UART2_IO_BASE                  0xEF600500
+#define UART3_IO_BASE                  0xEF600600
+
+#define BASE_BAUD                      (33177600/3/16)
+#define UART0_INT                      0
+#define UART1_INT                      1
+#define UART2_INT                      3
+#define UART3_INT                      4
+
+#define STD_UART_OP(num)                                       \
+       { 0, BASE_BAUD, 0, UART##num##_INT,                     \
+               (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST),        \
+               iomem_base: UART##num##_IO_BASE,                \
+               io_type: SERIAL_IO_MEM},
+
+#define SERIAL_PORT_DFNS       \
+       STD_UART_OP(0)          \
+       STD_UART_OP(1)          \
+       STD_UART_OP(2)          \
+       STD_UART_OP(3)
+
+/* PCI support */
+#define BAMBOO_PCI_CFGA_PLB32          0xeec00000
+#define BAMBOO_PCI_CFGD_PLB32          0xeec00004
+
+#define BAMBOO_PCI_IO_BASE             0x00000000e8000000ULL
+#define BAMBOO_PCI_IO_SIZE             0x00010000
+#define BAMBOO_PCI_MEM_OFFSET          0x00000000
+#define BAMBOO_PCI_PHY_MEM_BASE                0x00000000a0000000ULL
+
+#define BAMBOO_PCI_LOWER_IO            0x00000000
+#define BAMBOO_PCI_UPPER_IO            0x0000ffff
+#define BAMBOO_PCI_LOWER_MEM           0xa0000000
+#define BAMBOO_PCI_UPPER_MEM           0xafffffff
+#define BAMBOO_PCI_MEM_BASE            0xa0000000
+
+#define BAMBOO_PCIL0_BASE              0x00000000ef400000ULL
+#define BAMBOO_PCIL0_SIZE              0x40
+
+#define BAMBOO_PCIL0_PMM0LA            0x000
+#define BAMBOO_PCIL0_PMM0MA            0x004
+#define BAMBOO_PCIL0_PMM0PCILA         0x008
+#define BAMBOO_PCIL0_PMM0PCIHA         0x00C
+#define BAMBOO_PCIL0_PMM1LA            0x010
+#define BAMBOO_PCIL0_PMM1MA            0x014
+#define BAMBOO_PCIL0_PMM1PCILA         0x018
+#define BAMBOO_PCIL0_PMM1PCIHA         0x01C
+#define BAMBOO_PCIL0_PMM2LA            0x020
+#define BAMBOO_PCIL0_PMM2MA            0x024
+#define BAMBOO_PCIL0_PMM2PCILA         0x028
+#define BAMBOO_PCIL0_PMM2PCIHA         0x02C
+#define BAMBOO_PCIL0_PTM1MS            0x030
+#define BAMBOO_PCIL0_PTM1LA            0x034
+#define BAMBOO_PCIL0_PTM2MS            0x038
+#define BAMBOO_PCIL0_PTM2LA            0x03C
+
+#endif                          /* __ASM_BAMBOO_H__ */
+#endif                          /* __KERNEL__ */
diff --git a/arch/ppc/platforms/4xx/ibm440ep.c b/arch/ppc/platforms/4xx/ibm440ep.c

new file mode 100644 (file)

index 0000000..284da01
--- /dev/null
+++ b/arch/ppc/platforms/4xx/ibm440ep.c
@@ -0,0 +1,220 @@
+/*
+ * arch/ppc/platforms/4xx/ibm440ep.c
+ *
+ * PPC440EP I/O descriptions
+ *
+ * Wade Farnsworth <wfarnsworth@mvista.com>
+ * Copyright 2004 MontaVista Software Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <platforms/4xx/ibm440ep.h>
+#include <asm/ocp.h>
+#include <asm/ppc4xx_pic.h>
+
+static struct ocp_func_emac_data ibm440ep_emac0_def = {
+       .rgmii_idx      = -1,           /* No RGMII */
+       .rgmii_mux      = -1,           /* No RGMII */
+       .zmii_idx       = 0,            /* ZMII device index */
+       .zmii_mux       = 0,            /* ZMII input of this EMAC */
+       .mal_idx        = 0,            /* MAL device index */
+       .mal_rx_chan    = 0,            /* MAL rx channel number */
+       .mal_tx_chan    = 0,            /* MAL tx channel number */
+       .wol_irq        = 61,           /* WOL interrupt number */
+       .mdio_idx       = -1,           /* No shared MDIO */
+       .tah_idx        = -1,           /* No TAH */
+};
+
+static struct ocp_func_emac_data ibm440ep_emac1_def = {
+       .rgmii_idx      = -1,           /* No RGMII */
+       .rgmii_mux      = -1,           /* No RGMII */
+       .zmii_idx       = 0,            /* ZMII device index */
+       .zmii_mux       = 1,            /* ZMII input of this EMAC */
+       .mal_idx        = 0,            /* MAL device index */
+       .mal_rx_chan    = 1,            /* MAL rx channel number */
+       .mal_tx_chan    = 2,            /* MAL tx channel number */
+       .wol_irq        = 63,           /* WOL interrupt number */
+       .mdio_idx       = -1,           /* No shared MDIO */
+       .tah_idx        = -1,           /* No TAH */
+};
+OCP_SYSFS_EMAC_DATA()
+
+static struct ocp_func_mal_data ibm440ep_mal0_def = {
+       .num_tx_chans   = 4,            /* Number of TX channels */
+       .num_rx_chans   = 2,            /* Number of RX channels */
+       .txeob_irq      = 10,           /* TX End Of Buffer IRQ  */
+       .rxeob_irq      = 11,           /* RX End Of Buffer IRQ  */
+       .txde_irq       = 33,           /* TX Descriptor Error IRQ */
+       .rxde_irq       = 34,           /* RX Descriptor Error IRQ */
+       .serr_irq       = 32,           /* MAL System Error IRQ    */
+};
+OCP_SYSFS_MAL_DATA()
+
+static struct ocp_func_iic_data ibm440ep_iic0_def = {
+       .fast_mode      = 0,            /* Use standard mode (100 kHz) */
+};
+
+static struct ocp_func_iic_data ibm440ep_iic1_def = {
+       .fast_mode      = 0,            /* Use standard mode (100 kHz) */
+};
+OCP_SYSFS_IIC_DATA()
+
+struct ocp_def core_ocp[] = {
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_OPB,
+         .index        = 0,
+         .paddr        = 0x0EF600000ULL,
+         .irq          = OCP_IRQ_NA,
+         .pm           = OCP_CPM_NA,
+       },
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_16550,
+         .index        = 0,
+         .paddr        = PPC440EP_UART0_ADDR,
+         .irq          = UART0_INT,
+         .pm           = IBM_CPM_UART0,
+       },
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_16550,
+         .index        = 1,
+         .paddr        = PPC440EP_UART1_ADDR,
+         .irq          = UART1_INT,
+         .pm           = IBM_CPM_UART1,
+       },
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_16550,
+         .index        = 2,
+         .paddr        = PPC440EP_UART2_ADDR,
+         .irq          = UART2_INT,
+         .pm           = IBM_CPM_UART2,
+       },
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_16550,
+         .index        = 3,
+         .paddr        = PPC440EP_UART3_ADDR,
+         .irq          = UART3_INT,
+         .pm           = IBM_CPM_UART3,
+       },
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_IIC,
+         .index        = 0,
+         .paddr        = 0x0EF600700ULL,
+         .irq          = 2,
+         .pm           = IBM_CPM_IIC0,
+         .additions    = &ibm440ep_iic0_def,
+         .show         = &ocp_show_iic_data
+       },
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_IIC,
+         .index        = 1,
+         .paddr        = 0x0EF600800ULL,
+         .irq          = 7,
+         .pm           = IBM_CPM_IIC1,
+         .additions    = &ibm440ep_iic1_def,
+         .show         = &ocp_show_iic_data
+       },
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_GPIO,
+         .index        = 0,
+         .paddr        = 0x0EF600B00ULL,
+         .irq          = OCP_IRQ_NA,
+         .pm           = IBM_CPM_GPIO0,
+       },
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_GPIO,
+         .index        = 1,
+         .paddr        = 0x0EF600C00ULL,
+         .irq          = OCP_IRQ_NA,
+         .pm           = OCP_CPM_NA,
+       },
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_MAL,
+         .paddr        = OCP_PADDR_NA,
+         .irq          = OCP_IRQ_NA,
+         .pm           = OCP_CPM_NA,
+         .additions    = &ibm440ep_mal0_def,
+         .show         = &ocp_show_mal_data,
+       },
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_EMAC,
+         .index        = 0,
+         .paddr        = 0x0EF600E00ULL,
+         .irq          = 60,
+         .pm           = OCP_CPM_NA,
+         .additions    = &ibm440ep_emac0_def,
+         .show         = &ocp_show_emac_data,
+       },
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_EMAC,
+         .index        = 1,
+         .paddr        = 0x0EF600F00ULL,
+         .irq          = 62,
+         .pm           = OCP_CPM_NA,
+         .additions    = &ibm440ep_emac1_def,
+         .show         = &ocp_show_emac_data,
+       },
+       { .vendor       = OCP_VENDOR_IBM,
+         .function     = OCP_FUNC_ZMII,
+         .paddr        = 0x0EF600D00ULL,
+         .irq          = OCP_IRQ_NA,
+         .pm           = OCP_CPM_NA,
+       },
+       { .vendor       = OCP_VENDOR_INVALID
+       }
+};
+
+/* Polarity and triggering settings for internal interrupt sources */
+struct ppc4xx_uic_settings ppc4xx_core_uic_cfg[] __initdata = {
+       { .polarity     = 0xffbffe03,
+         .triggering   = 0xfffffe00,
+         .ext_irq_mask = 0x000001fc,   /* IRQ0 - IRQ6 */
+       },
+       { .polarity     = 0xffffc6ef,
+         .triggering   = 0xffffc7ff,
+         .ext_irq_mask = 0x00003800,   /* IRQ7 - IRQ9 */
+       },
+};
+
+static struct resource usb_gadget_resources[] = {
+       [0] = {
+               .start  = 0x050000100ULL,
+               .end    = 0x05000017FULL,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = 55,
+               .end    = 55,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static u64 dma_mask = 0xffffffffULL;
+
+static struct platform_device usb_gadget_device = {
+       .name           = "musbhsfc",
+       .id             = 0,
+       .num_resources  = ARRAY_SIZE(usb_gadget_resources),
+       .resource       = usb_gadget_resources,
+       .dev            = {
+               .dma_mask = &dma_mask,
+               .coherent_dma_mask = 0xffffffffULL,
+       }
+};
+
+static struct platform_device *ibm440ep_devs[] __initdata = {
+       &usb_gadget_device,
+};
+
+static int __init
+ibm440ep_platform_add_devices(void)
+{
+       return platform_add_devices(ibm440ep_devs, ARRAY_SIZE(ibm440ep_devs));
+}
+arch_initcall(ibm440ep_platform_add_devices);
+
diff --git a/arch/ppc/platforms/4xx/ibm440ep.h b/arch/ppc/platforms/4xx/ibm440ep.h

new file mode 100644 (file)

index 0000000..97c80b8
--- /dev/null
+++ b/arch/ppc/platforms/4xx/ibm440ep.h
@@ -0,0 +1,76 @@
+/*
+ * arch/ppc/platforms/4xx/ibm440ep.h
+ *
+ * PPC440EP definitions
+ *
+ * Wade Farnsworth <wfarnsworth@mvista.com>
+ *
+ * Copyright 2002 Roland Dreier
+ * Copyright 2004 MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifdef __KERNEL__
+#ifndef __PPC_PLATFORMS_IBM440EP_H
+#define __PPC_PLATFORMS_IBM440EP_H
+
+#include <linux/config.h>
+#include <asm/ibm44x.h>
+
+/* UART */
+#define PPC440EP_UART0_ADDR            0x0EF600300
+#define PPC440EP_UART1_ADDR            0x0EF600400
+#define PPC440EP_UART2_ADDR            0x0EF600500
+#define PPC440EP_UART3_ADDR            0x0EF600600
+#define UART0_INT                      0
+#define UART1_INT                      1
+#define UART2_INT                      3
+#define UART3_INT                      4
+
+/* Clock and Power Management */
+#define IBM_CPM_IIC0           0x80000000      /* IIC interface */
+#define IBM_CPM_IIC1           0x40000000      /* IIC interface */
+#define IBM_CPM_PCI            0x20000000      /* PCI bridge */
+#define IBM_CPM_USB1H          0x08000000      /* USB 1.1 Host */
+#define IBM_CPM_FPU            0x04000000      /* floating point unit */
+#define IBM_CPM_CPU            0x02000000      /* processor core */
+#define IBM_CPM_DMA            0x01000000      /* DMA controller */
+#define IBM_CPM_BGO            0x00800000      /* PLB to OPB bus arbiter */
+#define IBM_CPM_BGI            0x00400000      /* OPB to PLB bridge */
+#define IBM_CPM_EBC            0x00200000      /* External Bus Controller */
+#define IBM_CPM_EBM            0x00100000      /* Ext Bus Master Interface */
+#define IBM_CPM_DMC            0x00080000      /* SDRAM peripheral controller */
+#define IBM_CPM_PLB4           0x00040000      /* PLB4 bus arbiter */
+#define IBM_CPM_PLB4x3         0x00020000      /* PLB4 to PLB3 bridge controller */
+#define IBM_CPM_PLB3x4         0x00010000      /* PLB3 to PLB4 bridge controller */
+#define IBM_CPM_PLB3           0x00008000      /* PLB3 bus arbiter */
+#define IBM_CPM_PPM            0x00002000      /* PLB Performance Monitor */
+#define IBM_CPM_UIC1           0x00001000      /* Universal Interrupt Controller */
+#define IBM_CPM_GPIO0          0x00000800      /* General Purpose IO (??) */
+#define IBM_CPM_GPT            0x00000400      /* General Purpose Timers  */
+#define IBM_CPM_UART0          0x00000200      /* serial port 0 */
+#define IBM_CPM_UART1          0x00000100      /* serial port 1 */
+#define IBM_CPM_UIC0           0x00000080      /* Universal Interrupt Controller */
+#define IBM_CPM_TMRCLK         0x00000040      /* CPU timers */
+#define IBM_CPM_EMAC0          0x00000020      /* ethernet port 0 */
+#define IBM_CPM_EMAC1          0x00000010      /* ethernet port 1 */
+#define IBM_CPM_UART2          0x00000008      /* serial port 2 */
+#define IBM_CPM_UART3          0x00000004      /* serial port 3 */
+#define IBM_CPM_USB2D          0x00000002      /* USB 2.0 Device */
+#define IBM_CPM_USB2H          0x00000001      /* USB 2.0 Host */
+
+/* Bitwise complement of the CPM unit bits OR-ed together below */
+#define DFLT_IBM4xx_PM         ~(IBM_CPM_UIC0 | IBM_CPM_UIC1 | IBM_CPM_CPU \
+                               | IBM_CPM_EBC | IBM_CPM_BGO | IBM_CPM_FPU \
+                               | IBM_CPM_EBM | IBM_CPM_PLB4 | IBM_CPM_PLB3x4 \
+                               | IBM_CPM_PLB3 | IBM_CPM_PLB4x3 \
+                               | IBM_CPM_EMAC0 | IBM_CPM_TMRCLK \
+                               | IBM_CPM_DMA | IBM_CPM_PCI | IBM_CPM_EMAC1)
+
+#endif /* __PPC_PLATFORMS_IBM440EP_H */
+#endif /* __KERNEL__ */
diff --git a/arch/ppc/syslib/Makefile b/arch/ppc/syslib/Makefile

index dec5bf4f6879ff3956aa3b7cddfde7a2028595f4..220a65ab0a51f608089e7a49941aae852937a096 100644 (file)
--- a/arch/ppc/syslib/Makefile
+++ b/arch/ppc/syslib/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_PPCBUG_NVRAM)    += prep_nvram.o
  obj-$(CONFIG_PPC_OCP)          += ocp.o
  obj-$(CONFIG_IBM_OCP)          += ibm_ocp.o
  obj-$(CONFIG_44x)              += ibm44x_common.o
+obj-$(CONFIG_440EP)            += ibm440gx_common.o
  obj-$(CONFIG_440GP)            += ibm440gp_common.o
  obj-$(CONFIG_440GX)            += ibm440gx_common.o
  obj-$(CONFIG_440SP)            += ibm440gx_common.o ibm440sp_common.o
@@ -44,6 +45,7 @@ obj-$(CONFIG_PPC_CHRP)                += open_pic.o indirect_pci.o i8259.o
  obj-$(CONFIG_PPC_PREP)         += open_pic.o indirect_pci.o i8259.o todc_time.o
  obj-$(CONFIG_ADIR)             += i8259.o indirect_pci.o pci_auto.o \
                                         todc_time.o
+obj-$(CONFIG_BAMBOO)           += indirect_pci.o pci_auto.o todc_time.o
  obj-$(CONFIG_CPCI690)          += todc_time.o pci_auto.o
  obj-$(CONFIG_EBONY)            += indirect_pci.o pci_auto.o todc_time.o
  obj-$(CONFIG_EV64260)          += todc_time.o pci_auto.o
diff --git a/arch/ppc/syslib/ibm440gx_common.c b/arch/ppc/syslib/ibm440gx_common.c

index 4ad85e0e0234222e7d4db97074acf7a41554096e..d4776af6a3ca210a49b519d98bd28ec72e071181 100644 (file)
--- a/arch/ppc/syslib/ibm440gx_common.c
+++ b/arch/ppc/syslib/ibm440gx_common.c
@@ -34,6 +34,10 @@ void __init ibm440gx_get_clocks(struct ibm44x_clocks* p, unsigned int sys_clk,
         u32 plld  = CPR_READ(DCRN_CPR_PLLD);
         u32 uart0 = SDR_READ(DCRN_SDR_UART0);
         u32 uart1 = SDR_READ(DCRN_SDR_UART1);
+#ifdef CONFIG_440EP
+       u32 uart2 = SDR_READ(DCRN_SDR_UART2);
+       u32 uart3 = SDR_READ(DCRN_SDR_UART3);
+#endif
  
         /* Dividers */
         u32 fbdv   = __fix_zero((plld >> 24) & 0x1f, 32);
@@ -96,6 +100,17 @@ bypass:
                 p->uart1 = ser_clk;
         else
                 p->uart1 = p->plb / __fix_zero(uart1 & 0xff, 256);
+#ifdef CONFIG_440EP
+       if (uart2 & 0x00800000)
+               p->uart2 = ser_clk;
+       else
+               p->uart2 = p->plb / __fix_zero(uart2 & 0xff, 256);
+
+       if (uart3 & 0x00800000)
+               p->uart3 = ser_clk;
+       else
+               p->uart3 = p->plb / __fix_zero(uart3 & 0xff, 256);
+#endif
  }
  
  /* Issue L2C diagnostic command */
diff --git a/arch/ppc/syslib/ibm44x_common.h b/arch/ppc/syslib/ibm44x_common.h

index b14eb603ce01a75fb7bc33adad5b8f4ce06d0685..c16b6a5ac6ab9bc8090c6a4437210c404ccafe54 100644 (file)
--- a/arch/ppc/syslib/ibm44x_common.h
+++ b/arch/ppc/syslib/ibm44x_common.h
@@ -29,6 +29,10 @@ struct ibm44x_clocks {
         unsigned int ebc;       /* PerClk */
         unsigned int uart0;
         unsigned int uart1;
+#ifdef CONFIG_440EP
+       unsigned int uart2;
+       unsigned int uart3;
+#endif
  };
  
  /* common 44x platform init */
diff --git a/arch/ppc/syslib/m8xx_setup.c b/arch/ppc/syslib/m8xx_setup.c

index c1db2ab1d1540117c105958404d3aafa3724286c..a3702cfe8f7c70ff25c546b76f8b8cddc01e47eb 100644 (file)
--- a/arch/ppc/syslib/m8xx_setup.c
+++ b/arch/ppc/syslib/m8xx_setup.c
@@ -57,7 +57,7 @@ unsigned char __res[sizeof(bd_t)];
  extern void m8xx_ide_init(void);
  
  extern unsigned long find_available_memory(void);
-extern void m8xx_cpm_reset(uint cpm_page);
+extern void m8xx_cpm_reset();
  extern void m8xx_wdt_handler_install(bd_t *bp);
  extern void rpxfb_alloc_pages(void);
  extern void cpm_interrupt_init(void);
@@ -70,13 +70,9 @@ board_init(void)
  void __init
  m8xx_setup_arch(void)
  {
-       int     cpm_page;
-
-       cpm_page = (int) alloc_bootmem_pages(PAGE_SIZE);
-
         /* Reset the Communication Processor Module.
         */
-       m8xx_cpm_reset(cpm_page);
+       m8xx_cpm_reset();
  
  #ifdef CONFIG_FB_RPX
         rpxfb_alloc_pages();
@@ -427,7 +423,7 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
         ppc_md.find_end_of_memory       = m8xx_find_end_of_memory;
         ppc_md.setup_io_mappings        = m8xx_map_io;
  
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+#if defined(CONFIG_BLK_DEV_MPC8xx_IDE)
         m8xx_ide_init();
  #endif
  }
diff --git a/arch/ppc/syslib/mpc83xx_devices.c b/arch/ppc/syslib/mpc83xx_devices.c

index 75c8e9834ae70ad142f2aa8e65e025065a4816de..5aaf0e58e1f9d2f7b0b187ed475e98c81c8901de 100644 (file)
--- a/arch/ppc/syslib/mpc83xx_devices.c
+++ b/arch/ppc/syslib/mpc83xx_devices.c
@@ -191,8 +191,8 @@ struct platform_device ppc_sys_platform_devices[] = {
                 .num_resources   = 2,
                 .resource = (struct resource[]) {
                         {
-                               .start  = 0x22000,
-                               .end    = 0x22fff,
+                               .start  = 0x23000,
+                               .end    = 0x23fff,
                                 .flags  = IORESOURCE_MEM,
                         },
                         {
@@ -208,8 +208,8 @@ struct platform_device ppc_sys_platform_devices[] = {
                 .num_resources   = 2,
                 .resource = (struct resource[]) {
                         {
-                               .start  = 0x23000,
-                               .end    = 0x23fff,
+                               .start  = 0x22000,
+                               .end    = 0x22fff,
                                 .flags  = IORESOURCE_MEM,
                         },
                         {
diff --git a/arch/ppc/syslib/ppc4xx_dma.c b/arch/ppc/syslib/ppc4xx_dma.c

index 5015ab99afd21b78416dc59b74883f4889b7ba29..f15e64285f9628e25465a55426cc3e88d3d5d737 100644 (file)
--- a/arch/ppc/syslib/ppc4xx_dma.c
+++ b/arch/ppc/syslib/ppc4xx_dma.c
@@ -620,6 +620,7 @@ ppc4xx_clr_dma_status(unsigned int dmanr)
         return DMA_STATUS_GOOD;
  }
  
+#ifdef CONFIG_PPC4xx_EDMA
  /*
   * Enables the burst on the channel (BTEN bit in the control/count register)
   * Note:
@@ -685,6 +686,11 @@ ppc4xx_set_burst_size(unsigned int dmanr, unsigned int bsize)
         return DMA_STATUS_GOOD;
  }
  
+EXPORT_SYMBOL(ppc4xx_enable_burst);
+EXPORT_SYMBOL(ppc4xx_disable_burst);
+EXPORT_SYMBOL(ppc4xx_set_burst_size);
+#endif /* CONFIG_PPC4xx_EDMA */
+
  EXPORT_SYMBOL(ppc4xx_init_dma_channel);
  EXPORT_SYMBOL(ppc4xx_get_channel_config);
  EXPORT_SYMBOL(ppc4xx_set_channel_priority);
@@ -703,6 +709,4 @@ EXPORT_SYMBOL(ppc4xx_enable_dma_interrupt);
  EXPORT_SYMBOL(ppc4xx_disable_dma_interrupt);
  EXPORT_SYMBOL(ppc4xx_get_dma_status);
  EXPORT_SYMBOL(ppc4xx_clr_dma_status);
-EXPORT_SYMBOL(ppc4xx_enable_burst);
-EXPORT_SYMBOL(ppc4xx_disable_burst);
-EXPORT_SYMBOL(ppc4xx_set_burst_size);
+
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig

index 2ce87836c6711b76b8862eff790ce1b8c6f30d2b..13b262f10216aa4d88d88988374ef744f9b3174f 100644 (file)
--- a/arch/ppc64/Kconfig
+++ b/arch/ppc64/Kconfig
@@ -302,12 +302,6 @@ config GENERIC_HARDIRQS
         bool
         default y
  
-config MSCHUNKS
-       bool
-       depends on PPC_ISERIES
-       default y
-
-
  config PPC_RTAS
         bool
         depends on PPC_PSERIES || PPC_BPA
@@ -350,13 +344,46 @@ config SECCOMP
  
           If unsure, say Y. Only embedded should say N here.
  
+source "fs/Kconfig.binfmt"
+
+config HOTPLUG_CPU
+       bool "Support for hot-pluggable CPUs"
+       depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC)
+       select HOTPLUG
+       ---help---
+         Say Y here to be able to turn CPUs off and on.
+
+         Say N if you are unsure.
+
+config PROC_DEVICETREE
+       bool "Support for Open Firmware device tree in /proc"
+       depends on !PPC_ISERIES
+       help
+         This option adds a device-tree directory under /proc which contains
+         an image of the device tree that the kernel copies from Open
+         Firmware. If unsure, say Y here.
+
+config CMDLINE_BOOL
+       bool "Default bootloader kernel arguments"
+       depends on !PPC_ISERIES
+
+config CMDLINE
+       string "Initial kernel command string"
+       depends on CMDLINE_BOOL
+       default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
+       help
+         On some platforms, there is currently no way for the boot loader to
+         pass arguments to the kernel. For these platforms, you can supply
+         some command-line options at build time by entering them here.  In
+         most cases you will need to specify the root device here.
+
  endmenu
  
  config ISA_DMA_API
         bool
         default y
  
-menu "General setup"
+menu "Bus Options"
  
  config ISA
         bool
@@ -389,45 +416,12 @@ config PCI_DOMAINS
         bool
         default PCI
  
-source "fs/Kconfig.binfmt"
-
  source "drivers/pci/Kconfig"
  
-config HOTPLUG_CPU
-       bool "Support for hot-pluggable CPUs"
-       depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC)
-       select HOTPLUG
-       ---help---
-         Say Y here to be able to turn CPUs off and on.
-
-         Say N if you are unsure.
-
  source "drivers/pcmcia/Kconfig"
  
  source "drivers/pci/hotplug/Kconfig"
  
-config PROC_DEVICETREE
-       bool "Support for Open Firmware device tree in /proc"
-       depends on !PPC_ISERIES
-       help
-         This option adds a device-tree directory under /proc which contains
-         an image of the device tree that the kernel copies from Open
-         Firmware. If unsure, say Y here.
-
-config CMDLINE_BOOL
-       bool "Default bootloader kernel arguments"
-       depends on !PPC_ISERIES
-
-config CMDLINE
-       string "Initial kernel command string"
-       depends on CMDLINE_BOOL
-       default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
-       help
-         On some platforms, there is currently no way for the boot loader to
-         pass arguments to the kernel. For these platforms, you can supply
-         some command-line options at build time by entering them here.  In
-         most cases you will need to specify the root device here.
-
  endmenu
  
  source "net/Kconfig"
diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile

index 731b84758331bfc5fc790554265b8bf9a4eb8557..6350cce82efb9d5c27ed703a6c4fb8e5a9275534 100644 (file)
--- a/arch/ppc64/Makefile
+++ b/arch/ppc64/Makefile
@@ -55,6 +55,8 @@ LDFLAGS               := -m elf64ppc
  LDFLAGS_vmlinux        := -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD)
  CFLAGS         += -msoft-float -pipe -mminimal-toc -mtraceback=none \
                    -mcall-aixdesc
+# Temporary hack until we have migrated to asm-powerpc
+CPPFLAGS       += -Iinclude3
  
  GCC_VERSION     := $(call cc-version)
  GCC_BROKEN_VEC := $(shell if [ $(GCC_VERSION) -lt 0400 ] ; then echo "y"; fi ;)
@@ -112,6 +114,7 @@ all: $(KBUILD_IMAGE)
  
  archclean:
         $(Q)$(MAKE) $(clean)=$(boot)
+       $(Q)rm -rf include3
  
  prepare: include/asm-ppc64/offsets.h
  
@@ -121,6 +124,12 @@ arch/ppc64/kernel/asm-offsets.s: include/asm include/linux/version.h \
  include/asm-ppc64/offsets.h: arch/ppc64/kernel/asm-offsets.s
         $(call filechk,gen-asm-offsets)
  
+# Temporary hack until we have migrated to asm-powerpc
+include/asm: include3/asm
+include3/asm:
+       $(Q)if [ ! -d include3 ]; then mkdir -p include3; fi;
+       $(Q)ln -fsn $(srctree)/include/asm-powerpc include3/asm
+
  define archhelp
    echo  '* zImage       - Compressed kernel image (arch/$(ARCH)/boot/zImage)'
    echo  '  zImage.initrd- Compressed kernel image with initrd attached,'
diff --git a/arch/ppc64/boot/Makefile b/arch/ppc64/boot/Makefile

index 683b2d43c15fe2facb4a7a1d7f388f260481c3a0..2c5f5e73d00c76d3e4a56e8f721656c926aa55e9 100644 (file)
--- a/arch/ppc64/boot/Makefile
+++ b/arch/ppc64/boot/Makefile
@@ -22,8 +22,8 @@
  
  
  HOSTCC         := gcc
-BOOTCFLAGS     := $(HOSTCFLAGS) $(LINUXINCLUDE) -fno-builtin 
-BOOTAFLAGS     := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional
+BOOTCFLAGS     := $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem $(shell $(CROSS32CC) -print-file-name=include)
+BOOTAFLAGS     := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
  BOOTLFLAGS     := -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds
  OBJCOPYFLAGS    := contents,alloc,load,readonly,data
  
diff --git a/arch/ppc64/boot/addnote.c b/arch/ppc64/boot/addnote.c

index 719663a694bb9a06d19ce3048154eb22dfd68ebb..8041a9845ab7efa9c94d58505a1d32c949b7e716 100644 (file)
--- a/arch/ppc64/boot/addnote.c
+++ b/arch/ppc64/boot/addnote.c
@@ -157,7 +157,7 @@ main(int ac, char **av)
         PUT_32BE(ns, strlen(arch) + 1);
         PUT_32BE(ns + 4, N_DESCR * 4);
         PUT_32BE(ns + 8, 0x1275);
-       strcpy(&buf[ns + 12], arch);
+       strcpy((char *) &buf[ns + 12], arch);
         ns += 12 + strlen(arch) + 1;
         for (i = 0; i < N_DESCR; ++i, ns += 4)
                 PUT_32BE(ns, descr[i]);
@@ -172,7 +172,7 @@ main(int ac, char **av)
         PUT_32BE(ns, strlen(rpaname) + 1);
         PUT_32BE(ns + 4, sizeof(rpanote));
         PUT_32BE(ns + 8, 0x12759999);
-       strcpy(&buf[ns + 12], rpaname);
+       strcpy((char *) &buf[ns + 12], rpaname);
         ns += 12 + ROUNDUP(strlen(rpaname) + 1);
         for (i = 0; i < N_RPA_DESCR; ++i, ns += 4)
                 PUT_32BE(ns, rpanote[i]);
diff --git a/arch/ppc64/boot/crt0.S b/arch/ppc64/boot/crt0.S

index 04d3e74cd72f7d53fd9fbbfe7378a6b2f5e62e2f..3861e7f9cf19fe47aafe8e65d52b600936d8b5c8 100644 (file)
--- a/arch/ppc64/boot/crt0.S
+++ b/arch/ppc64/boot/crt0.S
@@ -9,7 +9,7 @@
   * NOTE: this code runs in 32 bit mode and is packaged as ELF32.
   */
  
-#include <asm/ppc_asm.h>
+#include "ppc_asm.h"
  
         .text
         .globl  _start
diff --git a/arch/ppc64/boot/div64.S b/arch/ppc64/boot/div64.S

index 38f7e466d7d65603e1b62ead96b4dcbc5c479829..722f360a32a9e905b2edf6f3cdb90b9ef9e5d588 100644 (file)
--- a/arch/ppc64/boot/div64.S
+++ b/arch/ppc64/boot/div64.S
@@ -13,7 +13,7 @@
   * as published by the Free Software Foundation; either version
   * 2 of the License, or (at your option) any later version.
   */
-#include <asm/ppc_asm.h>
+#include "ppc_asm.h"
  
         .globl __div64_32
  __div64_32:
diff --git a/arch/ppc64/boot/elf.h b/arch/ppc64/boot/elf.h

new file mode 100644 (file)

index 0000000..d4828fc
--- /dev/null
+++ b/arch/ppc64/boot/elf.h
@@ -0,0 +1,149 @@
+#ifndef _PPC_BOOT_ELF_H_
+#define _PPC_BOOT_ELF_H_
+
+/* 32-bit ELF base types. */
+typedef unsigned int Elf32_Addr;
+typedef unsigned short Elf32_Half;
+typedef unsigned int Elf32_Off;
+typedef signed int Elf32_Sword;
+typedef unsigned int Elf32_Word;
+
+/* 64-bit ELF base types. */
+typedef unsigned long long Elf64_Addr;
+typedef unsigned short Elf64_Half;
+typedef signed short Elf64_SHalf;
+typedef unsigned long long Elf64_Off;
+typedef signed int Elf64_Sword;
+typedef unsigned int Elf64_Word;
+typedef unsigned long long Elf64_Xword;
+typedef signed long long Elf64_Sxword;
+
+/* These constants are for the segment types stored in the image headers */
+#define PT_NULL    0
+#define PT_LOAD    1
+#define PT_DYNAMIC 2
+#define PT_INTERP  3
+#define PT_NOTE    4
+#define PT_SHLIB   5
+#define PT_PHDR    6
+#define PT_TLS     7           /* Thread local storage segment */
+#define PT_LOOS    0x60000000  /* OS-specific */
+#define PT_HIOS    0x6fffffff  /* OS-specific */
+#define PT_LOPROC  0x70000000
+#define PT_HIPROC  0x7fffffff
+#define PT_GNU_EH_FRAME                0x6474e550
+
+#define PT_GNU_STACK   (PT_LOOS + 0x474e551)
+
+/* These constants define the different elf file types */
+#define ET_NONE   0
+#define ET_REL    1
+#define ET_EXEC   2
+#define ET_DYN    3
+#define ET_CORE   4
+#define ET_LOPROC 0xff00
+#define ET_HIPROC 0xffff
+
+/* These constants define the various ELF target machines */
+#define EM_NONE  0
+#define EM_PPC        20       /* PowerPC */
+#define EM_PPC64       21      /* PowerPC64 */
+
+#define EI_NIDENT      16
+
+typedef struct elf32_hdr {
+       unsigned char e_ident[EI_NIDENT];
+       Elf32_Half e_type;
+       Elf32_Half e_machine;
+       Elf32_Word e_version;
+       Elf32_Addr e_entry;     /* Entry point */
+       Elf32_Off e_phoff;
+       Elf32_Off e_shoff;
+       Elf32_Word e_flags;
+       Elf32_Half e_ehsize;
+       Elf32_Half e_phentsize;
+       Elf32_Half e_phnum;
+       Elf32_Half e_shentsize;
+       Elf32_Half e_shnum;
+       Elf32_Half e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct elf64_hdr {
+       unsigned char e_ident[16];      /* ELF "magic number" */
+       Elf64_Half e_type;
+       Elf64_Half e_machine;
+       Elf64_Word e_version;
+       Elf64_Addr e_entry;     /* Entry point virtual address */
+       Elf64_Off e_phoff;      /* Program header table file offset */
+       Elf64_Off e_shoff;      /* Section header table file offset */
+       Elf64_Word e_flags;
+       Elf64_Half e_ehsize;
+       Elf64_Half e_phentsize;
+       Elf64_Half e_phnum;
+       Elf64_Half e_shentsize;
+       Elf64_Half e_shnum;
+       Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+
+/* These constants define the permissions on sections in the program
+   header, p_flags. */
+#define PF_R           0x4
+#define PF_W           0x2
+#define PF_X           0x1
+
+typedef struct elf32_phdr {
+       Elf32_Word p_type;
+       Elf32_Off p_offset;
+       Elf32_Addr p_vaddr;
+       Elf32_Addr p_paddr;
+       Elf32_Word p_filesz;
+       Elf32_Word p_memsz;
+       Elf32_Word p_flags;
+       Elf32_Word p_align;
+} Elf32_Phdr;
+
+typedef struct elf64_phdr {
+       Elf64_Word p_type;
+       Elf64_Word p_flags;
+       Elf64_Off p_offset;     /* Segment file offset */
+       Elf64_Addr p_vaddr;     /* Segment virtual address */
+       Elf64_Addr p_paddr;     /* Segment physical address */
+       Elf64_Xword p_filesz;   /* Segment size in file */
+       Elf64_Xword p_memsz;    /* Segment size in memory */
+       Elf64_Xword p_align;    /* Segment alignment, file & memory */
+} Elf64_Phdr;
+
+#define        EI_MAG0         0       /* e_ident[] indexes */
+#define        EI_MAG1         1
+#define        EI_MAG2         2
+#define        EI_MAG3         3
+#define        EI_CLASS        4
+#define        EI_DATA         5
+#define        EI_VERSION      6
+#define        EI_OSABI        7
+#define        EI_PAD          8
+
+#define        ELFMAG0         0x7f    /* EI_MAG */
+#define        ELFMAG1         'E'
+#define        ELFMAG2         'L'
+#define        ELFMAG3         'F'
+#define        ELFMAG          "\177ELF"
+#define        SELFMAG         4
+
+#define        ELFCLASSNONE    0       /* EI_CLASS */
+#define        ELFCLASS32      1
+#define        ELFCLASS64      2
+#define        ELFCLASSNUM     3
+
+#define ELFDATANONE    0       /* e_ident[EI_DATA] */
+#define ELFDATA2LSB    1
+#define ELFDATA2MSB    2
+
+#define EV_NONE                0       /* e_version, EI_VERSION */
+#define EV_CURRENT     1
+#define EV_NUM         2
+
+#define ELFOSABI_NONE  0
+#define ELFOSABI_LINUX 3
+
+#endif                         /* _PPC_BOOT_ELF_H_ */
diff --git a/arch/ppc64/boot/main.c b/arch/ppc64/boot/main.c

index 199d9804f61c418a9351697846e1e83964f6f615..99e68cfbe6883b9ee0e6f249a6d4ba9b30ae59a2 100644 (file)
--- a/arch/ppc64/boot/main.c
+++ b/arch/ppc64/boot/main.c
@@ -8,36 +8,28 @@
   * as published by the Free Software Foundation; either version
   * 2 of the License, or (at your option) any later version.
   */
-#include "ppc32-types.h"
+#include <stdarg.h>
+#include <stddef.h>
+#include "elf.h"
+#include "page.h"
+#include "string.h"
+#include "stdio.h"
+#include "prom.h"
  #include "zlib.h"
-#include <linux/elf.h>
-#include <linux/string.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-
-extern void *finddevice(const char *);
-extern int getprop(void *, const char *, void *, int);
-extern void printf(const char *fmt, ...);
-extern int sprintf(char *buf, const char *fmt, ...);
-void gunzip(void *, int, unsigned char *, int *);
-void *claim(unsigned int, unsigned int, unsigned int);
-void flush_cache(void *, unsigned long);
-void pause(void);
-extern void exit(void);
-
-unsigned long strlen(const char *s);
-void *memmove(void *dest, const void *src, unsigned long n);
-void *memcpy(void *dest, const void *src, unsigned long n);
+
+static void gunzip(void *, int, unsigned char *, int *);
+extern void flush_cache(void *, unsigned long);
+
  
  /* Value picked to match that used by yaboot */
  #define PROG_START     0x01400000
  #define RAM_END                (256<<20) // Fixme: use OF */
  
-char *avail_ram;
-char *begin_avail, *end_avail;
-char *avail_high;
-unsigned int heap_use;
-unsigned int heap_max;
+static char *avail_ram;
+static char *begin_avail, *end_avail;
+static char *avail_high;
+static unsigned int heap_use;
+static unsigned int heap_max;
  
  extern char _start[];
  extern char _vmlinux_start[];
@@ -52,9 +44,9 @@ struct addr_range {
         unsigned long size;
         unsigned long memsize;
  };
-struct addr_range vmlinux = {0, 0, 0};
-struct addr_range vmlinuz = {0, 0, 0};
-struct addr_range initrd  = {0, 0, 0};
+static struct addr_range vmlinux = {0, 0, 0};
+static struct addr_range vmlinuz = {0, 0, 0};
+static struct addr_range initrd  = {0, 0, 0};
  
  static char scratch[128<<10];  /* 128kB of scratch space for gunzip */
  
@@ -64,13 +56,6 @@ typedef void (*kernel_entry_t)( unsigned long,
                                 void *);
  
  
-int (*prom)(void *);
-
-void *chosen_handle;
-void *stdin;
-void *stdout;
-void *stderr;
-
  #undef DEBUG
  
  static unsigned long claim_base = PROG_START;
@@ -277,7 +262,7 @@ void zfree(void *x, void *addr, unsigned nb)
  
  #define DEFLATED       8
  
-void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp)
+static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp)
  {
         z_stream s;
         int r, i, flags;
diff --git a/arch/ppc64/boot/page.h b/arch/ppc64/boot/page.h

new file mode 100644 (file)

index 0000000..14eca30
--- /dev/null
+++ b/arch/ppc64/boot/page.h
@@ -0,0 +1,34 @@
+#ifndef _PPC_BOOT_PAGE_H
+#define _PPC_BOOT_PAGE_H
+/*
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifdef __ASSEMBLY__
+#define ASM_CONST(x) x
+#else
+#define __ASM_CONST(x) x##UL
+#define ASM_CONST(x) __ASM_CONST(x)
+#endif
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT     12
+#define PAGE_SIZE      (ASM_CONST(1) << PAGE_SHIFT)
+#define PAGE_MASK      (~(PAGE_SIZE-1))
+
+/* align addr on a size boundary - adjust address up/down if needed */
+#define _ALIGN_UP(addr,size)   (((addr)+((size)-1))&(~((size)-1)))
+#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1)))
+
+/* align addr on a size boundary - adjust address up if needed */
+#define _ALIGN(addr,size)     _ALIGN_UP(addr,size)
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)       _ALIGN(addr, PAGE_SIZE)
+
+#endif                         /* _PPC_BOOT_PAGE_H */
diff --git a/arch/ppc64/boot/ppc32-types.h b/arch/ppc64/boot/ppc32-types.h

deleted file mode 100644 (file)

index f7b8884..0000000
--- a/arch/ppc64/boot/ppc32-types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef _PPC64_TYPES_H
-#define _PPC64_TYPES_H
-
-typedef __signed__ char __s8;
-typedef unsigned char __u8;
-
-typedef __signed__ short __s16;
-typedef unsigned short __u16;
-
-typedef __signed__ int __s32;
-typedef unsigned int __u32;
-
-typedef __signed__ long long __s64;
-typedef unsigned long long __u64;
-
-typedef signed char s8;
-typedef unsigned char u8;
-
-typedef signed short s16;
-typedef unsigned short u16;
-
-typedef signed int s32;
-typedef unsigned int u32;
-
-typedef signed long long s64;
-typedef unsigned long long u64;
-
-typedef struct {
-       __u32 u[4];
-} __attribute((aligned(16))) __vector128;
-
-#define BITS_PER_LONG 32
-
-typedef __vector128 vector128;
-
-#endif /* _PPC64_TYPES_H */
diff --git a/arch/ppc64/boot/ppc_asm.h b/arch/ppc64/boot/ppc_asm.h

new file mode 100644 (file)

index 0000000..1c2c281
--- /dev/null
+++ b/arch/ppc64/boot/ppc_asm.h
@@ -0,0 +1,62 @@
+#ifndef _PPC64_PPC_ASM_H
+#define _PPC64_PPC_ASM_H
+/*
+ *
+ * Definitions used by various bits of low-level assembly code on PowerPC.
+ *
+ * Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+/* Condition Register Bit Fields */
+
+#define        cr0     0
+#define        cr1     1
+#define        cr2     2
+#define        cr3     3
+#define        cr4     4
+#define        cr5     5
+#define        cr6     6
+#define        cr7     7
+
+
+/* General Purpose Registers (GPRs) */
+
+#define        r0      0
+#define        r1      1
+#define        r2      2
+#define        r3      3
+#define        r4      4
+#define        r5      5
+#define        r6      6
+#define        r7      7
+#define        r8      8
+#define        r9      9
+#define        r10     10
+#define        r11     11
+#define        r12     12
+#define        r13     13
+#define        r14     14
+#define        r15     15
+#define        r16     16
+#define        r17     17
+#define        r18     18
+#define        r19     19
+#define        r20     20
+#define        r21     21
+#define        r22     22
+#define        r23     23
+#define        r24     24
+#define        r25     25
+#define        r26     26
+#define        r27     27
+#define        r28     28
+#define        r29     29
+#define        r30     30
+#define        r31     31
+
+#endif /* _PPC64_PPC_ASM_H */
diff --git a/arch/ppc64/boot/prom.c b/arch/ppc64/boot/prom.c

index 5e48b80ff5a07471bd2f07feba550bd85cf9bbaf..4bea2f4dcb067412be7610b23308a0d9c996302b 100644 (file)
--- a/arch/ppc64/boot/prom.c
+++ b/arch/ppc64/boot/prom.c
@@ -7,43 +7,19 @@
   * 2 of the License, or (at your option) any later version.
   */
  #include <stdarg.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-
-extern __u32 __div64_32(unsigned long long *dividend, __u32 divisor);
-
-/* The unnecessary pointer compare is there
- * to check for type safety (n must be 64bit)
- */
-# define do_div(n,base) ({                             \
-       __u32 __base = (base);                  \
-       __u32 __rem;                                    \
-       (void)(((typeof((n)) *)0) == ((unsigned long long *)0));        \
-       if (((n) >> 32) == 0) {                 \
-               __rem = (__u32)(n) % __base;            \
-               (n) = (__u32)(n) / __base;              \
-       } else                                          \
-               __rem = __div64_32(&(n), __base);       \
-       __rem;                                          \
- })
+#include <stddef.h>
+#include "string.h"
+#include "stdio.h"
+#include "prom.h"
  
  int (*prom)(void *);
  
  void *chosen_handle;
+
  void *stdin;
  void *stdout;
  void *stderr;
  
-void exit(void);
-void *finddevice(const char *name);
-int getprop(void *phandle, const char *name, void *buf, int buflen);
-void chrpboot(int a1, int a2, void *prom);     /* in main.c */
-
-int printf(char *fmt, ...);
-
-/* there is no convenient header to get this from...  -- paulus */
-extern unsigned long strlen(const char *);
  
  int
  write(void *handle, void *ptr, int nb)
@@ -210,107 +186,6 @@ fputs(char *str, void *f)
         return write(f, str, n) == n? 0: -1;
  }
  
-int
-readchar(void)
-{
-       char ch;
-
-       for (;;) {
-               switch (read(stdin, &ch, 1)) {
-               case 1:
-                       return ch;
-               case -1:
-                       printf("read(stdin) returned -1\r\n");
-                       return -1;
-               }
-       }
-}
-
-static char line[256];
-static char *lineptr;
-static int lineleft;
-
-int
-getchar(void)
-{
-       int c;
-
-       if (lineleft == 0) {
-               lineptr = line;
-               for (;;) {
-                       c = readchar();
-                       if (c == -1 || c == 4)
-                               break;
-                       if (c == '\r' || c == '\n') {
-                               *lineptr++ = '\n';
-                               putchar('\n');
-                               break;
-                       }
-                       switch (c) {
-                       case 0177:
-                       case '\b':
-                               if (lineptr > line) {
-                                       putchar('\b');
-                                       putchar(' ');
-                                       putchar('\b');
-                                       --lineptr;
-                               }
-                               break;
-                       case 'U' & 0x1F:
-                               while (lineptr > line) {
-                                       putchar('\b');
-                                       putchar(' ');
-                                       putchar('\b');
-                                       --lineptr;
-                               }
-                               break;
-                       default:
-                               if (lineptr >= &line[sizeof(line) - 1])
-                                       putchar('\a');
-                               else {
-                                       putchar(c);
-                                       *lineptr++ = c;
-                               }
-                       }
-               }
-               lineleft = lineptr - line;
-               lineptr = line;
-       }
-       if (lineleft == 0)
-               return -1;
-       --lineleft;
-       return *lineptr++;
-}
-
-
-
-/* String functions lifted from lib/vsprintf.c and lib/ctype.c */
-unsigned char _ctype[] = {
-_C,_C,_C,_C,_C,_C,_C,_C,                       /* 0-7 */
-_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,                /* 8-15 */
-_C,_C,_C,_C,_C,_C,_C,_C,                       /* 16-23 */
-_C,_C,_C,_C,_C,_C,_C,_C,                       /* 24-31 */
-_S|_SP,_P,_P,_P,_P,_P,_P,_P,                   /* 32-39 */
-_P,_P,_P,_P,_P,_P,_P,_P,                       /* 40-47 */
-_D,_D,_D,_D,_D,_D,_D,_D,                       /* 48-55 */
-_D,_D,_P,_P,_P,_P,_P,_P,                       /* 56-63 */
-_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,     /* 64-71 */
-_U,_U,_U,_U,_U,_U,_U,_U,                       /* 72-79 */
-_U,_U,_U,_U,_U,_U,_U,_U,                       /* 80-87 */
-_U,_U,_U,_P,_P,_P,_P,_P,                       /* 88-95 */
-_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,     /* 96-103 */
-_L,_L,_L,_L,_L,_L,_L,_L,                       /* 104-111 */
-_L,_L,_L,_L,_L,_L,_L,_L,                       /* 112-119 */
-_L,_L,_L,_P,_P,_P,_P,_C,                       /* 120-127 */
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,               /* 128-143 */
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,               /* 144-159 */
-_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,   /* 160-175 */
-_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,       /* 176-191 */
-_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,       /* 192-207 */
-_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,       /* 208-223 */
-_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,       /* 224-239 */
-_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};      /* 240-255 */
-
  size_t strnlen(const char * s, size_t count)
  {
         const char *sc;
@@ -320,44 +195,30 @@ size_t strnlen(const char * s, size_t count)
         return sc - s;
  }
  
-unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
-{
-       unsigned long result = 0,value;
+extern unsigned int __div64_32(unsigned long long *dividend,
+                              unsigned int divisor);
  
-       if (!base) {
-               base = 10;
-               if (*cp == '0') {
-                       base = 8;
-                       cp++;
-                       if ((*cp == 'x') && isxdigit(cp[1])) {
-                               cp++;
-                               base = 16;
-                       }
-               }
-       }
-       while (isxdigit(*cp) &&
-              (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
-               result = result*base + value;
-               cp++;
-       }
-       if (endp)
-               *endp = (char *)cp;
-       return result;
-}
-
-long simple_strtol(const char *cp,char **endp,unsigned int base)
-{
-       if(*cp=='-')
-               return -simple_strtoul(cp+1,endp,base);
-       return simple_strtoul(cp,endp,base);
-}
+/* The unnecessary pointer compare is there
+ * to check for type safety (n must be 64bit)
+ */
+# define do_div(n,base) ({                                             \
+       unsigned int __base = (base);                                   \
+       unsigned int __rem;                                             \
+       (void)(((typeof((n)) *)0) == ((unsigned long long *)0));        \
+       if (((n) >> 32) == 0) {                                         \
+               __rem = (unsigned int)(n) % __base;                     \
+               (n) = (unsigned int)(n) / __base;                       \
+       } else                                                          \
+               __rem = __div64_32(&(n), __base);                       \
+       __rem;                                                          \
+ })
  
  static int skip_atoi(const char **s)
  {
-       int i=0;
+       int i, c;
  
-       while (isdigit(**s))
-               i = i*10 + *((*s)++) - '0';
+       for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s)
+               i = i*10 + c - '0';
         return i;
  }
  
@@ -436,9 +297,6 @@ static char * number(char * str, unsigned long long num, int base, int size, int
         return str;
  }
  
-/* Forward decl. needed for IP address printing stuff... */
-int sprintf(char * buf, const char *fmt, ...);
-
  int vsprintf(char *buf, const char *fmt, va_list args)
  {
         int len;
@@ -477,7 +335,7 @@ int vsprintf(char *buf, const char *fmt, va_list args)
                 
                 /* get field width */
                 field_width = -1;
-               if (isdigit(*fmt))
+               if ('0' <= *fmt && *fmt <= '9')
                         field_width = skip_atoi(&fmt);
                 else if (*fmt == '*') {
                         ++fmt;
@@ -493,7 +351,7 @@ int vsprintf(char *buf, const char *fmt, va_list args)
                 precision = -1;
                 if (*fmt == '.') {
                         ++fmt;  
-                       if (isdigit(*fmt))
+                       if ('0' <= *fmt && *fmt <= '9')
                                 precision = skip_atoi(&fmt);
                         else if (*fmt == '*') {
                                 ++fmt;
@@ -628,7 +486,7 @@ int sprintf(char * buf, const char *fmt, ...)
  static char sprint_buf[1024];
  
  int
-printf(char *fmt, ...)
+printf(const char *fmt, ...)
  {
         va_list args;
         int n;
diff --git a/arch/ppc64/boot/prom.h b/arch/ppc64/boot/prom.h

new file mode 100644 (file)

index 0000000..96ab5ae
--- /dev/null
+++ b/arch/ppc64/boot/prom.h
@@ -0,0 +1,18 @@
+#ifndef _PPC_BOOT_PROM_H_
+#define _PPC_BOOT_PROM_H_
+
+extern int (*prom) (void *);
+extern void *chosen_handle;
+
+extern void *stdin;
+extern void *stdout;
+extern void *stderr;
+
+extern int write(void *handle, void *ptr, int nb);
+extern int read(void *handle, void *ptr, int nb);
+extern void exit(void);
+extern void pause(void);
+extern void *finddevice(const char *);
+extern void *claim(unsigned long virt, unsigned long size, unsigned long align);
+extern int getprop(void *phandle, const char *name, void *buf, int buflen);
+#endif                         /* _PPC_BOOT_PROM_H_ */
diff --git a/arch/ppc64/boot/stdio.h b/arch/ppc64/boot/stdio.h

new file mode 100644 (file)

index 0000000..24bd3a8
--- /dev/null
+++ b/arch/ppc64/boot/stdio.h
@@ -0,0 +1,16 @@
+#ifndef _PPC_BOOT_STDIO_H_
+#define _PPC_BOOT_STDIO_H_
+
+extern int printf(const char *fmt, ...);
+
+extern int sprintf(char *buf, const char *fmt, ...);
+
+extern int vsprintf(char *buf, const char *fmt, va_list args);
+
+extern int putc(int c, void *f);
+extern int putchar(int c);
+extern int getchar(void);
+
+extern int fputs(char *str, void *f);
+
+#endif                         /* _PPC_BOOT_STDIO_H_ */
diff --git a/arch/ppc64/boot/string.S b/arch/ppc64/boot/string.S

index ba5f2d21c9eade5fac5ab5fa4990bc0541dd04a7..7ade87ae7718304fc6ef0ff70c9d36a2cb9a523f 100644 (file)
--- a/arch/ppc64/boot/string.S
+++ b/arch/ppc64/boot/string.S
@@ -9,7 +9,7 @@
   * NOTE: this code runs in 32 bit mode and is packaged as ELF32.
   */
  
-#include <asm/ppc_asm.h>
+#include "ppc_asm.h"
  
         .text
         .globl  strcpy
diff --git a/arch/ppc64/boot/string.h b/arch/ppc64/boot/string.h

new file mode 100644 (file)

index 0000000..9289258
--- /dev/null
+++ b/arch/ppc64/boot/string.h
@@ -0,0 +1,16 @@
+#ifndef _PPC_BOOT_STRING_H_
+#define _PPC_BOOT_STRING_H_
+
+extern char *strcpy(char *dest, const char *src);
+extern char *strncpy(char *dest, const char *src, size_t n);
+extern char *strcat(char *dest, const char *src);
+extern int strcmp(const char *s1, const char *s2);
+extern size_t strlen(const char *s);
+extern size_t strnlen(const char *s, size_t count);
+
+extern void *memset(void *s, int c, size_t n);
+extern void *memmove(void *dest, const void *src, unsigned long n);
+extern void *memcpy(void *dest, const void *src, unsigned long n);
+extern int memcmp(const void *s1, const void *s2, size_t n);
+
+#endif /* _PPC_BOOT_STRING_H_ */
diff --git a/arch/ppc64/boot/zlib.c b/arch/ppc64/boot/zlib.c

index 9d5e4e9832d2a8bea9760f8af922ec4d815fbd88..0d910cd2079df021ea09dd1f87caa94f922e66bc 100644 (file)
--- a/arch/ppc64/boot/zlib.c
+++ b/arch/ppc64/boot/zlib.c
@@ -107,7 +107,7 @@ extern void *memcpy(void *, const void *, unsigned long);
  
  /* Diagnostic functions */
  #ifdef DEBUG_ZLIB
-#  include <stdio.h>
+#  include "stdio.h"
  #  ifndef verbose
  #    define verbose 0
  #  endif
@@ -1307,7 +1307,7 @@ local int huft_build(
    {
      *t = (inflate_huft *)Z_NULL;
      *m = 0;
-    return Z_OK;
+    return Z_DATA_ERROR;
    }
  
  
@@ -1351,6 +1351,7 @@ local int huft_build(
      if ((j = *p++) != 0)
        v[x[j]++] = i;
    } while (++i < n);
+  n = x[g];                    /* set n to length of v */
  
  
    /* Generate the Huffman codes and for each, make the table entries */
diff --git a/arch/ppc64/configs/bpa_defconfig b/arch/ppc64/configs/bpa_defconfig

new file mode 100644 (file)

index 0000000..46c5da4
--- /dev/null
+++ b/arch/ppc64/configs/bpa_defconfig
@@ -0,0 +1,987 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.13-rc6
+# Mon Aug  8 14:12:19 2005
+#
+CONFIG_64BIT=y
+CONFIG_MMU=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_ISA_DMA=y
+CONFIG_HAVE_DEC_LOCK=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_COMPAT=y
+CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
+CONFIG_FORCE_MAX_ZONEORDER=13
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+CONFIG_HOTPLUG=y
+CONFIG_KOBJECT_UEVENT=y
+# CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+# CONFIG_KMOD is not set
+CONFIG_STOP_MACHINE=y
+CONFIG_SYSVIPC_COMPAT=y
+
+#
+# Platform support
+#
+# CONFIG_PPC_ISERIES is not set
+CONFIG_PPC_MULTIPLATFORM=y
+# CONFIG_PPC_PSERIES is not set
+CONFIG_PPC_BPA=y
+# CONFIG_PPC_PMAC is not set
+# CONFIG_PPC_MAPLE is not set
+CONFIG_PPC=y
+CONFIG_PPC64=y
+CONFIG_PPC_OF=y
+CONFIG_BPA_IIC=y
+CONFIG_ALTIVEC=y
+CONFIG_KEXEC=y
+# CONFIG_U3_DART is not set
+# CONFIG_BOOTX_TEXT is not set
+# CONFIG_POWER4_ONLY is not set
+# CONFIG_IOMMU_VMERGE is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_NUMA is not set
+CONFIG_SCHED_SMT=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_PREEMPT_BKL=y
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_PPC_RTAS=y
+CONFIG_RTAS_PROC=y
+CONFIG_RTAS_FLASH=y
+CONFIG_SECCOMP=y
+CONFIG_ISA_DMA_API=y
+
+#
+# General setup
+#
+CONFIG_PCI=y
+CONFIG_PCI_DOMAINS=y
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+CONFIG_PCI_LEGACY_PROC=y
+CONFIG_PCI_NAMES=y
+# CONFIG_PCI_DEBUG is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PCI Hotplug Support
+#
+# CONFIG_HOTPLUG_PCI is not set
+CONFIG_PROC_DEVICETREE=y
+# CONFIG_CMDLINE_BOOL is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=y
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+CONFIG_INET_TUNNEL=y
+CONFIG_IP_TCPDIAG=y
+CONFIG_IP_TCPDIAG_IPV6=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+
+#
+# IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_TUNNEL=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=y
+# CONFIG_IP_NF_CT_ACCT is not set
+# CONFIG_IP_NF_CONNTRACK_MARK is not set
+CONFIG_IP_NF_CT_PROTO_SCTP=y
+CONFIG_IP_NF_FTP=m
+CONFIG_IP_NF_IRC=m
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_AMANDA=m
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_LIMIT=m
+CONFIG_IP_NF_MATCH_IPRANGE=m
+CONFIG_IP_NF_MATCH_MAC=m
+CONFIG_IP_NF_MATCH_PKTTYPE=m
+CONFIG_IP_NF_MATCH_MARK=m
+CONFIG_IP_NF_MATCH_MULTIPORT=m
+CONFIG_IP_NF_MATCH_TOS=m
+CONFIG_IP_NF_MATCH_RECENT=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_DSCP=m
+CONFIG_IP_NF_MATCH_AH_ESP=m
+CONFIG_IP_NF_MATCH_LENGTH=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_MATCH_TCPMSS=m
+CONFIG_IP_NF_MATCH_HELPER=m
+CONFIG_IP_NF_MATCH_STATE=m
+CONFIG_IP_NF_MATCH_CONNTRACK=m
+CONFIG_IP_NF_MATCH_OWNER=m
+CONFIG_IP_NF_MATCH_ADDRTYPE=m
+CONFIG_IP_NF_MATCH_REALM=m
+CONFIG_IP_NF_MATCH_SCTP=m
+CONFIG_IP_NF_MATCH_COMMENT=m
+CONFIG_IP_NF_MATCH_HASHLIMIT=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_IP_NF_TARGET_TCPMSS=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_SAME=m
+CONFIG_IP_NF_NAT_SNMP_BASIC=m
+CONFIG_IP_NF_NAT_IRC=m
+CONFIG_IP_NF_NAT_FTP=m
+CONFIG_IP_NF_NAT_TFTP=m
+CONFIG_IP_NF_NAT_AMANDA=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_TOS=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_DSCP=m
+CONFIG_IP_NF_TARGET_MARK=m
+CONFIG_IP_NF_TARGET_CLASSIFY=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_NOTRACK=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+
+#
+# IPv6: Netfilter Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP6_NF_QUEUE is not set
+# CONFIG_IP6_NF_IPTABLES is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+CONFIG_NET_CLS_ROUTE=y
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+CONFIG_BLK_DEV_NBD=y
+# CONFIG_BLK_DEV_SX8 is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=131072
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CDROM_PKTCDVD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+CONFIG_BLK_DEV_IDEDISK=y
+CONFIG_IDEDISK_MULTI_MODE=y
+# CONFIG_BLK_DEV_IDECD is not set
+# CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_IDEFLOPPY is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+# CONFIG_BLK_DEV_OFFBOARD is not set
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+# CONFIG_BLK_DEV_SL82C105 is not set
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+CONFIG_BLK_DEV_AEC62XX=y
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+# CONFIG_BLK_DEV_CMD64X is not set
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT34X is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+# CONFIG_BLK_DEV_PIIX is not set
+# CONFIG_BLK_DEV_IT821X is not set
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+# CONFIG_BLK_DEV_SVWKS is not set
+CONFIG_BLK_DEV_SIIMAGE=y
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_IDE_ARM is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+# CONFIG_SCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Macintosh device drivers
+#
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_NET_VENDOR_3COM is not set
+
+#
+# Tulip family network device support
+#
+# CONFIG_NET_TULIP is not set
+# CONFIG_HP100 is not set
+# CONFIG_NET_PCI is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=m
+# CONFIG_E1000_NAPI is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+CONFIG_SKGE=m
+# CONFIG_SK98LIN is not set
+# CONFIG_TIGON3 is not set
+# CONFIG_BNX2 is not set
+# CONFIG_MV643XX_ETH is not set
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+# CONFIG_SERIO_I8042 is not set
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_PCIPS2 is not set
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_ROCKETPORT is not set
+# CONFIG_CYCLADES is not set
+# CONFIG_MOXA_SMARTIO is not set
+# CONFIG_ISI is not set
+# CONFIG_SYNCLINK is not set
+# CONFIG_SYNCLINKMP is not set
+# CONFIG_N_HDLC is not set
+# CONFIG_SPECIALIX is not set
+# CONFIG_SX is not set
+# CONFIG_STALDRV is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
+CONFIG_UNIX98_PTYS=y
+# CONFIG_LEGACY_PTYS is not set
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+
+#
+# Watchdog Device Drivers
+#
+# CONFIG_SOFT_WATCHDOG is not set
+CONFIG_WATCHDOG_RTAS=y
+
+#
+# PCI-based Watchdog Cards
+#
+# CONFIG_PCIPCWATCHDOG is not set
+# CONFIG_WDTPCI is not set
+# CONFIG_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_HANGCHECK_TIMER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+
+#
+# I2C support
+#
+CONFIG_I2C=y
+# CONFIG_I2C_CHARDEV is not set
+
+#
+# I2C Algorithms
+#
+CONFIG_I2C_ALGOBIT=y
+# CONFIG_I2C_ALGOPCF is not set
+# CONFIG_I2C_ALGOPCA is not set
+
+#
+# I2C Hardware Bus support
+#
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
+# CONFIG_I2C_AMD756 is not set
+# CONFIG_I2C_AMD8111 is not set
+# CONFIG_I2C_I801 is not set
+# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_PIIX4 is not set
+# CONFIG_I2C_ISA is not set
+# CONFIG_I2C_NFORCE2 is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_PROSAVAGE is not set
+# CONFIG_I2C_SAVAGE4 is not set
+# CONFIG_SCx200_ACB is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
+# CONFIG_I2C_SIS96X is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_VIA is not set
+# CONFIG_I2C_VIAPRO is not set
+# CONFIG_I2C_VOODOO3 is not set
+# CONFIG_I2C_PCA_ISA is not set
+# CONFIG_I2C_SENSOR is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_SENSORS_DS1337 is not set
+# CONFIG_SENSORS_DS1374 is not set
+# CONFIG_SENSORS_EEPROM is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_RTC8564 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Hardware Monitoring support
+#
+# CONFIG_HWMON is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+# CONFIG_USB is not set
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# InfiniBand support
+#
+# CONFIG_INFINIBAND is not set
+
+#
+# SN Devices
+#
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+CONFIG_FS_POSIX_ACL=y
+
+#
+# XFS support
+#
+# CONFIG_XFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+# CONFIG_ZISOFS is not set
+CONFIG_UDF_FS=m
+CONFIG_UDF_NLS=y
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVPTS_FS_XATTR is not set
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
+# CONFIG_TMPFS_SECURITY is not set
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+CONFIG_NFSD=m
+CONFIG_NFSD_V2_ACL=y
+CONFIG_NFSD_V3=y
+CONFIG_NFSD_V3_ACL=y
+# CONFIG_NFSD_V4 is not set
+CONFIG_NFSD_TCP=y
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=m
+CONFIG_NFS_ACL_SUPPORT=m
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=m
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+CONFIG_EFI_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=m
+CONFIG_NLS_DEFAULT="iso8859-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Profiling support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=15
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+CONFIG_DEBUG_SPINLOCK_SLEEP=y
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_DEBUGGER=y
+# CONFIG_XMON is not set
+# CONFIG_PPCDBG is not set
+CONFIG_IRQSTACKS=y
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_HMAC=y
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=m
+CONFIG_CRYPTO_SHA1=m
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+CONFIG_CRYPTO_DES=m
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+CONFIG_CRYPTO_DEFLATE=m
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=m
+CONFIG_ZLIB_DEFLATE=m
diff --git a/arch/ppc64/configs/g5_defconfig b/arch/ppc64/configs/g5_defconfig

index 9e0abe8392fce34537d4d0e990f298f3473bbc2d..fc83d93302821697bef3b1af1d7cd0fb49c6e340 100644 (file)
--- a/arch/ppc64/configs/g5_defconfig
+++ b/arch/ppc64/configs/g5_defconfig
@@ -1,7 +1,7 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-rc3
-# Wed Jul 13 14:40:34 2005
+# Linux kernel version: 2.6.13-rc6
+# Mon Aug  8 14:16:59 2005
  #
  CONFIG_64BIT=y
  CONFIG_MMU=y
@@ -103,10 +103,10 @@ CONFIG_PREEMPT_NONE=y
  # CONFIG_PREEMPT_VOLUNTARY is not set
  # CONFIG_PREEMPT is not set
  # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
  # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
  CONFIG_GENERIC_HARDIRQS=y
  CONFIG_SECCOMP=y
  CONFIG_ISA_DMA_API=y
@@ -267,8 +267,6 @@ CONFIG_NET_CLS_ROUTE=y
  # Network testing
  #
  # CONFIG_NET_PKTGEN is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
  # CONFIG_HAMRADIO is not set
  # CONFIG_IRDA is not set
  # CONFIG_BT is not set
@@ -468,6 +466,7 @@ CONFIG_SCSI_QLA2XXX=y
  # CONFIG_SCSI_QLA2300 is not set
  # CONFIG_SCSI_QLA2322 is not set
  # CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_QLA24XX is not set
  # CONFIG_SCSI_LPFC is not set
  # CONFIG_SCSI_DC395x is not set
  # CONFIG_SCSI_DC390T is not set
@@ -539,11 +538,9 @@ CONFIG_IEEE1394_RAWIO=y
  #
  # Macintosh device drivers
  #
-CONFIG_ADB=y
  CONFIG_ADB_PMU=y
  CONFIG_PMAC_SMU=y
  # CONFIG_PMAC_BACKLIGHT is not set
-# CONFIG_INPUT_ADBHID is not set
  CONFIG_THERM_PM72=y
  
  #
@@ -631,6 +628,8 @@ CONFIG_PPPOE=m
  # CONFIG_NET_FC is not set
  # CONFIG_SHAPER is not set
  # CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
  
  #
  # ISDN subsystem
@@ -718,7 +717,6 @@ CONFIG_LEGACY_PTY_COUNT=256
  #
  # CONFIG_WATCHDOG is not set
  # CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
  # CONFIG_DTLK is not set
  # CONFIG_R3964 is not set
  # CONFIG_APPLICOM is not set
diff --git a/arch/ppc64/configs/iSeries_defconfig b/arch/ppc64/configs/iSeries_defconfig

index dbd54d188c2bfaa9cc9a3ec60ba3369b54359f2e..013d4e0e4003aee687135c36124033c780f7f234 100644 (file)
--- a/arch/ppc64/configs/iSeries_defconfig
+++ b/arch/ppc64/configs/iSeries_defconfig
@@ -1,7 +1,7 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-rc3
-# Wed Jul 13 14:43:39 2005
+# Linux kernel version: 2.6.13-rc6
+# Mon Aug  8 14:17:02 2005
  #
  CONFIG_64BIT=y
  CONFIG_MMU=y
@@ -94,12 +94,11 @@ CONFIG_PREEMPT_NONE=y
  # CONFIG_PREEMPT_VOLUNTARY is not set
  # CONFIG_PREEMPT is not set
  # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
  # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
  CONFIG_GENERIC_HARDIRQS=y
-CONFIG_MSCHUNKS=y
  CONFIG_LPARCFG=y
  CONFIG_SECCOMP=y
  CONFIG_ISA_DMA_API=y
@@ -257,10 +256,6 @@ CONFIG_NET_CLS_ROUTE=y
  # Network testing
  #
  # CONFIG_NET_PKTGEN is not set
-CONFIG_NETPOLL=y
-CONFIG_NETPOLL_RX=y
-CONFIG_NETPOLL_TRAP=y
-CONFIG_NET_POLL_CONTROLLER=y
  # CONFIG_HAMRADIO is not set
  # CONFIG_IRDA is not set
  # CONFIG_BT is not set
@@ -388,6 +383,7 @@ CONFIG_SCSI_QLA2XXX=y
  # CONFIG_SCSI_QLA2300 is not set
  # CONFIG_SCSI_QLA2322 is not set
  # CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_QLA24XX is not set
  # CONFIG_SCSI_LPFC is not set
  # CONFIG_SCSI_DC395x is not set
  # CONFIG_SCSI_DC390T is not set
@@ -537,6 +533,10 @@ CONFIG_PPPOE=m
  # CONFIG_NET_FC is not set
  # CONFIG_SHAPER is not set
  CONFIG_NETCONSOLE=y
+CONFIG_NETPOLL=y
+CONFIG_NETPOLL_RX=y
+CONFIG_NETPOLL_TRAP=y
+CONFIG_NET_POLL_CONTROLLER=y
  
  #
  # ISDN subsystem
@@ -610,7 +610,6 @@ CONFIG_LEGACY_PTY_COUNT=256
  #
  # CONFIG_WATCHDOG is not set
  # CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
  # CONFIG_DTLK is not set
  # CONFIG_R3964 is not set
  # CONFIG_APPLICOM is not set
diff --git a/arch/ppc64/configs/maple_defconfig b/arch/ppc64/configs/maple_defconfig

index cda8e8cb6d1d35d813e2d3b3a8de073ef8ed1746..dd42892cd873720083b79760557138e9542e9bbf 100644 (file)
--- a/arch/ppc64/configs/maple_defconfig
+++ b/arch/ppc64/configs/maple_defconfig
@@ -1,7 +1,7 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-rc3
-# Wed Jul 13 14:46:18 2005
+# Linux kernel version: 2.6.13-rc6
+# Mon Aug  8 14:17:04 2005
  #
  CONFIG_64BIT=y
  CONFIG_MMU=y
@@ -103,10 +103,10 @@ CONFIG_PREEMPT_NONE=y
  # CONFIG_PREEMPT_VOLUNTARY is not set
  # CONFIG_PREEMPT is not set
  # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
  # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
  CONFIG_GENERIC_HARDIRQS=y
  CONFIG_SECCOMP=y
  CONFIG_ISA_DMA_API=y
@@ -193,8 +193,6 @@ CONFIG_TCP_CONG_BIC=y
  # Network testing
  #
  # CONFIG_NET_PKTGEN is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
  # CONFIG_HAMRADIO is not set
  # CONFIG_IRDA is not set
  # CONFIG_BT is not set
@@ -433,6 +431,8 @@ CONFIG_E1000=y
  # CONFIG_SLIP is not set
  # CONFIG_SHAPER is not set
  # CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
  
  #
  # ISDN subsystem
@@ -512,7 +512,6 @@ CONFIG_LEGACY_PTY_COUNT=256
  #
  # CONFIG_WATCHDOG is not set
  # CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
  # CONFIG_DTLK is not set
  # CONFIG_R3964 is not set
  # CONFIG_APPLICOM is not set
diff --git a/arch/ppc64/configs/pSeries_defconfig b/arch/ppc64/configs/pSeries_defconfig

index 5112edf1818134191864fb1bb5913cb8d047ceea..29f7b80b0efc9ea7b8385ef5aa06d35e4fba531a 100644 (file)
--- a/arch/ppc64/configs/pSeries_defconfig
+++ b/arch/ppc64/configs/pSeries_defconfig
@@ -1,7 +1,7 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-rc3
-# Wed Jul 13 14:47:54 2005
+# Linux kernel version: 2.6.13-rc6
+# Mon Aug  8 14:17:07 2005
  #
  CONFIG_64BIT=y
  CONFIG_MMU=y
@@ -112,10 +112,10 @@ CONFIG_PREEMPT_NONE=y
  # CONFIG_PREEMPT_VOLUNTARY is not set
  # CONFIG_PREEMPT is not set
  # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
  # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
  CONFIG_EEH=y
  CONFIG_GENERIC_HARDIRQS=y
  CONFIG_PPC_RTAS=y
@@ -287,10 +287,6 @@ CONFIG_NET_CLS_ROUTE=y
  # Network testing
  #
  # CONFIG_NET_PKTGEN is not set
-CONFIG_NETPOLL=y
-CONFIG_NETPOLL_RX=y
-CONFIG_NETPOLL_TRAP=y
-CONFIG_NET_POLL_CONTROLLER=y
  # CONFIG_HAMRADIO is not set
  # CONFIG_IRDA is not set
  # CONFIG_BT is not set
@@ -488,6 +484,7 @@ CONFIG_SCSI_QLA22XX=m
  CONFIG_SCSI_QLA2300=m
  CONFIG_SCSI_QLA2322=m
  CONFIG_SCSI_QLA6312=m
+CONFIG_SCSI_QLA24XX=m
  CONFIG_SCSI_LPFC=m
  # CONFIG_SCSI_DC395x is not set
  # CONFIG_SCSI_DC390T is not set
@@ -645,6 +642,10 @@ CONFIG_PPPOE=m
  # CONFIG_NET_FC is not set
  # CONFIG_SHAPER is not set
  CONFIG_NETCONSOLE=y
+CONFIG_NETPOLL=y
+CONFIG_NETPOLL_RX=y
+CONFIG_NETPOLL_TRAP=y
+CONFIG_NET_POLL_CONTROLLER=y
  
  #
  # ISDN subsystem
@@ -746,7 +747,6 @@ CONFIG_HVCS=m
  #
  # CONFIG_WATCHDOG is not set
  # CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
  # CONFIG_DTLK is not set
  # CONFIG_R3964 is not set
  # CONFIG_APPLICOM is not set
diff --git a/arch/ppc64/defconfig b/arch/ppc64/defconfig

index fbf1f427ad3594d51f2feb13d1546f07192ed4e3..7cb4750bb7a9bf86f406a261d1bdb85a55f2231a 100644 (file)
--- a/arch/ppc64/defconfig
+++ b/arch/ppc64/defconfig
@@ -1,7 +1,7 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-rc3
-# Wed Jul 13 14:37:07 2005
+# Linux kernel version: 2.6.13-rc6
+# Mon Aug  8 14:16:54 2005
  #
  CONFIG_64BIT=y
  CONFIG_MMU=y
@@ -114,10 +114,10 @@ CONFIG_PREEMPT_NONE=y
  # CONFIG_PREEMPT_VOLUNTARY is not set
  # CONFIG_PREEMPT is not set
  # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
  # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
  CONFIG_EEH=y
  CONFIG_GENERIC_HARDIRQS=y
  CONFIG_PPC_RTAS=y
@@ -289,10 +289,6 @@ CONFIG_NET_CLS_ROUTE=y
  # Network testing
  #
  # CONFIG_NET_PKTGEN is not set
-CONFIG_NETPOLL=y
-CONFIG_NETPOLL_RX=y
-CONFIG_NETPOLL_TRAP=y
-CONFIG_NET_POLL_CONTROLLER=y
  # CONFIG_HAMRADIO is not set
  # CONFIG_IRDA is not set
  # CONFIG_BT is not set
@@ -506,6 +502,7 @@ CONFIG_SCSI_QLA22XX=m
  CONFIG_SCSI_QLA2300=m
  CONFIG_SCSI_QLA2322=m
  CONFIG_SCSI_QLA6312=m
+CONFIG_SCSI_QLA24XX=m
  CONFIG_SCSI_LPFC=m
  # CONFIG_SCSI_DC395x is not set
  # CONFIG_SCSI_DC390T is not set
@@ -579,11 +576,9 @@ CONFIG_IEEE1394_AMDTP=m
  #
  # Macintosh device drivers
  #
-CONFIG_ADB=y
  CONFIG_ADB_PMU=y
  CONFIG_PMAC_SMU=y
  # CONFIG_PMAC_BACKLIGHT is not set
-# CONFIG_INPUT_ADBHID is not set
  CONFIG_THERM_PM72=y
  
  #
@@ -694,6 +689,10 @@ CONFIG_PPPOE=m
  # CONFIG_NET_FC is not set
  # CONFIG_SHAPER is not set
  CONFIG_NETCONSOLE=y
+CONFIG_NETPOLL=y
+CONFIG_NETPOLL_RX=y
+CONFIG_NETPOLL_TRAP=y
+CONFIG_NET_POLL_CONTROLLER=y
  
  #
  # ISDN subsystem
@@ -797,7 +796,6 @@ CONFIG_HVCS=m
  #
  # CONFIG_WATCHDOG is not set
  # CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
  # CONFIG_DTLK is not set
  # CONFIG_R3964 is not set
  # CONFIG_APPLICOM is not set
diff --git a/arch/ppc64/kernel/LparData.c b/arch/ppc64/kernel/LparData.c

index 76cfd1449d529542759064ef970b2f1af2a8548c..0a9c23ca2f0ca6e901904273cd5fffbd12da36fd 100644 (file)
--- a/arch/ppc64/kernel/LparData.c
+++ b/arch/ppc64/kernel/LparData.c
@@ -32,32 +32,12 @@
  /* The HvReleaseData is the root of the information shared between 
   * the hypervisor and Linux.  
   */
-
-/*
- * WARNING - magic here
- *
- * Ok, this is a horrid hack below, but marginally better than the
- * alternatives.  What we really want is just to initialize
- * hvReleaseData in C as in the #if 0 section here.  However, gcc
- * refuses to believe that (u32)&x is a constant expression, so will
- * not allow the xMsNucDataOffset field to be properly initialized.
- * So, we declare hvReleaseData in inline asm instead.  We use inline
- * asm, rather than a .S file, because the assembler won't generate
- * the necessary relocation for the LparMap either, unless that symbol
- * is declared in the same source file.  Finally, we put the asm in a
- * dummy, attribute-used function, instead of at file scope, because
- * file scope asms don't allow contraints.  We want to use the "i"
- * constraints to put sizeof() and offsetof() expressions in there,
- * because including asm/offsets.h in C code then stringifying causes
- * all manner of warnings.
- */
-#if 0
  struct HvReleaseData hvReleaseData = {
         .xDesc = 0xc8a5d9c4,    /* "HvRD" ebcdic */
         .xSize = sizeof(struct HvReleaseData),
         .xVpdAreasPtrOffset = offsetof(struct naca_struct, xItVpdAreas),
         .xSlicNacaAddr = &naca,         /* 64-bit Naca address */
-       .xMsNucDataOffset = (u32)((unsigned long)&xLparMap - KERNELBASE),
+       .xMsNucDataOffset = LPARMAP_PHYS,
         .xFlags = HVREL_TAGSINACTIVE    /* tags inactive       */
                                         /* 64 bit              */
                                         /* shared processors   */
@@ -70,62 +50,16 @@ struct HvReleaseData hvReleaseData = {
                 0xa7, 0x40, 0xf2, 0x4b,
                 0xf4, 0x4b, 0xf6, 0xf4 },
  };
-#endif
-
-
-extern struct HvReleaseData hvReleaseData;
-
-static void __attribute_used__ hvReleaseData_wrapper(void)
-{
-       /* This doesn't appear to need any alignment (even 4 byte) */
-       asm volatile (
-               "       lparMapPhys = xLparMap - %3\n"
-               "       .data\n"
-               "       .globl  hvReleaseData\n"
-               "hvReleaseData:\n"
-               "       .long   0xc8a5d9c4\n"   /* xDesc */
-                                               /* "HvRD" in ebcdic */
-               "       .short  %0\n"           /* xSize */
-               "       .short  %1\n"           /* xVpdAreasPtrOffset */
-               "       .llong  naca\n"         /* xSlicNacaAddr */
-               "       .long   lparMapPhys\n"  /* xMsNucDataOffset */
-               "       .long   0\n"            /* xRsvd1 */
-               "       .short  %2\n"           /* xFlags */
-               "       .short  4\n"    /* xVrmIndex  - v5r2m0 */
-               "       .short  3\n"    /* xMinSupportedPlicVrmIndex - v5r1m0 */
-               "       .short  3\n"    /* xMinCompatablePlicVrmIndex - v5r1m0 */
-               "       .long   0xd38995a4\n"   /* xVrmName */
-               "       .long   0xa740f24b\n"   /*   "Linux 2.4.64" ebcdic */
-               "       .long   0xf44bf6f4\n"
-               "       . = hvReleaseData + %0\n"
-               "       .previous\n"
-               : : "i"(sizeof(hvReleaseData)),
-               "i"(offsetof(struct naca_struct, xItVpdAreas)),
-               "i"(HVREL_TAGSINACTIVE /* tags inactive, 64 bit, */
-                                      /* shared processors, HMT allowed */
-                   | 6), /* TEMP: This allows non-GA drivers */
-               "i"(KERNELBASE)
-               );
-}
-
-struct LparMap __attribute__((aligned (16))) xLparMap = {
-       .xNumberEsids = HvEsidsToMap,
-       .xNumberRanges = HvRangesToMap,
-       .xSegmentTableOffs = STAB0_PAGE,
-
-       .xEsids = {
-               { .xKernelEsid = GET_ESID(KERNELBASE),
-                 .xKernelVsid = KERNEL_VSID(KERNELBASE), },
-               { .xKernelEsid = GET_ESID(VMALLOCBASE),
-                 .xKernelVsid = KERNEL_VSID(VMALLOCBASE), },
-       },
  
-       .xRanges = {
-               { .xPages = HvPagesToMap,
-                 .xOffset = 0,
-                 .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT),
-               },
-       },
+/*
+ * The NACA.  The first dword of the naca is required by the iSeries
+ * hypervisor to point to itVpdAreas.  The hypervisor finds the NACA
+ * through the pointer in hvReleaseData.
+ */
+struct naca_struct naca = {
+       .xItVpdAreas = &itVpdAreas,
+       .xRamDisk = 0,
+       .xRamDiskSize = 0,
  };
  
  extern void system_reset_iSeries(void);
@@ -291,29 +225,3 @@ struct ItVpdAreas itVpdAreas = {
                 0,0
         }
  };
-
-struct msChunks msChunks;
-EXPORT_SYMBOL(msChunks);
-
-/* Depending on whether this is called from iSeries or pSeries setup
- * code, the location of the msChunks struct may or may not have
- * to be reloc'd, so we force the caller to do that for us by passing
- * in a pointer to the structure.
- */
-unsigned long
-msChunks_alloc(unsigned long mem, unsigned long num_chunks, unsigned long chunk_size)
-{
-       unsigned long offset = reloc_offset();
-       struct msChunks *_msChunks = PTRRELOC(&msChunks);
-
-       _msChunks->num_chunks  = num_chunks;
-       _msChunks->chunk_size  = chunk_size;
-       _msChunks->chunk_shift = __ilog2(chunk_size);
-       _msChunks->chunk_mask  = (1UL<<_msChunks->chunk_shift)-1;
-
-       mem = _ALIGN(mem, sizeof(msChunks_entry));
-       _msChunks->abs = (msChunks_entry *)(mem + offset);
-       mem += num_chunks * sizeof(msChunks_entry);
-
-       return mem;
-}
diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile

index d9b2660ef221f694e3b5a80c646b9fa508db0f41..f4b3bfcc109d2e76c883704fca5fc6479438d9fe 100644 (file)
--- a/arch/ppc64/kernel/Makefile
+++ b/arch/ppc64/kernel/Makefile
@@ -11,7 +11,7 @@ obj-y               :=        setup.o entry.o traps.o irq.o idle.o dma.o \
                         udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \
                         ptrace32.o signal32.o rtc.o init_task.o \
                         lmb.o cputable.o cpu_setup_power4.o idle_power4.o \
-                       iommu.o sysfs.o vdso.o pmc.o
+                       iommu.o sysfs.o vdso.o pmc.o firmware.o
  obj-y += vdso32/ vdso64/
  
  obj-$(CONFIG_PPC_OF) +=        of_device.o
@@ -50,7 +50,10 @@ obj-$(CONFIG_LPARCFG)                += lparcfg.o
  obj-$(CONFIG_HVC_CONSOLE)      += hvconsole.o
  obj-$(CONFIG_BOOTX_TEXT)       += btext.o
  obj-$(CONFIG_HVCS)             += hvcserver.o
-obj-$(CONFIG_IBMVIO)           += vio.o
+
+vio-obj-$(CONFIG_PPC_PSERIES)  += pSeries_vio.o
+vio-obj-$(CONFIG_PPC_ISERIES)  += iSeries_vio.o
+obj-$(CONFIG_IBMVIO)           += vio.o $(vio-obj-y)
  obj-$(CONFIG_XICS)             += xics.o
  obj-$(CONFIG_MPIC)             += mpic.o
  
@@ -73,3 +76,8 @@ obj-$(CONFIG_ALTIVEC)         += vecemu.o vector.o
  obj-$(CONFIG_KPROBES)          += kprobes.o
  
  CFLAGS_ioctl32.o += -Ifs/
+
+ifeq ($(CONFIG_PPC_ISERIES),y)
+arch/ppc64/kernel/head.o: arch/ppc64/kernel/lparmap.s
+AFLAGS_head.o += -Iarch/ppc64/kernel
+endif
diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c

index abb9e5b5da03b81aa42c88b2ad635f7d1cc00914..17e35d0fed09ff87d7eeb0d49e30b02d8e9f139d 100644 (file)
--- a/arch/ppc64/kernel/asm-offsets.c
+++ b/arch/ppc64/kernel/asm-offsets.c
@@ -94,7 +94,8 @@ int main(void)
         DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
         DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
  #ifdef CONFIG_HUGETLB_PAGE
-       DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs));
+       DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
+       DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
  #endif /* CONFIG_HUGETLB_PAGE */
         DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr));
          DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c

index 77cec42f9525897c960f85f86ee4793e2597cd77..4847f2ac8c9fe1d9d8de70ce428d46925699e162 100644 (file)
--- a/arch/ppc64/kernel/cputable.c
+++ b/arch/ppc64/kernel/cputable.c
@@ -5,7 +5,7 @@
   *
   *  Modifications for ppc64:
   *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
- * 
+ *
   *  This program is free software; you can redistribute it and/or
   *  modify it under the terms of the GNU General Public License
   *  as published by the Free Software Foundation; either version
@@ -60,7 +60,6 @@ struct cpu_spec       cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_power3,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* Power3+ */
                 .pvr_mask               = 0xffff0000,
@@ -73,7 +72,6 @@ struct cpu_spec       cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_power3,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* Northstar */
                 .pvr_mask               = 0xffff0000,
@@ -86,7 +84,6 @@ struct cpu_spec       cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_power3,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* Pulsar */
                 .pvr_mask               = 0xffff0000,
@@ -99,7 +96,6 @@ struct cpu_spec       cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_power3,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* I-star */
                 .pvr_mask               = 0xffff0000,
@@ -112,7 +108,6 @@ struct cpu_spec     cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_power3,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* S-star */
                 .pvr_mask               = 0xffff0000,
@@ -125,7 +120,6 @@ struct cpu_spec     cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_power3,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* Power4 */
                 .pvr_mask               = 0xffff0000,
@@ -138,7 +132,6 @@ struct cpu_spec     cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_power4,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* Power4+ */
                 .pvr_mask               = 0xffff0000,
@@ -151,7 +144,6 @@ struct cpu_spec     cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_power4,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* PPC970 */
                 .pvr_mask               = 0xffff0000,
@@ -166,7 +158,6 @@ struct cpu_spec     cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_ppc970,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* PPC970FX */
                 .pvr_mask               = 0xffff0000,
@@ -181,7 +172,6 @@ struct cpu_spec     cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_ppc970,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* PPC970MP */
                 .pvr_mask               = 0xffff0000,
@@ -196,7 +186,6 @@ struct cpu_spec     cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_ppc970,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* Power5 */
                 .pvr_mask               = 0xffff0000,
@@ -211,7 +200,6 @@ struct cpu_spec     cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_power4,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* Power5 */
                 .pvr_mask               = 0xffff0000,
@@ -226,7 +214,6 @@ struct cpu_spec     cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_power4,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* BE DD1.x */
                 .pvr_mask               = 0xffff0000,
@@ -241,7 +228,6 @@ struct cpu_spec     cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_be,
-               .firmware_features      = COMMON_PPC64_FW,
         },
         {       /* default match */
                 .pvr_mask               = 0x00000000,
@@ -254,29 +240,5 @@ struct cpu_spec    cpu_specs[] = {
                 .icache_bsize           = 128,
                 .dcache_bsize           = 128,
                 .cpu_setup              = __setup_cpu_power4,
-               .firmware_features      = COMMON_PPC64_FW,
         }
  };
-
-firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
-       {FW_FEATURE_PFT,                "hcall-pft"},
-       {FW_FEATURE_TCE,                "hcall-tce"},
-       {FW_FEATURE_SPRG0,              "hcall-sprg0"},
-       {FW_FEATURE_DABR,               "hcall-dabr"},
-       {FW_FEATURE_COPY,               "hcall-copy"},
-       {FW_FEATURE_ASR,                "hcall-asr"},
-       {FW_FEATURE_DEBUG,              "hcall-debug"},
-       {FW_FEATURE_PERF,               "hcall-perf"},
-       {FW_FEATURE_DUMP,               "hcall-dump"},
-       {FW_FEATURE_INTERRUPT,          "hcall-interrupt"},
-       {FW_FEATURE_MIGRATE,            "hcall-migrate"},
-       {FW_FEATURE_PERFMON,            "hcall-perfmon"},
-       {FW_FEATURE_CRQ,                "hcall-crq"},
-       {FW_FEATURE_VIO,                "hcall-vio"},
-       {FW_FEATURE_RDMA,               "hcall-rdma"},
-       {FW_FEATURE_LLAN,               "hcall-lLAN"},
-       {FW_FEATURE_BULK,               "hcall-bulk"},
-       {FW_FEATURE_XDABR,              "hcall-xdabr"},
-       {FW_FEATURE_MULTITCE,           "hcall-multi-tce"},
-       {FW_FEATURE_SPLPAR,             "hcall-splpar"},
-};
diff --git a/arch/ppc64/kernel/firmware.c b/arch/ppc64/kernel/firmware.c

new file mode 100644 (file)

index 0000000..d8432c0
--- /dev/null
+++ b/arch/ppc64/kernel/firmware.c
@@ -0,0 +1,47 @@
+/*
+ *  arch/ppc64/kernel/firmware.c
+ *
+ *  Extracted from cputable.c
+ *
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Modifications for ppc64:
+ *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *  Copyright (C) 2005 Stephen Rothwell, IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+
+#include <asm/firmware.h>
+
+unsigned long ppc64_firmware_features;
+
+#ifdef CONFIG_PPC_PSERIES
+firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
+       {FW_FEATURE_PFT,                "hcall-pft"},
+       {FW_FEATURE_TCE,                "hcall-tce"},
+       {FW_FEATURE_SPRG0,              "hcall-sprg0"},
+       {FW_FEATURE_DABR,               "hcall-dabr"},
+       {FW_FEATURE_COPY,               "hcall-copy"},
+       {FW_FEATURE_ASR,                "hcall-asr"},
+       {FW_FEATURE_DEBUG,              "hcall-debug"},
+       {FW_FEATURE_PERF,               "hcall-perf"},
+       {FW_FEATURE_DUMP,               "hcall-dump"},
+       {FW_FEATURE_INTERRUPT,          "hcall-interrupt"},
+       {FW_FEATURE_MIGRATE,            "hcall-migrate"},
+       {FW_FEATURE_PERFMON,            "hcall-perfmon"},
+       {FW_FEATURE_CRQ,                "hcall-crq"},
+       {FW_FEATURE_VIO,                "hcall-vio"},
+       {FW_FEATURE_RDMA,               "hcall-rdma"},
+       {FW_FEATURE_LLAN,               "hcall-lLAN"},
+       {FW_FEATURE_BULK,               "hcall-bulk"},
+       {FW_FEATURE_XDABR,              "hcall-xdabr"},
+       {FW_FEATURE_MULTITCE,           "hcall-multi-tce"},
+       {FW_FEATURE_SPLPAR,             "hcall-splpar"},
+};
+#endif
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S

index 74fc3bc68604715e23929a7b33457c280688f0b6..036959775623759b4fbd939dca7fae590b160a2f 100644 (file)
--- a/arch/ppc64/kernel/head.S
+++ b/arch/ppc64/kernel/head.S
@@ -23,14 +23,11 @@
   *  2 of the License, or (at your option) any later version.
   */
  
-#define SECONDARY_PROCESSORS
-
  #include <linux/config.h>
  #include <linux/threads.h>
  #include <asm/processor.h>
  #include <asm/page.h>
  #include <asm/mmu.h>
-#include <asm/naca.h>
  #include <asm/systemcfg.h>
  #include <asm/ppc_asm.h>
  #include <asm/offsets.h>
@@ -38,24 +35,20 @@
  #include <asm/cputable.h>
  #include <asm/setup.h>
  #include <asm/hvcall.h>
+#include <asm/iSeries/LparMap.h>
  
  #ifdef CONFIG_PPC_ISERIES
  #define DO_SOFT_DISABLE
  #endif
  
-/*
- * hcall interface to pSeries LPAR
- */
-#define H_SET_ASR      0x30
-
  /*
   * We layout physical memory as follows:
   * 0x0000 - 0x00ff : Secondary processor spin code
   * 0x0100 - 0x2fff : pSeries Interrupt prologs
- * 0x3000 - 0x3fff : Interrupt support
- * 0x4000 - 0x4fff : NACA
- * 0x6000         : iSeries and common interrupt prologs
- * 0x9000 - 0x9fff : Initial segment table
+ * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs
+ * 0x6000 - 0x6fff : Initial (CPU0) segment table
+ * 0x7000 - 0x7fff : FWNMI data area
+ * 0x8000 -        : Early init and support code
   */
  
  /*
@@ -93,6 +86,7 @@ END_FTR_SECTION(0, 1)
  
         /* Catch branch to 0 in real mode */
         trap
+
  #ifdef CONFIG_PPC_ISERIES
         /*
          * At offset 0x20, there is a pointer to iSeries LPAR data.
@@ -102,12 +96,12 @@ END_FTR_SECTION(0, 1)
         .llong hvReleaseData-KERNELBASE
  
         /*
-        * At offset 0x28 and 0x30 are offsets to the msChunks
+        * At offset 0x28 and 0x30 are offsets to the mschunks_map
          * array (used by the iSeries LPAR debugger to do translation
          * between physical addresses and absolute addresses) and
          * to the pidhash table (also used by the debugger)
          */
-       .llong msChunks-KERNELBASE
+       .llong mschunks_map-KERNELBASE
         .llong 0        /* pidhash-KERNELBASE SFRXXX */
  
         /* Offset 0x38 - Pointer to start of embedded System.map */
@@ -119,7 +113,7 @@ embedded_sysmap_start:
  embedded_sysmap_end:
         .llong  0
  
-#else /* CONFIG_PPC_ISERIES */
+#endif /* CONFIG_PPC_ISERIES */
  
         /* Secondary processors spin on this value until it goes to 1. */
         .globl  __secondary_hold_spinloop
@@ -154,7 +148,7 @@ _GLOBAL(__secondary_hold)
         std     r24,__secondary_hold_acknowledge@l(0)
         sync
  
-       /* All secondary cpu's wait here until told to start. */
+       /* All secondary cpus wait here until told to start. */
  100:   ld      r4,__secondary_hold_spinloop@l(0)
         cmpdi   0,r4,1
         bne     100b
@@ -169,7 +163,6 @@ _GLOBAL(__secondary_hold)
         BUG_OPCODE
  #endif
  #endif
-#endif
  
  /* This value is used to mark exception frames on the stack. */
         .section ".toc","aw"
@@ -501,33 +494,37 @@ system_call_pSeries:
         STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
         STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
  
+       . = 0x3000
+
+/*** pSeries interrupt support ***/
+
         /* moved from 0xf00 */
-       STD_EXCEPTION_PSERIES(0x3000, performance_monitor)
+       STD_EXCEPTION_PSERIES(., performance_monitor)
  
-       . = 0x3100
+       .align  7
  _GLOBAL(do_stab_bolted_pSeries)
         mtcrf   0x80,r12
         mfspr   r12,SPRG2
         EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
  
-       
-       /* Space for the naca.  Architected to be located at real address
-        * NACA_PHYS_ADDR.  Various tools rely on this location being fixed.
-        * The first dword of the naca is required by iSeries LPAR to
-        * point to itVpdAreas.  On pSeries native, this value is not used.
-        */
-       . = NACA_PHYS_ADDR
-       .globl __end_interrupts
-__end_interrupts:
-#ifdef CONFIG_PPC_ISERIES
-       .globl naca
-naca:
-       .llong  itVpdAreas
-       .llong  0               /* xRamDisk */
-       .llong  0               /* xRamDiskSize */
+/*
+ * Vectors for the FWNMI option.  Share common code.
+ */
+      .globl system_reset_fwnmi
+system_reset_fwnmi:
+      HMT_MEDIUM
+      mtspr   SPRG1,r13               /* save r13 */
+      RUNLATCH_ON(r13)
+      EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
  
-       . = 0x6100
+      .globl machine_check_fwnmi
+machine_check_fwnmi:
+      HMT_MEDIUM
+      mtspr   SPRG1,r13               /* save r13 */
+      RUNLATCH_ON(r13)
+      EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
  
+#ifdef CONFIG_PPC_ISERIES
  /***  ISeries-LPAR interrupt handlers ***/
  
         STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC)
@@ -625,9 +622,7 @@ system_reset_iSeries:
  
         cmpwi   0,r23,0
         beq     iSeries_secondary_smp_loop      /* Loop until told to go */
-#ifdef SECONDARY_PROCESSORS
         bne     .__secondary_start              /* Loop until told to go */
-#endif
  iSeries_secondary_smp_loop:
         /* Let the Hypervisor know we are alive */
         /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
@@ -670,45 +665,7 @@ hardware_interrupt_iSeries_masked:
         ld      r13,PACA_EXGEN+EX_R13(r13)
         rfid
         b       .       /* prevent speculative execution */
-#endif
-
-/*
- * Data area reserved for FWNMI option.
- */
-       .= 0x7000
-       .globl fwnmi_data_area
-fwnmi_data_area:
-
-/*
- * Vectors for the FWNMI option.  Share common code.
- */
-       . = 0x8000
-       .globl system_reset_fwnmi
-system_reset_fwnmi:
-       HMT_MEDIUM
-       mtspr   SPRG1,r13               /* save r13 */
-       RUNLATCH_ON(r13)
-       EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
-       .globl machine_check_fwnmi
-machine_check_fwnmi:
-       HMT_MEDIUM
-       mtspr   SPRG1,r13               /* save r13 */
-       RUNLATCH_ON(r13)
-       EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
-
-       /*
-        * Space for the initial segment table
-        * For LPAR, the hypervisor must fill in at least one entry
-        * before we get control (with relocate on)
-        */
-       . = STAB0_PHYS_ADDR
-       .globl __start_stab
-__start_stab:
-
-       . = (STAB0_PHYS_ADDR + PAGE_SIZE)
-       .globl __end_stab
-__end_stab:
-
+#endif /* CONFIG_PPC_ISERIES */
  
  /*** Common interrupt handlers ***/
  
@@ -746,8 +703,8 @@ machine_check_common:
   * R9 contains the saved CR, r13 points to the paca,
   * r10 contains the (bad) kernel stack pointer,
   * r11 and r12 contain the saved SRR0 and SRR1.
- * We switch to using the paca guard page as an emergency stack,
- * save the registers there, and call kernel_bad_stack(), which panics.
+ * We switch to using an emergency stack, save the registers there,
+ * and call kernel_bad_stack(), which panics.
   */
  bad_stack:
         ld      r1,PACAEMERGSP(r13)
@@ -900,6 +857,62 @@ fp_unavailable_common:
         bl      .kernel_fp_unavailable_exception
         BUG_OPCODE
  
+/*
+ * load_up_fpu(unused, unused, tsk)
+ * Disable FP for the task which had the FPU previously,
+ * and save its floating-point registers in its thread_struct.
+ * Enables the FPU for use in the kernel on return.
+ * On SMP we know the fpu is free, since we give it up every
+ * switch (ie, no lazy save of the FP registers).
+ * On entry: r13 == paca && last_task_used_math != 'current'
+ */
+_STATIC(load_up_fpu)
+       mfmsr   r5                      /* grab the current MSR */
+       ori     r5,r5,MSR_FP
+       mtmsrd  r5                      /* enable use of fpu now */
+       isync
+/*
+ * For SMP, we don't do lazy FPU switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_fpu in switch_to.
+ *
+ */
+#ifndef CONFIG_SMP
+       ld      r3,last_task_used_math@got(r2)
+       ld      r4,0(r3)
+       cmpdi   0,r4,0
+       beq     1f
+       /* Save FP state to last_task_used_math's THREAD struct */
+       addi    r4,r4,THREAD
+       SAVE_32FPRS(0, r4)
+       mffs    fr0
+       stfd    fr0,THREAD_FPSCR(r4)
+       /* Disable FP for last_task_used_math */
+       ld      r5,PT_REGS(r4)
+       ld      r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+       li      r6,MSR_FP|MSR_FE0|MSR_FE1
+       andc    r4,r4,r6
+       std     r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+       /* enable use of FP after return */
+       ld      r4,PACACURRENT(r13)
+       addi    r5,r4,THREAD            /* Get THREAD */
+       ld      r4,THREAD_FPEXC_MODE(r5)
+       ori     r12,r12,MSR_FP
+       or      r12,r12,r4
+       std     r12,_MSR(r1)
+       lfd     fr0,THREAD_FPSCR(r5)
+       mtfsf   0xff,fr0
+       REST_32FPRS(0, r5)
+#ifndef CONFIG_SMP
+       /* Update last_task_used_math to 'current' */
+       subi    r4,r5,THREAD            /* Back to 'current' */
+       std     r4,0(r3)
+#endif /* CONFIG_SMP */
+       /* restore registers and return */
+       b       fast_exception_return
+
         .align  7
         .globl altivec_unavailable_common
  altivec_unavailable_common:
@@ -915,6 +928,80 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
         bl      .altivec_unavailable_exception
         b       .ret_from_except
  
+#ifdef CONFIG_ALTIVEC
+/*
+ * load_up_altivec(unused, unused, tsk)
+ * Disable VMX for the task which had it previously,
+ * and save its vector registers in its thread_struct.
+ * Enables the VMX for use in the kernel on return.
+ * On SMP we know the VMX is free, since we give it up every
+ * switch (ie, no lazy save of the vector registers).
+ * On entry: r13 == 'current' && last_task_used_altivec != 'current'
+ */
+_STATIC(load_up_altivec)
+       mfmsr   r5                      /* grab the current MSR */
+       oris    r5,r5,MSR_VEC@h
+       mtmsrd  r5                      /* enable use of VMX now */
+       isync
+
+/*
+ * For SMP, we don't do lazy VMX switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_altivec in switch_to.
+ * VRSAVE isn't dealt with here, that is done in the normal context
+ * switch code. Note that we could rely on vrsave value to eventually
+ * avoid saving all of the VREGs here...
+ */
+#ifndef CONFIG_SMP
+       ld      r3,last_task_used_altivec@got(r2)
+       ld      r4,0(r3)
+       cmpdi   0,r4,0
+       beq     1f
+       /* Save VMX state to last_task_used_altivec's THREAD struct */
+       addi    r4,r4,THREAD
+       SAVE_32VRS(0,r5,r4)
+       mfvscr  vr0
+       li      r10,THREAD_VSCR
+       stvx    vr0,r10,r4
+       /* Disable VMX for last_task_used_altivec */
+       ld      r5,PT_REGS(r4)
+       ld      r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+       lis     r6,MSR_VEC@h
+       andc    r4,r4,r6
+       std     r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+       /* Hack: if we get an altivec unavailable trap with VRSAVE
+        * set to all zeros, we assume this is a broken application
+        * that fails to set it properly, and thus we switch it to
+        * all 1's
+        */
+       mfspr   r4,SPRN_VRSAVE
+       cmpdi   0,r4,0
+       bne+    1f
+       li      r4,-1
+       mtspr   SPRN_VRSAVE,r4
+1:
+       /* enable use of VMX after return */
+       ld      r4,PACACURRENT(r13)
+       addi    r5,r4,THREAD            /* Get THREAD */
+       oris    r12,r12,MSR_VEC@h
+       std     r12,_MSR(r1)
+       li      r4,1
+       li      r10,THREAD_VSCR
+       stw     r4,THREAD_USED_VR(r5)
+       lvx     vr0,r10,r5
+       mtvscr  vr0
+       REST_32VRS(0,r4,r5)
+#ifndef CONFIG_SMP
+       /* Update last_task_used_altivec to 'current' */
+       subi    r4,r5,THREAD            /* Back to 'current' */
+       std     r4,0(r3)
+#endif /* CONFIG_SMP */
+       /* restore registers and return */
+       b       fast_exception_return
+#endif /* CONFIG_ALTIVEC */
+
  /*
   * Hash table stuff
   */
@@ -1161,6 +1248,42 @@ unrecov_slb:
         bl      .unrecoverable_exception
         b       1b
  
+/*
+ * Space for CPU0's segment table.
+ *
+ * On iSeries, the hypervisor must fill in at least one entry before
+ * we get control (with relocate on).  The address is given to the hv
+ * as a page number (see xLparMap in LparData.c), so this must be at a
+ * fixed address (the linker can't compute (u64)&initial_stab >>
+ * PAGE_SHIFT).
+ */
+       . = STAB0_PHYS_ADDR     /* 0x6000 */
+       .globl initial_stab
+initial_stab:
+       .space  4096
+
+/*
+ * Data area reserved for FWNMI option.
+ * This address (0x7000) is fixed by the RPA.
+ */
+       .= 0x7000
+       .globl fwnmi_data_area
+fwnmi_data_area:
+
+       /* iSeries does not use the FWNMI stuff, so it is safe to put
+        * this here, even if we later allow kernels that will boot on
+        * both pSeries and iSeries */
+#ifdef CONFIG_PPC_ISERIES
+        . = LPARMAP_PHYS
+#include "lparmap.s"
+/*
+ * This ".text" is here for old compilers that generate a trailing
+ * .note section when compiling .c files to .s
+ */
+       .text
+#endif /* CONFIG_PPC_ISERIES */
+
+        . = 0x8000
  
  /*
   * On pSeries, secondary processors spin in the following code.
@@ -1194,7 +1317,7 @@ _GLOBAL(pSeries_secondary_smp_init)
         b       .kexec_wait             /* next kernel might do better   */
  
  2:     mtspr   SPRG3,r13               /* Save vaddr of paca in SPRG3   */
-       /* From now on, r24 is expected to be logica cpuid */
+       /* From now on, r24 is expected to be logical cpuid */
         mr      r24,r5
  3:     HMT_LOW
         lbz     r23,PACAPROCSTART(r13)  /* Test if this processor should */
@@ -1207,9 +1330,7 @@ _GLOBAL(pSeries_secondary_smp_init)
  
         cmpwi   0,r23,0
  #ifdef CONFIG_SMP
-#ifdef SECONDARY_PROCESSORS
         bne     .__secondary_start
-#endif
  #endif
         b       3b                      /* Loop until told to go         */
  
@@ -1424,228 +1545,6 @@ _GLOBAL(copy_and_flush)
  .align 8
  copy_to_here:
  
-/*
- * load_up_fpu(unused, unused, tsk)
- * Disable FP for the task which had the FPU previously,
- * and save its floating-point registers in its thread_struct.
- * Enables the FPU for use in the kernel on return.
- * On SMP we know the fpu is free, since we give it up every
- * switch (ie, no lazy save of the FP registers).
- * On entry: r13 == 'current' && last_task_used_math != 'current'
- */
-_STATIC(load_up_fpu)
-       mfmsr   r5                      /* grab the current MSR */
-       ori     r5,r5,MSR_FP
-       mtmsrd  r5                      /* enable use of fpu now */
-       isync
-/*
- * For SMP, we don't do lazy FPU switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another.  Instead we call giveup_fpu in switch_to.
- *
- */
-#ifndef CONFIG_SMP
-       ld      r3,last_task_used_math@got(r2)
-       ld      r4,0(r3)
-       cmpdi   0,r4,0
-       beq     1f
-       /* Save FP state to last_task_used_math's THREAD struct */
-       addi    r4,r4,THREAD
-       SAVE_32FPRS(0, r4)
-       mffs    fr0
-       stfd    fr0,THREAD_FPSCR(r4)
-       /* Disable FP for last_task_used_math */
-       ld      r5,PT_REGS(r4)
-       ld      r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-       li      r6,MSR_FP|MSR_FE0|MSR_FE1
-       andc    r4,r4,r6
-       std     r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
-       /* enable use of FP after return */
-       ld      r4,PACACURRENT(r13)
-       addi    r5,r4,THREAD            /* Get THREAD */
-       ld      r4,THREAD_FPEXC_MODE(r5)
-       ori     r12,r12,MSR_FP
-       or      r12,r12,r4
-       std     r12,_MSR(r1)
-       lfd     fr0,THREAD_FPSCR(r5)
-       mtfsf   0xff,fr0
-       REST_32FPRS(0, r5)
-#ifndef CONFIG_SMP
-       /* Update last_task_used_math to 'current' */
-       subi    r4,r5,THREAD            /* Back to 'current' */
-       std     r4,0(r3)
-#endif /* CONFIG_SMP */
-       /* restore registers and return */
-       b       fast_exception_return
-
-/*
- * disable_kernel_fp()
- * Disable the FPU.
- */
-_GLOBAL(disable_kernel_fp)
-       mfmsr   r3
-       rldicl  r0,r3,(63-MSR_FP_LG),1
-       rldicl  r3,r0,(MSR_FP_LG+1),0
-       mtmsrd  r3                      /* disable use of fpu now */
-       isync
-       blr
-
-/*
- * giveup_fpu(tsk)
- * Disable FP for the task given as the argument,
- * and save the floating-point registers in its thread_struct.
- * Enables the FPU for use in the kernel on return.
- */
-_GLOBAL(giveup_fpu)
-       mfmsr   r5
-       ori     r5,r5,MSR_FP
-       mtmsrd  r5                      /* enable use of fpu now */
-       isync
-       cmpdi   0,r3,0
-       beqlr-                          /* if no previous owner, done */
-       addi    r3,r3,THREAD            /* want THREAD of task */
-       ld      r5,PT_REGS(r3)
-       cmpdi   0,r5,0
-       SAVE_32FPRS(0, r3)
-       mffs    fr0
-       stfd    fr0,THREAD_FPSCR(r3)
-       beq     1f
-       ld      r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-       li      r3,MSR_FP|MSR_FE0|MSR_FE1
-       andc    r4,r4,r3                /* disable FP for previous task */
-       std     r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#ifndef CONFIG_SMP
-       li      r5,0
-       ld      r4,last_task_used_math@got(r2)
-       std     r5,0(r4)
-#endif /* CONFIG_SMP */
-       blr
-
-
-#ifdef CONFIG_ALTIVEC
-               
-/*
- * load_up_altivec(unused, unused, tsk)
- * Disable VMX for the task which had it previously,
- * and save its vector registers in its thread_struct.
- * Enables the VMX for use in the kernel on return.
- * On SMP we know the VMX is free, since we give it up every
- * switch (ie, no lazy save of the vector registers).
- * On entry: r13 == 'current' && last_task_used_altivec != 'current'
- */
-_STATIC(load_up_altivec)
-       mfmsr   r5                      /* grab the current MSR */
-       oris    r5,r5,MSR_VEC@h
-       mtmsrd  r5                      /* enable use of VMX now */
-       isync
-       
-/*
- * For SMP, we don't do lazy VMX switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another.  Instead we call giveup_altvec in switch_to.
- * VRSAVE isn't dealt with here, that is done in the normal context
- * switch code. Note that we could rely on vrsave value to eventually
- * avoid saving all of the VREGs here...
- */
-#ifndef CONFIG_SMP
-       ld      r3,last_task_used_altivec@got(r2)
-       ld      r4,0(r3)
-       cmpdi   0,r4,0
-       beq     1f
-       /* Save VMX state to last_task_used_altivec's THREAD struct */
-       addi    r4,r4,THREAD
-       SAVE_32VRS(0,r5,r4)
-       mfvscr  vr0
-       li      r10,THREAD_VSCR
-       stvx    vr0,r10,r4
-       /* Disable VMX for last_task_used_altivec */
-       ld      r5,PT_REGS(r4)
-       ld      r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-       lis     r6,MSR_VEC@h
-       andc    r4,r4,r6
-       std     r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
-       /* Hack: if we get an altivec unavailable trap with VRSAVE
-        * set to all zeros, we assume this is a broken application
-        * that fails to set it properly, and thus we switch it to
-        * all 1's
-        */
-       mfspr   r4,SPRN_VRSAVE
-       cmpdi   0,r4,0
-       bne+    1f
-       li      r4,-1
-       mtspr   SPRN_VRSAVE,r4
-1:
-       /* enable use of VMX after return */
-       ld      r4,PACACURRENT(r13)
-       addi    r5,r4,THREAD            /* Get THREAD */
-       oris    r12,r12,MSR_VEC@h
-       std     r12,_MSR(r1)
-       li      r4,1
-       li      r10,THREAD_VSCR
-       stw     r4,THREAD_USED_VR(r5)
-       lvx     vr0,r10,r5
-       mtvscr  vr0
-       REST_32VRS(0,r4,r5)
-#ifndef CONFIG_SMP
-       /* Update last_task_used_math to 'current' */
-       subi    r4,r5,THREAD            /* Back to 'current' */
-       std     r4,0(r3)
-#endif /* CONFIG_SMP */
-       /* restore registers and return */
-       b       fast_exception_return
-
-/*
- * disable_kernel_altivec()
- * Disable the VMX.
- */
-_GLOBAL(disable_kernel_altivec)
-       mfmsr   r3
-       rldicl  r0,r3,(63-MSR_VEC_LG),1
-       rldicl  r3,r0,(MSR_VEC_LG+1),0
-       mtmsrd  r3                      /* disable use of VMX now */
-       isync
-       blr
-
-/*
- * giveup_altivec(tsk)
- * Disable VMX for the task given as the argument,
- * and save the vector registers in its thread_struct.
- * Enables the VMX for use in the kernel on return.
- */
-_GLOBAL(giveup_altivec)
-       mfmsr   r5
-       oris    r5,r5,MSR_VEC@h
-       mtmsrd  r5                      /* enable use of VMX now */
-       isync
-       cmpdi   0,r3,0
-       beqlr-                          /* if no previous owner, done */
-       addi    r3,r3,THREAD            /* want THREAD of task */
-       ld      r5,PT_REGS(r3)
-       cmpdi   0,r5,0
-       SAVE_32VRS(0,r4,r3)
-       mfvscr  vr0
-       li      r4,THREAD_VSCR
-       stvx    vr0,r4,r3
-       beq     1f
-       ld      r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-       lis     r3,MSR_VEC@h
-       andc    r4,r4,r3                /* disable FP for previous task */
-       std     r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#ifndef CONFIG_SMP
-       li      r5,0
-       ld      r4,last_task_used_altivec@got(r2)
-       std     r5,0(r4)
-#endif /* CONFIG_SMP */
-       blr
-
-#endif /* CONFIG_ALTIVEC */
-
  #ifdef CONFIG_SMP
  #ifdef CONFIG_PPC_PMAC
  /*
@@ -1996,9 +1895,6 @@ _STATIC(start_here_common)
  
         bl .start_kernel
  
-_GLOBAL(__setup_cpu_power3)
-       blr
-
  _GLOBAL(hmt_init)
  #ifdef CONFIG_HMT
         LOADADDR(r5, hmt_thread_data)
@@ -2071,7 +1967,7 @@ _GLOBAL(hmt_start_secondary)
         blr
  #endif
  
-#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES)
+#if defined(CONFIG_KEXEC) || (defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES))
  _GLOBAL(smp_release_cpus)
         /* All secondary cpus are spinning on a common
          * spinloop, release them all now so they can start
@@ -2089,20 +1985,19 @@ _GLOBAL(smp_release_cpus)
  
  /*
   * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
+ * This stuff goes at the beginning of the bss, which is page-aligned.
   */
-       .data
+       .section ".bss"
+
         .align  12
-       .globl  sdata
-sdata:
+
         .globl  empty_zero_page
  empty_zero_page:
-       .space  4096
+       .space  PAGE_SIZE
  
         .globl  swapper_pg_dir
  swapper_pg_dir:
-       .space  4096
+       .space  PAGE_SIZE
  
  /*
   * This space gets a copy of optional info passed to us by the bootstrap
diff --git a/arch/ppc64/kernel/iSeries_htab.c b/arch/ppc64/kernel/iSeries_htab.c

index b0250ae4a72a5954f77bc21fb600080aea83723a..2192055a90a07ba3f34f074e70cb75225d124fdb 100644 (file)
--- a/arch/ppc64/kernel/iSeries_htab.c
+++ b/arch/ppc64/kernel/iSeries_htab.c
@@ -41,6 +41,7 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
                                 unsigned long prpn, unsigned long vflags,
                                 unsigned long rflags)
  {
+       unsigned long arpn;
         long slot;
         hpte_t lhpte;
         int secondary = 0;
@@ -70,8 +71,10 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
                 slot &= 0x7fffffffffffffff;
         }
  
+       arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT;
+
         lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
-       lhpte.r = (physRpn_to_absRpn(prpn) << HPTE_R_RPN_SHIFT) | rflags;
+       lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
  
         /* Now fill in the actual HPTE */
         HvCallHpt_addValidate(slot, secondary, &lhpte);
diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c

index a649edbb23b6605aabe1a981fde2d36649229696..3ffefbbc6623a9e27e262691f5cb877bad32feb2 100644 (file)
--- a/arch/ppc64/kernel/iSeries_setup.c
+++ b/arch/ppc64/kernel/iSeries_setup.c
@@ -39,6 +39,7 @@
  #include <asm/cputable.h>
  #include <asm/sections.h>
  #include <asm/iommu.h>
+#include <asm/firmware.h>
  
  #include <asm/time.h>
  #include "iSeries_setup.h"
@@ -314,6 +315,8 @@ static void __init iSeries_init_early(void)
  
         DBG(" -> iSeries_init_early()\n");
  
+       ppc64_firmware_features = FW_FEATURE_ISERIES;
+
         ppcdbg_initialize();
  
  #if defined(CONFIG_BLK_DEV_INITRD)
@@ -412,6 +415,22 @@ static void __init iSeries_init_early(void)
         DBG(" <- iSeries_init_early()\n");
  }
  
+struct mschunks_map mschunks_map = {
+       /* XXX We don't use these, but Piranha might need them. */
+       .chunk_size  = MSCHUNKS_CHUNK_SIZE,
+       .chunk_shift = MSCHUNKS_CHUNK_SHIFT,
+       .chunk_mask  = MSCHUNKS_OFFSET_MASK,
+};
+EXPORT_SYMBOL(mschunks_map);
+
+void mschunks_alloc(unsigned long num_chunks)
+{
+       klimit = _ALIGN(klimit, sizeof(u32));
+       mschunks_map.mapping = (u32 *)klimit;
+       klimit += num_chunks * sizeof(u32);
+       mschunks_map.num_chunks = num_chunks;
+}
+
  /*
   * The iSeries may have very large memories ( > 128 GB ) and a partition
   * may get memory in "chunks" that may be anywhere in the 2**52 real
@@ -449,7 +468,7 @@ static void __init build_iSeries_Memory_Map(void)
  
         /* Chunk size on iSeries is 256K bytes */
         totalChunks = (u32)HvLpConfig_getMsChunks();
-       klimit = msChunks_alloc(klimit, totalChunks, 1UL << 18);
+       mschunks_alloc(totalChunks);
  
         /*
          * Get absolute address of our load area
@@ -486,7 +505,7 @@ static void __init build_iSeries_Memory_Map(void)
         printk("Load area size %dK\n", loadAreaSize * 256);
  
         for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk)
-               msChunks.abs[nextPhysChunk] =
+               mschunks_map.mapping[nextPhysChunk] =
                         loadAreaFirstChunk + nextPhysChunk;
  
         /*
@@ -495,7 +514,7 @@ static void __init build_iSeries_Memory_Map(void)
          */
         hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress());
         hptSizePages = (u32)HvCallHpt_getHptPages();
-       hptSizeChunks = hptSizePages >> (msChunks.chunk_shift - PAGE_SHIFT);
+       hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT);
         hptLastChunk = hptFirstChunk + hptSizeChunks - 1;
  
         printk("HPT absolute addr = %016lx, size = %dK\n",
@@ -552,7 +571,8 @@ static void __init build_iSeries_Memory_Map(void)
                                      (absChunk > hptLastChunk)) &&
                                     ((absChunk < loadAreaFirstChunk) ||
                                      (absChunk > loadAreaLastChunk))) {
-                                       msChunks.abs[nextPhysChunk] = absChunk;
+                                       mschunks_map.mapping[nextPhysChunk] =
+                                               absChunk;
                                         ++nextPhysChunk;
                                 }
                         }
@@ -944,6 +964,8 @@ void __init iSeries_early_setup(void)
         ppc_md.calibrate_decr = iSeries_calibrate_decr;
         ppc_md.progress = iSeries_progress;
  
+       /* XXX Implement enable_pmcs for iSeries */
+
         if (get_paca()->lppaca.shared_proc) {
                 ppc_md.idle_loop = iseries_shared_idle;
                 printk(KERN_INFO "Using shared processor idle loop\n");
diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c

new file mode 100644 (file)

index 0000000..6b754b0
--- /dev/null
+++ b/arch/ppc64/kernel/iSeries_vio.c
@@ -0,0 +1,155 @@
+/*
+ * IBM PowerPC iSeries Virtual I/O Infrastructure Support.
+ *
+ *    Copyright (c) 2005 Stephen Rothwell, IBM Corp.
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/init.h>
+
+#include <asm/vio.h>
+#include <asm/iommu.h>
+#include <asm/abs_addr.h>
+#include <asm/page.h>
+#include <asm/iSeries/vio.h>
+#include <asm/iSeries/HvTypes.h>
+#include <asm/iSeries/HvLpConfig.h>
+#include <asm/iSeries/HvCallXm.h>
+
+struct device *iSeries_vio_dev = &vio_bus_device.dev;
+EXPORT_SYMBOL(iSeries_vio_dev);
+
+static struct iommu_table veth_iommu_table;
+static struct iommu_table vio_iommu_table;
+
+static void __init iommu_vio_init(void)
+{
+       struct iommu_table *t;
+       struct iommu_table_cb cb;
+       unsigned long cbp;
+       unsigned long itc_entries;
+
+       cb.itc_busno = 255;    /* Bus 255 is the virtual bus */
+       cb.itc_virtbus = 0xff; /* Ask for virtual bus */
+
+       cbp = virt_to_abs(&cb);
+       HvCallXm_getTceTableParms(cbp);
+
+       itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
+       veth_iommu_table.it_size        = itc_entries / 2;
+       veth_iommu_table.it_busno       = cb.itc_busno;
+       veth_iommu_table.it_offset      = cb.itc_offset;
+       veth_iommu_table.it_index       = cb.itc_index;
+       veth_iommu_table.it_type        = TCE_VB;
+       veth_iommu_table.it_blocksize   = 1;
+
+       t = iommu_init_table(&veth_iommu_table);
+
+       if (!t)
+               printk("Virtual Bus VETH TCE table failed.\n");
+
+       vio_iommu_table.it_size         = itc_entries - veth_iommu_table.it_size;
+       vio_iommu_table.it_busno        = cb.itc_busno;
+       vio_iommu_table.it_offset       = cb.itc_offset +
+                                         veth_iommu_table.it_size;
+       vio_iommu_table.it_index        = cb.itc_index;
+       vio_iommu_table.it_type         = TCE_VB;
+       vio_iommu_table.it_blocksize    = 1;
+
+       t = iommu_init_table(&vio_iommu_table);
+
+       if (!t)
+               printk("Virtual Bus VIO TCE table failed.\n");
+}
+
+/**
+ * vio_register_device_iseries: - Register a new iSeries vio device.
+ * @type:      Device type; also the prefix of the device's bus id.
+ * @unit_num:  Unit number; appended to @type to form the bus id.
+ */
+static struct vio_dev *__init vio_register_device_iseries(char *type,
+               uint32_t unit_num)
+{
+       struct vio_dev *viodev;
+
+       /* allocate a vio_dev for this device */
+       viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
+       if (!viodev)
+               return NULL;
+       memset(viodev, 0, sizeof(struct vio_dev));
+
+       snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%u", type, unit_num);
+       viodev->name = viodev->dev.bus_id;
+       viodev->type = type;
+       viodev->unit_address = unit_num;
+       viodev->iommu_table = &vio_iommu_table;
+       if (vio_register_device(viodev) == NULL) {
+               kfree(viodev);
+               return NULL;
+       }
+       return viodev;
+}
+
+void __init probe_bus_iseries(void)
+{
+       HvLpIndexMap vlan_map;
+       struct vio_dev *viodev;
+       int i;
+
+       /* there is only one of each of these */
+       vio_register_device_iseries("viocons", 0);
+       vio_register_device_iseries("vscsi", 0);
+
+       vlan_map = HvLpConfig_getVirtualLanIndexMap();
+       for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
+               if ((vlan_map & (0x8000 >> i)) == 0)
+                       continue;
+               viodev = vio_register_device_iseries("vlan", i);
+               if (viodev)     /* NULL on failure; veth has its own table */
+                       viodev->iommu_table = &veth_iommu_table;
+       }
+       for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++)
+               vio_register_device_iseries("viodasd", i);
+       for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++)
+               vio_register_device_iseries("viocd", i);
+       for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++)
+               vio_register_device_iseries("viotape", i);
+}
+
+/**
+ * vio_match_device_iseries: - Tell if an iSeries VIO device matches a
+ *     vio_device_id
+ */
+static int vio_match_device_iseries(const struct vio_device_id *id,
+               const struct vio_dev *dev)
+{
+       return strncmp(dev->type, id->type, strlen(id->type)) == 0;
+}
+
+static struct vio_bus_ops vio_bus_ops_iseries = {
+       .match = vio_match_device_iseries,
+};
+
+/**
+ * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus
+ */
+static int __init vio_bus_init_iseries(void)
+{
+       int err;
+
+       err = vio_bus_init(&vio_bus_ops_iseries);
+       if (err == 0) {
+               iommu_vio_init();
+               vio_bus_device.iommu_table = &vio_iommu_table;
+               iSeries_vio_dev = &vio_bus_device.dev;
+               probe_bus_iseries();
+       }
+       return err;
+}
+
+__initcall(vio_bus_init_iseries);
diff --git a/arch/ppc64/kernel/iommu.c b/arch/ppc64/kernel/iommu.c

index 8316426ccaf60036c59d3cd45e18835d8e19c88a..845eebd1e28de803904778c7ed1dc39fb61a7abc 100644 (file)
--- a/arch/ppc64/kernel/iommu.c
+++ b/arch/ppc64/kernel/iommu.c
@@ -242,7 +242,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
         dma_addr_t dma_next = 0, dma_addr;
         unsigned long flags;
         struct scatterlist *s, *outs, *segstart;
-       int outcount;
+       int outcount, incount;
         unsigned long handle;
  
         BUG_ON(direction == DMA_NONE);
@@ -252,6 +252,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
  
         outs = s = segstart = &sglist[0];
         outcount = 1;
+       incount = nelems;
         handle = 0;
  
         /* Init first segment length for backout at failure */
@@ -338,10 +339,10 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
  
         DBG("mapped %d elements:\n", outcount);
  
-       /* For the sake of iommu_free_sg, we clear out the length in the
+       /* For the sake of iommu_unmap_sg, we clear out the length in the
          * next entry of the sglist if we didn't fill the list completely
          */
-       if (outcount < nelems) {
+       if (outcount < incount) {
                 outs++;
                 outs->dma_address = DMA_ERROR_CODE;
                 outs->dma_length = 0;
diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c

index d6c6bd03d2a42cf49ec1f8ebeec5ee077a2f683f..5adaca2ddc9da02456db4e2591120b73e7bce1e6 100644 (file)
--- a/arch/ppc64/kernel/lmb.c
+++ b/arch/ppc64/kernel/lmb.c
@@ -28,33 +28,28 @@ void lmb_dump_all(void)
  {
  #ifdef DEBUG
         unsigned long i;
-       struct lmb *_lmb  = &lmb;
  
         udbg_printf("lmb_dump_all:\n");
         udbg_printf("    memory.cnt               = 0x%lx\n",
-                   _lmb->memory.cnt);
+                   lmb.memory.cnt);
         udbg_printf("    memory.size              = 0x%lx\n",
-                   _lmb->memory.size);
-       for (i=0; i < _lmb->memory.cnt ;i++) {
+                   lmb.memory.size);
+       for (i=0; i < lmb.memory.cnt ;i++) {
                 udbg_printf("    memory.region[0x%x].base       = 0x%lx\n",
-                           i, _lmb->memory.region[i].base);
-               udbg_printf("                 .physbase = 0x%lx\n",
-                           _lmb->memory.region[i].physbase);
+                           i, lmb.memory.region[i].base);
                 udbg_printf("                 .size     = 0x%lx\n",
-                           _lmb->memory.region[i].size);
+                           lmb.memory.region[i].size);
         }
  
         udbg_printf("\n    reserved.cnt   = 0x%lx\n",
-                   _lmb->reserved.cnt);
+                   lmb.reserved.cnt);
         udbg_printf("    reserved.size    = 0x%lx\n",
-                   _lmb->reserved.size);
-       for (i=0; i < _lmb->reserved.cnt ;i++) {
+                   lmb.reserved.size);
+       for (i=0; i < lmb.reserved.cnt ;i++) {
                 udbg_printf("    reserved.region[0x%x].base       = 0x%lx\n",
-                           i, _lmb->reserved.region[i].base);
-               udbg_printf("                 .physbase = 0x%lx\n",
-                           _lmb->reserved.region[i].physbase);
+                           i, lmb.reserved.region[i].base);
                 udbg_printf("                 .size     = 0x%lx\n",
-                           _lmb->reserved.region[i].size);
+                           lmb.reserved.region[i].size);
         }
  #endif /* DEBUG */
  }
@@ -98,7 +93,6 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2)
         rgn->region[r1].size += rgn->region[r2].size;
         for (i=r2; i < rgn->cnt-1; i++) {
                 rgn->region[i].base = rgn->region[i+1].base;
-               rgn->region[i].physbase = rgn->region[i+1].physbase;
                 rgn->region[i].size = rgn->region[i+1].size;
         }
         rgn->cnt--;
@@ -108,49 +102,29 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2)
  void __init
  lmb_init(void)
  {
-       struct lmb *_lmb = &lmb;
-
         /* Create a dummy zero size LMB which will get coalesced away later.
          * This simplifies the lmb_add() code below...
          */
-       _lmb->memory.region[0].base = 0;
-       _lmb->memory.region[0].size = 0;
-       _lmb->memory.cnt = 1;
+       lmb.memory.region[0].base = 0;
+       lmb.memory.region[0].size = 0;
+       lmb.memory.cnt = 1;
  
         /* Ditto. */
-       _lmb->reserved.region[0].base = 0;
-       _lmb->reserved.region[0].size = 0;
-       _lmb->reserved.cnt = 1;
+       lmb.reserved.region[0].base = 0;
+       lmb.reserved.region[0].size = 0;
+       lmb.reserved.cnt = 1;
  }
  
  /* This routine called with relocation disabled. */
  void __init
  lmb_analyze(void)
  {
-       unsigned long i;
-       unsigned long mem_size = 0;
-       unsigned long size_mask = 0;
-       struct lmb *_lmb = &lmb;
-#ifdef CONFIG_MSCHUNKS
-       unsigned long physbase = 0;
-#endif
-
-       for (i=0; i < _lmb->memory.cnt; i++) {
-               unsigned long lmb_size;
-
-               lmb_size = _lmb->memory.region[i].size;
-
-#ifdef CONFIG_MSCHUNKS
-               _lmb->memory.region[i].physbase = physbase;
-               physbase += lmb_size;
-#else
-               _lmb->memory.region[i].physbase = _lmb->memory.region[i].base;
-#endif
-               mem_size += lmb_size;
-               size_mask |= lmb_size;
-       }
+       int i;
+
+       lmb.memory.size = 0;
  
-       _lmb->memory.size = mem_size;
+       for (i = 0; i < lmb.memory.cnt; i++)
+               lmb.memory.size += lmb.memory.region[i].size;
  }
  
  /* This routine called with relocation disabled. */
@@ -168,7 +142,6 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
                 adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize);
                 if ( adjacent > 0 ) {
                         rgn->region[i].base -= size;
-                       rgn->region[i].physbase -= size;
                         rgn->region[i].size += size;
                         coalesced++;
                         break;
@@ -195,11 +168,9 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
         for (i=rgn->cnt-1; i >= 0; i--) {
                 if (base < rgn->region[i].base) {
                         rgn->region[i+1].base = rgn->region[i].base;
-                       rgn->region[i+1].physbase = rgn->region[i].physbase;
                         rgn->region[i+1].size = rgn->region[i].size;
                 }  else {
                         rgn->region[i+1].base = base;
-                       rgn->region[i+1].physbase = lmb_abs_to_phys(base);
                         rgn->region[i+1].size = size;
                         break;
                 }
@@ -213,12 +184,11 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
  long __init
  lmb_add(unsigned long base, unsigned long size)
  {
-       struct lmb *_lmb = &lmb;
-       struct lmb_region *_rgn = &(_lmb->memory);
+       struct lmb_region *_rgn = &(lmb.memory);
  
         /* On pSeries LPAR systems, the first LMB is our RMO region. */
         if ( base == 0 )
-               _lmb->rmo_size = size;
+               lmb.rmo_size = size;
  
         return lmb_add_region(_rgn, base, size);
  
@@ -227,8 +197,7 @@ lmb_add(unsigned long base, unsigned long size)
  long __init
  lmb_reserve(unsigned long base, unsigned long size)
  {
-       struct lmb *_lmb = &lmb;
-       struct lmb_region *_rgn = &(_lmb->reserved);
+       struct lmb_region *_rgn = &(lmb.reserved);
  
         return lmb_add_region(_rgn, base, size);
  }
@@ -260,13 +229,10 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr)
  {
         long i, j;
         unsigned long base = 0;
-       struct lmb *_lmb = &lmb;
-       struct lmb_region *_mem = &(_lmb->memory);
-       struct lmb_region *_rsv = &(_lmb->reserved);
  
-       for (i=_mem->cnt-1; i >= 0; i--) {
-               unsigned long lmbbase = _mem->region[i].base;
-               unsigned long lmbsize = _mem->region[i].size;
+       for (i=lmb.memory.cnt-1; i >= 0; i--) {
+               unsigned long lmbbase = lmb.memory.region[i].base;
+               unsigned long lmbsize = lmb.memory.region[i].size;
  
                 if ( max_addr == LMB_ALLOC_ANYWHERE )
                         base = _ALIGN_DOWN(lmbbase+lmbsize-size, align);
@@ -276,8 +242,8 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr)
                         continue;
  
                 while ( (lmbbase <= base) &&
-                       ((j = lmb_overlaps_region(_rsv,base,size)) >= 0) ) {
-                       base = _ALIGN_DOWN(_rsv->region[j].base-size, align);
+                       ((j = lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) {
+                       base = _ALIGN_DOWN(lmb.reserved.region[j].base-size, align);
                 }
  
                 if ( (base != 0) && (lmbbase <= base) )
@@ -287,62 +253,24 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr)
         if ( i < 0 )
                 return 0;
  
-       lmb_add_region(_rsv, base, size);
+       lmb_add_region(&lmb.reserved, base, size);
  
         return base;
  }
  
+/* You must call lmb_analyze() before this. */
  unsigned long __init
  lmb_phys_mem_size(void)
  {
-       struct lmb *_lmb = &lmb;
-#ifdef CONFIG_MSCHUNKS
-       return _lmb->memory.size;
-#else
-       struct lmb_region *_mem = &(_lmb->memory);
-       unsigned long total = 0;
-       int i;
-
-       /* add all physical memory to the bootmem map */
-       for (i=0; i < _mem->cnt; i++)
-               total += _mem->region[i].size;
-       return total;
-#endif /* CONFIG_MSCHUNKS */
+       return lmb.memory.size;
  }
  
  unsigned long __init
  lmb_end_of_DRAM(void)
  {
-       struct lmb *_lmb = &lmb;
-       struct lmb_region *_mem = &(_lmb->memory);
-       int idx = _mem->cnt - 1;
-
-#ifdef CONFIG_MSCHUNKS
-       return (_mem->region[idx].physbase + _mem->region[idx].size);
-#else
-       return (_mem->region[idx].base + _mem->region[idx].size);
-#endif /* CONFIG_MSCHUNKS */
-
-       return 0;
-}
-
-unsigned long __init
-lmb_abs_to_phys(unsigned long aa)
-{
-       unsigned long i, pa = aa;
-       struct lmb *_lmb = &lmb;
-       struct lmb_region *_mem = &(_lmb->memory);
-
-       for (i=0; i < _mem->cnt; i++) {
-               unsigned long lmbbase = _mem->region[i].base;
-               unsigned long lmbsize = _mem->region[i].size;
-               if ( lmb_addrs_overlap(aa,1,lmbbase,lmbsize) ) {
-                       pa = _mem->region[i].physbase + (aa - lmbbase);
-                       break;
-               }
-       }
+       int idx = lmb.memory.cnt - 1;
  
-       return pa;
+       return (lmb.memory.region[idx].base + lmb.memory.region[idx].size);
  }
  
  /*
@@ -353,20 +281,19 @@ void __init lmb_enforce_memory_limit(void)
  {
         extern unsigned long memory_limit;
         unsigned long i, limit;
-       struct lmb_region *mem = &(lmb.memory);
  
         if (! memory_limit)
                 return;
  
         limit = memory_limit;
-       for (i = 0; i < mem->cnt; i++) {
-               if (limit > mem->region[i].size) {
-                       limit -= mem->region[i].size;
+       for (i = 0; i < lmb.memory.cnt; i++) {
+               if (limit > lmb.memory.region[i].size) {
+                       limit -= lmb.memory.region[i].size;
                         continue;
                 }
  
-               mem->region[i].size = limit;
-               mem->cnt = i + 1;
+               lmb.memory.region[i].size = limit;
+               lmb.memory.cnt = i + 1;
                 break;
         }
  }
diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c

index 02e96627fa6604999b3ac6aafd926fea185e2676..edad361a8db00a9fbc5e3c7950ee575e22f8d4db 100644 (file)
--- a/arch/ppc64/kernel/lparcfg.c
+++ b/arch/ppc64/kernel/lparcfg.c
@@ -29,7 +29,7 @@
  #include <asm/iSeries/HvLpConfig.h>
  #include <asm/lppaca.h>
  #include <asm/hvcall.h>
-#include <asm/cputable.h>
+#include <asm/firmware.h>
  #include <asm/rtas.h>
  #include <asm/system.h>
  #include <asm/time.h>
@@ -273,6 +273,7 @@ static void parse_system_parameter_string(struct seq_file *m)
                 if (!workbuffer) {
                         printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
                                __FILE__, __FUNCTION__, __LINE__);
+                       kfree(local_buffer);                    
                         return;
                 }
  #ifdef LPARCFG_DEBUG
@@ -377,7 +378,7 @@ static int lparcfg_data(struct seq_file *m, void *v)
  
         partition_active_processors = lparcfg_count_active_processors();
  
-       if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+       if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
                 unsigned long h_entitled, h_unallocated;
                 unsigned long h_aggregation, h_resource;
                 unsigned long pool_idle_time, pool_procs;
@@ -571,7 +572,7 @@ int __init lparcfg_init(void)
         mode_t mode = S_IRUSR;
  
         /* Allow writing if we have FW_FEATURE_SPLPAR */
-       if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+       if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
                 lparcfg_fops.write = lparcfg_write;
                 mode |= S_IWUSR;
         }
diff --git a/arch/ppc64/kernel/lparmap.c b/arch/ppc64/kernel/lparmap.c

new file mode 100644 (file)

index 0000000..b81de28
--- /dev/null
+++ b/arch/ppc64/kernel/lparmap.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2005  Stephen Rothwell  IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/iSeries/LparMap.h>
+
+const struct LparMap __attribute__((__section__(".text"))) xLparMap = {
+       .xNumberEsids = HvEsidsToMap,
+       .xNumberRanges = HvRangesToMap,
+       .xSegmentTableOffs = STAB0_PAGE,
+
+       .xEsids = {
+               { .xKernelEsid = GET_ESID(KERNELBASE),
+                 .xKernelVsid = KERNEL_VSID(KERNELBASE), },
+               { .xKernelEsid = GET_ESID(VMALLOCBASE),
+                 .xKernelVsid = KERNEL_VSID(VMALLOCBASE), },
+       },
+
+       .xRanges = {
+               { .xPages = HvPagesToMap,
+                 .xOffset = 0,
+                 .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT),
+               },
+       },
+};
diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c

index fdb2fc649d72ce05d323f98c2147e4c8bb6fcd0c..4775f12a013c5b1a728b5f4ef57f81ef4f07e0be 100644 (file)
--- a/arch/ppc64/kernel/machine_kexec.c
+++ b/arch/ppc64/kernel/machine_kexec.c
@@ -185,7 +185,7 @@ void kexec_copy_flush(struct kimage *image)
  void kexec_smp_down(void *arg)
  {
         if (ppc_md.cpu_irq_down)
-               ppc_md.cpu_irq_down();
+               ppc_md.cpu_irq_down(1);
  
         local_irq_disable();
         kexec_smp_wait();
@@ -232,7 +232,7 @@ static void kexec_prepare_cpus(void)
  
         /* after we tell the others to go down */
         if (ppc_md.cpu_irq_down)
-               ppc_md.cpu_irq_down();
+               ppc_md.cpu_irq_down(0);
  
         put_cpu();
  
@@ -243,15 +243,19 @@ static void kexec_prepare_cpus(void)
  
  static void kexec_prepare_cpus(void)
  {
+       extern void smp_release_cpus(void);
         /*
          * move the secondarys to us so that we can copy
          * the new kernel 0-0x100 safely
          *
          * do this if kexec in setup.c ?
+        *
+        * We need to release the cpus if we are ever going from an
+        * UP to an SMP kernel.
          */
-       smp_relase_cpus();
+       smp_release_cpus();
         if (ppc_md.cpu_irq_down)
-               ppc_md.cpu_irq_down();
+               ppc_md.cpu_irq_down(0);
         local_irq_disable();
  }
  
diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S

index a05b50b738e97b1c55ff1eb8013ae24d6abd929d..474df0a862bfac60094e0e50c6ba0890f87e03c7 100644 (file)
--- a/arch/ppc64/kernel/misc.S
+++ b/arch/ppc64/kernel/misc.S
@@ -680,6 +680,104 @@ _GLOBAL(kernel_thread)
         ld      r30,-16(r1)
         blr
  
+/*
+ * disable_kernel_fp()
+ * Disable the FPU.
+ */
+_GLOBAL(disable_kernel_fp)
+       mfmsr   r3
+       rldicl  r0,r3,(63-MSR_FP_LG),1
+       rldicl  r3,r0,(MSR_FP_LG+1),0
+       mtmsrd  r3                      /* disable use of fpu now */
+       isync
+       blr
+
+/*
+ * giveup_fpu(tsk)
+ * Disable FP for the task given as the argument,
+ * and save the floating-point registers in its thread_struct.
+ * Enables the FPU for use in the kernel on return.
+ */
+_GLOBAL(giveup_fpu)
+       mfmsr   r5
+       ori     r5,r5,MSR_FP
+       mtmsrd  r5                      /* enable use of fpu now */
+       isync
+       cmpdi   0,r3,0
+       beqlr-                          /* if no previous owner, done */
+       addi    r3,r3,THREAD            /* want THREAD of task */
+       ld      r5,PT_REGS(r3)
+       cmpdi   0,r5,0
+       SAVE_32FPRS(0, r3)
+       mffs    fr0
+       stfd    fr0,THREAD_FPSCR(r3)
+       beq     1f
+       ld      r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+       li      r3,MSR_FP|MSR_FE0|MSR_FE1
+       andc    r4,r4,r3                /* disable FP for previous task */
+       std     r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+       li      r5,0
+       ld      r4,last_task_used_math@got(r2)
+       std     r5,0(r4)
+#endif /* CONFIG_SMP */
+       blr
+
+#ifdef CONFIG_ALTIVEC
+
+#if 0 /* this has no callers for now */
+/*
+ * disable_kernel_altivec()
+ * Disable the VMX.
+ */
+_GLOBAL(disable_kernel_altivec)
+       mfmsr   r3
+       rldicl  r0,r3,(63-MSR_VEC_LG),1
+       rldicl  r3,r0,(MSR_VEC_LG+1),0
+       mtmsrd  r3                      /* disable use of VMX now */
+       isync
+       blr
+#endif /* 0 */
+
+/*
+ * giveup_altivec(tsk)
+ * Disable VMX for the task given as the argument,
+ * and save the vector registers in its thread_struct.
+ * Enables the VMX for use in the kernel on return.
+ */
+_GLOBAL(giveup_altivec)
+       mfmsr   r5
+       oris    r5,r5,MSR_VEC@h
+       mtmsrd  r5                      /* enable use of VMX now */
+       isync
+       cmpdi   0,r3,0
+       beqlr-                          /* if no previous owner, done */
+       addi    r3,r3,THREAD            /* want THREAD of task */
+       ld      r5,PT_REGS(r3)
+       cmpdi   0,r5,0
+       SAVE_32VRS(0,r4,r3)
+       mfvscr  vr0
+       li      r4,THREAD_VSCR
+       stvx    vr0,r4,r3
+       beq     1f
+       ld      r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+       lis     r3,MSR_VEC@h
+       andc    r4,r4,r3                /* disable FP for previous task */
+       std     r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+       li      r5,0
+       ld      r4,last_task_used_altivec@got(r2)
+       std     r5,0(r4)
+#endif /* CONFIG_SMP */
+       blr
+
+#endif /* CONFIG_ALTIVEC */
+
+_GLOBAL(__setup_cpu_power3)
+       blr
+
  /* kexec_wait(phys_cpu)
   *
   * wait for the flag to change, indicating this kernel is going away but
diff --git a/arch/ppc64/kernel/mpic.c b/arch/ppc64/kernel/mpic.c

index e8fbab1df37f842cda79e59f8eb077228b93c6d9..cc262a05ddb4558499d649cf9c46d7610230f31f 100644 (file)
--- a/arch/ppc64/kernel/mpic.c
+++ b/arch/ppc64/kernel/mpic.c
@@ -794,10 +794,10 @@ void mpic_setup_this_cpu(void)
  
  /*
   * XXX: someone who knows mpic should check this.
- * do we need to eoi the ipi here (see xics comments)?
+ * do we need to eoi the ipi including for kexec cpu here (see xics comments)?
   * or can we reset the mpic in the new kernel?
   */
-void mpic_teardown_this_cpu(void)
+void mpic_teardown_this_cpu(int secondary)
  {
         struct mpic *mpic = mpic_primary;
         unsigned long flags;
diff --git a/arch/ppc64/kernel/mpic.h b/arch/ppc64/kernel/mpic.h

index 99fbbc9a084c46bec6e37789aca5987233177dd0..ca78a7f1052867de98d5fb06dbdbef4ec3b693e9 100644 (file)
--- a/arch/ppc64/kernel/mpic.h
+++ b/arch/ppc64/kernel/mpic.h
@@ -256,7 +256,7 @@ extern unsigned int mpic_irq_get_priority(unsigned int irq);
  extern void mpic_setup_this_cpu(void);
  
  /* Clean up for kexec (or cpu offline or ...) */
-extern void mpic_teardown_this_cpu(void);
+extern void mpic_teardown_this_cpu(int secondary);
  
  /* Request IPIs on primary mpic */
  extern void mpic_request_ipis(void);
diff --git a/arch/ppc64/kernel/of_device.c b/arch/ppc64/kernel/of_device.c

index b80e81984ba867c5f03dcfe2c91caccf4d73802c..da580812ddfeeb6305b8604c03ae6867ade531af 100644 (file)
--- a/arch/ppc64/kernel/of_device.c
+++ b/arch/ppc64/kernel/of_device.c
@@ -236,7 +236,6 @@ void of_device_unregister(struct of_device *ofdev)
  struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id)
  {
         struct of_device *dev;
-       u32 *reg;
  
         dev = kmalloc(sizeof(*dev), GFP_KERNEL);
         if (!dev)
@@ -250,7 +249,6 @@ struct of_device* of_platform_device_create(struct device_node *np, const char *
         dev->dev.bus = &of_platform_bus_type;
         dev->dev.release = of_release_dev;
  
-       reg = (u32 *)get_property(np, "reg", NULL);
         strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE);
  
         if (of_device_register(dev) != 0) {
diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c

index 69130522a87e17fed1d7de09562b4069aa0f2aba..9d5e1e7fc38962f26fdce0838d82d68f562e67f9 100644 (file)
--- a/arch/ppc64/kernel/pSeries_iommu.c
+++ b/arch/ppc64/kernel/pSeries_iommu.c
@@ -45,6 +45,7 @@
  #include <asm/plpar_wrappers.h>
  #include <asm/pSeries_reconfig.h>
  #include <asm/systemcfg.h>
+#include <asm/firmware.h>
  #include "pci.h"
  
  #define DBG(fmt...)
@@ -546,7 +547,7 @@ void iommu_init_early_pSeries(void)
         }
  
         if (systemcfg->platform & PLATFORM_LPAR) {
-               if (cur_cpu_spec->firmware_features & FW_FEATURE_MULTITCE) {
+               if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
                         ppc_md.tce_build = tce_buildmulti_pSeriesLP;
                         ppc_md.tce_free  = tce_freemulti_pSeriesLP;
                 } else {
diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c

index 74dd144dcce8dd18a06d2f84023c4f93e988a6f0..0a3ddc9227c56edd27182ea533b487452b519f17 100644 (file)
--- a/arch/ppc64/kernel/pSeries_lpar.c
+++ b/arch/ppc64/kernel/pSeries_lpar.c
@@ -52,7 +52,6 @@ EXPORT_SYMBOL(plpar_hcall_4out);
  EXPORT_SYMBOL(plpar_hcall_norets);
  EXPORT_SYMBOL(plpar_hcall_8arg_2ret);
  
-extern void fw_feature_init(void);
  extern void pSeries_find_serial_port(void);
  
  
@@ -279,7 +278,6 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
                               unsigned long va, unsigned long prpn,
                               unsigned long vflags, unsigned long rflags)
  {
-       unsigned long arpn = physRpn_to_absRpn(prpn);
         unsigned long lpar_rc;
         unsigned long flags;
         unsigned long slot;
@@ -290,7 +288,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
         if (vflags & HPTE_V_LARGE)
                 hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT);
  
-       hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
+       hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
  
         /* Now fill in the actual HPTE */
         /* Set CEC cookie to 0         */
diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c

index 5bec956e44a043775ffbd264807cebced14dfc77..f0f0630cf07cb0098030d92297f4e4d12b0ed74d 100644 (file)
--- a/arch/ppc64/kernel/pSeries_setup.c
+++ b/arch/ppc64/kernel/pSeries_setup.c
@@ -60,7 +60,8 @@
  #include <asm/nvram.h>
  #include <asm/plpar_wrappers.h>
  #include <asm/xics.h>
-#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/pmc.h>
  
  #include "i8259.h"
  #include "mpic.h"
@@ -187,6 +188,21 @@ static void __init pSeries_setup_mpic(void)
                                   " MPIC     ");
  }
  
+static void pseries_lpar_enable_pmcs(void)
+{
+       unsigned long set, reset;
+
+       power4_enable_pmcs();
+
+       set = 1UL << 63;
+       reset = 0;
+       plpar_hcall_norets(H_PERFMON, set, reset);
+
+       /* instruct hypervisor to maintain PMCs */
+       if (firmware_has_feature(FW_FEATURE_SPLPAR))
+               get_paca()->lppaca.pmcregs_in_use = 1;
+}
+
  static void __init pSeries_setup_arch(void)
  {
         /* Fixup ppc_md depending on the type of interrupt controller */
@@ -231,11 +247,9 @@ static void __init pSeries_setup_arch(void)
  
         pSeries_nvram_init();
  
-       if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
-               vpa_init(boot_cpuid);
-
         /* Choose an idle loop */
-       if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+       if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+               vpa_init(boot_cpuid);
                 if (get_paca()->lppaca.shared_proc) {
                         printk(KERN_INFO "Using shared processor idle loop\n");
                         ppc_md.idle_loop = pseries_shared_idle;
@@ -247,6 +261,11 @@ static void __init pSeries_setup_arch(void)
                 printk(KERN_INFO "Using default idle loop\n");
                 ppc_md.idle_loop = default_idle;
         }
+
+       if (systemcfg->platform & PLATFORM_LPAR)
+               ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
+       else
+               ppc_md.enable_pmcs = power4_enable_pmcs;
  }
  
  static int __init pSeries_init_panel(void)
@@ -260,11 +279,11 @@ static int __init pSeries_init_panel(void)
  arch_initcall(pSeries_init_panel);
  
  
-/* Build up the firmware_features bitmask field
+/* Build up the ppc64_firmware_features bitmask field
   * using contents of device-tree/ibm,hypertas-functions.
   * Ultimately this functionality may be moved into prom.c prom_init().
   */
-void __init fw_feature_init(void)
+static void __init fw_feature_init(void)
  {
         struct device_node * dn;
         char * hypertas;
@@ -272,7 +291,7 @@ void __init fw_feature_init(void)
  
         DBG(" -> fw_feature_init()\n");
  
-       cur_cpu_spec->firmware_features = 0;
+       ppc64_firmware_features = 0;
         dn = of_find_node_by_path("/rtas");
         if (dn == NULL) {
                 printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n");
@@ -288,7 +307,7 @@ void __init fw_feature_init(void)
                                 if ((firmware_features_table[i].name) &&
                                     (strcmp(firmware_features_table[i].name,hypertas))==0) {
                                         /* we have a match */
-                                       cur_cpu_spec->firmware_features |= 
+                                       ppc64_firmware_features |= 
                                                 (firmware_features_table[i].val);
                                         break;
                                 } 
@@ -302,7 +321,7 @@ void __init fw_feature_init(void)
         of_node_put(dn);
   no_rtas:
         printk(KERN_INFO "firmware_features = 0x%lx\n", 
-              cur_cpu_spec->firmware_features);
+              ppc64_firmware_features);
  
         DBG(" <- fw_feature_init()\n");
  }
diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c

index 62c55a123560cf6850dc1c10c5a171c953a7290c..79c7f32236658805bab9f783d80cb3d42091893a 100644 (file)
--- a/arch/ppc64/kernel/pSeries_smp.c
+++ b/arch/ppc64/kernel/pSeries_smp.c
@@ -41,6 +41,7 @@
  #include <asm/machdep.h>
  #include <asm/xics.h>
  #include <asm/cputable.h>
+#include <asm/firmware.h>
  #include <asm/system.h>
  #include <asm/rtas.h>
  #include <asm/plpar_wrappers.h>
@@ -326,7 +327,7 @@ static void __devinit smp_xics_setup_cpu(int cpu)
         if (cpu != boot_cpuid)
                 xics_setup_cpu();
  
-       if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
+       if (firmware_has_feature(FW_FEATURE_SPLPAR))
                 vpa_init(cpu);
  
         cpu_clear(cpu, of_spin_map);
diff --git a/arch/ppc64/kernel/pSeries_vio.c b/arch/ppc64/kernel/pSeries_vio.c

new file mode 100644 (file)

index 0000000..e0ae06f
--- /dev/null
+++ b/arch/ppc64/kernel/pSeries_vio.c
@@ -0,0 +1,273 @@
+/*
+ * IBM PowerPC pSeries Virtual I/O Infrastructure Support.
+ *
+ *    Copyright (c) 2003-2005 IBM Corp.
+ *     Dave Engebretsen engebret@us.ibm.com
+ *     Santiago Leon santil@us.ibm.com
+ *     Hollis Blanchard <hollisb@us.ibm.com>
+ *     Stephen Rothwell
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/kobject.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/prom.h>
+#include <asm/vio.h>
+#include <asm/hvcall.h>
+
+extern struct subsystem devices_subsys; /* needed for vio_find_name() */
+
+static void probe_bus_pseries(void)
+{
+       struct device_node *node_vroot, *of_node;
+
+       node_vroot = find_devices("vdevice");
+       if ((node_vroot == NULL) || (node_vroot->child == NULL))
+               /* this machine doesn't do virtual IO, and that's ok */
+               return;
+
+       /*
+        * Create struct vio_devices for each virtual device in the device tree.
+        * Drivers will associate with them later.
+        */
+       for (of_node = node_vroot->child; of_node != NULL;
+                       of_node = of_node->sibling) {
+               printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node);
+               vio_register_device_node(of_node);
+       }
+}
+
+/**
+ * vio_match_device_pseries: - Tell if a pSeries VIO device matches a
+ *     vio_device_id
+ */
+static int vio_match_device_pseries(const struct vio_device_id *id,
+               const struct vio_dev *dev)
+{
+       return (strncmp(dev->type, id->type, strlen(id->type)) == 0) &&
+                       device_is_compatible(dev->dev.platform_data, id->compat);
+}
+
+static void vio_release_device_pseries(struct device *dev)
+{
+       /* XXX free TCE table */
+       of_node_put(dev->platform_data);
+}
+
+static ssize_t viodev_show_devspec(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct device_node *of_node = dev->platform_data;
+
+       return sprintf(buf, "%s\n", of_node->full_name);
+}
+DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL);
+
+static void vio_unregister_device_pseries(struct vio_dev *viodev)
+{
+       device_remove_file(&viodev->dev, &dev_attr_devspec);
+}
+
+static struct vio_bus_ops vio_bus_ops_pseries = {
+       .match = vio_match_device_pseries,
+       .unregister_device = vio_unregister_device_pseries,
+       .release_device = vio_release_device_pseries,
+};
+
+/**
+ * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus
+ */
+static int __init vio_bus_init_pseries(void)
+{
+       int err;
+
+       err = vio_bus_init(&vio_bus_ops_pseries);
+       if (err == 0)
+               probe_bus_pseries();
+       return err;
+}
+
+__initcall(vio_bus_init_pseries);
+
+/**
+ * vio_build_iommu_table: - gets the dma information from OF and
+ *     builds the TCE tree.
+ * @dev: the virtual device.
+ *
+ * Returns a pointer to the built tce tree, or NULL if it can't
+ * find property.
+*/
+static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
+{
+       unsigned int *dma_window;
+       struct iommu_table *newTceTable;
+       unsigned long offset;
+       int dma_window_property_size;
+
+       dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size);
+       if(!dma_window) {
+               return NULL;
+       }
+
+       newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
+
+       /*  There should be some code to extract the phys-encoded offset
+               using prom_n_addr_cells(). However, according to a comment
+               on earlier versions, it's always zero, so we don't bother */
+       offset = dma_window[1] >>  PAGE_SHIFT;
+
+       /* TCE table size - measured in tce entries */
+       newTceTable->it_size            = dma_window[4] >> PAGE_SHIFT;
+       /* offset for VIO should always be 0 */
+       newTceTable->it_offset          = offset;
+       newTceTable->it_busno           = 0;
+       newTceTable->it_index           = (unsigned long)dma_window[0];
+       newTceTable->it_type            = TCE_VB;
+
+       return iommu_init_table(newTceTable);
+}
+
+/**
+ * vio_register_device_node: - Register a new vio device.
+ * @of_node:   The OF node for this device.
+ *
+ * Creates and initializes a vio_dev structure from the data in
+ * of_node (dev.platform_data) and adds it to the list of virtual devices.
+ * Returns a pointer to the created vio_dev or NULL if node has
+ * NULL device_type or compatible fields.
+ */
+struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
+{
+       struct vio_dev *viodev;
+       unsigned int *unit_address;
+       unsigned int *irq_p;
+
+       /* we need the 'device_type' property, in order to match with drivers */
+       if ((NULL == of_node->type)) {
+               printk(KERN_WARNING
+                       "%s: node %s missing 'device_type'\n", __FUNCTION__,
+                       of_node->name ? of_node->name : "<unknown>");
+               return NULL;
+       }
+
+       unit_address = (unsigned int *)get_property(of_node, "reg", NULL);
+       if (!unit_address) {
+               printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__,
+                       of_node->name ? of_node->name : "<unknown>");
+               return NULL;
+       }
+
+       /* allocate a vio_dev for this node */
+       viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
+       if (!viodev) {
+               return NULL;
+       }
+       memset(viodev, 0, sizeof(struct vio_dev));
+
+       viodev->dev.platform_data = of_node_get(of_node);
+
+       viodev->irq = NO_IRQ;
+       irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL);
+       if (irq_p) {
+               int virq = virt_irq_create_mapping(*irq_p);
+               if (virq == NO_IRQ) {
+                       printk(KERN_ERR "Unable to allocate interrupt "
+                              "number for %s\n", of_node->full_name);
+               } else
+                       viodev->irq = irq_offset_up(virq);
+       }
+
+       snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address);
+       viodev->name = of_node->name;
+       viodev->type = of_node->type;
+       viodev->unit_address = *unit_address;
+       viodev->iommu_table = vio_build_iommu_table(viodev);
+
+       /* register with generic device framework */
+       if (vio_register_device(viodev) == NULL) {
+               /* XXX free TCE table */
+               kfree(viodev);
+               return NULL;
+       }
+       device_create_file(&viodev->dev, &dev_attr_devspec);
+
+       return viodev;
+}
+EXPORT_SYMBOL(vio_register_device_node);
+
+/**
+ * vio_get_attribute: - get attribute for virtual device
+ * @vdev:      The vio device to get property.
+ * @which:     The property/attribute to be extracted.
+ * @length:    Pointer to length of returned data size (unused if NULL).
+ *
+ * Calls prom.c's get_property() to return the value of the
+ * attribute specified by the preprocessor constant @which
+*/
+const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length)
+{
+       return get_property(vdev->dev.platform_data, (char*)which, length);
+}
+EXPORT_SYMBOL(vio_get_attribute);
+
+/* vio_find_name() - internal because only vio.c knows how we formatted the
+ * kobject name
+ * XXX once vio_bus_type.devices is actually used as a kset in
+ * drivers/base/bus.c, this function should be removed in favor of
+ * "device_find(kobj_name, &vio_bus_type)"
+ */
+static struct vio_dev *vio_find_name(const char *kobj_name)
+{
+       struct kobject *found;
+
+       found = kset_find_obj(&devices_subsys.kset, kobj_name);
+       if (!found)
+               return NULL;
+
+       return to_vio_dev(container_of(found, struct device, kobj));
+}
+
+/**
+ * vio_find_node - find an already-registered vio_dev
+ * @vnode: device_node of the virtual device we're looking for
+ */
+struct vio_dev *vio_find_node(struct device_node *vnode)
+{
+       uint32_t *unit_address;
+       char kobj_name[BUS_ID_SIZE];
+
+       /* construct the kobject name from the device node */
+       unit_address = (uint32_t *)get_property(vnode, "reg", NULL);
+       if (!unit_address)
+               return NULL;
+       snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address);
+
+       return vio_find_name(kobj_name);
+}
+EXPORT_SYMBOL(vio_find_node);
+
+int vio_enable_interrupts(struct vio_dev *dev)
+{
+       int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
+       if (rc != H_Success)
+               printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
+       return rc;
+}
+EXPORT_SYMBOL(vio_enable_interrupts);
+
+int vio_disable_interrupts(struct vio_dev *dev)
+{
+       int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
+       if (rc != H_Success)
+               printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
+       return rc;
+}
+EXPORT_SYMBOL(vio_disable_interrupts);
diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c

index 6316188737b6eea0861e10cfa0041997456061c5..6182a2cd90a50e171b1812cef88ab76eaf843a55 100644 (file)
--- a/arch/ppc64/kernel/pacaData.c
+++ b/arch/ppc64/kernel/pacaData.c
@@ -78,7 +78,7 @@ extern unsigned long __toc_start;
  
  #define BOOTCPU_PACA_INIT(number)                                          \
  {                                                                          \
-       PACA_INIT_COMMON(number, 1, 0, STAB0_VIRT_ADDR)                     \
+       PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab)                  \
         PACA_INIT_ISERIES(number)                                           \
  }
  
@@ -90,7 +90,7 @@ extern unsigned long __toc_start;
  
  #define BOOTCPU_PACA_INIT(number)                                          \
  {                                                                          \
-       PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR)       \
+       PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab)    \
  }
  #endif
  
diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c

index ae6f579d3fa0f0f41cffa6e5c78c439ac430723b..d0d55c7908efe5a1c2bc5c584e4f9a91873b1c2f 100644 (file)
--- a/arch/ppc64/kernel/pci.c
+++ b/arch/ppc64/kernel/pci.c
@@ -108,8 +108,28 @@ void  pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region
         region->end = res->end - offset;
  }
  
+void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                             struct pci_bus_region *region)
+{
+       unsigned long offset = 0;
+       struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+       if (!hose)
+               return;
+
+       if (res->flags & IORESOURCE_IO)
+               offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+       if (res->flags & IORESOURCE_MEM)
+               offset = hose->pci_mem_offset;
+
+       res->start = region->start + offset;
+       res->end = region->end + offset;
+}
+
  #ifdef CONFIG_HOTPLUG
  EXPORT_SYMBOL(pcibios_resource_to_bus);
+EXPORT_SYMBOL(pcibios_bus_to_resource);
  #endif
  
  /*
diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c

index e40877fa67cd0efd266a78d1446f10bded9efa7e..8ff86a766cdf7d3595b5aab03224150cacc992c4 100644 (file)
--- a/arch/ppc64/kernel/pmac_setup.c
+++ b/arch/ppc64/kernel/pmac_setup.c
@@ -71,6 +71,7 @@
  #include <asm/of_device.h>
  #include <asm/lmb.h>
  #include <asm/smu.h>
+#include <asm/pmc.h>
  
  #include "pmac.h"
  #include "mpic.h"
@@ -511,4 +512,5 @@ struct machdep_calls __initdata pmac_md = {
         .progress               = pmac_progress,
         .check_legacy_ioport    = pmac_check_legacy_ioport,
         .idle_loop              = native_idle,
+       .enable_pmcs            = power4_enable_pmcs,
  };
diff --git a/arch/ppc64/kernel/pmc.c b/arch/ppc64/kernel/pmc.c

index 67be773f9c00d25d8df4ee6ed6c42870a57706f8..cdfec7438d0132334f17b65ec539fa919c997b91 100644 (file)
--- a/arch/ppc64/kernel/pmc.c
+++ b/arch/ppc64/kernel/pmc.c
@@ -65,3 +65,24 @@ void release_pmc_hardware(void)
         spin_unlock(&pmc_owner_lock);
  }
  EXPORT_SYMBOL_GPL(release_pmc_hardware);
+
+void power4_enable_pmcs(void)
+{
+       unsigned long hid0;
+
+       hid0 = mfspr(HID0);
+       hid0 |= 1UL << (63 - 20);
+
+       /* POWER4 requires the following sequence */
+       asm volatile(
+               "sync\n"
+               "mtspr     %1, %0\n"
+               "mfspr     %0, %1\n"
+               "mfspr     %0, %1\n"
+               "mfspr     %0, %1\n"
+               "mfspr     %0, %1\n"
+               "mfspr     %0, %1\n"
+               "mfspr     %0, %1\n"
+               "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0):
+               "memory");
+}
diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c

index f7cae05e40fb2cbfcfb255998ec938c6eee72676..7a7e027653ad922dac160da11e545401696148ed 100644 (file)
--- a/arch/ppc64/kernel/process.c
+++ b/arch/ppc64/kernel/process.c
@@ -50,6 +50,7 @@
  #include <asm/machdep.h>
  #include <asm/iSeries/HvCallHpt.h>
  #include <asm/cputable.h>
+#include <asm/firmware.h>
  #include <asm/sections.h>
  #include <asm/tlbflush.h>
  #include <asm/time.h>
@@ -202,11 +203,10 @@ struct task_struct *__switch_to(struct task_struct *prev,
         new_thread = &new->thread;
         old_thread = &current->thread;
  
-/* Collect purr utilization data per process and per processor wise */
-/* purr is nothing but processor time base                          */
-
-#if defined(CONFIG_PPC_PSERIES)
-       if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+       /* Collect purr utilization data per process and per processor
+        * wise purr is nothing but processor time base
+        */
+       if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
                 struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
                 long unsigned start_tb, current_tb;
                 start_tb = old_thread->start_tb;
@@ -214,8 +214,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
                 old_thread->accum_tb += (current_tb - start_tb);
                 new_thread->start_tb = current_tb;
         }
-#endif
-
  
         local_irq_save(flags);
         last = _switch(old_thread, new_thread);
diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c

index 47727a6f734623c89a14d5b0d61593bdcacafa74..b21848826791e224a7bf9bd737de5c0feba74ebe 100644 (file)
--- a/arch/ppc64/kernel/prom.c
+++ b/arch/ppc64/kernel/prom.c
@@ -625,8 +625,8 @@ void __init finish_device_tree(void)
  
  static inline char *find_flat_dt_string(u32 offset)
  {
-       return ((char *)initial_boot_params) + initial_boot_params->off_dt_strings
-               + offset;
+       return ((char *)initial_boot_params) +
+               initial_boot_params->off_dt_strings + offset;
  }
  
  /**
@@ -635,26 +635,33 @@ static inline char *find_flat_dt_string(u32 offset)
   * unflatten the tree
   */
  static int __init scan_flat_dt(int (*it)(unsigned long node,
-                                        const char *full_path, void *data),
+                                        const char *uname, int depth,
+                                        void *data),
                                void *data)
  {
         unsigned long p = ((unsigned long)initial_boot_params) +
                 initial_boot_params->off_dt_struct;
         int rc = 0;
+       int depth = -1;
  
         do {
                 u32 tag = *((u32 *)p);
                 char *pathp;
                 
                 p += 4;
-               if (tag == OF_DT_END_NODE)
+               if (tag == OF_DT_END_NODE) {
+                       depth --;
+                       continue;
+               }
+               if (tag == OF_DT_NOP)
                         continue;
                 if (tag == OF_DT_END)
                         break;
                 if (tag == OF_DT_PROP) {
                         u32 sz = *((u32 *)p);
                         p += 8;
-                       p = _ALIGN(p, sz >= 8 ? 8 : 4);
+                       if (initial_boot_params->version < 0x10)
+                               p = _ALIGN(p, sz >= 8 ? 8 : 4);
                         p += sz;
                         p = _ALIGN(p, 4);
                         continue;
@@ -664,9 +671,18 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
                                " device tree !\n", tag);
                         return -EINVAL;
                 }
+               depth++;
                 pathp = (char *)p;
                 p = _ALIGN(p + strlen(pathp) + 1, 4);
-               rc = it(p, pathp, data);
+               if ((*pathp) == '/') {
+                       char *lp, *np;
+                       for (lp = NULL, np = pathp; *np; np++)
+                               if ((*np) == '/')
+                                       lp = np+1;
+                       if (lp != NULL)
+                               pathp = lp;
+               }
+               rc = it(p, pathp, depth, data);
                 if (rc != 0)
                         break;          
         } while(1);
@@ -689,17 +705,21 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name,
                 const char *nstr;
  
                 p += 4;
+               if (tag == OF_DT_NOP)
+                       continue;
                 if (tag != OF_DT_PROP)
                         return NULL;
  
                 sz = *((u32 *)p);
                 noff = *((u32 *)(p + 4));
                 p += 8;
-               p = _ALIGN(p, sz >= 8 ? 8 : 4);
+               if (initial_boot_params->version < 0x10)
+                       p = _ALIGN(p, sz >= 8 ? 8 : 4);
  
                 nstr = find_flat_dt_string(noff);
                 if (nstr == NULL) {
-                       printk(KERN_WARNING "Can't find property index name !\n");
+                       printk(KERN_WARNING "Can't find property index"
+                              " name !\n");
                         return NULL;
                 }
                 if (strcmp(name, nstr) == 0) {
@@ -713,7 +733,7 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name,
  }
  
  static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
-                                              unsigned long align)
+                                      unsigned long align)
  {
         void *res;
  
@@ -727,13 +747,16 @@ static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
  static unsigned long __init unflatten_dt_node(unsigned long mem,
                                               unsigned long *p,
                                               struct device_node *dad,
-                                             struct device_node ***allnextpp)
+                                             struct device_node ***allnextpp,
+                                             unsigned long fpsize)
  {
         struct device_node *np;
         struct property *pp, **prev_pp = NULL;
         char *pathp;
         u32 tag;
-       unsigned int l;
+       unsigned int l, allocl;
+       int has_name = 0;
+       int new_format = 0;
  
         tag = *((u32 *)(*p));
         if (tag != OF_DT_BEGIN_NODE) {
@@ -742,21 +765,62 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
         }
         *p += 4;
         pathp = (char *)*p;
-       l = strlen(pathp) + 1;
+       l = allocl = strlen(pathp) + 1;
         *p = _ALIGN(*p + l, 4);
  
-       np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + l,
+       /* version 0x10 has a more compact unit name here instead of the full
+        * path. we accumulate the full path size using "fpsize", we'll rebuild
+        * it later. We detect this because the first character of the name is
+        * not '/'.
+        */
+       if ((*pathp) != '/') {
+               new_format = 1;
+               if (fpsize == 0) {
+                       /* root node: special case. fpsize accounts for path
+                        * plus terminating zero. root node only has '/', so
+                        * fpsize should be 2, but we want to avoid the first
+                        * level nodes to have two '/' so we use fpsize 1 here
+                        */
+                       fpsize = 1;
+                       allocl = 2;
+               } else {
+                       /* account for '/' and path size minus terminal 0
+                        * already in 'l'
+                        */
+                       fpsize += l;
+                       allocl = fpsize;
+               }
+       }
+
+
+       np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl,
                                 __alignof__(struct device_node));
         if (allnextpp) {
                 memset(np, 0, sizeof(*np));
                 np->full_name = ((char*)np) + sizeof(struct device_node);
-               memcpy(np->full_name, pathp, l);
+               if (new_format) {
+                       char *p = np->full_name;
+                       /* rebuild full path for new format */
+                       if (dad && dad->parent) {
+                               strcpy(p, dad->full_name);
+#ifdef DEBUG
+                               if ((strlen(p) + l + 1) != allocl) {
+                                       DBG("%s: p: %d, l: %d, a: %d\n",
+                                           pathp, strlen(p), l, allocl);
+                               }
+#endif
+                               p += strlen(p);
+                       }
+                       *(p++) = '/';
+                       memcpy(p, pathp, l);
+               } else
+                       memcpy(np->full_name, pathp, l);
                 prev_pp = &np->properties;
                 **allnextpp = np;
                 *allnextpp = &np->allnext;
                 if (dad != NULL) {
                         np->parent = dad;
-                       /* we temporarily use the `next' field as `last_child'. */
+                       /* we temporarily use the next field as `last_child'*/
                         if (dad->next == 0)
                                 dad->child = np;
                         else
@@ -770,18 +834,26 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
                 char *pname;
  
                 tag = *((u32 *)(*p));
+               if (tag == OF_DT_NOP) {
+                       *p += 4;
+                       continue;
+               }
                 if (tag != OF_DT_PROP)
                         break;
                 *p += 4;
                 sz = *((u32 *)(*p));
                 noff = *((u32 *)((*p) + 4));
-               *p = _ALIGN((*p) + 8, sz >= 8 ? 8 : 4);
+               *p += 8;
+               if (initial_boot_params->version < 0x10)
+                       *p = _ALIGN(*p, sz >= 8 ? 8 : 4);
  
                 pname = find_flat_dt_string(noff);
                 if (pname == NULL) {
                         printk("Can't find property name in list !\n");
                         break;
                 }
+               if (strcmp(pname, "name") == 0)
+                       has_name = 1;
                 l = strlen(pname) + 1;
                 pp = unflatten_dt_alloc(&mem, sizeof(struct property),
                                         __alignof__(struct property));
@@ -801,6 +873,36 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
                 }
                 *p = _ALIGN((*p) + sz, 4);
         }
+       /* with version 0x10 we may not have the name property, recreate
+        * it here from the unit name if absent
+        */
+       if (!has_name) {
+               char *p = pathp, *ps = pathp, *pa = NULL;
+               int sz;
+
+               while (*p) {
+                       if ((*p) == '@')
+                               pa = p;
+                       if ((*p) == '/')
+                               ps = p + 1;
+                       p++;
+               }
+               if (pa < ps)
+                       pa = p;
+               sz = (pa - ps) + 1;
+               pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz,
+                                       __alignof__(struct property));
+               if (allnextpp) {
+                       pp->name = "name";
+                       pp->length = sz;
+                       pp->value = (unsigned char *)(pp + 1);
+                       *prev_pp = pp;
+                       prev_pp = &pp->next;
+                       memcpy(pp->value, ps, sz - 1);
+                       ((char *)pp->value)[sz - 1] = 0;
+                       DBG("fixed up name for %s -> %s\n", pathp, pp->value);
+               }
+       }
         if (allnextpp) {
                 *prev_pp = NULL;
                 np->name = get_property(np, "name", NULL);
@@ -812,11 +914,11 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
                         np->type = "<NULL>";
         }
         while (tag == OF_DT_BEGIN_NODE) {
-               mem = unflatten_dt_node(mem, p, np, allnextpp);
+               mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize);
                 tag = *((u32 *)(*p));
         }
         if (tag != OF_DT_END_NODE) {
-               printk("Weird tag at start of node: %x\n", tag);
+               printk("Weird tag at end of node: %x\n", tag);
                 return mem;
         }
         *p += 4;
@@ -842,21 +944,32 @@ void __init unflatten_device_tree(void)
         /* First pass, scan for size */
         start = ((unsigned long)initial_boot_params) +
                 initial_boot_params->off_dt_struct;
-       size = unflatten_dt_node(0, &start, NULL, NULL);
+       size = unflatten_dt_node(0, &start, NULL, NULL, 0);
+       size = (size | 3) + 1;
  
         DBG("  size is %lx, allocating...\n", size);
  
         /* Allocate memory for the expanded device tree */
-       mem = (unsigned long)abs_to_virt(lmb_alloc(size,
-                                                  __alignof__(struct device_node)));
+       mem = lmb_alloc(size + 4, __alignof__(struct device_node));
+       if (!mem) {
+               DBG("Couldn't allocate memory with lmb_alloc()!\n");
+               panic("Couldn't allocate memory with lmb_alloc()!\n");
+       }
+       mem = (unsigned long)abs_to_virt(mem);
+
+       ((u32 *)mem)[size / 4] = 0xdeadbeef;
+
         DBG("  unflattening...\n", mem);
  
         /* Second pass, do actual unflattening */
         start = ((unsigned long)initial_boot_params) +
                 initial_boot_params->off_dt_struct;
-       unflatten_dt_node(mem, &start, NULL, &allnextp);
+       unflatten_dt_node(mem, &start, NULL, &allnextp, 0);
         if (*((u32 *)start) != OF_DT_END)
-               printk(KERN_WARNING "Weird tag at end of tree: %x\n", *((u32 *)start));
+               printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start));
+       if (((u32 *)mem)[size / 4] != 0xdeadbeef)
+               printk(KERN_WARNING "End of tree marker overwritten: %08x\n",
+                      ((u32 *)mem)[size / 4] );
         *allnextp = NULL;
  
         /* Get pointer to OF "/chosen" node for use everywhere */
@@ -880,7 +993,7 @@ void __init unflatten_device_tree(void)
  
  
  static int __init early_init_dt_scan_cpus(unsigned long node,
-                                         const char *full_path, void *data)
+                                         const char *uname, int depth, void *data)
  {
         char *type = get_flat_dt_prop(node, "device_type", NULL);
         u32 *prop;
@@ -916,6 +1029,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
                 }
         }
  
+#ifdef CONFIG_ALTIVEC
         /* Check if we have a VMX and eventually update CPU features */
         prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", NULL);
         if (prop && (*prop) > 0) {
@@ -929,6 +1043,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
                 cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
                 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
         }
+#endif /* CONFIG_ALTIVEC */
  
         /*
          * Check for an SMT capable CPU and set the CPU feature. We do
@@ -945,13 +1060,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
  }
  
  static int __init early_init_dt_scan_chosen(unsigned long node,
-                                           const char *full_path, void *data)
+                                           const char *uname, int depth, void *data)
  {
         u32 *prop;
         u64 *prop64;
         extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end;
  
-       if (strcmp(full_path, "/chosen") != 0)
+       DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
+
+       if (depth != 1 || strcmp(uname, "chosen") != 0)
                 return 0;
  
         /* get platform type */
@@ -1001,18 +1118,20 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
  }
  
  static int __init early_init_dt_scan_root(unsigned long node,
-                                         const char *full_path, void *data)
+                                         const char *uname, int depth, void *data)
  {
         u32 *prop;
  
-       if (strcmp(full_path, "/") != 0)
+       if (depth != 0)
                 return 0;
  
         prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL);
         dt_root_size_cells = (prop == NULL) ? 1 : *prop;
-               
+       DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
+
         prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL);
         dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
+       DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
         
         /* break now */
         return 1;
@@ -1040,7 +1159,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
  
  
  static int __init early_init_dt_scan_memory(unsigned long node,
-                                           const char *full_path, void *data)
+                                           const char *uname, int depth, void *data)
  {
         char *type = get_flat_dt_prop(node, "device_type", NULL);
         cell_t *reg, *endp;
@@ -1056,7 +1175,9 @@ static int __init early_init_dt_scan_memory(unsigned long node,
  
         endp = reg + (l / sizeof(cell_t));
  
-       DBG("memory scan node %s ...\n", full_path);
+       DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n",
+           uname, l, reg[0], reg[1], reg[2], reg[3]);
+
         while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
                 unsigned long base, size;
  
@@ -1467,10 +1588,11 @@ struct device_node *of_find_node_by_path(const char *path)
         struct device_node *np = allnodes;
  
         read_lock(&devtree_lock);
-       for (; np != 0; np = np->allnext)
+       for (; np != 0; np = np->allnext) {
                 if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0
                     && of_node_get(np))
                         break;
+       }
         read_unlock(&devtree_lock);
         return np;
  }
diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c

index e248a7950aebdebb5a7ead74b5482ad18d7c30e1..122283a1d39a79d2cb988b51128bec5e582cccdd 100644 (file)
--- a/arch/ppc64/kernel/prom_init.c
+++ b/arch/ppc64/kernel/prom_init.c
@@ -892,7 +892,10 @@ static void __init prom_init_mem(void)
         if ( RELOC(of_platform) == PLATFORM_PSERIES_LPAR )
                 RELOC(alloc_top) = RELOC(rmo_top);
         else
-               RELOC(alloc_top) = RELOC(rmo_top) = min(0x40000000ul, RELOC(ram_top));
+               /* Some RS64 machines have buggy firmware where claims up at 1GB
+                * fails. Cap at 768MB as a workaround. Still plenty of room.
+                */
+               RELOC(alloc_top) = RELOC(rmo_top) = min(0x30000000ul, RELOC(ram_top));
  
         prom_printf("memory layout at init:\n");
         prom_printf("  memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit));
@@ -1534,7 +1537,8 @@ static unsigned long __init dt_find_string(char *str)
   */
  #define MAX_PROPERTY_NAME 64
  
-static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
+static void __init scan_dt_build_strings(phandle node,
+                                        unsigned long *mem_start,
                                          unsigned long *mem_end)
  {
         unsigned long offset = reloc_offset();
@@ -1547,16 +1551,21 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
         /* get and store all property names */
         prev_name = RELOC("");
         for (;;) {
-               int rc;
-
                 /* 64 is max len of name including nul. */
                 namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1);
-               rc = call_prom("nextprop", 3, 1, node, prev_name, namep);
-               if (rc != 1) {
+               if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) {
                         /* No more nodes: unwind alloc */
                         *mem_start = (unsigned long)namep;
                         break;
                 }
+
+               /* skip "name" */
+               if (strcmp(namep, RELOC("name")) == 0) {
+                       *mem_start = (unsigned long)namep;
+                       prev_name = RELOC("name");
+                       continue;
+               }
+               /* get/create string entry */
                 soff = dt_find_string(namep);
                 if (soff != 0) {
                         *mem_start = (unsigned long)namep;
@@ -1571,7 +1580,7 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
  
         /* do all our children */
         child = call_prom("child", 1, 1, node);
-       while (child != (phandle)0) {
+       while (child != 0) {
                 scan_dt_build_strings(child, mem_start, mem_end);
                 child = call_prom("peer", 1, 1, child);
         }
@@ -1580,16 +1589,13 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
  static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
                                         unsigned long *mem_end)
  {
-       int l, align;
         phandle child;
-       char *namep, *prev_name, *sstart, *p, *ep;
+       char *namep, *prev_name, *sstart, *p, *ep, *lp, *path;
         unsigned long soff;
         unsigned char *valp;
         unsigned long offset = reloc_offset();
-       char pname[MAX_PROPERTY_NAME];
-       char *path;
-
-       path = RELOC(prom_scratch);
+       static char pname[MAX_PROPERTY_NAME];
+       int l;
  
         dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
  
@@ -1599,23 +1605,33 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
                       namep, *mem_end - *mem_start);
         if (l >= 0) {
                 /* Didn't fit?  Get more room. */
-               if (l+1 > *mem_end - *mem_start) {
+               if ((l+1) > (*mem_end - *mem_start)) {
                         namep = make_room(mem_start, mem_end, l+1, 1);
                         call_prom("package-to-path", 3, 1, node, namep, l);
                 }
                 namep[l] = '\0';
+
                 /* Fixup an Apple bug where they have bogus \0 chars in the
                  * middle of the path in some properties
                  */
                 for (p = namep, ep = namep + l; p < ep; p++)
                         if (*p == '\0') {
                                 memmove(p, p+1, ep - p);
-                               ep--; l--;
+                               ep--; l--; p--;
                         }
-               *mem_start = _ALIGN(((unsigned long) namep) + strlen(namep) + 1, 4);
+
+               /* now try to extract the unit name in that mess */
+               for (p = namep, lp = NULL; *p; p++)
+                       if (*p == '/')
+                               lp = p + 1;
+               if (lp != NULL)
+                       memmove(namep, lp, strlen(lp) + 1);
+               *mem_start = _ALIGN(((unsigned long) namep) +
+                                   strlen(namep) + 1, 4);
         }
  
         /* get it again for debugging */
+       path = RELOC(prom_scratch);
         memset(path, 0, PROM_SCRATCH_SIZE);
         call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
  
@@ -1623,23 +1639,27 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
         prev_name = RELOC("");
         sstart = (char *)RELOC(dt_string_start);
         for (;;) {
-               int rc;
-
-               rc = call_prom("nextprop", 3, 1, node, prev_name, pname);
-               if (rc != 1)
+               if (call_prom("nextprop", 3, 1, node, prev_name,
+                             RELOC(pname)) != 1)
                         break;
  
+               /* skip "name" */
+               if (strcmp(RELOC(pname), RELOC("name")) == 0) {
+                       prev_name = RELOC("name");
+                       continue;
+               }
+
                 /* find string offset */
-               soff = dt_find_string(pname);
+               soff = dt_find_string(RELOC(pname));
                 if (soff == 0) {
-                       prom_printf("WARNING: Can't find string index for <%s>, node %s\n",
-                                   pname, path);
+                       prom_printf("WARNING: Can't find string index for"
+                                   " <%s>, node %s\n", RELOC(pname), path);
                         break;
                 }
                 prev_name = sstart + soff;
  
                 /* get length */
-               l = call_prom("getproplen", 2, 1, node, pname);
+               l = call_prom("getproplen", 2, 1, node, RELOC(pname));
  
                 /* sanity checks */
                 if (l == PROM_ERROR)
@@ -1648,7 +1668,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
                         prom_printf("WARNING: ignoring large property ");
                         /* It seems OF doesn't null-terminate the path :-( */
                         prom_printf("[%s] ", path);
-                       prom_printf("%s length 0x%x\n", pname, l);
+                       prom_printf("%s length 0x%x\n", RELOC(pname), l);
                         continue;
                 }
  
@@ -1658,17 +1678,16 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
                 dt_push_token(soff, mem_start, mem_end);
  
                 /* push property content */
-               align = (l >= 8) ? 8 : 4;
-               valp = make_room(mem_start, mem_end, l, align);
-               call_prom("getprop", 4, 1, node, pname, valp, l);
+               valp = make_room(mem_start, mem_end, l, 4);
+               call_prom("getprop", 4, 1, node, RELOC(pname), valp, l);
                 *mem_start = _ALIGN(*mem_start, 4);
         }
  
         /* Add a "linux,phandle" property. */
         soff = dt_find_string(RELOC("linux,phandle"));
         if (soff == 0)
-               prom_printf("WARNING: Can't find string index for <linux-phandle>"
-                           " node %s\n", path);
+               prom_printf("WARNING: Can't find string index for"
+                           " <linux-phandle> node %s\n", path);
         else {
                 dt_push_token(OF_DT_PROP, mem_start, mem_end);
                 dt_push_token(4, mem_start, mem_end);
@@ -1679,7 +1698,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
  
         /* do all our children */
         child = call_prom("child", 1, 1, node);
-       while (child != (phandle)0) {
+       while (child != 0) {
                 scan_dt_build_struct(child, mem_start, mem_end);
                 child = call_prom("peer", 1, 1, child);
         }
@@ -1718,7 +1737,8 @@ static void __init flatten_device_tree(void)
  
         /* Build header and make room for mem rsv map */ 
         mem_start = _ALIGN(mem_start, 4);
-       hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4);
+       hdr = make_room(&mem_start, &mem_end,
+                       sizeof(struct boot_param_header), 4);
         RELOC(dt_header_start) = (unsigned long)hdr;
         rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8);
  
@@ -1731,11 +1751,11 @@ static void __init flatten_device_tree(void)
         namep = make_room(&mem_start, &mem_end, 16, 1);
         strcpy(namep, RELOC("linux,phandle"));
         mem_start = (unsigned long)namep + strlen(namep) + 1;
-       RELOC(dt_string_end) = mem_start;
  
         /* Build string array */
         prom_printf("Building dt strings...\n"); 
         scan_dt_build_strings(root, &mem_start, &mem_end);
+       RELOC(dt_string_end) = mem_start;
  
         /* Build structure */
         mem_start = PAGE_ALIGN(mem_start);
@@ -1750,9 +1770,11 @@ static void __init flatten_device_tree(void)
         hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start);
         hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start);
         hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start);
+       hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start);
         hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start);
         hdr->version = OF_DT_VERSION;
-       hdr->last_comp_version = 1;
+       /* Version 16 is not backward compatible */
+       hdr->last_comp_version = 0x10;
  
         /* Reserve the whole thing and copy the reserve map in, we
          * also bump mem_reserve_cnt to cause further reservations to
@@ -1803,11 +1825,14 @@ static void __init fixup_device_tree(void)
         if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev))
             == PROM_ERROR)
                 return;
-       if (u3_rev != 0x35)
+       if (u3_rev != 0x35 && u3_rev != 0x37)
                 return;
         /* does it need fixup ? */
         if (prom_getproplen(i2c, "interrupts") > 0)
                 return;
+
+       prom_printf("fixing up bogus interrupts for u3 i2c...\n");
+
         /* interrupt on this revision of u3 is number 0 and level */
         interrupts[0] = 0;
         interrupts[1] = 1;
diff --git a/arch/ppc64/kernel/rtas_pci.c b/arch/ppc64/kernel/rtas_pci.c

index 1048817befb84a6614a4081396a3db08f333e7fe..1dccadaddd1d094551f0528fb2c747d2afac8773 100644 (file)
--- a/arch/ppc64/kernel/rtas_pci.c
+++ b/arch/ppc64/kernel/rtas_pci.c
@@ -58,6 +58,21 @@ static int config_access_valid(struct device_node *dn, int where)
         return 0;
  }
  
+static int of_device_available(struct device_node * dn)
+{
+        char * status;
+
+        status = get_property(dn, "status", NULL);
+
+        if (!status)
+                return 1;
+
+        if (!strcmp(status, "okay"))
+                return 1;
+
+        return 0;
+}
+
  static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val)
  {
         int returnval = -1;
@@ -103,7 +118,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
  
         /* Search only direct children of the bus */
         for (dn = busdn->child; dn; dn = dn->sibling)
-               if (dn->devfn == devfn)
+               if (dn->devfn == devfn && of_device_available(dn))
                         return rtas_read_config(dn, where, size, val);
         return PCIBIOS_DEVICE_NOT_FOUND;
  }
@@ -146,7 +161,7 @@ static int rtas_pci_write_config(struct pci_bus *bus,
  
         /* Search only direct children of the bus */
         for (dn = busdn->child; dn; dn = dn->sibling)
-               if (dn->devfn == devfn)
+               if (dn->devfn == devfn && of_device_available(dn))
                         return rtas_write_config(dn, where, size, val);
         return PCIBIOS_DEVICE_NOT_FOUND;
  }
diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c

index 687e8559520839554eaf4d467d78a8d373eb2376..ee3b20de2e7a3f5c701dc7bf5af50e5999ed8b78 100644 (file)
--- a/arch/ppc64/kernel/setup.c
+++ b/arch/ppc64/kernel/setup.c
@@ -536,15 +536,19 @@ static void __init check_for_initrd(void)
  
         DBG(" -> check_for_initrd()\n");
  
-       prop = (u64 *)get_property(of_chosen, "linux,initrd-start", NULL);
-       if (prop != NULL) {
-               initrd_start = (unsigned long)__va(*prop);
-               prop = (u64 *)get_property(of_chosen, "linux,initrd-end", NULL);
+       if (of_chosen) {
+               prop = (u64 *)get_property(of_chosen,
+                               "linux,initrd-start", NULL);
                 if (prop != NULL) {
-                       initrd_end = (unsigned long)__va(*prop);
-                       initrd_below_start_ok = 1;
-               } else
-                       initrd_start = 0;
+                       initrd_start = (unsigned long)__va(*prop);
+                       prop = (u64 *)get_property(of_chosen,
+                                       "linux,initrd-end", NULL);
+                       if (prop != NULL) {
+                               initrd_end = (unsigned long)__va(*prop);
+                               initrd_below_start_ok = 1;
+                       } else
+                               initrd_start = 0;
+               }
         }
  
         /* If we were passed an initrd, set the ROOT_DEV properly if the values
@@ -627,7 +631,7 @@ void __init setup_system(void)
          * Initialize xmon
          */
  #ifdef CONFIG_XMON_DEFAULT
-       xmon_init();
+       xmon_init(1);
  #endif
         /*
          * Register early console
@@ -706,6 +710,8 @@ void machine_power_off(void)
         local_irq_disable();
         while (1) ;
  }
+/* Used by the G5 thermal driver */
+EXPORT_SYMBOL_GPL(machine_power_off);
  
  void machine_halt(void)
  {
@@ -1341,11 +1347,13 @@ static int __init early_xmon(char *p)
         /* ensure xmon is enabled */
         if (p) {
                 if (strncmp(p, "on", 2) == 0)
-                       xmon_init();
+                       xmon_init(1);
+               if (strncmp(p, "off", 3) == 0)
+                       xmon_init(0);
                 if (strncmp(p, "early", 5) != 0)
                         return 0;
         }
-       xmon_init();
+       xmon_init(1);
         debugger(NULL);
  
         return 0;
diff --git a/arch/ppc64/kernel/signal.c b/arch/ppc64/kernel/signal.c

index bf782276984c69147411c7aa5f513ab4cc545cda..49a79a55c32de7d5ab73191dbe78047a6dcf848e 100644 (file)
--- a/arch/ppc64/kernel/signal.c
+++ b/arch/ppc64/kernel/signal.c
@@ -481,10 +481,11 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
         /* Set up Signal Frame */
         ret = setup_rt_frame(sig, ka, info, oldset, regs);
  
-       if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
+       if (ret) {
                 spin_lock_irq(&current->sighand->siglock);
                 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
-               sigaddset(&current->blocked,sig);
+               if (!(ka->sa.sa_flags & SA_NODEFER))
+                       sigaddset(&current->blocked,sig);
                 recalc_sigpending();
                 spin_unlock_irq(&current->sighand->siglock);
         }
diff --git a/arch/ppc64/kernel/signal32.c b/arch/ppc64/kernel/signal32.c

index 3c2fa5c284c04ef223c468c2df53f6c7676dc20a..46f4d6cc7fc9591ce7240887a05edadcd711c6f6 100644 (file)
--- a/arch/ppc64/kernel/signal32.c
+++ b/arch/ppc64/kernel/signal32.c
@@ -976,11 +976,12 @@ int do_signal32(sigset_t *oldset, struct pt_regs *regs)
         else
                 ret = handle_signal32(signr, &ka, &info, oldset, regs, newsp);
  
-       if (ret && !(ka.sa.sa_flags & SA_NODEFER)) {
+       if (ret) {
                 spin_lock_irq(&current->sighand->siglock);
                 sigorsets(&current->blocked, &current->blocked,
                           &ka.sa.sa_mask);
-               sigaddset(&current->blocked, signr);
+               if (!(ka.sa.sa_flags & SA_NODEFER))
+                       sigaddset(&current->blocked, signr);
                 recalc_sigpending();
                 spin_unlock_irq(&current->sighand->siglock);
         }
diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c

index 02b8ac4e016883cfc5e4bfa0e151995346d4dc55..f311ee7c0070d2d024d25b78b88622e154793ce5 100644 (file)
--- a/arch/ppc64/kernel/sysfs.c
+++ b/arch/ppc64/kernel/sysfs.c
@@ -13,6 +13,7 @@
  #include <asm/current.h>
  #include <asm/processor.h>
  #include <asm/cputable.h>
+#include <asm/firmware.h>
  #include <asm/hvcall.h>
  #include <asm/prom.h>
  #include <asm/systemcfg.h>
@@ -100,6 +101,8 @@ static int __init setup_smt_snooze_delay(char *str)
  }
  __setup("smt-snooze-delay=", setup_smt_snooze_delay);
  
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
  /*
   * Enabling PMCs will slow partition context switch times so we only do
   * it the first time we write to the PMCs.
@@ -109,65 +112,15 @@ static DEFINE_PER_CPU(char, pmcs_enabled);
  
  void ppc64_enable_pmcs(void)
  {
-       unsigned long hid0;
-#ifdef CONFIG_PPC_PSERIES
-       unsigned long set, reset;
-#endif /* CONFIG_PPC_PSERIES */
-
         /* Only need to enable them once */
         if (__get_cpu_var(pmcs_enabled))
                 return;
  
         __get_cpu_var(pmcs_enabled) = 1;
  
-       switch (systemcfg->platform) {
-       case PLATFORM_PSERIES:
-       case PLATFORM_POWERMAC:
-               hid0 = mfspr(HID0);
-               hid0 |= 1UL << (63 - 20);
-
-               /* POWER4 requires the following sequence */
-               asm volatile(
-                            "sync\n"
-                            "mtspr     %1, %0\n"
-                            "mfspr     %0, %1\n"
-                            "mfspr     %0, %1\n"
-                            "mfspr     %0, %1\n"
-                            "mfspr     %0, %1\n"
-                            "mfspr     %0, %1\n"
-                            "mfspr     %0, %1\n"
-                            "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0):
-                            "memory");
-               break;
-
-#ifdef CONFIG_PPC_PSERIES
-       case PLATFORM_PSERIES_LPAR:
-               set = 1UL << 63;
-               reset = 0;
-               plpar_hcall_norets(H_PERFMON, set, reset);
-               break;
-#endif /* CONFIG_PPC_PSERIES */
-
-       default:
-               break;
-       }
-
-#ifdef CONFIG_PPC_PSERIES
-       /* instruct hypervisor to maintain PMCs */
-       if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
-               get_paca()->lppaca.pmcregs_in_use = 1;
-#endif /* CONFIG_PPC_PSERIES */
+       if (ppc_md.enable_pmcs)
+               ppc_md.enable_pmcs();
  }
-
-#else
-
-/* PMC stuff */
-void ppc64_enable_pmcs(void)
-{
-       /* XXX Implement for iseries */
-}
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-
  EXPORT_SYMBOL(ppc64_enable_pmcs);
  
  /* XXX convert to rusty's on_one_cpu */
diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c

index 909462e1adeaf7cc1bab5ec9b766315d2ee63ec8..1696e1b05bb9c8b914d5a6198cb5ecd8bb00b6ba 100644 (file)
--- a/arch/ppc64/kernel/time.c
+++ b/arch/ppc64/kernel/time.c
@@ -67,6 +67,7 @@
  #include <asm/prom.h>
  #include <asm/sections.h>
  #include <asm/systemcfg.h>
+#include <asm/firmware.h>
  
  u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
  
@@ -370,13 +371,11 @@ int timer_interrupt(struct pt_regs * regs)
                 process_hvlpevents(regs);
  #endif
  
-/* collect purr register values often, for accurate calculations */
-#if defined(CONFIG_PPC_PSERIES)
-       if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+       /* collect purr register values often, for accurate calculations */
+       if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
                 struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
                 cu->current_tb = mfspr(SPRN_PURR);
         }
-#endif
  
         irq_exit();
  
diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c

index 0c0ba71ac0e8b79cd2a29b8fffd8d4f177ac006b..c90e1dd875ce1a59f8250410f0610d2324112a81 100644 (file)
--- a/arch/ppc64/kernel/vio.c
+++ b/arch/ppc64/kernel/vio.c
@@ -1,10 +1,11 @@
  /*
   * IBM PowerPC Virtual I/O Infrastructure Support.
   *
- *    Copyright (c) 2003 IBM Corp.
+ *    Copyright (c) 2003-2005 IBM Corp.
   *     Dave Engebretsen engebret@us.ibm.com
   *     Santiago Leon santil@us.ibm.com
   *     Hollis Blanchard <hollisb@us.ibm.com>
+ *     Stephen Rothwell
   *
   *      This program is free software; you can redistribute it and/or
   *      modify it under the terms of the GNU General Public License
@@ -14,61 +15,30 @@
  
  #include <linux/init.h>
  #include <linux/console.h>
-#include <linux/version.h>
  #include <linux/module.h>
-#include <linux/kobject.h>
  #include <linux/mm.h>
  #include <linux/dma-mapping.h>
-#include <asm/rtas.h>
  #include <asm/iommu.h>
  #include <asm/dma.h>
-#include <asm/ppcdebug.h>
  #include <asm/vio.h>
-#include <asm/hvcall.h>
-#include <asm/iSeries/vio.h>
-#include <asm/iSeries/HvTypes.h>
-#include <asm/iSeries/HvCallXm.h>
-#include <asm/iSeries/HvLpConfig.h>
-
-#define DBGENTER() pr_debug("%s entered\n", __FUNCTION__)
-
-extern struct subsystem devices_subsys; /* needed for vio_find_name() */
  
  static const struct vio_device_id *vio_match_device(
                 const struct vio_device_id *, const struct vio_dev *);
  
-#ifdef CONFIG_PPC_PSERIES
-static struct iommu_table *vio_build_iommu_table(struct vio_dev *);
-static int vio_num_address_cells;
-#endif
-#ifdef CONFIG_PPC_ISERIES
-static struct iommu_table veth_iommu_table;
-static struct iommu_table vio_iommu_table;
-#endif
-static struct vio_dev vio_bus_device  = { /* fake "parent" device */
+struct vio_dev vio_bus_device  = { /* fake "parent" device */
         .name = vio_bus_device.dev.bus_id,
         .type = "",
-#ifdef CONFIG_PPC_ISERIES
-       .iommu_table = &vio_iommu_table,
-#endif
         .dev.bus_id = "vio",
         .dev.bus = &vio_bus_type,
  };
  
-#ifdef CONFIG_PPC_ISERIES
-static struct vio_dev *__init vio_register_device_iseries(char *type,
-               uint32_t unit_num);
-
-struct device *iSeries_vio_dev = &vio_bus_device.dev;
-EXPORT_SYMBOL(iSeries_vio_dev);
-
-#define device_is_compatible(a, b)     1
+static struct vio_bus_ops vio_bus_ops;
  
-#endif
-
-/* convert from struct device to struct vio_dev and pass to driver.
+/*
+ * Convert from struct device to struct vio_dev and pass to driver.
   * dev->driver has already been set by generic code because vio_bus_match
- * succeeded. */
+ * succeeded.
+ */
  static int vio_bus_probe(struct device *dev)
  {
         struct vio_dev *viodev = to_vio_dev(dev);
@@ -76,15 +46,12 @@ static int vio_bus_probe(struct device *dev)
         const struct vio_device_id *id;
         int error = -ENODEV;
  
-       DBGENTER();
-
         if (!viodrv->probe)
                 return error;
  
         id = vio_match_device(viodrv->id_table, viodev);
-       if (id) {
+       if (id)
                 error = viodrv->probe(viodev, id);
-       }
  
         return error;
  }
@@ -95,11 +62,8 @@ static int vio_bus_remove(struct device *dev)
         struct vio_dev *viodev = to_vio_dev(dev);
         struct vio_driver *viodrv = to_vio_driver(dev->driver);
  
-       DBGENTER();
-
-       if (viodrv->remove) {
+       if (viodrv->remove)
                 return viodrv->remove(viodev);
-       }
  
         /* driver can't remove */
         return 1;
@@ -135,193 +99,72 @@ void vio_unregister_driver(struct vio_driver *viodrv)
  EXPORT_SYMBOL(vio_unregister_driver);
  
  /**
- * vio_match_device: - Tell if a VIO device has a matching VIO device id structure.
- * @ids:       array of VIO device id structures to search in
- * @dev:       the VIO device structure to match against
+ * vio_match_device: - Tell if a VIO device has a matching
+ *                     VIO device id structure.
+ * @ids:       array of VIO device id structures to search in
+ * @dev:       the VIO device structure to match against
   *
   * Used by a driver to check whether a VIO device present in the
   * system is in its list of supported devices. Returns the matching
   * vio_device_id structure or NULL if there is no match.
   */
-static const struct vio_device_id * vio_match_device(const struct vio_device_id *ids,
-       const struct vio_dev *dev)
+static const struct vio_device_id *vio_match_device(
+               const struct vio_device_id *ids, const struct vio_dev *dev)
  {
-       DBGENTER();
-
-       while (ids->type) {
-               if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) &&
-                       device_is_compatible(dev->dev.platform_data, ids->compat))
+       while (ids->type[0] != '\0') {
+               if (vio_bus_ops.match(ids, dev))
                         return ids;
                 ids++;
         }
         return NULL;
  }
  
-#ifdef CONFIG_PPC_ISERIES
-void __init iommu_vio_init(void)
-{
-       struct iommu_table *t;
-       struct iommu_table_cb cb;
-       unsigned long cbp;
-       unsigned long itc_entries;
-
-       cb.itc_busno = 255;    /* Bus 255 is the virtual bus */
-       cb.itc_virtbus = 0xff; /* Ask for virtual bus */
-
-       cbp = virt_to_abs(&cb);
-       HvCallXm_getTceTableParms(cbp);
-
-       itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
-       veth_iommu_table.it_size        = itc_entries / 2;
-       veth_iommu_table.it_busno       = cb.itc_busno;
-       veth_iommu_table.it_offset      = cb.itc_offset;
-       veth_iommu_table.it_index       = cb.itc_index;
-       veth_iommu_table.it_type        = TCE_VB;
-       veth_iommu_table.it_blocksize   = 1;
-
-       t = iommu_init_table(&veth_iommu_table);
-
-       if (!t)
-               printk("Virtual Bus VETH TCE table failed.\n");
-
-       vio_iommu_table.it_size         = itc_entries - veth_iommu_table.it_size;
-       vio_iommu_table.it_busno        = cb.itc_busno;
-       vio_iommu_table.it_offset       = cb.itc_offset +
-                                         veth_iommu_table.it_size;
-       vio_iommu_table.it_index        = cb.itc_index;
-       vio_iommu_table.it_type         = TCE_VB;
-       vio_iommu_table.it_blocksize    = 1;
-
-       t = iommu_init_table(&vio_iommu_table);
-
-       if (!t)
-               printk("Virtual Bus VIO TCE table failed.\n");
-}
-#endif
-
-#ifdef CONFIG_PPC_PSERIES
-static void probe_bus_pseries(void)
-{
-       struct device_node *node_vroot, *of_node;
-
-       node_vroot = find_devices("vdevice");
-       if ((node_vroot == NULL) || (node_vroot->child == NULL))
-               /* this machine doesn't do virtual IO, and that's ok */
-               return;
-
-       vio_num_address_cells = prom_n_addr_cells(node_vroot->child);
-
-       /*
-        * Create struct vio_devices for each virtual device in the device tree.
-        * Drivers will associate with them later.
-        */
-       for (of_node = node_vroot->child; of_node != NULL;
-                       of_node = of_node->sibling) {
-               printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node);
-               vio_register_device_node(of_node);
-       }
-}
-#endif
-
-#ifdef CONFIG_PPC_ISERIES
-static void probe_bus_iseries(void)
-{
-       HvLpIndexMap vlan_map = HvLpConfig_getVirtualLanIndexMap();
-       struct vio_dev *viodev;
-       int i;
-
-       /* there is only one of each of these */
-       vio_register_device_iseries("viocons", 0);
-       vio_register_device_iseries("vscsi", 0);
-
-       vlan_map = HvLpConfig_getVirtualLanIndexMap();
-       for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
-               if ((vlan_map & (0x8000 >> i)) == 0)
-                       continue;
-               viodev = vio_register_device_iseries("vlan", i);
-               /* veth is special and has it own iommu_table */
-               viodev->iommu_table = &veth_iommu_table;
-       }
-       for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++)
-               vio_register_device_iseries("viodasd", i);
-       for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++)
-               vio_register_device_iseries("viocd", i);
-       for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++)
-               vio_register_device_iseries("viotape", i);
-}
-#endif
-
  /**
   * vio_bus_init: - Initialize the virtual IO bus
   */
-static int __init vio_bus_init(void)
+int __init vio_bus_init(struct vio_bus_ops *ops)
  {
         int err;
  
+       vio_bus_ops = *ops;
+
         err = bus_register(&vio_bus_type);
         if (err) {
                 printk(KERN_ERR "failed to register VIO bus\n");
                 return err;
         }
  
-       /* the fake parent of all vio devices, just to give us a nice directory */
+       /*
+        * The fake parent of all vio devices, just to give us
+        * a nice directory
+        */
         err = device_register(&vio_bus_device.dev);
         if (err) {
-               printk(KERN_WARNING "%s: device_register returned %i\n", __FUNCTION__,
-                       err);
+               printk(KERN_WARNING "%s: device_register returned %i\n",
+                               __FUNCTION__, err);
                 return err;
         }
  
-#ifdef CONFIG_PPC_PSERIES
-       probe_bus_pseries();
-#endif
-#ifdef CONFIG_PPC_ISERIES
-       probe_bus_iseries();
-#endif
-
         return 0;
  }
  
-__initcall(vio_bus_init);
-
  /* vio_dev refcount hit 0 */
  static void __devinit vio_dev_release(struct device *dev)
  {
-       DBGENTER();
-
-#ifdef CONFIG_PPC_PSERIES
-       /* XXX free TCE table */
-       of_node_put(dev->platform_data);
-#endif
+       if (vio_bus_ops.release_device)
+               vio_bus_ops.release_device(dev);
         kfree(to_vio_dev(dev));
  }
  
-#ifdef CONFIG_PPC_PSERIES
-static ssize_t viodev_show_devspec(struct device *dev, struct device_attribute *attr, char *buf)
-{
-       struct device_node *of_node = dev->platform_data;
-
-       return sprintf(buf, "%s\n", of_node->full_name);
-}
-DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL);
-#endif
-
-static ssize_t viodev_show_name(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t viodev_show_name(struct device *dev,
+               struct device_attribute *attr, char *buf)
  {
         return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
  }
  DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL);
  
-static struct vio_dev * __devinit vio_register_device_common(
-               struct vio_dev *viodev, char *name, char *type,
-               uint32_t unit_address, struct iommu_table *iommu_table)
+struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev)
  {
-       DBGENTER();
-
-       viodev->name = name;
-       viodev->type = type;
-       viodev->unit_address = unit_address;
-       viodev->iommu_table = iommu_table;
         /* init generic 'struct device' fields: */
         viodev->dev.parent = &vio_bus_device.dev;
         viodev->dev.bus = &vio_bus_type;
@@ -338,222 +181,15 @@ static struct vio_dev * __devinit vio_register_device_common(
         return viodev;
  }
  
-#ifdef CONFIG_PPC_PSERIES
-/**
- * vio_register_device_node: - Register a new vio device.
- * @of_node:   The OF node for this device.
- *
- * Creates and initializes a vio_dev structure from the data in
- * of_node (dev.platform_data) and adds it to the list of virtual devices.
- * Returns a pointer to the created vio_dev or NULL if node has
- * NULL device_type or compatible fields.
- */
-struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
-{
-       struct vio_dev *viodev;
-       unsigned int *unit_address;
-       unsigned int *irq_p;
-
-       DBGENTER();
-
-       /* we need the 'device_type' property, in order to match with drivers */
-       if ((NULL == of_node->type)) {
-               printk(KERN_WARNING
-                       "%s: node %s missing 'device_type'\n", __FUNCTION__,
-                       of_node->name ? of_node->name : "<unknown>");
-               return NULL;
-       }
-
-       unit_address = (unsigned int *)get_property(of_node, "reg", NULL);
-       if (!unit_address) {
-               printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__,
-                       of_node->name ? of_node->name : "<unknown>");
-               return NULL;
-       }
-
-       /* allocate a vio_dev for this node */
-       viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
-       if (!viodev) {
-               return NULL;
-       }
-       memset(viodev, 0, sizeof(struct vio_dev));
-
-       viodev->dev.platform_data = of_node_get(of_node);
-
-       viodev->irq = NO_IRQ;
-       irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL);
-       if (irq_p) {
-               int virq = virt_irq_create_mapping(*irq_p);
-               if (virq == NO_IRQ) {
-                       printk(KERN_ERR "Unable to allocate interrupt "
-                              "number for %s\n", of_node->full_name);
-               } else
-                       viodev->irq = irq_offset_up(virq);
-       }
-
-       snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address);
-
-       /* register with generic device framework */
-       if (vio_register_device_common(viodev, of_node->name, of_node->type,
-                               *unit_address, vio_build_iommu_table(viodev))
-                       == NULL) {
-               /* XXX free TCE table */
-               kfree(viodev);
-               return NULL;
-       }
-       device_create_file(&viodev->dev, &dev_attr_devspec);
-
-       return viodev;
-}
-EXPORT_SYMBOL(vio_register_device_node);
-#endif
-
-#ifdef CONFIG_PPC_ISERIES
-/**
- * vio_register_device: - Register a new vio device.
- * @voidev:    The device to register.
- */
-static struct vio_dev *__init vio_register_device_iseries(char *type,
-               uint32_t unit_num)
-{
-       struct vio_dev *viodev;
-
-       DBGENTER();
-
-       /* allocate a vio_dev for this node */
-       viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
-       if (!viodev)
-               return NULL;
-       memset(viodev, 0, sizeof(struct vio_dev));
-
-       snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num);
-
-       return vio_register_device_common(viodev, viodev->dev.bus_id, type,
-                       unit_num, &vio_iommu_table);
-}
-#endif
-
  void __devinit vio_unregister_device(struct vio_dev *viodev)
  {
-       DBGENTER();
-#ifdef CONFIG_PPC_PSERIES
-       device_remove_file(&viodev->dev, &dev_attr_devspec);
-#endif
+       if (vio_bus_ops.unregister_device)
+               vio_bus_ops.unregister_device(viodev);
         device_remove_file(&viodev->dev, &dev_attr_name);
         device_unregister(&viodev->dev);
  }
  EXPORT_SYMBOL(vio_unregister_device);
  
-#ifdef CONFIG_PPC_PSERIES
-/**
- * vio_get_attribute: - get attribute for virtual device
- * @vdev:      The vio device to get property.
- * @which:     The property/attribute to be extracted.
- * @length:    Pointer to length of returned data size (unused if NULL).
- *
- * Calls prom.c's get_property() to return the value of the
- * attribute specified by the preprocessor constant @which
-*/
-const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length)
-{
-       return get_property(vdev->dev.platform_data, (char*)which, length);
-}
-EXPORT_SYMBOL(vio_get_attribute);
-
-/* vio_find_name() - internal because only vio.c knows how we formatted the
- * kobject name
- * XXX once vio_bus_type.devices is actually used as a kset in
- * drivers/base/bus.c, this function should be removed in favor of
- * "device_find(kobj_name, &vio_bus_type)"
- */
-static struct vio_dev *vio_find_name(const char *kobj_name)
-{
-       struct kobject *found;
-
-       found = kset_find_obj(&devices_subsys.kset, kobj_name);
-       if (!found)
-               return NULL;
-
-       return to_vio_dev(container_of(found, struct device, kobj));
-}
-
-/**
- * vio_find_node - find an already-registered vio_dev
- * @vnode: device_node of the virtual device we're looking for
- */
-struct vio_dev *vio_find_node(struct device_node *vnode)
-{
-       uint32_t *unit_address;
-       char kobj_name[BUS_ID_SIZE];
-
-       /* construct the kobject name from the device node */
-       unit_address = (uint32_t *)get_property(vnode, "reg", NULL);
-       if (!unit_address)
-               return NULL;
-       snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address);
-
-       return vio_find_name(kobj_name);
-}
-EXPORT_SYMBOL(vio_find_node);
-
-/**
- * vio_build_iommu_table: - gets the dma information from OF and builds the TCE tree.
- * @dev: the virtual device.
- *
- * Returns a pointer to the built tce tree, or NULL if it can't
- * find property.
-*/
-static struct iommu_table * vio_build_iommu_table(struct vio_dev *dev)
-{
-       unsigned int *dma_window;
-       struct iommu_table *newTceTable;
-       unsigned long offset;
-       int dma_window_property_size;
-
-       dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size);
-       if(!dma_window) {
-               return NULL;
-       }
-
-       newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
-
-       /*  There should be some code to extract the phys-encoded offset
-               using prom_n_addr_cells(). However, according to a comment
-               on earlier versions, it's always zero, so we don't bother */
-       offset = dma_window[1] >>  PAGE_SHIFT;
-
-       /* TCE table size - measured in tce entries */
-       newTceTable->it_size            = dma_window[4] >> PAGE_SHIFT;
-       /* offset for VIO should always be 0 */
-       newTceTable->it_offset          = offset;
-       newTceTable->it_busno           = 0;
-       newTceTable->it_index           = (unsigned long)dma_window[0];
-       newTceTable->it_type            = TCE_VB;
-
-       return iommu_init_table(newTceTable);
-}
-
-int vio_enable_interrupts(struct vio_dev *dev)
-{
-       int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
-       if (rc != H_Success) {
-               printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
-       }
-       return rc;
-}
-EXPORT_SYMBOL(vio_enable_interrupts);
-
-int vio_disable_interrupts(struct vio_dev *dev)
-{
-       int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
-       if (rc != H_Success) {
-               printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
-       }
-       return rc;
-}
-EXPORT_SYMBOL(vio_disable_interrupts);
-#endif
-
  static dma_addr_t vio_map_single(struct device *dev, void *vaddr,
                           size_t size, enum dma_data_direction direction)
  {
@@ -615,18 +251,8 @@ static int vio_bus_match(struct device *dev, struct device_driver *drv)
         const struct vio_dev *vio_dev = to_vio_dev(dev);
         struct vio_driver *vio_drv = to_vio_driver(drv);
         const struct vio_device_id *ids = vio_drv->id_table;
-       const struct vio_device_id *found_id;
-
-       DBGENTER();
  
-       if (!ids)
-               return 0;
-
-       found_id = vio_match_device(ids, vio_dev);
-       if (found_id)
-               return 1;
-
-       return 0;
+       return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
  }
  
  struct bus_type vio_bus_type = {
diff --git a/arch/ppc64/kernel/xics.c b/arch/ppc64/kernel/xics.c

index 677c4450984a85c3b53161f7409895309f907ce8..d9dc6f28d050050281ccc99e19e2d940de850abd 100644 (file)
--- a/arch/ppc64/kernel/xics.c
+++ b/arch/ppc64/kernel/xics.c
@@ -647,29 +647,30 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
         }
  }
  
-void xics_teardown_cpu(void)
+void xics_teardown_cpu(int secondary)
  {
         int cpu = smp_processor_id();
-       int status;
  
         ops->cppr_info(cpu, 0x00);
         iosync();
  
         /*
-        * we need to EOI the IPI if we got here from kexec down IPI
-        *
-        * xics doesn't care if we duplicate an EOI as long as we
-        * don't EOI and raise priority.
-        *
-        * probably need to check all the other interrupts too
-        * should we be flagging idle loop instead?
-        * or creating some task to be scheduled?
+        * Some machines need to have at least one cpu in the GIQ,
+        * so leave the master cpu in the group.
          */
-       ops->xirr_info_set(cpu, XICS_IPI);
-
-       status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
-               (1UL << interrupt_server_size) - 1 - default_distrib_server, 0);
-       WARN_ON(status != 0);
+       if (secondary) {
+               /*
+                * we need to EOI the IPI if we got here from kexec down IPI
+                *
+                * probably need to check all the other interrupts too
+                * should we be flagging idle loop instead?
+                * or creating some task to be scheduled?
+                */
+               ops->xirr_info_set(cpu, XICS_IPI);
+               rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
+                       (1UL << interrupt_server_size) - 1 -
+                       default_distrib_server, 0);
+       }
  }
  
  #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/ppc64/mm/hash_low.S b/arch/ppc64/mm/hash_low.S

index fbff24827ae78d83809fc078900ed4299e16ab86..35eb49e1b8908d4c6c94580f45561f97e438c422 100644 (file)
--- a/arch/ppc64/mm/hash_low.S
+++ b/arch/ppc64/mm/hash_low.S
@@ -128,13 +128,11 @@ _GLOBAL(__hash_page)
         /* We eventually do the icache sync here (maybe inline that
          * code rather than call a C function...) 
          */
-BEGIN_FTR_SECTION
  BEGIN_FTR_SECTION
         mr      r4,r30
         mr      r5,r7
         bl      .hash_page_do_lazy_icache
-END_FTR_SECTION_IFSET(CPU_FTR_NOEXECUTE)
-END_FTR_SECTION_IFCLR(CPU_FTR_COHERENT_ICACHE)
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
  
         /* At this point, r3 contains new PP bits, save them in
          * place of "access" in the param area (sic)
diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c

index a6abd3a979bf5be80412ed4c78f54e08074e9d56..7626bb59954d43e6fa6b7437aff89c68cf2fe5b2 100644 (file)
--- a/arch/ppc64/mm/hash_native.c
+++ b/arch/ppc64/mm/hash_native.c
@@ -51,7 +51,6 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
                         unsigned long prpn, unsigned long vflags,
                         unsigned long rflags)
  {
-       unsigned long arpn = physRpn_to_absRpn(prpn);
         hpte_t *hptep = htab_address + hpte_group;
         unsigned long hpte_v, hpte_r;
         int i;
@@ -74,7 +73,7 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
         hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
         if (vflags & HPTE_V_LARGE)
                 va &= ~(1UL << HPTE_V_AVPN_SHIFT);
-       hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
+       hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
  
         hptep->r = hpte_r;
         /* Guarantee the second dword is visible before the valid bit */
diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c

index 623b5d130c3192766145a12ad45c5ef3f95541ad..09475c8edf7caea33dfb37f641c13879b8fac666 100644 (file)
--- a/arch/ppc64/mm/hash_utils.c
+++ b/arch/ppc64/mm/hash_utils.c
@@ -210,7 +210,7 @@ void __init htab_initialize(void)
  
         /* create bolted the linear mapping in the hash table */
         for (i=0; i < lmb.memory.cnt; i++) {
-               base = lmb.memory.region[i].physbase + KERNELBASE;
+               base = lmb.memory.region[i].base + KERNELBASE;
                 size = lmb.memory.region[i].size;
  
                 DBG("creating mapping for region: %lx : %lx\n", base, size);
@@ -302,7 +302,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
         int local = 0;
         cpumask_t tmp;
  
-       if ((ea & ~REGION_MASK) > EADDR_MASK)
+       if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
                 return 1;
  
         switch (REGION_ID(ea)) {
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c

index f9524602818dad2aaaaad90fba4a2baf42be161b..e7833c80eb6824dc93abcfb3ff73dba56d53ac9d 100644 (file)
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -27,124 +27,94 @@
  
  #include <linux/sysctl.h>
  
-#define        HUGEPGDIR_SHIFT         (HPAGE_SHIFT + PAGE_SHIFT - 3)
-#define HUGEPGDIR_SIZE         (1UL << HUGEPGDIR_SHIFT)
-#define HUGEPGDIR_MASK         (~(HUGEPGDIR_SIZE-1))
+#define NUM_LOW_AREAS  (0x100000000UL >> SID_SHIFT)
+#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT)
  
-#define HUGEPTE_INDEX_SIZE     9
-#define HUGEPGD_INDEX_SIZE     10
-
-#define PTRS_PER_HUGEPTE       (1 << HUGEPTE_INDEX_SIZE)
-#define PTRS_PER_HUGEPGD       (1 << HUGEPGD_INDEX_SIZE)
-
-static inline int hugepgd_index(unsigned long addr)
-{
-       return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT;
-}
-
-static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr)
+/* Modelled after find_linux_pte() */
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
  {
-       int index;
+       pgd_t *pg;
+       pud_t *pu;
+       pmd_t *pm;
+       pte_t *pt;
  
-       if (! mm->context.huge_pgdir)
-               return NULL;
+       BUG_ON(! in_hugepage_area(mm->context, addr));
  
+       addr &= HPAGE_MASK;
+
+       pg = pgd_offset(mm, addr);
+       if (!pgd_none(*pg)) {
+               pu = pud_offset(pg, addr);
+               if (!pud_none(*pu)) {
+                       pm = pmd_offset(pu, addr);
+                       pt = (pte_t *)pm;
+                       BUG_ON(!pmd_none(*pm)
+                              && !(pte_present(*pt) && pte_huge(*pt)));
+                       return pt;
+               }
+       }
  
-       index = hugepgd_index(addr);
-       BUG_ON(index >= PTRS_PER_HUGEPGD);
-       return (pud_t *)(mm->context.huge_pgdir + index);
+       return NULL;
  }
  
-static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
  {
-       int index;
-
-       if (pud_none(*dir))
-               return NULL;
+       pgd_t *pg;
+       pud_t *pu;
+       pmd_t *pm;
+       pte_t *pt;
  
-       index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE;
-       return (pte_t *)pud_page(*dir) + index;
-}
-
-static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr)
-{
         BUG_ON(! in_hugepage_area(mm->context, addr));
  
-       if (! mm->context.huge_pgdir) {
-               pgd_t *new;
-               spin_unlock(&mm->page_table_lock);
-               /* Don't use pgd_alloc(), because we want __GFP_REPEAT */
-               new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
-               BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
-               spin_lock(&mm->page_table_lock);
+       addr &= HPAGE_MASK;
  
-               /*
-                * Because we dropped the lock, we should re-check the
-                * entry, as somebody else could have populated it..
-                */
-               if (mm->context.huge_pgdir)
-                       pgd_free(new);
-               else
-                       mm->context.huge_pgdir = new;
-       }
-       return hugepgd_offset(mm, addr);
-}
+       pg = pgd_offset(mm, addr);
+       pu = pud_alloc(mm, pg, addr);
  
-static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr)
-{
-       if (! pud_present(*dir)) {
-               pte_t *new;
-
-               spin_unlock(&mm->page_table_lock);
-               new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
-               BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
-               spin_lock(&mm->page_table_lock);
-               /*
-                * Because we dropped the lock, we should re-check the
-                * entry, as somebody else could have populated it..
-                */
-               if (pud_present(*dir)) {
-                       if (new)
-                               kmem_cache_free(zero_cache, new);
-               } else {
-                       struct page *ptepage;
-
-                       if (! new)
-                               return NULL;
-                       ptepage = virt_to_page(new);
-                       ptepage->mapping = (void *) mm;
-                       ptepage->index = addr & HUGEPGDIR_MASK;
-                       pud_populate(mm, dir, new);
+       if (pu) {
+               pm = pmd_alloc(mm, pu, addr);
+               if (pm) {
+                       pt = (pte_t *)pm;
+                       BUG_ON(!pmd_none(*pm)
+                              && !(pte_present(*pt) && pte_huge(*pt)));
+                       return pt;
                 }
         }
  
-       return hugepte_offset(dir, addr);
+       return NULL;
  }
  
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
-       pud_t *pud;
+#define HUGEPTE_BATCH_SIZE     (HPAGE_SIZE / PMD_SIZE)
  
-       BUG_ON(! in_hugepage_area(mm->context, addr));
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+                    pte_t *ptep, pte_t pte)
+{
+       int i;
  
-       pud = hugepgd_offset(mm, addr);
-       if (! pud)
-               return NULL;
+       if (pte_present(*ptep)) {
+               pte_clear(mm, addr, ptep);
+               flush_tlb_pending();
+       }
  
-       return hugepte_offset(pud, addr);
+       for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
+               *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+               ptep++;
+       }
  }
  
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+                             pte_t *ptep)
  {
-       pud_t *pud;
+       unsigned long old = pte_update(ptep, ~0UL);
+       int i;
  
-       BUG_ON(! in_hugepage_area(mm->context, addr));
+       if (old & _PAGE_HASHPTE)
+               hpte_update(mm, addr, old, 0);
  
-       pud = hugepgd_alloc(mm, addr);
-       if (! pud)
-               return NULL;
+       for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
+               ptep[i] = __pte(0);
  
-       return hugepte_alloc(mm, pud, addr);
+       return __pte(old);
  }
  
  /*
@@ -162,15 +132,17 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
         return 0;
  }
  
-static void flush_segments(void *parm)
+static void flush_low_segments(void *parm)
  {
-       u16 segs = (unsigned long) parm;
+       u16 areas = (unsigned long) parm;
         unsigned long i;
  
         asm volatile("isync" : : : "memory");
  
-       for (i = 0; i < 16; i++) {
-               if (! (segs & (1U << i)))
+       BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS);
+
+       for (i = 0; i < NUM_LOW_AREAS; i++) {
+               if (! (areas & (1U << i)))
                         continue;
                 asm volatile("slbie %0" : : "r" (i << SID_SHIFT));
         }
@@ -178,13 +150,33 @@ static void flush_segments(void *parm)
         asm volatile("isync" : : : "memory");
  }
  
-static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg)
+static void flush_high_segments(void *parm)
  {
-       unsigned long start = seg << SID_SHIFT;
-       unsigned long end = (seg+1) << SID_SHIFT;
+       u16 areas = (unsigned long) parm;
+       unsigned long i, j;
+
+       asm volatile("isync" : : : "memory");
+
+       BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS);
+
+       for (i = 0; i < NUM_HIGH_AREAS; i++) {
+               if (! (areas & (1U << i)))
+                       continue;
+               for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
+                       asm volatile("slbie %0"
+                                    :: "r" ((i << HTLB_AREA_SHIFT) + (j << SID_SHIFT)));
+       }
+
+       asm volatile("isync" : : : "memory");
+}
+
+static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
+{
+       unsigned long start = area << SID_SHIFT;
+       unsigned long end = (area+1) << SID_SHIFT;
         struct vm_area_struct *vma;
  
-       BUG_ON(seg >= 16);
+       BUG_ON(area >= NUM_LOW_AREAS);
  
         /* Check no VMAs are in the region */
         vma = find_vma(mm, start);
@@ -194,20 +186,39 @@ static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg)
         return 0;
  }
  
-static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs)
+static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
+{
+       unsigned long start = area << HTLB_AREA_SHIFT;
+       unsigned long end = (area+1) << HTLB_AREA_SHIFT;
+       struct vm_area_struct *vma;
+
+       BUG_ON(area >= NUM_HIGH_AREAS);
+
+       /* Check no VMAs are in the region */
+       vma = find_vma(mm, start);
+       if (vma && (vma->vm_start < end))
+               return -EBUSY;
+
+       return 0;
+}
+
+static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
  {
         unsigned long i;
  
-       newsegs &= ~(mm->context.htlb_segs);
-       if (! newsegs)
+       BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
+       BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);
+
+       newareas &= ~(mm->context.low_htlb_areas);
+       if (! newareas)
                 return 0; /* The segments we want are already open */
  
-       for (i = 0; i < 16; i++)
-               if ((1 << i) & newsegs)
-                       if (prepare_low_seg_for_htlb(mm, i) != 0)
+       for (i = 0; i < NUM_LOW_AREAS; i++)
+               if ((1 << i) & newareas)
+                       if (prepare_low_area_for_htlb(mm, i) != 0)
                                 return -EBUSY;
  
-       mm->context.htlb_segs |= newsegs;
+       mm->context.low_htlb_areas |= newareas;
  
         /* update the paca copy of the context struct */
         get_paca()->context = mm->context;
@@ -215,29 +226,63 @@ static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs)
         /* the context change must make it to memory before the flush,
          * so that further SLB misses do the right thing. */
         mb();
-       on_each_cpu(flush_segments, (void *)(unsigned long)newsegs, 0, 1);
+       on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1);
+
+       return 0;
+}
+
+static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
+{
+       unsigned long i;
+
+       BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
+       BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
+                    != NUM_HIGH_AREAS);
+
+       newareas &= ~(mm->context.high_htlb_areas);
+       if (! newareas)
+               return 0; /* The areas we want are already open */
+
+       for (i = 0; i < NUM_HIGH_AREAS; i++)
+               if ((1 << i) & newareas)
+                       if (prepare_high_area_for_htlb(mm, i) != 0)
+                               return -EBUSY;
+
+       mm->context.high_htlb_areas |= newareas;
+
+       /* update the paca copy of the context struct */
+       get_paca()->context = mm->context;
+
+       /* the context change must make it to memory before the flush,
+        * so that further SLB misses do the right thing. */
+       mb();
+       on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1);
  
         return 0;
  }
  
  int prepare_hugepage_range(unsigned long addr, unsigned long len)
  {
-       if (within_hugepage_high_range(addr, len))
-               return 0;
-       else if ((addr < 0x100000000UL) && ((addr+len) < 0x100000000UL)) {
-               int err;
-               /* Yes, we need both tests, in case addr+len overflows
-                * 64-bit arithmetic */
-               err = open_low_hpage_segs(current->mm,
+       int err;
+
+       if ( (addr+len) < addr )
+               return -EINVAL;
+
+       if ((addr + len) < 0x100000000UL)
+               err = open_low_hpage_areas(current->mm,
                                           LOW_ESID_MASK(addr, len));
-               if (err)
-                       printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
-                              " failed (segs: 0x%04hx)\n", addr, len,
-                              LOW_ESID_MASK(addr, len));
+       else
+               err = open_high_hpage_areas(current->mm,
+                                           HTLB_AREA_MASK(addr, len));
+       if (err) {
+               printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
+                      " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
+                      addr, len,
+                      LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
                 return err;
         }
  
-       return -EINVAL;
+       return 0;
  }
  
  struct page *
@@ -309,8 +354,8 @@ full_search:
                         vma = find_vma(mm, addr);
                         continue;
                 }
-               if (touches_hugepage_high_range(addr, len)) {
-                       addr = TASK_HPAGE_END;
+               if (touches_hugepage_high_range(mm, addr, len)) {
+                       addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
                         vma = find_vma(mm, addr);
                         continue;
                 }
@@ -389,8 +434,9 @@ hugepage_recheck:
                 if (touches_hugepage_low_range(mm, addr, len)) {
                         addr = (addr & ((~0) << SID_SHIFT)) - len;
                         goto hugepage_recheck;
-               } else if (touches_hugepage_high_range(addr, len)) {
-                       addr = TASK_HPAGE_BASE - len;
+               } else if (touches_hugepage_high_range(mm, addr, len)) {
+                       addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
+                       goto hugepage_recheck;
                 }
  
                 /*
@@ -481,23 +527,28 @@ static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
         return -ENOMEM;
  }
  
-static unsigned long htlb_get_high_area(unsigned long len)
+static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
  {
-       unsigned long addr = TASK_HPAGE_BASE;
+       unsigned long addr = 0x100000000UL;
         struct vm_area_struct *vma;
  
         vma = find_vma(current->mm, addr);
-       for (vma = find_vma(current->mm, addr);
-            addr + len <= TASK_HPAGE_END;
-            vma = vma->vm_next) {
+       while (addr + len <= TASK_SIZE_USER64) {
                 BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
-               BUG_ON(! within_hugepage_high_range(addr, len));
+
+               if (! __within_hugepage_high_range(addr, len, areamask)) {
+                       addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
+                       vma = find_vma(current->mm, addr);
+                       continue;
+               }
  
                 if (!vma || (addr + len) <= vma->vm_start)
                         return addr;
                 addr = ALIGN(vma->vm_end, HPAGE_SIZE);
-               /* Because we're in a hugepage region, this alignment
-                * should not skip us over any VMAs */
+               /* Depending on areamask this might not be a confirmed
+                * hugepage region, so the ALIGN could have skipped
+                * some VMAs */
+               vma = find_vma(current->mm, addr);
         }
  
         return -ENOMEM;
@@ -507,6 +558,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                         unsigned long len, unsigned long pgoff,
                                         unsigned long flags)
  {
+       int lastshift;
+       u16 areamask, curareas;
+
         if (len & ~HPAGE_MASK)
                 return -EINVAL;
  
@@ -514,67 +568,49 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                 return -EINVAL;
  
         if (test_thread_flag(TIF_32BIT)) {
-               int lastshift = 0;
-               u16 segmask, cursegs = current->mm->context.htlb_segs;
+               curareas = current->mm->context.low_htlb_areas;
  
                 /* First see if we can do the mapping in the existing
-                * low hpage segments */
-               addr = htlb_get_low_area(len, cursegs);
+                * low areas */
+               addr = htlb_get_low_area(len, curareas);
                 if (addr != -ENOMEM)
                         return addr;
  
-               for (segmask = LOW_ESID_MASK(0x100000000UL-len, len);
-                    ! lastshift; segmask >>=1) {
-                       if (segmask & 1)
+               lastshift = 0;
+               for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
+                    ! lastshift; areamask >>=1) {
+                       if (areamask & 1)
                                 lastshift = 1;
  
-                       addr = htlb_get_low_area(len, cursegs | segmask);
+                       addr = htlb_get_low_area(len, curareas | areamask);
                         if ((addr != -ENOMEM)
-                           && open_low_hpage_segs(current->mm, segmask) == 0)
+                           && open_low_hpage_areas(current->mm, areamask) == 0)
                                 return addr;
                 }
-               printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
-                      " enough segments\n");
-               return -ENOMEM;
         } else {
-               return htlb_get_high_area(len);
-       }
-}
-
-void hugetlb_mm_free_pgd(struct mm_struct *mm)
-{
-       int i;
-       pgd_t *pgdir;
-
-       spin_lock(&mm->page_table_lock);
-
-       pgdir = mm->context.huge_pgdir;
-       if (! pgdir)
-               goto out;
-
-       mm->context.huge_pgdir = NULL;
+               curareas = current->mm->context.high_htlb_areas;
  
-       /* cleanup any hugepte pages leftover */
-       for (i = 0; i < PTRS_PER_HUGEPGD; i++) {
-               pud_t *pud = (pud_t *)(pgdir + i);
-
-               if (! pud_none(*pud)) {
-                       pte_t *pte = (pte_t *)pud_page(*pud);
-                       struct page *ptepage = virt_to_page(pte);
+               /* First see if we can do the mapping in the existing
+                * high areas */
+               addr = htlb_get_high_area(len, curareas);
+               if (addr != -ENOMEM)
+                       return addr;
  
-                       ptepage->mapping = NULL;
+               lastshift = 0;
+               for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
+                    ! lastshift; areamask >>=1) {
+                       if (areamask & 1)
+                               lastshift = 1;
  
-                       BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE));
-                       kmem_cache_free(zero_cache, pte);
+                       addr = htlb_get_high_area(len, curareas | areamask);
+                       if ((addr != -ENOMEM)
+                           && open_high_hpage_areas(current->mm, areamask) == 0)
+                               return addr;
                 }
-               pud_clear(pud);
         }
-
-       BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE));
-       kmem_cache_free(zero_cache, pgdir);
-
- out:
-       spin_unlock(&mm->page_table_lock);
+       printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
+              " enough areas\n");
+       return -ENOMEM;
  }
  
  int hash_huge_page(struct mm_struct *mm, unsigned long access,
diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c

index b6e75b891ac0fef5a2aa9dea4152b2c2ea2b998b..c65b87b927567cd1a3dd9c30275a713006c34661 100644 (file)
--- a/arch/ppc64/mm/imalloc.c
+++ b/arch/ppc64/mm/imalloc.c
@@ -31,7 +31,7 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr)
                         break;
                 if ((unsigned long)tmp->addr >= ioremap_bot)
                         addr = tmp->size + (unsigned long) tmp->addr;
-               if (addr > IMALLOC_END-size) 
+               if (addr >= IMALLOC_END-size)
                         return 1;
         }
         *im_addr = addr;
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c

index e58a24d42879da92448a94a0b84c6f2dec1a682d..c02dc9809ca57a2a3299c2c5c83efe9a360583ea 100644 (file)
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -42,7 +42,6 @@
  
  #include <asm/pgalloc.h>
  #include <asm/page.h>
-#include <asm/abs_addr.h>
  #include <asm/prom.h>
  #include <asm/lmb.h>
  #include <asm/rtas.h>
@@ -66,6 +65,14 @@
  #include <asm/vdso.h>
  #include <asm/imalloc.h>
  
+#if PGTABLE_RANGE > USER_VSID_RANGE
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#warning TASK_SIZE is smaller than it needs to be.
+#endif
+
  int mem_init_done;
  unsigned long ioremap_bot = IMALLOC_BASE;
  static unsigned long phbs_io_bot = PHBS_IO_BASE;
@@ -159,7 +166,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
                 ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
                 if (!ptep)
                         return -ENOMEM;
-               pa = abs_to_phys(pa);
                 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
                                                           __pgprot(flags)));
                 spin_unlock(&init_mm.page_table_lock);
@@ -226,7 +232,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size,
          * Before that, we map using addresses going
          * up from ioremap_bot.  imalloc will use
          * the addresses from ioremap_bot through
-        * IMALLOC_END (0xE000001fffffffff)
+        * IMALLOC_END
          * 
          */
         pa = addr & PAGE_MASK;
@@ -417,12 +423,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
         int index;
         int err;
  
-#ifdef CONFIG_HUGETLB_PAGE
-       /* We leave htlb_segs as it was, but for a fork, we need to
-        * clear the huge_pgdir. */
-       mm->context.huge_pgdir = NULL;
-#endif
-
  again:
         if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
                 return -ENOMEM;
@@ -453,8 +453,6 @@ void destroy_context(struct mm_struct *mm)
         spin_unlock(&mmu_context_lock);
  
         mm->context.id = NO_CONTEXT;
-
-       hugetlb_mm_free_pgd(mm);
  }
  
  /*
@@ -484,9 +482,9 @@ void __init mm_init_ppc64(void)
         for (i = 1; i < lmb.memory.cnt; i++) {
                 unsigned long base, prevbase, prevsize;
  
-               prevbase = lmb.memory.region[i-1].physbase;
+               prevbase = lmb.memory.region[i-1].base;
                 prevsize = lmb.memory.region[i-1].size;
-               base = lmb.memory.region[i].physbase;
+               base = lmb.memory.region[i].base;
                 if (base > (prevbase + prevsize)) {
                         io_hole_start = prevbase + prevsize;
                         io_hole_size = base  - (prevbase + prevsize);
@@ -513,11 +511,8 @@ int page_is_ram(unsigned long pfn)
         for (i=0; i < lmb.memory.cnt; i++) {
                 unsigned long base;
  
-#ifdef CONFIG_MSCHUNKS
-               base = lmb.memory.region[i].physbase;
-#else
                 base = lmb.memory.region[i].base;
-#endif
+
                 if ((paddr >= base) &&
                         (paddr < (base + lmb.memory.region[i].size))) {
                         return 1;
@@ -547,7 +542,7 @@ void __init do_init_bootmem(void)
          */
         bootmap_pages = bootmem_bootmap_pages(total_pages);
  
-       start = abs_to_phys(lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE));
+       start = lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
         BUG_ON(!start);
  
         boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
@@ -558,25 +553,25 @@ void __init do_init_bootmem(void)
          * present.
          */
         for (i=0; i < lmb.memory.cnt; i++) {
-               unsigned long physbase, size;
+               unsigned long base, size;
                 unsigned long start_pfn, end_pfn;
  
-               physbase = lmb.memory.region[i].physbase;
+               base = lmb.memory.region[i].base;
                 size = lmb.memory.region[i].size;
  
-               start_pfn = physbase >> PAGE_SHIFT;
+               start_pfn = base >> PAGE_SHIFT;
                 end_pfn = start_pfn + (size >> PAGE_SHIFT);
                 memory_present(0, start_pfn, end_pfn);
  
-               free_bootmem(physbase, size);
+               free_bootmem(base, size);
         }
  
         /* reserve the sections we're already using */
         for (i=0; i < lmb.reserved.cnt; i++) {
-               unsigned long physbase = lmb.reserved.region[i].physbase;
+               unsigned long base = lmb.reserved.region[i].base;
                 unsigned long size = lmb.reserved.region[i].size;
  
-               reserve_bootmem(physbase, size);
+               reserve_bootmem(base, size);
         }
  }
  
@@ -615,10 +610,10 @@ static int __init setup_kcore(void)
         int i;
  
         for (i=0; i < lmb.memory.cnt; i++) {
-               unsigned long physbase, size;
+               unsigned long base, size;
                 struct kcore_list *kcore_mem;
  
-               physbase = lmb.memory.region[i].physbase;
+               base = lmb.memory.region[i].base;
                 size = lmb.memory.region[i].size;
  
                 /* GFP_ATOMIC to avoid might_sleep warnings during boot */
@@ -626,7 +621,7 @@ static int __init setup_kcore(void)
                 if (!kcore_mem)
                         panic("mem_init: kmalloc failed\n");
  
-               kclist_add(kcore_mem, __va(physbase), size);
+               kclist_add(kcore_mem, __va(base), size);
         }
  
         kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
@@ -686,9 +681,6 @@ void __init mem_init(void)
  
         mem_init_done = 1;
  
-#ifdef CONFIG_PPC_ISERIES
-       iommu_vio_init();
-#endif
         /* Initialize the vDSO */
         vdso_init();
  }
@@ -833,23 +825,43 @@ void __iomem * reserve_phb_iospace(unsigned long size)
         return virt_addr;
  }
  
-kmem_cache_t *zero_cache;
-
-static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
+static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
  {
-       memset(pte, 0, PAGE_SIZE);
+       memset(addr, 0, kmem_cache_size(cache));
  }
  
+static const int pgtable_cache_size[2] = {
+       PTE_TABLE_SIZE, PMD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+       "pgd_pte_cache", "pud_pmd_cache",
+};
+
+kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
+
  void pgtable_cache_init(void)
  {
-       zero_cache = kmem_cache_create("zero",
-                               PAGE_SIZE,
-                               0,
-                               SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
-                               zero_ctor,
-                               NULL);
-       if (!zero_cache)
-               panic("pgtable_cache_init(): could not create zero_cache!\n");
+       int i;
+
+       BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
+       BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
+       BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
+       BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
+
+       for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
+               int size = pgtable_cache_size[i];
+               const char *name = pgtable_cache_name[i];
+
+               pgtable_cache[i] = kmem_cache_create(name,
+                                                    size, size,
+                                                    SLAB_HWCACHE_ALIGN
+                                                    | SLAB_MUST_HWCACHE_ALIGN,
+                                                    zero_ctor,
+                                                    NULL);
+               if (! pgtable_cache[i])
+                       panic("pgtable_cache_init(): could not create %s!\n",
+                             name);
+       }
  }
  
  pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c

index cafd91aef289ecc4354f6252be37ee5f36006f98..c3116f0d788c359969fcd120eda6eb461451faad 100644 (file)
--- a/arch/ppc64/mm/numa.c
+++ b/arch/ppc64/mm/numa.c
@@ -647,7 +647,12 @@ void __init do_init_bootmem(void)
  new_range:
                         mem_start = read_n_cells(addr_cells, &memcell_buf);
                         mem_size = read_n_cells(size_cells, &memcell_buf);
-                       numa_domain = numa_enabled ? of_node_numa_domain(memory) : 0;
+                       if (numa_enabled) {
+                               numa_domain = of_node_numa_domain(memory);
+                               if (numa_domain  >= MAX_NUMNODES)
+                                       numa_domain = 0;
+                       } else
+                               numa_domain =  0;
  
                         if (numa_domain != nid)
                                 continue;
@@ -666,7 +671,7 @@ new_range:
                  * Mark reserved regions on this node
                  */
                 for (i = 0; i < lmb.reserved.cnt; i++) {
-                       unsigned long physbase = lmb.reserved.region[i].physbase;
+                       unsigned long physbase = lmb.reserved.region[i].base;
                         unsigned long size = lmb.reserved.region[i].size;
  
                         if (pa_to_nid(physbase) != nid &&
diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S

index 8379d678f70f856c7ece883a5724cac91c0f0940..bab255889c58214c60214606d3577e9093bc24ca 100644 (file)
--- a/arch/ppc64/mm/slb_low.S
+++ b/arch/ppc64/mm/slb_low.S
@@ -89,28 +89,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
         b       9f
  
  0:     /* user address: proto-VSID = context<<15 | ESID */
-       li      r11,SLB_VSID_USER
-
-       srdi.   r9,r3,13
+       srdi.   r9,r3,USER_ESID_BITS
         bne-    8f                      /* invalid ea bits set */
  
  #ifdef CONFIG_HUGETLB_PAGE
  BEGIN_FTR_SECTION
-       /* check against the hugepage ranges */
-       cmpldi  r3,(TASK_HPAGE_END>>SID_SHIFT)
-       bge     6f                      /* >= TASK_HPAGE_END */
-       cmpldi  r3,(TASK_HPAGE_BASE>>SID_SHIFT)
-       bge     5f                      /* TASK_HPAGE_BASE..TASK_HPAGE_END */
+       lhz     r9,PACAHIGHHTLBAREAS(r13)
+       srdi    r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT)
+       srd     r9,r9,r11
+       andi.   r9,r9,1
+       bne     5f
+
+       li      r11,SLB_VSID_USER
+
         cmpldi  r3,16
-       bge     6f                      /* 4GB..TASK_HPAGE_BASE */
+       bge     6f
  
-       lhz     r9,PACAHTLBSEGS(r13)
+       lhz     r9,PACALOWHTLBAREAS(r13)
         srd     r9,r9,r3
         andi.   r9,r9,1
+
         beq     6f
  
-5:     /* this is a hugepage user address */
-       li      r11,(SLB_VSID_USER|SLB_VSID_L)
+5:     li      r11,SLB_VSID_USER|SLB_VSID_L
  END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
  #endif /* CONFIG_HUGETLB_PAGE */
  
diff --git a/arch/ppc64/mm/tlb.c b/arch/ppc64/mm/tlb.c

index 26f0172c4527ba897e58547fd93eade9f56681a4..d8a6593a13f0ad98fd0b64310b514551b02047e4 100644 (file)
--- a/arch/ppc64/mm/tlb.c
+++ b/arch/ppc64/mm/tlb.c
@@ -41,7 +41,58 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
  DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
  unsigned long pte_freelist_forced_free;
  
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
+struct pte_freelist_batch
+{
+       struct rcu_head rcu;
+       unsigned int    index;
+       pgtable_free_t  tables[0];
+};
+
+DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+unsigned long pte_freelist_forced_free;
+
+#define PTE_FREELIST_SIZE \
+       ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
+         / sizeof(pgtable_free_t))
+
+#ifdef CONFIG_SMP
+static void pte_free_smp_sync(void *arg)
+{
+       /* Do nothing, just ensure we sync with all CPUs */
+}
+#endif
+
+/* This is only called when we are critically out of memory
+ * (and fail to get a page in pte_free_tlb).
+ */
+static void pgtable_free_now(pgtable_free_t pgf)
+{
+       pte_freelist_forced_free++;
+
+       smp_call_function(pte_free_smp_sync, NULL, 0, 1);
+
+       pgtable_free(pgf);
+}
+
+static void pte_free_rcu_callback(struct rcu_head *head)
+{
+       struct pte_freelist_batch *batch =
+               container_of(head, struct pte_freelist_batch, rcu);
+       unsigned int i;
+
+       for (i = 0; i < batch->index; i++)
+               pgtable_free(batch->tables[i]);
+
+       free_page((unsigned long)batch);
+}
+
+static void pte_free_submit(struct pte_freelist_batch *batch)
+{
+       INIT_RCU_HEAD(&batch->rcu);
+       call_rcu(&batch->rcu, pte_free_rcu_callback);
+}
+
+void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
  {
         /* This is safe as we are holding page_table_lock */
          cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
@@ -49,19 +100,19 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
  
         if (atomic_read(&tlb->mm->mm_users) < 2 ||
             cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
-               pte_free(ptepage);
+               pgtable_free(pgf);
                 return;
         }
  
         if (*batchp == NULL) {
                 *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
                 if (*batchp == NULL) {
-                       pte_free_now(ptepage);
+                       pgtable_free_now(pgf);
                         return;
                 }
                 (*batchp)->index = 0;
         }
-       (*batchp)->pages[(*batchp)->index++] = ptepage;
+       (*batchp)->tables[(*batchp)->index++] = pgf;
         if ((*batchp)->index == PTE_FREELIST_SIZE) {
                 pte_free_submit(*batchp);
                 *batchp = NULL;
@@ -132,42 +183,6 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
         put_cpu();
  }
  
-#ifdef CONFIG_SMP
-static void pte_free_smp_sync(void *arg)
-{
-       /* Do nothing, just ensure we sync with all CPUs */
-}
-#endif
-
-/* This is only called when we are critically out of memory
- * (and fail to get a page in pte_free_tlb).
- */
-void pte_free_now(struct page *ptepage)
-{
-       pte_freelist_forced_free++;
-
-       smp_call_function(pte_free_smp_sync, NULL, 0, 1);
-
-       pte_free(ptepage);
-}
-
-static void pte_free_rcu_callback(struct rcu_head *head)
-{
-       struct pte_freelist_batch *batch =
-               container_of(head, struct pte_freelist_batch, rcu);
-       unsigned int i;
-
-       for (i = 0; i < batch->index; i++)
-               pte_free(batch->pages[i]);
-       free_page((unsigned long)batch);
-}
-
-void pte_free_submit(struct pte_freelist_batch *batch)
-{
-       INIT_RCU_HEAD(&batch->rcu);
-       call_rcu(&batch->rcu, pte_free_rcu_callback);
-}
-
  void pte_free_finish(void)
  {
         /* This is safe as we are holding page_table_lock */
diff --git a/arch/ppc64/oprofile/common.c b/arch/ppc64/oprofile/common.c

index b28bfda23d944b4b275ef9e0de4c2db6a891a082..4acd1a424933d7c08afa1bf7908ef1ddc13104b1 100644 (file)
--- a/arch/ppc64/oprofile/common.c
+++ b/arch/ppc64/oprofile/common.c
@@ -153,6 +153,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
  
                 case PV_970:
                 case PV_970FX:
+               case PV_970MP:
                         model = &op_model_power4;
                         model->num_counters = 8;
                         ops->cpu_type = "ppc64/970";
diff --git a/arch/ppc64/xmon/start.c b/arch/ppc64/xmon/start.c

index a9265bcc79b24d1e3750efdb3ec86e7ee3d1cd61..f86b584acd76d16793e55be30848319d3e7066de 100644 (file)
--- a/arch/ppc64/xmon/start.c
+++ b/arch/ppc64/xmon/start.c
@@ -27,7 +27,7 @@ static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs,
                               struct tty_struct *tty) 
  {
         /* ensure xmon is enabled */
-       xmon_init();
+       xmon_init(1);
         debugger(pt_regs);
  }
  
diff --git a/arch/ppc64/xmon/xmon.c b/arch/ppc64/xmon/xmon.c

index 7f6e13a4b71eec0c23330aaca24dcc76aa6a5971..45908b10acd345970da83c6d5369fc850e6b072a 100644 (file)
--- a/arch/ppc64/xmon/xmon.c
+++ b/arch/ppc64/xmon/xmon.c
@@ -329,13 +329,16 @@ int xmon_core(struct pt_regs *regs, int fromipi)
                 printf("cpu 0x%x: Exception %lx %s in xmon, "
                        "returning to main loop\n",
                        cpu, regs->trap, getvecname(TRAP(regs)));
+               release_output_lock();
                 longjmp(xmon_fault_jmp[cpu], 1);
         }
  
         if (setjmp(recurse_jmp) != 0) {
                 if (!in_xmon || !xmon_gate) {
+                       get_output_lock();
                         printf("xmon: WARNING: bad recursive fault "
                                "on cpu 0x%x\n", cpu);
+                       release_output_lock();
                         goto waiting;
                 }
                 secondary = !(xmon_taken && cpu == xmon_owner);
@@ -2493,15 +2496,25 @@ static void dump_stab(void)
         }
  }
  
-void xmon_init(void)
+void xmon_init(int enable)
  {
-       __debugger = xmon;
-       __debugger_ipi = xmon_ipi;
-       __debugger_bpt = xmon_bpt;
-       __debugger_sstep = xmon_sstep;
-       __debugger_iabr_match = xmon_iabr_match;
-       __debugger_dabr_match = xmon_dabr_match;
-       __debugger_fault_handler = xmon_fault_handler;
+       if (enable) {
+               __debugger = xmon;
+               __debugger_ipi = xmon_ipi;
+               __debugger_bpt = xmon_bpt;
+               __debugger_sstep = xmon_sstep;
+               __debugger_iabr_match = xmon_iabr_match;
+               __debugger_dabr_match = xmon_dabr_match;
+               __debugger_fault_handler = xmon_fault_handler;
+       } else {
+               __debugger = NULL;
+               __debugger_ipi = NULL;
+               __debugger_bpt = NULL;
+               __debugger_sstep = NULL;
+               __debugger_iabr_match = NULL;
+               __debugger_dabr_match = NULL;
+               __debugger_fault_handler = NULL;
+       }
  }
  
  void dump_segments(void)
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c

index d05d65ac96940832776685c0f8b537269bc95231..7358cdb8441fbf960a1f1bb5371780f3f5e798c6 100644 (file)
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -637,12 +637,11 @@ handle_signal32(unsigned long sig, struct k_sigaction *ka,
         else
                 setup_frame32(sig, ka, oldset, regs);
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S

index 799a98eac92d6b18f8e76c971645a0a3269ea919..23fe94e58688ec4d6659f16fa06287ac6e8cb174 100644 (file)
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1449,3 +1449,29 @@ compat_sys_kexec_load_wrapper:
         llgtr   %r4,%r4                 # struct kexec_segment *
         llgfr   %r5,%r5                 # unsigned long
         jg      compat_sys_kexec_load
+
+       .globl  sys_ioprio_set_wrapper
+sys_ioprio_set_wrapper:
+       lgfr    %r2,%r2                 # int
+       lgfr    %r3,%r3                 # int
+       lgfr    %r4,%r4                 # int
+       jg      sys_ioprio_set
+
+       .globl  sys_ioprio_get_wrapper
+sys_ioprio_get_wrapper:
+       lgfr    %r2,%r2                 # int
+       lgfr    %r3,%r3                 # int
+       jg      sys_ioprio_get
+
+       .globl  sys_inotify_add_watch_wrapper
+sys_inotify_add_watch_wrapper:
+       lgfr    %r2,%r2                 # int
+       llgtr   %r3,%r3                 # const char *
+       llgfr   %r4,%r4                 # u32
+       jg      sys_inotify_add_watch
+
+       .globl  sys_inotify_rm_watch_wrapper
+sys_inotify_rm_watch_wrapper:
+       lgfr    %r2,%r2                 # int
+       llgfr   %r3,%r3                 # u32
+       jg      sys_inotify_rm_watch
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c

index 20062145e84e29bc1dcddcc258b7a29276991f84..d47fecb42cc5f24d5e2aeda18424fb60f37ffb25 100644 (file)
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -46,9 +46,9 @@ int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
                                 "lra    3,0(%4)\n"
                                 "lr     5,%5\n"
                                 "diag   2,4,0x8\n"
-                               "brc    8, .Litfits\n"
+                               "brc    8, 1f\n"
                                 "ar     5, %5\n"
-                               ".Litfits: \n"
+                               "1: \n"
                                 "lr     %0,4\n"
                                 "lr     %1,5\n"
                                 : "=d" (return_code), "=d" (return_len)
@@ -64,9 +64,9 @@ int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
                                 "sam31\n"
                                 "diag   2,4,0x8\n"
                                 "sam64\n"
-                               "brc    8, .Litfits\n"
+                               "brc    8, 1f\n"
                                 "agr    5, %5\n"
-                               ".Litfits: \n"
+                               "1: \n"
                                 "lgr    %0,4\n"
                                 "lgr    %1,5\n"
                                 : "=d" (return_code), "=d" (return_len)
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c

index 2721c3a32b84f5087ed5d923a114b6f3f7fa8a16..5aa71b05b8ae81628902377fc40b35057cd0cae0 100644 (file)
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -70,6 +70,8 @@ machine_kexec(struct kimage *image)
         for (;;);
  }
  
+extern void pfault_fini(void);
+
  static void
  kexec_halt_all_cpus(void *kernel_image)
  {
@@ -78,6 +80,11 @@ kexec_halt_all_cpus(void *kernel_image)
         struct kimage *image;
         relocate_kernel_t data_mover;
  
+#ifdef CONFIG_PFAULT
+       if (MACHINE_IS_VM)
+               pfault_fini();
+#endif
+
         if (atomic_compare_and_swap(-1, smp_processor_id(), &cpuid))
                 signal_processor(smp_processor_id(), sigp_stop);
  
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S

index d5e4a62fbb7965e1e000c28407fbdf388f2dcd0a..2a25ec7147ffef7be275feeb9a915dfaec6f70b3 100644 (file)
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -4,6 +4,7 @@
   * (C) Copyright IBM Corp. 2005
   *
   * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com>
+ *           Heiko Carstens <heiko.carstens@de.ibm.com>
   *
   */
  
@@ -25,8 +26,31 @@
         relocate_kernel:
                 basr    %r13,0          #base address
         .base:
-               spx     zero64-.base(%r13)      #absolute addressing mode
                 stnsm   sys_msk-.base(%r13),0xf8        #disable DAT and IRQ (external)
+               spx     zero64-.base(%r13)      #absolute addressing mode
+               stctl   %c0,%c15,ctlregs-.base(%r13)
+               stm     %r0,%r15,gprregs-.base(%r13)
+               la      %r1,load_psw-.base(%r13)
+               mvc     0(8,%r0),0(%r1)
+               la      %r0,.back-.base(%r13)
+               st      %r0,4(%r0)
+               oi      4(%r0),0x80
+               mvc     0x68(8,%r0),0(%r1)
+               la      %r0,.back_pgm-.base(%r13)
+               st      %r0,0x6c(%r0)
+               oi      0x6c(%r0),0x80
+               lhi     %r0,0
+               diag    %r0,%r0,0x308
+       .back:
+               basr    %r13,0
+       .back_base:
+               oi      have_diag308-.back_base(%r13),0x01
+               lctl    %c0,%c15,ctlregs-.back_base(%r13)
+               lm      %r0,%r15,gprregs-.back_base(%r13)
+               j       .start_reloc
+       .back_pgm:
+               lm      %r0,%r15,gprregs-.base(%r13)
+       .start_reloc:
                 lhi     %r10,-1         #preparing the mask
                 sll     %r10,12         #shift it such that it becomes 0xf000
         .top:
@@ -63,6 +87,10 @@
                 o       %r3,4(%r4)      #or load address into psw
                 st      %r3,4(%r4)
                 mvc     0(8,%r0),0(%r4) #copy psw to absolute address 0
+               tm      have_diag308-.base(%r13),0x01
+               jno     .no_diag308
+               diag    %r0,%r0,0x308
+       .no_diag308:
                 sr      %r1,%r1         #clear %r1
                 sr      %r2,%r2         #clear %r2
                 sigp    %r1,%r2,0x12    #set cpuid to zero
@@ -75,6 +103,17 @@
                 .long   0x00080000,0x80000000
         sys_msk:
                 .quad   0
+       ctlregs:
+               .rept   16
+               .long   0
+               .endr
+       gprregs:
+               .rept   16
+               .long   0
+               .endr
+       have_diag308:
+               .byte   0
+               .align  8
         relocate_kernel_end:
         .globl  relocate_kernel_len
         relocate_kernel_len:
diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S

index 96290cc4eb3c49de81c4c19456db70c2c41dc89d..8cdb86e8911ff98ec92c39ae3db8be106c76f836 100644 (file)
--- a/arch/s390/kernel/relocate_kernel64.S
+++ b/arch/s390/kernel/relocate_kernel64.S
@@ -4,6 +4,7 @@
   * (C) Copyright IBM Corp. 2005
   *
   * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com>
+ *           Heiko Carstens <heiko.carstens@de.ibm.com>
   *
   */
  
@@ -26,8 +27,34 @@
         relocate_kernel:
                 basr    %r13,0          #base address
         .base:
+               stnsm   sys_msk-.base(%r13),0xf8        #disable DAT and IRQs
                 spx     zero64-.base(%r13)      #absolute addressing mode
-               stnsm   sys_msk-.base(%r13),0xf8        #disable DAT and IRQ (external)
+               stctg   %c0,%c15,ctlregs-.base(%r13)
+               stmg    %r0,%r15,gprregs-.base(%r13)
+               lghi    %r0,3
+               sllg    %r0,%r0,31
+               stg     %r0,0x1d0(%r0)
+               la      %r0,.back_pgm-.base(%r13)
+               stg     %r0,0x1d8(%r0)
+               la      %r1,load_psw-.base(%r13)
+               mvc     0(8,%r0),0(%r1)
+               la      %r0,.back-.base(%r13)
+               st      %r0,4(%r0)
+               oi      4(%r0),0x80
+               lghi    %r0,0
+               diag    %r0,%r0,0x308
+       .back:
+               lhi     %r1,1           #mode 1 = esame
+               sigp    %r1,%r0,0x12    #switch to esame mode
+               sam64                   #switch to 64 bit addressing mode
+               basr    %r13,0
+       .back_base:
+               oi      have_diag308-.back_base(%r13),0x01
+               lctlg   %c0,%c15,ctlregs-.back_base(%r13)
+               lmg     %r0,%r15,gprregs-.back_base(%r13)
+               j       .top
+       .back_pgm:
+               lmg     %r0,%r15,gprregs-.base(%r13)
         .top:
                 lghi    %r7,4096        #load PAGE_SIZE in r7
                 lghi    %r9,4096        #load PAGE_SIZE in r9
@@ -62,6 +89,10 @@
                 o       %r3,4(%r4)      #or load address into psw
                 st      %r3,4(%r4)
                 mvc     0(8,%r0),0(%r4) #copy psw to absolute address 0
+               tm      have_diag308-.base(%r13),0x01
+               jno     .no_diag308
+               diag    %r0,%r0,0x308
+       .no_diag308:
                 sam31                   #31 bit mode
                 sr      %r1,%r1         #erase register r1
                 sr      %r2,%r2         #erase register r2
@@ -75,8 +106,18 @@
                 .long   0x00080000,0x80000000
         sys_msk:
                 .quad   0
+       ctlregs:
+               .rept   16
+               .quad   0
+               .endr
+       gprregs:
+               .rept   16
+               .quad   0
+               .endr
+       have_diag308:
+               .byte   0
+               .align  8
         relocate_kernel_end:
         .globl  relocate_kernel_len
         relocate_kernel_len:
                 .quad   relocate_kernel_end - relocate_kernel
-
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c

index 610c1d03e975701b152df903f399de25b4954ff2..6a3f5b7473a91b7c1a5ad08238430966f839e9f1 100644 (file)
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -429,13 +429,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
         else
                 setup_frame(sig, ka, oldset, regs);
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  /*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c

index da77f001af8ddbf5687ded085fbffb4c3baadb5d..85222fee43611293bdd8271f760f030535ed286f 100644 (file)
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -537,7 +537,8 @@ int __devinit start_secondary(void *cpuvoid)
  #endif
  #ifdef CONFIG_PFAULT
         /* Enable pfault pseudo page faults on this cpu. */
-       pfault_init();
+       if (MACHINE_IS_VM)
+               pfault_init();
  #endif
         /* Mark this cpu as online */
         cpu_set(smp_processor_id(), cpu_online_map);
@@ -690,7 +691,8 @@ __cpu_disable(void)
  
  #ifdef CONFIG_PFAULT
         /* Disable pfault pseudo page faults on this cpu. */
-       pfault_fini();
+       if (MACHINE_IS_VM)
+               pfault_fini();
  #endif
  
         /* disable all external interrupts */
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S

index a8668afb5f87d2b5b3324a8dae1802eab7aedcdf..426d7cafdab307192219807fc565537cbfee848d 100644 (file)
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -290,3 +290,8 @@ SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key_wrapper)
  SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key_wrapper)
  SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl)               /* 280 */
  SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid_wrapper)
+SYSCALL(sys_ioprio_set,sys_ioprio_set,sys_ioprio_set_wrapper)
+SYSCALL(sys_ioprio_get,sys_ioprio_get,sys_ioprio_get_wrapper)
+SYSCALL(sys_inotify_init,sys_inotify_init,sys_inotify_init)
+SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,sys_inotify_add_watch_wrapper)
+SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,sys_inotify_rm_watch_wrapper)
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c

index bc7b7be7acbe7b9858992e75fcc86309d59de79b..6b8703ec2ae66edf93f527a44d4ecaf2af108f42 100644 (file)
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -29,6 +29,7 @@
  #include <linux/delay.h>
  #include <linux/module.h>
  #include <linux/kallsyms.h>
+#include <linux/reboot.h>
  
  #include <asm/system.h>
  #include <asm/uaccess.h>
@@ -675,6 +676,19 @@ asmlinkage void kernel_stack_overflow(struct pt_regs * regs)
         panic("Corrupt kernel stack, can't continue.");
  }
  
+#ifndef CONFIG_ARCH_S390X
+static int
+pagex_reboot_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+       if (MACHINE_IS_VM)
+               cpcmd("SET PAGEX OFF", NULL, 0, NULL);
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block pagex_reboot_notifier = {
+       .notifier_call = &pagex_reboot_event,
+};
+#endif
  
  /* init is done in lowcore.S and head.S */
  
@@ -735,6 +749,7 @@ void __init trap_init(void)
                                                     &ext_int_pfault);
  #endif
  #ifndef CONFIG_ARCH_S390X
+               register_reboot_notifier(&pagex_reboot_notifier);
                 cpcmd("SET PAGEX ON", NULL, 0, NULL);
  #endif
         }
diff --git a/arch/sh/kernel/entry.S b/arch/sh/kernel/entry.S

index 6615e4838ee4cab03a4a72a552347ae768f38273..fb6368159dd08616da71a7fa788c4d663f7968b7 100644 (file)
--- a/arch/sh/kernel/entry.S
+++ b/arch/sh/kernel/entry.S
@@ -1145,5 +1145,10 @@ ENTRY(sys_call_table)
         .long sys_add_key               /* 285 */
         .long sys_request_key
         .long sys_keyctl
+       .long sys_ioprio_set
+       .long sys_ioprio_get
+       .long sys_inotify_init          /* 290 */
+       .long sys_inotify_add_watch
+       .long sys_inotify_rm_watch
  
  /* End of entry.S */
diff --git a/arch/sh/kernel/signal.c b/arch/sh/kernel/signal.c

index 8022243f017828a44875f6f79b86ff487388b0ec..b475c4d2405f3e81795dfb39dfdd8aa2c350e94d 100644 (file)
--- a/arch/sh/kernel/signal.c
+++ b/arch/sh/kernel/signal.c
@@ -546,13 +546,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
         if (ka->sa.sa_flags & SA_ONESHOT)
                 ka->sa.sa_handler = SIG_DFL;
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  /*
diff --git a/arch/sh64/kernel/signal.c b/arch/sh64/kernel/signal.c

index c6a14a87c59b4a887664a34c6604c96515c31417..3ea8929e483b5fea4f99fc6b6ebe2144ed61eb77 100644 (file)
--- a/arch/sh64/kernel/signal.c
+++ b/arch/sh64/kernel/signal.c
@@ -664,13 +664,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
         else
                 setup_frame(sig, ka, oldset, regs);
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  /*
diff --git a/arch/sh64/kernel/syscalls.S b/arch/sh64/kernel/syscalls.S

index 6aabc63e451841ead9ebb431b332d5a4a65d9ae5..a3d037805f1cf7e7d418e328cca61555e79479dd 100644 (file)
--- a/arch/sh64/kernel/syscalls.S
+++ b/arch/sh64/kernel/syscalls.S
@@ -342,4 +342,9 @@ sys_call_table:
         .long sys_add_key
         .long sys_request_key
         .long sys_keyctl                /* 315 */
+       .long sys_ioprio_set
+       .long sys_ioprio_get
+       .long sys_inotify_init
+       .long sys_inotify_add_watch
+       .long sys_inotify_rm_watch      /* 320 */
  
diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c

index a24932881dbb21b8a37e2cea4c9401cc6df3014a..f08d0eaf6497d34c716d8f0c5ee4baf2aa4eb27c 100644 (file)
--- a/arch/sh64/mm/fault.c
+++ b/arch/sh64/mm/fault.c
@@ -223,13 +223,13 @@ good_area:
          */
  survive:
         switch (handle_mm_fault(mm, vma, address, writeaccess)) {
-       case 1:
+       case VM_FAULT_MINOR:
                 tsk->min_flt++;
                 break;
-       case 2:
+       case VM_FAULT_MAJOR:
                 tsk->maj_flt++;
                 break;
-       case 0:
+       case VM_FAULT_SIGBUS:
                 goto do_sigbus;
         default:
                 goto out_of_memory;
diff --git a/arch/sparc/kernel/setup.c b/arch/sparc/kernel/setup.c

index 55352ed85e8afe3187ec14ea00758e360d0e3d8e..53c192a4982f7210d8bd5587c2f0d99987a86b7b 100644 (file)
--- a/arch/sparc/kernel/setup.c
+++ b/arch/sparc/kernel/setup.c
@@ -32,7 +32,6 @@
  #include <linux/spinlock.h>
  #include <linux/root_dev.h>
  
-#include <asm/segment.h>
  #include <asm/system.h>
  #include <asm/io.h>
  #include <asm/processor.h>
diff --git a/arch/sparc/kernel/signal.c b/arch/sparc/kernel/signal.c

index 011ff35057a5149f332f2b928bb7e052b20d4292..5f34d7dc2b898672a4a8bfccd40940a647af0170 100644 (file)
--- a/arch/sparc/kernel/signal.c
+++ b/arch/sparc/kernel/signal.c
@@ -1034,13 +1034,12 @@ handle_signal(unsigned long signr, struct k_sigaction *ka,
                 else
                         setup_frame(&ka->sa, regs, signr, oldset, info);
         }
-       if (!(ka->sa.sa_flags & SA_NOMASK)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NOMASK))
                 sigaddset(&current->blocked, signr);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs,
diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c

index 1bd430d0ca0629108135262bbcba06b986a6c005..8faa8dc4de435cd7850170a5ea266f19cd00ac6b 100644 (file)
--- a/arch/sparc/kernel/sparc_ksyms.c
+++ b/arch/sparc/kernel/sparc_ksyms.c
@@ -98,8 +98,9 @@ extern void ___rw_write_enter(void);
   * The module references will be fixed up by module_frob_arch_sections.
   */
  #define DOT_ALIAS2(__ret, __x, __arg1, __arg2) \
-       extern __ret __x(__arg1, __arg2) \
-                    __attribute__((weak, alias("." # __x)));
+       extern __ret __x(__arg1, __arg2); \
+       asm(".weak " #__x);\
+       asm(#__x "=." #__x);
  
  DOT_ALIAS2(int, div, int, int)
  DOT_ALIAS2(int, mul, int, int)
diff --git a/arch/sparc/kernel/tick14.c b/arch/sparc/kernel/tick14.c

index fd8005a3e6bde45ea8ff3fc4bc6d0cc08c6b0f79..591547af4c656d801cff84635e8e8bb48e372566 100644 (file)
--- a/arch/sparc/kernel/tick14.c
+++ b/arch/sparc/kernel/tick14.c
@@ -19,7 +19,6 @@
  #include <linux/interrupt.h>
  
  #include <asm/oplib.h>
-#include <asm/segment.h>
  #include <asm/timer.h>
  #include <asm/mostek.h>
  #include <asm/system.h>
diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c

index 6486cbf2efe9fd5a62d1aebd8ad12f30c1402759..3b759aefc170e39b69f76d93487da243b46c1dc4 100644 (file)
--- a/arch/sparc/kernel/time.c
+++ b/arch/sparc/kernel/time.c
@@ -32,7 +32,6 @@
  #include <linux/profile.h>
  
  #include <asm/oplib.h>
-#include <asm/segment.h>
  #include <asm/timer.h>
  #include <asm/mostek.h>
  #include <asm/system.h>
diff --git a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c

index 37f4107bae667b73c7bc14b05f85df48646347f2..2bbd53f3cafb2c461e111d10351e2436bd1a19bb 100644 (file)
--- a/arch/sparc/mm/fault.c
+++ b/arch/sparc/mm/fault.c
@@ -23,7 +23,6 @@
  #include <linux/module.h>
  
  #include <asm/system.h>
-#include <asm/segment.h>
  #include <asm/page.h>
  #include <asm/pgtable.h>
  #include <asm/memreg.h>
diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c

index ec2e05028a10d7ff7804ef912833942e1e0fd20a..c03babaa0498b95787eac1bd3ec2244364208339 100644 (file)
--- a/arch/sparc/mm/init.c
+++ b/arch/sparc/mm/init.c
@@ -25,7 +25,6 @@
  #include <linux/bootmem.h>
  
  #include <asm/system.h>
-#include <asm/segment.h>
  #include <asm/vac-ops.h>
  #include <asm/page.h>
  #include <asm/pgtable.h>
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile

index 093281bdf85f688c7a867daf0283a2df90e921b9..6f00ab8b9d23ab6b79897acca6899b559c4dbaa4 100644 (file)
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -8,7 +8,7 @@ EXTRA_CFLAGS := -Werror
  extra-y                := head.o init_task.o vmlinux.lds
  
  obj-y          := process.o setup.o cpu.o idprom.o \
-                  traps.o devices.o auxio.o \
+                  traps.o devices.o auxio.o una_asm.o \
                    irq.o ptrace.o time.o sys_sparc.o signal.o \
                    unaligned.o central.o pci.o starfire.o semaphore.o \
                    power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S

index 88332f00094ac5d00ae9f0e2acdc0adbbdf595b5..cecdc0a7521f1d36fce506ab64e49ca41136d879 100644 (file)
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -21,6 +21,7 @@
  #include <asm/visasm.h>
  #include <asm/estate.h>
  #include <asm/auxio.h>
+#include <asm/sfafsr.h>
  
  #define curptr      g6
  
@@ -690,14 +691,159 @@ netbsd_syscall:
         retl
          nop
  
-       /* These next few routines must be sure to clear the
-        * SFSR FaultValid bit so that the fast tlb data protection
-        * handler does not flush the wrong context and lock up the
-        * box.
+       /* We need to carefully read the error status, ACK
+        * the errors, prevent recursive traps, and pass the
+        * information on to C code for logging.
+        *
+        * We pass the AFAR in as-is, and we encode the status
+        * information as described in asm-sparc64/sfafsr.h
+        */
+       .globl          __spitfire_access_error
+__spitfire_access_error:
+       /* Disable ESTATE error reporting so that we do not
+        * take recursive traps and RED state the processor.
+        */
+       stxa            %g0, [%g0] ASI_ESTATE_ERROR_EN
+       membar          #Sync
+
+       mov             UDBE_UE, %g1
+       ldxa            [%g0] ASI_AFSR, %g4     ! Get AFSR
+
+       /* __spitfire_cee_trap branches here with AFSR in %g4 and
+        * UDBE_CE in %g1.  It only clears ESTATE_ERR_CE in the
+        * ESTATE Error Enable register.
+        */
+__spitfire_cee_trap_continue:
+       ldxa            [%g0] ASI_AFAR, %g5     ! Get AFAR
+
+       rdpr            %tt, %g3
+       and             %g3, 0x1ff, %g3         ! Paranoia
+       sllx            %g3, SFSTAT_TRAP_TYPE_SHIFT, %g3
+       or              %g4, %g3, %g4
+       rdpr            %tl, %g3
+       cmp             %g3, 1
+       mov             1, %g3
+       bleu            %xcc, 1f
+        sllx           %g3, SFSTAT_TL_GT_ONE_SHIFT, %g3
+
+       or              %g4, %g3, %g4
+
+       /* Read in the UDB error register state, clearing the
+        * sticky error bits as-needed.  We only clear them if
+        * the UE bit is set.  Likewise, __spitfire_cee_trap
+        * below will only do so if the CE bit is set.
+        *
+        * NOTE: UltraSparc-I/II have high and low UDB error
+        *       registers, corresponding to the two UDB units
+        *       present on those chips.  UltraSparc-IIi only
+        *       has a single UDB, called "SDB" in the manual.
+        *       For IIi the upper UDB register always reads
+        *       as zero so for our purposes things will just
+        *       work with the checks below.
          */
-       .globl          __do_data_access_exception
-       .globl          __do_data_access_exception_tl1
-__do_data_access_exception_tl1:
+1:     ldxa            [%g0] ASI_UDBH_ERROR_R, %g3
+       and             %g3, 0x3ff, %g7         ! Paranoia
+       sllx            %g7, SFSTAT_UDBH_SHIFT, %g7
+       or              %g4, %g7, %g4
+       andcc           %g3, %g1, %g3           ! UDBE_UE or UDBE_CE
+       be,pn           %xcc, 1f
+        nop
+       stxa            %g3, [%g0] ASI_UDB_ERROR_W
+       membar          #Sync
+
+1:     mov             0x18, %g3
+       ldxa            [%g3] ASI_UDBL_ERROR_R, %g3
+       and             %g3, 0x3ff, %g7         ! Paranoia
+       sllx            %g7, SFSTAT_UDBL_SHIFT, %g7
+       or              %g4, %g7, %g4
+       andcc           %g3, %g1, %g3           ! UDBE_UE or UDBE_CE
+       be,pn           %xcc, 1f
+        nop
+       mov             0x18, %g7
+       stxa            %g3, [%g7] ASI_UDB_ERROR_W
+       membar          #Sync
+
+1:     /* Ok, now that we've latched the error state,
+        * clear the sticky bits in the AFSR.
+        */
+       stxa            %g4, [%g0] ASI_AFSR
+       membar          #Sync
+
+       rdpr            %tl, %g2
+       cmp             %g2, 1
+       rdpr            %pil, %g2
+       bleu,pt         %xcc, 1f
+        wrpr           %g0, 15, %pil
+
+       ba,pt           %xcc, etraptl1
+        rd             %pc, %g7
+
+       ba,pt           %xcc, 2f
+        nop
+
+1:     ba,pt           %xcc, etrap_irq
+        rd             %pc, %g7
+
+2:     mov             %l4, %o1
+       mov             %l5, %o2
+       call            spitfire_access_error
+        add            %sp, PTREGS_OFF, %o0
+       ba,pt           %xcc, rtrap
+        clr            %l6
+
+       /* This is the trap handler entry point for ECC correctable
+        * errors.  They are corrected, but we listen for the trap
+        * so that the event can be logged.
+        *
+        * Disrupting errors are either:
+        * 1) single-bit ECC errors during UDB reads to system
+        *    memory
+        * 2) data parity errors during write-back events
+        *
+        * As far as I can make out from the manual, the CEE trap
+        * is only for correctable errors during memory read
+        * accesses by the front-end of the processor.
+        *
+        * CEE events at any trap level are passed on to C code.
+        * The shared path above sets SFSTAT_TL_GT_ONE in the
+        * status when the trap level is >1, and in that case
+        * enters via etraptl1 instead of etrap_irq.
+        *
+        * This is just like __spitfire_access_error above, but it
+        * specifically handles correctable errors.  If an
+        * uncorrectable error is indicated in the AFSR we
+        * will branch directly above to __spitfire_access_error
+        * to handle it instead.  Uncorrectable therefore takes
+        * priority over correctable, and the error logging
+        * C code will notice this case by inspecting the
+        * trap type.
+        */
+       .globl          __spitfire_cee_trap
+__spitfire_cee_trap:
+       ldxa            [%g0] ASI_AFSR, %g4     ! Get AFSR
+       mov             1, %g3
+       sllx            %g3, SFAFSR_UE_SHIFT, %g3
+       andcc           %g4, %g3, %g0           ! Check for UE
+       bne,pn          %xcc, __spitfire_access_error
+        nop
+
+       /* Ok, in this case we only have a correctable error.
+        * Indicate we only wish to capture that state in register
+        * %g1, and we only disable CE error reporting unlike UE
+        * handling which disables all errors.
+        */
+       ldxa            [%g0] ASI_ESTATE_ERROR_EN, %g3
+       andn            %g3, ESTATE_ERR_CE, %g3
+       stxa            %g3, [%g0] ASI_ESTATE_ERROR_EN
+       membar          #Sync
+
+       /* Preserve AFSR in %g4, indicate UDB state to capture in %g1 */
+       ba,pt           %xcc, __spitfire_cee_trap_continue
+        mov            UDBE_CE, %g1
+
+       .globl          __spitfire_data_access_exception
+       .globl          __spitfire_data_access_exception_tl1
+__spitfire_data_access_exception_tl1:
         rdpr            %pstate, %g4
         wrpr            %g4, PSTATE_MG|PSTATE_AG, %pstate
         mov             TLB_SFSR, %g3
@@ -706,9 +852,25 @@ __do_data_access_exception_tl1:
         ldxa            [%g5] ASI_DMMU, %g5     ! Get SFAR
         stxa            %g0, [%g3] ASI_DMMU     ! Clear SFSR.FaultValid bit
         membar          #Sync
+       rdpr            %tt, %g3
+       cmp             %g3, 0x80               ! first win spill/fill trap
+       blu,pn          %xcc, 1f
+        cmp            %g3, 0xff               ! last win spill/fill trap
+       bgu,pn          %xcc, 1f
+        nop
         ba,pt           %xcc, winfix_dax
          rdpr           %tpc, %g3
-__do_data_access_exception:
+1:     sethi           %hi(109f), %g7
+       ba,pt           %xcc, etraptl1
+109:    or             %g7, %lo(109b), %g7
+       mov             %l4, %o1
+       mov             %l5, %o2
+       call            spitfire_data_access_exception_tl1
+        add            %sp, PTREGS_OFF, %o0
+       ba,pt           %xcc, rtrap
+        clr            %l6
+
+__spitfire_data_access_exception:
         rdpr            %pstate, %g4
         wrpr            %g4, PSTATE_MG|PSTATE_AG, %pstate
         mov             TLB_SFSR, %g3
@@ -722,20 +884,19 @@ __do_data_access_exception:
  109:    or             %g7, %lo(109b), %g7
         mov             %l4, %o1
         mov             %l5, %o2
-       call            data_access_exception
+       call            spitfire_data_access_exception
          add            %sp, PTREGS_OFF, %o0
         ba,pt           %xcc, rtrap
          clr            %l6
  
-       .globl          __do_instruction_access_exception
-       .globl          __do_instruction_access_exception_tl1
-__do_instruction_access_exception_tl1:
+       .globl          __spitfire_insn_access_exception
+       .globl          __spitfire_insn_access_exception_tl1
+__spitfire_insn_access_exception_tl1:
         rdpr            %pstate, %g4
         wrpr            %g4, PSTATE_MG|PSTATE_AG, %pstate
         mov             TLB_SFSR, %g3
-       mov             DMMU_SFAR, %g5
-       ldxa            [%g3] ASI_DMMU, %g4     ! Get SFSR
-       ldxa            [%g5] ASI_DMMU, %g5     ! Get SFAR
+       ldxa            [%g3] ASI_IMMU, %g4     ! Get SFSR
+       rdpr            %tpc, %g5               ! IMMU has no SFAR, use TPC
         stxa            %g0, [%g3] ASI_IMMU     ! Clear FaultValid bit
         membar          #Sync
         sethi           %hi(109f), %g7
@@ -743,18 +904,17 @@ __do_instruction_access_exception_tl1:
  109:    or             %g7, %lo(109b), %g7
         mov             %l4, %o1
         mov             %l5, %o2
-       call            instruction_access_exception_tl1
+       call            spitfire_insn_access_exception_tl1
          add            %sp, PTREGS_OFF, %o0
         ba,pt           %xcc, rtrap
          clr            %l6
  
-__do_instruction_access_exception:
+__spitfire_insn_access_exception:
         rdpr            %pstate, %g4
         wrpr            %g4, PSTATE_MG|PSTATE_AG, %pstate
         mov             TLB_SFSR, %g3
-       mov             DMMU_SFAR, %g5
-       ldxa            [%g3] ASI_DMMU, %g4     ! Get SFSR
-       ldxa            [%g5] ASI_DMMU, %g5     ! Get SFAR
+       ldxa            [%g3] ASI_IMMU, %g4     ! Get SFSR
+       rdpr            %tpc, %g5               ! IMMU has no SFAR, use TPC
         stxa            %g0, [%g3] ASI_IMMU     ! Clear FaultValid bit
         membar          #Sync
         sethi           %hi(109f), %g7
@@ -762,102 +922,11 @@ __do_instruction_access_exception:
  109:    or             %g7, %lo(109b), %g7
         mov             %l4, %o1
         mov             %l5, %o2
-       call            instruction_access_exception
+       call            spitfire_insn_access_exception
          add            %sp, PTREGS_OFF, %o0
         ba,pt           %xcc, rtrap
          clr            %l6
  
-       /* This is the trap handler entry point for ECC correctable
-        * errors.  They are corrected, but we listen for the trap
-        * so that the event can be logged.
-        *
-        * Disrupting errors are either:
-        * 1) single-bit ECC errors during UDB reads to system
-        *    memory
-        * 2) data parity errors during write-back events
-        *
-        * As far as I can make out from the manual, the CEE trap
-        * is only for correctable errors during memory read
-        * accesses by the front-end of the processor.
-        *
-        * The code below is only for trap level 1 CEE events,
-        * as it is the only situation where we can safely record
-        * and log.  For trap level >1 we just clear the CE bit
-        * in the AFSR and return.
-        */
-
-       /* Our trap handling infrastructure allows us to preserve
-        * two 64-bit values during etrap for arguments to
-        * subsequent C code.  Therefore we encode the information
-        * as follows:
-        *
-        * value 1) Full 64-bits of AFAR
-        * value 2) Low 33-bits of AFSR, then bits 33-->42
-        *          are UDBL error status and bits 43-->52
-        *          are UDBH error status
-        */
-       .align  64
-       .globl  cee_trap
-cee_trap:
-       ldxa    [%g0] ASI_AFSR, %g1             ! Read AFSR
-       ldxa    [%g0] ASI_AFAR, %g2             ! Read AFAR
-       sllx    %g1, 31, %g1                    ! Clear reserved bits
-       srlx    %g1, 31, %g1                    ! in AFSR
-
-       /* NOTE: UltraSparc-I/II have high and low UDB error
-        *       registers, corresponding to the two UDB units
-        *       present on those chips.  UltraSparc-IIi only
-        *       has a single UDB, called "SDB" in the manual.
-        *       For IIi the upper UDB register always reads
-        *       as zero so for our purposes things will just
-        *       work with the checks below.
-        */
-       ldxa    [%g0] ASI_UDBL_ERROR_R, %g3     ! Read UDB-Low error status
-       andcc   %g3, (1 << 8), %g4              ! Check CE bit
-       sllx    %g3, (64 - 10), %g3             ! Clear reserved bits
-       srlx    %g3, (64 - 10), %g3             ! in UDB-Low error status
-
-       sllx    %g3, (33 + 0), %g3              ! Shift up to encoding area
-       or      %g1, %g3, %g1                   ! Or it in
-       be,pn   %xcc, 1f                        ! Branch if CE bit was clear
-        nop
-       stxa    %g4, [%g0] ASI_UDB_ERROR_W      ! Clear CE sticky bit in UDBL
-       membar  #Sync                           ! Synchronize ASI stores
-1:     mov     0x18, %g5                       ! Addr of UDB-High error status
-       ldxa    [%g5] ASI_UDBH_ERROR_R, %g3     ! Read it
-
-       andcc   %g3, (1 << 8), %g4              ! Check CE bit
-       sllx    %g3, (64 - 10), %g3             ! Clear reserved bits
-       srlx    %g3, (64 - 10), %g3             ! in UDB-High error status
-       sllx    %g3, (33 + 10), %g3             ! Shift up to encoding area
-       or      %g1, %g3, %g1                   ! Or it in
-       be,pn   %xcc, 1f                        ! Branch if CE bit was clear
-        nop
-       nop
-
-       stxa    %g4, [%g5] ASI_UDB_ERROR_W      ! Clear CE sticky bit in UDBH
-       membar  #Sync                           ! Synchronize ASI stores
-1:     mov     1, %g5                          ! AFSR CE bit is
-       sllx    %g5, 20, %g5                    ! bit 20
-       stxa    %g5, [%g0] ASI_AFSR             ! Clear CE sticky bit in AFSR
-       membar  #Sync                           ! Synchronize ASI stores
-       sllx    %g2, (64 - 41), %g2             ! Clear reserved bits
-       srlx    %g2, (64 - 41), %g2             ! in latched AFAR
-
-       andn    %g2, 0x0f, %g2                  ! Finish resv bit clearing
-       mov     %g1, %g4                        ! Move AFSR+UDB* into save reg
-       mov     %g2, %g5                        ! Move AFAR into save reg
-       rdpr    %pil, %g2
-       wrpr    %g0, 15, %pil
-       ba,pt   %xcc, etrap_irq
-        rd     %pc, %g7
-       mov     %l4, %o0
-
-       mov     %l5, %o1
-       call    cee_log
-        add    %sp, PTREGS_OFF, %o2
-       ba,a,pt %xcc, rtrap_irq
-
         /* Capture I/D/E-cache state into per-cpu error scoreboard.
          *
          * %g1:         (TL>=0) ? 1 : 0
diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c

index bba140d98b1b8c1802664f625b102fb532339f9d..f21c993f885616cf963e8fe804b7303ecdc1e6bc 100644 (file)
--- a/arch/sparc64/kernel/pci.c
+++ b/arch/sparc64/kernel/pci.c
@@ -540,6 +540,7 @@ void pcibios_bus_to_resource(struct pci_dev *pdev, struct resource *res,
  
         pbm->parent->resource_adjust(pdev, res, root);
  }
+EXPORT_SYMBOL(pcibios_bus_to_resource);
  
  char * __init pcibios_setup(char *str)
  {
diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c

index 2803bc7c2c798af12fc0d57b701991bc51727995..425c60cfea195a2b57f4931ceb42a5cb94b970f1 100644 (file)
--- a/arch/sparc64/kernel/pci_iommu.c
+++ b/arch/sparc64/kernel/pci_iommu.c
@@ -466,7 +466,7 @@ do_flush_sync:
                 if (!limit)
                         break;
                 udelay(1);
-               membar("#LoadLoad");
+               rmb();
         }
         if (!limit)
                 printk(KERN_WARNING "pci_strbuf_flush: flushflag timeout "
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c

index 07424b075938b7651e53380cddcf712c42b324e0..66255434128a2e6ebf6b144d5d030003fe4ebaca 100644 (file)
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -103,7 +103,7 @@ void cpu_idle(void)
                  * other cpus see our increasing idleness for the buddy
                  * redistribution algorithm.  -DaveM
                  */
-               membar("#StoreStore | #StoreLoad");
+               membar_storeload_storestore();
         }
  }
  
diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c

index 89f5e019f24c02ade1ed11ab0c6ed721de1b4bfd..e09ddf927655e4f608bdf0cae6e349a26cf6f172 100644 (file)
--- a/arch/sparc64/kernel/sbus.c
+++ b/arch/sparc64/kernel/sbus.c
@@ -147,7 +147,7 @@ static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long
                 if (!limit)
                         break;
                 udelay(1);
-               membar("#LoadLoad");
+               rmb();
         }
         if (!limit)
                 printk(KERN_WARNING "sbus_strbuf_flush: flushflag timeout "
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c

index b7e6a91952b213dacfe1ce43e2497df4192de634..fbdfed3798d883667c7ab70715c0f740075898d2 100644 (file)
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -33,7 +33,6 @@
  #include <linux/cpu.h>
  #include <linux/initrd.h>
  
-#include <asm/segment.h>
  #include <asm/system.h>
  #include <asm/io.h>
  #include <asm/processor.h>
diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c

index b27934671c35324b713d355b6119541b3c8e69df..60f5dfabb1e173bcc08064c54cd64addf16c5030 100644 (file)
--- a/arch/sparc64/kernel/signal.c
+++ b/arch/sparc64/kernel/signal.c
@@ -574,13 +574,12 @@ static inline void handle_signal(unsigned long signr, struct k_sigaction *ka,
  {
         setup_rt_frame(ka, regs, signr, oldset,
                        (ka->sa.sa_flags & SA_SIGINFO) ? info : NULL);
-       if (!(ka->sa.sa_flags & SA_NOMASK)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NOMASK))
                 sigaddset(&current->blocked,signr);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs,
diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c

index f28428f4170e67449259ce79e6a6ab1b0da6b2ca..aecccd0df1d129a6981ae85d86ef87d11bf47638 100644 (file)
--- a/arch/sparc64/kernel/signal32.c
+++ b/arch/sparc64/kernel/signal32.c
@@ -877,11 +877,12 @@ static void new_setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                         unsigned long page = (unsigned long)
                                 page_address(pte_page(*ptep));
  
-                       __asm__ __volatile__(
-                       "       membar  #StoreStore\n"
-                       "       flush   %0 + %1"
-                       : : "r" (page), "r" (address & (PAGE_SIZE - 1))
-                       : "memory");
+                       wmb();
+                       __asm__ __volatile__("flush     %0 + %1"
+                                            : /* no outputs */
+                                            : "r" (page),
+                                              "r" (address & (PAGE_SIZE - 1))
+                                            : "memory");
                 }
                 pte_unmap(ptep);
                 preempt_enable();
@@ -1292,11 +1293,12 @@ static void setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                         unsigned long page = (unsigned long)
                                 page_address(pte_page(*ptep));
  
-                       __asm__ __volatile__(
-                       "       membar  #StoreStore\n"
-                       "       flush   %0 + %1"
-                       : : "r" (page), "r" (address & (PAGE_SIZE - 1))
-                       : "memory");
+                       wmb();
+                       __asm__ __volatile__("flush     %0 + %1"
+                                            : /* no outputs */
+                                            : "r" (page),
+                                              "r" (address & (PAGE_SIZE - 1))
+                                            : "memory");
                 }
                 pte_unmap(ptep);
                 preempt_enable();
@@ -1325,13 +1327,12 @@ static inline void handle_signal32(unsigned long signr, struct k_sigaction *ka,
                 else
                         setup_frame32(&ka->sa, regs, signr, oldset, info);
         }
-       if (!(ka->sa.sa_flags & SA_NOMASK)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NOMASK))
                 sigaddset(&current->blocked,signr);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  static inline void syscall_restart32(unsigned long orig_i0, struct pt_regs *regs,
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c

index b9b42491e118b6e273f40889de677ffb5d98cc1f..b4fc6a5462b2192065052a1693d194e375066910 100644 (file)
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -144,7 +144,7 @@ void __init smp_callin(void)
         current->active_mm = &init_mm;
  
         while (!cpu_isset(cpuid, smp_commenced_mask))
-               membar("#LoadLoad");
+               rmb();
  
         cpu_set(cpuid, cpu_online_map);
  }
@@ -184,11 +184,11 @@ static inline long get_delta (long *rt, long *master)
         for (i = 0; i < NUM_ITERS; i++) {
                 t0 = tick_ops->get_tick();
                 go[MASTER] = 1;
-               membar("#StoreLoad");
+               membar_storeload();
                 while (!(tm = go[SLAVE]))
-                       membar("#LoadLoad");
+                       rmb();
                 go[SLAVE] = 0;
-               membar("#StoreStore");
+               wmb();
                 t1 = tick_ops->get_tick();
  
                 if (t1 - t0 < best_t1 - best_t0)
@@ -221,7 +221,7 @@ void smp_synchronize_tick_client(void)
         go[MASTER] = 1;
  
         while (go[MASTER])
-               membar("#LoadLoad");
+               rmb();
  
         local_irq_save(flags);
         {
@@ -273,21 +273,21 @@ static void smp_synchronize_one_tick(int cpu)
  
         /* wait for client to be ready */
         while (!go[MASTER])
-               membar("#LoadLoad");
+               rmb();
  
         /* now let the client proceed into his loop */
         go[MASTER] = 0;
-       membar("#StoreLoad");
+       membar_storeload();
  
         spin_lock_irqsave(&itc_sync_lock, flags);
         {
                 for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
                         while (!go[MASTER])
-                               membar("#LoadLoad");
+                               rmb();
                         go[MASTER] = 0;
-                       membar("#StoreStore");
+                       wmb();
                         go[SLAVE] = tick_ops->get_tick();
-                       membar("#StoreLoad");
+                       membar_storeload();
                 }
         }
         spin_unlock_irqrestore(&itc_sync_lock, flags);
@@ -927,11 +927,11 @@ void smp_capture(void)
                        smp_processor_id());
  #endif
                 penguins_are_doing_time = 1;
-               membar("#StoreStore | #LoadStore");
+               membar_storestore_loadstore();
                 atomic_inc(&smp_capture_registry);
                 smp_cross_call(&xcall_capture, 0, 0, 0);
                 while (atomic_read(&smp_capture_registry) != ncpus)
-                       membar("#LoadLoad");
+                       rmb();
  #ifdef CAPTURE_DEBUG
                 printk("done\n");
  #endif
@@ -947,7 +947,7 @@ void smp_release(void)
                        smp_processor_id());
  #endif
                 penguins_are_doing_time = 0;
-               membar("#StoreStore | #StoreLoad");
+               membar_storeload_storestore();
                 atomic_dec(&smp_capture_registry);
         }
  }
@@ -970,9 +970,9 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
         save_alternate_globals(global_save);
         prom_world(1);
         atomic_inc(&smp_capture_registry);
-       membar("#StoreLoad | #StoreStore");
+       membar_storeload_storestore();
         while (penguins_are_doing_time)
-               membar("#LoadLoad");
+               rmb();
         restore_alternate_globals(global_save);
         atomic_dec(&smp_capture_registry);
         prom_world(0);
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c

index 9202d925a9ce6961816b5920f5cd9e5bc8758c58..a3ea697f1adbffb88f7de3ec9d5c4a8fe969da19 100644 (file)
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -99,17 +99,6 @@ extern int __ashrdi3(int, int);
  extern void dump_thread(struct pt_regs *, struct user *);
  extern int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs);
  
-#if defined(CONFIG_SMP) && defined(CONFIG_DEBUG_SPINLOCK)
-extern void _do_spin_lock (spinlock_t *lock, char *str);
-extern void _do_spin_unlock (spinlock_t *lock);
-extern int _spin_trylock (spinlock_t *lock);
-extern void _do_read_lock(rwlock_t *rw, char *str);
-extern void _do_read_unlock(rwlock_t *rw, char *str);
-extern void _do_write_lock(rwlock_t *rw, char *str);
-extern void _do_write_unlock(rwlock_t *rw);
-extern int _do_write_trylock(rwlock_t *rw, char *str);
-#endif
-
  extern unsigned long phys_base;
  extern unsigned long pfn_base;
  
@@ -152,18 +141,6 @@ EXPORT_SYMBOL(_mcount);
  EXPORT_SYMBOL(cpu_online_map);
  EXPORT_SYMBOL(phys_cpu_present_map);
  
-/* Spinlock debugging library, optional. */
-#ifdef CONFIG_DEBUG_SPINLOCK
-EXPORT_SYMBOL(_do_spin_lock);
-EXPORT_SYMBOL(_do_spin_unlock);
-EXPORT_SYMBOL(_spin_trylock);
-EXPORT_SYMBOL(_do_read_lock);
-EXPORT_SYMBOL(_do_read_unlock);
-EXPORT_SYMBOL(_do_write_lock);
-EXPORT_SYMBOL(_do_write_unlock);
-EXPORT_SYMBOL(_do_write_trylock);
-#endif
-
  EXPORT_SYMBOL(smp_call_function);
  #endif /* CONFIG_SMP */
  
@@ -429,3 +406,12 @@ EXPORT_SYMBOL(xor_vis_4);
  EXPORT_SYMBOL(xor_vis_5);
  
  EXPORT_SYMBOL(prom_palette);
+
+/* memory barriers */
+EXPORT_SYMBOL(mb);
+EXPORT_SYMBOL(rmb);
+EXPORT_SYMBOL(wmb);
+EXPORT_SYMBOL(membar_storeload);
+EXPORT_SYMBOL(membar_storeload_storestore);
+EXPORT_SYMBOL(membar_storeload_loadload);
+EXPORT_SYMBOL(membar_storestore_loadstore);
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c

index 100b0107c4be89b1484c8b370dd8898871cfe10f..b280b2ef674f4a80b57cfaf36dec0140be727a66 100644 (file)
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -33,6 +33,7 @@
  #include <asm/dcu.h>
  #include <asm/estate.h>
  #include <asm/chafsr.h>
+#include <asm/sfafsr.h>
  #include <asm/psrcompat.h>
  #include <asm/processor.h>
  #include <asm/timer.h>
@@ -143,8 +144,7 @@ void do_BUG(const char *file, int line)
  }
  #endif
  
-void instruction_access_exception(struct pt_regs *regs,
-                                 unsigned long sfsr, unsigned long sfar)
+void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
  {
         siginfo_t info;
  
@@ -153,8 +153,8 @@ void instruction_access_exception(struct pt_regs *regs,
                 return;
  
         if (regs->tstate & TSTATE_PRIV) {
-               printk("instruction_access_exception: SFSR[%016lx] SFAR[%016lx], going.\n",
-                      sfsr, sfar);
+               printk("spitfire_insn_access_exception: SFSR[%016lx] "
+                      "SFAR[%016lx], going.\n", sfsr, sfar);
                 die_if_kernel("Iax", regs);
         }
         if (test_thread_flag(TIF_32BIT)) {
@@ -169,19 +169,17 @@ void instruction_access_exception(struct pt_regs *regs,
         force_sig_info(SIGSEGV, &info, current);
  }
  
-void instruction_access_exception_tl1(struct pt_regs *regs,
-                                     unsigned long sfsr, unsigned long sfar)
+void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
  {
         if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs,
                        0, 0x8, SIGTRAP) == NOTIFY_STOP)
                 return;
  
         dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
-       instruction_access_exception(regs, sfsr, sfar);
+       spitfire_insn_access_exception(regs, sfsr, sfar);
  }
  
-void data_access_exception(struct pt_regs *regs,
-                          unsigned long sfsr, unsigned long sfar)
+void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
  {
         siginfo_t info;
  
@@ -207,8 +205,8 @@ void data_access_exception(struct pt_regs *regs,
                         return;
                 }
                 /* Shit... */
-               printk("data_access_exception: SFSR[%016lx] SFAR[%016lx], going.\n",
-                      sfsr, sfar);
+               printk("spitfire_data_access_exception: SFSR[%016lx] "
+                      "SFAR[%016lx], going.\n", sfsr, sfar);
                 die_if_kernel("Dax", regs);
         }
  
@@ -220,6 +218,16 @@ void data_access_exception(struct pt_regs *regs,
         force_sig_info(SIGSEGV, &info, current);
  }
  
+void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
+{
+       if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs,
+                      0, 0x30, SIGTRAP) == NOTIFY_STOP)
+               return;
+
+       dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+       spitfire_data_access_exception(regs, sfsr, sfar);
+}
+
  #ifdef CONFIG_PCI
  /* This is really pathetic... */
  extern volatile int pci_poke_in_progress;
@@ -253,54 +261,13 @@ static void spitfire_clean_and_reenable_l1_caches(void)
                              : "memory");
  }
  
-void do_iae(struct pt_regs *regs)
+static void spitfire_enable_estate_errors(void)
  {
-       siginfo_t info;
-
-       spitfire_clean_and_reenable_l1_caches();
-
-       if (notify_die(DIE_TRAP, "instruction access exception", regs,
-                      0, 0x8, SIGTRAP) == NOTIFY_STOP)
-               return;
-
-       info.si_signo = SIGBUS;
-       info.si_errno = 0;
-       info.si_code = BUS_OBJERR;
-       info.si_addr = (void *)0;
-       info.si_trapno = 0;
-       force_sig_info(SIGBUS, &info, current);
-}
-
-void do_dae(struct pt_regs *regs)
-{
-       siginfo_t info;
-
-#ifdef CONFIG_PCI
-       if (pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) {
-               spitfire_clean_and_reenable_l1_caches();
-
-               pci_poke_faulted = 1;
-
-               /* Why the fuck did they have to change this? */
-               if (tlb_type == cheetah || tlb_type == cheetah_plus)
-                       regs->tpc += 4;
-
-               regs->tnpc = regs->tpc + 4;
-               return;
-       }
-#endif
-       spitfire_clean_and_reenable_l1_caches();
-
-       if (notify_die(DIE_TRAP, "data access exception", regs,
-                      0, 0x30, SIGTRAP) == NOTIFY_STOP)
-               return;
-
-       info.si_signo = SIGBUS;
-       info.si_errno = 0;
-       info.si_code = BUS_OBJERR;
-       info.si_addr = (void *)0;
-       info.si_trapno = 0;
-       force_sig_info(SIGBUS, &info, current);
+       __asm__ __volatile__("stxa      %0, [%%g0] %1\n\t"
+                            "membar    #Sync"
+                            : /* no outputs */
+                            : "r" (ESTATE_ERR_ALL),
+                              "i" (ASI_ESTATE_ERROR_EN));
  }
  
  static char ecc_syndrome_table[] = {
@@ -338,65 +305,15 @@ static char ecc_syndrome_table[] = {
         0x0b, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x4b, 0x4a
  };
  
-/* cee_trap in entry.S encodes AFSR/UDBH/UDBL error status
- * in the following format.  The AFAR is left as is, with
- * reserved bits cleared, and is a raw 40-bit physical
- * address.
- */
-#define CE_STATUS_UDBH_UE              (1UL << (43 + 9))
-#define CE_STATUS_UDBH_CE              (1UL << (43 + 8))
-#define CE_STATUS_UDBH_ESYNDR          (0xffUL << 43)
-#define CE_STATUS_UDBH_SHIFT           43
-#define CE_STATUS_UDBL_UE              (1UL << (33 + 9))
-#define CE_STATUS_UDBL_CE              (1UL << (33 + 8))
-#define CE_STATUS_UDBL_ESYNDR          (0xffUL << 33)
-#define CE_STATUS_UDBL_SHIFT           33
-#define CE_STATUS_AFSR_MASK            (0x1ffffffffUL)
-#define CE_STATUS_AFSR_ME              (1UL << 32)
-#define CE_STATUS_AFSR_PRIV            (1UL << 31)
-#define CE_STATUS_AFSR_ISAP            (1UL << 30)
-#define CE_STATUS_AFSR_ETP             (1UL << 29)
-#define CE_STATUS_AFSR_IVUE            (1UL << 28)
-#define CE_STATUS_AFSR_TO              (1UL << 27)
-#define CE_STATUS_AFSR_BERR            (1UL << 26)
-#define CE_STATUS_AFSR_LDP             (1UL << 25)
-#define CE_STATUS_AFSR_CP              (1UL << 24)
-#define CE_STATUS_AFSR_WP              (1UL << 23)
-#define CE_STATUS_AFSR_EDP             (1UL << 22)
-#define CE_STATUS_AFSR_UE              (1UL << 21)
-#define CE_STATUS_AFSR_CE              (1UL << 20)
-#define CE_STATUS_AFSR_ETS             (0xfUL << 16)
-#define CE_STATUS_AFSR_ETS_SHIFT       16
-#define CE_STATUS_AFSR_PSYND           (0xffffUL << 0)
-#define CE_STATUS_AFSR_PSYND_SHIFT     0
-
-/* Layout of Ecache TAG Parity Syndrome of AFSR */
-#define AFSR_ETSYNDROME_7_0            0x1UL /* E$-tag bus bits  <7:0> */
-#define AFSR_ETSYNDROME_15_8           0x2UL /* E$-tag bus bits <15:8> */
-#define AFSR_ETSYNDROME_21_16          0x4UL /* E$-tag bus bits <21:16> */
-#define AFSR_ETSYNDROME_24_22          0x8UL /* E$-tag bus bits <24:22> */
-
  static char *syndrome_unknown = "<Unknown>";
  
-asmlinkage void cee_log(unsigned long ce_status,
-                       unsigned long afar,
-                       struct pt_regs *regs)
+static void spitfire_log_udb_syndrome(unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long bit)
  {
-       char memmod_str[64];
-       char *p;
-       unsigned short scode, udb_reg;
+       unsigned short scode;
+       char memmod_str[64], *p;
  
-       printk(KERN_WARNING "CPU[%d]: Correctable ECC Error "
-              "AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx]\n",
-              smp_processor_id(),
-              (ce_status & CE_STATUS_AFSR_MASK),
-              afar,
-              ((ce_status >> CE_STATUS_UDBL_SHIFT) & 0x3ffUL),
-              ((ce_status >> CE_STATUS_UDBH_SHIFT) & 0x3ffUL));
-
-       udb_reg = ((ce_status >> CE_STATUS_UDBL_SHIFT) & 0x3ffUL);
-       if (udb_reg & (1 << 8)) {
-               scode = ecc_syndrome_table[udb_reg & 0xff];
+       if (udbl & bit) {
+               scode = ecc_syndrome_table[udbl & 0xff];
                 if (prom_getunumber(scode, afar,
                                     memmod_str, sizeof(memmod_str)) == -1)
                         p = syndrome_unknown;
@@ -407,9 +324,8 @@ asmlinkage void cee_log(unsigned long ce_status,
                        smp_processor_id(), scode, p);
         }
  
-       udb_reg = ((ce_status >> CE_STATUS_UDBH_SHIFT) & 0x3ffUL);
-       if (udb_reg & (1 << 8)) {
-               scode = ecc_syndrome_table[udb_reg & 0xff];
+       if (udbh & bit) {
+               scode = ecc_syndrome_table[udbh & 0xff];
                 if (prom_getunumber(scode, afar,
                                     memmod_str, sizeof(memmod_str)) == -1)
                         p = syndrome_unknown;
@@ -419,6 +335,127 @@ asmlinkage void cee_log(unsigned long ce_status,
                        "Memory Module \"%s\"\n",
                        smp_processor_id(), scode, p);
         }
+
+}
+
+static void spitfire_cee_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, int tl1, struct pt_regs *regs)
+{
+       /* Report a correctable ECC error: raw registers first, then syndromes. */
+       printk(KERN_WARNING "CPU[%d]: Correctable ECC Error "
+              "AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx] TL>1[%d]\n",
+              smp_processor_id(), afsr, afar, udbl, udbh, tl1);
+
+       spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_CE);
+
+       /* We always log it, even if someone is listening for this
+        * trap.
+        */
+       notify_die(DIE_TRAP, "Correctable ECC Error", regs,
+                  0, TRAP_TYPE_CEE, SIGTRAP);
+
+       /* The Correctable ECC Error trap does not disable I/D caches.  So
+        * we only have to restore the ESTATE Error Enable register.
+        */
+       spitfire_enable_estate_errors();
+}
+
+static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long tt, int tl1, struct pt_regs *regs)
+{
+       siginfo_t info;         /* SIGBUS payload forced on current at the end */
+
+       printk(KERN_WARNING "CPU[%d]: Uncorrectable Error AFSR[%lx] "
+              "AFAR[%lx] UDBL[%lx] UDBH[%lx] TT[%lx] TL>1[%d]\n",
+              smp_processor_id(), afsr, afar, udbl, udbh, tt, tl1);
+
+       /* Registers above are dumped raw, in hex, as spitfire_cee_log() does.
+        * XXX add more human friendly logging, as is implemented for cheetah
+        */
+
+       spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_UE);
+
+       /* We always log it, even if someone is listening for this
+        * trap.
+        */
+       notify_die(DIE_TRAP, "Uncorrectable Error", regs,
+                  0, tt, SIGTRAP);
+
+       if (regs->tstate & TSTATE_PRIV) {
+               if (tl1)
+                       dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+               die_if_kernel("UE", regs);
+       }
+
+       /* XXX need more intelligent processing here, such as is implemented
+        * XXX for cheetah errors, in fact if the E-cache still holds the
+        * XXX line with bad parity this will loop
+        */
+
+       spitfire_clean_and_reenable_l1_caches();
+       spitfire_enable_estate_errors();
+
+       if (test_thread_flag(TIF_32BIT)) {
+               regs->tpc &= 0xffffffff;
+               regs->tnpc &= 0xffffffff;
+       }
+       info.si_signo = SIGBUS;
+       info.si_errno = 0;
+       info.si_code = BUS_OBJERR;
+       info.si_addr = (void *)0;
+       info.si_trapno = 0;
+       force_sig_info(SIGBUS, &info, current);
+}
+
+void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar)
+{
+       unsigned long afsr, tt, udbh, udbl;
+       int tl1;
+       /* Split status_encoded into its AFSR, trap type, TL>1 and UDB fields. */
+       afsr = (status_encoded & SFSTAT_AFSR_MASK) >> SFSTAT_AFSR_SHIFT;
+       tt = (status_encoded & SFSTAT_TRAP_TYPE) >> SFSTAT_TRAP_TYPE_SHIFT;
+       tl1 = (status_encoded & SFSTAT_TL_GT_ONE) ? 1 : 0;
+       udbl = (status_encoded & SFSTAT_UDBL_MASK) >> SFSTAT_UDBL_SHIFT;
+       udbh = (status_encoded & SFSTAT_UDBH_MASK) >> SFSTAT_UDBH_SHIFT;
+
+#ifdef CONFIG_PCI
+       if (tt == TRAP_TYPE_DAE &&
+           pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) {
+               spitfire_clean_and_reenable_l1_caches();
+               spitfire_enable_estate_errors();
+
+               pci_poke_faulted = 1;   /* flag the fault; return without logging */
+               regs->tnpc = regs->tpc + 4;
+               return;
+       }
+#endif
+
+       if (afsr & SFAFSR_UE)   /* UE is logged first, whatever the trap type */
+               spitfire_ue_log(afsr, afar, udbh, udbl, tt, tl1, regs);
+
+       if (tt == TRAP_TYPE_CEE) {
+               /* Handle the case where we took a CEE trap, but ACK'd
+                * only the UE state in the UDB error registers.
+                */
+               if (afsr & SFAFSR_UE) {
+                       if (udbh & UDBE_CE) {
+                               __asm__ __volatile__(
+                                       "stxa   %0, [%1] %2\n\t"
+                                       "membar #Sync"
+                                       : /* no outputs */
+                                       : "r" (udbh & UDBE_CE),
+                                         "r" (0x0), "i" (ASI_UDB_ERROR_W)); /* udbh: address 0x0 */
+                       }
+                       if (udbl & UDBE_CE) {
+                               __asm__ __volatile__(
+                                       "stxa   %0, [%1] %2\n\t"
+                                       "membar #Sync"
+                                       : /* no outputs */
+                                       : "r" (udbl & UDBE_CE),
+                                         "r" (0x18), "i" (ASI_UDB_ERROR_W)); /* udbl: address 0x18 */
+                       }
+               }
+
+               spitfire_cee_log(afsr, afar, udbh, udbl, tl1, regs);
+       }
  }
  
  int cheetah_pcache_forced_on;
@@ -2127,6 +2164,9 @@ void __init trap_init(void)
             TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) ||
             TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
             TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) ||
+           TI_RESTART_BLOCK != offsetof(struct thread_info, restart_block) ||
+           TI_KUNA_REGS != offsetof(struct thread_info, kern_una_regs) ||
+           TI_KUNA_INSN != offsetof(struct thread_info, kern_una_insn) ||
             TI_FPREGS != offsetof(struct thread_info, fpregs) ||
             (TI_FPREGS & (64 - 1)))
                 thread_info_offsets_are_bolixed_dave();
diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S

index 491bb3681f9d7d5c90a32261b7c37c9da7c07ee5..8365bc1f81f3105f969ad938c3b3de3cebf221e7 100644 (file)
--- a/arch/sparc64/kernel/ttable.S
+++ b/arch/sparc64/kernel/ttable.S
@@ -18,9 +18,10 @@ sparc64_ttable_tl0:
  tl0_resv000:   BOOT_KERNEL BTRAP(0x1) BTRAP(0x2) BTRAP(0x3)
  tl0_resv004:   BTRAP(0x4)  BTRAP(0x5) BTRAP(0x6) BTRAP(0x7)
  tl0_iax:       membar #Sync
-               TRAP_NOSAVE_7INSNS(__do_instruction_access_exception)
+               TRAP_NOSAVE_7INSNS(__spitfire_insn_access_exception)
  tl0_resv009:   BTRAP(0x9)
-tl0_iae:       TRAP(do_iae)
+tl0_iae:       membar #Sync
+               TRAP_NOSAVE_7INSNS(__spitfire_access_error)
  tl0_resv00b:   BTRAP(0xb) BTRAP(0xc) BTRAP(0xd) BTRAP(0xe) BTRAP(0xf)
  tl0_ill:       membar #Sync
                 TRAP_7INSNS(do_illegal_instruction)
@@ -36,9 +37,10 @@ tl0_cwin:    CLEAN_WINDOW
  tl0_div0:      TRAP(do_div0)
  tl0_resv029:   BTRAP(0x29) BTRAP(0x2a) BTRAP(0x2b) BTRAP(0x2c) BTRAP(0x2d) BTRAP(0x2e)
  tl0_resv02f:   BTRAP(0x2f)
-tl0_dax:       TRAP_NOSAVE(__do_data_access_exception)
+tl0_dax:       TRAP_NOSAVE(__spitfire_data_access_exception)
  tl0_resv031:   BTRAP(0x31)
-tl0_dae:       TRAP(do_dae)
+tl0_dae:       membar #Sync
+               TRAP_NOSAVE_7INSNS(__spitfire_access_error)
  tl0_resv033:   BTRAP(0x33)
  tl0_mna:       TRAP_NOSAVE(do_mna)
  tl0_lddfmna:   TRAP_NOSAVE(do_lddfmna)
@@ -73,7 +75,8 @@ tl0_resv05c:  BTRAP(0x5c) BTRAP(0x5d) BTRAP(0x5e) BTRAP(0x5f)
  tl0_ivec:      TRAP_IVEC
  tl0_paw:       TRAP(do_paw)
  tl0_vaw:       TRAP(do_vaw)
-tl0_cee:       TRAP_NOSAVE(cee_trap)
+tl0_cee:       membar #Sync
+               TRAP_NOSAVE_7INSNS(__spitfire_cee_trap)
  tl0_iamiss:
  #include       "itlb_base.S"
  tl0_damiss:
@@ -175,9 +178,10 @@ tl0_resv1f0:       BTRAPS(0x1f0) BTRAPS(0x1f8)
  sparc64_ttable_tl1:
  tl1_resv000:   BOOT_KERNEL    BTRAPTL1(0x1) BTRAPTL1(0x2) BTRAPTL1(0x3)
  tl1_resv004:   BTRAPTL1(0x4)  BTRAPTL1(0x5) BTRAPTL1(0x6) BTRAPTL1(0x7)
-tl1_iax:       TRAP_NOSAVE(__do_instruction_access_exception_tl1)
+tl1_iax:       TRAP_NOSAVE(__spitfire_insn_access_exception_tl1)
  tl1_resv009:   BTRAPTL1(0x9)
-tl1_iae:       TRAPTL1(do_iae_tl1)
+tl1_iae:       membar #Sync
+               TRAP_NOSAVE_7INSNS(__spitfire_access_error)
  tl1_resv00b:   BTRAPTL1(0xb) BTRAPTL1(0xc) BTRAPTL1(0xd) BTRAPTL1(0xe) BTRAPTL1(0xf)
  tl1_ill:       TRAPTL1(do_ill_tl1)
  tl1_privop:    BTRAPTL1(0x11)
@@ -193,9 +197,10 @@ tl1_cwin:  CLEAN_WINDOW
  tl1_div0:      TRAPTL1(do_div0_tl1)
  tl1_resv029:   BTRAPTL1(0x29) BTRAPTL1(0x2a) BTRAPTL1(0x2b) BTRAPTL1(0x2c)
  tl1_resv02d:   BTRAPTL1(0x2d) BTRAPTL1(0x2e) BTRAPTL1(0x2f)
-tl1_dax:       TRAP_NOSAVE(__do_data_access_exception_tl1)
+tl1_dax:       TRAP_NOSAVE(__spitfire_data_access_exception_tl1)
  tl1_resv031:   BTRAPTL1(0x31)
-tl1_dae:       TRAPTL1(do_dae_tl1)
+tl1_dae:       membar #Sync
+               TRAP_NOSAVE_7INSNS(__spitfire_access_error)
  tl1_resv033:   BTRAPTL1(0x33)
  tl1_mna:       TRAP_NOSAVE(do_mna)
  tl1_lddfmna:   TRAPTL1(do_lddfmna_tl1)
@@ -219,8 +224,8 @@ tl1_paw:    TRAPTL1(do_paw_tl1)
  tl1_vaw:       TRAPTL1(do_vaw_tl1)
  
                 /* The grotty trick to save %g1 into current->thread.cee_stuff
-                * is because when we take this trap we could be interrupting trap
-                * code already using the trap alternate global registers.
+                * is because when we take this trap we could be interrupting
+                * trap code already using the trap alternate global registers.
                  *
                  * We cross our fingers and pray that this store/load does
                  * not cause yet another CEE trap.
diff --git a/arch/sparc64/kernel/una_asm.S b/arch/sparc64/kernel/una_asm.S

new file mode 100644 (file)

index 0000000..cbb4058
--- /dev/null
+++ b/arch/sparc64/kernel/una_asm.S
@@ -0,0 +1,153 @@
+/* una_asm.S: Kernel unaligned trap assembler helpers.
+ *
+ * Copyright (C) 1996,2005 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+       .text
+
+kernel_unaligned_trap_fault:           /* fixup target named by every __ex_table entry in this file */
+       call    kernel_mna_trap_fault
+        nop
+       retl                            /* NOTE(review): the call above rewrote %o7; confirm the return target */
+        nop
+       .size   kernel_unaligned_trap_fault, .-kernel_unaligned_trap_fault
+
+       .globl  __do_int_store
+__do_int_store:                        /* per the C prototype: %o0 dst_addr, %o1 size, %o2 src_val, %o3 asi */
+       rd      %asi, %o4               /* save the current %asi; written back at 0: below */
+       wr      %o3, 0, %asi            /* every stba below goes through the requested ASI */
+       ldx     [%o2], %g3              /* %g3 = 64-bit value read from *src_val */
+       cmp     %o1, 2
+       be,pn   %icc, 2f                /* size == 2 */
+        cmp    %o1, 4
+       be,pt   %icc, 1f                /* size == 4; any other size falls into the 8-byte path */
+        srlx   %g3, 24, %g2            /* delay slot: %g2 feeds both the 4- and 8-byte paths */
+       srlx    %g3, 56, %g1
+       srlx    %g3, 48, %g7
+4:     stba    %g1, [%o0] %asi         /* 8-byte path: most significant byte first */
+       srlx    %g3, 40, %g1
+5:     stba    %g7, [%o0 + 1] %asi     /* numbered labels are the stores listed in __ex_table */
+       srlx    %g3, 32, %g7
+6:     stba    %g1, [%o0 + 2] %asi
+7:     stba    %g7, [%o0 + 3] %asi
+       srlx    %g3, 16, %g1
+8:     stba    %g2, [%o0 + 4] %asi
+       srlx    %g3, 8, %g7
+9:     stba    %g1, [%o0 + 5] %asi
+10:    stba    %g7, [%o0 + 6] %asi
+       ba,pt   %xcc, 0f
+11:     stba   %g3, [%o0 + 7] %asi     /* delay slot: last byte of the 8-byte path */
+1:     srl     %g3, 16, %g7            /* 4-byte path */
+12:    stba    %g2, [%o0] %asi
+       srl     %g3, 8, %g2
+13:    stba    %g7, [%o0 + 1] %asi
+14:    stba    %g2, [%o0 + 2] %asi
+       ba,pt   %xcc, 0f
+15:     stba   %g3, [%o0 + 3] %asi     /* delay slot: last byte of the 4-byte path */
+2:     srl     %g3, 8, %g2             /* 2-byte path */
+16:    stba    %g2, [%o0] %asi
+17:    stba    %g3, [%o0 + 1] %asi
+0:
+       wr      %o4, 0x0, %asi          /* restore the %asi saved on entry */
+       retl
+        nop
+       .size   __do_int_store, .-__do_int_store
+
+       .section        __ex_table
+       .word           4b, kernel_unaligned_trap_fault
+       .word           5b, kernel_unaligned_trap_fault
+       .word           6b, kernel_unaligned_trap_fault
+       .word           7b, kernel_unaligned_trap_fault
+       .word           8b, kernel_unaligned_trap_fault
+       .word           9b, kernel_unaligned_trap_fault
+       .word           10b, kernel_unaligned_trap_fault
+       .word           11b, kernel_unaligned_trap_fault
+       .word           12b, kernel_unaligned_trap_fault
+       .word           13b, kernel_unaligned_trap_fault
+       .word           14b, kernel_unaligned_trap_fault
+       .word           15b, kernel_unaligned_trap_fault
+       .word           16b, kernel_unaligned_trap_fault
+       .word           17b, kernel_unaligned_trap_fault
+       .previous
+
+       .globl  do_int_load
+do_int_load:                           /* per the C prototype: %o0 dest_reg, %o1 size, %o2 saddr, %o3 is_signed, %o4 asi */
+       rd      %asi, %o5               /* save the current %asi; written back at 0: below */
+       wr      %o4, 0, %asi            /* every lduba below goes through the requested ASI */
+       cmp     %o1, 8
+       bge,pn  %icc, 9f                /* size >= 8 */
+        cmp    %o1, 4
+       be,pt   %icc, 6f                /* size == 4; otherwise fall into the 2-byte path */
+4:      lduba  [%o2] %asi, %g2         /* delay slot: first byte, shared by the 2- and 4-byte paths */
+5:     lduba   [%o2 + 1] %asi, %g3
+       sll     %g2, 8, %g2
+       brz,pt  %o3, 3f                 /* unsigned: skip the sign extension */
+        add    %g2, %g3, %g2
+       sllx    %g2, 48, %g2            /* sign-extend the 16-bit result */
+       srax    %g2, 48, %g2
+3:     ba,pt   %xcc, 0f
+        stx    %g2, [%o0]
+6:     lduba   [%o2 + 1] %asi, %g3     /* 4-byte path */
+       sll     %g2, 24, %g2
+7:     lduba   [%o2 + 2] %asi, %g7
+       sll     %g3, 16, %g3
+8:     lduba   [%o2 + 3] %asi, %g1
+       sll     %g7, 8, %g7
+       or      %g2, %g3, %g2
+       or      %g7, %g1, %g7
+       or      %g2, %g7, %g2
+       brnz,a,pt %o3, 3f
+        sra    %g2, 0, %g2             /* annulled delay slot: sign-extend only when is_signed != 0 */
+3:     ba,pt   %xcc, 0f
+        stx    %g2, [%o0]
+9:     lduba   [%o2] %asi, %g2         /* 8-byte gather, most significant byte first */
+10:    lduba   [%o2 + 1] %asi, %g3
+       sllx    %g2, 56, %g2
+11:    lduba   [%o2 + 2] %asi, %g7
+       sllx    %g3, 48, %g3
+12:    lduba   [%o2 + 3] %asi, %g1
+       sllx    %g7, 40, %g7
+       sllx    %g1, 32, %g1
+       or      %g2, %g3, %g2
+       or      %g7, %g1, %g7
+13:    lduba   [%o2 + 4] %asi, %g3
+       or      %g2, %g7, %g7
+14:    lduba   [%o2 + 5] %asi, %g1
+       sllx    %g3, 24, %g3
+15:    lduba   [%o2 + 6] %asi, %g2
+       sllx    %g1, 16, %g1
+       or      %g7, %g3, %g7
+16:    lduba   [%o2 + 7] %asi, %g3
+       sllx    %g2, 8, %g2
+       or      %g7, %g1, %g7
+       or      %g2, %g3, %g2
+       or      %g7, %g2, %g7
+       cmp     %o1, 8
+       be,a,pt %icc, 0f                /* size == 8: one 64-bit store */
+        stx    %g7, [%o0]
+       srlx    %g7, 32, %g2            /* size > 8: upper word to [%o0], lower word to [%o0 + 8] */
+       sra     %g7, 0, %g7
+       stx     %g2, [%o0]
+       stx     %g7, [%o0 + 8]
+0:
+       wr      %o5, 0x0, %asi          /* restore the %asi saved on entry */
+       retl
+        nop
+       .size   do_int_load, .-do_int_load
+
+       .section        __ex_table
+       .word           4b, kernel_unaligned_trap_fault
+       .word           5b, kernel_unaligned_trap_fault
+       .word           6b, kernel_unaligned_trap_fault
+       .word           7b, kernel_unaligned_trap_fault
+       .word           8b, kernel_unaligned_trap_fault
+       .word           9b, kernel_unaligned_trap_fault
+       .word           10b, kernel_unaligned_trap_fault
+       .word           11b, kernel_unaligned_trap_fault
+       .word           12b, kernel_unaligned_trap_fault
+       .word           13b, kernel_unaligned_trap_fault
+       .word           14b, kernel_unaligned_trap_fault
+       .word           15b, kernel_unaligned_trap_fault
+       .word           16b, kernel_unaligned_trap_fault
+       .previous
diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c

index 4372bf32ecf6f28eb69ff2b8e31b3e7197b5af5d..da9739f0d43723cbee80f87cf9f491bff1ea11f3 100644 (file)
--- a/arch/sparc64/kernel/unaligned.c
+++ b/arch/sparc64/kernel/unaligned.c
@@ -180,169 +180,28 @@ static void __attribute_used__ unaligned_panic(char *str, struct pt_regs *regs)
         die_if_kernel(str, regs);
  }
  
-#define do_integer_load(dest_reg, size, saddr, is_signed, asi, errh) ({                \
-__asm__ __volatile__ (                                                         \
-       "wr     %4, 0, %%asi\n\t"                                               \
-       "cmp    %1, 8\n\t"                                                      \
-       "bge,pn %%icc, 9f\n\t"                                                  \
-       " cmp   %1, 4\n\t"                                                      \
-       "be,pt  %%icc, 6f\n"                                                    \
-"4:\t" " lduba [%2] %%asi, %%l1\n"                                             \
-"5:\t" "lduba  [%2 + 1] %%asi, %%l2\n\t"                                       \
-       "sll    %%l1, 8, %%l1\n\t"                                              \
-       "brz,pt %3, 3f\n\t"                                                     \
-       " add   %%l1, %%l2, %%l1\n\t"                                           \
-       "sllx   %%l1, 48, %%l1\n\t"                                             \
-       "srax   %%l1, 48, %%l1\n"                                               \
-"3:\t" "ba,pt  %%xcc, 0f\n\t"                                                  \
-       " stx   %%l1, [%0]\n"                                                   \
-"6:\t" "lduba  [%2 + 1] %%asi, %%l2\n\t"                                       \
-       "sll    %%l1, 24, %%l1\n"                                               \
-"7:\t" "lduba  [%2 + 2] %%asi, %%g7\n\t"                                       \
-       "sll    %%l2, 16, %%l2\n"                                               \
-"8:\t" "lduba  [%2 + 3] %%asi, %%g1\n\t"                                       \
-       "sll    %%g7, 8, %%g7\n\t"                                              \
-       "or     %%l1, %%l2, %%l1\n\t"                                           \
-       "or     %%g7, %%g1, %%g7\n\t"                                           \
-       "or     %%l1, %%g7, %%l1\n\t"                                           \
-       "brnz,a,pt %3, 3f\n\t"                                                  \
-       " sra   %%l1, 0, %%l1\n"                                                \
-"3:\t" "ba,pt  %%xcc, 0f\n\t"                                                  \
-       " stx   %%l1, [%0]\n"                                                   \
-"9:\t" "lduba  [%2] %%asi, %%l1\n"                                             \
-"10:\t"        "lduba  [%2 + 1] %%asi, %%l2\n\t"                                       \
-       "sllx   %%l1, 56, %%l1\n"                                               \
-"11:\t"        "lduba  [%2 + 2] %%asi, %%g7\n\t"                                       \
-       "sllx   %%l2, 48, %%l2\n"                                               \
-"12:\t"        "lduba  [%2 + 3] %%asi, %%g1\n\t"                                       \
-       "sllx   %%g7, 40, %%g7\n\t"                                             \
-       "sllx   %%g1, 32, %%g1\n\t"                                             \
-       "or     %%l1, %%l2, %%l1\n\t"                                           \
-       "or     %%g7, %%g1, %%g7\n"                                             \
-"13:\t"        "lduba  [%2 + 4] %%asi, %%l2\n\t"                                       \
-       "or     %%l1, %%g7, %%g7\n"                                             \
-"14:\t"        "lduba  [%2 + 5] %%asi, %%g1\n\t"                                       \
-       "sllx   %%l2, 24, %%l2\n"                                               \
-"15:\t"        "lduba  [%2 + 6] %%asi, %%l1\n\t"                                       \
-       "sllx   %%g1, 16, %%g1\n\t"                                             \
-       "or     %%g7, %%l2, %%g7\n"                                             \
-"16:\t"        "lduba  [%2 + 7] %%asi, %%l2\n\t"                                       \
-       "sllx   %%l1, 8, %%l1\n\t"                                              \
-       "or     %%g7, %%g1, %%g7\n\t"                                           \
-       "or     %%l1, %%l2, %%l1\n\t"                                           \
-       "or     %%g7, %%l1, %%g7\n\t"                                           \
-       "cmp    %1, 8\n\t"                                                      \
-       "be,a,pt %%icc, 0f\n\t"                                                 \
-       " stx   %%g7, [%0]\n\t"                                                 \
-       "srlx   %%g7, 32, %%l1\n\t"                                             \
-       "sra    %%g7, 0, %%g7\n\t"                                              \
-       "stx    %%l1, [%0]\n\t"                                                 \
-       "stx    %%g7, [%0 + 8]\n"                                               \
-"0:\n\t"                                                                       \
-       "wr     %%g0, %5, %%asi\n\n\t"                                          \
-       ".section __ex_table\n\t"                                               \
-       ".word  4b, " #errh "\n\t"                                              \
-       ".word  5b, " #errh "\n\t"                                              \
-       ".word  6b, " #errh "\n\t"                                              \
-       ".word  7b, " #errh "\n\t"                                              \
-       ".word  8b, " #errh "\n\t"                                              \
-       ".word  9b, " #errh "\n\t"                                              \
-       ".word  10b, " #errh "\n\t"                                             \
-       ".word  11b, " #errh "\n\t"                                             \
-       ".word  12b, " #errh "\n\t"                                             \
-       ".word  13b, " #errh "\n\t"                                             \
-       ".word  14b, " #errh "\n\t"                                             \
-       ".word  15b, " #errh "\n\t"                                             \
-       ".word  16b, " #errh "\n\n\t"                                           \
-       ".previous\n\t"                                                         \
-       : : "r" (dest_reg), "r" (size), "r" (saddr), "r" (is_signed),           \
-         "r" (asi), "i" (ASI_AIUS)                                             \
-       : "l1", "l2", "g7", "g1", "cc");                                        \
-})
+extern void do_int_load(unsigned long *dest_reg, int size,
+                       unsigned long *saddr, int is_signed, int asi);
         
-#define store_common(dst_addr, size, src_val, asi, errh) ({                    \
-__asm__ __volatile__ (                                                         \
-       "wr     %3, 0, %%asi\n\t"                                               \
-       "ldx    [%2], %%l1\n"                                                   \
-       "cmp    %1, 2\n\t"                                                      \
-       "be,pn  %%icc, 2f\n\t"                                                  \
-       " cmp   %1, 4\n\t"                                                      \
-       "be,pt  %%icc, 1f\n\t"                                                  \
-       " srlx  %%l1, 24, %%l2\n\t"                                             \
-       "srlx   %%l1, 56, %%g1\n\t"                                             \
-       "srlx   %%l1, 48, %%g7\n"                                               \
-"4:\t" "stba   %%g1, [%0] %%asi\n\t"                                           \
-       "srlx   %%l1, 40, %%g1\n"                                               \
-"5:\t" "stba   %%g7, [%0 + 1] %%asi\n\t"                                       \
-       "srlx   %%l1, 32, %%g7\n"                                               \
-"6:\t" "stba   %%g1, [%0 + 2] %%asi\n"                                         \
-"7:\t" "stba   %%g7, [%0 + 3] %%asi\n\t"                                       \
-       "srlx   %%l1, 16, %%g1\n"                                               \
-"8:\t" "stba   %%l2, [%0 + 4] %%asi\n\t"                                       \
-       "srlx   %%l1, 8, %%g7\n"                                                \
-"9:\t" "stba   %%g1, [%0 + 5] %%asi\n"                                         \
-"10:\t"        "stba   %%g7, [%0 + 6] %%asi\n\t"                                       \
-       "ba,pt  %%xcc, 0f\n"                                                    \
-"11:\t"        " stba  %%l1, [%0 + 7] %%asi\n"                                         \
-"1:\t" "srl    %%l1, 16, %%g7\n"                                               \
-"12:\t"        "stba   %%l2, [%0] %%asi\n\t"                                           \
-       "srl    %%l1, 8, %%l2\n"                                                \
-"13:\t"        "stba   %%g7, [%0 + 1] %%asi\n"                                         \
-"14:\t"        "stba   %%l2, [%0 + 2] %%asi\n\t"                                       \
-       "ba,pt  %%xcc, 0f\n"                                                    \
-"15:\t"        " stba  %%l1, [%0 + 3] %%asi\n"                                         \
-"2:\t" "srl    %%l1, 8, %%l2\n"                                                \
-"16:\t"        "stba   %%l2, [%0] %%asi\n"                                             \
-"17:\t"        "stba   %%l1, [%0 + 1] %%asi\n"                                         \
-"0:\n\t"                                                                       \
-       "wr     %%g0, %4, %%asi\n\n\t"                                          \
-       ".section __ex_table\n\t"                                               \
-       ".word  4b, " #errh "\n\t"                                              \
-       ".word  5b, " #errh "\n\t"                                              \
-       ".word  6b, " #errh "\n\t"                                              \
-       ".word  7b, " #errh "\n\t"                                              \
-       ".word  8b, " #errh "\n\t"                                              \
-       ".word  9b, " #errh "\n\t"                                              \
-       ".word  10b, " #errh "\n\t"                                             \
-       ".word  11b, " #errh "\n\t"                                             \
-       ".word  12b, " #errh "\n\t"                                             \
-       ".word  13b, " #errh "\n\t"                                             \
-       ".word  14b, " #errh "\n\t"                                             \
-       ".word  15b, " #errh "\n\t"                                             \
-       ".word  16b, " #errh "\n\t"                                             \
-       ".word  17b, " #errh "\n\n\t"                                           \
-       ".previous\n\t"                                                         \
-       : : "r" (dst_addr), "r" (size), "r" (src_val), "r" (asi), "i" (ASI_AIUS)\
-       : "l1", "l2", "g7", "g1", "cc");                                        \
-})
-
-#define do_integer_store(reg_num, size, dst_addr, regs, asi, errh) ({          \
-       unsigned long zero = 0;                                                 \
-       unsigned long *src_val = &zero;                                         \
-                                                                               \
-       if (size == 16) {                                                       \
-               size = 8;                                                       \
-               zero = (((long)(reg_num ?                                       \
-                       (unsigned)fetch_reg(reg_num, regs) : 0)) << 32) |       \
-                       (unsigned)fetch_reg(reg_num + 1, regs);                 \
-       } else if (reg_num) src_val = fetch_reg_addr(reg_num, regs);            \
-       store_common(dst_addr, size, src_val, asi, errh);                       \
-})
-
-extern void smp_capture(void);
-extern void smp_release(void);
-
-#define do_atomic(srcdest_reg, mem, errh) ({                                   \
-       unsigned long flags, tmp;                                               \
-                                                                               \
-       smp_capture();                                                          \
-       local_irq_save(flags);                                                  \
-       tmp = *srcdest_reg;                                                     \
-       do_integer_load(srcdest_reg, 4, mem, 0, errh);                          \
-       store_common(mem, 4, &tmp, errh);                                       \
-       local_irq_restore(flags);                                               \
-       smp_release();                                                          \
-})
+extern void __do_int_store(unsigned long *dst_addr, int size,
+                          unsigned long *src_val, int asi);
+
+static inline void do_int_store(int reg_num, int size, unsigned long *dst_addr,
+                               struct pt_regs *regs, int asi)
+{
+       unsigned long zero = 0;         /* source when reg_num == 0; also holds the packed pair for size 16 */
+       unsigned long *src_val = &zero;
+
+       if (size == 16) {
+               size = 8;               /* the two 32-bit register values go out as one 8-byte store */
+               zero = (((long)(reg_num ?
+                       (unsigned)fetch_reg(reg_num, regs) : 0)) << 32) |
+                       (unsigned)fetch_reg(reg_num + 1, regs);
+       } else if (reg_num) {
+               src_val = fetch_reg_addr(reg_num, regs);
+       }
+       __do_int_store(dst_addr, size, src_val, asi);   /* assembler helper declared just above */
+}
  
  static inline void advance(struct pt_regs *regs)
  {
@@ -364,24 +223,29 @@ static inline int ok_for_kernel(unsigned int insn)
         return !floating_point_load_or_store_p(insn);
  }
  
-void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn) __asm__ ("kernel_mna_trap_fault");
-
-void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
+void kernel_mna_trap_fault(void)
  {
-       unsigned long g2 = regs->u_regs [UREG_G2];
+       struct pt_regs *regs = current_thread_info()->kern_una_regs;
+       unsigned int insn = current_thread_info()->kern_una_insn;
+       unsigned long g2 = regs->u_regs[UREG_G2];
         unsigned long fixup = search_extables_range(regs->tpc, &g2);
  
         if (!fixup) {
-               unsigned long address = compute_effective_address(regs, insn, ((insn >> 25) & 0x1f));
+               unsigned long address;
+
+               address = compute_effective_address(regs, insn,
+                                                   ((insn >> 25) & 0x1f));
                 if (address < PAGE_SIZE) {
-                       printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference in mna handler");
+                       printk(KERN_ALERT "Unable to handle kernel NULL "
+                              "pointer dereference in mna handler");
                 } else
-                       printk(KERN_ALERT "Unable to handle kernel paging request in mna handler");
+                       printk(KERN_ALERT "Unable to handle kernel paging "
+                              "request in mna handler");
                 printk(KERN_ALERT " at virtual address %016lx\n",address);
-               printk(KERN_ALERT "current->{mm,active_mm}->context = %016lx\n",
+               printk(KERN_ALERT "current->{active_,}mm->context = %016lx\n",
                         (current->mm ? CTX_HWBITS(current->mm->context) :
                         CTX_HWBITS(current->active_mm->context)));
-               printk(KERN_ALERT "current->{mm,active_mm}->pgd = %016lx\n",
+               printk(KERN_ALERT "current->{active_,}mm->pgd = %016lx\n",
                         (current->mm ? (unsigned long) current->mm->pgd :
                         (unsigned long) current->active_mm->pgd));
                 die_if_kernel("Oops", regs);
@@ -400,48 +264,41 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, u
         enum direction dir = decode_direction(insn);
         int size = decode_access_size(insn);
  
+       current_thread_info()->kern_una_regs = regs;
+       current_thread_info()->kern_una_insn = insn;
+
         if (!ok_for_kernel(insn) || dir == both) {
-               printk("Unsupported unaligned load/store trap for kernel at <%016lx>.\n",
-                      regs->tpc);
-               unaligned_panic("Kernel does fpu/atomic unaligned load/store.", regs);
-
-               __asm__ __volatile__ ("\n"
-"kernel_unaligned_trap_fault:\n\t"
-               "mov    %0, %%o0\n\t"
-               "call   kernel_mna_trap_fault\n\t"
-               " mov   %1, %%o1\n\t"
-               :
-               : "r" (regs), "r" (insn)
-               : "o0", "o1", "o2", "o3", "o4", "o5", "o7",
-                 "g1", "g2", "g3", "g4", "g7", "cc");
+               printk("Unsupported unaligned load/store trap for kernel "
+                      "at <%016lx>.\n", regs->tpc);
+               unaligned_panic("Kernel does fpu/atomic "
+                               "unaligned load/store.", regs);
+
+               kernel_mna_trap_fault();
         } else {
-               unsigned long addr = compute_effective_address(regs, insn, ((insn >> 25) & 0x1f));
+               unsigned long addr;
  
+               addr = compute_effective_address(regs, insn,
+                                                ((insn >> 25) & 0x1f));
  #ifdef DEBUG_MNA
-               printk("KMNA: pc=%016lx [dir=%s addr=%016lx size=%d] retpc[%016lx]\n",
-                      regs->tpc, dirstrings[dir], addr, size, regs->u_regs[UREG_RETPC]);
+               printk("KMNA: pc=%016lx [dir=%s addr=%016lx size=%d] "
+                      "retpc[%016lx]\n",
+                      regs->tpc, dirstrings[dir], addr, size,
+                      regs->u_regs[UREG_RETPC]);
  #endif
                 switch (dir) {
                 case load:
-                       do_integer_load(fetch_reg_addr(((insn>>25)&0x1f), regs),
-                                       size, (unsigned long *) addr,
-                                       decode_signedness(insn), decode_asi(insn, regs),
-                                       kernel_unaligned_trap_fault);
+                       do_int_load(fetch_reg_addr(((insn>>25)&0x1f), regs),
+                                   size, (unsigned long *) addr,
+                                   decode_signedness(insn),
+                                   decode_asi(insn, regs));
                         break;
  
                 case store:
-                       do_integer_store(((insn>>25)&0x1f), size,
-                                        (unsigned long *) addr, regs,
-                                        decode_asi(insn, regs),
-                                        kernel_unaligned_trap_fault);
-                       break;
-#if 0 /* unsupported */
-               case both:
-                       do_atomic(fetch_reg_addr(((insn>>25)&0x1f), regs),
-                                 (unsigned long *) addr,
-                                 kernel_unaligned_trap_fault);
+                       do_int_store(((insn>>25)&0x1f), size,
+                                    (unsigned long *) addr, regs,
+                                    decode_asi(insn, regs));
                         break;
-#endif
+
                 default:
                         panic("Impossible kernel unaligned trap.");
                         /* Not reached... */
@@ -492,9 +349,9 @@ int handle_popc(u32 insn, struct pt_regs *regs)
  
  extern void do_fpother(struct pt_regs *regs);
  extern void do_privact(struct pt_regs *regs);
-extern void data_access_exception(struct pt_regs *regs,
-                                 unsigned long sfsr,
-                                 unsigned long sfar);
+extern void spitfire_data_access_exception(struct pt_regs *regs,
+                                          unsigned long sfsr,
+                                          unsigned long sfar);
  
  int handle_ldf_stq(u32 insn, struct pt_regs *regs)
  {
@@ -537,14 +394,14 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)
                                 break;
                         }
                 default:
-                       data_access_exception(regs, 0, addr);
+                       spitfire_data_access_exception(regs, 0, addr);
                         return 1;
                 }
                 if (put_user (first >> 32, (u32 __user *)addr) ||
                     __put_user ((u32)first, (u32 __user *)(addr + 4)) ||
                     __put_user (second >> 32, (u32 __user *)(addr + 8)) ||
                     __put_user ((u32)second, (u32 __user *)(addr + 12))) {
-                       data_access_exception(regs, 0, addr);
+                       spitfire_data_access_exception(regs, 0, addr);
                         return 1;
                 }
         } else {
@@ -557,7 +414,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)
                         do_privact(regs);
                         return 1;
                 } else if (asi > ASI_SNFL) {
-                       data_access_exception(regs, 0, addr);
+                       spitfire_data_access_exception(regs, 0, addr);
                         return 1;
                 }
                 switch (insn & 0x180000) {
@@ -574,7 +431,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)
                                 err |= __get_user (data[i], (u32 __user *)(addr + 4*i));
                 }
                 if (err && !(asi & 0x2 /* NF */)) {
-                       data_access_exception(regs, 0, addr);
+                       spitfire_data_access_exception(regs, 0, addr);
                         return 1;
                 }
                 if (asi & 0x8) /* Little */ {
@@ -677,7 +534,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
                 *(u64 *)(f->regs + freg) = value;
                 current_thread_info()->fpsaved[0] |= flag;
         } else {
-daex:          data_access_exception(regs, sfsr, sfar);
+daex:          spitfire_data_access_exception(regs, sfsr, sfar);
                 return;
         }
         advance(regs);
@@ -721,7 +578,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
                     __put_user ((u32)value, (u32 __user *)(sfar + 4)))
                         goto daex;
         } else {
-daex:          data_access_exception(regs, sfsr, sfar);
+daex:          spitfire_data_access_exception(regs, sfsr, sfar);
                 return;
         }
         advance(regs);
diff --git a/arch/sparc64/kernel/us2e_cpufreq.c b/arch/sparc64/kernel/us2e_cpufreq.c

index 7aae0a18aabe2e7e305453ddaa537e853ba521a9..686e526bec04c91ffe1e7c145537b1a74680fb20 100644 (file)
--- a/arch/sparc64/kernel/us2e_cpufreq.c
+++ b/arch/sparc64/kernel/us2e_cpufreq.c
@@ -88,7 +88,6 @@ static void frob_mem_refresh(int cpu_slowing_down,
  {
         unsigned long old_refr_count, refr_count, mctrl;
  
-
         refr_count  = (clock_tick * MCTRL0_REFR_INTERVAL);
         refr_count /= (MCTRL0_REFR_CLKS_P_CNT * divisor * 1000000000UL);
  
@@ -230,6 +229,25 @@ static unsigned long estar_to_divisor(unsigned long estar)
         return ret;
  }
  
+static unsigned int us2e_freq_get(unsigned int cpu)
+{
+       cpumask_t cpus_allowed;
+       unsigned long clock_tick, estar;
+
+       if (!cpu_online(cpu))
+               return 0;
+
+       cpus_allowed = current->cpus_allowed;
+       set_cpus_allowed(current, cpumask_of_cpu(cpu));
+
+       clock_tick = sparc64_get_clock_tick(cpu) / 1000;
+       estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR);
+
+       set_cpus_allowed(current, cpus_allowed);
+
+       return clock_tick / estar_to_divisor(estar);
+}
+
  static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index)
  {
         unsigned long new_bits, new_freq;
@@ -243,7 +261,7 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index)
         cpus_allowed = current->cpus_allowed;
         set_cpus_allowed(current, cpumask_of_cpu(cpu));
  
-       new_freq = clock_tick = sparc64_get_clock_tick(cpu);
+       new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000;
         new_bits = index_to_estar_mode(index);
         divisor = index_to_divisor(index);
         new_freq /= divisor;
@@ -258,7 +276,8 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index)
         cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
  
         if (old_divisor != divisor)
-               us2e_transition(estar, new_bits, clock_tick, old_divisor, divisor);
+               us2e_transition(estar, new_bits, clock_tick * 1000,
+                               old_divisor, divisor);
  
         cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
  
@@ -272,10 +291,8 @@ static int us2e_freq_target(struct cpufreq_policy *policy,
         unsigned int new_index = 0;
  
         if (cpufreq_frequency_table_target(policy,
-                                             &us2e_freq_table[policy->cpu].table[0],
-                                             target_freq,
-                                             relation,
-                                             &new_index))
+                                          &us2e_freq_table[policy->cpu].table[0],
+                                          target_freq, relation, &new_index))
                 return -EINVAL;
  
         us2e_set_cpu_divider_index(policy->cpu, new_index);
@@ -292,7 +309,7 @@ static int us2e_freq_verify(struct cpufreq_policy *policy)
  static int __init us2e_freq_cpu_init(struct cpufreq_policy *policy)
  {
         unsigned int cpu = policy->cpu;
-       unsigned long clock_tick = sparc64_get_clock_tick(cpu);
+       unsigned long clock_tick = sparc64_get_clock_tick(cpu) / 1000;
         struct cpufreq_frequency_table *table =
                 &us2e_freq_table[cpu].table[0];
  
@@ -351,9 +368,10 @@ static int __init us2e_freq_init(void)
                 memset(us2e_freq_table, 0,
                        (NR_CPUS * sizeof(struct us2e_freq_percpu_info)));
  
+               driver->init = us2e_freq_cpu_init;
                 driver->verify = us2e_freq_verify;
                 driver->target = us2e_freq_target;
-               driver->init = us2e_freq_cpu_init;
+               driver->get = us2e_freq_get;
                 driver->exit = us2e_freq_cpu_exit;
                 driver->owner = THIS_MODULE,
                 strcpy(driver->name, "UltraSPARC-IIe");
diff --git a/arch/sparc64/kernel/us3_cpufreq.c b/arch/sparc64/kernel/us3_cpufreq.c

index 18fe54b8aa551c5eb94a2e414f0b0b619a329a25..9080e7cd4bb0b8506ceea663c6e4e0d5a4f8a87e 100644 (file)
--- a/arch/sparc64/kernel/us3_cpufreq.c
+++ b/arch/sparc64/kernel/us3_cpufreq.c
@@ -56,7 +56,7 @@ static void write_safari_cfg(unsigned long val)
  
  static unsigned long get_current_freq(unsigned int cpu, unsigned long safari_cfg)
  {
-       unsigned long clock_tick = sparc64_get_clock_tick(cpu);
+       unsigned long clock_tick = sparc64_get_clock_tick(cpu) / 1000;
         unsigned long ret;
  
         switch (safari_cfg & SAFARI_CFG_DIV_MASK) {
@@ -76,6 +76,26 @@ static unsigned long get_current_freq(unsigned int cpu, unsigned long safari_cfg
         return ret;
  }
  
+static unsigned int us3_freq_get(unsigned int cpu)
+{
+       cpumask_t cpus_allowed;
+       unsigned long reg;
+       unsigned int ret;
+
+       if (!cpu_online(cpu))
+               return 0;
+
+       cpus_allowed = current->cpus_allowed;
+       set_cpus_allowed(current, cpumask_of_cpu(cpu));
+
+       reg = read_safari_cfg();
+       ret = get_current_freq(cpu, reg);
+
+       set_cpus_allowed(current, cpus_allowed);
+
+       return ret;
+}
+
  static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index)
  {
         unsigned long new_bits, new_freq, reg;
@@ -88,7 +108,7 @@ static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index)
         cpus_allowed = current->cpus_allowed;
         set_cpus_allowed(current, cpumask_of_cpu(cpu));
  
-       new_freq = sparc64_get_clock_tick(cpu);
+       new_freq = sparc64_get_clock_tick(cpu) / 1000;
         switch (index) {
         case 0:
                 new_bits = SAFARI_CFG_DIV_1;
@@ -150,7 +170,7 @@ static int us3_freq_verify(struct cpufreq_policy *policy)
  static int __init us3_freq_cpu_init(struct cpufreq_policy *policy)
  {
         unsigned int cpu = policy->cpu;
-       unsigned long clock_tick = sparc64_get_clock_tick(cpu);
+       unsigned long clock_tick = sparc64_get_clock_tick(cpu) / 1000;
         struct cpufreq_frequency_table *table =
                 &us3_freq_table[cpu].table[0];
  
@@ -206,9 +226,10 @@ static int __init us3_freq_init(void)
                 memset(us3_freq_table, 0,
                        (NR_CPUS * sizeof(struct us3_freq_percpu_info)));
  
+               driver->init = us3_freq_cpu_init;
                 driver->verify = us3_freq_verify;
                 driver->target = us3_freq_target;
-               driver->init = us3_freq_cpu_init;
+               driver->get = us3_freq_get;
                 driver->exit = us3_freq_cpu_exit;
                 driver->owner = THIS_MODULE,
                 strcpy(driver->name, "UltraSPARC-III");
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S

index dfbc7e0dcf70f7dedf71952c96d18a88e3ce02dd..99c809a1e5acbbeacebdf561504dd24a2cdfce5a 100644 (file)
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -318,7 +318,7 @@ fill_fixup_dax:
          nop
         rdpr            %pstate, %l1                    ! Prepare to change globals.
         mov             %g4, %o1                        ! Setup args for
-       mov             %g5, %o2                        ! final call to data_access_exception.
+       mov             %g5, %o2                        ! final call to spitfire_data_access_exception.
         andn            %l1, PSTATE_MM, %l1             ! We want to be in RMO
  
         mov             %g6, %o7                        ! Stash away current.
@@ -330,7 +330,7 @@ fill_fixup_dax:
         mov             TSB_REG, %g1
         ldxa            [%g1] ASI_IMMU, %g5
  #endif
-       call            data_access_exception
+       call            spitfire_data_access_exception
          add            %sp, PTREGS_OFF, %o0
  
         b,pt            %xcc, rtrap
@@ -391,7 +391,7 @@ window_dax_from_user_common:
  109:    or             %g7, %lo(109b), %g7
         mov             %l4, %o1
         mov             %l5, %o2
-       call            data_access_exception
+       call            spitfire_data_access_exception
          add            %sp, PTREGS_OFF, %o0
         ba,pt           %xcc, rtrap
          clr            %l6
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile

index 40dbeec7e5d6a8ed75006dd2873e0d40e019b29e..6201f1040982aabde591d28673a4a0d7e2697f95 100644 (file)
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
          U1memcpy.o U1copy_from_user.o U1copy_to_user.o \
          U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
          copy_in_user.o user_fixup.o memmove.o \
-        mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o
+        mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o mb.o
  
  lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o
  lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c

index f03344cf784e1785efbef5e0b195d89a568549b3..f5f0b5586f01a76ca110d4c04aec77c2be7d2931 100644 (file)
--- a/arch/sparc64/lib/debuglocks.c
+++ b/arch/sparc64/lib/debuglocks.c
@@ -12,8 +12,6 @@
  
  #ifdef CONFIG_SMP
  
-#define GET_CALLER(PC) __asm__ __volatile__("mov %%i7, %0" : "=r" (PC))
-
  static inline void show (char *str, spinlock_t *lock, unsigned long caller)
  {
         int cpu = smp_processor_id();
@@ -51,20 +49,19 @@ static inline void show_write (char *str, rwlock_t *lock, unsigned long caller)
  #undef INIT_STUCK
  #define INIT_STUCK 100000000
  
-void _do_spin_lock(spinlock_t *lock, char *str)
+void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller)
  {
-       unsigned long caller, val;
+       unsigned long val;
         int stuck = INIT_STUCK;
         int cpu = get_cpu();
         int shown = 0;
  
-       GET_CALLER(caller);
  again:
         __asm__ __volatile__("ldstub [%1], %0"
                              : "=r" (val)
                              : "r" (&(lock->lock))
                              : "memory");
-       membar("#StoreLoad | #StoreStore");
+       membar_storeload_storestore();
         if (val) {
                 while (lock->lock) {
                         if (!--stuck) {
@@ -72,7 +69,7 @@ again:
                                         show(str, lock, caller);
                                 stuck = INIT_STUCK;
                         }
-                       membar("#LoadLoad");
+                       rmb();
                 }
                 goto again;
         }
@@ -84,17 +81,16 @@ again:
         put_cpu();
  }
  
-int _do_spin_trylock(spinlock_t *lock)
+int _do_spin_trylock(spinlock_t *lock, unsigned long caller)
  {
-       unsigned long val, caller;
+       unsigned long val;
         int cpu = get_cpu();
  
-       GET_CALLER(caller);
         __asm__ __volatile__("ldstub [%1], %0"
                              : "=r" (val)
                              : "r" (&(lock->lock))
                              : "memory");
-       membar("#StoreLoad | #StoreStore");
+       membar_storeload_storestore();
         if (!val) {
                 lock->owner_pc = ((unsigned int)caller);
                 lock->owner_cpu = cpu;
@@ -111,21 +107,20 @@ void _do_spin_unlock(spinlock_t *lock)
  {
         lock->owner_pc = 0;
         lock->owner_cpu = NO_PROC_ID;
-       membar("#StoreStore | #LoadStore");
+       membar_storestore_loadstore();
         lock->lock = 0;
         current->thread.smp_lock_count--;
  }
  
  /* Keep INIT_STUCK the same... */
  
-void _do_read_lock (rwlock_t *rw, char *str)
+void _do_read_lock(rwlock_t *rw, char *str, unsigned long caller)
  {
-       unsigned long caller, val;
+       unsigned long val;
         int stuck = INIT_STUCK;
         int cpu = get_cpu();
         int shown = 0;
  
-       GET_CALLER(caller);
  wlock_again:
         /* Wait for any writer to go away.  */
         while (((long)(rw->lock)) < 0) {
@@ -134,7 +129,7 @@ wlock_again:
                                 show_read(str, rw, caller);
                         stuck = INIT_STUCK;
                 }
-               membar("#LoadLoad");
+               rmb();
         }
         /* Try once to increment the counter.  */
         __asm__ __volatile__(
@@ -147,7 +142,7 @@ wlock_again:
  "2:"   : "=r" (val)
         : "0" (&(rw->lock))
         : "g1", "g7", "memory");
-       membar("#StoreLoad | #StoreStore");
+       membar_storeload_storestore();
         if (val)
                 goto wlock_again;
         rw->reader_pc[cpu] = ((unsigned int)caller);
@@ -157,15 +152,13 @@ wlock_again:
         put_cpu();
  }
  
-void _do_read_unlock (rwlock_t *rw, char *str)
+void _do_read_unlock(rwlock_t *rw, char *str, unsigned long caller)
  {
-       unsigned long caller, val;
+       unsigned long val;
         int stuck = INIT_STUCK;
         int cpu = get_cpu();
         int shown = 0;
  
-       GET_CALLER(caller);
-
         /* Drop our identity _first_. */
         rw->reader_pc[cpu] = 0;
         current->thread.smp_lock_count--;
@@ -193,14 +186,13 @@ runlock_again:
         put_cpu();
  }
  
-void _do_write_lock (rwlock_t *rw, char *str)
+void _do_write_lock(rwlock_t *rw, char *str, unsigned long caller)
  {
-       unsigned long caller, val;
+       unsigned long val;
         int stuck = INIT_STUCK;
         int cpu = get_cpu();
         int shown = 0;
  
-       GET_CALLER(caller);
  wlock_again:
         /* Spin while there is another writer. */
         while (((long)rw->lock) < 0) {
@@ -209,7 +201,7 @@ wlock_again:
                                 show_write(str, rw, caller);
                         stuck = INIT_STUCK;
                 }
-               membar("#LoadLoad");
+               rmb();
         }
  
         /* Try to acuire the write bit.  */
@@ -264,7 +256,7 @@ wlock_again:
                                         show_write(str, rw, caller);
                                 stuck = INIT_STUCK;
                         }
-                       membar("#LoadLoad");
+                       rmb();
                 }
                 goto wlock_again;
         }
@@ -278,14 +270,12 @@ wlock_again:
         put_cpu();
  }
  
-void _do_write_unlock(rwlock_t *rw)
+void _do_write_unlock(rwlock_t *rw, unsigned long caller)
  {
-       unsigned long caller, val;
+       unsigned long val;
         int stuck = INIT_STUCK;
         int shown = 0;
  
-       GET_CALLER(caller);
-
         /* Drop our identity _first_ */
         rw->writer_pc = 0;
         rw->writer_cpu = NO_PROC_ID;
@@ -313,13 +303,11 @@ wlock_again:
         }
  }
  
-int _do_write_trylock (rwlock_t *rw, char *str)
+int _do_write_trylock(rwlock_t *rw, char *str, unsigned long caller)
  {
-       unsigned long caller, val;
+       unsigned long val;
         int cpu = get_cpu();
  
-       GET_CALLER(caller);
-
         /* Try to acuire the write bit.  */
         __asm__ __volatile__(
  "      mov     1, %%g3\n"
diff --git a/arch/sparc64/lib/mb.S b/arch/sparc64/lib/mb.S

new file mode 100644 (file)

index 0000000..4004f74
--- /dev/null
+++ b/arch/sparc64/lib/mb.S
@@ -0,0 +1,73 @@
+/* mb.S: Out of line memory barriers.
+ *
+ * Copyright (C) 2005 David S. Miller (davem@davemloft.net)
+ */
+
+       /* These are here in an effort to more fully work around
+        * Spitfire Errata #51.  Essentially, if a memory barrier
+        * occurs soon after a mispredicted branch, the chip can stop
+        * executing instructions until a trap occurs.  Therefore, if
+        * interrupts are disabled, the chip can hang forever.
+        *
+        * It used to be believed that the memory barrier had to be
+        * right in the delay slot, but a case has been traced
+        * recently wherein the memory barrier was one instruction
+        * after the branch delay slot and the chip still hung.  The
+        * offending sequence was the following in sym_wakeup_done()
+        * of the sym53c8xx_2 driver:
+        *
+        *      call    sym_ccb_from_dsa, 0
+        *       movge  %icc, 0, %l0
+        *      brz,pn  %o0, .LL1303
+        *       mov    %o0, %l2
+        *      membar  #LoadLoad
+        *
+        * The branch has to be mispredicted for the bug to occur.
+        * Therefore, we put the memory barrier explicitly into a
+        * "branch always, predicted taken" delay slot to avoid the
+        * problem case.
+        */
+
+       .text
+
+99:    retl
+        nop
+
+       .globl  mb
+mb:    ba,pt   %xcc, 99b
+        membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad
+       .size   mb, .-mb
+
+       .globl  rmb
+rmb:   ba,pt   %xcc, 99b
+        membar #LoadLoad
+       .size   rmb, .-rmb
+
+       .globl  wmb
+wmb:   ba,pt   %xcc, 99b
+        membar #StoreStore
+       .size   wmb, .-wmb
+
+       .globl  membar_storeload
+membar_storeload:
+       ba,pt   %xcc, 99b
+        membar #StoreLoad
+       .size   membar_storeload, .-membar_storeload
+
+       .globl  membar_storeload_storestore
+membar_storeload_storestore:
+       ba,pt   %xcc, 99b
+        membar #StoreLoad | #StoreStore
+       .size   membar_storeload_storestore, .-membar_storeload_storestore
+
+       .globl  membar_storeload_loadload
+membar_storeload_loadload:
+       ba,pt   %xcc, 99b
+        membar #StoreLoad | #LoadLoad
+       .size   membar_storeload_loadload, .-membar_storeload_loadload
+
+       .globl  membar_storestore_loadstore
+membar_storestore_loadstore:
+       ba,pt   %xcc, 99b
+        membar #StoreStore | #LoadStore
+       .size   membar_storestore_loadstore, .-membar_storestore_loadstore
diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c

index 15b4cfe075572afbd1c9c2e19a28aef3e574ae70..302efbcba70e85208d1fa8ba7387da963b8f8181 100644 (file)
--- a/arch/sparc64/solaris/misc.c
+++ b/arch/sparc64/solaris/misc.c
@@ -737,7 +737,8 @@ MODULE_LICENSE("GPL");
  extern u32 tl0_solaris[8];
  #define update_ttable(x)                                                                               \
         tl0_solaris[3] = (((long)(x) - (long)tl0_solaris - 3) >> 2) | 0x40000000;                       \
-       __asm__ __volatile__ ("membar #StoreStore; flush %0" : : "r" (&tl0_solaris[3]))
+       wmb();          \
+       __asm__ __volatile__ ("flush %0" : : "r" (&tl0_solaris[3]))
  #else
  #endif 
  
@@ -761,7 +762,8 @@ int init_module(void)
         entry64_personality_patch |=
                 (offsetof(struct task_struct, personality) +
                  (sizeof(unsigned long) - 1));
-       __asm__ __volatile__("membar #StoreStore; flush %0"
+       wmb();
+       __asm__ __volatile__("flush %0"
                              : : "r" (&entry64_personality_patch));
         return 0;
  }
diff --git a/arch/sparc64/solaris/socket.c b/arch/sparc64/solaris/socket.c

index 06740582717e4427da136ed80b139b4aa40cbf05..d3a66ea74a7f1e75a9223c8a5812d6ac6898073e 100644 (file)
--- a/arch/sparc64/solaris/socket.c
+++ b/arch/sparc64/solaris/socket.c
@@ -16,6 +16,7 @@
  #include <linux/net.h>
  #include <linux/compat.h>
  #include <net/compat.h>
+#include <net/sock.h>
  
  #include <asm/uaccess.h>
  #include <asm/string.h>
@@ -297,121 +298,166 @@ asmlinkage int solaris_sendmsg(int fd, struct sol_nmsghdr __user *user_msg, unsi
  {
         struct socket *sock;
         char address[MAX_SOCK_ADDR];
-       struct iovec iov[UIO_FASTIOV];
+       struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
         unsigned char ctl[sizeof(struct cmsghdr) + 20];
         unsigned char *ctl_buf = ctl;
-       struct msghdr kern_msg;
-       int err, total_len;
+       struct msghdr msg_sys;
+       int err, ctl_len, iov_size, total_len;
  
-       if(msghdr_from_user32_to_kern(&kern_msg, user_msg))
-               return -EFAULT;
-       if(kern_msg.msg_iovlen > UIO_MAXIOV)
-               return -EINVAL;
-       err = verify_compat_iovec(&kern_msg, iov, address, VERIFY_READ);
-       if (err < 0)
+       err = -EFAULT;
+       if (msghdr_from_user32_to_kern(&msg_sys, user_msg))
+               goto out;
+
+       sock = sockfd_lookup(fd, &err);
+       if (!sock)
                 goto out;
+
+       /* do not move before msg_sys is valid */
+       err = -EMSGSIZE;
+       if (msg_sys.msg_iovlen > UIO_MAXIOV)
+               goto out_put;
+
+       /* Check whether to allocate the iovec area */
+       err = -ENOMEM;
+       iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
+       if (msg_sys.msg_iovlen > UIO_FASTIOV) {
+               iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
+               if (!iov)
+                       goto out_put;
+       }
+
+       err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
+       if (err < 0)
+               goto out_freeiov;
         total_len = err;
  
-       if(kern_msg.msg_controllen) {
-               struct sol_cmsghdr __user *ucmsg = kern_msg.msg_control;
+       err = -ENOBUFS;
+       if (msg_sys.msg_controllen > INT_MAX)
+               goto out_freeiov;
+
+       ctl_len = msg_sys.msg_controllen;
+       if (ctl_len) {
+               struct sol_cmsghdr __user *ucmsg = msg_sys.msg_control;
                 unsigned long *kcmsg;
                 compat_size_t cmlen;
  
-               if (kern_msg.msg_controllen <= sizeof(compat_size_t))
-                       return -EINVAL;
+               err = -EINVAL;
+               if (ctl_len <= sizeof(compat_size_t))
+                       goto out_freeiov;
  
-               if(kern_msg.msg_controllen > sizeof(ctl)) {
+               if (ctl_len > sizeof(ctl)) {
                         err = -ENOBUFS;
-                       ctl_buf = kmalloc(kern_msg.msg_controllen, GFP_KERNEL);
-                       if(!ctl_buf)
+                       /* Charged to sk: freed with sock_kfree_s() below. */
+                       ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
+                       if (!ctl_buf)
                                 goto out_freeiov;
                 }
                 __get_user(cmlen, &ucmsg->cmsg_len);
                 kcmsg = (unsigned long *) ctl_buf;
                 *kcmsg++ = (unsigned long)cmlen;
                 err = -EFAULT;
-               if(copy_from_user(kcmsg, &ucmsg->cmsg_level,
-                                 kern_msg.msg_controllen - sizeof(compat_size_t)))
+               if (copy_from_user(kcmsg, &ucmsg->cmsg_level,
+                                  ctl_len - sizeof(compat_size_t)))
                         goto out_freectl;
-               kern_msg.msg_control = ctl_buf;
+               msg_sys.msg_control = ctl_buf;
         }
-       kern_msg.msg_flags = solaris_to_linux_msgflags(user_flags);
+       msg_sys.msg_flags = solaris_to_linux_msgflags(user_flags);
  
-       lock_kernel();
-       sock = sockfd_lookup(fd, &err);
-       if (sock != NULL) {
-               if (sock->file->f_flags & O_NONBLOCK)
-                       kern_msg.msg_flags |= MSG_DONTWAIT;
-               err = sock_sendmsg(sock, &kern_msg, total_len);
-               sockfd_put(sock);
-       }
-       unlock_kernel();
+       if (sock->file->f_flags & O_NONBLOCK)
+               msg_sys.msg_flags |= MSG_DONTWAIT;
+       err = sock_sendmsg(sock, &msg_sys, total_len);
  
  out_freectl:
-       /* N.B. Use kfree here, as kern_msg.msg_controllen might change? */
-       if(ctl_buf != ctl)
-               kfree(ctl_buf);
+       if (ctl_buf != ctl)
+               sock_kfree_s(sock->sk, ctl_buf, ctl_len);
  out_freeiov:
-       if(kern_msg.msg_iov != iov)
-               kfree(kern_msg.msg_iov);
-out:
+       if (iov != iovstack)
+               sock_kfree_s(sock->sk, iov, iov_size);
+out_put:
+       sockfd_put(sock);
+out:
         return err;
  }
  
  asmlinkage int solaris_recvmsg(int fd, struct sol_nmsghdr __user *user_msg, unsigned int user_flags)
  {
-       struct iovec iovstack[UIO_FASTIOV];
-       struct msghdr kern_msg;
-       char addr[MAX_SOCK_ADDR];
         struct socket *sock;
+       struct iovec iovstack[UIO_FASTIOV];
         struct iovec *iov = iovstack;
+       struct msghdr msg_sys;
+       unsigned long cmsg_ptr;
+       int err, iov_size, total_len, len;
+
+       /* kernel mode address */
+       char addr[MAX_SOCK_ADDR];
+
+       /* user mode address pointers */
         struct sockaddr __user *uaddr;
         int __user *uaddr_len;
-       unsigned long cmsg_ptr;
-       int err, total_len, len = 0;
  
-       if(msghdr_from_user32_to_kern(&kern_msg, user_msg))
+       if (msghdr_from_user32_to_kern(&msg_sys, user_msg))
                 return -EFAULT;
-       if(kern_msg.msg_iovlen > UIO_MAXIOV)
-               return -EINVAL;
  
-       uaddr = kern_msg.msg_name;
+       sock = sockfd_lookup(fd, &err);
+       if (!sock)
+               goto out;
+
+       err = -EMSGSIZE;
+       if (msg_sys.msg_iovlen > UIO_MAXIOV)
+               goto out_put;
+
+       /* Check whether to allocate the iovec area*/
+       err = -ENOMEM;
+       iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
+       if (msg_sys.msg_iovlen > UIO_FASTIOV) {
+               iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
+               if (!iov)
+                       goto out_put;
+       }
+
+       /*
+        *      Save the user-mode address (verify_iovec will change the
+        *      kernel msghdr to use the kernel address space)
+        */
+        
+       uaddr = (void __user *) msg_sys.msg_name;
         uaddr_len = &user_msg->msg_namelen;
-       err = verify_compat_iovec(&kern_msg, iov, addr, VERIFY_WRITE);
+       err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
         if (err < 0)
-               goto out;
+               goto out_freeiov;
         total_len = err;
  
-       cmsg_ptr = (unsigned long) kern_msg.msg_control;
-       kern_msg.msg_flags = 0;
+       cmsg_ptr = (unsigned long) msg_sys.msg_control;
+       msg_sys.msg_flags = MSG_CMSG_COMPAT;
  
-       lock_kernel();
-       sock = sockfd_lookup(fd, &err);
-       if (sock != NULL) {
-               if (sock->file->f_flags & O_NONBLOCK)
-                       user_flags |= MSG_DONTWAIT;
-               err = sock_recvmsg(sock, &kern_msg, total_len, user_flags);
-               if(err >= 0)
-                       len = err;
-               sockfd_put(sock);
-       }
-       unlock_kernel();
-
-       if(uaddr != NULL && err >= 0)
-               err = move_addr_to_user(addr, kern_msg.msg_namelen, uaddr, uaddr_len);
-       if(err >= 0) {
-               err = __put_user(linux_to_solaris_msgflags(kern_msg.msg_flags), &user_msg->msg_flags);
-               if(!err) {
-                       /* XXX Convert cmsg back into userspace 32-bit format... */
-                       err = __put_user((unsigned long)kern_msg.msg_control - cmsg_ptr,
-                                        &user_msg->msg_controllen);
-               }
+       if (sock->file->f_flags & O_NONBLOCK)
+               user_flags |= MSG_DONTWAIT;
+
+       err = sock_recvmsg(sock, &msg_sys, total_len, user_flags);
+       if(err < 0)
+               goto out_freeiov;
+
+       len = err;
+
+       if (uaddr != NULL) {
+               err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
+               if (err < 0)
+                       goto out_freeiov;
         }
+       err = __put_user(linux_to_solaris_msgflags(msg_sys.msg_flags), &user_msg->msg_flags);
+       if (err)
+               goto out_freeiov;
+       err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
+                        &user_msg->msg_controllen);
+       if (err)
+               goto out_freeiov;
+       err = len;
  
-       if(kern_msg.msg_iov != iov)
-               kfree(kern_msg.msg_iov);
+out_freeiov:
+       if (iov != iovstack)
+               sock_kfree_s(sock->sk, iov, iov_size);
+out_put:
+       sockfd_put(sock);
  out:
-       if(err < 0)
-               return err;
-       return len;
+       return err;
  }
diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c

index a37a5ac13c22b8fb709d0ced7ec5db0a2629ce63..022f67bb687364f2934b388fca57f703f53c2bfe 100644 (file)
--- a/arch/um/drivers/mmapper_kern.c
+++ b/arch/um/drivers/mmapper_kern.c
@@ -9,19 +9,11 @@
   *
   */
  
-#include <linux/types.h>
-#include <linux/kdev_t.h>
-#include <linux/time.h>
-#include <linux/devfs_fs_kernel.h>
+#include <linux/init.h> 
  #include <linux/module.h>
  #include <linux/mm.h> 
-#include <linux/slab.h>
-#include <linux/init.h> 
-#include <linux/smp_lock.h>
  #include <linux/miscdevice.h>
  #include <asm/uaccess.h>
-#include <asm/irq.h>
-#include <asm/pgtable.h>
  #include "mem_user.h"
  #include "user_util.h"
   
@@ -31,35 +23,22 @@ static unsigned long p_buf = 0;
  static char *v_buf = NULL;
  
  static ssize_t
-mmapper_read(struct file *file, char *buf, size_t count, loff_t *ppos)
+mmapper_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
  {
-       if(*ppos > mmapper_size)
-               return -EINVAL;
-
-       if(count + *ppos > mmapper_size)
-               count = count + *ppos - mmapper_size;
-
-       if(count < 0)
-               return -EINVAL;
- 
-       copy_to_user(buf,&v_buf[*ppos],count);
-       
-       return count;
+       return simple_read_from_buffer(buf, count, ppos, v_buf, mmapper_size);
  }
  
  static ssize_t
-mmapper_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+mmapper_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
  {
-       if(*ppos > mmapper_size)
+       if (*ppos > mmapper_size)
                 return -EINVAL;
  
-       if(count + *ppos > mmapper_size)
-               count = count + *ppos - mmapper_size;
-
-       if(count < 0)
-               return -EINVAL;
+       if (count > mmapper_size - *ppos)
+               count = mmapper_size - *ppos;
  
-       copy_from_user(&v_buf[*ppos],buf,count);
+       if (copy_from_user(&v_buf[*ppos], buf, count))
+               return -EFAULT;
         
         return count;
  }
@@ -77,7 +56,6 @@ mmapper_mmap(struct file *file, struct vm_area_struct * vma)
         int ret = -EINVAL;
         int size;
  
-       lock_kernel();
         if (vma->vm_pgoff != 0)
                 goto out;
         
@@ -92,7 +70,6 @@ mmapper_mmap(struct file *file, struct vm_area_struct * vma)
                 goto out;
         ret = 0;
  out:
-       unlock_kernel();
         return ret;
  }
  
diff --git a/arch/um/kernel/signal_kern.c b/arch/um/kernel/signal_kern.c

index 7807a3e8c426e362233c797a563f89523c2b7439..03618bd13d55f1521fb59a9b29224dd7fd0fd9f3 100644 (file)
--- a/arch/um/kernel/signal_kern.c
+++ b/arch/um/kernel/signal_kern.c
@@ -87,12 +87,12 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr,
                 recalc_sigpending();
                 spin_unlock_irq(&current->sighand->siglock);
                 force_sigsegv(signr, current);
-       }
-       else if(!(ka->sa.sa_flags & SA_NODEFER)){
+       } else {
                 spin_lock_irq(&current->sighand->siglock);
                 sigorsets(&current->blocked, &current->blocked, 
                           &ka->sa.sa_mask);
-               sigaddset(&current->blocked, signr);
+                if(!(ka->sa.sa_flags & SA_NODEFER))
+                       sigaddset(&current->blocked, signr);
                 recalc_sigpending();
                 spin_unlock_irq(&current->sighand->siglock);
         }
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c

index 6dd9e5bf18ed7e92f38bb89a08ceafca56d8b757..f228f8b54194f3216b5382e83fea567af1471a2c 100644 (file)
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -61,7 +61,11 @@ void wait_stub_done(int pid, int sig, char * fname)
  
                  CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
          } while((n >= 0) && WIFSTOPPED(status) &&
-                (WSTOPSIG(status) == SIGVTALRM));
+                ((WSTOPSIG(status) == SIGVTALRM) ||
+                /* running UML inside a detached screen can cause
+                 * SIGWINCHes
+                 */
+                (WSTOPSIG(status) == SIGWINCH)));
  
          if((n < 0) || !WIFSTOPPED(status) ||
             (WSTOPSIG(status) != SIGUSR1 && WSTOPSIG(status) != SIGTRAP)){
diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c

index 9416e1c299269d6f988c9f75f8e6f8ae94e41575..4cca3e9c23fe065c3fb2c5a84464d7cd7ab44178 100644 (file)
--- a/arch/um/os-Linux/elf_aux.c
+++ b/arch/um/os-Linux/elf_aux.c
@@ -9,7 +9,6 @@
   */
  #include <elf.h>
  #include <stddef.h>
-#include <asm/elf.h>
  #include "init.h"
  #include "elf_user.h"
  #include "mem_user.h"
diff --git a/arch/v850/kernel/signal.c b/arch/v850/kernel/signal.c

index 37061e32e1a42d3878a8108ebb340af11ab066e2..633e4e1b825f875daa32f2759ab3f6b94d555209 100644 (file)
--- a/arch/v850/kernel/signal.c
+++ b/arch/v850/kernel/signal.c
@@ -462,13 +462,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
         else
                 setup_frame(sig, ka, oldset, regs);
  
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+       if (!(ka->sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked,sig);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
  }
  
  /*
diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c

index 2b5c4010ce38715737a549e0bf986a9cb8431292..acfdaa28791ed049abbdec7cecd02fcd6ffdd66b 100644 (file)
--- a/arch/x86_64/crypto/aes.c
+++ b/arch/x86_64/crypto/aes.c
@@ -322,3 +322,4 @@ module_exit(aes_fini);
  
  MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
  MODULE_LICENSE("GPL");
+MODULE_ALIAS("aes");
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig

index 776f3c866b7092d30dfa515f61deea41ff0cef04..b97a61e1c71ccd0f73a964d3db5cecb647c58a49 100644 (file)
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-rc3
-# Fri Jul 22 16:47:31 2005
+# Linux kernel version: 2.6.13-rc6-git3
+# Fri Aug 12 16:40:34 2005
  #
  CONFIG_X86_64=y
  CONFIG_64BIT=y
@@ -284,10 +284,6 @@ CONFIG_IPV6=y
  # Network testing
  #
  # CONFIG_NET_PKTGEN is not set
-CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_RX is not set
-# CONFIG_NETPOLL_TRAP is not set
-CONFIG_NET_POLL_CONTROLLER=y
  # CONFIG_HAMRADIO is not set
  # CONFIG_IRDA is not set
  # CONFIG_BT is not set
@@ -463,6 +459,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
  # CONFIG_MEGARAID_NEWGEN is not set
  # CONFIG_MEGARAID_LEGACY is not set
  CONFIG_SCSI_SATA=y
+# CONFIG_SCSI_SATA_AHCI is not set
  # CONFIG_SCSI_SATA_SVW is not set
  CONFIG_SCSI_ATA_PIIX=y
  # CONFIG_SCSI_SATA_NV is not set
@@ -492,6 +489,7 @@ CONFIG_SCSI_QLA2XXX=y
  # CONFIG_SCSI_QLA2300 is not set
  # CONFIG_SCSI_QLA2322 is not set
  # CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_QLA24XX is not set
  # CONFIG_SCSI_LPFC is not set
  # CONFIG_SCSI_DC395x is not set
  # CONFIG_SCSI_DC390T is not set
@@ -512,9 +510,11 @@ CONFIG_BLK_DEV_DM=y
  #
  # Fusion MPT device support
  #
-# CONFIG_FUSION is not set
-# CONFIG_FUSION_SPI is not set
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=y
  # CONFIG_FUSION_FC is not set
+CONFIG_FUSION_MAX_SGE=128
+# CONFIG_FUSION_CTL is not set
  
  #
  # IEEE 1394 (FireWire) support
@@ -585,6 +585,7 @@ CONFIG_8139TOO=y
  # CONFIG_ACENIC is not set
  # CONFIG_DL2K is not set
  CONFIG_E1000=y
+# CONFIG_E1000_NAPI is not set
  # CONFIG_NS83820 is not set
  # CONFIG_HAMACHI is not set
  # CONFIG_YELLOWFIN is not set
@@ -624,6 +625,10 @@ CONFIG_S2IO=m
  # CONFIG_NET_FC is not set
  # CONFIG_SHAPER is not set
  CONFIG_NETCONSOLE=y
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NET_POLL_CONTROLLER=y
  
  #
  # ISDN subsystem
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c

index b98b6d2462f6b43fb8b6ecd7f07a50c6b00cb277..2a925e2af390bddc1ac34ba37882f8138703de1a 100644 (file)
--- a/arch/x86_64/ia32/ptrace32.c
+++ b/arch/x86_64/ia32/ptrace32.c
@@ -43,11 +43,11 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
         switch (regno) {
         case offsetof(struct user32, regs.fs):
                 if (val && (val & 3) != 3) return -EIO; 
-               child->thread.fs = val & 0xffff; 
+               child->thread.fsindex = val & 0xffff;
                 break;
         case offsetof(struct user32, regs.gs):
                 if (val && (val & 3) != 3) return -EIO; 
-               child->thread.gs = val & 0xffff;
+               child->thread.gsindex = val & 0xffff;
                 break;
         case offsetof(struct user32, regs.ds):
                 if (val && (val & 3) != 3) return -EIO; 
@@ -138,10 +138,10 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
  
         switch (regno) {
         case offsetof(struct user32, regs.fs):
-               *val = child->thread.fs; 
+               *val = child->thread.fsindex;
                 break;
         case offsetof(struct user32, regs.gs):
-               *val = child->thread.gs;
+               *val = child->thread.gsindex;
                 break;
         case offsetof(struct user32, regs.ds):
                 *val = child->thread.ds;
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c

index 6ded3a50dfe63682174470acc3e78e2cfdbfd8b5..b548dea4e5b95e30c01f369f0c95dcd06db67551 100644 (file)
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -185,6 +185,40 @@ unsigned long __init e820_end_of_ram(void)
  }
  
  /* 
+ * Compute how much memory is missing in a range.
+ * Unlike the other functions in this file the arguments are in page numbers.
+ */
+unsigned long __init
+e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
+{
+       unsigned long ram = 0;
+       unsigned long start = start_pfn << PAGE_SHIFT;
+       unsigned long end = end_pfn << PAGE_SHIFT;
+       int i;
+       for (i = 0; i < e820.nr_map; i++) {
+               struct e820entry *ei = &e820.map[i];
+               unsigned long last, addr;
+
+               if (ei->type != E820_RAM ||
+                   ei->addr+ei->size <= start ||
+                   ei->addr >= end)
+                       continue;
+
+               addr = round_up(ei->addr, PAGE_SIZE);
+               if (addr < start)
+                       addr = start;
+
+               last = round_down(ei->addr + ei->size, PAGE_SIZE);
+               if (last >= end)
+                       last = end;
+
+               if (last > addr)
+                       ram += last - addr;
+       }
+       return ((end - start) - ram) >> PAGE_SHIFT;
+}
+
+/*
   * Mark e820 reserved areas as busy for the resource manager.
   */
  void __init e820_reserve_resources(void)
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c

index 3b267c91bb0c711bde04a1efd619ad3da8c2b6eb..8aa56736cde3805fe8b38b84733e176914ae14be 100644 (file)
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -36,6 +36,7 @@ static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
  static unsigned long console_logged;
  static int notify_user;
  static int rip_msr;
+static int mce_bootlog;
  
  /*
   * Lockless MCE logging infrastructure.
@@ -197,10 +198,11 @@ void do_machine_check(struct pt_regs * regs, long error_code)
                         rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
  
                 mce_get_rip(&m, regs);
-               if (error_code != -1)
+               if (error_code >= 0)
                         rdtscll(m.tsc);
                 wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
-               mce_log(&m);
+               if (error_code != -2)
+                       mce_log(&m);
  
                 /* Did this bank cause the exception? */
                 /* Assume that the bank with uncorrectable errors did it,
@@ -315,7 +317,7 @@ static void mce_init(void *dummy)
  
         /* Log the machine checks left over from the previous reset.
            This also clears all registers */
-       do_machine_check(NULL, -1);
+       do_machine_check(NULL, mce_bootlog ? -1 : -2);
  
         set_in_cr4(X86_CR4_MCE);
  
@@ -476,11 +478,17 @@ static int __init mcheck_disable(char *str)
  }
  
  /* mce=off disables machine check. Note you can reenable it later
-   using sysfs */
+   using sysfs.
+   mce=bootlog logs MCEs left over from before booting. Disabled by default
+   to work around buggy BIOSes that leave bogus MCEs.  */
  static int __init mcheck_enable(char *str)
  {
+       if (*str == '=')
+               str++;
         if (!strcmp(str, "off"))
                 mce_dont_init = 1;
+       else if (!strcmp(str, "bootlog"))
+               mce_bootlog = 1;
         else
                 printk("mce= argument %s ignored. Please use /sys", str); 
         return 0;
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c

index 0aa526298e92948cb5bd909304ff635e575f2360..116a491e2961bbca273d7319559e285be6e2aed0 100644 (file)
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -645,15 +645,15 @@ void __init setup_arch(char **cmdline_p)
                 }
         }
  #endif
-
-       sparse_init();
-
  #ifdef CONFIG_KEXEC
         if (crashk_res.start != crashk_res.end) {
                 reserve_bootmem(crashk_res.start,
                         crashk_res.end - crashk_res.start + 1);
         }
  #endif
+
+       sparse_init();
+
         paging_init();
  
         check_ioapic();
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c

index 98590a989f3d759311935f6142191761bd42c376..d642fbf3da29d7823cd2a22f4329858aad7d76f3 100644 (file)
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -394,10 +394,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
  #endif
         ret = setup_rt_frame(sig, ka, info, oldset, regs);
  
-       if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
+       if (ret) {
                 spin_lock_irq(&current->sighand->siglock);
                 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
-               sigaddset(&current->blocked,sig);
+               if (!(ka->sa.sa_flags & SA_NODEFER))
+                       sigaddset(&current->blocked,sig);
                 recalc_sigpending();
                 spin_unlock_irq(&current->sighand->siglock);
         }
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c

index 6e4807d64d4618180f100befb9854299d742f0c4..fa25e39fe54dfc4aab4390c95b45039da0202558 100644 (file)
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -334,7 +334,7 @@ static void __cpuinit tsc_sync_wait(void)
  {
         if (notscsync || !cpu_has_tsc)
                 return;
-       sync_tsc(boot_cpu_id);
+       sync_tsc(0);
  }
  
  static __init int notscsync_setup(char *s)
@@ -492,6 +492,14 @@ void __cpuinit start_secondary(void)
          */
         set_cpu_sibling_map(smp_processor_id());
  
+       /* 
+        * Wait for TSC sync to not schedule things before.
+        * We still process interrupts, which could see an inconsistent
+        * time in that window unfortunately. 
+        * Do this here because TSC sync has global unprotected state.
+        */
+       tsc_sync_wait();
+
         /*
          * We need to hold call_lock, so there is no inconsistency
          * between the time smp_call_function() determines number of
@@ -509,13 +517,6 @@ void __cpuinit start_secondary(void)
         per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
         unlock_ipi_call_lock();
  
-       mb();
-
-       /* Wait for TSC sync to not schedule things before.
-          We still process interrupts, which could see an inconsistent
-          time in that window unfortunately. */
-       tsc_sync_wait();
-
         cpu_idle();
  }
  
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c

index 13792721037e1692670732b49d7c92b6d39b68fa..ca914c3bd49cf9ee3042a06426facc57516830fe 100644 (file)
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -211,9 +211,7 @@ int unhandled_signal(struct task_struct *tsk, int sig)
  {
         if (tsk->pid == 1)
                 return 1;
-       /* Warn for strace, but not for gdb */
-       if (!test_ti_thread_flag(tsk->thread_info, TIF_SYSCALL_TRACE) &&
-           (tsk->ptrace & PT_PTRACED))
+       if (tsk->ptrace & PT_PTRACED)
                 return 0;
         return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) ||
                 (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL);
@@ -439,13 +437,13 @@ good_area:
          * the fault.
          */
         switch (handle_mm_fault(mm, vma, address, write)) {
-       case 1:
+       case VM_FAULT_MINOR:
                 tsk->min_flt++;
                 break;
-       case 2:
+       case VM_FAULT_MAJOR:
                 tsk->maj_flt++;
                 break;
-       case 0:
+       case VM_FAULT_SIGBUS:
                 goto do_sigbus;
         default:
                 goto out_of_memory;
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c

index 72e4b364ed739bd25a4c31800664445801d8cc3b..aa4a5189ecee2640503d053bf6c79b233aa99f99 100644 (file)
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -322,18 +322,26 @@ void zap_low_mappings(void)
  void __init paging_init(void)
  {
         {
-               unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+               unsigned long zones_size[MAX_NR_ZONES];
+               unsigned long holes[MAX_NR_ZONES];
                 unsigned int max_dma;
  
+               memset(zones_size, 0, sizeof(zones_size));
+               memset(holes, 0, sizeof(holes));
+
                 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
  
-               if (end_pfn < max_dma)
+               if (end_pfn < max_dma) {
                         zones_size[ZONE_DMA] = end_pfn;
-               else {
+                       holes[ZONE_DMA] = e820_hole_size(0, end_pfn);
+               } else {
                         zones_size[ZONE_DMA] = max_dma;
+                       holes[ZONE_DMA] = e820_hole_size(0, max_dma);
                         zones_size[ZONE_NORMAL] = end_pfn - max_dma;
+                       holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn);
                 }
-               free_area_init(zones_size);
+               free_area_init_node(0, NODE_DATA(0), zones_size,
+                        __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
         }
         return;
  }
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c

index 70cb2904a90f9dd8c9310bb368976bc67124d784..6a156f5692ae78a520a4dc0694a78f70b4a6c71a 100644 (file)
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -126,9 +126,11 @@ void __init setup_node_zones(int nodeid)
  { 
         unsigned long start_pfn, end_pfn; 
         unsigned long zones[MAX_NR_ZONES];
+       unsigned long holes[MAX_NR_ZONES];
         unsigned long dma_end_pfn;
  
         memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); 
+       memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES);
  
         start_pfn = node_start_pfn(nodeid);
         end_pfn = node_end_pfn(nodeid);
@@ -139,13 +141,17 @@ void __init setup_node_zones(int nodeid)
         dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; 
         if (start_pfn < dma_end_pfn) { 
                 zones[ZONE_DMA] = dma_end_pfn - start_pfn;
+               holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn);
                 zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; 
+               holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn);
+
         } else { 
                 zones[ZONE_NORMAL] = end_pfn - start_pfn; 
+               holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn);
         } 
      
         free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
-                           start_pfn, NULL); 
+                           start_pfn, holes);
  } 
  
  void __init numa_init_array(void)
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c

index c2c38b579939432627a006cb8b2307ca6b9cd6c1..d80c323669e0c5e741a33f8c5700b2c8b8784007 100644 (file)
--- a/arch/x86_64/pci/k8-bus.c
+++ b/arch/x86_64/pci/k8-bus.c
@@ -47,13 +47,22 @@ fill_mp_bus_to_cpumask(void)
                          * if there are no busses hanging off of the current
                          * ldt link then both the secondary and subordinate
                          * bus number fields are set to 0.
+                        * 
+                        * RED-PEN
+                        * This is slightly broken because it assumes
+                        * HT node IDs == Linux node ids, which is not always
+                        * true. However it is probably mostly true.
                          */
                         if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0
                                 && SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) {
                                 for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus);
                                      j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus);
-                                    j++)
-                                       pci_bus_to_node[j] = NODE_ID(nid);
+                                    j++) { 
+                                       int node = NODE_ID(nid);
+                                       if (!node_online(node))
+                                               node = 0;
+                                       pci_bus_to_node[j] = node;
+                               }               
                         }
                 }
         }
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c

index df6e1e17b096ddc6093978f8c313c91dc7e1c5d2..dc42cede939462986a0acba34e756ee0614d4e3a 100644 (file)
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -702,12 +702,11 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
         if (ka.sa.sa_flags & SA_ONESHOT)
                 ka.sa.sa_handler = SIG_DFL;
  
-       if (!(ka.sa.sa_flags & SA_NODEFER)) {
-               spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked, &current->blocked, &ka.sa.sa_mask);
+       spin_lock_irq(&current->sighand->siglock);
+       sigorsets(&current->blocked, &current->blocked, &ka.sa.sa_mask);
+       if (!(ka.sa.sa_flags & SA_NODEFER))
                 sigaddset(&current->blocked, signr);
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
         return 1;
  }
diff --git a/drivers/Kconfig b/drivers/Kconfig

index cecab0acc3fe6baf22311c574abdc96ede313b74..46d655fab1159c54768d08d226ac3bc36a393b2a 100644 (file)
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -48,6 +48,8 @@ source "drivers/hwmon/Kconfig"
  
  source "drivers/misc/Kconfig"
  
+source "drivers/mfd/Kconfig"
+
  source "drivers/media/Kconfig"
  
  source "drivers/video/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile

index 126a851d56538f8c336da54d3091d0491f470094..9663132ed82547d3584a280b19c14a26c0895a0a 100644 (file)
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -26,7 +26,7 @@ obj-$(CONFIG_FB_INTEL)          += video/intelfb/
  obj-$(CONFIG_SERIO)            += input/serio/
  obj-y                          += serial/
  obj-$(CONFIG_PARPORT)          += parport/
-obj-y                          += base/ block/ misc/ net/ media/
+obj-y                          += base/ block/ misc/ mfd/ net/ media/
  obj-$(CONFIG_NUBUS)            += nubus/
  obj-$(CONFIG_ATM)              += atm/
  obj-$(CONFIG_PPC_PMAC)         += macintosh/
diff --git a/drivers/acorn/block/fd1772.c b/drivers/acorn/block/fd1772.c

index 3cd2e968e96c31dd24477343470834091f29ee5d..c0a37d98b4f3d3c3c485de77ac2391295089852b 100644 (file)
--- a/drivers/acorn/block/fd1772.c
+++ b/drivers/acorn/block/fd1772.c
@@ -1283,8 +1283,7 @@ static void do_fd_request(request_queue_t* q)
         if (fdc_busy) return;
         save_flags(flags);
         cli();
-       while (fdc_busy)
-               sleep_on(&fdc_wait);
+       wait_event(fdc_wait, !fdc_busy);
         fdc_busy = 1;
         ENABLE_IRQ();
         restore_flags(flags);
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig

index 986410e7b48385301a9675a2c1a70064137138a8..ba13896cae407732b29342116790613fe9a5d083 100644 (file)
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -133,9 +133,10 @@ config ACPI_HOTKEY
         depends on ACPI_INTERPRETER
         depends on EXPERIMENTAL
         depends on !IA64_SGI_SN
-       default m
+       default n
         help
-       ACPI generic hotkey
+         Experimental consolidated hotkey driver.
+         If you are unsure, say N.
  
  config ACPI_FAN
         tristate "Fan"
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c

index 0f45d45f05a049490d533b31d008dd4dfebe75b1..8162fd0c21a79fed1c11da240d63025556595c63 100644 (file)
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -26,6 +26,9 @@
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/init.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
  #include <acpi/acpi_bus.h>
  #include <acpi/acpi_drivers.h>
  
@@ -33,6 +36,9 @@
  #define ACPI_BUTTON_COMPONENT          0x00080000
  #define ACPI_BUTTON_DRIVER_NAME                "ACPI Button Driver"
  #define ACPI_BUTTON_CLASS              "button"
+#define ACPI_BUTTON_FILE_INFO          "info"
+#define ACPI_BUTTON_FILE_STATE         "state"
+#define ACPI_BUTTON_TYPE_UNKNOWN       0x00
  #define ACPI_BUTTON_NOTIFY_STATUS      0x80
  
  #define ACPI_BUTTON_SUBCLASS_POWER     "power"
@@ -64,6 +70,8 @@ MODULE_LICENSE("GPL");
  
  static int acpi_button_add (struct acpi_device *device);
  static int acpi_button_remove (struct acpi_device *device, int type);
+static int acpi_button_info_open_fs(struct inode *inode, struct file *file);
+static int acpi_button_state_open_fs(struct inode *inode, struct file *file);
  
  static struct acpi_driver acpi_button_driver = {
         .name =         ACPI_BUTTON_DRIVER_NAME,
@@ -82,6 +90,179 @@ struct acpi_button {
         unsigned long           pushed;
  };
  
+static struct file_operations acpi_button_info_fops = {
+       .open           = acpi_button_info_open_fs,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static struct file_operations acpi_button_state_fops = {
+       .open           = acpi_button_state_open_fs,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+/* --------------------------------------------------------------------------
+                              FS Interface (/proc)
+   -------------------------------------------------------------------------- */
+
+static struct proc_dir_entry   *acpi_button_dir;
+
+static int acpi_button_info_seq_show(struct seq_file *seq, void *offset)
+{
+       struct acpi_button      *button = (struct acpi_button *) seq->private;
+
+       ACPI_FUNCTION_TRACE("acpi_button_info_seq_show");
+
+       if (!button || !button->device)
+               return_VALUE(0);
+
+       seq_printf(seq, "type:                    %s\n", 
+               acpi_device_name(button->device));
+
+       return_VALUE(0);
+}
+
+static int acpi_button_info_open_fs(struct inode *inode, struct file *file)
+{
+       return single_open(file, acpi_button_info_seq_show, PDE(inode)->data);
+}
+       
+static int acpi_button_state_seq_show(struct seq_file *seq, void *offset)
+{
+       struct acpi_button      *button = (struct acpi_button *) seq->private;
+       acpi_status             status;
+       unsigned long           state;
+
+       ACPI_FUNCTION_TRACE("acpi_button_state_seq_show");
+
+       if (!button || !button->device)
+               return_VALUE(0);
+
+       status = acpi_evaluate_integer(button->handle,"_LID",NULL,&state);
+       if (ACPI_FAILURE(status)) {
+               seq_printf(seq, "state:      unsupported\n");
+       }
+       else{
+               seq_printf(seq, "state:      %s\n", (state ? "open" : "closed")); 
+       }
+
+       return_VALUE(0);
+}
+
+static int acpi_button_state_open_fs(struct inode *inode, struct file *file)
+{
+       return single_open(file, acpi_button_state_seq_show, PDE(inode)->data);
+}
+
+static struct proc_dir_entry *acpi_power_dir;
+static struct proc_dir_entry *acpi_sleep_dir;
+static struct proc_dir_entry *acpi_lid_dir;
+
+static int
+acpi_button_add_fs (
+       struct acpi_device      *device)
+{
+       struct proc_dir_entry   *entry = NULL;
+       struct acpi_button      *button = NULL;
+
+       ACPI_FUNCTION_TRACE("acpi_button_add_fs");
+
+       if (!device || !acpi_driver_data(device))
+               return_VALUE(-EINVAL);
+
+       button = acpi_driver_data(device);
+
+       switch (button->type) {
+       case ACPI_BUTTON_TYPE_POWER:
+       case ACPI_BUTTON_TYPE_POWERF:
+               if (!acpi_power_dir)
+                       acpi_power_dir = proc_mkdir(ACPI_BUTTON_SUBCLASS_POWER, 
+                               acpi_button_dir);
+               entry = acpi_power_dir;
+               break;
+       case ACPI_BUTTON_TYPE_SLEEP:
+       case ACPI_BUTTON_TYPE_SLEEPF:
+               if (!acpi_sleep_dir)
+                       acpi_sleep_dir = proc_mkdir(ACPI_BUTTON_SUBCLASS_SLEEP, 
+                               acpi_button_dir);
+               entry = acpi_sleep_dir;
+               break;
+       case ACPI_BUTTON_TYPE_LID:
+               if (!acpi_lid_dir)
+                       acpi_lid_dir = proc_mkdir(ACPI_BUTTON_SUBCLASS_LID, 
+                               acpi_button_dir);
+               entry = acpi_lid_dir;
+               break;
+       }
+
+       if (!entry)
+               return_VALUE(-ENODEV);
+       entry->owner = THIS_MODULE;
+
+       acpi_device_dir(device) = proc_mkdir(acpi_device_bid(device), entry);
+       if (!acpi_device_dir(device))
+               return_VALUE(-ENODEV);
+       acpi_device_dir(device)->owner = THIS_MODULE;
+
+       /* 'info' [R] */
+       entry = create_proc_entry(ACPI_BUTTON_FILE_INFO,
+               S_IRUGO, acpi_device_dir(device));
+       if (!entry)
+               ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+                       "Unable to create '%s' fs entry\n",
+                       ACPI_BUTTON_FILE_INFO));
+       else {
+               entry->proc_fops = &acpi_button_info_fops;
+               entry->data = acpi_driver_data(device);
+               entry->owner = THIS_MODULE;
+       }
+
+       /* show lid state [R] */
+       if (button->type == ACPI_BUTTON_TYPE_LID) {
+               entry = create_proc_entry(ACPI_BUTTON_FILE_STATE,
+                       S_IRUGO, acpi_device_dir(device));
+               if (!entry)
+                       ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+                               "Unable to create '%s' fs entry\n",
+                               ACPI_BUTTON_FILE_INFO));
+               else {
+                       entry->proc_fops = &acpi_button_state_fops;
+                       entry->data = acpi_driver_data(device);
+                       entry->owner = THIS_MODULE;
+               }
+       }
+
+       return_VALUE(0);
+}
+
+
+static int
+acpi_button_remove_fs (
+       struct acpi_device      *device)
+{
+       struct acpi_button      *button = NULL;
+
+       ACPI_FUNCTION_TRACE("acpi_button_remove_fs");
+
+       button = acpi_driver_data(device);
+       if (acpi_device_dir(device)) {
+               if (button->type == ACPI_BUTTON_TYPE_LID)
+                       remove_proc_entry(ACPI_BUTTON_FILE_STATE,
+                                            acpi_device_dir(device));
+               remove_proc_entry(ACPI_BUTTON_FILE_INFO,
+                                    acpi_device_dir(device));
+
+               remove_proc_entry(acpi_device_bid(device),
+                                    acpi_device_dir(device)->parent);
+               acpi_device_dir(device) = NULL;
+       }
+
+       return_VALUE(0);
+}
+
+
  /* --------------------------------------------------------------------------
                                  Driver Interface
     -------------------------------------------------------------------------- */
@@ -121,7 +302,8 @@ acpi_button_notify_fixed (
         
         ACPI_FUNCTION_TRACE("acpi_button_notify_fixed");
  
-       BUG_ON(!button);
+       if (!button)
+               return_ACPI_STATUS(AE_BAD_PARAMETER);
  
         acpi_button_notify(button->handle, ACPI_BUTTON_NOTIFY_STATUS, button);
  
@@ -197,6 +379,10 @@ acpi_button_add (
                 goto end;
         }
  
+       result = acpi_button_add_fs(device);
+       if (result)
+               goto end;
+
         switch (button->type) {
         case ACPI_BUTTON_TYPE_POWERF:
                 status = acpi_install_fixed_event_handler (
@@ -240,6 +426,7 @@ acpi_button_add (
  
  end:
         if (result) {
+               acpi_button_remove_fs(device);
                 kfree(button);
         }
  
@@ -280,6 +467,8 @@ acpi_button_remove (struct acpi_device *device, int type)
                 ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
                         "Error removing notify handler\n"));
  
+       acpi_button_remove_fs(device);  
+
         kfree(button);
  
         return_VALUE(0);
@@ -293,14 +482,20 @@ acpi_button_init (void)
  
         ACPI_FUNCTION_TRACE("acpi_button_init");
  
+       acpi_button_dir = proc_mkdir(ACPI_BUTTON_CLASS, acpi_root_dir);
+       if (!acpi_button_dir)
+               return_VALUE(-ENODEV);
+       acpi_button_dir->owner = THIS_MODULE;
         result = acpi_bus_register_driver(&acpi_button_driver);
         if (result < 0) {
+               remove_proc_entry(ACPI_BUTTON_CLASS, acpi_root_dir);
                 return_VALUE(-ENODEV);
         }
  
         return_VALUE(0);
  }
  
+
  static void __exit
  acpi_button_exit (void)
  {
@@ -308,8 +503,17 @@ acpi_button_exit (void)
  
         acpi_bus_unregister_driver(&acpi_button_driver);
  
+       if (acpi_power_dir) 
+               remove_proc_entry(ACPI_BUTTON_SUBCLASS_POWER, acpi_button_dir);
+       if (acpi_sleep_dir)
+               remove_proc_entry(ACPI_BUTTON_SUBCLASS_SLEEP, acpi_button_dir);
+       if (acpi_lid_dir)
+               remove_proc_entry(ACPI_BUTTON_SUBCLASS_LID, acpi_button_dir);
+       remove_proc_entry(ACPI_BUTTON_CLASS, acpi_root_dir);
+
         return_VOID;
  }
  
+
  module_init(acpi_button_init);
  module_exit(acpi_button_exit);
diff --git a/drivers/acpi/dispatcher/dswload.c b/drivers/acpi/dispatcher/dswload.c

index 1ac197ccfc8063a47bc93b91278f32ff8df734ce..d11620018421486f1559c3fc4be1afbd7578a8ef 100644 (file)
--- a/drivers/acpi/dispatcher/dswload.c
+++ b/drivers/acpi/dispatcher/dswload.c
@@ -491,12 +491,6 @@ acpi_ds_load2_begin_op (
                 if ((!(walk_state->op_info->flags & AML_NSOPCODE) &&
                           (walk_state->opcode != AML_INT_NAMEPATH_OP)) ||
                         (!(walk_state->op_info->flags & AML_NAMED))) {
-                       if ((walk_state->op_info->class == AML_CLASS_EXECUTE) ||
-                               (walk_state->op_info->class == AML_CLASS_CONTROL)) {
-                               ACPI_REPORT_WARNING ((
-                                       "Encountered executable code at module level, [%s]\n",
-                                       acpi_ps_get_opcode_name (walk_state->opcode)));
-                       }
                         return_ACPI_STATUS (AE_OK);
                 }
  
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c

index 2dadb7f632693e0edc03c522a914a1dff5deab35..1ac5731d45e5aece6a021e7bb548139a9034b9b5 100644 (file)
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -76,13 +76,14 @@ static int acpi_ec_remove (struct acpi_device *device, int type);
  static int acpi_ec_start (struct acpi_device *device);
  static int acpi_ec_stop (struct acpi_device *device, int type);
  static int acpi_ec_burst_add ( struct acpi_device *device);
+static int acpi_ec_polling_add ( struct acpi_device    *device);
  
  static struct acpi_driver acpi_ec_driver = {
         .name =         ACPI_EC_DRIVER_NAME,
         .class =        ACPI_EC_CLASS,
         .ids =          ACPI_EC_HID,
         .ops =          {
-                               .add =          acpi_ec_burst_add,
+                               .add =          acpi_ec_polling_add,
                                 .remove =       acpi_ec_remove,
                                 .start =        acpi_ec_start,
                                 .stop =         acpi_ec_stop,
@@ -164,7 +165,7 @@ static union acpi_ec        *ec_ecdt;
  
  /* External interfaces use first EC only, so remember */
  static struct acpi_device *first_ec;
-static int acpi_ec_polling_mode;
+static int acpi_ec_polling_mode = EC_POLLING;
  
  /* --------------------------------------------------------------------------
                               Transaction Management
@@ -1710,11 +1711,24 @@ static int __init acpi_fake_ecdt_setup(char *str)
         acpi_fake_ecdt_enabled = 1;
         return 0;
  }
+
  __setup("acpi_fake_ecdt", acpi_fake_ecdt_setup);
  static int __init acpi_ec_set_polling_mode(char *str)
  {
-       acpi_ec_polling_mode = EC_POLLING;
-       acpi_ec_driver.ops.add = acpi_ec_polling_add;
+       int burst;
+
+       if (!get_option(&str, &burst))
+               return 0;
+
+       if (burst) {
+               acpi_ec_polling_mode = EC_BURST;
+               acpi_ec_driver.ops.add = acpi_ec_burst_add;
+       } else {
+               acpi_ec_polling_mode = EC_POLLING;
+               acpi_ec_driver.ops.add = acpi_ec_polling_add;
+       }
+       printk(KERN_INFO PREFIX "EC %s mode.\n",
+               burst ? "burst": "polling");
         return 0;
  }
-__setup("ec_polling", acpi_ec_set_polling_mode);
+__setup("ec_burst=", acpi_ec_set_polling_mode);
diff --git a/drivers/acpi/hotkey.c b/drivers/acpi/hotkey.c

index babdf762eadba7789281718348388ab074260ea1..1f76a40badecea06e7de62811df078fdb1cb23c9 100644 (file)
--- a/drivers/acpi/hotkey.c
+++ b/drivers/acpi/hotkey.c
@@ -1,5 +1,5 @@
-/* 
- *  hotkey.c - ACPI Hotkey Driver ($Revision:$)
+/*
+ *  hotkey.c - ACPI Hotkey Driver ($Revision: 0.2 $)
   *
   *  Copyright (C) 2004 Luming Yu <luming.yu@intel.com>
   *
@@ -51,17 +51,18 @@
  #define ACPI_HOTKEY_POLLING 0x2
  #define ACPI_UNDEFINED_EVENT    0xf
  
-#define MAX_CONFIG_RECORD_LEN   80
-#define MAX_NAME_PATH_LEN   80
-#define MAX_CALL_PARM       80
+#define RESULT_STR_LEN     80
  
-#define IS_EVENT(e)       0xff /* ((e) & 0x40000000)  */
-#define IS_POLL(e)      0xff   /* (~((e) & 0x40000000))  */
+#define ACTION_METHOD  0
+#define POLL_METHOD    1
  
+#define IS_EVENT(e)            ((e) <= 10000 && (e) >0)
+#define IS_POLL(e)             ((e) > 10000)
+#define IS_OTHERS(e)           ((e)<=0 || (e)>=20000)
  #define _COMPONENT              ACPI_HOTKEY_COMPONENT
  ACPI_MODULE_NAME("acpi_hotkey")
  
-    MODULE_AUTHOR("luming.yu@intel.com");
+MODULE_AUTHOR("luming.yu@intel.com");
  MODULE_DESCRIPTION(ACPI_HOTK_NAME);
  MODULE_LICENSE("GPL");
  
@@ -114,7 +115,7 @@ struct acpi_event_hotkey {
         char *action_method;    /* action method */
  };
  
-/* 
+/*
   * There are two ways to poll status
   * 1. directy call read_xxx method, without any arguments passed in
   * 2. call write_xxx method, with arguments passed in, you need
@@ -131,7 +132,7 @@ struct acpi_polling_hotkey {
         char *poll_method;      /* poll method */
         acpi_handle action_handle;      /* acpi handle attached action method */
         char *action_method;    /* action method */
-       void *poll_result;      /* polling_result */
+       union acpi_object *poll_result; /* polling_result */
         struct proc_dir_entry *proc;
  };
  
@@ -162,20 +163,25 @@ static struct acpi_driver hotkey_driver = {
                 },
  };
  
+static void free_hotkey_device(union acpi_hotkey *key);
+static void free_hotkey_buffer(union acpi_hotkey *key);
+static void free_poll_hotkey_buffer(union acpi_hotkey *key);
  static int hotkey_open_config(struct inode *inode, struct file *file);
+static int hotkey_poll_open_config(struct inode *inode, struct file *file);
  static ssize_t hotkey_write_config(struct file *file,
                                    const char __user * buffer,
                                    size_t count, loff_t * data);
-static ssize_t hotkey_write_poll_config(struct file *file,
-                                       const char __user * buffer,
-                                       size_t count, loff_t * data);
  static int hotkey_info_open_fs(struct inode *inode, struct file *file);
  static int hotkey_action_open_fs(struct inode *inode, struct file *file);
  static ssize_t hotkey_execute_aml_method(struct file *file,
                                          const char __user * buffer,
                                          size_t count, loff_t * data);
  static int hotkey_config_seq_show(struct seq_file *seq, void *offset);
+static int hotkey_poll_config_seq_show(struct seq_file *seq, void *offset);
  static int hotkey_polling_open_fs(struct inode *inode, struct file *file);
+static union acpi_hotkey *get_hotkey_by_event(struct
+                             acpi_hotkey_list
+                             *hotkey_list, int event);
  
  /* event based config */
  static struct file_operations hotkey_config_fops = {
@@ -188,9 +194,9 @@ static struct file_operations hotkey_config_fops = {
  
  /* polling based config */
  static struct file_operations hotkey_poll_config_fops = {
-       .open = hotkey_open_config,
+       .open = hotkey_poll_open_config,
         .read = seq_read,
-       .write = hotkey_write_poll_config,
+       .write = hotkey_write_config,
         .llseek = seq_lseek,
         .release = single_release,
  };
@@ -227,7 +233,7 @@ static int hotkey_info_seq_show(struct seq_file *seq, void *offset)
  {
         ACPI_FUNCTION_TRACE("hotkey_info_seq_show");
  
-       seq_printf(seq, "Hotkey generic driver ver: %s", HOTKEY_ACPI_VERSION);
+       seq_printf(seq, "Hotkey generic driver ver: %s\n", HOTKEY_ACPI_VERSION);
  
         return_VALUE(0);
  }
@@ -239,27 +245,35 @@ static int hotkey_info_open_fs(struct inode *inode, struct file *file)
  
  static char *format_result(union acpi_object *object)
  {
-       char *buf = (char *)kmalloc(sizeof(union acpi_object), GFP_KERNEL);
-
-       memset(buf, 0, sizeof(union acpi_object));
+       char *buf = NULL;
+       
+       buf = (char *)kmalloc(RESULT_STR_LEN, GFP_KERNEL);
+       if (buf)
+               memset(buf, 0, RESULT_STR_LEN);
+       else
+               goto do_fail;
  
         /* Now, just support integer type */
         if (object->type == ACPI_TYPE_INTEGER)
-               sprintf(buf, "%d", (u32) object->integer.value);
-
-       return buf;
+               sprintf(buf, "%d\n", (u32) object->integer.value);
+do_fail:
+       return (buf);
  }
  
  static int hotkey_polling_seq_show(struct seq_file *seq, void *offset)
  {
         struct acpi_polling_hotkey *poll_hotkey =
             (struct acpi_polling_hotkey *)seq->private;
+       char *buf;
  
         ACPI_FUNCTION_TRACE("hotkey_polling_seq_show");
  
-       if (poll_hotkey->poll_result)
-               seq_printf(seq, "%s", format_result(poll_hotkey->poll_result));
-
+       if (poll_hotkey->poll_result){
+               buf = format_result(poll_hotkey->poll_result);
+               if(buf)
+                       seq_printf(seq, "%s", buf);
+               kfree(buf);
+       }
         return_VALUE(0);
  }
  
@@ -276,19 +290,19 @@ static int hotkey_action_open_fs(struct inode *inode, struct file *file)
  /* Mapping external hotkey number to standardized hotkey event num */
  static int hotkey_get_internal_event(int event, struct acpi_hotkey_list *list)
  {
-       struct list_head *entries, *next;
-       int val = 0;
+       struct list_head *entries;
+       int val = -1;
  
         ACPI_FUNCTION_TRACE("hotkey_get_internal_event");
  
-       list_for_each_safe(entries, next, list->entries) {
+       list_for_each(entries, list->entries) {
                 union acpi_hotkey *key =
                     container_of(entries, union acpi_hotkey, entries);
                 if (key->link.hotkey_type == ACPI_HOTKEY_EVENT
-                   && key->event_hotkey.external_hotkey_num == event)
+                   && key->event_hotkey.external_hotkey_num == event){
                         val = key->link.hotkey_standard_num;
-               else
-                       val = -1;
+                       break;
+               }
         }
  
         return_VALUE(val);
@@ -306,7 +320,7 @@ acpi_hotkey_notify_handler(acpi_handle handle, u32 event, void *data)
                 return_VOID;
  
         internal_event = hotkey_get_internal_event(event, &global_hotkey_list);
-       acpi_bus_generate_event(device, event, 0);
+       acpi_bus_generate_event(device, internal_event, 0);
  
         return_VOID;
  }
@@ -329,13 +343,17 @@ static int auto_hotkey_remove(struct acpi_device *device, int type)
  static int create_polling_proc(union acpi_hotkey *device)
  {
         struct proc_dir_entry *proc;
+       char  proc_name[80];
         mode_t mode;
  
         ACPI_FUNCTION_TRACE("create_polling_proc");
         mode = S_IFREG | S_IRUGO | S_IWUGO;
  
-       proc = create_proc_entry(device->poll_hotkey.action_method,
-                                mode, hotkey_proc_dir);
+       sprintf(proc_name, "%d", device->link.hotkey_standard_num);
+       /*
+       strcat(proc_name, device->poll_hotkey.poll_method);
+       */
+       proc = create_proc_entry(proc_name, mode, hotkey_proc_dir);
  
         if (!proc) {
                 ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
@@ -353,23 +371,6 @@ static int create_polling_proc(union acpi_hotkey *device)
         return_VALUE(0);
  }
  
-static int is_valid_acpi_path(const char *pathname)
-{
-       acpi_handle handle;
-       acpi_status status;
-       ACPI_FUNCTION_TRACE("is_valid_acpi_path");
-
-       status = acpi_get_handle(NULL, (char *)pathname, &handle);
-       return_VALUE(!ACPI_FAILURE(status));
-}
-
-static int is_valid_hotkey(union acpi_hotkey *device)
-{
-       ACPI_FUNCTION_TRACE("is_valid_hotkey");
-       /* Implement valid check */
-       return_VALUE(1);
-}
-
  static int hotkey_add(union acpi_hotkey *device)
  {
         int status = 0;
@@ -378,15 +379,11 @@ static int hotkey_add(union acpi_hotkey *device)
         ACPI_FUNCTION_TRACE("hotkey_add");
  
         if (device->link.hotkey_type == ACPI_HOTKEY_EVENT) {
-               status =
-                   acpi_bus_get_device(device->event_hotkey.bus_handle, &dev);
-               if (status)
-                       return_VALUE(status);
-
+               acpi_bus_get_device(device->event_hotkey.bus_handle, &dev);
                 status = acpi_install_notify_handler(dev->handle,
-                                                    ACPI_SYSTEM_NOTIFY,
+                                                    ACPI_DEVICE_NOTIFY,
                                                      acpi_hotkey_notify_handler,
-                                                    device);
+                                                    dev);
         } else                  /* Add polling hotkey */
                 create_polling_proc(device);
  
@@ -409,84 +406,143 @@ static int hotkey_remove(union acpi_hotkey *device)
                 if (key->link.hotkey_standard_num ==
                     device->link.hotkey_standard_num) {
                         list_del(&key->link.entries);
-                       remove_proc_entry(key->poll_hotkey.action_method,
-                                         hotkey_proc_dir);
+                       free_hotkey_device(key);
                         global_hotkey_list.count--;
                         break;
                 }
         }
+       kfree(device);
         return_VALUE(0);
  }
  
-static void hotkey_update(union acpi_hotkey *key)
+static int  hotkey_update(union acpi_hotkey *key)
  {
-       struct list_head *entries, *next;
+       struct list_head *entries;
  
         ACPI_FUNCTION_TRACE("hotkey_update");
  
-       list_for_each_safe(entries, next, global_hotkey_list.entries) {
-               union acpi_hotkey *key =
+       list_for_each(entries, global_hotkey_list.entries) {
+               union acpi_hotkey *tmp=
                     container_of(entries, union acpi_hotkey, entries);
-               if (key->link.hotkey_standard_num ==
+               if (tmp->link.hotkey_standard_num ==
                     key->link.hotkey_standard_num) {
-                       key->event_hotkey.bus_handle =
-                           key->event_hotkey.bus_handle;
-                       key->event_hotkey.external_hotkey_num =
-                           key->event_hotkey.external_hotkey_num;
-                       key->event_hotkey.action_handle =
-                           key->event_hotkey.action_handle;
-                       key->event_hotkey.action_method =
-                           key->event_hotkey.action_method;
+                       if (key->link.hotkey_type == ACPI_HOTKEY_EVENT) {
+                               free_hotkey_buffer(tmp);
+                               tmp->event_hotkey.bus_handle =
+                                       key->event_hotkey.bus_handle;
+                               tmp->event_hotkey.external_hotkey_num =
+                                       key->event_hotkey.external_hotkey_num;
+                               tmp->event_hotkey.action_handle =
+                                       key->event_hotkey.action_handle;
+                               tmp->event_hotkey.action_method =
+                                       key->event_hotkey.action_method;
+                               kfree(key);
+                       } else {
+                               /*
+                               char  proc_name[80];
+
+                               sprintf(proc_name, "%d", tmp->link.hotkey_standard_num);
+                               strcat(proc_name, tmp->poll_hotkey.poll_method);
+                               remove_proc_entry(proc_name,hotkey_proc_dir);
+                               */
+                               free_poll_hotkey_buffer(tmp);
+                               tmp->poll_hotkey.poll_handle =
+                                       key->poll_hotkey.poll_handle;
+                               tmp->poll_hotkey.poll_method =
+                                       key->poll_hotkey.poll_method;
+                               tmp->poll_hotkey.action_handle =
+                                       key->poll_hotkey.action_handle;
+                               tmp->poll_hotkey.action_method =
+                                       key->poll_hotkey.action_method;
+                               tmp->poll_hotkey.poll_result =
+                                       key->poll_hotkey.poll_result;
+                               /*
+                               create_polling_proc(tmp);
+                               */
+                               kfree(key);
+                       }
+                       return_VALUE(0);
                         break;
                 }
         }
  
-       return_VOID;
+       return_VALUE(-ENODEV);
  }
  
  static void free_hotkey_device(union acpi_hotkey *key)
  {
         struct acpi_device *dev;
-       int status;
  
         ACPI_FUNCTION_TRACE("free_hotkey_device");
  
         if (key->link.hotkey_type == ACPI_HOTKEY_EVENT) {
-               status =
-                   acpi_bus_get_device(key->event_hotkey.bus_handle, &dev);
+               acpi_bus_get_device(key->event_hotkey.bus_handle, &dev);
                 if (dev->handle)
                         acpi_remove_notify_handler(dev->handle,
-                                                  ACPI_SYSTEM_NOTIFY,
+                                                  ACPI_DEVICE_NOTIFY,
                                                    acpi_hotkey_notify_handler);
-       } else
-               remove_proc_entry(key->poll_hotkey.action_method,
-                                 hotkey_proc_dir);
+               free_hotkey_buffer(key);
+       } else {
+               char  proc_name[80];
+
+               sprintf(proc_name, "%d", key->link.hotkey_standard_num);
+               /*
+               strcat(proc_name, key->poll_hotkey.poll_method);
+               */
+               remove_proc_entry(proc_name,hotkey_proc_dir);
+               free_poll_hotkey_buffer(key);
+       }
         kfree(key);
         return_VOID;
  }
  
+static void
+free_hotkey_buffer(union acpi_hotkey *key)
+{
+       kfree(key->event_hotkey.action_method);
+}
+
+static void
+free_poll_hotkey_buffer(union acpi_hotkey *key)
+{
+       kfree(key->poll_hotkey.action_method);
+       kfree(key->poll_hotkey.poll_method);
+       kfree(key->poll_hotkey.poll_result);
+}
  static int
  init_hotkey_device(union acpi_hotkey *key, char *bus_str, char *action_str,
                    char *method, int std_num, int external_num)
  {
+       acpi_handle     tmp_handle;
+       acpi_status status = AE_OK;
+
         ACPI_FUNCTION_TRACE("init_hotkey_device");
  
+       if(std_num < 0 || IS_POLL(std_num) || !key )
+               goto do_fail;
+
+       if(!bus_str || !action_str || !method)
+               goto do_fail;
+
         key->link.hotkey_type = ACPI_HOTKEY_EVENT;
         key->link.hotkey_standard_num = std_num;
         key->event_hotkey.flag = 0;
-       if (is_valid_acpi_path(bus_str))
-               acpi_get_handle((acpi_handle) 0,
-                               bus_str, &(key->event_hotkey.bus_handle));
-       else
-               return_VALUE(-ENODEV);
-       key->event_hotkey.external_hotkey_num = external_num;
-       if (is_valid_acpi_path(action_str))
-               acpi_get_handle((acpi_handle) 0,
-                               action_str, &(key->event_hotkey.action_handle));
-       key->event_hotkey.action_method = kmalloc(sizeof(method), GFP_KERNEL);
-       strcpy(key->event_hotkey.action_method, method);
+       key->event_hotkey.action_method = method;
  
-       return_VALUE(!is_valid_hotkey(key));
+       status = acpi_get_handle(NULL,bus_str, &(key->event_hotkey.bus_handle));
+       if(ACPI_FAILURE(status))
+               goto do_fail;
+       key->event_hotkey.external_hotkey_num = external_num;
+       status = acpi_get_handle(NULL,action_str, &(key->event_hotkey.action_handle));
+       if(ACPI_FAILURE(status))
+               goto do_fail;
+       status = acpi_get_handle(key->event_hotkey.action_handle,
+                               method, &tmp_handle);
+       if (ACPI_FAILURE(status))
+               goto do_fail;
+       return_VALUE(AE_OK);
+do_fail:
+       return_VALUE(-ENODEV);
  }
  
  static int
@@ -495,34 +551,46 @@ init_poll_hotkey_device(union acpi_hotkey *key,
                         char *poll_method,
                         char *action_str, char *action_method, int std_num)
  {
+       acpi_status status = AE_OK;
+       acpi_handle     tmp_handle;
+
         ACPI_FUNCTION_TRACE("init_poll_hotkey_device");
  
+       if(std_num < 0 || IS_EVENT(std_num) || !key)
+               goto do_fail;
+
+       if(!poll_str || !poll_method || !action_str || !action_method)
+               goto do_fail;
+
         key->link.hotkey_type = ACPI_HOTKEY_POLLING;
         key->link.hotkey_standard_num = std_num;
         key->poll_hotkey.flag = 0;
-       if (is_valid_acpi_path(poll_str))
-               acpi_get_handle((acpi_handle) 0,
-                               poll_str, &(key->poll_hotkey.poll_handle));
-       else
-               return_VALUE(-ENODEV);
         key->poll_hotkey.poll_method = poll_method;
-       if (is_valid_acpi_path(action_str))
-               acpi_get_handle((acpi_handle) 0,
-                               action_str, &(key->poll_hotkey.action_handle));
-       key->poll_hotkey.action_method =
-           kmalloc(sizeof(action_method), GFP_KERNEL);
-       strcpy(key->poll_hotkey.action_method, action_method);
+       key->poll_hotkey.action_method = action_method;
+
+       status = acpi_get_handle(NULL,poll_str, &(key->poll_hotkey.poll_handle));
+       if(ACPI_FAILURE(status))
+               goto do_fail;
+       status = acpi_get_handle(key->poll_hotkey.poll_handle,
+                               poll_method, &tmp_handle);
+        if (ACPI_FAILURE(status))
+                       goto do_fail;
+       status = acpi_get_handle(NULL,action_str, &(key->poll_hotkey.action_handle));
+       if (ACPI_FAILURE(status))
+               goto do_fail;
+       status = acpi_get_handle(key->poll_hotkey.action_handle,
+                               action_method, &tmp_handle);
+       if (ACPI_FAILURE(status))
+               goto do_fail;
         key->poll_hotkey.poll_result =
             (union acpi_object *)kmalloc(sizeof(union acpi_object), GFP_KERNEL);
-       return_VALUE(is_valid_hotkey(key));
+       if(!key->poll_hotkey.poll_result)
+               goto do_fail;
+       return_VALUE(AE_OK);
+do_fail:
+       return_VALUE(-ENODEV);
  }
  
-static int check_hotkey_valid(union acpi_hotkey *key,
-                             struct acpi_hotkey_list *list)
-{
-       ACPI_FUNCTION_TRACE("check_hotkey_valid");
-       return_VALUE(0);
-}
  
  static int hotkey_open_config(struct inode *inode, struct file *file)
  {
@@ -531,10 +599,17 @@ static int hotkey_open_config(struct inode *inode, struct file *file)
                      (file, hotkey_config_seq_show, PDE(inode)->data));
  }
  
+static int hotkey_poll_open_config(struct inode *inode, struct file *file)
+{
+       ACPI_FUNCTION_TRACE("hotkey_poll_open_config");
+       return_VALUE(single_open
+                    (file, hotkey_poll_config_seq_show, PDE(inode)->data));
+}
+
  static int hotkey_config_seq_show(struct seq_file *seq, void *offset)
  {
         struct acpi_hotkey_list *hotkey_list = &global_hotkey_list;
-       struct list_head *entries, *next;
+       struct list_head *entries;
         char bus_name[ACPI_PATHNAME_MAX] = { 0 };
         char action_name[ACPI_PATHNAME_MAX] = { 0 };
         struct acpi_buffer bus = { ACPI_PATHNAME_MAX, bus_name };
@@ -542,10 +617,7 @@ static int hotkey_config_seq_show(struct seq_file *seq, void *offset)
  
         ACPI_FUNCTION_TRACE(("hotkey_config_seq_show"));
  
-       if (!hotkey_list)
-               goto end;
-
-       list_for_each_safe(entries, next, hotkey_list->entries) {
+       list_for_each(entries, hotkey_list->entries) {
                 union acpi_hotkey *key =
                     container_of(entries, union acpi_hotkey, entries);
                 if (key->link.hotkey_type == ACPI_HOTKEY_EVENT) {
@@ -553,18 +625,37 @@ static int hotkey_config_seq_show(struct seq_file *seq, void *offset)
                                       ACPI_NAME_TYPE_MAX, &bus);
                         acpi_get_name(key->event_hotkey.action_handle,
                                       ACPI_NAME_TYPE_MAX, &act);
-                       seq_printf(seq, "%s:%s:%s:%d:%d", bus_name,
+                       seq_printf(seq, "%s:%s:%s:%d:%d\n", bus_name,
                                    action_name,
                                    key->event_hotkey.action_method,
                                    key->link.hotkey_standard_num,
                                    key->event_hotkey.external_hotkey_num);
-               } /* ACPI_HOTKEY_POLLING */
-               else {
+               }
+       }
+       seq_puts(seq, "\n");
+       return_VALUE(0);
+}
+
+static int hotkey_poll_config_seq_show(struct seq_file *seq, void *offset)
+{
+       struct acpi_hotkey_list *hotkey_list = &global_hotkey_list;
+       struct list_head *entries;
+       char bus_name[ACPI_PATHNAME_MAX] = { 0 };
+       char action_name[ACPI_PATHNAME_MAX] = { 0 };
+       struct acpi_buffer bus = { ACPI_PATHNAME_MAX, bus_name };
+       struct acpi_buffer act = { ACPI_PATHNAME_MAX, action_name };
+
+       ACPI_FUNCTION_TRACE(("hotkey_config_seq_show"));
+
+       list_for_each(entries, hotkey_list->entries) {
+               union acpi_hotkey *key =
+                   container_of(entries, union acpi_hotkey, entries);
+               if (key->link.hotkey_type == ACPI_HOTKEY_POLLING) {
                         acpi_get_name(key->poll_hotkey.poll_handle,
                                       ACPI_NAME_TYPE_MAX, &bus);
                         acpi_get_name(key->poll_hotkey.action_handle,
                                       ACPI_NAME_TYPE_MAX, &act);
-                       seq_printf(seq, "%s:%s:%s:%s:%d", bus_name,
+                       seq_printf(seq, "%s:%s:%s:%s:%d\n", bus_name,
                                    key->poll_hotkey.poll_method,
                                    action_name,
                                    key->poll_hotkey.action_method,
@@ -572,49 +663,83 @@ static int hotkey_config_seq_show(struct seq_file *seq, void *offset)
                 }
         }
         seq_puts(seq, "\n");
-      end:
         return_VALUE(0);
  }
  
  static int
  get_parms(char *config_record,
           int *cmd,
-         char *bus_handle,
-         char *bus_method,
-         char *action_handle,
-         char *method, int *internal_event_num, int *external_event_num)
+         char **bus_handle,
+         char **bus_method,
+         char **action_handle,
+         char **method, int *internal_event_num, int *external_event_num)
  {
-       char *tmp, *tmp1;
+       char *tmp, *tmp1, count;
         ACPI_FUNCTION_TRACE(("get_parms"));
  
         sscanf(config_record, "%d", cmd);
  
+       if(*cmd == 1){
+               if(sscanf(config_record, "%d:%d", cmd, internal_event_num)!=2)
+                       goto do_fail;
+               else
+                       return (6);
+       }
         tmp = strchr(config_record, ':');
+       if (!tmp)
+               goto do_fail;
         tmp++;
         tmp1 = strchr(tmp, ':');
-       strncpy(bus_handle, tmp, tmp1 - tmp);
-       bus_handle[tmp1 - tmp] = 0;
+       if (!tmp1)
+               goto do_fail;
+
+       count = tmp1 - tmp;
+       *bus_handle = (char *) kmalloc(count+1, GFP_KERNEL);
+       if(!*bus_handle)
+               goto do_fail;
+       strncpy(*bus_handle, tmp, count);
+       *(*bus_handle + count) = 0;
  
         tmp = tmp1;
         tmp++;
         tmp1 = strchr(tmp, ':');
-       strncpy(bus_method, tmp, tmp1 - tmp);
-       bus_method[tmp1 - tmp] = 0;
+       if (!tmp1)
+               goto do_fail;
+       count = tmp1 - tmp;
+       *bus_method = (char *) kmalloc(count+1, GFP_KERNEL);
+       if(!*bus_method)
+               goto do_fail;
+       strncpy(*bus_method, tmp, count);
+       *(*bus_method + count) = 0;
  
         tmp = tmp1;
         tmp++;
         tmp1 = strchr(tmp, ':');
-       strncpy(action_handle, tmp, tmp1 - tmp);
-       action_handle[tmp1 - tmp] = 0;
+       if (!tmp1)
+               goto do_fail;
+       count = tmp1 - tmp;
+       *action_handle = (char *) kmalloc(count+1, GFP_KERNEL);
+       strncpy(*action_handle, tmp, count);
+       *(*action_handle + count) = 0;
  
         tmp = tmp1;
         tmp++;
         tmp1 = strchr(tmp, ':');
-       strncpy(method, tmp, tmp1 - tmp);
-       method[tmp1 - tmp] = 0;
+       if (!tmp1)
+               goto do_fail;
+       count = tmp1 - tmp;
+       *method = (char *) kmalloc(count+1, GFP_KERNEL);
+       if(!*method)
+               goto do_fail;
+       strncpy(*method, tmp, count);
+       *(*method + count) = 0;
+
+       if(sscanf(tmp1 + 1, "%d:%d", internal_event_num, external_event_num)<=0)
+               goto do_fail;
  
-       sscanf(tmp1 + 1, "%d:%d", internal_event_num, external_event_num);
         return_VALUE(6);
+do_fail:
+       return_VALUE(-1);
  }
  
  /*  count is length for one input record */
@@ -622,135 +747,117 @@ static ssize_t hotkey_write_config(struct file *file,
                                    const char __user * buffer,
                                    size_t count, loff_t * data)
  {
-       struct acpi_hotkey_list *hotkey_list = &global_hotkey_list;
-       char config_record[MAX_CONFIG_RECORD_LEN];
-       char bus_handle[MAX_NAME_PATH_LEN];
-       char bus_method[MAX_NAME_PATH_LEN];
-       char action_handle[MAX_NAME_PATH_LEN];
-       char method[20];
+       char *config_record = NULL;
+       char *bus_handle = NULL;
+       char *bus_method = NULL;
+       char *action_handle = NULL;
+       char *method = NULL;
         int cmd, internal_event_num, external_event_num;
         int ret = 0;
         union acpi_hotkey *key = NULL;
  
         ACPI_FUNCTION_TRACE(("hotkey_write_config"));
  
-       if (!hotkey_list || count > MAX_CONFIG_RECORD_LEN) {
-               ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid arguments\n"));
-               return_VALUE(-EINVAL);
-       }
+       config_record = (char *) kmalloc(count+1, GFP_KERNEL);
+       if(!config_record)
+               return_VALUE(-ENOMEM);
  
         if (copy_from_user(config_record, buffer, count)) {
+               kfree(config_record);
                 ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid data \n"));
                 return_VALUE(-EINVAL);
         }
-       config_record[count] = '\0';
+       config_record[count] = 0;
  
         ret = get_parms(config_record,
                         &cmd,
-                       bus_handle,
-                       bus_method,
-                       action_handle,
-                       method, &internal_event_num, &external_event_num);
+                       &bus_handle,
+                       &bus_method,
+                       &action_handle,
+                       &method, &internal_event_num, &external_event_num);
+
+       kfree(config_record);
+       if(IS_OTHERS(internal_event_num))
+               goto do_fail;
         if (ret != 6) {
+do_fail:       
+               kfree(bus_handle);
+               kfree(bus_method);
+               kfree(action_handle);
+               kfree(method);
                 ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
                                   "Invalid data format ret=%d\n", ret));
                 return_VALUE(-EINVAL);
         }
  
         key = kmalloc(sizeof(union acpi_hotkey), GFP_KERNEL);
-       ret = init_hotkey_device(key, bus_handle, action_handle, method,
+       if(!key)
+               goto do_fail;
+       memset(key, 0, sizeof(union acpi_hotkey));
+       if(cmd == 1) {
+               union acpi_hotkey *tmp = NULL;
+               tmp = get_hotkey_by_event(&global_hotkey_list,
+                               internal_event_num);
+               if(!tmp)
+                       ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid key"));
+               else
+                       memcpy(key, tmp, sizeof(union acpi_hotkey));
+               goto cont_cmd;
+       }
+       if (IS_EVENT(internal_event_num)) {
+               kfree(bus_method);
+               ret = init_hotkey_device(key, bus_handle, action_handle, method,
                                  internal_event_num, external_event_num);
-
-       if (ret || check_hotkey_valid(key, hotkey_list)) {
+       } else
+               ret = init_poll_hotkey_device(key, bus_handle, bus_method,
+                                     action_handle, method,
+                                     internal_event_num);
+       if (ret) {
+               kfree(bus_handle);
+               kfree(action_handle);
+               if(IS_EVENT(internal_event_num))
+                       free_hotkey_buffer(key);
+               else
+                       free_poll_hotkey_buffer(key);
                 kfree(key);
                 ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid hotkey \n"));
                 return_VALUE(-EINVAL);
         }
-       switch (cmd) {
-       case 0:
-               hotkey_add(key);
-               break;
-       case 1:
-               hotkey_remove(key);
-               free_hotkey_device(key);
-               break;
-       case 2:
-               hotkey_update(key);
-               break;
-       default:
-               break;
-       }
-       return_VALUE(count);
-}
-
-/*  count is length for one input record */
-static ssize_t hotkey_write_poll_config(struct file *file,
-                                       const char __user * buffer,
-                                       size_t count, loff_t * data)
-{
-       struct seq_file *m = (struct seq_file *)file->private_data;
-       struct acpi_hotkey_list *hotkey_list =
-           (struct acpi_hotkey_list *)m->private;
-
-       char config_record[MAX_CONFIG_RECORD_LEN];
-       char polling_handle[MAX_NAME_PATH_LEN];
-       char action_handle[MAX_NAME_PATH_LEN];
-       char poll_method[20], action_method[20];
-       int ret, internal_event_num, cmd, external_event_num;
-       union acpi_hotkey *key = NULL;
-
-       ACPI_FUNCTION_TRACE("hotkey_write_poll_config");
-
-       if (!hotkey_list || count > MAX_CONFIG_RECORD_LEN) {
-               ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid arguments\n"));
-               return_VALUE(-EINVAL);
-       }
-
-       if (copy_from_user(config_record, buffer, count)) {
-               ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid data \n"));
-               return_VALUE(-EINVAL);
-       }
-       config_record[count] = '\0';
  
-       ret = get_parms(config_record,
-                       &cmd,
-                       polling_handle,
-                       poll_method,
-                       action_handle,
-                       action_method,
-                       &internal_event_num, &external_event_num);
-
-       if (ret != 6) {
-               ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid data format\n"));
-               return_VALUE(-EINVAL);
-       }
+cont_cmd:
+       kfree(bus_handle);
+       kfree(action_handle);
  
-       key = kmalloc(sizeof(union acpi_hotkey), GFP_KERNEL);
-       ret = init_poll_hotkey_device(key, polling_handle, poll_method,
-                                     action_handle, action_method,
-                                     internal_event_num);
-       if (ret || check_hotkey_valid(key, hotkey_list)) {
-               kfree(key);
-               ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid hotkey \n"));
-               return_VALUE(-EINVAL);
-       }
         switch (cmd) {
         case 0:
-               hotkey_add(key);
+               if(get_hotkey_by_event(&global_hotkey_list,key->link.hotkey_standard_num))
+                       goto fail_out;
+               else
+                       hotkey_add(key);
                 break;
         case 1:
                 hotkey_remove(key);
                 break;
         case 2:
-               hotkey_update(key);
+               if(hotkey_update(key))
+                       goto fail_out;
                 break;
         default:
+               goto fail_out;
                 break;
         }
         return_VALUE(count);
+fail_out:
+       if(IS_EVENT(internal_event_num))
+               free_hotkey_buffer(key);
+       else
+               free_poll_hotkey_buffer(key);
+       kfree(key);
+       ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "invalid key\n"));
+       return_VALUE(-EINVAL);
  }
  
-/*  
+/*
   * This function evaluates an ACPI method, given an int as parameter, the
   * method is searched within the scope of the handle, can be NULL. The output
   * of the method is written is output, which can also be NULL
@@ -775,7 +882,7 @@ static int write_acpi_int(acpi_handle handle, const char *method, int val,
         return_VALUE(status == AE_OK);
  }
  
-static int read_acpi_int(acpi_handle handle, const char *method, int *val)
+static int read_acpi_int(acpi_handle handle, const char *method, union acpi_object *val)
  {
         struct acpi_buffer output;
         union acpi_object out_obj;
@@ -786,62 +893,32 @@ static int read_acpi_int(acpi_handle handle, const char *method, int *val)
         output.pointer = &out_obj;
  
         status = acpi_evaluate_object(handle, (char *)method, NULL, &output);
-       *val = out_obj.integer.value;
+       if(val){
+               val->integer.value = out_obj.integer.value;
+               val->type = out_obj.type;
+       } else
+               ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "null val pointer"));
         return_VALUE((status == AE_OK)
                      && (out_obj.type == ACPI_TYPE_INTEGER));
  }
  
-static acpi_handle
-get_handle_from_hotkeylist(struct acpi_hotkey_list *hotkey_list, int event_num)
+static union acpi_hotkey *get_hotkey_by_event(struct
+                             acpi_hotkey_list
+                             *hotkey_list, int event)
  {
-       struct list_head *entries, *next;
-
-       list_for_each_safe(entries, next, hotkey_list->entries) {
-               union acpi_hotkey *key =
-                   container_of(entries, union acpi_hotkey, entries);
-               if (key->link.hotkey_type == ACPI_HOTKEY_EVENT
-                   && key->link.hotkey_standard_num == event_num) {
-                       return (key->event_hotkey.action_handle);
-               }
-       }
-       return (NULL);
-}
-
-static
-char *get_method_from_hotkeylist(struct acpi_hotkey_list *hotkey_list,
-                                int event_num)
-{
-       struct list_head *entries, *next;
-
-       list_for_each_safe(entries, next, hotkey_list->entries) {
-               union acpi_hotkey *key =
-                   container_of(entries, union acpi_hotkey, entries);
-
-               if (key->link.hotkey_type == ACPI_HOTKEY_EVENT &&
-                   key->link.hotkey_standard_num == event_num)
-                       return (key->event_hotkey.action_method);
-       }
-       return (NULL);
-}
-
-static struct acpi_polling_hotkey *get_hotkey_by_event(struct
-                                                      acpi_hotkey_list
-                                                      *hotkey_list, int event)
-{
-       struct list_head *entries, *next;
+       struct list_head *entries;
  
-       list_for_each_safe(entries, next, hotkey_list->entries) {
+       list_for_each(entries, hotkey_list->entries) {
                 union acpi_hotkey *key =
                     container_of(entries, union acpi_hotkey, entries);
-               if (key->link.hotkey_type == ACPI_HOTKEY_POLLING
-                   && key->link.hotkey_standard_num == event) {
-                       return (&key->poll_hotkey);
+               if (key->link.hotkey_standard_num == event) {
+                       return(key);
                 }
         }
-       return (NULL);
+       return(NULL);
  }
  
-/*  
+/*
   * user call AML method interface:
   * Call convention:
   * echo "event_num: arg type : value"
@@ -854,48 +931,56 @@ static ssize_t hotkey_execute_aml_method(struct file *file,
                                          size_t count, loff_t * data)
  {
         struct acpi_hotkey_list *hotkey_list = &global_hotkey_list;
-       char arg[MAX_CALL_PARM];
-       int event, type, value;
-
-       char *method;
-       acpi_handle handle;
+       char *arg;
+       int event,method_type,type, value;
+       union acpi_hotkey *key;
  
         ACPI_FUNCTION_TRACE("hotkey_execte_aml_method");
  
-       if (!hotkey_list || count > MAX_CALL_PARM) {
-               ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid argument 1"));
-               return_VALUE(-EINVAL);
-       }
+       arg = (char *) kmalloc(count+1, GFP_KERNEL);
+       if(!arg)
+               return_VALUE(-ENOMEM);
+       arg[count]=0;
  
         if (copy_from_user(arg, buffer, count)) {
+               kfree(arg);
                 ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid argument 2"));
                 return_VALUE(-EINVAL);
         }
  
-       arg[count] = '\0';
-
-       if (sscanf(arg, "%d:%d:%d", &event, &type, &value) != 3) {
+       if (sscanf(arg, "%d:%d:%d:%d", &event, &method_type, &type, &value) != 4) {
+               kfree(arg);
                 ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid argument 3"));
                 return_VALUE(-EINVAL);
         }
-
+       kfree(arg);
         if (type == ACPI_TYPE_INTEGER) {
-               handle = get_handle_from_hotkeylist(hotkey_list, event);
-               method = (char *)get_method_from_hotkeylist(hotkey_list, event);
+               key = get_hotkey_by_event(hotkey_list, event);
+               if(!key)
+                       goto do_fail;
                 if (IS_EVENT(event))
-                       write_acpi_int(handle, method, value, NULL);
+                       write_acpi_int(key->event_hotkey.action_handle,
+                                       key->event_hotkey.action_method, value, NULL);
                 else if (IS_POLL(event)) {
-                       struct acpi_polling_hotkey *key;
-                       key = (struct acpi_polling_hotkey *)
-                           get_hotkey_by_event(hotkey_list, event);
-                       read_acpi_int(handle, method, key->poll_result);
+                       if ( method_type == POLL_METHOD )
+                               read_acpi_int(key->poll_hotkey.poll_handle,
+                                       key->poll_hotkey.poll_method,
+                                       key->poll_hotkey.poll_result);
+                       else if ( method_type == ACTION_METHOD )
+                               write_acpi_int(key->poll_hotkey.action_handle,
+                                       key->poll_hotkey.action_method, value, NULL);
+                       else
+                               goto do_fail;
+
                 }
         } else {
                 ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Not supported"));
                 return_VALUE(-EINVAL);
         }
-
         return_VALUE(count);
+do_fail:
+       return_VALUE(-EINVAL);
+
  }
  
  static int __init hotkey_init(void)
@@ -928,7 +1013,7 @@ static int __init hotkey_init(void)
                 ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
                                   "Hotkey: Unable to create %s entry\n",
                                   HOTKEY_EV_CONFIG));
-               return (-ENODEV);
+               goto do_fail1;
         } else {
                 hotkey_config->proc_fops = &hotkey_config_fops;
                 hotkey_config->data = &global_hotkey_list;
@@ -943,7 +1028,8 @@ static int __init hotkey_init(void)
                 ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
                                   "Hotkey: Unable to create %s entry\n",
                                   HOTKEY_EV_CONFIG));
-               return (-ENODEV);
+
+               goto do_fail2;
         } else {
                 hotkey_poll_config->proc_fops = &hotkey_poll_config_fops;
                 hotkey_poll_config->data = &global_hotkey_list;
@@ -957,7 +1043,7 @@ static int __init hotkey_init(void)
                 ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
                                   "Hotkey: Unable to create %s entry\n",
                                   HOTKEY_ACTION));
-               return (-ENODEV);
+               goto do_fail3;
         } else {
                 hotkey_action->proc_fops = &hotkey_action_fops;
                 hotkey_action->owner = THIS_MODULE;
@@ -970,7 +1056,7 @@ static int __init hotkey_init(void)
                 ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
                                   "Hotkey: Unable to create %s entry\n",
                                   HOTKEY_INFO));
-               return (-ENODEV);
+               goto do_fail4;
         } else {
                 hotkey_info->proc_fops = &hotkey_info_fops;
                 hotkey_info->owner = THIS_MODULE;
@@ -979,23 +1065,33 @@ static int __init hotkey_init(void)
         }
  
         result = acpi_bus_register_driver(&hotkey_driver);
-       if (result < 0) {
-               remove_proc_entry(HOTKEY_PROC, acpi_root_dir);
-               return (-ENODEV);
-       }
+       if (result < 0)
+               goto do_fail5;
         global_hotkey_list.count = 0;
         global_hotkey_list.entries = &hotkey_entries;
  
         INIT_LIST_HEAD(&hotkey_entries);
  
         return (0);
+
+do_fail5:
+       remove_proc_entry(HOTKEY_INFO, hotkey_proc_dir);
+do_fail4:
+       remove_proc_entry(HOTKEY_ACTION, hotkey_proc_dir);
+do_fail3:
+       remove_proc_entry(HOTKEY_PL_CONFIG, hotkey_proc_dir);
+do_fail2:
+       remove_proc_entry(HOTKEY_EV_CONFIG, hotkey_proc_dir);
+do_fail1:
+       remove_proc_entry(HOTKEY_PROC, acpi_root_dir);
+       return (-ENODEV);
  }
  
  static void __exit hotkey_exit(void)
  {
         struct list_head *entries, *next;
  
-       ACPI_FUNCTION_TRACE("hotkey_remove");
+       ACPI_FUNCTION_TRACE("hotkey_exit");
  
         list_for_each_safe(entries, next, global_hotkey_list.entries) {
                 union acpi_hotkey *key =
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c

index bdd9f37f8101e67a0392349696ec2f632a0b4747..0d11d6e6abd6d652efab6c2831a81b340c7a50f7 100644 (file)
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -71,7 +71,7 @@ EXPORT_SYMBOL(acpi_in_debugger);
  extern char line_buf[80];
  #endif /*ENABLE_DEBUGGER*/
  
-int acpi_specific_hotkey_enabled;
+int acpi_specific_hotkey_enabled = TRUE;
  EXPORT_SYMBOL(acpi_specific_hotkey_enabled);
  
  static unsigned int acpi_irq_irq;
@@ -145,10 +145,14 @@ acpi_os_vprintf(const char *fmt, va_list args)
  #endif
  }
  
+extern int acpi_in_resume;
  void *
  acpi_os_allocate(acpi_size size)
  {
-       return kmalloc(size, GFP_KERNEL);
+       if (acpi_in_resume)
+               return kmalloc(size, GFP_ATOMIC);
+       else
+               return kmalloc(size, GFP_KERNEL);
  }
  
  void
@@ -1158,11 +1162,11 @@ __setup("acpi_wake_gpes_always_on", acpi_wake_gpes_always_on_setup);
  int __init
  acpi_hotkey_setup(char *str)
  {
-       acpi_specific_hotkey_enabled = TRUE;
+       acpi_specific_hotkey_enabled = FALSE;
         return 1;
  }
  
-__setup("acpi_specific_hotkey", acpi_hotkey_setup);
+__setup("acpi_generic_hotkey", acpi_hotkey_setup);
  
  /*
   * max_cstate is defined in the base kernel so modules can
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c

index 6a29610edc112f6a047b229a2fd4bbeb051b1309..834c2ceff1aa4ba1864e2dfff84667384a33a72a 100644 (file)
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -692,7 +692,18 @@ acpi_pci_link_free_irq(acpi_handle handle)
                 return_VALUE(-1);
         }
  
+#ifdef FUTURE_USE
+       /*
+        * The Link reference count allows us to _DISable an unused link
+        * at suspend time, and set it again on resume.
+        * However, 2.6.12 still has irq_router.resume
+        * which blindly restores the link state.
+        * So we disable the reference count method
+        * to prevent duplicate acpi_pci_link_set()
+        * which would harm some systems
+        */
         link->refcnt --;
+#endif
         ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                 "Link %s is dereferenced\n", acpi_device_bid(link->device)));
  
@@ -776,16 +787,32 @@ end:
  }
  
  static int
-irqrouter_suspend(
-       struct sys_device *dev,
-       u32     state)
+acpi_pci_link_resume(
+       struct acpi_pci_link *link)
+{
+       ACPI_FUNCTION_TRACE("acpi_pci_link_resume");
+
+       if (link->refcnt && link->irq.active && link->irq.initialized)
+               return_VALUE(acpi_pci_link_set(link, link->irq.active));
+       else
+               return_VALUE(0);
+}
+
+/*
+ * FIXME: this is a workaround to avoid nasty warning.  It will be removed
+ * after every device calls pci_disable_device in .resume.
+ */
+int acpi_in_resume;
+static int
+irqrouter_resume(
+       struct sys_device *dev)
  {
         struct list_head        *node = NULL;
         struct acpi_pci_link    *link = NULL;
-       int                     ret = 0;
  
-       ACPI_FUNCTION_TRACE("irqrouter_suspend");
+       ACPI_FUNCTION_TRACE("irqrouter_resume");
  
+       acpi_in_resume = 1;
         list_for_each(node, &acpi_link.entries) {
                 link = list_entry(node, struct acpi_pci_link, node);
                 if (!link) {
@@ -793,21 +820,10 @@ irqrouter_suspend(
                                 "Invalid link context\n"));
                         continue;
                 }
-               if (link->irq.initialized && link->refcnt != 0
-                       /* We ignore legacy IDE device irq */
-                       && link->irq.active != 14 && link->irq.active !=15) {
-                       printk(KERN_WARNING PREFIX
-                               "%d drivers with interrupt %d neglected to call"
-                               " pci_disable_device at .suspend\n",
-                               link->refcnt,
-                               link->irq.active);
-                       printk(KERN_WARNING PREFIX
-                               "Fix the driver, or rmmod before suspend\n");
-                       link->refcnt = 0;
-                       ret = -EINVAL;
-               }
+               acpi_pci_link_resume(link);
         }
-       return_VALUE(ret);
+       acpi_in_resume = 0;
+       return_VALUE(0);
  }
  
  
@@ -922,7 +938,7 @@ __setup("acpi_irq_balance", acpi_irq_balance_set);
  /* FIXME: we will remove this interface after all drivers call pci_disable_device */
  static struct sysdev_class irqrouter_sysdev_class = {
          set_kset_name("irqrouter"),
-        .suspend = irqrouter_suspend,
+        .resume = irqrouter_resume,
  };
  
  
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c

index af271d994f15a6db56a94f4f801bf71dc5d15c86..2c04740c6543f54ea6d0c35a51d00d11e479b76b 100644 (file)
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -86,12 +86,11 @@ static int set_max_cstate(struct dmi_system_id *id)
         if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
                 return 0;
  
-       printk(KERN_NOTICE PREFIX "%s detected - %s disabled."
+       printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
                 " Override with \"processor.max_cstate=%d\"\n", id->ident,
-               ((int)id->driver_data == 1)? "C2,C3":"C3",
-              ACPI_PROCESSOR_MAX_POWER + 1);
+               (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);
  
-       max_cstate = (int)id->driver_data;
+       max_cstate = (long)id->driver_data;
  
         return 0;
  }
diff --git a/drivers/acpi/sleep/poweroff.c b/drivers/acpi/sleep/poweroff.c

index 186b182c582522810c22fd44cecf88afd87c97f1..f93d2ee54800517590d28f2d77767124962eb89f 100644 (file)
--- a/drivers/acpi/sleep/poweroff.c
+++ b/drivers/acpi/sleep/poweroff.c
@@ -55,7 +55,11 @@ void acpi_power_off(void)
  
  static int acpi_shutdown(struct sys_device *x)
  {
-       return acpi_sleep_prepare(ACPI_STATE_S5);
+       if (system_state == SYSTEM_POWER_OFF) {
+               /* Prepare if we are going to power off the system */
+               return acpi_sleep_prepare(ACPI_STATE_S5);
+       }
+       return 0;
  }
  
  static struct sysdev_class acpi_sysclass = {
diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c

index 73c6b85299c12e1e34097b7723b039ef5fc522e3..d74a7c5e75dda83fdca50df5f7f1d05045214226 100644 (file)
--- a/drivers/atm/ambassador.c
+++ b/drivers/atm/ambassador.c
@@ -513,7 +513,7 @@ static void rx_complete (amb_dev * dev, rx_out * rx) {
           
           // VC layer stats
           atomic_inc(&atm_vcc->stats->rx);
-         do_gettimeofday(&skb->stamp);
+         __net_timestamp(skb);
           // end of our responsability
           atm_vcc->push (atm_vcc, skb);
           return;
diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c

index f2f01cb82cb4f6bd9b1e15206331b3ad5896b349..57f1810fdccd74df8f755d1aa1f935ccf68b482e 100644 (file)
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -325,7 +325,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb)
                 result = -ENOBUFS;
                 goto done;
         }
-       do_gettimeofday(&new_skb->stamp);
+       __net_timestamp(new_skb);
         memcpy(skb_put(new_skb,skb->len),skb->data,skb->len);
         out_vcc->push(out_vcc,new_skb);
         atomic_inc(&vcc->stats->tx);
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c

index 10da36934769d6fbdf124945f546462e7a10f28a..c13c4d736ef56911324acb025b91bbe4bde0279e 100644 (file)
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -537,7 +537,7 @@ static int rx_aal0(struct atm_vcc *vcc)
                 return 0;
         }
         skb_put(skb,length);
-       skb->stamp = eni_vcc->timestamp;
+       skb_set_timestamp(skb, &eni_vcc->timestamp);
         DPRINTK("got len %ld\n",length);
         if (do_rx_dma(vcc,skb,1,length >> 2,length >> 2)) return 1;
         eni_vcc->rxing++;
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c

index b078fa548ebf9954babc6d0442455af4ae4396d4..58219744f5dbfef006d41888c893ff54be6648c8 100644 (file)
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -815,7 +815,7 @@ static void process_incoming (struct fs_dev *dev, struct queue *q)
                                 skb_put (skb, qe->p1 & 0xffff); 
                                 ATM_SKB(skb)->vcc = atm_vcc;
                                 atomic_inc(&atm_vcc->stats->rx);
-                               do_gettimeofday(&skb->stamp);
+                               __net_timestamp(skb);
                                 fs_dprintk (FS_DEBUG_ALLOC, "Free rec-skb: %p (pushed)\n", skb);
                                 atm_vcc->push (atm_vcc, skb);
                                 fs_dprintk (FS_DEBUG_ALLOC, "Free rec-d: %p\n", pe);
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c

index 5f702199543ac043eec3905b6231e119e421ba7c..2bf723a7b6e62c12918b865cac2cf6c970d81736 100644 (file)
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -1176,7 +1176,7 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp
         return -ENOMEM;
      } 
  
-    do_gettimeofday(&skb->stamp);
+    __net_timestamp(skb);
      
  #ifdef FORE200E_52BYTE_AAL0_SDU
      if (cell_header) {
diff --git a/drivers/atm/he.c b/drivers/atm/he.c

index 28250c9b32d6cf9f28e48ce77a3b79444d773176..fde9334059af595428f3f871599a538e3c3a29b0 100644 (file)
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -1886,7 +1886,7 @@ he_service_rbrq(struct he_dev *he_dev, int group)
                 if (rx_skb_reserve > 0)
                         skb_reserve(skb, rx_skb_reserve);
  
-               do_gettimeofday(&skb->stamp);
+               __net_timestamp(skb);
  
                 for (iov = he_vcc->iov_head;
                                 iov < he_vcc->iov_tail; ++iov) {
diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c

index 924a2c8988bd62d05236907c43a14b622a319cc0..0cded04680033a95c1cd93ad9ce771d7112651e1 100644 (file)
--- a/drivers/atm/horizon.c
+++ b/drivers/atm/horizon.c
@@ -1034,7 +1034,7 @@ static void rx_schedule (hrz_dev * dev, int irq) {
           struct atm_vcc * vcc = ATM_SKB(skb)->vcc;
           // VC layer stats
           atomic_inc(&vcc->stats->rx);
-         do_gettimeofday(&skb->stamp);
+         __net_timestamp(skb);
           // end of our responsability
           vcc->push (vcc, skb);
         }
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c

index 30b7e990ed0b19a4419b9cb51e967055830c3cba..b4a76cade646badde872e6895c07c97339465a22 100644 (file)
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -1101,7 +1101,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
                                cell, ATM_CELL_PAYLOAD);
  
                         ATM_SKB(sb)->vcc = vcc;
-                       do_gettimeofday(&sb->stamp);
+                       __net_timestamp(sb);
                         vcc->push(vcc, sb);
                         atomic_inc(&vcc->stats->rx);
  
@@ -1179,7 +1179,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
  
                         skb_trim(skb, len);
                         ATM_SKB(skb)->vcc = vcc;
-                       do_gettimeofday(&skb->stamp);
+                       __net_timestamp(skb);
  
                         vcc->push(vcc, skb);
                         atomic_inc(&vcc->stats->rx);
@@ -1201,7 +1201,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
  
                 skb_trim(skb, len);
                 ATM_SKB(skb)->vcc = vcc;
-               do_gettimeofday(&skb->stamp);
+               __net_timestamp(skb);
  
                 vcc->push(vcc, skb);
                 atomic_inc(&vcc->stats->rx);
@@ -1340,7 +1340,7 @@ idt77252_rx_raw(struct idt77252_dev *card)
                        ATM_CELL_PAYLOAD);
  
                 ATM_SKB(sb)->vcc = vcc;
-               do_gettimeofday(&sb->stamp);
+               __net_timestamp(sb);
                 vcc->push(vcc, sb);
                 atomic_inc(&vcc->stats->rx);
  
diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c

index ffe3afa723b8db98e51e1ac91c39655e5cda6ff2..51ec147872934e902cb8c6ce4c99c6480cd2e0d5 100644 (file)
--- a/drivers/atm/lanai.c
+++ b/drivers/atm/lanai.c
@@ -1427,7 +1427,7 @@ static void vcc_rx_aal5(struct lanai_vcc *lvcc, int endptr)
         skb_put(skb, size);
         vcc_rx_memcpy(skb->data, lvcc, size);
         ATM_SKB(skb)->vcc = lvcc->rx.atmvcc;
-       do_gettimeofday(&skb->stamp);
+       __net_timestamp(skb);
         lvcc->rx.atmvcc->push(lvcc->rx.atmvcc, skb);
         atomic_inc(&lvcc->rx.atmvcc->stats->rx);
      out:
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c

index b2a7b754fd1403a4c7dbd4ffe5ab47cd3de70b5e..c57e20dcb0f839004267dc3ffbdc16e95a351403 100644 (file)
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -214,8 +214,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev);
  static void __devinit ns_init_card_error(ns_dev *card, int error);
  static scq_info *get_scq(int size, u32 scd);
  static void free_scq(scq_info *scq, struct atm_vcc *vcc);
-static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1,
-                       u32 handle2, u32 addr2);
+static void push_rxbufs(ns_dev *, struct sk_buff *);
  static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs);
  static int ns_open(struct atm_vcc *vcc);
  static void ns_close(struct atm_vcc *vcc);
@@ -766,6 +765,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev)
           ns_init_card_error(card, error);
          return error;
        }
+      NS_SKB_CB(hb)->buf_type = BUF_NONE;
        skb_queue_tail(&card->hbpool.queue, hb);
        card->hbpool.count++;
     }
@@ -786,9 +786,10 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev)
           ns_init_card_error(card, error);
          return error;
        }
+      NS_SKB_CB(lb)->buf_type = BUF_LG;
        skb_queue_tail(&card->lbpool.queue, lb);
        skb_reserve(lb, NS_SMBUFSIZE);
-      push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0);
+      push_rxbufs(card, lb);
        /* Due to the implementation of push_rxbufs() this is 1, not 0 */
        if (j == 1)
        {
@@ -822,9 +823,10 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev)
           ns_init_card_error(card, error);
          return error;
        }
+      NS_SKB_CB(sb)->buf_type = BUF_SM;
        skb_queue_tail(&card->sbpool.queue, sb);
        skb_reserve(sb, NS_AAL0_HEADER);
-      push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0);
+      push_rxbufs(card, sb);
     }
     /* Test for strange behaviour which leads to crashes */
     if ((bcount = ns_stat_sfbqc_get(readl(card->membase + STAT))) < card->sbnr.min)
@@ -852,6 +854,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev)
           ns_init_card_error(card, error);
          return error;
        }
+      NS_SKB_CB(iovb)->buf_type = BUF_NONE;
        skb_queue_tail(&card->iovpool.queue, iovb);
        card->iovpool.count++;
     }
@@ -1078,12 +1081,18 @@ static void free_scq(scq_info *scq, struct atm_vcc *vcc)
  
  /* The handles passed must be pointers to the sk_buff containing the small
     or large buffer(s) cast to u32. */
-static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1,
-                       u32 handle2, u32 addr2)
+static void push_rxbufs(ns_dev *card, struct sk_buff *skb)
  {
+   struct ns_skb_cb *cb = NS_SKB_CB(skb);
+   u32 handle1, addr1;
+   u32 handle2, addr2;
     u32 stat;
     unsigned long flags;
     
+   /* *BARF* */
+   handle2 = addr2 = 0;
+   handle1 = (u32)skb;
+   addr1 = (u32)virt_to_bus(skb->data);
  
  #ifdef GENERAL_DEBUG
     if (!addr1)
@@ -1093,7 +1102,7 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1,
     stat = readl(card->membase + STAT);
     card->sbfqc = ns_stat_sfbqc_get(stat);
     card->lbfqc = ns_stat_lfbqc_get(stat);
-   if (type == BUF_SM)
+   if (cb->buf_type == BUF_SM)
     {
        if (!addr2)
        {
@@ -1111,7 +1120,7 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1,
          }
        }      
     }
-   else /* type == BUF_LG */
+   else /* buf_type == BUF_LG */
     {
        if (!addr2)
        {
@@ -1132,26 +1141,26 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1,
  
     if (addr2)
     {
-      if (type == BUF_SM)
+      if (cb->buf_type == BUF_SM)
        {
           if (card->sbfqc >= card->sbnr.max)
           {
-            skb_unlink((struct sk_buff *) handle1);
+            skb_unlink((struct sk_buff *) handle1, &card->sbpool.queue);
              dev_kfree_skb_any((struct sk_buff *) handle1);
-            skb_unlink((struct sk_buff *) handle2);
+            skb_unlink((struct sk_buff *) handle2, &card->sbpool.queue);
              dev_kfree_skb_any((struct sk_buff *) handle2);
              return;
           }
          else
              card->sbfqc += 2;
        }
-      else /* (type == BUF_LG) */
+      else /* (buf_type == BUF_LG) */
        {
           if (card->lbfqc >= card->lbnr.max)
           {
-            skb_unlink((struct sk_buff *) handle1);
+            skb_unlink((struct sk_buff *) handle1, &card->lbpool.queue);
              dev_kfree_skb_any((struct sk_buff *) handle1);
-            skb_unlink((struct sk_buff *) handle2);
+            skb_unlink((struct sk_buff *) handle2, &card->lbpool.queue);
              dev_kfree_skb_any((struct sk_buff *) handle2);
              return;
           }
@@ -1166,12 +1175,12 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1,
        writel(handle2, card->membase + DR2);
        writel(addr1, card->membase + DR1);
        writel(handle1, card->membase + DR0);
-      writel(NS_CMD_WRITE_FREEBUFQ | (u32) type, card->membase + CMD);
+      writel(NS_CMD_WRITE_FREEBUFQ | cb->buf_type, card->membase + CMD);
   
        spin_unlock_irqrestore(&card->res_lock, flags);
  
        XPRINTK("nicstar%d: Pushing %s buffers at 0x%x and 0x%x.\n", card->index,
-              (type == BUF_SM ? "small" : "large"), addr1, addr2);
+              (cb->buf_type == BUF_SM ? "small" : "large"), addr1, addr2);
     }
  
     if (!card->efbie && card->sbfqc >= card->sbnr.min &&
@@ -1322,9 +1331,10 @@ static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
              card->efbie = 0;
              break;
           }
+         NS_SKB_CB(sb)->buf_type = BUF_SM;
           skb_queue_tail(&card->sbpool.queue, sb);
           skb_reserve(sb, NS_AAL0_HEADER);
-         push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0);
+         push_rxbufs(card, sb);
        }
        card->sbfqc = i;
        process_rsq(card);
@@ -1348,9 +1358,10 @@ static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
              card->efbie = 0;
              break;
           }
+         NS_SKB_CB(lb)->buf_type = BUF_LG;
           skb_queue_tail(&card->lbpool.queue, lb);
           skb_reserve(lb, NS_SMBUFSIZE);
-         push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0);
+         push_rxbufs(card, lb);
        }
        card->lbfqc = i;
        process_rsq(card);
@@ -2202,7 +2213,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
           memcpy(sb->tail, cell, ATM_CELL_PAYLOAD);
           skb_put(sb, ATM_CELL_PAYLOAD);
           ATM_SKB(sb)->vcc = vcc;
-         do_gettimeofday(&sb->stamp);
+        __net_timestamp(sb);
           vcc->push(vcc, sb);
           atomic_inc(&vcc->stats->rx);
           cell += ATM_CELL_PAYLOAD;
@@ -2227,6 +2238,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
              recycle_rx_buf(card, skb);
              return;
          }
+         NS_SKB_CB(iovb)->buf_type = BUF_NONE;
        }
        else
           if (--card->iovpool.count < card->iovnr.min)
@@ -2234,6 +2246,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
             struct sk_buff *new_iovb;
             if ((new_iovb = alloc_skb(NS_IOVBUFSIZE, GFP_ATOMIC)) != NULL)
             {
+               NS_SKB_CB(new_iovb)->buf_type = BUF_NONE;
                 skb_queue_tail(&card->iovpool.queue, new_iovb);
                 card->iovpool.count++;
             }
@@ -2264,7 +2277,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
  
     if (NS_SKB(iovb)->iovcnt == 1)
     {
-      if (skb->list != &card->sbpool.queue)
+      if (NS_SKB_CB(skb)->buf_type != BUF_SM)
        {
           printk("nicstar%d: Expected a small buffer, and this is not one.\n",
                 card->index);
@@ -2278,7 +2291,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
     }
     else /* NS_SKB(iovb)->iovcnt >= 2 */
     {
-      if (skb->list != &card->lbpool.queue)
+      if (NS_SKB_CB(skb)->buf_type != BUF_LG)
        {
           printk("nicstar%d: Expected a large buffer, and this is not one.\n",
                 card->index);
@@ -2322,8 +2335,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
           /* skb points to a small buffer */
           if (!atm_charge(vcc, skb->truesize))
           {
-            push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data),
-                        0, 0);
+            push_rxbufs(card, skb);
              atomic_inc(&vcc->stats->rx_drop);
           }
           else
@@ -2334,7 +2346,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
              skb->destructor = ns_sb_destructor;
  #endif /* NS_USE_DESTRUCTORS */
              ATM_SKB(skb)->vcc = vcc;
-            do_gettimeofday(&skb->stamp);
+           __net_timestamp(skb);
              vcc->push(vcc, skb);
              atomic_inc(&vcc->stats->rx);
           }
@@ -2350,8 +2362,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
          {
              if (!atm_charge(vcc, sb->truesize))
              {
-               push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data),
-                           0, 0);
+               push_rxbufs(card, sb);
                 atomic_inc(&vcc->stats->rx_drop);
              }
              else
@@ -2362,21 +2373,19 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
                 sb->destructor = ns_sb_destructor;
  #endif /* NS_USE_DESTRUCTORS */
                 ATM_SKB(sb)->vcc = vcc;
-               do_gettimeofday(&sb->stamp);
+              __net_timestamp(sb);
                 vcc->push(vcc, sb);
                 atomic_inc(&vcc->stats->rx);
              }
  
-            push_rxbufs(card, BUF_LG, (u32) skb,
-                          (u32) virt_to_bus(skb->data), 0, 0);
+            push_rxbufs(card, skb);
  
          }
          else                   /* len > NS_SMBUFSIZE, the usual case */
          {
              if (!atm_charge(vcc, skb->truesize))
              {
-               push_rxbufs(card, BUF_LG, (u32) skb,
-                           (u32) virt_to_bus(skb->data), 0, 0);
+               push_rxbufs(card, skb);
                 atomic_inc(&vcc->stats->rx_drop);
              }
              else
@@ -2389,13 +2398,12 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
                 memcpy(skb->data, sb->data, NS_SMBUFSIZE);
                 skb_put(skb, len - NS_SMBUFSIZE);
                 ATM_SKB(skb)->vcc = vcc;
-               do_gettimeofday(&skb->stamp);
+              __net_timestamp(skb);
                 vcc->push(vcc, skb);
                 atomic_inc(&vcc->stats->rx);
              }
  
-            push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data),
-                        0, 0);
+            push_rxbufs(card, sb);
  
           }
          
@@ -2430,6 +2438,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
                    card->hbpool.count++;
                 }
              }
+            NS_SKB_CB(hb)->buf_type = BUF_NONE;
          }
          else
           if (--card->hbpool.count < card->hbnr.min)
@@ -2437,6 +2446,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
              struct sk_buff *new_hb;
              if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL)
              {
+               NS_SKB_CB(new_hb)->buf_type = BUF_NONE;
                 skb_queue_tail(&card->hbpool.queue, new_hb);
                 card->hbpool.count++;
              }
@@ -2444,6 +2454,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
             {
                 if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL)
                 {
+                  NS_SKB_CB(new_hb)->buf_type = BUF_NONE;
                    skb_queue_tail(&card->hbpool.queue, new_hb);
                    card->hbpool.count++;
                 }
@@ -2473,8 +2484,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
              remaining = len - iov->iov_len;
              iov++;
              /* Free the small buffer */
-            push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data),
-                        0, 0);
+            push_rxbufs(card, sb);
  
              /* Copy all large buffers to the huge buffer and free them */
              for (j = 1; j < NS_SKB(iovb)->iovcnt; j++)
@@ -2485,8 +2495,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
                 skb_put(hb, tocopy);
                 iov++;
                 remaining -= tocopy;
-               push_rxbufs(card, BUF_LG, (u32) lb,
-                           (u32) virt_to_bus(lb->data), 0, 0);
+               push_rxbufs(card, lb);
              }
  #ifdef EXTRA_DEBUG
              if (remaining != 0 || hb->len != len)
@@ -2496,7 +2505,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
  #ifdef NS_USE_DESTRUCTORS
              hb->destructor = ns_hb_destructor;
  #endif /* NS_USE_DESTRUCTORS */
-            do_gettimeofday(&hb->stamp);
+           __net_timestamp(hb);
              vcc->push(vcc, hb);
              atomic_inc(&vcc->stats->rx);
           }
@@ -2527,9 +2536,10 @@ static void ns_sb_destructor(struct sk_buff *sb)
        sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL);
        if (sb == NULL)
           break;
+      NS_SKB_CB(sb)->buf_type = BUF_SM;
        skb_queue_tail(&card->sbpool.queue, sb);
        skb_reserve(sb, NS_AAL0_HEADER);
-      push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0);
+      push_rxbufs(card, sb);
     } while (card->sbfqc < card->sbnr.min);
  }
  
@@ -2550,9 +2560,10 @@ static void ns_lb_destructor(struct sk_buff *lb)
        lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL);
        if (lb == NULL)
           break;
+      NS_SKB_CB(lb)->buf_type = BUF_LG;
        skb_queue_tail(&card->lbpool.queue, lb);
        skb_reserve(lb, NS_SMBUFSIZE);
-      push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0);
+      push_rxbufs(card, lb);
     } while (card->lbfqc < card->lbnr.min);
  }
  
@@ -2569,6 +2580,7 @@ static void ns_hb_destructor(struct sk_buff *hb)
        hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL);
        if (hb == NULL)
           break;
+      NS_SKB_CB(hb)->buf_type = BUF_NONE;
        skb_queue_tail(&card->hbpool.queue, hb);
        card->hbpool.count++;
     }
@@ -2577,45 +2589,25 @@ static void ns_hb_destructor(struct sk_buff *hb)
  #endif /* NS_USE_DESTRUCTORS */
  
  
-
  static void recycle_rx_buf(ns_dev *card, struct sk_buff *skb)
  {
-   if (skb->list == &card->sbpool.queue)
-      push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), 0, 0);
-   else if (skb->list == &card->lbpool.queue)
-      push_rxbufs(card, BUF_LG, (u32) skb, (u32) virt_to_bus(skb->data), 0, 0);
-   else
-   {
-      printk("nicstar%d: What kind of rx buffer is this?\n", card->index);
-      dev_kfree_skb_any(skb);
-   }
-}
+       struct ns_skb_cb *cb = NS_SKB_CB(skb);
  
+       if (unlikely(cb->buf_type == BUF_NONE)) {
+               printk("nicstar%d: What kind of rx buffer is this?\n", card->index);
+               dev_kfree_skb_any(skb);
+       } else
+               push_rxbufs(card, skb);
+}
  
  
  static void recycle_iovec_rx_bufs(ns_dev *card, struct iovec *iov, int count)
  {
-   struct sk_buff *skb;
-
-   for (; count > 0; count--)
-   {
-      skb = (struct sk_buff *) (iov++)->iov_base;
-      if (skb->list == &card->sbpool.queue)
-         push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data),
-                    0, 0);
-      else if (skb->list == &card->lbpool.queue)
-         push_rxbufs(card, BUF_LG, (u32) skb, (u32) virt_to_bus(skb->data),
-                    0, 0);
-      else
-      {
-         printk("nicstar%d: What kind of rx buffer is this?\n", card->index);
-         dev_kfree_skb_any(skb);
-      }
-   }
+       while (count-- > 0)
+               recycle_rx_buf(card, (struct sk_buff *) (iov++)->iov_base);
  }
  
  
-
  static void recycle_iov_buf(ns_dev *card, struct sk_buff *iovb)
  {
     if (card->iovpool.count < card->iovnr.max)
@@ -2631,7 +2623,7 @@ static void recycle_iov_buf(ns_dev *card, struct sk_buff *iovb)
  
  static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb)
  {
-   skb_unlink(sb);
+   skb_unlink(sb, &card->sbpool.queue);
  #ifdef NS_USE_DESTRUCTORS
     if (card->sbfqc < card->sbnr.min)
  #else
@@ -2640,10 +2632,10 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb)
        struct sk_buff *new_sb;
        if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL)
        {
+         NS_SKB_CB(new_sb)->buf_type = BUF_SM;
           skb_queue_tail(&card->sbpool.queue, new_sb);
           skb_reserve(new_sb, NS_AAL0_HEADER);
-         push_rxbufs(card, BUF_SM, (u32) new_sb,
-                     (u32) virt_to_bus(new_sb->data), 0, 0);
+         push_rxbufs(card, new_sb);
        }
     }
     if (card->sbfqc < card->sbnr.init)
@@ -2652,10 +2644,10 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb)
        struct sk_buff *new_sb;
        if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL)
        {
+         NS_SKB_CB(new_sb)->buf_type = BUF_SM;
           skb_queue_tail(&card->sbpool.queue, new_sb);
           skb_reserve(new_sb, NS_AAL0_HEADER);
-         push_rxbufs(card, BUF_SM, (u32) new_sb,
-                     (u32) virt_to_bus(new_sb->data), 0, 0);
+         push_rxbufs(card, new_sb);
        }
     }
  }
@@ -2664,7 +2656,7 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb)
  
  static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb)
  {
-   skb_unlink(lb);
+   skb_unlink(lb, &card->lbpool.queue);
  #ifdef NS_USE_DESTRUCTORS
     if (card->lbfqc < card->lbnr.min)
  #else
@@ -2673,10 +2665,10 @@ static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb)
        struct sk_buff *new_lb;
        if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL)
        {
+         NS_SKB_CB(new_lb)->buf_type = BUF_LG;
           skb_queue_tail(&card->lbpool.queue, new_lb);
           skb_reserve(new_lb, NS_SMBUFSIZE);
-         push_rxbufs(card, BUF_LG, (u32) new_lb,
-                     (u32) virt_to_bus(new_lb->data), 0, 0);
+         push_rxbufs(card, new_lb);
        }
     }
     if (card->lbfqc < card->lbnr.init)
@@ -2685,10 +2677,10 @@ static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb)
        struct sk_buff *new_lb;
        if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL)
        {
+         NS_SKB_CB(new_lb)->buf_type = BUF_LG;
           skb_queue_tail(&card->lbpool.queue, new_lb);
           skb_reserve(new_lb, NS_SMBUFSIZE);
-         push_rxbufs(card, BUF_LG, (u32) new_lb,
-                     (u32) virt_to_bus(new_lb->data), 0, 0);
+         push_rxbufs(card, new_lb);
        }
     }
  }
@@ -2880,9 +2872,10 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg)
                    sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL);
                    if (sb == NULL)
                       return -ENOMEM;
+                  NS_SKB_CB(sb)->buf_type = BUF_SM;
                    skb_queue_tail(&card->sbpool.queue, sb);
                    skb_reserve(sb, NS_AAL0_HEADER);
-                  push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0);
+                  push_rxbufs(card, sb);
                }
                break;
  
@@ -2894,9 +2887,10 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg)
                    lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL);
                    if (lb == NULL)
                       return -ENOMEM;
+                  NS_SKB_CB(lb)->buf_type = BUF_LG;
                    skb_queue_tail(&card->lbpool.queue, lb);
                    skb_reserve(lb, NS_SMBUFSIZE);
-                  push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0);
+                  push_rxbufs(card, lb);
                }
                break;
  
@@ -2923,6 +2917,7 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg)
                    hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL);
                    if (hb == NULL)
                       return -ENOMEM;
+                  NS_SKB_CB(hb)->buf_type = BUF_NONE;
                    ns_grab_int_lock(card, flags);
                    skb_queue_tail(&card->hbpool.queue, hb);
                    card->hbpool.count++;
@@ -2953,6 +2948,7 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg)
                    iovb = alloc_skb(NS_IOVBUFSIZE, GFP_KERNEL);
                    if (iovb == NULL)
                       return -ENOMEM;
+                  NS_SKB_CB(iovb)->buf_type = BUF_NONE;
                    ns_grab_int_lock(card, flags);
                    skb_queue_tail(&card->iovpool.queue, iovb);
                    card->iovpool.count++;
@@ -2979,17 +2975,12 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg)
  }
  
  
-
  static void which_list(ns_dev *card, struct sk_buff *skb)
  {
-   printk("It's a %s buffer.\n", skb->list == &card->sbpool.queue ?
-          "small" : skb->list == &card->lbpool.queue ? "large" :
-         skb->list == &card->hbpool.queue ? "huge" :
-         skb->list == &card->iovpool.queue ? "iovec" : "unknown");
+       printk("skb buf_type: 0x%08x\n", NS_SKB_CB(skb)->buf_type);
  }
  
  
-
  static void ns_poll(unsigned long arg)
  {
     int i;
diff --git a/drivers/atm/nicstar.h b/drivers/atm/nicstar.h

index ea83c46c8ba58b1c63eadceb973d52e45ed816f4..5997bcb45b59492486bafb4d951b6171a6a01049 100644 (file)
--- a/drivers/atm/nicstar.h
+++ b/drivers/atm/nicstar.h
@@ -103,8 +103,14 @@
  
  #define NS_IOREMAP_SIZE 4096
  
-#define BUF_SM 0x00000000      /* These two are used for push_rxbufs() */
-#define BUF_LG 0x00000001       /* CMD, Write_FreeBufQ, LBUF bit */
+/*
+ * BUF_XX distinguish the Rx buffers depending on their (small/large) size.
+ * BUF_SM and BUF_LG are both used by the driver and the device.
+ * BUF_NONE is only used by the driver.
+ */
+#define BUF_SM         0x00000000      /* These two are used for push_rxbufs() */
+#define BUF_LG         0x00000001      /* CMD, Write_FreeBufQ, LBUF bit */
+#define BUF_NONE       0xffffffff      /* Software only: neither small nor large */
  
  #define NS_HBUFSIZE 65568      /* Size of max. AAL5 PDU */
  #define NS_MAX_IOVECS (2 + (65568 - NS_SMBUFSIZE) / \
@@ -684,6 +690,12 @@ enum ns_regs
  /* Device driver structures ***************************************************/
  
  
+struct ns_skb_cb {
+       u32 buf_type;                   /* BUF_SM/BUF_LG/BUF_NONE */
+};
+
+#define NS_SKB_CB(skb) ((struct ns_skb_cb *)((skb)->cb))
+
  typedef struct tsq_info
  {
     void *org;
diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c

index a2b236a966e0599285698160e44a56161f94be5a..c4b75ecf9460442d3dea8fba8ce0628cb315b801 100644 (file)
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -400,7 +400,7 @@ unsigned long *x;
  EVENT("error code 0x%x/0x%x\n",(here[3] & uPD98401_AAL5_ES) >>
    uPD98401_AAL5_ES_SHIFT,error);
                 skb = ((struct rx_buffer_head *) bus_to_virt(here[2]))->skb;
-               do_gettimeofday(&skb->stamp);
+               __net_timestamp(skb);
  #if 0
  printk("[-3..0] 0x%08lx 0x%08lx 0x%08lx 0x%08lx\n",((unsigned *) skb->data)[-3],
    ((unsigned *) skb->data)[-2],((unsigned *) skb->data)[-1],
@@ -417,10 +417,12 @@ printk("dummy: 0x%08lx, 0x%08lx\n",dummy[0],dummy[1]);
                 chan = (here[3] & uPD98401_AAL5_CHAN) >>
                     uPD98401_AAL5_CHAN_SHIFT;
                 if (chan < zatm_dev->chans && zatm_dev->rx_map[chan]) {
+                       int pos;
                         vcc = zatm_dev->rx_map[chan];
-                       if (skb == zatm_dev->last_free[ZATM_VCC(vcc)->pool])
-                               zatm_dev->last_free[ZATM_VCC(vcc)->pool] = NULL;
-                       skb_unlink(skb);
+                       pos = ZATM_VCC(vcc)->pool;
+                       if (skb == zatm_dev->last_free[pos])
+                               zatm_dev->last_free[pos] = NULL;
+                       skb_unlink(skb, zatm_dev->pool + pos);
                 }
                 else {
                         printk(KERN_ERR DEV_LABEL "(itf %d): RX indication "
diff --git a/drivers/base/bus.c b/drivers/base/bus.c

index 96fe2f956754a38a2bfcde9299fe9b5223109466..ab53832d57e5eff507b8e4552983446641f02d51 100644 (file)
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -180,7 +180,9 @@ static ssize_t driver_bind(struct device_driver *drv,
                 up(&dev->sem);
                 put_device(dev);
         }
-       return err;
+       if (err)
+               return err;
+       return count;
  }
  static DRIVER_ATTR(bind, S_IWUSR, NULL, driver_bind);
  
diff --git a/drivers/base/class.c b/drivers/base/class.c

index 479c12570881374f9bdb6fafb172879b583b20bc..0154a1623b2198965be09dbf4bd9552b5525ba4e 100644 (file)
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -299,6 +299,11 @@ static void class_dev_release(struct kobject * kobj)
  
         pr_debug("device class '%s': release.\n", cd->class_id);
  
+       if (cd->devt_attr) {
+               kfree(cd->devt_attr);
+               cd->devt_attr = NULL;
+       }
+
         if (cls->release)
                 cls->release(cd);
         else {
@@ -591,11 +596,8 @@ void class_device_del(struct class_device *class_dev)
  
         if (class_dev->dev)
                 sysfs_remove_link(&class_dev->kobj, "device");
-       if (class_dev->devt_attr) {
+       if (class_dev->devt_attr)
                 class_device_remove_file(class_dev, class_dev->devt_attr);
-               kfree(class_dev->devt_attr);
-               class_dev->devt_attr = NULL;
-       }
         class_device_remove_attrs(class_dev);
  
         kobject_hotplug(&class_dev->kobj, KOBJ_REMOVE);
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c

index 9e6f51c528b094684c2b709c55be738c3d361225..4be976940f6971f28bc03b18bab09312c0be9957 100644 (file)
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -120,7 +120,7 @@ aoenet_xmit(struct sk_buff *sl)
   * (1) len doesn't include the header by default.  I want this. 
   */
  static int
-aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt)
+aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev)
  {
         struct aoe_hdr *h;
         u32 n;
diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c

index de5746e38af935a01c45a866ae7598d24108a6ac..cd056e7e64ec15d5cf81f16068623e29e07ad187 100644 (file)
--- a/drivers/block/cfq-iosched.c
+++ b/drivers/block/cfq-iosched.c
@@ -47,7 +47,7 @@ static int cfq_slice_idle = HZ / 100;
  /*
   * disable queueing at the driver/hardware level
   */
-static int cfq_max_depth = 1;
+static int cfq_max_depth = 2;
  
  /*
   * for the hash of cfqq inside the cfqd
@@ -385,9 +385,15 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
                 return crq2;
         if (crq2 == NULL)
                 return crq1;
-       if (cfq_crq_requeued(crq1))
+
+       if (cfq_crq_requeued(crq1) && !cfq_crq_requeued(crq2))
                 return crq1;
-       if (cfq_crq_requeued(crq2))
+       else if (cfq_crq_requeued(crq2) && !cfq_crq_requeued(crq1))
+               return crq2;
+
+       if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
+               return crq1;
+       else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
                 return crq2;
  
         s1 = crq1->request->sector;
@@ -1281,6 +1287,7 @@ dispatch:
                          */
                         if (!cfq_crq_in_driver(crq) &&
                             !cfq_cfqq_idle_window(cfqq) &&
+                           !blk_barrier_rq(rq) &&
                             cfqd->rq_in_driver >= cfqd->cfq_max_depth)
                                 return NULL;
  
@@ -1768,18 +1775,23 @@ static void
  cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                  struct cfq_rq *crq)
  {
-       const int sync = cfq_crq_is_sync(crq);
+       struct cfq_io_context *cic;
  
         cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
  
-       if (sync) {
-               struct cfq_io_context *cic = crq->io_context;
+       /*
+        * we never wait for an async request and we don't allow preemption
+        * of an async request. so just return early
+        */
+       if (!cfq_crq_is_sync(crq))
+               return;
  
-               cfq_update_io_thinktime(cfqd, cic);
-               cfq_update_idle_window(cfqd, cfqq, cic);
+       cic = crq->io_context;
  
-               cic->last_queue = jiffies;
-       }
+       cfq_update_io_thinktime(cfqd, cic);
+       cfq_update_idle_window(cfqd, cfqq, cic);
+
+       cic->last_queue = jiffies;
  
         if (cfqq == cfqd->active_queue) {
                 /*
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c

index 692a5fced76e448819f4e649cd5576a066deae4f..3c818544475ea857d249f1afc8f259028c163a2b 100644 (file)
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -719,7 +719,7 @@ struct request *blk_queue_find_tag(request_queue_t *q, int tag)
  {
         struct blk_queue_tag *bqt = q->queue_tags;
  
-       if (unlikely(bqt == NULL || tag >= bqt->max_depth))
+       if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
                 return NULL;
  
         return bqt->tag_index[tag];
@@ -798,6 +798,7 @@ init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
  
         memset(tag_index, 0, depth * sizeof(struct request *));
         memset(tag_map, 0, nr_ulongs * sizeof(unsigned long));
+       tags->real_max_depth = depth;
         tags->max_depth = depth;
         tags->tag_index = tag_index;
         tags->tag_map = tag_map;
@@ -871,12 +872,23 @@ int blk_queue_resize_tags(request_queue_t *q, int new_depth)
         if (!bqt)
                 return -ENXIO;
  
+       /*
+        * if we already have large enough real_max_depth.  just
+        * adjust max_depth.  *NOTE* as requests with tag value
+        * between new_depth and real_max_depth can be in-flight, tag
+        * map can not be shrunk blindly here.
+        */
+       if (new_depth <= bqt->real_max_depth) {
+               bqt->max_depth = new_depth;
+               return 0;
+       }
+
         /*
          * save the old state info, so we can copy it back
          */
         tag_index = bqt->tag_index;
         tag_map = bqt->tag_map;
-       max_depth = bqt->max_depth;
+       max_depth = bqt->real_max_depth;
  
         if (init_tag_map(q, bqt, new_depth))
                 return -ENOMEM;
@@ -913,7 +925,7 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq)
  
         BUG_ON(tag == -1);
  
-       if (unlikely(tag >= bqt->max_depth))
+       if (unlikely(tag >= bqt->real_max_depth))
                 /*
                  * This can happen after tag depth has been reduced.
                  * FIXME: how about a warning or info message here?
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c

index 46e56a25d2c8875966462bef4c53a225ac572090..e46ecd23b3ac74d6854c2839484e392326a71e6e 100644 (file)
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -776,7 +776,7 @@ static int viodasd_remove(struct vio_dev *vdev)
   */
  static struct vio_device_id viodasd_device_table[] __devinitdata = {
         { "viodasd", "" },
-       { 0, }
+       { "", "" }
  };
  
  MODULE_DEVICE_TABLE(vio, viodasd_device_table);
diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c

index c42d7e6ac1c5858c851096f38ff6afe65185ecc4..1e9db0156ea7d5e574f62cf08283fd5bd19f45f3 100644 (file)
--- a/drivers/bluetooth/bfusb.c
+++ b/drivers/bluetooth/bfusb.c
@@ -158,7 +158,7 @@ static int bfusb_send_bulk(struct bfusb *bfusb, struct sk_buff *skb)
         if (err) {
                 BT_ERR("%s bulk tx submit failed urb %p err %d", 
                                         bfusb->hdev->name, urb, err);
-               skb_unlink(skb);
+               skb_unlink(skb, &bfusb->pending_q);
                 usb_free_urb(urb);
         } else
                 atomic_inc(&bfusb->pending_tx);
@@ -212,7 +212,7 @@ static void bfusb_tx_complete(struct urb *urb, struct pt_regs *regs)
  
         read_lock(&bfusb->lock);
  
-       skb_unlink(skb);
+       skb_unlink(skb, &bfusb->pending_q);
         skb_queue_tail(&bfusb->completed_q, skb);
  
         bfusb_tx_wakeup(bfusb);
@@ -253,7 +253,7 @@ static int bfusb_rx_submit(struct bfusb *bfusb, struct urb *urb)
         if (err) {
                 BT_ERR("%s bulk rx submit failed urb %p err %d",
                                         bfusb->hdev->name, urb, err);
-               skb_unlink(skb);
+               skb_unlink(skb, &bfusb->pending_q);
                 kfree_skb(skb);
                 usb_free_urb(urb);
         }
@@ -330,7 +330,7 @@ static inline int bfusb_recv_block(struct bfusb *bfusb, int hdr, unsigned char *
                 }
  
                 skb->dev = (void *) bfusb->hdev;
-               skb->pkt_type = pkt_type;
+               bt_cb(skb)->pkt_type = pkt_type;
  
                 bfusb->reassembly = skb;
         } else {
@@ -398,7 +398,7 @@ static void bfusb_rx_complete(struct urb *urb, struct pt_regs *regs)
                 buf   += len;
         }
  
-       skb_unlink(skb);
+       skb_unlink(skb, &bfusb->pending_q);
         kfree_skb(skb);
  
         bfusb_rx_submit(bfusb, urb);
@@ -485,7 +485,7 @@ static int bfusb_send_frame(struct sk_buff *skb)
         unsigned char buf[3];
         int sent = 0, size, count;
  
-       BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, skb->pkt_type, skb->len);
+       BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, bt_cb(skb)->pkt_type, skb->len);
  
         if (!hdev) {
                 BT_ERR("Frame for unknown HCI device (hdev=NULL)");
@@ -497,7 +497,7 @@ static int bfusb_send_frame(struct sk_buff *skb)
  
         bfusb = (struct bfusb *) hdev->driver_data;
  
-       switch (skb->pkt_type) {
+       switch (bt_cb(skb)->pkt_type) {
         case HCI_COMMAND_PKT:
                 hdev->stat.cmd_tx++;
                 break;
@@ -510,7 +510,7 @@ static int bfusb_send_frame(struct sk_buff *skb)
         };
  
         /* Prepend skb with frame type */
-       memcpy(skb_push(skb, 1), &(skb->pkt_type), 1);
+       memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1);
  
         count = skb->len;
  
diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c

index bd2ec7e284cc790db66922cef1335b2479320286..26fe9c0e1d20e4c1ef51c47428cf5672ae5db56c 100644 (file)
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -270,7 +270,7 @@ static void bluecard_write_wakeup(bluecard_info_t *info)
                 if (!(skb = skb_dequeue(&(info->txq))))
                         break;
  
-               if (skb->pkt_type & 0x80) {
+               if (bt_cb(skb)->pkt_type & 0x80) {
                         /* Disable RTS */
                         info->ctrl_reg |= REG_CONTROL_RTS;
                         outb(info->ctrl_reg, iobase + REG_CONTROL);
@@ -288,13 +288,13 @@ static void bluecard_write_wakeup(bluecard_info_t *info)
                 /* Mark the buffer as dirty */
                 clear_bit(ready_bit, &(info->tx_state));
  
-               if (skb->pkt_type & 0x80) {
+               if (bt_cb(skb)->pkt_type & 0x80) {
                         DECLARE_WAIT_QUEUE_HEAD(wq);
                         DEFINE_WAIT(wait);
  
                         unsigned char baud_reg;
  
-                       switch (skb->pkt_type) {
+                       switch (bt_cb(skb)->pkt_type) {
                         case PKT_BAUD_RATE_460800:
                                 baud_reg = REG_CONTROL_BAUD_RATE_460800;
                                 break;
@@ -410,9 +410,9 @@ static void bluecard_receive(bluecard_info_t *info, unsigned int offset)
                 if (info->rx_state == RECV_WAIT_PACKET_TYPE) {
  
                         info->rx_skb->dev = (void *) info->hdev;
-                       info->rx_skb->pkt_type = buf[i];
+                       bt_cb(info->rx_skb)->pkt_type = buf[i];
  
-                       switch (info->rx_skb->pkt_type) {
+                       switch (bt_cb(info->rx_skb)->pkt_type) {
  
                         case 0x00:
                                 /* init packet */
@@ -444,7 +444,7 @@ static void bluecard_receive(bluecard_info_t *info, unsigned int offset)
  
                         default:
                                 /* unknown packet */
-                               BT_ERR("Unknown HCI packet with type 0x%02x received", info->rx_skb->pkt_type);
+                               BT_ERR("Unknown HCI packet with type 0x%02x received", bt_cb(info->rx_skb)->pkt_type);
                                 info->hdev->stat.err_rx++;
  
                                 kfree_skb(info->rx_skb);
@@ -586,21 +586,21 @@ static int bluecard_hci_set_baud_rate(struct hci_dev *hdev, int baud)
         switch (baud) {
         case 460800:
                 cmd[4] = 0x00;
-               skb->pkt_type = PKT_BAUD_RATE_460800;
+               bt_cb(skb)->pkt_type = PKT_BAUD_RATE_460800;
                 break;
         case 230400:
                 cmd[4] = 0x01;
-               skb->pkt_type = PKT_BAUD_RATE_230400;
+               bt_cb(skb)->pkt_type = PKT_BAUD_RATE_230400;
                 break;
         case 115200:
                 cmd[4] = 0x02;
-               skb->pkt_type = PKT_BAUD_RATE_115200;
+               bt_cb(skb)->pkt_type = PKT_BAUD_RATE_115200;
                 break;
         case 57600:
                 /* Fall through... */
         default:
                 cmd[4] = 0x03;
-               skb->pkt_type = PKT_BAUD_RATE_57600;
+               bt_cb(skb)->pkt_type = PKT_BAUD_RATE_57600;
                 break;
         }
  
@@ -680,7 +680,7 @@ static int bluecard_hci_send_frame(struct sk_buff *skb)
  
         info = (bluecard_info_t *)(hdev->driver_data);
  
-       switch (skb->pkt_type) {
+       switch (bt_cb(skb)->pkt_type) {
         case HCI_COMMAND_PKT:
                 hdev->stat.cmd_tx++;
                 break;
@@ -693,7 +693,7 @@ static int bluecard_hci_send_frame(struct sk_buff *skb)
         };
  
         /* Prepend skb with frame type */
-       memcpy(skb_push(skb, 1), &(skb->pkt_type), 1);
+       memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1);
         skb_queue_tail(&(info->txq), skb);
  
         bluecard_write_wakeup(info);
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c

index 2771c861f185295e120270cc0ac63fd0131df201..a1bf8f066c888c227e546d982ae63a0978ef8e7f 100644 (file)
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -105,7 +105,7 @@ static void bpa10x_recv_bulk(struct bpa10x_data *data, unsigned char *buf, int c
                         if (skb) {
                                 memcpy(skb_put(skb, len), buf, len);
                                 skb->dev = (void *) data->hdev;
-                               skb->pkt_type = HCI_ACLDATA_PKT;
+                               bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
                                 hci_recv_frame(skb);
                         }
                         break;
@@ -117,7 +117,7 @@ static void bpa10x_recv_bulk(struct bpa10x_data *data, unsigned char *buf, int c
                         if (skb) {
                                 memcpy(skb_put(skb, len), buf, len);
                                 skb->dev = (void *) data->hdev;
-                               skb->pkt_type = HCI_SCODATA_PKT;
+                               bt_cb(skb)->pkt_type = HCI_SCODATA_PKT;
                                 hci_recv_frame(skb);
                         }
                         break;
@@ -129,7 +129,7 @@ static void bpa10x_recv_bulk(struct bpa10x_data *data, unsigned char *buf, int c
                         if (skb) {
                                 memcpy(skb_put(skb, len), buf, len);
                                 skb->dev = (void *) data->hdev;
-                               skb->pkt_type = HCI_VENDOR_PKT;
+                               bt_cb(skb)->pkt_type = HCI_VENDOR_PKT;
                                 hci_recv_frame(skb);
                         }
                         break;
@@ -190,7 +190,7 @@ static int bpa10x_recv_event(struct bpa10x_data *data, unsigned char *buf, int s
                 }
  
                 skb->dev = (void *) data->hdev;
-               skb->pkt_type = pkt_type;
+               bt_cb(skb)->pkt_type = pkt_type;
  
                 memcpy(skb_put(skb, size), buf, size);
  
@@ -307,7 +307,8 @@ unlock:
         read_unlock(&data->lock);
  }
  
-static inline struct urb *bpa10x_alloc_urb(struct usb_device *udev, unsigned int pipe, size_t size, int flags, void *data)
+static inline struct urb *bpa10x_alloc_urb(struct usb_device *udev, unsigned int pipe,
+                                       size_t size, unsigned int __nocast flags, void *data)
  {
         struct urb *urb;
         struct usb_ctrlrequest *cr;
@@ -367,11 +368,8 @@ static inline void bpa10x_free_urb(struct urb *urb)
         if (!urb)
                 return;
  
-       if (urb->setup_packet)
-               kfree(urb->setup_packet);
-
-       if (urb->transfer_buffer)
-               kfree(urb->transfer_buffer);
+       kfree(urb->setup_packet);
+       kfree(urb->transfer_buffer);
  
         usb_free_urb(urb);
  }
@@ -490,7 +488,7 @@ static int bpa10x_send_frame(struct sk_buff *skb)
         struct hci_dev *hdev = (struct hci_dev *) skb->dev;
         struct bpa10x_data *data;
  
-       BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, skb->pkt_type, skb->len);
+       BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, bt_cb(skb)->pkt_type, skb->len);
  
         if (!hdev) {
                 BT_ERR("Frame for unknown HCI device");
@@ -503,9 +501,9 @@ static int bpa10x_send_frame(struct sk_buff *skb)
         data = hdev->driver_data;
  
         /* Prepend skb with frame type */
-       memcpy(skb_push(skb, 1), &(skb->pkt_type), 1);
+       memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1);
  
-       switch (skb->pkt_type) {
+       switch (bt_cb(skb)->pkt_type) {
         case HCI_COMMAND_PKT:
                 hdev->stat.cmd_tx++;
                 skb_queue_tail(&data->cmd_queue, skb);
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c

index adf1750ea58d653a93fa71d1722412cf770912e9..2e0338d80f32c1db8ba8e2f1a4e524e08298aeb6 100644 (file)
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -259,11 +259,11 @@ static void bt3c_receive(bt3c_info_t *info)
                 if (info->rx_state == RECV_WAIT_PACKET_TYPE) {
  
                         info->rx_skb->dev = (void *) info->hdev;
-                       info->rx_skb->pkt_type = inb(iobase + DATA_L);
+                       bt_cb(info->rx_skb)->pkt_type = inb(iobase + DATA_L);
                         inb(iobase + DATA_H);
-                       //printk("bt3c: PACKET_TYPE=%02x\n", info->rx_skb->pkt_type);
+                       //printk("bt3c: PACKET_TYPE=%02x\n", bt_cb(info->rx_skb)->pkt_type);
  
-                       switch (info->rx_skb->pkt_type) {
+                       switch (bt_cb(info->rx_skb)->pkt_type) {
  
                         case HCI_EVENT_PKT:
                                 info->rx_state = RECV_WAIT_EVENT_HEADER;
@@ -282,7 +282,7 @@ static void bt3c_receive(bt3c_info_t *info)
  
                         default:
                                 /* Unknown packet */
-                               BT_ERR("Unknown HCI packet with type 0x%02x received", info->rx_skb->pkt_type);
+                               BT_ERR("Unknown HCI packet with type 0x%02x received", bt_cb(info->rx_skb)->pkt_type);
                                 info->hdev->stat.err_rx++;
                                 clear_bit(HCI_RUNNING, &(info->hdev->flags));
  
@@ -439,7 +439,7 @@ static int bt3c_hci_send_frame(struct sk_buff *skb)
  
         info = (bt3c_info_t *) (hdev->driver_data);
  
-       switch (skb->pkt_type) {
+       switch (bt_cb(skb)->pkt_type) {
         case HCI_COMMAND_PKT:
                 hdev->stat.cmd_tx++;
                 break;
@@ -452,7 +452,7 @@ static int bt3c_hci_send_frame(struct sk_buff *skb)
         };
  
         /* Prepend skb with frame type */
-       memcpy(skb_push(skb, 1), &(skb->pkt_type), 1);
+       memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1);
         skb_queue_tail(&(info->txq), skb);
  
         spin_lock_irqsave(&(info->lock), flags);
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c

index e4c59fdc0e12bcc7e5d1e49231d211b5d3c67b7c..89486ea7a0216db3712b3182fddde49824920d15 100644 (file)
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -211,9 +211,9 @@ static void btuart_receive(btuart_info_t *info)
                 if (info->rx_state == RECV_WAIT_PACKET_TYPE) {
  
                         info->rx_skb->dev = (void *) info->hdev;
-                       info->rx_skb->pkt_type = inb(iobase + UART_RX);
+                       bt_cb(info->rx_skb)->pkt_type = inb(iobase + UART_RX);
  
-                       switch (info->rx_skb->pkt_type) {
+                       switch (bt_cb(info->rx_skb)->pkt_type) {
  
                         case HCI_EVENT_PKT:
                                 info->rx_state = RECV_WAIT_EVENT_HEADER;
@@ -232,7 +232,7 @@ static void btuart_receive(btuart_info_t *info)
  
                         default:
                                 /* Unknown packet */
-                               BT_ERR("Unknown HCI packet with type 0x%02x received", info->rx_skb->pkt_type);
+                               BT_ERR("Unknown HCI packet with type 0x%02x received", bt_cb(info->rx_skb)->pkt_type);
                                 info->hdev->stat.err_rx++;
                                 clear_bit(HCI_RUNNING, &(info->hdev->flags));
  
@@ -447,7 +447,7 @@ static int btuart_hci_send_frame(struct sk_buff *skb)
  
         info = (btuart_info_t *)(hdev->driver_data);
  
-       switch (skb->pkt_type) {
+       switch (bt_cb(skb)->pkt_type) {
         case HCI_COMMAND_PKT:
                 hdev->stat.cmd_tx++;
                 break;
@@ -460,7 +460,7 @@ static int btuart_hci_send_frame(struct sk_buff *skb)
         };
  
         /* Prepend skb with frame type */
-       memcpy(skb_push(skb, 1), &(skb->pkt_type), 1);
+       memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1);
         skb_queue_tail(&(info->txq), skb);
  
         btuart_write_wakeup(info);
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c

index e39868c3da4815f89fb247badd3faa9e6b15cb91..84c1f88394225504ff7c2e02f77a8f043db85960 100644 (file)
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -251,7 +251,7 @@ static void dtl1_receive(dtl1_info_t *info)
                                 info->rx_count = nsh->len + (nsh->len & 0x0001);
                                 break;
                         case RECV_WAIT_DATA:
-                               info->rx_skb->pkt_type = nsh->type;
+                               bt_cb(info->rx_skb)->pkt_type = nsh->type;
  
                                 /* remove PAD byte if it exists */
                                 if (nsh->len & 0x0001) {
@@ -262,7 +262,7 @@ static void dtl1_receive(dtl1_info_t *info)
                                 /* remove NSH */
                                 skb_pull(info->rx_skb, NSHL);
  
-                               switch (info->rx_skb->pkt_type) {
+                               switch (bt_cb(info->rx_skb)->pkt_type) {
                                 case 0x80:
                                         /* control data for the Nokia Card */
                                         dtl1_control(info, info->rx_skb);
@@ -272,12 +272,12 @@ static void dtl1_receive(dtl1_info_t *info)
                                 case 0x84:
                                         /* send frame to the HCI layer */
                                         info->rx_skb->dev = (void *) info->hdev;
-                                       info->rx_skb->pkt_type &= 0x0f;
+                                       bt_cb(info->rx_skb)->pkt_type &= 0x0f;
                                         hci_recv_frame(info->rx_skb);
                                         break;
                                 default:
                                         /* unknown packet */
-                                       BT_ERR("Unknown HCI packet with type 0x%02x received", info->rx_skb->pkt_type);
+                                       BT_ERR("Unknown HCI packet with type 0x%02x received", bt_cb(info->rx_skb)->pkt_type);
                                         kfree_skb(info->rx_skb);
                                         break;
                                 }
@@ -410,7 +410,7 @@ static int dtl1_hci_send_frame(struct sk_buff *skb)
  
         info = (dtl1_info_t *)(hdev->driver_data);
  
-       switch (skb->pkt_type) {
+       switch (bt_cb(skb)->pkt_type) {
         case HCI_COMMAND_PKT:
                 hdev->stat.cmd_tx++;
                 nsh.type = 0x81;
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c

index c0ed213fc857d5405b76fb2f1739d6cc3701c88f..0ee324e1265de30d9f8e75605bbbe414fcfcbd89 100644 (file)
--- a/drivers/bluetooth/hci_bcsp.c
+++ b/drivers/bluetooth/hci_bcsp.c
@@ -58,8 +58,6 @@
  #ifndef CONFIG_BT_HCIUART_DEBUG
  #undef  BT_DBG
  #define BT_DBG( A... )
-#undef  BT_DMP
-#define BT_DMP( A... )
  #endif
  
  static int hciextn = 1;
@@ -151,7 +149,7 @@ static int bcsp_enqueue(struct hci_uart *hu, struct sk_buff *skb)
                 return 0;
         }
  
-       switch (skb->pkt_type) {
+       switch (bt_cb(skb)->pkt_type) {
         case HCI_ACLDATA_PKT:
         case HCI_COMMAND_PKT:
                 skb_queue_tail(&bcsp->rel, skb);
@@ -229,7 +227,7 @@ static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data,
         if (!nskb)
                 return NULL;
  
-       nskb->pkt_type = pkt_type;
+       bt_cb(nskb)->pkt_type = pkt_type;
  
         bcsp_slip_msgdelim(nskb);
  
@@ -288,7 +286,7 @@ static struct sk_buff *bcsp_dequeue(struct hci_uart *hu)
            since they have priority */
  
         if ((skb = skb_dequeue(&bcsp->unrel)) != NULL) {
-               struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, skb->pkt_type);
+               struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, bt_cb(skb)->pkt_type);
                 if (nskb) {
                         kfree_skb(skb);
                         return nskb;
@@ -305,7 +303,7 @@ static struct sk_buff *bcsp_dequeue(struct hci_uart *hu)
         spin_lock_irqsave(&bcsp->unack.lock, flags);
  
         if (bcsp->unack.qlen < BCSP_TXWINSIZE && (skb = skb_dequeue(&bcsp->rel)) != NULL) {
-               struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, skb->pkt_type);
+               struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, bt_cb(skb)->pkt_type);
                 if (nskb) {
                         __skb_queue_tail(&bcsp->unack, skb);
                         mod_timer(&bcsp->tbcsp, jiffies + HZ / 4);
@@ -403,7 +401,7 @@ static void bcsp_handle_le_pkt(struct hci_uart *hu)
                 if (!nskb)
                         return;
                 memcpy(skb_put(nskb, 4), conf_rsp_pkt, 4);
-               nskb->pkt_type = BCSP_LE_PKT;
+               bt_cb(nskb)->pkt_type = BCSP_LE_PKT;
  
                 skb_queue_head(&bcsp->unrel, nskb);
                 hci_uart_tx_wakeup(hu);
@@ -485,14 +483,14 @@ static inline void bcsp_complete_rx_pkt(struct hci_uart *hu)
         bcsp_pkt_cull(bcsp);
         if ((bcsp->rx_skb->data[1] & 0x0f) == 6 &&
                         bcsp->rx_skb->data[0] & 0x80) {
-               bcsp->rx_skb->pkt_type = HCI_ACLDATA_PKT;
+               bt_cb(bcsp->rx_skb)->pkt_type = HCI_ACLDATA_PKT;
                 pass_up = 1;
         } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 &&
                         bcsp->rx_skb->data[0] & 0x80) {
-               bcsp->rx_skb->pkt_type = HCI_EVENT_PKT;
+               bt_cb(bcsp->rx_skb)->pkt_type = HCI_EVENT_PKT;
                 pass_up = 1;
         } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) {
-               bcsp->rx_skb->pkt_type = HCI_SCODATA_PKT;
+               bt_cb(bcsp->rx_skb)->pkt_type = HCI_SCODATA_PKT;
                 pass_up = 1;
         } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 &&
                         !(bcsp->rx_skb->data[0] & 0x80)) {
@@ -514,7 +512,7 @@ static inline void bcsp_complete_rx_pkt(struct hci_uart *hu)
                                 hdr.evt = 0xff;
                                 hdr.plen = bcsp->rx_skb->len;
                                 memcpy(skb_push(bcsp->rx_skb, HCI_EVENT_HDR_SIZE), &hdr, HCI_EVENT_HDR_SIZE);
-                               bcsp->rx_skb->pkt_type = HCI_EVENT_PKT;
+                               bt_cb(bcsp->rx_skb)->pkt_type = HCI_EVENT_PKT;
  
                                 hci_recv_frame(bcsp->rx_skb);
                         } else {
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c

index ade94a57bb11292e15ad3e45f1c7ac3badedb9fc..cf8a22d58d960f6753ec4bc98a34bad70c779c70 100644 (file)
--- a/drivers/bluetooth/hci_h4.c
+++ b/drivers/bluetooth/hci_h4.c
@@ -57,8 +57,6 @@
  #ifndef CONFIG_BT_HCIUART_DEBUG
  #undef  BT_DBG
  #define BT_DBG( A... )
-#undef  BT_DMP
-#define BT_DMP( A... )
  #endif
  
  /* Initialize protocol */
@@ -114,7 +112,7 @@ static int h4_enqueue(struct hci_uart *hu, struct sk_buff *skb)
         BT_DBG("hu %p skb %p", hu, skb);
  
         /* Prepend skb with frame type */
-       memcpy(skb_push(skb, 1), &skb->pkt_type, 1);
+       memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1);
         skb_queue_tail(&h4->txq, skb);
         return 0;
  }
@@ -125,7 +123,6 @@ static inline int h4_check_data_len(struct h4_struct *h4, int len)
  
         BT_DBG("len %d room %d", len, room);
         if (!len) {
-               BT_DMP(h4->rx_skb->data, h4->rx_skb->len);
                 hci_recv_frame(h4->rx_skb);
         } else if (len > room) {
                 BT_ERR("Data length is too large");
@@ -169,8 +166,6 @@ static int h4_recv(struct hci_uart *hu, void *data, int count)
                         case H4_W4_DATA:
                                 BT_DBG("Complete data");
  
-                               BT_DMP(h4->rx_skb->data, h4->rx_skb->len);
-
                                 hci_recv_frame(h4->rx_skb);
  
                                 h4->rx_state = H4_W4_PACKET_TYPE;
@@ -244,7 +239,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count)
                         return 0;
                 }
                 h4->rx_skb->dev = (void *) hu->hdev;
-               h4->rx_skb->pkt_type = type;
+               bt_cb(h4->rx_skb)->pkt_type = type;
         }
         return count;
  }
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c

index f766bc22c6bbe5e9e4e46a0eee51826fe89b9131..aed80cc2289028cda06e79125aaca8d066af447b 100644 (file)
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -57,8 +57,6 @@
  #ifndef CONFIG_BT_HCIUART_DEBUG
  #undef  BT_DBG
  #define BT_DBG( A... )
-#undef  BT_DMP
-#define BT_DMP( A... )
  #endif
  
  static int reset = 0;
@@ -155,7 +153,7 @@ restart:
                         break;
                 }
         
-               hci_uart_tx_complete(hu, skb->pkt_type);
+               hci_uart_tx_complete(hu, bt_cb(skb)->pkt_type);
                 kfree_skb(skb);
         } 
         
@@ -231,7 +229,7 @@ static int hci_uart_send_frame(struct sk_buff *skb)
         hu = (struct hci_uart *) hdev->driver_data;
         tty = hu->tty;
  
-       BT_DBG("%s: type %d len %d", hdev->name, skb->pkt_type, skb->len);
+       BT_DBG("%s: type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len);
  
         hu->proto->enqueue(hu, skb);
  
diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c

index b120ecf7b8c96f1d856d83bd0d8a814af61b0c32..67d96b5cbb96535568bb26f28595d75e1b869e6d 100644 (file)
--- a/drivers/bluetooth/hci_usb.c
+++ b/drivers/bluetooth/hci_usb.c
@@ -57,8 +57,6 @@
  #ifndef CONFIG_BT_HCIUSB_DEBUG
  #undef  BT_DBG
  #define BT_DBG(D...)
-#undef  BT_DMP
-#define BT_DMP(D...)
  #endif
  
  #ifndef CONFIG_BT_HCIUSB_ZERO_PACKET
@@ -110,6 +108,9 @@ static struct usb_device_id blacklist_ids[] = {
         /* Microsoft Wireless Transceiver for Bluetooth 2.0 */
         { USB_DEVICE(0x045e, 0x009c), .driver_info = HCI_RESET },
  
+       /* Kensington Bluetooth USB adapter */
+       { USB_DEVICE(0x047d, 0x105d), .driver_info = HCI_RESET },
+
         /* ISSC Bluetooth Adapter v3.1 */
         { USB_DEVICE(0x1131, 0x1001), .driver_info = HCI_RESET },
  
@@ -126,7 +127,7 @@ static struct usb_device_id blacklist_ids[] = {
         { }     /* Terminating entry */
  };
  
-static struct _urb *_urb_alloc(int isoc, int gfp)
+static struct _urb *_urb_alloc(int isoc, unsigned int __nocast gfp)
  {
         struct _urb *_urb = kmalloc(sizeof(struct _urb) +
                                 sizeof(struct usb_iso_packet_descriptor) * isoc, gfp);
@@ -387,10 +388,8 @@ static void hci_usb_unlink_urbs(struct hci_usb *husb)
                         urb = &_urb->urb;
                         BT_DBG("%s freeing _urb %p type %d urb %p",
                                         husb->hdev->name, _urb, _urb->type, urb);
-                       if (urb->setup_packet)
-                               kfree(urb->setup_packet);
-                       if (urb->transfer_buffer)
-                               kfree(urb->transfer_buffer);
+                       kfree(urb->setup_packet);
+                       kfree(urb->transfer_buffer);
                         _urb_free(_urb);
                 }
  
@@ -444,7 +443,7 @@ static int __tx_submit(struct hci_usb *husb, struct _urb *_urb)
  
  static inline int hci_usb_send_ctrl(struct hci_usb *husb, struct sk_buff *skb)
  {
-       struct _urb *_urb = __get_completed(husb, skb->pkt_type);
+       struct _urb *_urb = __get_completed(husb, bt_cb(skb)->pkt_type);
         struct usb_ctrlrequest *dr;
         struct urb *urb;
  
@@ -452,7 +451,7 @@ static inline int hci_usb_send_ctrl(struct hci_usb *husb, struct sk_buff *skb)
                 _urb = _urb_alloc(0, GFP_ATOMIC);
                 if (!_urb)
                         return -ENOMEM;
-               _urb->type = skb->pkt_type;
+               _urb->type = bt_cb(skb)->pkt_type;
  
                 dr = kmalloc(sizeof(*dr), GFP_ATOMIC);
                 if (!dr) {
@@ -480,7 +479,7 @@ static inline int hci_usb_send_ctrl(struct hci_usb *husb, struct sk_buff *skb)
  
  static inline int hci_usb_send_bulk(struct hci_usb *husb, struct sk_buff *skb)
  {
-       struct _urb *_urb = __get_completed(husb, skb->pkt_type);
+       struct _urb *_urb = __get_completed(husb, bt_cb(skb)->pkt_type);
         struct urb *urb;
         int pipe;
  
@@ -488,7 +487,7 @@ static inline int hci_usb_send_bulk(struct hci_usb *husb, struct sk_buff *skb)
                 _urb = _urb_alloc(0, GFP_ATOMIC);
                 if (!_urb)
                         return -ENOMEM;
-               _urb->type = skb->pkt_type;
+               _urb->type = bt_cb(skb)->pkt_type;
         }
  
         urb  = &_urb->urb;
@@ -506,14 +505,14 @@ static inline int hci_usb_send_bulk(struct hci_usb *husb, struct sk_buff *skb)
  #ifdef CONFIG_BT_HCIUSB_SCO
  static inline int hci_usb_send_isoc(struct hci_usb *husb, struct sk_buff *skb)
  {
-       struct _urb *_urb = __get_completed(husb, skb->pkt_type);
+       struct _urb *_urb = __get_completed(husb, bt_cb(skb)->pkt_type);
         struct urb *urb;
  
         if (!_urb) {
                 _urb = _urb_alloc(HCI_MAX_ISOC_FRAMES, GFP_ATOMIC);
                 if (!_urb)
                         return -ENOMEM;
-               _urb->type = skb->pkt_type;
+               _urb->type = bt_cb(skb)->pkt_type;
         }
  
         BT_DBG("%s skb %p len %d", husb->hdev->name, skb, skb->len);
@@ -602,11 +601,11 @@ static int hci_usb_send_frame(struct sk_buff *skb)
         if (!test_bit(HCI_RUNNING, &hdev->flags))
                 return -EBUSY;
  
-       BT_DBG("%s type %d len %d", hdev->name, skb->pkt_type, skb->len);
+       BT_DBG("%s type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len);
  
         husb = (struct hci_usb *) hdev->driver_data;
  
-       switch (skb->pkt_type) {
+       switch (bt_cb(skb)->pkt_type) {
         case HCI_COMMAND_PKT:
                 hdev->stat.cmd_tx++;
                 break;
@@ -628,7 +627,7 @@ static int hci_usb_send_frame(struct sk_buff *skb)
  
         read_lock(&husb->completion_lock);
  
-       skb_queue_tail(__transmit_q(husb, skb->pkt_type), skb);
+       skb_queue_tail(__transmit_q(husb, bt_cb(skb)->pkt_type), skb);
         hci_usb_tx_wakeup(husb);
  
         read_unlock(&husb->completion_lock);
@@ -683,7 +682,7 @@ static inline int __recv_frame(struct hci_usb *husb, int type, void *data, int c
                                 return -ENOMEM;
                         }
                         skb->dev = (void *) husb->hdev;
-                       skb->pkt_type = type;
+                       bt_cb(skb)->pkt_type = type;
         
                         __reassembly(husb, type) = skb;
  
@@ -703,6 +702,7 @@ static inline int __recv_frame(struct hci_usb *husb, int type, void *data, int c
                 if (!scb->expect) {
                         /* Complete frame */
                         __reassembly(husb, type) = NULL;
+                       bt_cb(skb)->pkt_type = type;
                         hci_recv_frame(skb);
                 }
  
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c

index f9b956fb2b8b81ac774cb772b21d06aee58a0a7f..52cbd45c308fa0189d9d0a6ef044ebc49651a379 100644 (file)
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -1,229 +1,220 @@
-/* 
-   BlueZ - Bluetooth protocol stack for Linux
-   Copyright (C) 2000-2001 Qualcomm Incorporated
-
-   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License version 2 as
-   published by the Free Software Foundation;
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
-   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
-   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
-   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
-   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
-   SOFTWARE IS DISCLAIMED.
-*/
-
  /*
- * Bluetooth HCI virtual device driver.
   *
- * $Id: hci_vhci.c,v 1.3 2002/04/17 17:37:20 maxk Exp $ 
+ *  Bluetooth virtual HCI driver
+ *
+ *  Copyright (C) 2000-2001 Qualcomm Incorporated
+ *  Copyright (C) 2002-2003 Maxim Krasnyansky <maxk@qualcomm.com>
+ *  Copyright (C) 2004-2005 Marcel Holtmann <marcel@holtmann.org>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
   */
-#define VERSION "1.1"
  
  #include <linux/config.h>
  #include <linux/module.h>
  
-#include <linux/errno.h>
  #include <linux/kernel.h>
-#include <linux/major.h>
-#include <linux/sched.h>
+#include <linux/init.h>
  #include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
  #include <linux/poll.h>
-#include <linux/fcntl.h>
-#include <linux/init.h>
-#include <linux/random.h>
  
  #include <linux/skbuff.h>
  #include <linux/miscdevice.h>
  
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
  #include <net/bluetooth/bluetooth.h>
  #include <net/bluetooth/hci_core.h>
-#include "hci_vhci.h"
  
-/* HCI device part */
+#ifndef CONFIG_BT_HCIVHCI_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+#define VERSION "1.2"
+
+static int minor = MISC_DYNAMIC_MINOR;
+
+struct vhci_data {
+       struct hci_dev *hdev;
+
+       unsigned long flags;
+
+       wait_queue_head_t read_wait;
+       struct sk_buff_head readq;
+
+       struct fasync_struct *fasync;
+};
  
-static int hci_vhci_open(struct hci_dev *hdev)
+#define VHCI_FASYNC    0x0010
+
+static struct miscdevice vhci_miscdev;
+
+static int vhci_open_dev(struct hci_dev *hdev)
  {
         set_bit(HCI_RUNNING, &hdev->flags);
-       return 0;
-}
  
-static int hci_vhci_flush(struct hci_dev *hdev)
-{
-       struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) hdev->driver_data;
-       skb_queue_purge(&hci_vhci->readq);
         return 0;
  }
  
-static int hci_vhci_close(struct hci_dev *hdev)
+static int vhci_close_dev(struct hci_dev *hdev)
  {
+       struct vhci_data *vhci = hdev->driver_data;
+
         if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags))
                 return 0;
  
-       hci_vhci_flush(hdev);
+       skb_queue_purge(&vhci->readq);
+
         return 0;
  }
  
-static void hci_vhci_destruct(struct hci_dev *hdev)
+static int vhci_flush(struct hci_dev *hdev)
  {
-       struct hci_vhci_struct *vhci;
+       struct vhci_data *vhci = hdev->driver_data;
  
-       if (!hdev) return;
+       skb_queue_purge(&vhci->readq);
  
-       vhci = (struct hci_vhci_struct *) hdev->driver_data;
-       kfree(vhci);
+       return 0;
  }
  
-static int hci_vhci_send_frame(struct sk_buff *skb)
+static int vhci_send_frame(struct sk_buff *skb)
  {
         struct hci_dev* hdev = (struct hci_dev *) skb->dev;
-       struct hci_vhci_struct *hci_vhci;
+       struct vhci_data *vhci;
  
         if (!hdev) {
-               BT_ERR("Frame for uknown device (hdev=NULL)");
+               BT_ERR("Frame for unknown HCI device (hdev=NULL)");
                 return -ENODEV;
         }
  
         if (!test_bit(HCI_RUNNING, &hdev->flags))
                 return -EBUSY;
  
-       hci_vhci = (struct hci_vhci_struct *) hdev->driver_data;
+       vhci = hdev->driver_data;
+
+       memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1);
+       skb_queue_tail(&vhci->readq, skb);
  
-       memcpy(skb_push(skb, 1), &skb->pkt_type, 1);
-       skb_queue_tail(&hci_vhci->readq, skb);
+       if (vhci->flags & VHCI_FASYNC)
+               kill_fasync(&vhci->fasync, SIGIO, POLL_IN);
  
-       if (hci_vhci->flags & VHCI_FASYNC)
-               kill_fasync(&hci_vhci->fasync, SIGIO, POLL_IN);
-       wake_up_interruptible(&hci_vhci->read_wait);
+       wake_up_interruptible(&vhci->read_wait);
  
         return 0;
  }
  
-/* Character device part */
-
-/* Poll */
-static unsigned int hci_vhci_chr_poll(struct file *file, poll_table * wait)
-{  
-       struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data;
-
-       poll_wait(file, &hci_vhci->read_wait, wait);
- 
-       if (!skb_queue_empty(&hci_vhci->readq))
-               return POLLIN | POLLRDNORM;
-
-       return POLLOUT | POLLWRNORM;
+static void vhci_destruct(struct hci_dev *hdev)
+{
+       kfree(hdev->driver_data);
  }
  
-/* Get packet from user space buffer(already verified) */
-static inline ssize_t hci_vhci_get_user(struct hci_vhci_struct *hci_vhci, const char __user *buf, size_t count)
+static inline ssize_t vhci_get_user(struct vhci_data *vhci,
+                                       const char __user *buf, size_t count)
  {
         struct sk_buff *skb;
  
         if (count > HCI_MAX_FRAME_SIZE)
                 return -EINVAL;
  
-       if (!(skb = bt_skb_alloc(count, GFP_KERNEL)))
+       skb = bt_skb_alloc(count, GFP_KERNEL);
+       if (!skb)
                 return -ENOMEM;
-       
+
         if (copy_from_user(skb_put(skb, count), buf, count)) {
                 kfree_skb(skb);
                 return -EFAULT;
         }
  
-       skb->dev = (void *) hci_vhci->hdev;
-       skb->pkt_type = *((__u8 *) skb->data);
+       skb->dev = (void *) vhci->hdev;
+       bt_cb(skb)->pkt_type = *((__u8 *) skb->data);
         skb_pull(skb, 1);
  
         hci_recv_frame(skb);
  
         return count;
-} 
-
-/* Write */
-static ssize_t hci_vhci_chr_write(struct file * file, const char __user * buf, 
-                            size_t count, loff_t *pos)
-{
-       struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data;
-
-       if (!access_ok(VERIFY_READ, buf, count))
-               return -EFAULT;
-
-       return hci_vhci_get_user(hci_vhci, buf, count);
  }
  
-/* Put packet to user space buffer(already verified) */
-static inline ssize_t hci_vhci_put_user(struct hci_vhci_struct *hci_vhci,
-                                      struct sk_buff *skb, char __user *buf,
-                                      int count)
+static inline ssize_t vhci_put_user(struct vhci_data *vhci,
+                       struct sk_buff *skb, char __user *buf, int count)
  {
-       int len = count, total = 0;
         char __user *ptr = buf;
+       int len, total = 0;
+
+       len = min_t(unsigned int, skb->len, count);
  
-       len = min_t(unsigned int, skb->len, len);
         if (copy_to_user(ptr, skb->data, len))
                 return -EFAULT;
+
         total += len;
  
-       hci_vhci->hdev->stat.byte_tx += len;
-       switch (skb->pkt_type) {
-               case HCI_COMMAND_PKT:
-                       hci_vhci->hdev->stat.cmd_tx++;
-                       break;
+       vhci->hdev->stat.byte_tx += len;
  
-               case HCI_ACLDATA_PKT:
-                       hci_vhci->hdev->stat.acl_tx++;
-                       break;
+       switch (bt_cb(skb)->pkt_type) {
+       case HCI_COMMAND_PKT:
+               vhci->hdev->stat.cmd_tx++;
+               break;
+
+       case HCI_ACLDATA_PKT:
+               vhci->hdev->stat.acl_tx++;
+               break;
  
-               case HCI_SCODATA_PKT:
-                       hci_vhci->hdev->stat.cmd_tx++;
-                       break;
+       case HCI_SCODATA_PKT:
+               vhci->hdev->stat.cmd_tx++;
+               break;
         };
  
         return total;
  }
  
-/* Read */
-static ssize_t hci_vhci_chr_read(struct file * file, char __user * buf, size_t count, loff_t *pos)
+static loff_t vhci_llseek(struct file * file, loff_t offset, int origin)
+{
+       return -ESPIPE;
+}
+
+static ssize_t vhci_read(struct file * file, char __user * buf, size_t count, loff_t *pos)
  {
-       struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data;
         DECLARE_WAITQUEUE(wait, current);
+       struct vhci_data *vhci = file->private_data;
         struct sk_buff *skb;
         ssize_t ret = 0;
  
-       add_wait_queue(&hci_vhci->read_wait, &wait);
+       add_wait_queue(&vhci->read_wait, &wait);
         while (count) {
                 set_current_state(TASK_INTERRUPTIBLE);
  
-               /* Read frames from device queue */
-               if (!(skb = skb_dequeue(&hci_vhci->readq))) {
+               skb = skb_dequeue(&vhci->readq);
+               if (!skb) {
                         if (file->f_flags & O_NONBLOCK) {
                                 ret = -EAGAIN;
                                 break;
                         }
+
                         if (signal_pending(current)) {
                                 ret = -ERESTARTSYS;
                                 break;
                         }
  
-                       /* Nothing to read, let's sleep */
                         schedule();
                         continue;
                 }
  
                 if (access_ok(VERIFY_WRITE, buf, count))
-                       ret = hci_vhci_put_user(hci_vhci, skb, buf, count);
+                       ret = vhci_put_user(vhci, skb, buf, count);
                 else
                         ret = -EFAULT;
  
@@ -231,84 +222,90 @@ static ssize_t hci_vhci_chr_read(struct file * file, char __user * buf, size_t c
                 break;
         }
         set_current_state(TASK_RUNNING);
-       remove_wait_queue(&hci_vhci->read_wait, &wait);
+       remove_wait_queue(&vhci->read_wait, &wait);
  
         return ret;
  }
  
-static loff_t hci_vhci_chr_lseek(struct file * file, loff_t offset, int origin)
+static ssize_t vhci_write(struct file *file,
+                       const char __user *buf, size_t count, loff_t *pos)
  {
-       return -ESPIPE;
-}
+       struct vhci_data *vhci = file->private_data;
  
-static int hci_vhci_chr_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
-{
-       return -EINVAL;
+       if (!access_ok(VERIFY_READ, buf, count))
+               return -EFAULT;
+
+       return vhci_get_user(vhci, buf, count);
  }
  
-static int hci_vhci_chr_fasync(int fd, struct file *file, int on)
+static unsigned int vhci_poll(struct file *file, poll_table *wait)
  {
-       struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data;
-       int ret;
+       struct vhci_data *vhci = file->private_data;
  
-       if ((ret = fasync_helper(fd, file, on, &hci_vhci->fasync)) < 0)
-               return ret; 
- 
-       if (on)
-               hci_vhci->flags |= VHCI_FASYNC;
-       else 
-               hci_vhci->flags &= ~VHCI_FASYNC;
+       poll_wait(file, &vhci->read_wait, wait);
  
-       return 0;
+       if (!skb_queue_empty(&vhci->readq))
+               return POLLIN | POLLRDNORM;
+
+       return POLLOUT | POLLWRNORM;
  }
  
-static int hci_vhci_chr_open(struct inode *inode, struct file * file)
+static int vhci_ioctl(struct inode *inode, struct file *file,
+                                       unsigned int cmd, unsigned long arg)
  {
-       struct hci_vhci_struct *hci_vhci = NULL; 
+       return -EINVAL;
+}
+
+static int vhci_open(struct inode *inode, struct file *file)
+{
+       struct vhci_data *vhci;
         struct hci_dev *hdev;
  
-       if (!(hci_vhci = kmalloc(sizeof(struct hci_vhci_struct), GFP_KERNEL)))
+       vhci = kmalloc(sizeof(struct vhci_data), GFP_KERNEL);
+       if (!vhci)
                 return -ENOMEM;
  
-       memset(hci_vhci, 0, sizeof(struct hci_vhci_struct));
+       memset(vhci, 0, sizeof(struct vhci_data));
  
-       skb_queue_head_init(&hci_vhci->readq);
-       init_waitqueue_head(&hci_vhci->read_wait);
+       skb_queue_head_init(&vhci->readq);
+       init_waitqueue_head(&vhci->read_wait);
  
-       /* Initialize and register HCI device */
         hdev = hci_alloc_dev();
         if (!hdev) {
-               kfree(hci_vhci);
+               kfree(vhci);
                 return -ENOMEM;
         }
  
-       hci_vhci->hdev = hdev;
+       vhci->hdev = hdev;
  
         hdev->type = HCI_VHCI;
-       hdev->driver_data = hci_vhci;
+       hdev->driver_data = vhci;
+       SET_HCIDEV_DEV(hdev, vhci_miscdev.dev);
  
-       hdev->open  = hci_vhci_open;
-       hdev->close = hci_vhci_close;
-       hdev->flush = hci_vhci_flush;
-       hdev->send  = hci_vhci_send_frame;
-       hdev->destruct = hci_vhci_destruct;
+       hdev->open     = vhci_open_dev;
+       hdev->close    = vhci_close_dev;
+       hdev->flush    = vhci_flush;
+       hdev->send     = vhci_send_frame;
+       hdev->destruct = vhci_destruct;
  
         hdev->owner = THIS_MODULE;
-       
+
         if (hci_register_dev(hdev) < 0) {
-               kfree(hci_vhci);
+               BT_ERR("Can't register HCI device");
+               kfree(vhci);
                 hci_free_dev(hdev);
                 return -EBUSY;
         }
  
-       file->private_data = hci_vhci;
-       return nonseekable_open(inode, file);   
+       file->private_data = vhci;
+
+       return nonseekable_open(inode, file);
  }
  
-static int hci_vhci_chr_close(struct inode *inode, struct file *file)
+static int vhci_release(struct inode *inode, struct file *file)
  {
-       struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data;
-       struct hci_dev *hdev = hci_vhci->hdev;
+       struct vhci_data *vhci = file->private_data;
+       struct hci_dev *hdev = vhci->hdev;
  
         if (hci_unregister_dev(hdev) < 0) {
                 BT_ERR("Can't unregister HCI device %s", hdev->name);
@@ -317,48 +314,71 @@ static int hci_vhci_chr_close(struct inode *inode, struct file *file)
         hci_free_dev(hdev);
  
         file->private_data = NULL;
+
         return 0;
  }
  
-static struct file_operations hci_vhci_fops = {
-       .owner  = THIS_MODULE,  
-       .llseek = hci_vhci_chr_lseek,
-       .read   = hci_vhci_chr_read,
-       .write  = hci_vhci_chr_write,
-       .poll   = hci_vhci_chr_poll,
-       .ioctl  = hci_vhci_chr_ioctl,
-       .open   = hci_vhci_chr_open,
-       .release        = hci_vhci_chr_close,
-       .fasync = hci_vhci_chr_fasync           
+static int vhci_fasync(int fd, struct file *file, int on)
+{
+       struct vhci_data *vhci = file->private_data;
+       int err;
+
+       err = fasync_helper(fd, file, on, &vhci->fasync);
+       if (err < 0)
+               return err;
+
+       if (on)
+               vhci->flags |= VHCI_FASYNC;
+       else
+               vhci->flags &= ~VHCI_FASYNC;
+
+       return 0;
+}
+
+static struct file_operations vhci_fops = {
+       .owner          = THIS_MODULE,
+       .llseek         = vhci_llseek,
+       .read           = vhci_read,
+       .write          = vhci_write,
+       .poll           = vhci_poll,
+       .ioctl          = vhci_ioctl,
+       .open           = vhci_open,
+       .release        = vhci_release,
+       .fasync         = vhci_fasync,
  };
  
-static struct miscdevice hci_vhci_miscdev=
-{
-        VHCI_MINOR,
-        "hci_vhci",
-        &hci_vhci_fops
+static struct miscdevice vhci_miscdev= {
+       .name           = "vhci",
+       .fops           = &vhci_fops,
  };
  
-static int __init hci_vhci_init(void)
+static int __init vhci_init(void)
  {
-       BT_INFO("VHCI driver ver %s", VERSION);
+       BT_INFO("Virtual HCI driver ver %s", VERSION);
  
-       if (misc_register(&hci_vhci_miscdev)) {
-               BT_ERR("Can't register misc device %d\n", VHCI_MINOR);
+       vhci_miscdev.minor = minor;
+
+       if (misc_register(&vhci_miscdev) < 0) {
+               BT_ERR("Can't register misc device with minor %d", minor);
                 return -EIO;
         }
  
         return 0;
  }
  
-static void hci_vhci_cleanup(void)
+static void __exit vhci_exit(void)
  {
-       misc_deregister(&hci_vhci_miscdev);
+       if (misc_deregister(&vhci_miscdev) < 0)
+               BT_ERR("Can't unregister misc device with minor %d", minor);
  }
  
-module_init(hci_vhci_init);
-module_exit(hci_vhci_cleanup);
+module_init(vhci_init);
+module_exit(vhci_exit);
+
+module_param(minor, int, 0444);
+MODULE_PARM_DESC(minor, "Miscellaneous minor device number");
  
-MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>");
-MODULE_DESCRIPTION("Bluetooth VHCI driver ver " VERSION);
-MODULE_LICENSE("GPL"); 
+MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
+MODULE_DESCRIPTION("Bluetooth virtual HCI driver ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
diff --git a/drivers/bluetooth/hci_vhci.h b/drivers/bluetooth/hci_vhci.h

deleted file mode 100644 (file)

index 53b11f9..0000000
--- a/drivers/bluetooth/hci_vhci.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* 
-   BlueZ - Bluetooth protocol stack for Linux
-   Copyright (C) 2000-2001 Qualcomm Incorporated
-
-   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License version 2 as
-   published by the Free Software Foundation;
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
-   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
-   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
-   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
-   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
-   SOFTWARE IS DISCLAIMED.
-*/
-
-/*
- * $Id: hci_vhci.h,v 1.1.1.1 2002/03/08 21:03:15 maxk Exp $
- */
-
-#ifndef __HCI_VHCI_H
-#define __HCI_VHCI_H
-
-#ifdef __KERNEL__
-
-struct hci_vhci_struct {
-       struct hci_dev       *hdev;
-       __u32                flags;
-       wait_queue_head_t    read_wait;
-       struct sk_buff_head  readq;
-       struct fasync_struct *fasync;
-};
-
-/* VHCI device flags */
-#define VHCI_FASYNC            0x0010
-
-#endif /* __KERNEL__ */
-
-#define VHCI_DEV       "/dev/vhci"
-#define VHCI_MINOR     250
-
-#endif /* __HCI_VHCI_H */
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c

index 38dd9ffbe8bcc147cc2174e3965870ab5a9aba73..0829db58462fd8d55a7a34d3680c9feb5404d437 100644 (file)
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -734,7 +734,7 @@ static int viocd_remove(struct vio_dev *vdev)
   */
  static struct vio_device_id viocd_device_table[] __devinitdata = {
         { "viocd", "" },
-       { 0, }
+       { "", "" }
  };
  
  MODULE_DEVICE_TABLE(vio, viocd_device_table);
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig

index 4f27e5519296f9a2866e9b59ba49fc3001855bd2..7333b41d4224240b19f683e12010c261b94417cd 100644 (file)
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -80,7 +80,7 @@ config SERIAL_NONSTANDARD
  
  config COMPUTONE
         tristate "Computone IntelliPort Plus serial support"
-       depends on SERIAL_NONSTANDARD && BROKEN_ON_SMP
+       depends on SERIAL_NONSTANDARD && BROKEN_ON_SMP && (BROKEN || !SPARC32)
         ---help---
           This driver supports the entire family of Intelliport II/Plus
           controllers with the exception of the MicroChannel controllers and
@@ -138,7 +138,7 @@ config CYZ_INTR
  
  config DIGIEPCA
         tristate "Digiboard Intelligent Async Support"
-       depends on SERIAL_NONSTANDARD && BROKEN_ON_SMP
+       depends on SERIAL_NONSTANDARD && BROKEN_ON_SMP && (!64BIT || BROKEN)
         ---help---
           This is a driver for Digi International's Xx, Xeve, and Xem series
           of cards which provide multiple serial ports. You would need
@@ -208,7 +208,7 @@ config SYNCLINK
  
  config SYNCLINKMP
         tristate "SyncLink Multiport support"
-       depends on SERIAL_NONSTANDARD
+       depends on SERIAL_NONSTANDARD && (BROKEN || !SPARC32)
         help
           Enable support for the SyncLink Multiport (2 or 4 ports)
           serial adapter, running asynchronous and HDLC communications up
@@ -735,7 +735,7 @@ config SGI_IP27_RTC
  
  config GEN_RTC
         tristate "Generic /dev/rtc emulation"
-       depends on RTC!=y && !IA64 && !ARM && !PPC64
+       depends on RTC!=y && !IA64 && !ARM && !PPC64 && !M32R && !SPARC32
         ---help---
           If you say Y here and create a character special file /dev/rtc with
           major number 10 and minor number 135 using mknod ("man mknod"), you
diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c

index 60bb9152b832fdc3f5021ae0af310dd71b1f275b..78d681dc35a8f362d11466db471aaf2ae45cc312 100644 (file)
--- a/drivers/char/hvc_vio.c
+++ b/drivers/char/hvc_vio.c
@@ -39,7 +39,7 @@ char hvc_driver_name[] = "hvc_console";
  
  static struct vio_device_id hvc_driver_table[] __devinitdata = {
         {"serial", "hvterm1"},
-       { NULL, }
+       { "", "" }
  };
  MODULE_DEVICE_TABLE(vio, hvc_driver_table);
  
diff --git a/drivers/char/hvcs.c b/drivers/char/hvcs.c

index 3236d2404905003ad92ce5b2ef82cc96f28a3906..f47f009f9259c1597e60b1f7740c1f9bfeb6b203 100644 (file)
--- a/drivers/char/hvcs.c
+++ b/drivers/char/hvcs.c
@@ -527,7 +527,7 @@ static int khvcsd(void *unused)
  
  static struct vio_device_id hvcs_driver_table[] __devinitdata= {
         {"serial-server", "hvterm2"},
-       { NULL, }
+       { "", "" }
  };
  MODULE_DEVICE_TABLE(vio, hvcs_driver_table);
  
diff --git a/drivers/char/mem.c b/drivers/char/mem.c

index 42187381506b7d0ff45dcdcc4df5dc2be7a2867f..850a78c9c4bc9c639dd5b2c5816a4c89e7568eb1 100644 (file)
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -261,7 +261,11 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma)
  
  static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
  {
-        unsigned long long val;
+       unsigned long pfn;
+
+       /* Turn a kernel-virtual address into a physical page frame */
+       pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
+
         /*
          * RED-PEN: on some architectures there is more mapped memory
          * than available in mem_map which pfn_valid checks
@@ -269,10 +273,10 @@ static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
          *
          * RED-PEN: vmalloc is not supported right now.
          */
-       if (!pfn_valid(vma->vm_pgoff))
+       if (!pfn_valid(pfn))
                 return -EIO;
-       val = (u64)vma->vm_pgoff << PAGE_SHIFT;
-       vma->vm_pgoff = __pa(val) >> PAGE_SHIFT;
+
+       vma->vm_pgoff = pfn;
         return mmap_mem(file, vma);
  }
  
diff --git a/drivers/char/random.c b/drivers/char/random.c

index 6b11d6b2129f08cb9a800292f05f409e0b70835d..7999da25fe40afb035c4342cd1f227f0b4d21078 100644 (file)
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1589,6 +1589,40 @@ u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dp
  EXPORT_SYMBOL(secure_tcpv6_port_ephemeral);
  #endif
  
+#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
+/* Similar to secure_tcp_sequence_number, but generates a 48-bit value:
+ * bits 32-47 increase every key exchange
+ *       0-31  hash(source, dest)
+ */
+u64 secure_dccp_sequence_number(__u32 saddr, __u32 daddr,
+                               __u16 sport, __u16 dport)
+{
+       struct timeval tv;
+       u64 seq;
+       __u32 hash[4];
+       struct keydata *keyptr = get_keyptr();
+
+       hash[0] = saddr;
+       hash[1] = daddr;
+       hash[2] = (sport << 16) + dport;
+       hash[3] = keyptr->secret[11];
+
+       seq = half_md4_transform(hash, keyptr->secret);
+       seq |= ((u64)keyptr->count) << (32 - HASH_BITS);
+
+       do_gettimeofday(&tv);
+       seq += tv.tv_usec + tv.tv_sec * 1000000;
+       seq &= (1ull << 48) - 1;
+#if 0
+       printk("dccp init_seq(%lx, %lx, %d, %d) = %d\n",
+              saddr, daddr, sport, dport, seq);
+#endif
+       return seq;
+}
+
+EXPORT_SYMBOL(secure_dccp_sequence_number);
+#endif
+
  #endif /* CONFIG_INET */
  
  
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c

index d8f9e94ae475703465300750258fbf984a3101a0..cd4fe8b1709f67593d47939819c9fd6a37e32ce3 100644 (file)
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -1209,6 +1209,7 @@ static int rtc_proc_open(struct inode *inode, struct file *file)
  
  void rtc_get_rtc_time(struct rtc_time *rtc_tm)
  {
+       unsigned long uip_watchdog = jiffies;
         unsigned char ctrl;
  #ifdef CONFIG_MACH_DECSTATION
         unsigned int real_year;
@@ -1224,8 +1225,10 @@ void rtc_get_rtc_time(struct rtc_time *rtc_tm)
          * Once the read clears, read the RTC time (again via ioctl). Easy.
          */
  
-       if (rtc_is_updating() != 0)
-               msleep(20);
+       while (rtc_is_updating() != 0 && jiffies - uip_watchdog < 2*HZ/100) {
+               barrier();
+               cpu_relax();
+       }
  
         /*
          * Only the values that we read from the RTC are set. We leave
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig

index 94a3b3e20bf954766f98be18e0faf5817aca83c7..79e9832ef1f30141add74626adbd1a8d6916e10e 100644 (file)
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -17,6 +17,8 @@ config TCG_TPM
           obtained at: <http://sourceforge.net/projects/trousers>.  To 
           compile this driver as a module, choose M here; the module 
           will be called tpm. If unsure, say N.
+         Note: For more TPM drivers enable CONFIG_PNP, CONFIG_ACPI_BUS
+         and CONFIG_PNPACPI.
  
  config TCG_NSC
         tristate "National Semiconductor TPM Interface"
@@ -36,12 +38,13 @@ config TCG_ATMEL
           as a module, choose M here; the module will be called tpm_atmel.
  
  config TCG_INFINEON
-       tristate "Infineon Technologies SLD 9630 TPM Interface"
-       depends on TCG_TPM
+       tristate "Infineon Technologies TPM Interface"
+       depends on TCG_TPM && PNPACPI
         ---help---
           If you have a TPM security chip from Infineon Technologies
-         say Yes and it will be accessible from within Linux.  To
-         compile this driver as a module, choose M here; the module
+         (either SLD 9630 TT 1.1 or SLB 9635 TT 1.2) say Yes and it
+         will be accessible from within Linux.
+         To compile this driver as a module, choose M here; the module
           will be called tpm_infineon.
           Further information on this driver and the supported hardware
           can be found at http://www.prosec.rub.de/tpm
diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c

index 0e3241645c1907651612dccdbcdbad678c21f36a..dc8c540391fdc690f2f30c0a69a6f586794d4c86 100644 (file)
--- a/drivers/char/tpm/tpm_infineon.c
+++ b/drivers/char/tpm/tpm_infineon.c
@@ -1,7 +1,7 @@
  /*
   * Description:
   * Device Driver for the Infineon Technologies
- * SLD 9630 TT Trusted Platform Module
+ * SLD 9630 TT 1.1 and SLB 9635 TT 1.2 Trusted Platform Module
   * Specifications at www.trustedcomputinggroup.org
   *
   * Copyright (C) 2005, Marcel Selhorst <selhorst@crypto.rub.de>
@@ -12,9 +12,10 @@
   * modify it under the terms of the GNU General Public License as
   * published by the Free Software Foundation, version 2 of the
   * License.
- *
   */
  
+#include <acpi/acpi_bus.h>
+#include <linux/pnp.h>
  #include "tpm.h"
  
  /* Infineon specific definitions */
@@ -26,8 +27,11 @@
  #define        TPM_MSLEEP_TIME         3
  /* gives number of max. msleep()-calls before throwing timeout */
  #define        TPM_MAX_TRIES           5000
-#define        TCPA_INFINEON_DEV_VEN_VALUE     0x15D1
-#define        TPM_DATA                        (TPM_ADDR + 1) & 0xff
+#define        TPM_INFINEON_DEV_VEN_VALUE      0x15D1
+
+/* These values will be filled after ACPI-call */
+static int TPM_INF_DATA = 0;
+static int TPM_INF_ADDR = 0;
  
  /* TPM header definitions */
  enum infineon_tpm_header {
@@ -305,9 +309,10 @@ static int tpm_inf_send(struct tpm_chip *chip, u8 * buf, size_t count)
  
  static void tpm_inf_cancel(struct tpm_chip *chip)
  {
-       /* Nothing yet!
-          This has something to do with the internal functions
-          of the TPM. Abort isn't really necessary...
+       /*
+          Since we are using the legacy mode to communicate
+          with the TPM, we have no cancel functions, but have
+          a workaround for interrupting the TPM through WTX.
          */
  }
  
@@ -345,6 +350,32 @@ static struct tpm_vendor_specific tpm_inf = {
         .miscdev = {.fops = &inf_ops,},
  };
  
+/*
+ * PnP IDs handled by tpm_inf_pnp.  The trailing entry with an empty ID is
+ * kept from the original table (presumably the end-of-table marker).
+ */
+static const struct pnp_device_id tpm_pnp_tbl[] = {
+       { .id = "IFX0101" },    /* Infineon TPM */
+       { .id = "IFX0102" },    /* Infineon TPM */
+       { .id = "" }
+};
+
+/*
+ * PnP probe callback: record the I/O ports reported for the TPM.
+ *
+ * Port resource 0, reduced to its low byte, becomes TPM_INF_ADDR; the
+ * following port number (again masked to eight bits) becomes TPM_INF_DATA.
+ * Port resource 1 is stored in tpm_inf.base, which is reset to 0 when
+ * bits 8-15 of the reported value are all clear.
+ *
+ * Always returns 0; tpm_inf_probe() inspects the recorded values itself.
+ */
+static int __devinit tpm_inf_acpi_probe(struct pnp_dev *dev,
+                                       const struct pnp_device_id *dev_id)
+{
+       TPM_INF_ADDR = pnp_port_start(dev, 0) & 0xff;
+       TPM_INF_DATA = (TPM_INF_ADDR + 1) & 0xff;
+
+       tpm_inf.base = pnp_port_start(dev, 1);
+       /* a base with nothing set in its second byte is treated as unset */
+       if (((tpm_inf.base >> 8) & 0xff) == 0)
+               tpm_inf.base = 0;
+
+       dev_info(&dev->dev, "Found %s with ID %s\n", dev->name, dev_id->id);
+
+       return 0;
+}
+
+/*
+ * PnP driver descriptor: ties the IDs in tpm_pnp_tbl to
+ * tpm_inf_acpi_probe().  tpm_inf_probe() registers and immediately
+ * unregisters it to obtain the I/O ports.
+ */
+static struct pnp_driver tpm_inf_pnp = {
+       .probe = tpm_inf_acpi_probe,
+       .id_table = tpm_pnp_tbl,
+       .name = "tpm_inf_pnp",
+};
+
  static int __devinit tpm_inf_probe(struct pci_dev *pci_dev,
                                    const struct pci_device_id *pci_id)
  {
@@ -353,64 +384,99 @@ static int __devinit tpm_inf_probe(struct pci_dev *pci_dev,
         int vendorid[2];
         int version[2];
         int productid[2];
+       char chipname[20];
  
         if (pci_enable_device(pci_dev))
                 return -EIO;
  
         dev_info(&pci_dev->dev, "LPC-bus found at 0x%x\n", pci_id->device);
  
+       /* read IO-ports from ACPI */
+       pnp_register_driver(&tpm_inf_pnp);
+       pnp_unregister_driver(&tpm_inf_pnp);
+
+       /* Make sure, we have received valid config ports */
+       if (!TPM_INF_ADDR) {
+               pci_disable_device(pci_dev);
+               return -EIO;
+       }
+
         /* query chip for its vendor, its version number a.s.o. */
-       outb(ENABLE_REGISTER_PAIR, TPM_ADDR);
-       outb(IDVENL, TPM_ADDR);
-       vendorid[1] = inb(TPM_DATA);
-       outb(IDVENH, TPM_ADDR);
-       vendorid[0] = inb(TPM_DATA);
-       outb(IDPDL, TPM_ADDR);
-       productid[1] = inb(TPM_DATA);
-       outb(IDPDH, TPM_ADDR);
-       productid[0] = inb(TPM_DATA);
-       outb(CHIP_ID1, TPM_ADDR);
-       version[1] = inb(TPM_DATA);
-       outb(CHIP_ID2, TPM_ADDR);
-       version[0] = inb(TPM_DATA);
-
-       if ((vendorid[0] << 8 | vendorid[1]) == (TCPA_INFINEON_DEV_VEN_VALUE)) {
-
-               /* read IO-ports from TPM */
-               outb(IOLIMH, TPM_ADDR);
-               ioh = inb(TPM_DATA);
-               outb(IOLIML, TPM_ADDR);
-               iol = inb(TPM_DATA);
-               tpm_inf.base = (ioh << 8) | iol;
+       outb(ENABLE_REGISTER_PAIR, TPM_INF_ADDR);
+       outb(IDVENL, TPM_INF_ADDR);
+       vendorid[1] = inb(TPM_INF_DATA);
+       outb(IDVENH, TPM_INF_ADDR);
+       vendorid[0] = inb(TPM_INF_DATA);
+       outb(IDPDL, TPM_INF_ADDR);
+       productid[1] = inb(TPM_INF_DATA);
+       outb(IDPDH, TPM_INF_ADDR);
+       productid[0] = inb(TPM_INF_DATA);
+       outb(CHIP_ID1, TPM_INF_ADDR);
+       version[1] = inb(TPM_INF_DATA);
+       outb(CHIP_ID2, TPM_INF_ADDR);
+       version[0] = inb(TPM_INF_DATA);
+
+       switch ((productid[0] << 8) | productid[1]) {
+       case 6:
+               sprintf(chipname, " (SLD 9630 TT 1.1)");
+               break;
+       case 11:
+               sprintf(chipname, " (SLB 9635 TT 1.2)");
+               break;
+       default:
+               sprintf(chipname, " (unknown chip)");
+               break;
+       }
+       chipname[19] = 0;
+
+       if ((vendorid[0] << 8 | vendorid[1]) == (TPM_INFINEON_DEV_VEN_VALUE)) {
  
                 if (tpm_inf.base == 0) {
-                       dev_err(&pci_dev->dev, "No IO-ports set!\n");
+                       dev_err(&pci_dev->dev, "No IO-ports found!\n");
                         pci_disable_device(pci_dev);
-                       return -ENODEV;
+                       return -EIO;
+               }
+               /* configure TPM with IO-ports */
+               outb(IOLIMH, TPM_INF_ADDR);
+               outb(((tpm_inf.base >> 8) & 0xff), TPM_INF_DATA);
+               outb(IOLIML, TPM_INF_ADDR);
+               outb((tpm_inf.base & 0xff), TPM_INF_DATA);
+
+               /* control if IO-ports are set correctly */
+               outb(IOLIMH, TPM_INF_ADDR);
+               ioh = inb(TPM_INF_DATA);
+               outb(IOLIML, TPM_INF_ADDR);
+               iol = inb(TPM_INF_DATA);
+
+               if ((ioh << 8 | iol) != tpm_inf.base) {
+                       dev_err(&pci_dev->dev,
+                               "Could not set IO-ports to %04x\n",
+                               tpm_inf.base);
+                       pci_disable_device(pci_dev);
+                       return -EIO;
                 }
  
                 /* activate register */
-               outb(TPM_DAR, TPM_ADDR);
-               outb(0x01, TPM_DATA);
-               outb(DISABLE_REGISTER_PAIR, TPM_ADDR);
+               outb(TPM_DAR, TPM_INF_ADDR);
+               outb(0x01, TPM_INF_DATA);
+               outb(DISABLE_REGISTER_PAIR, TPM_INF_ADDR);
  
                 /* disable RESET, LP and IRQC */
                 outb(RESET_LP_IRQC_DISABLE, tpm_inf.base + CMD);
  
                 /* Finally, we're done, print some infos */
                 dev_info(&pci_dev->dev, "TPM found: "
+                        "config base 0x%x, "
                          "io base 0x%x, "
                          "chip version %02x%02x, "
                          "vendor id %x%x (Infineon), "
                          "product id %02x%02x"
                          "%s\n",
+                        TPM_INF_ADDR,
                          tpm_inf.base,
                          version[0], version[1],
                          vendorid[0], vendorid[1],
-                        productid[0], productid[1], ((productid[0] == 0)
-                                                     && (productid[1] ==
-                                                         6)) ?
-                        " (SLD 9630 TT 1.1)" : "");
+                        productid[0], productid[1], chipname);
  
                 rc = tpm_register_hardware(pci_dev, &tpm_inf);
                 if (rc < 0) {
@@ -462,6 +528,6 @@ module_init(init_inf);
  module_exit(cleanup_inf);
  
  MODULE_AUTHOR("Marcel Selhorst <selhorst@crypto.rub.de>");
-MODULE_DESCRIPTION("Driver for Infineon TPM SLD 9630 TT");
-MODULE_VERSION("1.4");
+MODULE_DESCRIPTION("Driver for Infineon TPM SLD 9630 TT 1.1 / SLB 9635 TT 1.2");
+MODULE_VERSION("1.5");
  MODULE_LICENSE("GPL");
diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c

index 4764b4f9555de93de3ec0e945a7089a4e5dbf537..0aff45fac2e6c9f52fc3390bdf29b913f01b8f03 100644 (file)
--- a/drivers/char/viotape.c
+++ b/drivers/char/viotape.c
@@ -991,7 +991,7 @@ static int viotape_remove(struct vio_dev *vdev)
   */
  static struct vio_device_id viotape_device_table[] __devinitdata = {
         { "viotape", "" },
-       { 0, }
+       { "", "" }
  };
  
  MODULE_DEVICE_TABLE(vio, viotape_device_table);
diff --git a/drivers/char/vt.c b/drivers/char/vt.c

index 30d96739fb2379b1f73d5d6dbbabb899eaf09456..665103ccaee83b69b6ca30a7fda0f40eec8983f7 100644 (file)
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -2433,7 +2433,7 @@ static int con_open(struct tty_struct *tty, struct file *filp)
         int ret = 0;
  
         acquire_console_sem();
-       if (tty->count == 1) {
+       if (tty->driver_data == NULL) {
                 ret = vc_allocate(currcons);
                 if (ret == 0) {
                         struct vc_data *vc = vc_cons[currcons].d;
diff --git a/drivers/char/watchdog/i8xx_tco.c b/drivers/char/watchdog/i8xx_tco.c

index f975dab1ddf95417f4c6214a0057661a0cf71c7c..a13395e2c372a9b27355040c9c94eae21040992b 100644 (file)
--- a/drivers/char/watchdog/i8xx_tco.c
+++ b/drivers/char/watchdog/i8xx_tco.c
@@ -1,5 +1,5 @@
  /*
- *     i8xx_tco 0.07:  TCO timer driver for i8xx chipsets
+ *     i8xx_tco:       TCO timer driver for i8xx chipsets
   *
   *     (c) Copyright 2000 kernel concepts <nils@kernelconcepts.de>, All Rights Reserved.
   *                             http://www.kernelconcepts.de
@@ -63,6 +63,9 @@
   *  20050128 Wim Van Sebroeck <wim@iguana.be>
   *     0.07 Added support for the ICH4-M, ICH6, ICH6R, ICH6-M, ICH6W and ICH6RW
   *          chipsets. Also added support for the "undocumented" ICH7 chipset.
+ *  20050807 Wim Van Sebroeck <wim@iguana.be>
+ *     0.08 Make sure that the watchdog is only "armed" when started.
+ *          (Kernel Bug 4251)
   */
  
  /*
@@ -87,7 +90,7 @@
  #include "i8xx_tco.h"
  
  /* Module and version information */
-#define TCO_VERSION "0.07"
+#define TCO_VERSION "0.08"
  #define TCO_MODULE_NAME "i8xx TCO timer"
  #define TCO_DRIVER_NAME   TCO_MODULE_NAME ", v" TCO_VERSION
  #define PFX TCO_MODULE_NAME ": "
@@ -125,10 +128,18 @@ static int tco_timer_start (void)
         unsigned char val;
  
         spin_lock(&tco_lock);
+
+       /* disable chipset's NO_REBOOT bit */
+       pci_read_config_byte (i8xx_tco_pci, 0xd4, &val);
+       val &= 0xfd;
+       pci_write_config_byte (i8xx_tco_pci, 0xd4, val);
+
+       /* Bit 11: TCO Timer Halt -> 0 = The TCO timer is enabled to count */
         val = inb (TCO1_CNT + 1);
         val &= 0xf7;
         outb (val, TCO1_CNT + 1);
         val = inb (TCO1_CNT + 1);
+
         spin_unlock(&tco_lock);
  
         if (val & 0x08)
@@ -138,13 +149,20 @@ static int tco_timer_start (void)
  
  static int tco_timer_stop (void)
  {
-       unsigned char val;
+       unsigned char val, val1;
  
         spin_lock(&tco_lock);
+       /* Bit 11: TCO Timer Halt -> 1 = The TCO timer is disabled */
         val = inb (TCO1_CNT + 1);
         val |= 0x08;
         outb (val, TCO1_CNT + 1);
         val = inb (TCO1_CNT + 1);
+
+       /* Set the NO_REBOOT bit to prevent later reboots, just for sure */
+       pci_read_config_byte (i8xx_tco_pci, 0xd4, &val1);
+       val1 |= 0x02;
+       pci_write_config_byte (i8xx_tco_pci, 0xd4, val1);
+
         spin_unlock(&tco_lock);
  
         if ((val & 0x08) == 0)
@@ -155,6 +173,7 @@ static int tco_timer_stop (void)
  static int tco_timer_keepalive (void)
  {
         spin_lock(&tco_lock);
+       /* Reload the timer by writing to the TCO Timer Reload register */
         outb (0x01, TCO1_RLD);
         spin_unlock(&tco_lock);
         return 0;
@@ -417,9 +436,8 @@ static unsigned char __init i8xx_tco_getdevice (void)
                         printk (KERN_ERR PFX "failed to get TCOBASE address\n");
                         return 0;
                 }
-               /*
-                * Check chipset's NO_REBOOT bit
-                */
+
+               /* Check chipset's NO_REBOOT bit */
                 pci_read_config_byte (i8xx_tco_pci, 0xd4, &val1);
                 if (val1 & 0x02) {
                         val1 &= 0xfd;
@@ -430,6 +448,10 @@ static unsigned char __init i8xx_tco_getdevice (void)
                                 return 0;       /* Cannot reset NO_REBOOT bit */
                         }
                 }
+               /* Disable reboots until the watchdog starts */
+               val1 |= 0x02;
+               pci_write_config_byte (i8xx_tco_pci, 0xd4, val1);
+
                 /* Set the TCO_EN bit in SMI_EN register */
                 if (!request_region (SMI_EN + 1, 1, "i8xx TCO")) {
                         printk (KERN_ERR PFX "I/O address 0x%04x already in use\n",
@@ -505,17 +527,10 @@ out:
  
  static void __exit watchdog_cleanup (void)
  {
-       u8 val;
-
         /* Stop the timer before we leave */
         if (!nowayout)
                 tco_timer_stop ();
  
-       /* Set the NO_REBOOT bit to prevent later reboots, just for sure */
-       pci_read_config_byte (i8xx_tco_pci, 0xd4, &val);
-       val |= 0x02;
-       pci_write_config_byte (i8xx_tco_pci, 0xd4, val);
-
         /* Deregister */
         misc_deregister (&i8xx_tco_miscdev);
         unregister_reboot_notifier(&i8xx_tco_notifier);
diff --git a/drivers/char/watchdog/sa1100_wdt.c b/drivers/char/watchdog/sa1100_wdt.c

index 1b2132617dc3f94bf689e12e4ce34495e830b24c..fb88b4041dca4730ab70def4f462b97bead64ed1 100644 (file)
--- a/drivers/char/watchdog/sa1100_wdt.c
+++ b/drivers/char/watchdog/sa1100_wdt.c
@@ -36,13 +36,10 @@
  #include <asm/uaccess.h>
  
  #define OSCR_FREQ              CLOCK_TICK_RATE
-#define SA1100_CLOSE_MAGIC     (0x5afc4453)
  
  static unsigned long sa1100wdt_users;
-static int expect_close;
  static int pre_margin;
  static int boot_status;
-static int nowayout = WATCHDOG_NOWAYOUT;
  
  /*
   *     Allow only one person to hold it open
@@ -62,55 +59,33 @@ static int sa1100dog_open(struct inode *inode, struct file *file)
  }
  
  /*
- *     Shut off the timer.
- *     Lock it in if it's a module and we defined ...NOWAYOUT
- *     Oddly, the watchdog can only be enabled, but we can turn off
- *     the interrupt, which appears to prevent the watchdog timing out.
+ * The watchdog cannot be disabled.
+ *
+ * Previous comments suggested that turning off the interrupt by
+ * clearing OIER[E3] would prevent the watchdog timing out but this
+ * does not appear to be true (at least on the PXA255).
   */
  static int sa1100dog_release(struct inode *inode, struct file *file)
  {
-       OSMR3 = OSCR + pre_margin;
-
-       if (expect_close == SA1100_CLOSE_MAGIC) {
-               OIER &= ~OIER_E3;
-       } else {
-               printk(KERN_CRIT "WATCHDOG: WDT device closed unexpectedly.  WDT will not stop!\n");
-       }
+       printk(KERN_CRIT "WATCHDOG: Device closed - timer will not stop\n");
  
         clear_bit(1, &sa1100wdt_users);
-       expect_close = 0;
  
         return 0;
  }
  
  static ssize_t sa1100dog_write(struct file *file, const char *data, size_t len, loff_t *ppos)
  {
-       if (len) {
-               if (!nowayout) {
-                       size_t i;
-
-                       expect_close = 0;
-
-                       for (i = 0; i != len; i++) {
-                               char c;
-
-                               if (get_user(c, data + i))
-                                       return -EFAULT;
-                               if (c == 'V')
-                                       expect_close = SA1100_CLOSE_MAGIC;
-                       }
-               }
+       if (len)
                 /* Refresh OSMR3 timer. */
                 OSMR3 = OSCR + pre_margin;
-       }
  
         return len;
  }
  
  static struct watchdog_info ident = {
-       .options        = WDIOF_CARDRESET | WDIOF_MAGICCLOSE |
-                         WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
-       .identity       = "SA1100 Watchdog",
+       .options        = WDIOF_CARDRESET | WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
+       .identity       = "SA1100/PXA255 Watchdog",
  };
  
  static int sa1100dog_ioctl(struct inode *inode, struct file *file,
@@ -172,7 +147,7 @@ static struct file_operations sa1100dog_fops =
  static struct miscdevice sa1100dog_miscdev =
  {
         .minor          = WATCHDOG_MINOR,
-       .name           = "SA1100/PXA2xx watchdog",
+       .name           = "watchdog",
         .fops           = &sa1100dog_fops,
  };
  
@@ -194,7 +169,6 @@ static int __init sa1100dog_init(void)
         if (ret == 0)
                 printk("SA1100/PXA2xx Watchdog Timer: timer margin %d sec\n",
                        margin);
-
         return ret;
  }
  
@@ -212,8 +186,5 @@ MODULE_DESCRIPTION("SA1100/PXA2xx Watchdog");
  module_param(margin, int, 0);
  MODULE_PARM_DESC(margin, "Watchdog margin in seconds (default 60s)");
  
-module_param(nowayout, int, 0);
-MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started");
-
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/drivers/fc4/fc.c b/drivers/fc4/fc.c

index 5d961f5e0ca0011121063d07734dbdbcff495a4d..e4710d1d1f9d8db1757e84965c2ccbcfcbc8f50a 100644 (file)
--- a/drivers/fc4/fc.c
+++ b/drivers/fc4/fc.c
@@ -1004,8 +1004,8 @@ int fcp_scsi_dev_reset(Scsi_Cmnd *SCpnt)
                 return FAILED;
         }
         fc->rst_pkt->eh_state = SCSI_STATE_UNUSED;
-       return SUCCESS;
  #endif
+       return SUCCESS;
  }
  
  static int __fcp_scsi_host_reset(Scsi_Cmnd *SCpnt)
diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c

index 4fa17c76eea2247dae603e5ca124cfe58775b0f1..c8a7f47911f99f315189cc60cb73ba23e888b5fb 100644 (file)
--- a/drivers/hwmon/adm1026.c
+++ b/drivers/hwmon/adm1026.c
@@ -325,7 +325,7 @@ int adm1026_attach_adapter(struct i2c_adapter *adapter)
  int adm1026_detach_client(struct i2c_client *client)
  {
         i2c_detach_client(client);
-       kfree(client);
+       kfree(i2c_get_clientdata(client));
         return 0;
  }
  
@@ -1691,7 +1691,7 @@ int adm1026_detect(struct i2c_adapter *adapter, int address,
  
         /* Error out and cleanup code */
  exitfree:
-       kfree(new_client);
+       kfree(data);
  exit:
         return err;
  }
diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c

index 9168e983ca1d53b4a946316e22da7a362ca89c8e..9362509572709596d92849a666199f4cf1bcb46a 100644 (file)
--- a/drivers/hwmon/adm1031.c
+++ b/drivers/hwmon/adm1031.c
@@ -834,7 +834,7 @@ static int adm1031_detect(struct i2c_adapter *adapter, int address, int kind)
         return 0;
  
  exit_free:
-       kfree(new_client);
+       kfree(data);
  exit:
         return err;
  }
@@ -845,7 +845,7 @@ static int adm1031_detach_client(struct i2c_client *client)
         if ((ret = i2c_detach_client(client)) != 0) {
                 return ret;
         }
-       kfree(client);
+       kfree(i2c_get_clientdata(client));
         return 0;
  }
  
diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c

index 5c68e9c311aa232abce025caaea75e9a7d5f860c..ce2a6eb93f6e5d4c7baed6fc26797b86988559e4 100644 (file)
--- a/drivers/hwmon/adm9240.c
+++ b/drivers/hwmon/adm9240.c
@@ -616,7 +616,7 @@ static int adm9240_detect(struct i2c_adapter *adapter, int address, int kind)
  
         return 0;
  exit_free:
-       kfree(new_client);
+       kfree(data);
  exit:
         return err;
  }
diff --git a/drivers/hwmon/fscpos.c b/drivers/hwmon/fscpos.c

index 270015b626adefcc3fe66cd41fefc2391bb33706..301ae98bd0adb1fca9d0f0e6687a538019439a0b 100644 (file)
--- a/drivers/hwmon/fscpos.c
+++ b/drivers/hwmon/fscpos.c
@@ -167,7 +167,7 @@ static ssize_t set_temp_reset(struct i2c_client *client, struct fscpos_data
                                 "experience to the module author.\n");
  
         /* Supported value: 2 (clears the status) */
-       fscpos_write_value(client, FSCPOS_REG_TEMP_STATE[nr], 2);
+       fscpos_write_value(client, FSCPOS_REG_TEMP_STATE[nr - 1], 2);
         return count;
  }
  
diff --git a/drivers/hwmon/smsc47b397.c b/drivers/hwmon/smsc47b397.c

index 251ac2659554d8fd77351cf54ef2ad955a7237be..fdeeb3ab6f2f3e9146b607ddc00c5d3ffd239bed 100644 (file)
--- a/drivers/hwmon/smsc47b397.c
+++ b/drivers/hwmon/smsc47b397.c
@@ -298,7 +298,7 @@ static int smsc47b397_detect(struct i2c_adapter *adapter, int addr, int kind)
         return 0;
  
  error_free:
-       kfree(new_client);
+       kfree(data);
  error_release:
         release_region(addr, SMSC_EXTENT);
         return err;
diff --git a/drivers/hwmon/smsc47m1.c b/drivers/hwmon/smsc47m1.c

index 897117a7213f0624a04e55deaaf46de38aaab88e..7166ad0b2fda666e5ace936c0b3960de9bbdd7b0 100644 (file)
--- a/drivers/hwmon/smsc47m1.c
+++ b/drivers/hwmon/smsc47m1.c
@@ -495,7 +495,7 @@ static int smsc47m1_detect(struct i2c_adapter *adapter, int address, int kind)
         return 0;
  
  error_free:
-       kfree(new_client);
+       kfree(data);
  error_release:
         release_region(address, SMSC_EXTENT);
         return err;
diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c

index 04adde62a003c47a645819487ed3d646a02f2683..9ad3e9262e8ae059621c43413cb48c95ccd9aec7 100644 (file)
--- a/drivers/i2c/busses/i2c-mpc.c
+++ b/drivers/i2c/busses/i2c-mpc.c
@@ -382,100 +382,6 @@ static void __exit fsl_i2c_exit(void)
  module_init(fsl_i2c_init);
  module_exit(fsl_i2c_exit);
  
-static int fsl_i2c_probe(struct device *device)
-{
-       int result = 0;
-       struct mpc_i2c *i2c;
-       struct platform_device *pdev = to_platform_device(device);
-       struct fsl_i2c_platform_data *pdata;
-       struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-       pdata = (struct fsl_i2c_platform_data *) pdev->dev.platform_data;
-
-       if (!(i2c = kmalloc(sizeof(*i2c), GFP_KERNEL))) {
-               return -ENOMEM;
-       }
-       memset(i2c, 0, sizeof(*i2c));
-
-       i2c->irq = platform_get_irq(pdev, 0);
-       i2c->flags = pdata->device_flags;
-       init_waitqueue_head(&i2c->queue);
-
-       i2c->base = ioremap((phys_addr_t)r->start, MPC_I2C_REGION);
-
-       if (!i2c->base) {
-               printk(KERN_ERR "i2c-mpc - failed to map controller\n");
-               result = -ENOMEM;
-               goto fail_map;
-       }
-
-       if (i2c->irq != 0)
-               if ((result = request_irq(i2c->irq, mpc_i2c_isr,
-                                         SA_SHIRQ, "i2c-mpc", i2c)) < 0) {
-                       printk(KERN_ERR
-                              "i2c-mpc - failed to attach interrupt\n");
-                       goto fail_irq;
-               }
-
-       mpc_i2c_setclock(i2c);
-       dev_set_drvdata(device, i2c);
-
-       i2c->adap = mpc_ops;
-       i2c_set_adapdata(&i2c->adap, i2c);
-       i2c->adap.dev.parent = &pdev->dev;
-       if ((result = i2c_add_adapter(&i2c->adap)) < 0) {
-               printk(KERN_ERR "i2c-mpc - failed to add adapter\n");
-               goto fail_add;
-       }
-
-       return result;
-
-      fail_add:
-       if (i2c->irq != 0)
-               free_irq(i2c->irq, NULL);
-      fail_irq:
-       iounmap(i2c->base);
-      fail_map:
-       kfree(i2c);
-       return result;
-};
-
-static int fsl_i2c_remove(struct device *device)
-{
-       struct mpc_i2c *i2c = dev_get_drvdata(device);
-
-       i2c_del_adapter(&i2c->adap);
-       dev_set_drvdata(device, NULL);
-
-       if (i2c->irq != 0)
-               free_irq(i2c->irq, i2c);
-
-       iounmap(i2c->base);
-       kfree(i2c);
-       return 0;
-};
-
-/* Structure for a device driver */
-static struct device_driver fsl_i2c_driver = {
-       .name = "fsl-i2c",
-       .bus = &platform_bus_type,
-       .probe = fsl_i2c_probe,
-       .remove = fsl_i2c_remove,
-};
-
-static int __init fsl_i2c_init(void)
-{
-       return driver_register(&fsl_i2c_driver);
-}
-
-static void __exit fsl_i2c_exit(void)
-{
-       driver_unregister(&fsl_i2c_driver);
-}
-
-module_init(fsl_i2c_init);
-module_exit(fsl_i2c_exit);
-
  MODULE_AUTHOR("Adrian Cox <adrian@humboldt.co.uk>");
  MODULE_DESCRIPTION
      ("I2C-Bus adapter for MPC107 bridge and MPC824x/85xx/52xx processors");
diff --git a/drivers/i2c/busses/i2c-sibyte.c b/drivers/i2c/busses/i2c-sibyte.c

index 1c99536b673b7775d97ff39091f112aa84cca17e..fa503ed9f86db6d8fb32899d817a9db78d6e60f7 100644 (file)
--- a/drivers/i2c/busses/i2c-sibyte.c
+++ b/drivers/i2c/busses/i2c-sibyte.c
@@ -23,8 +23,8 @@
  #include <asm/sibyte/sb1250_smbus.h>
  
  static struct i2c_algo_sibyte_data sibyte_board_data[2] = {
-       { NULL, 0, (void *) (KSEG1+A_SMB_BASE(0)) },
-       { NULL, 1, (void *) (KSEG1+A_SMB_BASE(1)) }
+       { NULL, 0, (void *) (CKSEG1+A_SMB_BASE(0)) },
+       { NULL, 1, (void *) (CKSEG1+A_SMB_BASE(1)) }
  };
  
  static struct i2c_adapter sibyte_board_adapter[2] = {
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig

index 5f33df47aa7432806e6f97ba6de93f74b6ea1f9a..1cadd2c3caddce3ab3cb0d26bfc7160ad70e7d5c 100644 (file)
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -764,6 +764,7 @@ config BLK_DEV_IDE_PMAC_ATA100FIRST
  config BLK_DEV_IDEDMA_PMAC
         bool "PowerMac IDE DMA support"
         depends on BLK_DEV_IDE_PMAC
+       select BLK_DEV_IDEDMA_PCI
         help
           This option allows the driver for the built-in IDE controller on
           Power Macintoshes and PowerBooks to use DMA (direct memory access)
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c

index f9c1acb4ed6a438c5518a22de4697aa13c88a3ee..c9d3a00a3c0c643bedcefd299d41d9075a08c036 100644 (file)
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -1220,7 +1220,7 @@ static int ide_disk_probe(struct device *dev)
                 goto failed;
  
         g = alloc_disk_node(1 << PARTN_BITS,
-                       pcibus_to_node(drive->hwif->pci_dev->bus));
+                       hwif_to_node(drive->hwif));
         if (!g)
                 goto out_free_idkp;
  
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c

index 9eab6426148e2372940c432ec2c647da8c14dd78..29c22fc278c6510b75209ea6fea80649fe23fe77 100644 (file)
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -317,7 +317,7 @@ typedef struct ide_floppy_obj {
         unsigned long flags;
  } idefloppy_floppy_t;
  
-#define IDEFLOPPY_TICKS_DELAY  3       /* default delay for ZIP 100 */
+#define IDEFLOPPY_TICKS_DELAY  (HZ/20) /* default delay for ZIP 100 (50ms) */
  
  /*
   *     Floppy flag bits values.
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c

index 7df85af75371419d5f58e26ef89811783d45b61f..c1128ae5cd2f98319ee889b74fefd7e69916edd4 100644 (file)
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -978,8 +978,7 @@ static int ide_init_queue(ide_drive_t *drive)
          *      do not.
          */
  
-       q = blk_init_queue_node(do_ide_request, &ide_lock,
-                               pcibus_to_node(drive->hwif->pci_dev->bus));
+       q = blk_init_queue_node(do_ide_request, &ide_lock, hwif_to_node(hwif));
         if (!q)
                 return 1;
  
@@ -1048,6 +1047,8 @@ static int init_irq (ide_hwif_t *hwif)
  
         BUG_ON(in_interrupt());
         BUG_ON(irqs_disabled());        
+       BUG_ON(hwif == NULL);
+
         down(&ide_cfg_sem);
         hwif->hwgroup = NULL;
  #if MAX_HWIFS > 1
@@ -1097,7 +1098,7 @@ static int init_irq (ide_hwif_t *hwif)
                 spin_unlock_irq(&ide_lock);
         } else {
                 hwgroup = kmalloc_node(sizeof(ide_hwgroup_t), GFP_KERNEL,
-                       pcibus_to_node(hwif->drives[0].hwif->pci_dev->bus));
+                                       hwif_to_node(hwif->drives[0].hwif));
                 if (!hwgroup)
                         goto out_up;
  
diff --git a/drivers/ide/legacy/ide-cs.c b/drivers/ide/legacy/ide-cs.c

index 03747439ac9c1a193059f3078c44af328dc93e90..f1d1ec4e967718946e332c3fb0b43be2b1182def 100644 (file)
--- a/drivers/ide/legacy/ide-cs.c
+++ b/drivers/ide/legacy/ide-cs.c
@@ -508,5 +508,5 @@ static void __exit exit_ide_cs(void)
         BUG_ON(dev_list != NULL);
  }
  
-module_init(init_ide_cs);
+late_initcall(init_ide_cs);
  module_exit(exit_ide_cs);
diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c

index da46577380f327dacd56a68afa9a369bd51882fe..6e3ab0c38c4d672e0c332be3ec88895957187124 100644 (file)
--- a/drivers/ide/pci/generic.c
+++ b/drivers/ide/pci/generic.c
@@ -173,6 +173,12 @@ static ide_pci_device_t generic_chipsets[] __devinitdata = {
                 .channels       = 2,
                 .autodma        = NOAUTODMA,
                 .bootable       = ON_BOARD,
+       },{     /* 14 */
+               .name           = "Revolution",
+               .init_hwif      = init_hwif_generic,
+               .channels       = 2,
+               .autodma        = AUTODMA,
+               .bootable       = OFF_BOARD,
         }
  };
  
@@ -231,6 +237,7 @@ static struct pci_device_id generic_pci_tbl[] = {
         { PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO,     PCI_ANY_ID, PCI_ANY_ID, 0, 0, 11},
         { PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1,   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 12},
         { PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2,   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 13},
+       { PCI_VENDOR_ID_NETCELL,PCI_DEVICE_ID_REVOLUTION,          PCI_ANY_ID, PCI_ANY_ID, 0, 0, 14},
         /* Must come last. If you add entries adjust this table appropriately and the init_one code */
         { PCI_ANY_ID,           PCI_ANY_ID,                        PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL, 0},
         { 0, },
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c

index c6f5fa4b4ca6b74b1ab3dc77cc5775d5a3c08b6a..ff2e217a8c84f883b85399faca8787b099c7a492 100644 (file)
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -21,6 +21,9 @@
   *
   *   CSB6: `Champion South Bridge' IDE Interface (optional: third channel)
   *
+ *   HT1000: AKA BCM5785 - Hypertransport Southbridge for Opteron systems. IDE
+ *   controller same as the CSB6. Single channel ATA100 only.
+ *
   * Documentation:
   *     Available under NDA only. Errata info very hard to get.
   *
@@ -71,6 +74,8 @@ static u8 svwks_ratemask (ide_drive_t *drive)
         if (!svwks_revision)
                 pci_read_config_byte(dev, PCI_REVISION_ID, &svwks_revision);
  
+       if (dev->device == PCI_DEVICE_ID_SERVERWORKS_HT1000IDE)
+               return 2;
         if (dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4IDE) {
                 u32 reg = 0;
                 if (isa_dev)
@@ -109,6 +114,7 @@ static u8 svwks_csb_check (struct pci_dev *dev)
                 case PCI_DEVICE_ID_SERVERWORKS_CSB5IDE:
                 case PCI_DEVICE_ID_SERVERWORKS_CSB6IDE:
                 case PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2:
+               case PCI_DEVICE_ID_SERVERWORKS_HT1000IDE:
                         return 1;
                 default:
                         break;
@@ -438,6 +444,13 @@ static unsigned int __devinit init_chipset_svwks (struct pci_dev *dev, const cha
                         btr |= (svwks_revision >= SVWKS_CSB5_REVISION_NEW) ? 0x3 : 0x2;
                 pci_write_config_byte(dev, 0x5A, btr);
         }
+       /* Setup HT1000 SouthBridge Controller - Single Channel Only */
+       else if (dev->device == PCI_DEVICE_ID_SERVERWORKS_HT1000IDE) {
+               pci_read_config_byte(dev, 0x5A, &btr);
+               btr &= ~0x40;
+               btr |= 0x3;
+               pci_write_config_byte(dev, 0x5A, btr);
+       }
  
         return (dev->irq) ? dev->irq : 0;
  }
@@ -629,6 +642,15 @@ static ide_pci_device_t serverworks_chipsets[] __devinitdata = {
                 .channels       = 1,    /* 2 */
                 .autodma        = AUTODMA,
                 .bootable       = ON_BOARD,
+       },{     /* 4 */
+               .name           = "SvrWks HT1000",
+               .init_setup     = init_setup_svwks,
+               .init_chipset   = init_chipset_svwks,
+               .init_hwif      = init_hwif_svwks,
+               .init_dma       = init_dma_svwks,
+               .channels       = 1,    /* 2 */
+               .autodma        = AUTODMA,
+               .bootable       = ON_BOARD,
         }
  };
  
@@ -653,6 +675,7 @@ static struct pci_device_id svwks_pci_tbl[] = {
         { PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1},
         { PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB6IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2},
         { PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 3},
+       { PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4},
         { 0, },
  };
  MODULE_DEVICE_TABLE(pci, svwks_pci_tbl);
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c

index be0fcc8f4b155be497ffed8c1f767ac923bc100f..ea65b070a3675b2e3c3f2b6e1e1840f93adb6cb8 100644 (file)
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -1664,7 +1664,7 @@ static struct macio_driver pmac_ide_macio_driver =
  };
  
  static struct pci_device_id pmac_ide_pci_match[] = {
-       { PCI_VENDOR_ID_APPLE, PCI_DEVIEC_ID_APPLE_UNI_N_ATA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+       { PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_UNI_N_ATA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
         { PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_IPID_ATA100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
         { PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_K2_ATA100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
         { PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_SH_ATA,
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c

index 77da827b2898362527c776486290df2f75821b31..18ed7765417ce7a649a987ac0b8438bbdea184ea 100644 (file)
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -229,6 +229,7 @@ second_chance_to_dma:
                         case PCI_DEVICE_ID_AMD_VIPER_7409:
                         case PCI_DEVICE_ID_CMD_643:
                         case PCI_DEVICE_ID_SERVERWORKS_CSB5IDE:
+                       case PCI_DEVICE_ID_REVOLUTION:
                                 simplex_stat = hwif->INB(dma_base + 2);
                                 hwif->OUTB((simplex_stat&0x60),(dma_base + 2));
                                 simplex_stat = hwif->INB(dma_base + 2);
diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c

index b248d89de8b4e26c543d5b30c2bd7a187d657059..d633770fac8ee3b99a3a77c84356db02be2bf7c6 100644 (file)
--- a/drivers/ieee1394/ieee1394_core.c
+++ b/drivers/ieee1394/ieee1394_core.c
@@ -681,7 +681,7 @@ static void handle_packet_response(struct hpsb_host *host, int tcode,
                  return;
          }
  
-       __skb_unlink(skb, skb->list);
+       __skb_unlink(skb, &host->pending_packet_queue);
  
         if (packet->state == hpsb_queued) {
                 packet->sendtime = jiffies;
@@ -989,7 +989,7 @@ void abort_timedouts(unsigned long __opaque)
                 packet = (struct hpsb_packet *)skb->data;
  
                 if (time_before(packet->sendtime + expire, jiffies)) {
-                       __skb_unlink(skb, skb->list);
+                       __skb_unlink(skb, &host->pending_packet_queue);
                         packet->state = hpsb_complete;
                         packet->ack_code = ACKX_TIMEOUT;
                         queue_packet_complete(packet);
diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c

index b12a970cc9a30669f75413ed32fac0337e7ba6d2..27018c8efc242ee0a660c5be9d8449597e551c42 100644 (file)
--- a/drivers/ieee1394/ohci1394.c
+++ b/drivers/ieee1394/ohci1394.c
@@ -478,7 +478,6 @@ static void ohci_initialize(struct ti_ohci *ohci)
         int num_ports, i;
  
         spin_lock_init(&ohci->phy_reg_lock);
-       spin_lock_init(&ohci->event_lock);
  
         /* Put some defaults to these undefined bus options */
         buf = reg_read(ohci, OHCI1394_BusOptions);
@@ -3402,7 +3401,14 @@ static int __devinit ohci1394_pci_probe(struct pci_dev *dev,
         /* We hopefully don't have to pre-allocate IT DMA like we did
          * for IR DMA above. Allocate it on-demand and mark inactive. */
         ohci->it_legacy_context.ohci = NULL;
+       spin_lock_init(&ohci->event_lock);
  
+       /*
+        * interrupts are disabled, all right, but... due to SA_SHIRQ we
+        * might get called anyway.  We'll see no event, of course, but
+        * we need to get to that "no event", so enough should be initialized
+        * by that point.
+        */
         if (request_irq(dev->irq, ohci_irq_handler, SA_SHIRQ,
                          OHCI1394_DRIVER_NAME, ohci))
                 FAIL(-ENOMEM, "Failed to allocate shared interrupt %d", dev->irq);
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig

index 79c8e2dd9c33cfdc0618b228067c0c1261140cb7..32cdfb30e9b46e1cead064e74f7a06ded25495e1 100644 (file)
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -1,6 +1,7 @@
  menu "InfiniBand support"
  
  config INFINIBAND
+       depends on PCI || BROKEN
         tristate "InfiniBand support"
         ---help---
           Core support for InfiniBand (IB).  Make sure to also select
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile

index 10be36731ed7baef2cfc30abb3aa57f036fc6c02..678a7e097f329de2e3e6b926443807babf2ccdf4 100644 (file)
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,5 +1,3 @@
-EXTRA_CFLAGS += -Idrivers/infiniband/include
-
  obj-$(CONFIG_INFINIBAND) +=            ib_core.o ib_mad.o ib_sa.o \
                                         ib_cm.o ib_umad.o ib_ucm.o
  obj-$(CONFIG_INFINIBAND_USER_VERBS) += ib_uverbs.o
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c

index 729f0b0d983a0fe91abae6f54c4545ec77522919..5ac86f566dc020004f28f73bacf4934998a9fd30 100644 (file)
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -1,9 +1,10 @@
  /*
- * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
- * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
- * Copyright (c) 2004 Intel Corporation.  All rights reserved.
- * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
- * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2004, 2005 Infinicon Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -40,7 +41,7 @@
  
  #include <asm/bug.h>
  
-#include <ib_smi.h>
+#include <rdma/ib_smi.h>
  
  #include "smi.h"
  #include "agent_priv.h"
diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h

index 17435af1e914d22fe9b6097f6157881b9e586c8d..2ec6d7f1b7d083197301899752be400deef5c068 100644 (file)
--- a/drivers/infiniband/core/agent_priv.h
+++ b/drivers/infiniband/core/agent_priv.h
@@ -1,9 +1,9 @@
  /*
- * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
- * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
- * Copyright (c) 2004 Intel Corporation.  All rights reserved.
- * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
- * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2004, 2005 Infinicon Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c

index 3042360c97e1910b052ff48c8e57378c7390749e..f014e639088ca13d923df980234b998244062983 100644 (file)
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1,5 +1,8 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -32,12 +35,11 @@
   * $Id: cache.c 1349 2004-12-16 21:09:43Z roland $
   */
  
-#include <linux/version.h>
  #include <linux/module.h>
  #include <linux/errno.h>
  #include <linux/slab.h>
  
-#include <ib_cache.h>
+#include <rdma/ib_cache.h>
  
  #include "core_priv.h"
  
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c

index 403ed125d8f4c1c7adbc14916446a8ce6c2b2f40..4de93ba274a61202e7e9d29d494826a148c79ac2 100644 (file)
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -43,8 +43,8 @@
  #include <linux/spinlock.h>
  #include <linux/workqueue.h>
  
-#include <ib_cache.h>
-#include <ib_cm.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_cm.h>
  #include "cm_msgs.h"
  
  MODULE_AUTHOR("Sean Hefty");
@@ -83,7 +83,7 @@ struct cm_port {
  struct cm_device {
         struct list_head list;
         struct ib_device *device;
-       u64 ca_guid;
+       __be64 ca_guid;
         struct cm_port port[0];
  };
  
@@ -100,8 +100,8 @@ struct cm_work {
         struct list_head list;
         struct cm_port *port;
         struct ib_mad_recv_wc *mad_recv_wc;     /* Received MADs */
-       u32 local_id;                           /* Established / timewait */
-       u32 remote_id;
+       __be32 local_id;                        /* Established / timewait */
+       __be32 remote_id;
         struct ib_cm_event cm_event;
         struct ib_sa_path_rec path[0];
  };
@@ -110,8 +110,8 @@ struct cm_timewait_info {
         struct cm_work work;                    /* Must be first. */
         struct rb_node remote_qp_node;
         struct rb_node remote_id_node;
-       u64 remote_ca_guid;
-       u32 remote_qpn;
+       __be64 remote_ca_guid;
+       __be32 remote_qpn;
         u8 inserted_remote_qp;
         u8 inserted_remote_id;
  };
@@ -132,11 +132,11 @@ struct cm_id_private {
         struct cm_av alt_av;
  
         void *private_data;
-       u64 tid;
-       u32 local_qpn;
-       u32 remote_qpn;
-       u32 sq_psn;
-       u32 rq_psn;
+       __be64 tid;
+       __be32 local_qpn;
+       __be32 remote_qpn;
+       __be32 sq_psn;
+       __be32 rq_psn;
         int timeout_ms;
         enum ib_mtu path_mtu;
         u8 private_data_len;
@@ -253,7 +253,7 @@ static void cm_set_ah_attr(struct ib_ah_attr *ah_attr, u8 port_num,
                            u16 dlid, u8 sl, u16 src_path_bits)
  {
         memset(ah_attr, 0, sizeof ah_attr);
-       ah_attr->dlid = be16_to_cpu(dlid);
+       ah_attr->dlid = dlid;
         ah_attr->sl = sl;
         ah_attr->src_path_bits = src_path_bits;
         ah_attr->port_num = port_num;
@@ -264,7 +264,7 @@ static void cm_init_av_for_response(struct cm_port *port,
  {
         av->port = port;
         av->pkey_index = wc->pkey_index;
-       cm_set_ah_attr(&av->ah_attr, port->port_num, cpu_to_be16(wc->slid),
+       cm_set_ah_attr(&av->ah_attr, port->port_num, wc->slid,
                        wc->sl, wc->dlid_path_bits);
  }
  
@@ -295,8 +295,9 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
                 return ret;
  
         av->port = port;
-       cm_set_ah_attr(&av->ah_attr, av->port->port_num, path->dlid,
-                      path->sl, path->slid & 0x7F);
+       cm_set_ah_attr(&av->ah_attr, av->port->port_num,
+                      be16_to_cpu(path->dlid), path->sl,
+                      be16_to_cpu(path->slid) & 0x7F);
         av->packet_life_time = path->packet_life_time;
         return 0;
  }
@@ -309,26 +310,26 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
         do {
                 spin_lock_irqsave(&cm.lock, flags);
                 ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 1,
-                                       (int *) &cm_id_priv->id.local_id);
+                                       (__force int *) &cm_id_priv->id.local_id);
                 spin_unlock_irqrestore(&cm.lock, flags);
         } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
         return ret;
  }
  
-static void cm_free_id(u32 local_id)
+static void cm_free_id(__be32 local_id)
  {
         unsigned long flags;
  
         spin_lock_irqsave(&cm.lock, flags);
-       idr_remove(&cm.local_id_table, (int) local_id);
+       idr_remove(&cm.local_id_table, (__force int) local_id);
         spin_unlock_irqrestore(&cm.lock, flags);
  }
  
-static struct cm_id_private * cm_get_id(u32 local_id, u32 remote_id)
+static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
  {
         struct cm_id_private *cm_id_priv;
  
-       cm_id_priv = idr_find(&cm.local_id_table, (int) local_id);
+       cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id);
         if (cm_id_priv) {
                 if (cm_id_priv->id.remote_id == remote_id)
                         atomic_inc(&cm_id_priv->refcount);
@@ -339,7 +340,7 @@ static struct cm_id_private * cm_get_id(u32 local_id, u32 remote_id)
         return cm_id_priv;
  }
  
-static struct cm_id_private * cm_acquire_id(u32 local_id, u32 remote_id)
+static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
  {
         struct cm_id_private *cm_id_priv;
         unsigned long flags;
@@ -356,8 +357,8 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
         struct rb_node **link = &cm.listen_service_table.rb_node;
         struct rb_node *parent = NULL;
         struct cm_id_private *cur_cm_id_priv;
-       u64 service_id = cm_id_priv->id.service_id;
-       u64 service_mask = cm_id_priv->id.service_mask;
+       __be64 service_id = cm_id_priv->id.service_id;
+       __be64 service_mask = cm_id_priv->id.service_mask;
  
         while (*link) {
                 parent = *link;
@@ -376,7 +377,7 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
         return NULL;
  }
  
-static struct cm_id_private * cm_find_listen(u64 service_id)
+static struct cm_id_private * cm_find_listen(__be64 service_id)
  {
         struct rb_node *node = cm.listen_service_table.rb_node;
         struct cm_id_private *cm_id_priv;
@@ -400,8 +401,8 @@ static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
         struct rb_node **link = &cm.remote_id_table.rb_node;
         struct rb_node *parent = NULL;
         struct cm_timewait_info *cur_timewait_info;
-       u64 remote_ca_guid = timewait_info->remote_ca_guid;
-       u32 remote_id = timewait_info->work.remote_id;
+       __be64 remote_ca_guid = timewait_info->remote_ca_guid;
+       __be32 remote_id = timewait_info->work.remote_id;
  
         while (*link) {
                 parent = *link;
@@ -424,8 +425,8 @@ static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
         return NULL;
  }
  
-static struct cm_timewait_info * cm_find_remote_id(u64 remote_ca_guid,
-                                                  u32 remote_id)
+static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
+                                                  __be32 remote_id)
  {
         struct rb_node *node = cm.remote_id_table.rb_node;
         struct cm_timewait_info *timewait_info;
@@ -453,8 +454,8 @@ static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
         struct rb_node **link = &cm.remote_qp_table.rb_node;
         struct rb_node *parent = NULL;
         struct cm_timewait_info *cur_timewait_info;
-       u64 remote_ca_guid = timewait_info->remote_ca_guid;
-       u32 remote_qpn = timewait_info->remote_qpn;
+       __be64 remote_ca_guid = timewait_info->remote_ca_guid;
+       __be32 remote_qpn = timewait_info->remote_qpn;
  
         while (*link) {
                 parent = *link;
@@ -484,7 +485,7 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
         struct rb_node *parent = NULL;
         struct cm_id_private *cur_cm_id_priv;
         union ib_gid *port_gid = &cm_id_priv->av.dgid;
-       u32 remote_id = cm_id_priv->id.remote_id;
+       __be32 remote_id = cm_id_priv->id.remote_id;
  
         while (*link) {
                 parent = *link;
@@ -598,7 +599,7 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
         spin_unlock_irqrestore(&cm.lock, flags);
  }
  
-static struct cm_timewait_info * cm_create_timewait_info(u32 local_id)
+static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
  {
         struct cm_timewait_info *timewait_info;
  
@@ -715,14 +716,15 @@ retest:
  EXPORT_SYMBOL(ib_destroy_cm_id);
  
  int ib_cm_listen(struct ib_cm_id *cm_id,
-                u64 service_id,
-                u64 service_mask)
+                __be64 service_id,
+                __be64 service_mask)
  {
         struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
         unsigned long flags;
         int ret = 0;
  
-       service_mask = service_mask ? service_mask : ~0ULL;
+       service_mask = service_mask ? service_mask :
+                      __constant_cpu_to_be64(~0ULL);
         service_id &= service_mask;
         if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
             (service_id != IB_CM_ASSIGN_SERVICE_ID))
@@ -735,8 +737,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id,
  
         spin_lock_irqsave(&cm.lock, flags);
         if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
-               cm_id->service_id = __cpu_to_be64(cm.listen_service_id++);
-               cm_id->service_mask = ~0ULL;
+               cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
+               cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
         } else {
                 cm_id->service_id = service_id;
                 cm_id->service_mask = service_mask;
@@ -752,18 +754,19 @@ int ib_cm_listen(struct ib_cm_id *cm_id,
  }
  EXPORT_SYMBOL(ib_cm_listen);
  
-static u64 cm_form_tid(struct cm_id_private *cm_id_priv,
-                      enum cm_msg_sequence msg_seq)
+static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
+                         enum cm_msg_sequence msg_seq)
  {
         u64 hi_tid, low_tid;
  
         hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
-       low_tid  = (u64) (cm_id_priv->id.local_id | (msg_seq << 30));
+       low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
+                         (msg_seq << 30));
         return cpu_to_be64(hi_tid | low_tid);
  }
  
  static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
-                             enum cm_msg_attr_id attr_id, u64 tid)
+                             __be16 attr_id, __be64 tid)
  {
         hdr->base_version  = IB_MGMT_BASE_VERSION;
         hdr->mgmt_class    = IB_MGMT_CLASS_CM;
@@ -896,7 +899,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
                         goto error1;
         }
         cm_id->service_id = param->service_id;
-       cm_id->service_mask = ~0ULL;
+       cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
         cm_id_priv->timeout_ms = cm_convert_to_ms(
                                     param->primary_path->packet_life_time) * 2 +
                                  cm_convert_to_ms(
@@ -963,7 +966,7 @@ static int cm_issue_rej(struct cm_port *port,
         rej_msg->remote_comm_id = rcv_msg->local_comm_id;
         rej_msg->local_comm_id = rcv_msg->remote_comm_id;
         cm_rej_set_msg_rejected(rej_msg, msg_rejected);
-       rej_msg->reason = reason;
+       rej_msg->reason = cpu_to_be16(reason);
  
         if (ari && ari_length) {
                 cm_rej_set_reject_info_len(rej_msg, ari_length);
@@ -977,8 +980,8 @@ static int cm_issue_rej(struct cm_port *port,
         return ret;
  }
  
-static inline int cm_is_active_peer(u64 local_ca_guid, u64 remote_ca_guid,
-                                   u32 local_qpn, u32 remote_qpn)
+static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
+                                   __be32 local_qpn, __be32 remote_qpn)
  {
         return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
                 ((local_ca_guid == remote_ca_guid) &&
@@ -1137,7 +1140,7 @@ static void cm_format_rej(struct cm_rej_msg *rej_msg,
                 break;
         }
  
-       rej_msg->reason = reason;
+       rej_msg->reason = cpu_to_be16(reason);
         if (ari && ari_length) {
                 cm_rej_set_reject_info_len(rej_msg, ari_length);
                 memcpy(rej_msg->ari, ari, ari_length);
@@ -1276,7 +1279,7 @@ static int cm_req_handler(struct cm_work *work)
         cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
         cm_id_priv->id.context = listen_cm_id_priv->id.context;
         cm_id_priv->id.service_id = req_msg->service_id;
-       cm_id_priv->id.service_mask = ~0ULL;
+       cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL);
  
         cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
         ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
@@ -1969,7 +1972,7 @@ static void cm_format_rej_event(struct cm_work *work)
         param = &work->cm_event.param.rej_rcvd;
         param->ari = rej_msg->ari;
         param->ari_length = cm_rej_get_reject_info_len(rej_msg);
-       param->reason = rej_msg->reason;
+       param->reason = __be16_to_cpu(rej_msg->reason);
         work->cm_event.private_data = &rej_msg->private_data;
  }
  
@@ -1978,20 +1981,20 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
         struct cm_timewait_info *timewait_info;
         struct cm_id_private *cm_id_priv;
         unsigned long flags;
-       u32 remote_id;
+       __be32 remote_id;
  
         remote_id = rej_msg->local_comm_id;
  
-       if (rej_msg->reason == IB_CM_REJ_TIMEOUT) {
+       if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
                 spin_lock_irqsave(&cm.lock, flags);
-               timewait_info = cm_find_remote_id( *((u64 *) rej_msg->ari),
+               timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
                                                   remote_id);
                 if (!timewait_info) {
                         spin_unlock_irqrestore(&cm.lock, flags);
                         return NULL;
                 }
                 cm_id_priv = idr_find(&cm.local_id_table,
-                                     (int) timewait_info->work.local_id);
+                                     (__force int) timewait_info->work.local_id);
                 if (cm_id_priv) {
                         if (cm_id_priv->id.remote_id == remote_id)
                                 atomic_inc(&cm_id_priv->refcount);
@@ -2032,7 +2035,7 @@ static int cm_rej_handler(struct cm_work *work)
                 /* fall through */
         case IB_CM_REQ_RCVD:
         case IB_CM_MRA_REQ_SENT:
-               if (rej_msg->reason == IB_CM_REJ_STALE_CONN)
+               if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
                         cm_enter_timewait(cm_id_priv);
                 else
                         cm_reset_to_idle(cm_id_priv);
@@ -2553,7 +2556,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
         cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
         sidr_req_msg->request_id = cm_id_priv->id.local_id;
-       sidr_req_msg->pkey = param->pkey;
+       sidr_req_msg->pkey = cpu_to_be16(param->pkey);
         sidr_req_msg->service_id = param->service_id;
  
         if (param->private_data && param->private_data_len)
@@ -2580,7 +2583,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
                 goto out;
  
         cm_id->service_id = param->service_id;
-       cm_id->service_mask = ~0ULL;
+       cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
         cm_id_priv->timeout_ms = param->timeout_ms;
         cm_id_priv->max_cm_retries = param->max_cm_retries;
         ret = cm_alloc_msg(cm_id_priv, &msg);
@@ -2621,7 +2624,7 @@ static void cm_format_sidr_req_event(struct cm_work *work,
         sidr_req_msg = (struct cm_sidr_req_msg *)
                                 work->mad_recv_wc->recv_buf.mad;
         param = &work->cm_event.param.sidr_req_rcvd;
-       param->pkey = sidr_req_msg->pkey;
+       param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
         param->listen_id = listen_id;
         param->device = work->port->mad_agent->device;
         param->port = work->port->port_num;
@@ -2645,7 +2648,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
         sidr_req_msg = (struct cm_sidr_req_msg *)
                                 work->mad_recv_wc->recv_buf.mad;
         wc = work->mad_recv_wc->wc;
-       cm_id_priv->av.dgid.global.subnet_prefix = wc->slid;
+       cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
         cm_id_priv->av.dgid.global.interface_id = 0;
         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
                                 &cm_id_priv->av);
@@ -2673,7 +2676,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
         cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
         cm_id_priv->id.context = cur_cm_id_priv->id.context;
         cm_id_priv->id.service_id = sidr_req_msg->service_id;
-       cm_id_priv->id.service_mask = ~0ULL;
+       cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL);
  
         cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
         cm_process_work(cm_id_priv, work);
@@ -3175,10 +3178,10 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
  }
  EXPORT_SYMBOL(ib_cm_init_qp_attr);
  
-static u64 cm_get_ca_guid(struct ib_device *device)
+static __be64 cm_get_ca_guid(struct ib_device *device)
  {
         struct ib_device_attr *device_attr;
-       u64 guid;
+       __be64 guid;
         int ret;
  
         device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h

index 15a309a77b2b675f65028b3fd3c485f6375204bb..813ab70bf6d5ebd5642bde4d499ce0cb911f81be 100644 (file)
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -34,7 +34,7 @@
  #if !defined(CM_MSGS_H)
  #define CM_MSGS_H
  
-#include <ib_mad.h>
+#include <rdma/ib_mad.h>
  
  /*
   * Parameters to routines below should be in network-byte order, and values
@@ -43,19 +43,17 @@
  
  #define IB_CM_CLASS_VERSION    2 /* IB specification 1.2 */
  
-enum cm_msg_attr_id {
-       CM_REQ_ATTR_ID      = __constant_htons(0x0010),
-       CM_MRA_ATTR_ID      = __constant_htons(0x0011),
-       CM_REJ_ATTR_ID      = __constant_htons(0x0012),
-       CM_REP_ATTR_ID      = __constant_htons(0x0013),
-       CM_RTU_ATTR_ID      = __constant_htons(0x0014),
-       CM_DREQ_ATTR_ID     = __constant_htons(0x0015),
-       CM_DREP_ATTR_ID     = __constant_htons(0x0016),
-       CM_SIDR_REQ_ATTR_ID = __constant_htons(0x0017),
-       CM_SIDR_REP_ATTR_ID = __constant_htons(0x0018),
-       CM_LAP_ATTR_ID      = __constant_htons(0x0019),
-       CM_APR_ATTR_ID      = __constant_htons(0x001A)
-};
+#define CM_REQ_ATTR_ID     __constant_htons(0x0010)
+#define CM_MRA_ATTR_ID     __constant_htons(0x0011)
+#define CM_REJ_ATTR_ID     __constant_htons(0x0012)
+#define CM_REP_ATTR_ID     __constant_htons(0x0013)
+#define CM_RTU_ATTR_ID     __constant_htons(0x0014)
+#define CM_DREQ_ATTR_ID            __constant_htons(0x0015)
+#define CM_DREP_ATTR_ID            __constant_htons(0x0016)
+#define CM_SIDR_REQ_ATTR_ID __constant_htons(0x0017)
+#define CM_SIDR_REP_ATTR_ID __constant_htons(0x0018)
+#define CM_LAP_ATTR_ID      __constant_htons(0x0019)
+#define CM_APR_ATTR_ID      __constant_htons(0x001A)
  
  enum cm_msg_sequence {
         CM_MSG_SEQUENCE_REQ,
@@ -67,35 +65,35 @@ enum cm_msg_sequence {
  struct cm_req_msg {
         struct ib_mad_hdr hdr;
  
-       u32 local_comm_id;
-       u32 rsvd4;
-       u64 service_id;
-       u64 local_ca_guid;
-       u32 rsvd24;
-       u32 local_qkey;
+       __be32 local_comm_id;
+       __be32 rsvd4;
+       __be64 service_id;
+       __be64 local_ca_guid;
+       __be32 rsvd24;
+       __be32 local_qkey;
         /* local QPN:24, responder resources:8 */
-       u32 offset32;
+       __be32 offset32;
         /* local EECN:24, initiator depth:8 */
-       u32 offset36;
+       __be32 offset36;
         /*
          * remote EECN:24, remote CM response timeout:5,
          * transport service type:2, end-to-end flow control:1
          */
-       u32 offset40;
+       __be32 offset40;
         /* starting PSN:24, local CM response timeout:5, retry count:3 */
-       u32 offset44;
-       u16 pkey;
+       __be32 offset44;
+       __be16 pkey;
         /* path MTU:4, RDC exists:1, RNR retry count:3. */
         u8 offset50;
         /* max CM Retries:4, SRQ:1, rsvd:3 */
         u8 offset51;
  
-       u16 primary_local_lid;
-       u16 primary_remote_lid;
+       __be16 primary_local_lid;
+       __be16 primary_remote_lid;
         union ib_gid primary_local_gid;
         union ib_gid primary_remote_gid;
         /* flow label:20, rsvd:6, packet rate:6 */
-       u32 primary_offset88;
+       __be32 primary_offset88;
         u8 primary_traffic_class;
         u8 primary_hop_limit;
         /* SL:4, subnet local:1, rsvd:3 */
@@ -103,12 +101,12 @@ struct cm_req_msg {
         /* local ACK timeout:5, rsvd:3 */
         u8 primary_offset95;
  
-       u16 alt_local_lid;
-       u16 alt_remote_lid;
+       __be16 alt_local_lid;
+       __be16 alt_remote_lid;
         union ib_gid alt_local_gid;
         union ib_gid alt_remote_gid;
         /* flow label:20, rsvd:6, packet rate:6 */
-       u32 alt_offset132;
+       __be32 alt_offset132;
         u8 alt_traffic_class;
         u8 alt_hop_limit;
         /* SL:4, subnet local:1, rsvd:3 */
@@ -120,12 +118,12 @@ struct cm_req_msg {
  
  } __attribute__ ((packed));
  
-static inline u32 cm_req_get_local_qpn(struct cm_req_msg *req_msg)
+static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg)
  {
         return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8);
  }
  
-static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, u32 qpn)
+static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn)
  {
         req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
                                          (be32_to_cpu(req_msg->offset32) &
@@ -208,13 +206,13 @@ static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg,
                                           0xFFFFFFFE));
  }
  
-static inline u32 cm_req_get_starting_psn(struct cm_req_msg *req_msg)
+static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg)
  {
         return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8);
  }
  
  static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg,
-                                          u32 starting_psn)
+                                          __be32 starting_psn)
  {
         req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
                             (be32_to_cpu(req_msg->offset44) & 0x000000FF));
@@ -288,13 +286,13 @@ static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq)
                                   ((srq & 0x1) << 3));
  }
  
-static inline u32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg)
+static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg)
  {
-       return cpu_to_be32((be32_to_cpu(req_msg->primary_offset88) >> 12));
+       return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12);
  }
  
  static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg,
-                                                u32 flow_label)
+                                                __be32 flow_label)
  {
         req_msg->primary_offset88 = cpu_to_be32(
                                     (be32_to_cpu(req_msg->primary_offset88) &
@@ -350,13 +348,13 @@ static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_m
                                           (local_ack_timeout << 3));
  }
  
-static inline u32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg)
+static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg)
  {
-       return cpu_to_be32((be32_to_cpu(req_msg->alt_offset132) >> 12));
+       return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12);
  }
  
  static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg,
-                                            u32 flow_label)
+                                            __be32 flow_label)
  {
         req_msg->alt_offset132 = cpu_to_be32(
                                  (be32_to_cpu(req_msg->alt_offset132) &
@@ -422,8 +420,8 @@ enum cm_msg_response {
   struct cm_mra_msg {
         struct ib_mad_hdr hdr;
  
-       u32 local_comm_id;
-       u32 remote_comm_id;
+       __be32 local_comm_id;
+       __be32 remote_comm_id;
         /* message MRAed:2, rsvd:6 */
         u8 offset8;
         /* service timeout:5, rsvd:3 */
@@ -458,13 +456,13 @@ static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg,
  struct cm_rej_msg {
         struct ib_mad_hdr hdr;
  
-       u32 local_comm_id;
-       u32 remote_comm_id;
+       __be32 local_comm_id;
+       __be32 remote_comm_id;
         /* message REJected:2, rsvd:6 */
         u8 offset8;
         /* reject info length:7, rsvd:1. */
         u8 offset9;
-       u16 reason;
+       __be16 reason;
         u8 ari[IB_CM_REJ_ARI_LENGTH];
  
         u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE];
@@ -495,45 +493,45 @@ static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg,
  struct cm_rep_msg {
         struct ib_mad_hdr hdr;
  
-       u32 local_comm_id;
-       u32 remote_comm_id;
-       u32 local_qkey;
+       __be32 local_comm_id;
+       __be32 remote_comm_id;
+       __be32 local_qkey;
         /* local QPN:24, rsvd:8 */
-       u32 offset12;
+       __be32 offset12;
         /* local EECN:24, rsvd:8 */
-       u32 offset16;
+       __be32 offset16;
         /* starting PSN:24 rsvd:8 */
-       u32 offset20;
+       __be32 offset20;
         u8 resp_resources;
         u8 initiator_depth;
         /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */
         u8 offset26;
         /* RNR retry count:3, SRQ:1, rsvd:4 */
         u8 offset27;
-       u64 local_ca_guid;
+       __be64 local_ca_guid;
  
         u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE];
  
  } __attribute__ ((packed));
  
-static inline u32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg)
+static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg)
  {
         return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8);
  }
  
-static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, u32 qpn)
+static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
  {
         rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
                             (be32_to_cpu(rep_msg->offset12) & 0x000000FF));
  }
  
-static inline u32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
+static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
  {
         return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
  }
  
  static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg,
-                                          u32 starting_psn)
+                                          __be32 starting_psn)
  {
         rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
                             (be32_to_cpu(rep_msg->offset20) & 0x000000FF));
@@ -600,8 +598,8 @@ static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq)
  struct cm_rtu_msg {
         struct ib_mad_hdr hdr;
  
-       u32 local_comm_id;
-       u32 remote_comm_id;
+       __be32 local_comm_id;
+       __be32 remote_comm_id;
  
         u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE];
  
@@ -610,21 +608,21 @@ struct cm_rtu_msg {
  struct cm_dreq_msg {
         struct ib_mad_hdr hdr;
  
-       u32 local_comm_id;
-       u32 remote_comm_id;
+       __be32 local_comm_id;
+       __be32 remote_comm_id;
         /* remote QPN/EECN:24, rsvd:8 */
-       u32 offset8;
+       __be32 offset8;
  
         u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE];
  
  } __attribute__ ((packed));
  
-static inline u32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg)
+static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg)
  {
         return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8);
  }
  
-static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, u32 qpn)
+static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn)
  {
         dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
                             (be32_to_cpu(dreq_msg->offset8) & 0x000000FF));
@@ -633,8 +631,8 @@ static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, u32 qpn)
  struct cm_drep_msg {
         struct ib_mad_hdr hdr;
  
-       u32 local_comm_id;
-       u32 remote_comm_id;
+       __be32 local_comm_id;
+       __be32 remote_comm_id;
  
         u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE];
  
@@ -643,37 +641,37 @@ struct cm_drep_msg {
  struct cm_lap_msg {
         struct ib_mad_hdr hdr;
  
-       u32 local_comm_id;
-       u32 remote_comm_id;
+       __be32 local_comm_id;
+       __be32 remote_comm_id;
  
-       u32 rsvd8;
+       __be32 rsvd8;
         /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */
-       u32 offset12;
-       u32 rsvd16;
+       __be32 offset12;
+       __be32 rsvd16;
  
-       u16 alt_local_lid;
-       u16 alt_remote_lid;
+       __be16 alt_local_lid;
+       __be16 alt_remote_lid;
         union ib_gid alt_local_gid;
         union ib_gid alt_remote_gid;
         /* flow label:20, rsvd:4, traffic class:8 */
-       u32 offset56;
+       __be32 offset56;
         u8 alt_hop_limit;
         /* rsvd:2, packet rate:6 */
-       uint8_t offset61;
+       u8 offset61;
         /* SL:4, subnet local:1, rsvd:3 */
-       uint8_t offset62;
+       u8 offset62;
         /* local ACK timeout:5, rsvd:3 */
-       uint8_t offset63;
+       u8 offset63;
  
         u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE];
  } __attribute__  ((packed));
  
-static inline u32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg)
+static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg)
  {
         return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8);
  }
  
-static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, u32 qpn)
+static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn)
  {
         lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
                                          (be32_to_cpu(lap_msg->offset12) &
@@ -693,17 +691,17 @@ static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg,
                                           0xFFFFFF07));
  }
  
-static inline u32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg)
+static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg)
  {
-       return be32_to_cpu(lap_msg->offset56) >> 12;
+       return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12);
  }
  
  static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg,
-                                        u32 flow_label)
+                                        __be32 flow_label)
  {
-       lap_msg->offset56 = cpu_to_be32((flow_label << 12) |
-                                        (be32_to_cpu(lap_msg->offset56) &
-                                         0x00000FFF));
+       lap_msg->offset56 = cpu_to_be32(
+                                (be32_to_cpu(lap_msg->offset56) & 0x00000FFF) |
+                                (be32_to_cpu(flow_label) << 12));
  }
  
  static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg)
@@ -766,8 +764,8 @@ static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg,
  struct cm_apr_msg {
         struct ib_mad_hdr hdr;
  
-       u32 local_comm_id;
-       u32 remote_comm_id;
+       __be32 local_comm_id;
+       __be32 remote_comm_id;
  
         u8 info_length;
         u8 ap_status;
@@ -779,10 +777,10 @@ struct cm_apr_msg {
  struct cm_sidr_req_msg {
         struct ib_mad_hdr hdr;
  
-       u32 request_id;
-       u16 pkey;
-       u16 rsvd;
-       u64 service_id;
+       __be32 request_id;
+       __be16 pkey;
+       __be16 rsvd;
+       __be64 service_id;
  
         u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE];
  } __attribute__ ((packed));
@@ -790,26 +788,26 @@ struct cm_sidr_req_msg {
  struct cm_sidr_rep_msg {
         struct ib_mad_hdr hdr;
  
-       u32 request_id;
+       __be32 request_id;
         u8 status;
         u8 info_length;
-       u16 rsvd;
+       __be16 rsvd;
         /* QPN:24, rsvd:8 */
-       u32 offset8;
-       u64 service_id;
-       u32 qkey;
+       __be32 offset8;
+       __be64 service_id;
+       __be32 qkey;
         u8 info[IB_CM_SIDR_REP_INFO_LENGTH];
  
         u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE];
  } __attribute__ ((packed));
  
-static inline u32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg)
+static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg)
  {
         return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8);
  }
  
  static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg,
-                                      u32 qpn)
+                                      __be32 qpn)
  {
         sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
                                         (be32_to_cpu(sidr_rep_msg->offset8) &
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h

index 797049626ff67b49fa7e678b44f0f98018223de8..7ad47a4b166b2bfbd35e5f8359b80827637c8e80 100644 (file)
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -38,7 +38,7 @@
  #include <linux/list.h>
  #include <linux/spinlock.h>
  
-#include <ib_verbs.h>
+#include <rdma/ib_verbs.h>
  
  int  ib_device_register_sysfs(struct ib_device *device);
  void ib_device_unregister_sysfs(struct ib_device *device);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c

index 9197e92d708a1208230edcfb2750bdec9caf7214..d3cf84e01587176bdfaa26926abbc0c8c6c63ae1 100644 (file)
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c

index 7763b31abba70b61f8d2c1557b98f1cd2c2c06d6..d34a6f1c4f4c53a48c2d6ea401ccf5bfcdfa61e0 100644 (file)
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -39,7 +39,7 @@
  #include <linux/jhash.h>
  #include <linux/kthread.h>
  
-#include <ib_fmr_pool.h>
+#include <rdma/ib_fmr_pool.h>
  
  #include "core_priv.h"
  
@@ -334,6 +334,7 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
  {
         struct ib_pool_fmr *fmr;
         struct ib_pool_fmr *tmp;
+       LIST_HEAD(fmr_list);
         int                 i;
  
         kthread_stop(pool->thread);
@@ -341,6 +342,11 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
  
         i = 0;
         list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
+               if (fmr->remap_count) {
+                       INIT_LIST_HEAD(&fmr_list);
+                       list_add_tail(&fmr->fmr->list, &fmr_list);
+                       ib_unmap_fmr(&fmr_list);
+               }
                 ib_dealloc_fmr(fmr->fmr);
                 list_del(&fmr->list);
                 kfree(fmr);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c

index b97e210ce9c81ff16e7bc3ee47abbb58a54607dd..a4a4d9c1eef3ebb70603441d81f418898bb9cfa8 100644 (file)
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -693,7 +693,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
                 goto out;
         }
  
-       build_smp_wc(send_wr->wr_id, smp->dr_slid, send_wr->wr.ud.pkey_index,
+       build_smp_wc(send_wr->wr_id, be16_to_cpu(smp->dr_slid),
+                    send_wr->wr.ud.pkey_index,
                      send_wr->wr.ud.port_num, &mad_wc);
  
         /* No GRH for DR SMP */
@@ -1554,7 +1555,7 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
  }
  
  struct ib_mad_send_wr_private*
-ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, u64 tid)
+ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid)
  {
         struct ib_mad_send_wr_private *mad_send_wr;
  
@@ -1597,7 +1598,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
         struct ib_mad_send_wr_private *mad_send_wr;
         struct ib_mad_send_wc mad_send_wc;
         unsigned long flags;
-       u64 tid;
+       __be64 tid;
  
         INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
         list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
@@ -2165,7 +2166,8 @@ static void local_completions(void *data)
                          * Defined behavior is to complete response
                          * before request
                          */
-                       build_smp_wc(local->wr_id, IB_LID_PERMISSIVE,
+                       build_smp_wc(local->wr_id,
+                                    be16_to_cpu(IB_LID_PERMISSIVE),
                                      0 /* pkey index */,
                                      recv_mad_agent->agent.port_num, &wc);
  
@@ -2294,7 +2296,7 @@ static void timeout_sends(void *data)
         spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
  }
  
-static void ib_mad_thread_completion_handler(struct ib_cq *cq)
+static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
  {
         struct ib_mad_port_private *port_priv = cq->cq_context;
  
@@ -2574,8 +2576,7 @@ static int ib_mad_port_open(struct ib_device *device,
  
         cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2;
         port_priv->cq = ib_create_cq(port_priv->device,
-                                    (ib_comp_handler)
-                                       ib_mad_thread_completion_handler,
+                                    ib_mad_thread_completion_handler,
                                      NULL, port_priv, cq_size);
         if (IS_ERR(port_priv->cq)) {
                 printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h

index 568da10b05ab28675dff4697ac35dd2f9cd51282..f1ba794e0daa62a3dea35bfc7d2b0f232449af7e 100644 (file)
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -40,8 +40,8 @@
  #include <linux/pci.h>
  #include <linux/kthread.h>
  #include <linux/workqueue.h>
-#include <ib_mad.h>
-#include <ib_smi.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
  
  
  #define PFX "ib_mad: "
@@ -121,7 +121,7 @@ struct ib_mad_send_wr_private {
         struct ib_send_wr send_wr;
         struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
         u64 wr_id;                      /* client WR ID */
-       u64 tid;
+       __be64 tid;
         unsigned long timeout;
         int retries;
         int retry;
@@ -144,7 +144,7 @@ struct ib_mad_local_private {
         struct ib_send_wr send_wr;
         struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
         u64 wr_id;                      /* client WR ID */
-       u64 tid;
+       __be64 tid;
  };
  
  struct ib_mad_mgmt_method_table {
@@ -210,7 +210,7 @@ extern kmem_cache_t *ib_mad_cache;
  int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);
  
  struct ib_mad_send_wr_private *
-ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, u64 tid);
+ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid);
  
  void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
                              struct ib_mad_send_wc *mad_send_wc);
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c

index 8f1eb80e421f12e7d763267e59df4bf64c459c99..43fd805e02659ce6f81befac85696c34f47beb9f 100644 (file)
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -61,7 +61,7 @@ struct mad_rmpp_recv {
         int seg_num;
         int newwin;
  
-       u64 tid;
+       __be64 tid;
         u32 src_qp;
         u16 slid;
         u8 mgmt_class;
@@ -100,6 +100,121 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
         }
  }
  
+/*
+ * Return the offset of the 'data' member inside the MAD layout that
+ * corresponds to mgmt_class: struct ib_sa_mad for the subnet administration
+ * class, struct ib_vendor_mad for classes in vendor range 2, and
+ * struct ib_rmpp_mad for everything else.
+ */
+static int data_offset(u8 mgmt_class)
+{
+       if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
+               return offsetof(struct ib_sa_mad, data);
+
+       /* Anything outside vendor range 2 uses the generic RMPP layout. */
+       if (mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START ||
+           mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END)
+               return offsetof(struct ib_rmpp_mad, data);
+
+       return offsetof(struct ib_vendor_mad, data);
+}
+
+/*
+ * Fill in 'ack' as an RMPP ACK answering the received segment 'data'.
+ *
+ * The leading data_offset() bytes of 'data' (selected by its management
+ * class) are copied into 'ack', the method has IB_MGMT_METHOD_RESP toggled,
+ * and the RMPP type/flags are set to an active ACK.  The segment number and
+ * new window are taken from rmpp_recv under rmpp_recv->lock, and
+ * rmpp_recv->last_ack is updated to the segment number being acknowledged.
+ */
+static void format_ack(struct ib_rmpp_mad *ack,
+                      struct ib_rmpp_mad *data,
+                      struct mad_rmpp_recv *rmpp_recv)
+{
+       unsigned long flags;
+
+       /* Start from a copy of the received headers. */
+       memcpy(&ack->mad_hdr, &data->mad_hdr,
+              data_offset(data->mad_hdr.mgmt_class));
+
+       /* Flip the response bit relative to the received method. */
+       ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
+       ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK;
+       ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+
+       /*
+        * Read seg_num/newwin and record last_ack in one critical section so
+        * the values written to the wire match what is remembered as ACKed.
+        */
+       spin_lock_irqsave(&rmpp_recv->lock, flags);
+       rmpp_recv->last_ack = rmpp_recv->seg_num;
+       ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num);
+       ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin);
+       spin_unlock_irqrestore(&rmpp_recv->lock, flags);
+}
+
+/*
+ * Allocate, format and post an RMPP ACK for the transfer tracked by
+ * rmpp_recv, addressed to the source QP of recv_wc.
+ *
+ * The ACK is sent using rmpp_recv->ah, so on a failed post only the send
+ * buffer is released here.  Allocation and post failures are not reported
+ * to the caller; in that case no ACK goes out.
+ */
+static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
+                    struct ib_mad_recv_wc *recv_wc)
+{
+       struct ib_mad_send_buf *msg;
+       struct ib_send_wr *bad_send_wr;
+       int hdr_len, ret;
+
+       hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr);
+       msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
+                                recv_wc->wc->pkey_index, rmpp_recv->ah, 1,
+                                hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len,
+                                GFP_KERNEL);
+       /*
+        * Failure is signalled with an ERR_PTR() value (alloc_response_msg()
+        * checks the same call with IS_ERR()), so a NULL test would let an
+        * error pointer through and dereference it below.
+        */
+       if (IS_ERR(msg))
+               return;
+
+       format_ack((struct ib_rmpp_mad *) msg->mad,
+                  (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
+       ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr,
+                              &bad_send_wr);
+       if (ret)
+               ib_free_send_mad(msg);
+}
+
+/*
+ * Allocate a send buffer for replying to the sender of recv_wc.
+ *
+ * An address handle is created from the received work completion (and GRH)
+ * and handed to ib_create_send_mad().  On success the buffer is stored in
+ * *msg and 0 is returned.  On failure the PTR_ERR() code of the failing call
+ * is returned, the address handle (if created) is destroyed, and *msg is
+ * left untouched.
+ */
+static int alloc_response_msg(struct ib_mad_agent *agent,
+                             struct ib_mad_recv_wc *recv_wc,
+                             struct ib_mad_send_buf **msg)
+{
+       const int hdr_len = sizeof(struct ib_mad_hdr) +
+                           sizeof(struct ib_rmpp_hdr);
+       struct ib_mad_send_buf *send_buf;
+       struct ib_ah *reply_ah;
+
+       reply_ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
+                                       recv_wc->recv_buf.grh,
+                                       agent->port_num);
+       if (IS_ERR(reply_ah))
+               return PTR_ERR(reply_ah);
+
+       send_buf = ib_create_send_mad(agent, recv_wc->wc->src_qp,
+                                     recv_wc->wc->pkey_index, reply_ah, 1,
+                                     hdr_len,
+                                     sizeof(struct ib_rmpp_mad) - hdr_len,
+                                     GFP_KERNEL);
+       if (!IS_ERR(send_buf)) {
+               *msg = send_buf;
+               return 0;
+       }
+
+       /* No buffer was obtained, so the handle has no owner: drop it. */
+       ib_destroy_ah(reply_ah);
+       return PTR_ERR(send_buf);
+}
+
+/*
+ * Release a send buffer together with its address handle: destroys the AH
+ * referenced by msg->send_wr.wr.ud.ah, then frees the send buffer itself.
+ */
+static void free_msg(struct ib_mad_send_buf *msg)
+{
+       /* The AH pointer lives inside msg, so use it before freeing msg. */
+       ib_destroy_ah(msg->send_wr.wr.ud.ah);
+       ib_free_send_mad(msg);
+}
+
+/*
+ * Send an RMPP ABORT carrying rmpp_status back to the sender of recv_wc.
+ *
+ * The reply starts as a copy of the received headers (data_offset() bytes
+ * for the received management class), then has the response bit toggled and
+ * the RMPP header rewritten as an active ABORT with zero segment number and
+ * window.  If the response cannot be allocated nothing is sent; if posting
+ * fails the message and its address handle are released via free_msg().
+ */
+static void nack_recv(struct ib_mad_agent_private *agent,
+                     struct ib_mad_recv_wc *recv_wc, u8 rmpp_status)
+{
+       struct ib_mad_send_buf *msg;
+       struct ib_rmpp_mad *abort_mad;
+       struct ib_rmpp_hdr *abort_hdr;
+       struct ib_send_wr *bad_send_wr;
+
+       if (alloc_response_msg(&agent->agent, recv_wc, &msg))
+               return;
+
+       abort_mad = (struct ib_rmpp_mad *) msg->mad;
+       memcpy(abort_mad, recv_wc->recv_buf.mad,
+              data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class));
+       abort_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
+
+       abort_hdr = &abort_mad->rmpp_hdr;
+       abort_hdr->rmpp_version = IB_MGMT_RMPP_VERSION;
+       abort_hdr->rmpp_type = IB_MGMT_RMPP_TYPE_ABORT;
+       ib_set_rmpp_flags(abort_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+       abort_hdr->rmpp_status = rmpp_status;
+       abort_hdr->seg_num = 0;
+       abort_hdr->paylen_newwin = 0;
+
+       if (ib_post_send_mad(&agent->agent, &msg->send_wr, &bad_send_wr))
+               free_msg(msg);
+}
+
  static void recv_timeout_handler(void *data)
  {
         struct mad_rmpp_recv *rmpp_recv = data;
@@ -115,8 +230,8 @@ static void recv_timeout_handler(void *data)
         list_del(&rmpp_recv->list);
         spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
  
-       /* TODO: send abort. */
         rmpp_wc = rmpp_recv->rmpp_wc;
+       nack_recv(rmpp_recv->agent, rmpp_wc, IB_MGMT_RMPP_STATUS_T2L);
         destroy_rmpp_recv(rmpp_recv);
         ib_free_recv_mad(rmpp_wc);
  }
@@ -230,60 +345,6 @@ insert_rmpp_recv(struct ib_mad_agent_private *agent,
         return cur_rmpp_recv;
  }
  
-static int data_offset(u8 mgmt_class)
-{
-       if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
-               return offsetof(struct ib_sa_mad, data);
-       else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
-                (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
-               return offsetof(struct ib_vendor_mad, data);
-       else
-               return offsetof(struct ib_rmpp_mad, data);
-}
-
-static void format_ack(struct ib_rmpp_mad *ack,
-                      struct ib_rmpp_mad *data,
-                      struct mad_rmpp_recv *rmpp_recv)
-{
-       unsigned long flags;
-
-       memcpy(&ack->mad_hdr, &data->mad_hdr,
-              data_offset(data->mad_hdr.mgmt_class));
-
-       ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
-       ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK;
-       ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
-
-       spin_lock_irqsave(&rmpp_recv->lock, flags);
-       rmpp_recv->last_ack = rmpp_recv->seg_num;
-       ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num);
-       ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin);
-       spin_unlock_irqrestore(&rmpp_recv->lock, flags);
-}
-
-static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
-                    struct ib_mad_recv_wc *recv_wc)
-{
-       struct ib_mad_send_buf *msg;
-       struct ib_send_wr *bad_send_wr;
-       int hdr_len, ret;
-
-       hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr);
-       msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
-                                recv_wc->wc->pkey_index, rmpp_recv->ah, 1,
-                                hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len,
-                                GFP_KERNEL);
-       if (!msg)
-               return;
-
-       format_ack((struct ib_rmpp_mad *) msg->mad,
-                  (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
-       ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr,
-                              &bad_send_wr);
-       if (ret)
-               ib_free_send_mad(msg);
-}
-
  static inline int get_last_flag(struct ib_mad_recv_buf *seg)
  {
         struct ib_rmpp_mad *rmpp_mad;
@@ -559,6 +620,34 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
         return ib_send_mad(mad_send_wr);
  }
  
+/*
+ * Complete the outstanding send matching tid with IB_WC_REM_ABORT_ERR.
+ *
+ * The send is looked up under agent->lock.  Nothing happens when no send
+ * matches or when the matching send is already finished (fully ACKed, no
+ * timeout pending, or already in a non-success state).  Otherwise the send
+ * is marked done under the lock and then completed, with rmpp_status
+ * reported in the completion's vendor_err field.
+ */
+static void abort_send(struct ib_mad_agent_private *agent, __be64 tid,
+                      u8 rmpp_status)
+{
+       struct ib_mad_send_wr_private *mad_send_wr;
+       struct ib_mad_send_wc wc;
+       unsigned long flags;
+
+       spin_lock_irqsave(&agent->lock, flags);
+       mad_send_wr = ib_find_send_mad(agent, tid);
+
+       /* Unmatched send, or a send that is already done: nothing to abort. */
+       if (!mad_send_wr ||
+           mad_send_wr->last_ack == mad_send_wr->total_seg ||
+           !mad_send_wr->timeout ||
+           mad_send_wr->status != IB_WC_SUCCESS) {
+               spin_unlock_irqrestore(&agent->lock, flags);
+               return;
+       }
+
+       ib_mark_mad_done(mad_send_wr);
+       spin_unlock_irqrestore(&agent->lock, flags);
+
+       /* Report the abort to the client outside the agent lock. */
+       wc.wr_id = mad_send_wr->wr_id;
+       wc.status = IB_WC_REM_ABORT_ERR;
+       wc.vendor_err = rmpp_status;
+       ib_mad_complete_send_wr(mad_send_wr, &wc);
+}
+
  static void process_rmpp_ack(struct ib_mad_agent_private *agent,
                              struct ib_mad_recv_wc *mad_recv_wc)
  {
@@ -568,11 +657,21 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
         int seg_num, newwin, ret;
  
         rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
-       if (rmpp_mad->rmpp_hdr.rmpp_status)
+       if (rmpp_mad->rmpp_hdr.rmpp_status) {
+               abort_send(agent, rmpp_mad->mad_hdr.tid,
+                          IB_MGMT_RMPP_STATUS_BAD_STATUS);
+               nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
                 return;
+       }
  
         seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
         newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
+       if (newwin < seg_num) {
+               abort_send(agent, rmpp_mad->mad_hdr.tid,
+                          IB_MGMT_RMPP_STATUS_W2S);
+               nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
+               return;
+       }
  
         spin_lock_irqsave(&agent->lock, flags);
         mad_send_wr = ib_find_send_mad(agent, rmpp_mad->mad_hdr.tid);
@@ -583,8 +682,13 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
             (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
                 goto out;       /* Send is already done */
  
-       if (seg_num > mad_send_wr->total_seg)
-               goto out;       /* Bad ACK */
+       if (seg_num > mad_send_wr->total_seg || seg_num > mad_send_wr->newwin) {
+               spin_unlock_irqrestore(&agent->lock, flags);
+               abort_send(agent, rmpp_mad->mad_hdr.tid,
+                          IB_MGMT_RMPP_STATUS_S2B);
+               nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
+               return;
+       }
  
         if (newwin < mad_send_wr->newwin || seg_num < mad_send_wr->last_ack)
                 goto out;       /* Old ACK */
@@ -628,6 +732,72 @@ out:
         spin_unlock_irqrestore(&agent->lock, flags);
  }
  
+/*
+ * Validate an incoming RMPP DATA segment and dispatch it.
+ *
+ * A segment with a non-zero RMPP status, or whose FIRST flag disagrees with
+ * its segment number (segment 1 must carry FIRST, later segments must not),
+ * is answered with an ABORT via nack_recv(), freed, and NULL is returned.
+ * Otherwise segment 1 goes to start_rmpp() and any other segment to
+ * continue_rmpp(), and that function's result is returned.
+ */
+static struct ib_mad_recv_wc *
+process_rmpp_data(struct ib_mad_agent_private *agent,
+                 struct ib_mad_recv_wc *mad_recv_wc)
+{
+       struct ib_rmpp_mad *rmpp_mad;
+       struct ib_rmpp_hdr *rmpp_hdr;
+       u8 nack_status;
+
+       rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
+       rmpp_hdr = &rmpp_mad->rmpp_hdr;
+
+       if (rmpp_hdr->rmpp_status) {
+               nack_status = IB_MGMT_RMPP_STATUS_BAD_STATUS;
+       } else {
+               int is_seg_one = (rmpp_hdr->seg_num == __constant_htonl(1));
+               int has_first = !!(ib_get_rmpp_flags(rmpp_hdr) &
+                                  IB_MGMT_RMPP_FLAG_FIRST);
+
+               if (is_seg_one == has_first)
+                       return is_seg_one ?
+                              start_rmpp(agent, mad_recv_wc) :
+                              continue_rmpp(agent, mad_recv_wc);
+
+               /* FIRST flag and segment number contradict each other. */
+               nack_status = IB_MGMT_RMPP_STATUS_BAD_SEG;
+       }
+
+       nack_recv(agent, mad_recv_wc, nack_status);
+       ib_free_recv_mad(mad_recv_wc);
+       return NULL;
+}
+
+/*
+ * Handle a received RMPP STOP.
+ *
+ * When the status is IB_MGMT_RMPP_STATUS_RESX the matching send is aborted
+ * with that status.  Any other status is treated as invalid: the matching
+ * send is aborted with BAD_STATUS and an ABORT is sent back to the peer.
+ */
+static void process_rmpp_stop(struct ib_mad_agent_private *agent,
+                             struct ib_mad_recv_wc *mad_recv_wc)
+{
+       struct ib_rmpp_mad *rmpp_mad =
+               (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
+
+       if (rmpp_mad->rmpp_hdr.rmpp_status == IB_MGMT_RMPP_STATUS_RESX) {
+               abort_send(agent, rmpp_mad->mad_hdr.tid,
+                          rmpp_mad->rmpp_hdr.rmpp_status);
+               return;
+       }
+
+       abort_send(agent, rmpp_mad->mad_hdr.tid,
+                  IB_MGMT_RMPP_STATUS_BAD_STATUS);
+       nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
+}
+
+/*
+ * Handle a received RMPP ABORT.
+ *
+ * A status inside [IB_MGMT_RMPP_STATUS_ABORT_MIN,
+ * IB_MGMT_RMPP_STATUS_ABORT_MAX] aborts the matching send with that status.
+ * A status outside that range is treated as invalid: the matching send is
+ * aborted with BAD_STATUS and an ABORT is sent back to the peer.
+ */
+static void process_rmpp_abort(struct ib_mad_agent_private *agent,
+                              struct ib_mad_recv_wc *mad_recv_wc)
+{
+       struct ib_rmpp_mad *rmpp_mad =
+               (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
+
+       if (rmpp_mad->rmpp_hdr.rmpp_status >= IB_MGMT_RMPP_STATUS_ABORT_MIN &&
+           rmpp_mad->rmpp_hdr.rmpp_status <= IB_MGMT_RMPP_STATUS_ABORT_MAX) {
+               abort_send(agent, rmpp_mad->mad_hdr.tid,
+                          rmpp_mad->rmpp_hdr.rmpp_status);
+               return;
+       }
+
+       abort_send(agent, rmpp_mad->mad_hdr.tid,
+                  IB_MGMT_RMPP_STATUS_BAD_STATUS);
+       nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
+}
+
  struct ib_mad_recv_wc *
  ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
                         struct ib_mad_recv_wc *mad_recv_wc)
@@ -638,23 +808,29 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
         if (!(rmpp_mad->rmpp_hdr.rmpp_rtime_flags & IB_MGMT_RMPP_FLAG_ACTIVE))
                 return mad_recv_wc;
  
-       if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION)
+       if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) {
+               abort_send(agent, rmpp_mad->mad_hdr.tid,
+                          IB_MGMT_RMPP_STATUS_UNV);
+               nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
                 goto out;
+       }
  
         switch (rmpp_mad->rmpp_hdr.rmpp_type) {
         case IB_MGMT_RMPP_TYPE_DATA:
-               if (rmpp_mad->rmpp_hdr.seg_num == __constant_htonl(1))
-                       return start_rmpp(agent, mad_recv_wc);
-               else
-                       return continue_rmpp(agent, mad_recv_wc);
+               return process_rmpp_data(agent, mad_recv_wc);
         case IB_MGMT_RMPP_TYPE_ACK:
                 process_rmpp_ack(agent, mad_recv_wc);
                 break;
         case IB_MGMT_RMPP_TYPE_STOP:
+               process_rmpp_stop(agent, mad_recv_wc);
+               break;
         case IB_MGMT_RMPP_TYPE_ABORT:
-               /* TODO: process_rmpp_nack(agent, mad_recv_wc); */
+               process_rmpp_abort(agent, mad_recv_wc);
                 break;
         default:
+               abort_send(agent, rmpp_mad->mad_hdr.tid,
+                          IB_MGMT_RMPP_STATUS_BADT);
+               nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
                 break;
         }
  out:
@@ -714,7 +890,10 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
         if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
                 msg = (struct ib_mad_send_buf *) (unsigned long)
                       mad_send_wc->wr_id;
-               ib_free_send_mad(msg);
+               if (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_ACK)
+                       ib_free_send_mad(msg);
+               else
+                       free_msg(msg);
                 return IB_RMPP_RESULT_INTERNAL;  /* ACK, STOP, or ABORT */
         }
  
diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c

index eb5ff54c10d75e758088f2e594167a18a4f3fabc..35df5010e723ecf9d466f0a27fd33f51bfe80e86 100644 (file)
--- a/drivers/infiniband/core/packer.c
+++ b/drivers/infiniband/core/packer.c
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -32,7 +33,7 @@
   * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $
   */
  
-#include <ib_pack.h>
+#include <rdma/ib_pack.h>
  
  static u64 value_read(int offset, int size, void *structure)
  {
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c

index 795184931c83e84318a34c0dc188db4bf6a957d6..126ac80db7b84f3e71b83211188b8b8024e07912 100644 (file)
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1,6 +1,6 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -44,8 +44,8 @@
  #include <linux/kref.h>
  #include <linux/idr.h>
  
-#include <ib_pack.h>
-#include <ib_sa.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_sa.h>
  
  MODULE_AUTHOR("Roland Dreier");
  MODULE_DESCRIPTION("InfiniBand subnet administration query support");
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c

index b4b284324a33a5be9356dd1f01ba1dae71b70889..35852e794e26533aa61c8c8fe3900c3b5d372eee 100644 (file)
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -1,9 +1,10 @@
  /*
- * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
- * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
- * Copyright (c) 2004 Intel Corporation.  All rights reserved.
- * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
- * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2004, 2005 Infinicon Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -36,7 +37,7 @@
   * $Id: smi.c 1389 2004-12-27 22:56:47Z roland $
   */
  
-#include <ib_smi.h>
+#include <rdma/ib_smi.h>
  #include "smi.h"
  
  /*
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c

index 90d51b179abe72671603f80b5545cc919f51e841..fae1c2dcee51660650921523ae85e1c5748b26fe 100644 (file)
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1,5 +1,7 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -34,7 +36,7 @@
  
  #include "core_priv.h"
  
-#include <ib_mad.h>
+#include <rdma/ib_mad.h>
  
  struct ib_port {
         struct kobject         kobj;
@@ -253,14 +255,14 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
                 return ret;
  
         return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-                      be16_to_cpu(((u16 *) gid.raw)[0]),
-                      be16_to_cpu(((u16 *) gid.raw)[1]),
-                      be16_to_cpu(((u16 *) gid.raw)[2]),
-                      be16_to_cpu(((u16 *) gid.raw)[3]),
-                      be16_to_cpu(((u16 *) gid.raw)[4]),
-                      be16_to_cpu(((u16 *) gid.raw)[5]),
-                      be16_to_cpu(((u16 *) gid.raw)[6]),
-                      be16_to_cpu(((u16 *) gid.raw)[7]));
+                      be16_to_cpu(((__be16 *) gid.raw)[0]),
+                      be16_to_cpu(((__be16 *) gid.raw)[1]),
+                      be16_to_cpu(((__be16 *) gid.raw)[2]),
+                      be16_to_cpu(((__be16 *) gid.raw)[3]),
+                      be16_to_cpu(((__be16 *) gid.raw)[4]),
+                      be16_to_cpu(((__be16 *) gid.raw)[5]),
+                      be16_to_cpu(((__be16 *) gid.raw)[6]),
+                      be16_to_cpu(((__be16 *) gid.raw)[7]));
  }
  
  static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
@@ -332,11 +334,11 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
                 break;
         case 16:
                 ret = sprintf(buf, "%u\n",
-                             be16_to_cpup((u16 *)(out_mad->data + 40 + offset / 8)));
+                             be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
                 break;
         case 32:
                 ret = sprintf(buf, "%u\n",
-                             be32_to_cpup((u32 *)(out_mad->data + 40 + offset / 8)));
+                             be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
                 break;
         default:
                 ret = 0;
@@ -598,10 +600,10 @@ static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf)
                 return ret;
  
         return sprintf(buf, "%04x:%04x:%04x:%04x\n",
-                      be16_to_cpu(((u16 *) &attr.sys_image_guid)[0]),
-                      be16_to_cpu(((u16 *) &attr.sys_image_guid)[1]),
-                      be16_to_cpu(((u16 *) &attr.sys_image_guid)[2]),
-                      be16_to_cpu(((u16 *) &attr.sys_image_guid)[3]));
+                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]),
+                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]),
+                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]),
+                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3]));
  }
  
  static ssize_t show_node_guid(struct class_device *cdev, char *buf)
@@ -615,10 +617,10 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf)
                 return ret;
  
         return sprintf(buf, "%04x:%04x:%04x:%04x\n",
-                      be16_to_cpu(((u16 *) &attr.node_guid)[0]),
-                      be16_to_cpu(((u16 *) &attr.node_guid)[1]),
-                      be16_to_cpu(((u16 *) &attr.node_guid)[2]),
-                      be16_to_cpu(((u16 *) &attr.node_guid)[3]));
+                      be16_to_cpu(((__be16 *) &attr.node_guid)[0]),
+                      be16_to_cpu(((__be16 *) &attr.node_guid)[1]),
+                      be16_to_cpu(((__be16 *) &attr.node_guid)[2]),
+                      be16_to_cpu(((__be16 *) &attr.node_guid)[3]));
  }
  
  static CLASS_DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c

index 61d07c732f4932178235a61958e3ff7abbe301c7..79595826ccc7392aa2e9d05c1ad45454eedfccc0 100644 (file)
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -73,14 +74,18 @@ static struct semaphore ctx_id_mutex;
  static struct idr       ctx_id_table;
  static int              ctx_id_rover = 0;
  
-static struct ib_ucm_context *ib_ucm_ctx_get(int id)
+static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
  {
         struct ib_ucm_context *ctx;
  
         down(&ctx_id_mutex);
         ctx = idr_find(&ctx_id_table, id);
-       if (ctx)
-               ctx->ref++;
+       if (!ctx)
+               ctx = ERR_PTR(-ENOENT);
+       else if (ctx->file != file)
+               ctx = ERR_PTR(-EINVAL);
+       else
+               atomic_inc(&ctx->ref);
         up(&ctx_id_mutex);
  
         return ctx;
@@ -88,21 +93,37 @@ static struct ib_ucm_context *ib_ucm_ctx_get(int id)
  
  static void ib_ucm_ctx_put(struct ib_ucm_context *ctx)
  {
+       if (atomic_dec_and_test(&ctx->ref))
+               wake_up(&ctx->wait);
+}
+
+static ssize_t ib_ucm_destroy_ctx(struct ib_ucm_file *file, int id)
+{
+       struct ib_ucm_context *ctx;
         struct ib_ucm_event *uevent;
  
         down(&ctx_id_mutex);
-
-       ctx->ref--;
-       if (!ctx->ref)
+       ctx = idr_find(&ctx_id_table, id);
+       if (!ctx)
+               ctx = ERR_PTR(-ENOENT);
+       else if (ctx->file != file)
+               ctx = ERR_PTR(-EINVAL);
+       else
                 idr_remove(&ctx_id_table, ctx->id);
-
         up(&ctx_id_mutex);
  
-       if (ctx->ref)
-               return;
+       if (IS_ERR(ctx))
+               return PTR_ERR(ctx);
  
-       down(&ctx->file->mutex);
+       atomic_dec(&ctx->ref);
+       wait_event(ctx->wait, !atomic_read(&ctx->ref));
+
+       /* No new events will be generated after destroying the cm_id. */
+       if (!IS_ERR(ctx->cm_id))
+               ib_destroy_cm_id(ctx->cm_id);
  
+       /* Cleanup events not yet reported to the user. */
+       down(&file->mutex);
         list_del(&ctx->file_list);
         while (!list_empty(&ctx->events)) {
  
@@ -117,13 +138,10 @@ static void ib_ucm_ctx_put(struct ib_ucm_context *ctx)
  
                 kfree(uevent);
         }
+       up(&file->mutex);
  
-       up(&ctx->file->mutex);
-
-       ucm_dbg("Destroyed CM ID <%d>\n", ctx->id);
-
-       ib_destroy_cm_id(ctx->cm_id);
         kfree(ctx);
+       return 0;
  }
  
  static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
@@ -135,11 +153,11 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
         if (!ctx)
                 return NULL;
  
-       ctx->ref  = 1; /* user reference */
+       atomic_set(&ctx->ref, 1);
+       init_waitqueue_head(&ctx->wait);
         ctx->file = file;
  
         INIT_LIST_HEAD(&ctx->events);
-       init_MUTEX(&ctx->mutex);
  
         list_add_tail(&ctx->file_list, &file->ctxs);
  
@@ -177,8 +195,8 @@ static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath,
         if (!kpath || !upath)
                 return;
  
-       memcpy(upath->dgid, kpath->dgid.raw, sizeof(union ib_gid));
-       memcpy(upath->sgid, kpath->sgid.raw, sizeof(union ib_gid));
+       memcpy(upath->dgid, kpath->dgid.raw, sizeof *upath->dgid);
+       memcpy(upath->sgid, kpath->sgid.raw, sizeof *upath->sgid);
  
         upath->dlid             = kpath->dlid;
         upath->slid             = kpath->slid;
@@ -201,10 +219,11 @@ static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath,
                 kpath->packet_life_time_selector;
  }
  
-static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
+static void ib_ucm_event_req_get(struct ib_ucm_context *ctx,
+                                struct ib_ucm_req_event_resp *ureq,
                                  struct ib_cm_req_event_param *kreq)
  {
-       ureq->listen_id = (long)kreq->listen_id->context;
+       ureq->listen_id = ctx->id;
  
         ureq->remote_ca_guid             = kreq->remote_ca_guid;
         ureq->remote_qkey                = kreq->remote_qkey;
@@ -240,34 +259,11 @@ static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
         urep->srq                 = krep->srq;
  }
  
-static void ib_ucm_event_rej_get(struct ib_ucm_rej_event_resp *urej,
-                                struct ib_cm_rej_event_param *krej)
-{
-       urej->reason = krej->reason;
-}
-
-static void ib_ucm_event_mra_get(struct ib_ucm_mra_event_resp *umra,
-                                struct ib_cm_mra_event_param *kmra)
-{
-       umra->timeout = kmra->service_timeout;
-}
-
-static void ib_ucm_event_lap_get(struct ib_ucm_lap_event_resp *ulap,
-                                struct ib_cm_lap_event_param *klap)
-{
-       ib_ucm_event_path_get(&ulap->path, klap->alternate_path);
-}
-
-static void ib_ucm_event_apr_get(struct ib_ucm_apr_event_resp *uapr,
-                                struct ib_cm_apr_event_param *kapr)
-{
-       uapr->status = kapr->ap_status;
-}
-
-static void ib_ucm_event_sidr_req_get(struct ib_ucm_sidr_req_event_resp *ureq,
+static void ib_ucm_event_sidr_req_get(struct ib_ucm_context *ctx,
+                                     struct ib_ucm_sidr_req_event_resp *ureq,
                                       struct ib_cm_sidr_req_event_param *kreq)
  {
-       ureq->listen_id = (long)kreq->listen_id->context;
+       ureq->listen_id = ctx->id;
         ureq->pkey      = kreq->pkey;
  }
  
@@ -279,19 +275,18 @@ static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep,
         urep->qpn    = krep->qpn;
  };
  
-static int ib_ucm_event_process(struct ib_cm_event *evt,
+static int ib_ucm_event_process(struct ib_ucm_context *ctx,
+                               struct ib_cm_event *evt,
                                 struct ib_ucm_event *uvt)
  {
         void *info = NULL;
-       int result;
  
         switch (evt->event) {
         case IB_CM_REQ_RECEIVED:
-               ib_ucm_event_req_get(&uvt->resp.u.req_resp,
+               ib_ucm_event_req_get(ctx, &uvt->resp.u.req_resp,
                                      &evt->param.req_rcvd);
                 uvt->data_len      = IB_CM_REQ_PRIVATE_DATA_SIZE;
-               uvt->resp.present |= (evt->param.req_rcvd.primary_path ?
-                                     IB_UCM_PRES_PRIMARY : 0);
+               uvt->resp.present  = IB_UCM_PRES_PRIMARY;
                 uvt->resp.present |= (evt->param.req_rcvd.alternate_path ?
                                       IB_UCM_PRES_ALTERNATE : 0);
                 break;
@@ -299,57 +294,46 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
                 ib_ucm_event_rep_get(&uvt->resp.u.rep_resp,
                                      &evt->param.rep_rcvd);
                 uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
-
                 break;
         case IB_CM_RTU_RECEIVED:
                 uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE;
                 uvt->resp.u.send_status = evt->param.send_status;
-
                 break;
         case IB_CM_DREQ_RECEIVED:
                 uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE;
                 uvt->resp.u.send_status = evt->param.send_status;
-
                 break;
         case IB_CM_DREP_RECEIVED:
                 uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE;
                 uvt->resp.u.send_status = evt->param.send_status;
-
                 break;
         case IB_CM_MRA_RECEIVED:
-               ib_ucm_event_mra_get(&uvt->resp.u.mra_resp,
-                                    &evt->param.mra_rcvd);
+               uvt->resp.u.mra_resp.timeout =
+                                       evt->param.mra_rcvd.service_timeout;
                 uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE;
-
                 break;
         case IB_CM_REJ_RECEIVED:
-               ib_ucm_event_rej_get(&uvt->resp.u.rej_resp,
-                                    &evt->param.rej_rcvd);
+               uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason;
                 uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
                 uvt->info_len = evt->param.rej_rcvd.ari_length;
                 info          = evt->param.rej_rcvd.ari;
-
                 break;
         case IB_CM_LAP_RECEIVED:
-               ib_ucm_event_lap_get(&uvt->resp.u.lap_resp,
-                                    &evt->param.lap_rcvd);
+               ib_ucm_event_path_get(&uvt->resp.u.lap_resp.path,
+                                     evt->param.lap_rcvd.alternate_path);
                 uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE;
-               uvt->resp.present |= (evt->param.lap_rcvd.alternate_path ?
-                                     IB_UCM_PRES_ALTERNATE : 0);
+               uvt->resp.present = IB_UCM_PRES_ALTERNATE;
                 break;
         case IB_CM_APR_RECEIVED:
-               ib_ucm_event_apr_get(&uvt->resp.u.apr_resp,
-                                    &evt->param.apr_rcvd);
+               uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status;
                 uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE;
                 uvt->info_len = evt->param.apr_rcvd.info_len;
                 info          = evt->param.apr_rcvd.apr_info;
-
                 break;
         case IB_CM_SIDR_REQ_RECEIVED:
-               ib_ucm_event_sidr_req_get(&uvt->resp.u.sidr_req_resp,
+               ib_ucm_event_sidr_req_get(ctx, &uvt->resp.u.sidr_req_resp,
                                           &evt->param.sidr_req_rcvd);
                 uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
-
                 break;
         case IB_CM_SIDR_REP_RECEIVED:
                 ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp,
@@ -357,43 +341,35 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
                 uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
                 uvt->info_len = evt->param.sidr_rep_rcvd.info_len;
                 info          = evt->param.sidr_rep_rcvd.info;
-
                 break;
         default:
                 uvt->resp.u.send_status = evt->param.send_status;
-
                 break;
         }
  
-       if (uvt->data_len && evt->private_data) {
-
+       if (uvt->data_len) {
                 uvt->data = kmalloc(uvt->data_len, GFP_KERNEL);
-               if (!uvt->data) {
-                       result = -ENOMEM;
-                       goto error;
-               }
+               if (!uvt->data)
+                       goto err1;
  
                 memcpy(uvt->data, evt->private_data, uvt->data_len);
                 uvt->resp.present |= IB_UCM_PRES_DATA;
         }
  
-       if (uvt->info_len && info) {
-
+       if (uvt->info_len) {
                 uvt->info = kmalloc(uvt->info_len, GFP_KERNEL);
-               if (!uvt->info) {
-                       result = -ENOMEM;
-                       goto error;
-               }
+               if (!uvt->info)
+                       goto err2;
  
                 memcpy(uvt->info, info, uvt->info_len);
                 uvt->resp.present |= IB_UCM_PRES_INFO;
         }
-
         return 0;
-error:
-       kfree(uvt->info);
+
+err2:
         kfree(uvt->data);
-       return result;
+err1:
+       return -ENOMEM;
  }
  
  static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
@@ -403,63 +379,42 @@ static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
         struct ib_ucm_context *ctx;
         int result = 0;
         int id;
-       /*
-        * lookup correct context based on event type.
-        */
-       switch (event->event) {
-       case IB_CM_REQ_RECEIVED:
-               id = (long)event->param.req_rcvd.listen_id->context;
-               break;
-       case IB_CM_SIDR_REQ_RECEIVED:
-               id = (long)event->param.sidr_req_rcvd.listen_id->context;
-               break;
-       default:
-               id = (long)cm_id->context;
-               break;
-       }
  
-       ucm_dbg("Event. CM ID <%d> event <%d>\n", id, event->event);
-
-       ctx = ib_ucm_ctx_get(id);
-       if (!ctx)
-               return -ENOENT;
+       ctx = cm_id->context;
  
         if (event->event == IB_CM_REQ_RECEIVED ||
             event->event == IB_CM_SIDR_REQ_RECEIVED)
                 id = IB_UCM_CM_ID_INVALID;
+       else
+               id = ctx->id;
  
         uevent = kmalloc(sizeof(*uevent), GFP_KERNEL);
-       if (!uevent) {
-               result = -ENOMEM;
-               goto done;
-       }
+       if (!uevent)
+               goto err1;
  
         memset(uevent, 0, sizeof(*uevent));
-
         uevent->resp.id    = id;
         uevent->resp.event = event->event;
  
-       result = ib_ucm_event_process(event, uevent);
+       result = ib_ucm_event_process(ctx, event, uevent);
         if (result)
-               goto done;
+               goto err2;
  
         uevent->ctx   = ctx;
-       uevent->cm_id = ((event->event == IB_CM_REQ_RECEIVED ||
-                         event->event == IB_CM_SIDR_REQ_RECEIVED ) ?
-                        cm_id : NULL);
+       uevent->cm_id = (id == IB_UCM_CM_ID_INVALID) ? cm_id : NULL;
  
         down(&ctx->file->mutex);
-
         list_add_tail(&uevent->file_list, &ctx->file->events);
         list_add_tail(&uevent->ctx_list, &ctx->events);
-
         wake_up_interruptible(&ctx->file->poll_wait);
-
         up(&ctx->file->mutex);
-done:
-       ctx->error = result;
-       ib_ucm_ctx_put(ctx); /* func reference */
-       return result;
+       return 0;
+
+err2:
+       kfree(uevent);
+err1:
+       /* Destroy new cm_id's */
+       return (id == IB_UCM_CM_ID_INVALID);
  }
  
  static ssize_t ib_ucm_event(struct ib_ucm_file *file,
@@ -517,9 +472,8 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
                 goto done;
         }
  
-       ctx->cm_id             = uevent->cm_id;
-       ctx->cm_id->cm_handler = ib_ucm_event_handler;
-       ctx->cm_id->context    = (void *)(unsigned long)ctx->id;
+       ctx->cm_id          = uevent->cm_id;
+       ctx->cm_id->context = ctx;
  
         uevent->resp.id = ctx->id;
  
@@ -585,30 +539,29 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
         if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                 return -EFAULT;
  
+       down(&file->mutex);
         ctx = ib_ucm_ctx_alloc(file);
+       up(&file->mutex);
         if (!ctx)
                 return -ENOMEM;
  
-       ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler,
-                                    (void *)(unsigned long)ctx->id);
-       if (!ctx->cm_id) {
-               result = -ENOMEM;
-               goto err_cm;
+       ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx);
+       if (IS_ERR(ctx->cm_id)) {
+               result = PTR_ERR(ctx->cm_id);
+               goto err;
         }
  
         resp.id = ctx->id;
         if (copy_to_user((void __user *)(unsigned long)cmd.response,
                          &resp, sizeof(resp))) {
                 result = -EFAULT;
-               goto err_ret;
+               goto err;
         }
  
         return 0;
-err_ret:
-       ib_destroy_cm_id(ctx->cm_id);
-err_cm:
-       ib_ucm_ctx_put(ctx); /* user reference */
  
+err:
+       ib_ucm_destroy_ctx(file, ctx->id);
         return result;
  }
  
@@ -617,19 +570,11 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
                                  int in_len, int out_len)
  {
         struct ib_ucm_destroy_id cmd;
-       struct ib_ucm_context *ctx;
  
         if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                 return -EFAULT;
  
-       ctx = ib_ucm_ctx_get(cmd.id);
-       if (!ctx)
-               return -ENOENT;
-
-       ib_ucm_ctx_put(ctx); /* user reference */
-       ib_ucm_ctx_put(ctx); /* func reference */
-
-       return 0;
+       return ib_ucm_destroy_ctx(file, cmd.id);
  }
  
  static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
@@ -647,15 +592,9 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
         if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                 return -EFAULT;
  
-       ctx = ib_ucm_ctx_get(cmd.id);
-       if (!ctx)
-               return -ENOENT;
-
-       down(&ctx->file->mutex);
-       if (ctx->file != file) {
-               result = -EINVAL;
-               goto done;
-       }
+       ctx = ib_ucm_ctx_get(file, cmd.id);
+       if (IS_ERR(ctx))
+               return PTR_ERR(ctx);
  
         resp.service_id   = ctx->cm_id->service_id;
         resp.service_mask = ctx->cm_id->service_mask;
@@ -666,9 +605,7 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
                          &resp, sizeof(resp)))
                 result = -EFAULT;
  
-done:
-       up(&ctx->file->mutex);
-       ib_ucm_ctx_put(ctx); /* func reference */
+       ib_ucm_ctx_put(ctx);
         return result;
  }
  
@@ -683,19 +620,12 @@ static ssize_t ib_ucm_listen(struct ib_ucm_file *file,
         if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                 return -EFAULT;
  
-       ctx = ib_ucm_ctx_get(cmd.id);
-       if (!ctx)
-               return -ENOENT;
+       ctx = ib_ucm_ctx_get(file, cmd.id);
+       if (IS_ERR(ctx))
+               return PTR_ERR(ctx);
  
-       down(&ctx->file->mutex);
-       if (ctx->file != file)
-               result = -EINVAL;
-       else
-               result = ib_cm_listen(ctx->cm_id, cmd.service_id,
-                                     cmd.service_mask);
-
-       up(&ctx->file->mutex);
-       ib_ucm_ctx_put(ctx); /* func reference */
+       result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask);
+       ib_ucm_ctx_put(ctx);
         return result;
  }
  
@@ -710,18 +640,12 @@ static ssize_t ib_ucm_establish(struct ib_ucm_file *file,
         if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                 return -EFAULT;
  
-       ctx = ib_ucm_ctx_get(cmd.id);
-       if (!ctx)
-               return -ENOENT;
-
-       down(&ctx->file->mutex);
-       if (ctx->file != file)
-               result = -EINVAL;
-       else
-               result = ib_cm_establish(ctx->cm_id);
+       ctx = ib_ucm_ctx_get(file, cmd.id);
+       if (IS_ERR(ctx))
+               return PTR_ERR(ctx);
  
-       up(&ctx->file->mutex);
-       ib_ucm_ctx_put(ctx); /* func reference */
+       result = ib_cm_establish(ctx->cm_id);
+       ib_ucm_ctx_put(ctx);
         return result;
  }
  
@@ -768,8 +692,8 @@ static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src)
                 return -EFAULT;
         }
  
-       memcpy(sa_path->dgid.raw, ucm_path.dgid, sizeof(union ib_gid));
-       memcpy(sa_path->sgid.raw, ucm_path.sgid, sizeof(union ib_gid));
+       memcpy(sa_path->dgid.raw, ucm_path.dgid, sizeof sa_path->dgid);
+       memcpy(sa_path->sgid.raw, ucm_path.sgid, sizeof sa_path->sgid);
  
         sa_path->dlid             = ucm_path.dlid;
         sa_path->slid             = ucm_path.slid;
@@ -839,25 +763,17 @@ static ssize_t ib_ucm_send_req(struct ib_ucm_file *file,
         param.max_cm_retries             = cmd.max_cm_retries;
         param.srq                        = cmd.srq;
  
-       ctx = ib_ucm_ctx_get(cmd.id);
-       if (!ctx) {
-               result = -ENOENT;
-               goto done;
-       }
-
-       down(&ctx->file->mutex);
-       if (ctx->file != file)
-               result = -EINVAL;
-       else
+       ctx = ib_ucm_ctx_get(file, cmd.id);
+       if (!IS_ERR(ctx)) {
                 result = ib_send_cm_req(ctx->cm_id, &param);
+               ib_ucm_ctx_put(ctx);
+       } else
+               result = PTR_ERR(ctx);
  
-       up(&ctx->file->mutex);
-       ib_ucm_ctx_put(ctx); /* func reference */
  done:
         kfree(param.private_data);
         kfree(param.primary_path);
         kfree(param.alternate_path);
-
         return result;
  }
  
@@ -890,23 +806,14 @@ static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file,
         param.rnr_retry_count     = cmd.rnr_retry_count;
         param.srq                 = cmd.srq;
  
-       ctx = ib_ucm_ctx_get(cmd.id);
-       if (!ctx) {
-               result = -ENOENT;
-               goto done;
-       }
-
-       down(&ctx->file->mutex);
-       if (ctx->file != file)
-               result = -EINVAL;
-       else
+       ctx = ib_ucm_ctx_get(file, cmd.id);
+       if (!IS_ERR(ctx)) {
                 result = ib_send_cm_rep(ctx->cm_id, &param);
+               ib_ucm_ctx_put(ctx);
+       } else
+               result = PTR_ERR(ctx);
  
-       up(&ctx->file->mutex);
-       ib_ucm_ctx_put(ctx); /* func reference */
-done:
         kfree(param.private_data);
-
         return result;
  }
  
@@ -928,23 +835,14 @@ static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file,
         if (result)
                 return result;
  
-       ctx = ib_ucm_ctx_get(cmd.id);
-       if (!ctx) {
-               result = -ENOENT;
-               goto done;
-       }
-
-       down(&ctx->file->mutex);
-       if (ctx->file != file)
-               result = -EINVAL;
-       else
+       ctx = ib_ucm_ctx_get(file, cmd.id);
+       if (!IS_ERR(ctx)) {
                 result = func(ctx->cm_id, private_data, cmd.len);
+               ib_ucm_ctx_put(ctx);
+       } else
+               result = PTR_ERR(ctx);
  
-       up(&ctx->file->mutex);
-       ib_ucm_ctx_put(ctx); /* func reference */
-done:
         kfree(private_data);
-
         return result;
  }
  
@@ -995,26 +893,17 @@ static ssize_t ib_ucm_send_info(struct ib_ucm_file *file,
         if (result)
                 goto done;
  
-       ctx = ib_ucm_ctx_get(cmd.id);
-       if (!ctx) {
-               result = -ENOENT;
-               goto done;
-       }
-
-       down(&ctx->file->mutex);
-       if (ctx->file != file)
-               result = -EINVAL;
-       else
-               result = func(ctx->cm_id, cmd.status,
-                             info, cmd.info_len,
+       ctx = ib_ucm_ctx_get(file, cmd.id);
+       if (!IS_ERR(ctx)) {
+               result = func(ctx->cm_id, cmd.status, info, cmd.info_len,
                               data, cmd.data_len);
+               ib_ucm_ctx_put(ctx);
+       } else
+               result = PTR_ERR(ctx);
  
-       up(&ctx->file->mutex);
-       ib_ucm_ctx_put(ctx); /* func reference */
  done:
         kfree(data);
         kfree(info);
-
         return result;
  }
  
@@ -1048,24 +937,14 @@ static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file,
         if (result)
                 return result;
  
-       ctx = ib_ucm_ctx_get(cmd.id);
-       if (!ctx) {
-               result = -ENOENT;
-               goto done;
-       }
+       ctx = ib_ucm_ctx_get(file, cmd.id);
+       if (!IS_ERR(ctx)) {
+               result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len);
+               ib_ucm_ctx_put(ctx);
+       } else
+               result = PTR_ERR(ctx);
  
-       down(&ctx->file->mutex);
-       if (ctx->file != file)
-               result = -EINVAL;
-       else
-               result = ib_send_cm_mra(ctx->cm_id, cmd.timeout,
-                                       data, cmd.len);
-
-       up(&ctx->file->mutex);
-       ib_ucm_ctx_put(ctx); /* func reference */
-done:
         kfree(data);
-
         return result;
  }
  
@@ -1090,24 +969,16 @@ static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file,
         if (result)
                 goto done;
  
-       ctx = ib_ucm_ctx_get(cmd.id);
-       if (!ctx) {
-               result = -ENOENT;
-               goto done;
-       }
-
-       down(&ctx->file->mutex);
-       if (ctx->file != file)
-               result = -EINVAL;
-       else
+       ctx = ib_ucm_ctx_get(file, cmd.id);
+       if (!IS_ERR(ctx)) {
                 result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len);
+               ib_ucm_ctx_put(ctx);
+       } else
+               result = PTR_ERR(ctx);
  
-       up(&ctx->file->mutex);
-       ib_ucm_ctx_put(ctx); /* func reference */
  done:
         kfree(data);
         kfree(path);
-
         return result;
  }
  
@@ -1140,24 +1011,16 @@ static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file,
         param.max_cm_retries   = cmd.max_cm_retries;
         param.pkey             = cmd.pkey;
  
-       ctx = ib_ucm_ctx_get(cmd.id);
-       if (!ctx) {
-               result = -ENOENT;
-               goto done;
-       }
-
-       down(&ctx->file->mutex);
-       if (ctx->file != file)
-               result = -EINVAL;
-       else
+       ctx = ib_ucm_ctx_get(file, cmd.id);
+       if (!IS_ERR(ctx)) {
                 result = ib_send_cm_sidr_req(ctx->cm_id, &param);
+               ib_ucm_ctx_put(ctx);
+       } else
+               result = PTR_ERR(ctx);
  
-       up(&ctx->file->mutex);
-       ib_ucm_ctx_put(ctx); /* func reference */
  done:
         kfree(param.private_data);
         kfree(param.path);
-
         return result;
  }
  
@@ -1184,30 +1047,22 @@ static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file,
         if (result)
                 goto done;
  
-       param.qp_num       = cmd.qpn;
-       param.qkey           = cmd.qkey;
-       param.status       = cmd.status;
-       param.info_length      = cmd.info_len;
-       param.private_data_len = cmd.data_len;
-
-       ctx = ib_ucm_ctx_get(cmd.id);
-       if (!ctx) {
-               result = -ENOENT;
-               goto done;
-       }
+       param.qp_num            = cmd.qpn;
+       param.qkey              = cmd.qkey;
+       param.status            = cmd.status;
+       param.info_length       = cmd.info_len;
+       param.private_data_len  = cmd.data_len;
  
-       down(&ctx->file->mutex);
-       if (ctx->file != file)
-               result = -EINVAL;
-       else
+       ctx = ib_ucm_ctx_get(file, cmd.id);
+       if (!IS_ERR(ctx)) {
                 result = ib_send_cm_sidr_rep(ctx->cm_id, &param);
+               ib_ucm_ctx_put(ctx);
+       } else
+               result = PTR_ERR(ctx);
  
-       up(&ctx->file->mutex);
-       ib_ucm_ctx_put(ctx); /* func reference */
  done:
         kfree(param.private_data);
         kfree(param.info);
-
         return result;
  }
  
@@ -1305,22 +1160,17 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
         struct ib_ucm_context *ctx;
  
         down(&file->mutex);
-
         while (!list_empty(&file->ctxs)) {
  
                 ctx = list_entry(file->ctxs.next,
                                  struct ib_ucm_context, file_list);
  
-               up(&ctx->file->mutex);
-               ib_ucm_ctx_put(ctx); /* user reference */
+               up(&file->mutex);
+               ib_ucm_destroy_ctx(file, ctx->id);
                 down(&file->mutex);
         }
-
         up(&file->mutex);
-
         kfree(file);
-
-       ucm_dbg("Deleted struct\n");
         return 0;
  }
  
diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h

index 6d36606151b2690abfaf269fffe8ffc4a6d2ac43..c8819b928a1ba82772d0ae48761ee41e7ebc6f4d 100644 (file)
--- a/drivers/infiniband/core/ucm.h
+++ b/drivers/infiniband/core/ucm.h
@@ -40,17 +40,15 @@
  #include <linux/cdev.h>
  #include <linux/idr.h>
  
-#include <ib_cm.h>
-#include <ib_user_cm.h>
+#include <rdma/ib_cm.h>
+#include <rdma/ib_user_cm.h>
  
  #define IB_UCM_CM_ID_INVALID 0xffffffff
  
  struct ib_ucm_file {
         struct semaphore mutex;
         struct file *filp;
-       /*
-        * list of pending events
-        */
+
         struct list_head  ctxs;   /* list of active connections */
         struct list_head  events; /* list of pending events */
         wait_queue_head_t poll_wait;
@@ -58,12 +56,11 @@ struct ib_ucm_file {
  
  struct ib_ucm_context {
         int                 id;
-       int                 ref;
-       int                 error;
+       wait_queue_head_t   wait;
+       atomic_t            ref;
  
         struct ib_ucm_file *file;
         struct ib_cm_id    *cm_id;
-       struct semaphore    mutex;
  
         struct list_head    events;    /* list of pending events. */
         struct list_head    file_list; /* member in file ctx list */
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c

index dc4eb1db5e962a8d3541ccd7f273d1cad5684328..527b23450ab3d72dc12e3298e8bbdc49a871e12b 100644 (file)
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -34,7 +35,7 @@
  
  #include <linux/errno.h>
  
-#include <ib_pack.h>
+#include <rdma/ib_pack.h>
  
  #define STRUCT_FIELD(header, field) \
         .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field),      \
@@ -194,6 +195,7 @@ void ib_ud_header_init(int                      payload_bytes,
                        struct ib_ud_header *header)
  {
         int header_len;
+       u16 packet_length;
  
         memset(header, 0, sizeof *header);
  
@@ -208,7 +210,7 @@ void ib_ud_header_init(int                      payload_bytes,
         header->lrh.link_version     = 0;
         header->lrh.link_next_header =
                 grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL;
-       header->lrh.packet_length    = (IB_LRH_BYTES     +
+       packet_length                = (IB_LRH_BYTES     +
                                         IB_BTH_BYTES     +
                                         IB_DETH_BYTES    +
                                         payload_bytes    +
@@ -217,8 +219,7 @@ void ib_ud_header_init(int                      payload_bytes,
  
         header->grh_present          = grh_present;
         if (grh_present) {
-               header->lrh.packet_length  += IB_GRH_BYTES / 4;
-
+               packet_length              += IB_GRH_BYTES / 4;
                 header->grh.ip_version      = 6;
                 header->grh.payload_length  =
                         cpu_to_be16((IB_BTH_BYTES     +
@@ -229,7 +230,7 @@ void ib_ud_header_init(int                      payload_bytes,
                 header->grh.next_header     = 0x1b;
         }
  
-       cpu_to_be16s(&header->lrh.packet_length);
+       header->lrh.packet_length = cpu_to_be16(packet_length);
  
         if (header->immediate_present)
                 header->bth.opcode           = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c

index 2e38792df533568c4e25d6e5a4dfd0469d3cf0c3..7c2f03057ddb1f6ed411d3509c68ba768e4bf0ad 100644 (file)
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1,6 +1,6 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 
   * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
@@ -49,8 +49,8 @@
  #include <asm/uaccess.h>
  #include <asm/semaphore.h>
  
-#include <ib_mad.h>
-#include <ib_user_mad.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_user_mad.h>
  
  MODULE_AUTHOR("Roland Dreier");
  MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
@@ -271,7 +271,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
         struct ib_send_wr *bad_wr;
         struct ib_rmpp_mad *rmpp_mad;
         u8 method;
-       u64 *tid;
+       __be64 *tid;
         int ret, length, hdr_len, data_len, rmpp_hdr_size;
         int rmpp_active = 0;
  
@@ -316,7 +316,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
         if (packet->mad.hdr.grh_present) {
                 ah_attr.ah_flags = IB_AH_GRH;
                 memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
-               ah_attr.grh.flow_label     = packet->mad.hdr.flow_label;
+               ah_attr.grh.flow_label     = be32_to_cpu(packet->mad.hdr.flow_label);
                 ah_attr.grh.hop_limit      = packet->mad.hdr.hop_limit;
                 ah_attr.grh.traffic_class  = packet->mad.hdr.traffic_class;
         }
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h

index 7696022f9a4ec3365fa574a88239b5dfd2c3d515..180b3d4765e40bc4eae9d3c0c6516006fefdbee2 100644 (file)
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -1,6 +1,8 @@
  /*
   * Copyright (c) 2005 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -43,8 +45,8 @@
  #include <linux/kref.h>
  #include <linux/idr.h>
  
-#include <ib_verbs.h>
-#include <ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
  
  struct ib_uverbs_device {
         int                                     devnum;
@@ -97,10 +99,12 @@ extern struct idr ib_uverbs_mw_idr;
  extern struct idr ib_uverbs_ah_idr;
  extern struct idr ib_uverbs_cq_idr;
  extern struct idr ib_uverbs_qp_idr;
+extern struct idr ib_uverbs_srq_idr;
  
  void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
  void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
  void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
+void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
  
  int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
                 void *addr, size_t size, int write);
@@ -129,5 +133,8 @@ IB_UVERBS_DECLARE_CMD(modify_qp);
  IB_UVERBS_DECLARE_CMD(destroy_qp);
  IB_UVERBS_DECLARE_CMD(attach_mcast);
  IB_UVERBS_DECLARE_CMD(detach_mcast);
+IB_UVERBS_DECLARE_CMD(create_srq);
+IB_UVERBS_DECLARE_CMD(modify_srq);
+IB_UVERBS_DECLARE_CMD(destroy_srq);
  
  #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c

index 5f2bbcda4c73000ed2b042c3f25282925aac6b28..ebccf9f38af946e6cfb1c894ecf020f4c96aaae8 100644 (file)
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -724,6 +724,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
         struct ib_uobject              *uobj;
         struct ib_pd                   *pd;
         struct ib_cq                   *scq, *rcq;
+       struct ib_srq                  *srq;
         struct ib_qp                   *qp;
         struct ib_qp_init_attr          attr;
         int ret;
@@ -747,10 +748,12 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
         pd  = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
         scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle);
         rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle);
+       srq = cmd.is_srq ? idr_find(&ib_uverbs_srq_idr, cmd.srq_handle) : NULL;
  
         if (!pd  || pd->uobject->context  != file->ucontext ||
             !scq || scq->uobject->context != file->ucontext ||
-           !rcq || rcq->uobject->context != file->ucontext) {
+           !rcq || rcq->uobject->context != file->ucontext ||
+           (cmd.is_srq && (!srq || srq->uobject->context != file->ucontext))) {
                 ret = -EINVAL;
                 goto err_up;
         }
@@ -759,7 +762,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
         attr.qp_context    = file;
         attr.send_cq       = scq;
         attr.recv_cq       = rcq;
-       attr.srq           = NULL;
+       attr.srq           = srq;
         attr.sq_sig_type   = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
         attr.qp_type       = cmd.qp_type;
  
@@ -1004,3 +1007,178 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
  
         return ret ? ret : in_len;
  }
+
+ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+       struct ib_uverbs_create_srq      cmd;
+       struct ib_uverbs_create_srq_resp resp;
+       struct ib_udata                  udata;
+       struct ib_uobject               *uobj;
+       struct ib_pd                    *pd;
+       struct ib_srq                   *srq;
+       struct ib_srq_init_attr          attr;
+       int ret;
+
+       if (out_len < sizeof resp)
+               return -ENOSPC;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       INIT_UDATA(&udata, buf + sizeof cmd,
+                  (unsigned long) cmd.response + sizeof resp,
+                  in_len - sizeof cmd, out_len - sizeof resp);
+
+       uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+       if (!uobj)
+               return -ENOMEM;
+
+       down(&ib_uverbs_idr_mutex);
+
+       pd  = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+
+       if (!pd || pd->uobject->context != file->ucontext) {
+               ret = -EINVAL;
+               goto err_up;
+       }
+
+       attr.event_handler  = ib_uverbs_srq_event_handler;
+       attr.srq_context    = file;
+       attr.attr.max_wr    = cmd.max_wr;
+       attr.attr.max_sge   = cmd.max_sge;
+       attr.attr.srq_limit = cmd.srq_limit;
+
+       uobj->user_handle = cmd.user_handle;
+       uobj->context     = file->ucontext;
+
+       srq = pd->device->create_srq(pd, &attr, &udata);
+       if (IS_ERR(srq)) {
+               ret = PTR_ERR(srq);
+               goto err_up;
+       }
+
+       srq->device        = pd->device;
+       srq->pd            = pd;
+       srq->uobject       = uobj;
+       srq->event_handler = attr.event_handler;
+       srq->srq_context   = attr.srq_context;
+       atomic_inc(&pd->usecnt);
+       atomic_set(&srq->usecnt, 0);
+
+       memset(&resp, 0, sizeof resp);
+
+retry:
+       if (!idr_pre_get(&ib_uverbs_srq_idr, GFP_KERNEL)) {
+               ret = -ENOMEM;
+               goto err_destroy;
+       }
+
+       ret = idr_get_new(&ib_uverbs_srq_idr, srq, &uobj->id);
+
+       if (ret == -EAGAIN)
+               goto retry;
+       if (ret)
+               goto err_destroy;
+
+       resp.srq_handle = uobj->id;
+
+       spin_lock_irq(&file->ucontext->lock);
+       list_add_tail(&uobj->list, &file->ucontext->srq_list);
+       spin_unlock_irq(&file->ucontext->lock);
+
+       if (copy_to_user((void __user *) (unsigned long) cmd.response,
+                        &resp, sizeof resp)) {
+               ret = -EFAULT;
+               goto err_list;
+       }
+
+       up(&ib_uverbs_idr_mutex);
+
+       return in_len;
+
+err_list:
+       spin_lock_irq(&file->ucontext->lock);
+       list_del(&uobj->list);
+       spin_unlock_irq(&file->ucontext->lock);
+
+err_destroy:
+       ib_destroy_srq(srq);
+
+err_up:
+       up(&ib_uverbs_idr_mutex);
+
+       kfree(uobj);
+       return ret;
+}
+
+ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+       struct ib_uverbs_modify_srq cmd;
+       struct ib_srq              *srq;
+       struct ib_srq_attr          attr;
+       int                         ret;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       down(&ib_uverbs_idr_mutex);
+
+       srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
+       if (!srq || srq->uobject->context != file->ucontext) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       attr.max_wr    = cmd.max_wr;
+       attr.max_sge   = cmd.max_sge;
+       attr.srq_limit = cmd.srq_limit;
+
+       ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
+
+out:
+       up(&ib_uverbs_idr_mutex);
+
+       return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
+                             const char __user *buf, int in_len,
+                             int out_len)
+{
+       struct ib_uverbs_destroy_srq cmd;
+       struct ib_srq               *srq;
+       struct ib_uobject           *uobj;
+       int                          ret = -EINVAL;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       down(&ib_uverbs_idr_mutex);
+
+       srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
+       if (!srq || srq->uobject->context != file->ucontext)
+               goto out;
+
+       uobj = srq->uobject;
+
+       ret = ib_destroy_srq(srq);
+       if (ret)
+               goto out;
+
+       idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle);
+
+       spin_lock_irq(&file->ucontext->lock);
+       list_del(&uobj->list);
+       spin_unlock_irq(&file->ucontext->lock);
+
+       kfree(uobj);
+
+out:
+       up(&ib_uverbs_idr_mutex);
+
+       return ret ? ret : in_len;
+}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c

index eb99e693dec23b6d7cc2e5dcc38e79fa4f741fac..09caf5b1ef36131305bc02171672cc39550d777b 100644 (file)
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -1,6 +1,8 @@
  /*
   * Copyright (c) 2005 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -67,6 +69,7 @@ DEFINE_IDR(ib_uverbs_mw_idr);
  DEFINE_IDR(ib_uverbs_ah_idr);
  DEFINE_IDR(ib_uverbs_cq_idr);
  DEFINE_IDR(ib_uverbs_qp_idr);
+DEFINE_IDR(ib_uverbs_srq_idr);
  
  static spinlock_t map_lock;
  static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -91,6 +94,9 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
         [IB_USER_VERBS_CMD_DESTROY_QP]    = ib_uverbs_destroy_qp,
         [IB_USER_VERBS_CMD_ATTACH_MCAST]  = ib_uverbs_attach_mcast,
         [IB_USER_VERBS_CMD_DETACH_MCAST]  = ib_uverbs_detach_mcast,
+       [IB_USER_VERBS_CMD_CREATE_SRQ]    = ib_uverbs_create_srq,
+       [IB_USER_VERBS_CMD_MODIFY_SRQ]    = ib_uverbs_modify_srq,
+       [IB_USER_VERBS_CMD_DESTROY_SRQ]   = ib_uverbs_destroy_srq,
  };
  
  static struct vfsmount *uverbs_event_mnt;
@@ -125,18 +131,26 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context)
                 kfree(uobj);
         }
  
-       /* XXX Free SRQs */
+       list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
+               struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id);
+               idr_remove(&ib_uverbs_srq_idr, uobj->id);
+               ib_destroy_srq(srq);
+               list_del(&uobj->list);
+               kfree(uobj);
+       }
+
         /* XXX Free MWs */
  
         list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
                 struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id);
+               struct ib_device *mrdev = mr->device;
                 struct ib_umem_object *memobj;
  
                 idr_remove(&ib_uverbs_mr_idr, uobj->id);
                 ib_dereg_mr(mr);
  
                 memobj = container_of(uobj, struct ib_umem_object, uobject);
-               ib_umem_release_on_close(mr->device, &memobj->umem);
+               ib_umem_release_on_close(mrdev, &memobj->umem);
  
                 list_del(&uobj->list);
                 kfree(memobj);
@@ -343,6 +357,13 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
                                 event->event);
  }
  
+void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
+{
+       ib_uverbs_async_handler(context_ptr,
+                               event->element.srq->uobject->user_handle,
+                               event->event);
+}
+
  static void ib_uverbs_event_handler(struct ib_event_handler *handler,
                                     struct ib_event *event)
  {
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c

index ed550f6595bd7c07efa8d8597c1715f6cd8dd653..36a32c315668cebbc7e34be30704b0c4851bcf66 100644 (file)
--- a/drivers/infiniband/core/uverbs_mem.c
+++ b/drivers/infiniband/core/uverbs_mem.c
@@ -1,6 +1,7 @@
  /*
   * Copyright (c) 2005 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c

index 506fdf1f2a268de31fe957f7c362fb7ae7d1e1f9..5081d903e5617d00e602da1a89731a555984598a 100644 (file)
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -4,6 +4,7 @@
   * Copyright (c) 2004 Intel Corporation.  All rights reserved.
   * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
   * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   * Copyright (c) 2005 Cisco Systems.  All rights reserved.
   *
   * This software is available to you under a choice of one of two
@@ -40,8 +41,8 @@
  #include <linux/errno.h>
  #include <linux/err.h>
  
-#include <ib_verbs.h>
-#include <ib_cache.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
  
  /* Protection domains */
  
@@ -153,6 +154,66 @@ int ib_destroy_ah(struct ib_ah *ah)
  }
  EXPORT_SYMBOL(ib_destroy_ah);
  
+/* Shared receive queues */
+
+struct ib_srq *ib_create_srq(struct ib_pd *pd,
+                            struct ib_srq_init_attr *srq_init_attr)
+{
+       struct ib_srq *srq;
+
+       if (!pd->device->create_srq)
+               return ERR_PTR(-ENOSYS);
+
+       srq = pd->device->create_srq(pd, srq_init_attr, NULL);
+
+       if (!IS_ERR(srq)) {
+               srq->device        = pd->device;
+               srq->pd            = pd;
+               srq->uobject       = NULL;
+               srq->event_handler = srq_init_attr->event_handler;
+               srq->srq_context   = srq_init_attr->srq_context;
+               atomic_inc(&pd->usecnt);
+               atomic_set(&srq->usecnt, 0);
+       }
+
+       return srq;
+}
+EXPORT_SYMBOL(ib_create_srq);
+
+int ib_modify_srq(struct ib_srq *srq,
+                 struct ib_srq_attr *srq_attr,
+                 enum ib_srq_attr_mask srq_attr_mask)
+{
+       return srq->device->modify_srq(srq, srq_attr, srq_attr_mask);
+}
+EXPORT_SYMBOL(ib_modify_srq);
+
+int ib_query_srq(struct ib_srq *srq,
+                struct ib_srq_attr *srq_attr)
+{
+       return srq->device->query_srq ?
+               srq->device->query_srq(srq, srq_attr) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_query_srq);
+
+int ib_destroy_srq(struct ib_srq *srq)
+{
+       struct ib_pd *pd;
+       int ret;
+
+       if (atomic_read(&srq->usecnt))
+               return -EBUSY;
+
+       pd = srq->pd;
+
+       ret = srq->device->destroy_srq(srq);
+       if (!ret)
+               atomic_dec(&pd->usecnt);
+
+       return ret;
+}
+EXPORT_SYMBOL(ib_destroy_srq);
+
  /* Queue pairs */
  
  struct ib_qp *ib_create_qp(struct ib_pd *pd,
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile

index 5dcbd43073e2454d3944b3fddf1006b325c0d0b5..c44f7bae5424fd8227430918ecfd264522569cd0 100644 (file)
--- a/drivers/infiniband/hw/mthca/Makefile
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -1,5 +1,3 @@
-EXTRA_CFLAGS += -Idrivers/infiniband/include
-
  ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
  EXTRA_CFLAGS += -DDEBUG
  endif
@@ -9,4 +7,4 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
  ib_mthca-y :=  mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
                 mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
                 mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
-               mthca_provider.o mthca_memfree.o mthca_uar.o
+               mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c

index b1db48dd91d6ee8ba484c178fa1e851cdb00bfe5..9ba3211cef7cb2a7747348420d5dbad3ff0ee18d 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_allocator.c
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -177,3 +177,119 @@ void mthca_array_cleanup(struct mthca_array *array, int nent)
  
         kfree(array->page_list);
  }
+
+/*
+ * Handling for queue buffers -- we allocate a bunch of memory and
+ * register it in a memory region at HCA virtual address 0.  If the
+ * requested size is > max_direct, we split the allocation into
+ * multiple pages, so we don't require too much contiguous memory.
+ */
+
+int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
+                   union mthca_buf *buf, int *is_direct, struct mthca_pd *pd,
+                   int hca_write, struct mthca_mr *mr)
+{
+       int err = -ENOMEM;
+       int npages, shift;
+       u64 *dma_list = NULL;
+       dma_addr_t t;
+       int i;
+
+       if (size <= max_direct) {
+               *is_direct = 1;
+               npages     = 1;
+               shift      = get_order(size) + PAGE_SHIFT;
+
+               buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev,
+                                                    size, &t, GFP_KERNEL);
+               if (!buf->direct.buf)
+                       return -ENOMEM;
+
+               pci_unmap_addr_set(&buf->direct, mapping, t);
+
+               memset(buf->direct.buf, 0, size);
+
+               while (t & ((1 << shift) - 1)) {
+                       --shift;
+                       npages *= 2;
+               }
+
+               dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+               if (!dma_list)
+                       goto err_free;
+
+               for (i = 0; i < npages; ++i)
+                       dma_list[i] = t + i * (1 << shift);
+       } else {
+               *is_direct = 0;
+               npages     = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+               shift      = PAGE_SHIFT;
+
+               dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+               if (!dma_list)
+                       return -ENOMEM;
+
+               buf->page_list = kmalloc(npages * sizeof *buf->page_list,
+                                        GFP_KERNEL);
+               if (!buf->page_list)
+                       goto err_out;
+
+               for (i = 0; i < npages; ++i)
+                       buf->page_list[i].buf = NULL;
+
+               for (i = 0; i < npages; ++i) {
+                       buf->page_list[i].buf =
+                               dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+                                                  &t, GFP_KERNEL);
+                       if (!buf->page_list[i].buf)
+                               goto err_free;
+
+                       dma_list[i] = t;
+                       pci_unmap_addr_set(&buf->page_list[i], mapping, t);
+
+                       memset(buf->page_list[i].buf, 0, PAGE_SIZE);
+               }
+       }
+
+       err = mthca_mr_alloc_phys(dev, pd->pd_num,
+                                 dma_list, shift, npages,
+                                 0, size,
+                                 MTHCA_MPT_FLAG_LOCAL_READ |
+                                 (hca_write ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0),
+                                 mr);
+       if (err)
+               goto err_free;
+
+       kfree(dma_list);
+
+       return 0;
+
+err_free:
+       mthca_buf_free(dev, size, buf, *is_direct, NULL);
+
+err_out:
+       kfree(dma_list);
+
+       return err;
+}
+
+void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf,
+                   int is_direct, struct mthca_mr *mr)
+{
+       int i;
+
+       if (mr)
+               mthca_free_mr(dev, mr);
+
+       if (is_direct)
+               dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
+                                 pci_unmap_addr(&buf->direct, mapping));
+       else {
+               for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
+                       dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+                                         buf->page_list[i].buf,
+                                         pci_unmap_addr(&buf->page_list[i],
+                                                        mapping));
+               kfree(buf->page_list);
+       }
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c

index d58dcbe66488080b3ccdfc497cfb7fdc19822aae..889e85096736c198ef2cbc6a6525cfafcd06c7ea 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -35,22 +35,22 @@
  
  #include <linux/init.h>
  
-#include <ib_verbs.h>
-#include <ib_cache.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
  
  #include "mthca_dev.h"
  
  struct mthca_av {
-       u32 port_pd;
-       u8  reserved1;
-       u8  g_slid;
-       u16 dlid;
-       u8  reserved2;
-       u8  gid_index;
-       u8  msg_sr;
-       u8  hop_limit;
-       u32 sl_tclass_flowlabel;
-       u32 dgid[4];
+       __be32 port_pd;
+       u8     reserved1;
+       u8     g_slid;
+       __be16 dlid;
+       u8     reserved2;
+       u8     gid_index;
+       u8     msg_sr;
+       u8     hop_limit;
+       __be32 sl_tclass_flowlabel;
+       __be32 dgid[4];
  };
  
  int mthca_create_ah(struct mthca_dev *dev,
@@ -128,7 +128,7 @@ on_hca_fail:
                           av, (unsigned long) ah->avdma);
                 for (j = 0; j < 8; ++j)
                         printk(KERN_DEBUG "  [%2x] %08x\n",
-                              j * 4, be32_to_cpu(((u32 *) av)[j]));
+                              j * 4, be32_to_cpu(((__be32 *) av)[j]));
         }
  
         if (ah->type == MTHCA_AH_ON_HCA) {
@@ -169,7 +169,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
  
         header->lrh.service_level   = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
         header->lrh.destination_lid = ah->av->dlid;
-       header->lrh.source_lid      = ah->av->g_slid & 0x7f;
+       header->lrh.source_lid      = cpu_to_be16(ah->av->g_slid & 0x7f);
         if (ah->av->g_slid & 0x80) {
                 header->grh_present = 1;
                 header->grh.traffic_class =
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c

index 1557a522d8319a8e3e2c1bc497a933094a056f87..cc758a2d2bc6cad452f73b644ada7246ea2441bc 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -36,7 +37,7 @@
  #include <linux/pci.h>
  #include <linux/errno.h>
  #include <asm/io.h>
-#include <ib_mad.h>
+#include <rdma/ib_mad.h>
  
  #include "mthca_dev.h"
  #include "mthca_config_reg.h"
@@ -108,6 +109,7 @@ enum {
         CMD_SW2HW_SRQ       = 0x35,
         CMD_HW2SW_SRQ       = 0x36,
         CMD_QUERY_SRQ       = 0x37,
+       CMD_ARM_SRQ         = 0x40,
  
         /* QP/EE commands */
         CMD_RST2INIT_QPEE   = 0x19,
@@ -219,20 +221,20 @@ static int mthca_cmd_post(struct mthca_dev *dev,
          * (and some architectures such as ia64 implement memcpy_toio
          * in terms of writeb).
          */
-       __raw_writel(cpu_to_be32(in_param >> 32),           dev->hcr + 0 * 4);
-       __raw_writel(cpu_to_be32(in_param & 0xfffffffful),  dev->hcr + 1 * 4);
-       __raw_writel(cpu_to_be32(in_modifier),              dev->hcr + 2 * 4);
-       __raw_writel(cpu_to_be32(out_param >> 32),          dev->hcr + 3 * 4);
-       __raw_writel(cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4);
-       __raw_writel(cpu_to_be32(token << 16),              dev->hcr + 5 * 4);
+       __raw_writel((__force u32) cpu_to_be32(in_param >> 32),           dev->hcr + 0 * 4);
+       __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful),  dev->hcr + 1 * 4);
+       __raw_writel((__force u32) cpu_to_be32(in_modifier),              dev->hcr + 2 * 4);
+       __raw_writel((__force u32) cpu_to_be32(out_param >> 32),          dev->hcr + 3 * 4);
+       __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4);
+       __raw_writel((__force u32) cpu_to_be32(token << 16),              dev->hcr + 5 * 4);
  
         /* __raw_writel may not order writes. */
         wmb();
  
-       __raw_writel(cpu_to_be32((1 << HCR_GO_BIT)                |
-                                (event ? (1 << HCA_E_BIT) : 0)   |
-                                (op_modifier << HCR_OPMOD_SHIFT) |
-                                op),                       dev->hcr + 6 * 4);
+       __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT)                |
+                                              (event ? (1 << HCA_E_BIT) : 0)   |
+                                              (op_modifier << HCR_OPMOD_SHIFT) |
+                                              op),                       dev->hcr + 6 * 4);
  
  out:
         up(&dev->cmd.hcr_sem);
@@ -273,12 +275,14 @@ static int mthca_cmd_poll(struct mthca_dev *dev,
                 goto out;
         }
  
-       if (out_is_imm) {
-               memcpy_fromio(out_param, dev->hcr + HCR_OUT_PARAM_OFFSET, sizeof (u64));
-               be64_to_cpus(out_param);
-       }
+       if (out_is_imm)
+               *out_param = 
+                       (u64) be32_to_cpu((__force __be32)
+                                         __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
+                       (u64) be32_to_cpu((__force __be32)
+                                         __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4));
  
-       *status = be32_to_cpu(__raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
+       *status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
  
  out:
         up(&dev->cmd.poll_sem);
@@ -1029,6 +1033,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
  
         mthca_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
                   dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz);
+       mthca_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n",
+                 dev_lim->max_srqs, dev_lim->reserved_srqs, dev_lim->srq_entry_sz);
         mthca_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n",
                   dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz);
         mthca_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n",
@@ -1082,6 +1088,34 @@ out:
         return err;
  }
  
+static void get_board_id(void *vsd, char *board_id)
+{
+       int i;
+
+#define VSD_OFFSET_SIG1                0x00
+#define VSD_OFFSET_SIG2                0xde
+#define VSD_OFFSET_MLX_BOARD_ID        0xd0
+#define VSD_OFFSET_TS_BOARD_ID 0x20
+
+#define VSD_SIGNATURE_TOPSPIN  0x5ad
+
+       memset(board_id, 0, MTHCA_BOARD_ID_LEN);
+
+       if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN &&
+           be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) {
+               strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN);
+       } else {
+               /*
+                * The board ID is a string but the firmware byte
+                * swaps each 4-byte word before passing it back to
+                * us.  Therefore we need to swab it before printing.
+                */
+               for (i = 0; i < 4; ++i)
+                       ((u32 *) board_id)[i] =
+                               swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4));
+       }
+}
+
  int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
                         struct mthca_adapter *adapter, u8 *status)
  {
@@ -1094,6 +1128,7 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
  #define QUERY_ADAPTER_DEVICE_ID_OFFSET     0x04
  #define QUERY_ADAPTER_REVISION_ID_OFFSET   0x08
  #define QUERY_ADAPTER_INTA_PIN_OFFSET      0x10
+#define QUERY_ADAPTER_VSD_OFFSET           0x20
  
         mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
         if (IS_ERR(mailbox))
@@ -1111,6 +1146,9 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
         MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
         MTHCA_GET(adapter->inta_pin, outbox,    QUERY_ADAPTER_INTA_PIN_OFFSET);
  
+       get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4,
+                    adapter->board_id);
+
  out:
         mthca_free_mailbox(dev, mailbox);
         return err;
@@ -1121,7 +1159,7 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
                    u8 *status)
  {
         struct mthca_mailbox *mailbox;
-       u32 *inbox;
+       __be32 *inbox;
         int err;
  
  #define INIT_HCA_IN_SIZE                0x200
@@ -1247,10 +1285,8 @@ int mthca_INIT_IB(struct mthca_dev *dev,
  #define INIT_IB_FLAG_SIG         (1 << 18)
  #define INIT_IB_FLAG_NG          (1 << 17)
  #define INIT_IB_FLAG_G0          (1 << 16)
-#define INIT_IB_FLAG_1X          (1 << 8)
-#define INIT_IB_FLAG_4X          (1 << 9)
-#define INIT_IB_FLAG_12X         (1 << 11)
  #define INIT_IB_VL_SHIFT         4
+#define INIT_IB_PORT_WIDTH_SHIFT 8
  #define INIT_IB_MTU_SHIFT        12
  #define INIT_IB_MAX_GID_OFFSET   0x06
  #define INIT_IB_MAX_PKEY_OFFSET  0x0a
@@ -1266,12 +1302,11 @@ int mthca_INIT_IB(struct mthca_dev *dev,
         memset(inbox, 0, INIT_IB_IN_SIZE);
  
         flags = 0;
-       flags |= param->enable_1x     ? INIT_IB_FLAG_1X  : 0;
-       flags |= param->enable_4x     ? INIT_IB_FLAG_4X  : 0;
         flags |= param->set_guid0     ? INIT_IB_FLAG_G0  : 0;
         flags |= param->set_node_guid ? INIT_IB_FLAG_NG  : 0;
         flags |= param->set_si_guid   ? INIT_IB_FLAG_SIG : 0;
         flags |= param->vl_cap << INIT_IB_VL_SHIFT;
+       flags |= param->port_width << INIT_IB_PORT_WIDTH_SHIFT;
         flags |= param->mtu_cap << INIT_IB_MTU_SHIFT;
         MTHCA_PUT(inbox, flags, INIT_IB_FLAGS_OFFSET);
  
@@ -1342,7 +1377,7 @@ int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *st
  int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status)
  {
         struct mthca_mailbox *mailbox;
-       u64 *inbox;
+       __be64 *inbox;
         int err;
  
         mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
@@ -1468,6 +1503,27 @@ int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
                              CMD_TIME_CLASS_A, status);
  }
  
+int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+                   int srq_num, u8 *status)
+{
+       return mthca_cmd(dev, mailbox->dma, srq_num, 0, CMD_SW2HW_SRQ,
+                       CMD_TIME_CLASS_A, status);
+}
+
+int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+                   int srq_num, u8 *status)
+{
+       return mthca_cmd_box(dev, 0, mailbox->dma, srq_num, 0,
+                            CMD_HW2SW_SRQ,
+                            CMD_TIME_CLASS_A, status);
+}
+
+int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status)
+{
+       return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ,
+                        CMD_TIME_CLASS_B, status);
+}
+
  int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
                     int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
                     u8 *status)
@@ -1513,7 +1569,7 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
                                 if (i % 8 == 0)
                                         printk("  [%02x] ", i * 4);
                                 printk(" %08x",
-                                      be32_to_cpu(((u32 *) mailbox->buf)[i + 2]));
+                                      be32_to_cpu(((__be32 *) mailbox->buf)[i + 2]));
                                 if ((i + 1) % 8 == 0)
                                         printk("\n");
                         }
@@ -1533,7 +1589,7 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
                                 if (i % 8 == 0)
                                         printk("[%02x] ", i * 4);
                                 printk(" %08x",
-                                      be32_to_cpu(((u32 *) mailbox->buf)[i + 2]));
+                                      be32_to_cpu(((__be32 *) mailbox->buf)[i + 2]));
                                 if ((i + 1) % 8 == 0)
                                         printk("\n");
                         }
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h

index ed517f175dd6e68aee23bdbf8a4d97af82deab38..65f976a13e02065469c99001d41d2a09201853e7 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -35,7 +36,7 @@
  #ifndef MTHCA_CMD_H
  #define MTHCA_CMD_H
  
-#include <ib_verbs.h>
+#include <rdma/ib_verbs.h>
  
  #define MTHCA_MAILBOX_SIZE 4096
  
@@ -183,10 +184,11 @@ struct mthca_dev_lim {
  };
  
  struct mthca_adapter {
-       u32 vendor_id;
-       u32 device_id;
-       u32 revision_id;
-       u8  inta_pin;
+       u32  vendor_id;
+       u32  device_id;
+       u32  revision_id;
+       char board_id[MTHCA_BOARD_ID_LEN];
+       u8   inta_pin;
  };
  
  struct mthca_init_hca_param {
@@ -218,8 +220,7 @@ struct mthca_init_hca_param {
  };
  
  struct mthca_init_ib_param {
-       int enable_1x;
-       int enable_4x;
+       int port_width;
         int vl_cap;
         int mtu_cap;
         u16 gid_cap;
@@ -297,6 +298,11 @@ int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
                    int cq_num, u8 *status);
  int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
                    int cq_num, u8 *status);
+int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+                   int srq_num, u8 *status);
+int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+                   int srq_num, u8 *status);
+int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status);
  int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
                     int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
                     u8 *status);
diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h

index b4bfbbfe2c3df20a586dd8f71bc8cdf2a5d1dbf9..afa56bfaab2ee11e4e0d023084291a0eabd3f8d1 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_config_reg.h
+++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c

index 5687c3014522567140845aaa8ac3cde98df1539b..8600b6c3e0c262c72c45a8a281091238012619d8 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -2,6 +2,8 @@
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -37,7 +39,7 @@
  #include <linux/init.h>
  #include <linux/hardirq.h>
  
-#include <ib_pack.h>
+#include <rdma/ib_pack.h>
  
  #include "mthca_dev.h"
  #include "mthca_cmd.h"
@@ -55,21 +57,21 @@ enum {
   * Must be packed because start is 64 bits but only aligned to 32 bits.
   */
  struct mthca_cq_context {
-       u32 flags;
-       u64 start;
-       u32 logsize_usrpage;
-       u32 error_eqn;          /* Tavor only */
-       u32 comp_eqn;
-       u32 pd;
-       u32 lkey;
-       u32 last_notified_index;
-       u32 solicit_producer_index;
-       u32 consumer_index;
-       u32 producer_index;
-       u32 cqn;
-       u32 ci_db;              /* Arbel only */
-       u32 state_db;           /* Arbel only */
-       u32 reserved;
+       __be32 flags;
+       __be64 start;
+       __be32 logsize_usrpage;
+       __be32 error_eqn;       /* Tavor only */
+       __be32 comp_eqn;
+       __be32 pd;
+       __be32 lkey;
+       __be32 last_notified_index;
+       __be32 solicit_producer_index;
+       __be32 consumer_index;
+       __be32 producer_index;
+       __be32 cqn;
+       __be32 ci_db;           /* Arbel only */
+       __be32 state_db;        /* Arbel only */
+       u32    reserved;
  } __attribute__((packed));
  
  #define MTHCA_CQ_STATUS_OK          ( 0 << 28)
@@ -108,31 +110,31 @@ enum {
  };
  
  struct mthca_cqe {
-       u32 my_qpn;
-       u32 my_ee;
-       u32 rqpn;
-       u16 sl_g_mlpath;
-       u16 rlid;
-       u32 imm_etype_pkey_eec;
-       u32 byte_cnt;
-       u32 wqe;
-       u8  opcode;
-       u8  is_send;
-       u8  reserved;
-       u8  owner;
+       __be32 my_qpn;
+       __be32 my_ee;
+       __be32 rqpn;
+       __be16 sl_g_mlpath;
+       __be16 rlid;
+       __be32 imm_etype_pkey_eec;
+       __be32 byte_cnt;
+       __be32 wqe;
+       u8     opcode;
+       u8     is_send;
+       u8     reserved;
+       u8     owner;
  };
  
  struct mthca_err_cqe {
-       u32 my_qpn;
-       u32 reserved1[3];
-       u8  syndrome;
-       u8  reserved2;
-       u16 db_cnt;
-       u32 reserved3;
-       u32 wqe;
-       u8  opcode;
-       u8  reserved4[2];
-       u8  owner;
+       __be32 my_qpn;
+       u32    reserved1[3];
+       u8     syndrome;
+       u8     reserved2;
+       __be16 db_cnt;
+       u32    reserved3;
+       __be32 wqe;
+       u8     opcode;
+       u8     reserved4[2];
+       u8     owner;
  };
  
  #define MTHCA_CQ_ENTRY_OWNER_SW      (0 << 7)
@@ -191,7 +193,7 @@ static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
  static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
                                      int incr)
  {
-       u32 doorbell[2];
+       __be32 doorbell[2];
  
         if (mthca_is_memfree(dev)) {
                 *cq->set_ci_db = cpu_to_be32(cq->cons_index);
@@ -222,7 +224,8 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn)
         cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
  }
  
-void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn)
+void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
+                   struct mthca_srq *srq)
  {
         struct mthca_cq *cq;
         struct mthca_cqe *cqe;
@@ -263,8 +266,11 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn)
          */
         while (prod_index > cq->cons_index) {
                 cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe);
-               if (cqe->my_qpn == cpu_to_be32(qpn))
+               if (cqe->my_qpn == cpu_to_be32(qpn)) {
+                       if (srq)
+                               mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe));
                         ++nfreed;
+               }
                 else if (nfreed)
                         memcpy(get_cqe(cq, (prod_index - 1 + nfreed) &
                                        cq->ibcq.cqe),
@@ -291,7 +297,7 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
  {
         int err;
         int dbd;
-       u32 new_wqe;
+       __be32 new_wqe;
  
         if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) {
                 mthca_dbg(dev, "local QP operation err "
@@ -365,6 +371,13 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
                 break;
         }
  
+       /*
+        * Mem-free HCAs always generate one CQE per WQE, even in the
+        * error case, so we don't have to check the doorbell count, etc.
+        */
+       if (mthca_is_memfree(dev))
+               return 0;
+
         err = mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
         if (err)
                 return err;
@@ -373,12 +386,8 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
          * If we're at the end of the WQE chain, or we've used up our
          * doorbell count, free the CQE.  Otherwise just update it for
          * the next poll operation.
-        *
-        * This does not apply to mem-free HCAs: they don't use the
-        * doorbell count field, and so we should always free the CQE.
          */
-       if (mthca_is_memfree(dev) ||
-           !(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd))
+       if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd))
                 return 0;
  
         cqe->db_cnt   = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd);
@@ -450,23 +459,27 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
                              >> wq->wqe_shift);
                 entry->wr_id = (*cur_qp)->wrid[wqe_index +
                                                (*cur_qp)->rq.max];
+       } else if ((*cur_qp)->ibqp.srq) {
+               struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq);
+               u32 wqe = be32_to_cpu(cqe->wqe);
+               wq = NULL;
+               wqe_index = wqe >> srq->wqe_shift;
+               entry->wr_id = srq->wrid[wqe_index];
+               mthca_free_srq_wqe(srq, wqe);
         } else {
                 wq = &(*cur_qp)->rq;
                 wqe_index = be32_to_cpu(cqe->wqe) >> wq->wqe_shift;
                 entry->wr_id = (*cur_qp)->wrid[wqe_index];
         }
  
-       if (wq->last_comp < wqe_index)
-               wq->tail += wqe_index - wq->last_comp;
-       else
-               wq->tail += wqe_index + wq->max - wq->last_comp;
-
-       wq->last_comp = wqe_index;
+       if (wq) {
+               if (wq->last_comp < wqe_index)
+                       wq->tail += wqe_index - wq->last_comp;
+               else
+                       wq->tail += wqe_index + wq->max - wq->last_comp;
  
-       if (0)
-               mthca_dbg(dev, "%s completion for QP %06x, index %d (nr %d)\n",
-                         is_send ? "Send" : "Receive",
-                         (*cur_qp)->qpn, wqe_index, wq->max);
+               wq->last_comp = wqe_index;
+       }
  
         if (is_error) {
                 err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
@@ -584,13 +597,13 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
  
  int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify)
  {
-       u32 doorbell[2];
+       __be32 doorbell[2];
  
         doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ?
                                    MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
                                    MTHCA_TAVOR_CQ_DB_REQ_NOT)      |
                                   to_mcq(cq)->cqn);
-       doorbell[1] = 0xffffffff;
+       doorbell[1] = (__force __be32) 0xffffffff;
  
         mthca_write64(doorbell,
                       to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
@@ -602,9 +615,9 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify)
  int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
  {
         struct mthca_cq *cq = to_mcq(ibcq);
-       u32 doorbell[2];
+       __be32 doorbell[2];
         u32 sn;
-       u32 ci;
+       __be32 ci;
  
         sn = cq->arm_sn & 3;
         ci = cpu_to_be32(cq->cons_index);
@@ -637,113 +650,8 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
  
  static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq)
  {
-       int i;
-       int size;
-
-       if (cq->is_direct)
-               dma_free_coherent(&dev->pdev->dev,
-                                 (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
-                                 cq->queue.direct.buf,
-                                 pci_unmap_addr(&cq->queue.direct,
-                                                mapping));
-       else {
-               size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE;
-               for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
-                       if (cq->queue.page_list[i].buf)
-                               dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
-                                                 cq->queue.page_list[i].buf,
-                                                 pci_unmap_addr(&cq->queue.page_list[i],
-                                                                mapping));
-
-               kfree(cq->queue.page_list);
-       }
-}
-
-static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size,
-                             struct mthca_cq *cq)
-{
-       int err = -ENOMEM;
-       int npages, shift;
-       u64 *dma_list = NULL;
-       dma_addr_t t;
-       int i;
-
-       if (size <= MTHCA_MAX_DIRECT_CQ_SIZE) {
-               cq->is_direct = 1;
-               npages        = 1;
-               shift         = get_order(size) + PAGE_SHIFT;
-
-               cq->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev,
-                                                         size, &t, GFP_KERNEL);
-               if (!cq->queue.direct.buf)
-                       return -ENOMEM;
-
-               pci_unmap_addr_set(&cq->queue.direct, mapping, t);
-
-               memset(cq->queue.direct.buf, 0, size);
-
-               while (t & ((1 << shift) - 1)) {
-                       --shift;
-                       npages *= 2;
-               }
-
-               dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
-               if (!dma_list)
-                       goto err_free;
-
-               for (i = 0; i < npages; ++i)
-                       dma_list[i] = t + i * (1 << shift);
-       } else {
-               cq->is_direct = 0;
-               npages        = (size + PAGE_SIZE - 1) / PAGE_SIZE;
-               shift         = PAGE_SHIFT;
-
-               dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
-               if (!dma_list)
-                       return -ENOMEM;
-
-               cq->queue.page_list = kmalloc(npages * sizeof *cq->queue.page_list,
-                                             GFP_KERNEL);
-               if (!cq->queue.page_list)
-                       goto err_out;
-
-               for (i = 0; i < npages; ++i)
-                       cq->queue.page_list[i].buf = NULL;
-
-               for (i = 0; i < npages; ++i) {
-                       cq->queue.page_list[i].buf =
-                               dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
-                                                  &t, GFP_KERNEL);
-                       if (!cq->queue.page_list[i].buf)
-                               goto err_free;
-
-                       dma_list[i] = t;
-                       pci_unmap_addr_set(&cq->queue.page_list[i], mapping, t);
-
-                       memset(cq->queue.page_list[i].buf, 0, PAGE_SIZE);
-               }
-       }
-
-       err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
-                                 dma_list, shift, npages,
-                                 0, size,
-                                 MTHCA_MPT_FLAG_LOCAL_WRITE |
-                                 MTHCA_MPT_FLAG_LOCAL_READ,
-                                 &cq->mr);
-       if (err)
-               goto err_free;
-
-       kfree(dma_list);
-
-       return 0;
-
-err_free:
-       mthca_free_cq_buf(dev, cq);
-
-err_out:
-       kfree(dma_list);
-
-       return err;
+       mthca_buf_free(dev, (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
+                      &cq->queue, cq->is_direct, &cq->mr);
  }
  
  int mthca_init_cq(struct mthca_dev *dev, int nent,
@@ -795,7 +703,9 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
         cq_context = mailbox->buf;
  
         if (cq->is_kernel) {
-               err = mthca_alloc_cq_buf(dev, size, cq);
+               err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_CQ_SIZE,
+                                     &cq->queue, &cq->is_direct,
+                                     &dev->driver_pd, 1, &cq->mr);
                 if (err)
                         goto err_out_mailbox;
  
@@ -811,7 +721,6 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
         cq_context->flags           = cpu_to_be32(MTHCA_CQ_STATUS_OK      |
                                                   MTHCA_CQ_STATE_DISARMED |
                                                   MTHCA_CQ_FLAG_TR);
-       cq_context->start           = cpu_to_be64(0);
         cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
         if (ctx)
                 cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
@@ -857,10 +766,8 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
         return 0;
  
  err_out_free_mr:
-       if (cq->is_kernel) {
-               mthca_free_mr(dev, &cq->mr);
+       if (cq->is_kernel)
                 mthca_free_cq_buf(dev, cq);
-       }
  
  err_out_mailbox:
         mthca_free_mailbox(dev, mailbox);
@@ -904,7 +811,7 @@ void mthca_free_cq(struct mthca_dev *dev,
                 mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status);
  
         if (0) {
-               u32 *ctx = mailbox->buf;
+               __be32 *ctx = mailbox->buf;
                 int j;
  
                 printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
@@ -928,7 +835,6 @@ void mthca_free_cq(struct mthca_dev *dev,
         wait_event(cq->wait, !atomic_read(&cq->refcount));
  
         if (cq->is_kernel) {
-               mthca_free_mr(dev, &cq->mr);
                 mthca_free_cq_buf(dev, cq);
                 if (mthca_is_memfree(dev)) {
                         mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM,    cq->arm_db_index);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h

index 5ecdd2eeeb0f0c18ac3e96e8c8b140acaee895ec..7bff5a8425f4e5c5fbd7eb634a78b13eb8a3b88f 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -2,6 +2,8 @@
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -66,6 +68,10 @@ enum {
         MTHCA_MAX_PORTS = 2
  };
  
+enum {
+       MTHCA_BOARD_ID_LEN = 64
+};
+
  enum {
         MTHCA_EQ_CONTEXT_SIZE =  0x40,
         MTHCA_CQ_CONTEXT_SIZE =  0x40,
@@ -142,6 +148,7 @@ struct mthca_limits {
         int      reserved_mcgs;
         int      num_pds;
         int      reserved_pds;
+       u8       port_width_cap;
  };
  
  struct mthca_alloc {
@@ -211,6 +218,13 @@ struct mthca_cq_table {
         struct mthca_icm_table *table;
  };
  
+struct mthca_srq_table {
+       struct mthca_alloc      alloc;
+       spinlock_t              lock;
+       struct mthca_array      srq;
+       struct mthca_icm_table *table;
+};
+
  struct mthca_qp_table {
         struct mthca_alloc      alloc;
         u32                     rdb_base;
@@ -246,6 +260,7 @@ struct mthca_dev {
         unsigned long    device_cap_flags;
  
         u32              rev_id;
+       char             board_id[MTHCA_BOARD_ID_LEN];
  
         /* firmware info */
         u64              fw_ver;
@@ -291,6 +306,7 @@ struct mthca_dev {
         struct mthca_mr_table  mr_table;
         struct mthca_eq_table  eq_table;
         struct mthca_cq_table  cq_table;
+       struct mthca_srq_table srq_table;
         struct mthca_qp_table  qp_table;
         struct mthca_av_table  av_table;
         struct mthca_mcg_table mcg_table;
@@ -331,14 +347,13 @@ extern void __buggy_use_of_MTHCA_PUT(void);
  
  #define MTHCA_PUT(dest, source, offset)                               \
         do {                                                          \
-               __typeof__(source) *__p =                             \
-                       (__typeof__(source) *) ((char *) (dest) + (offset)); \
+               void *__d = ((char *) (dest) + (offset));             \
                 switch (sizeof(source)) {                             \
-                       case 1: *__p = (source);            break;    \
-                       case 2: *__p = cpu_to_be16(source); break;    \
-                       case 4: *__p = cpu_to_be32(source); break;    \
-                       case 8: *__p = cpu_to_be64(source); break;    \
-                       default: __buggy_use_of_MTHCA_PUT();          \
+               case 1: *(u8 *) __d = (source);                break; \
+               case 2: *(__be16 *) __d = cpu_to_be16(source); break; \
+               case 4: *(__be32 *) __d = cpu_to_be32(source); break; \
+               case 8: *(__be64 *) __d = cpu_to_be64(source); break; \
+               default: __buggy_use_of_MTHCA_PUT();                  \
                 }                                                     \
         } while (0)
  
@@ -354,12 +369,18 @@ int mthca_array_set(struct mthca_array *array, int index, void *value);
  void mthca_array_clear(struct mthca_array *array, int index);
  int mthca_array_init(struct mthca_array *array, int nent);
  void mthca_array_cleanup(struct mthca_array *array, int nent);
+int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
+                   union mthca_buf *buf, int *is_direct, struct mthca_pd *pd,
+                   int hca_write, struct mthca_mr *mr);
+void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf,
+                   int is_direct, struct mthca_mr *mr);
  
  int mthca_init_uar_table(struct mthca_dev *dev);
  int mthca_init_pd_table(struct mthca_dev *dev);
  int mthca_init_mr_table(struct mthca_dev *dev);
  int mthca_init_eq_table(struct mthca_dev *dev);
  int mthca_init_cq_table(struct mthca_dev *dev);
+int mthca_init_srq_table(struct mthca_dev *dev);
  int mthca_init_qp_table(struct mthca_dev *dev);
  int mthca_init_av_table(struct mthca_dev *dev);
  int mthca_init_mcg_table(struct mthca_dev *dev);
@@ -369,6 +390,7 @@ void mthca_cleanup_pd_table(struct mthca_dev *dev);
  void mthca_cleanup_mr_table(struct mthca_dev *dev);
  void mthca_cleanup_eq_table(struct mthca_dev *dev);
  void mthca_cleanup_cq_table(struct mthca_dev *dev);
+void mthca_cleanup_srq_table(struct mthca_dev *dev);
  void mthca_cleanup_qp_table(struct mthca_dev *dev);
  void mthca_cleanup_av_table(struct mthca_dev *dev);
  void mthca_cleanup_mcg_table(struct mthca_dev *dev);
@@ -419,7 +441,19 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
  void mthca_free_cq(struct mthca_dev *dev,
                    struct mthca_cq *cq);
  void mthca_cq_event(struct mthca_dev *dev, u32 cqn);
-void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn);
+void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
+                   struct mthca_srq *srq);
+
+int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
+                   struct ib_srq_attr *attr, struct mthca_srq *srq);
+void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
+void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
+                    enum ib_event_type event_type);
+void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
+int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
+                             struct ib_recv_wr **bad_wr);
+int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
+                             struct ib_recv_wr **bad_wr);
  
  void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
                     enum ib_event_type event_type);
@@ -433,7 +467,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
  int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                              struct ib_recv_wr **bad_wr);
  int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
-                      int index, int *dbd, u32 *new_wqe);
+                      int index, int *dbd, __be32 *new_wqe);
  int mthca_alloc_qp(struct mthca_dev *dev,
                    struct mthca_pd *pd,
                    struct mthca_cq *send_cq,
diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h

index 535fad7710fb7129e7d9b2cea96c981cc1d9436e..dd9a44d170c9e951e60517b6a3c0c77b03163b04 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_doorbell.h
+++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h
@@ -1,6 +1,7 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -57,13 +58,13 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
         __raw_writeq((__force u64) val, dest);
  }
  
-static inline void mthca_write64(u32 val[2], void __iomem *dest,
+static inline void mthca_write64(__be32 val[2], void __iomem *dest,
                                  spinlock_t *doorbell_lock)
  {
         __raw_writeq(*(u64 *) val, dest);
  }
  
-static inline void mthca_write_db_rec(u32 val[2], u32 *db)
+static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
  {
         *(u64 *) db = *(u64 *) val;
  }
@@ -86,18 +87,18 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
         __raw_writel(((__force u32 *) &val)[1], dest + 4);
  }
  
-static inline void mthca_write64(u32 val[2], void __iomem *dest,
+static inline void mthca_write64(__be32 val[2], void __iomem *dest,
                                  spinlock_t *doorbell_lock)
  {
         unsigned long flags;
  
         spin_lock_irqsave(doorbell_lock, flags);
-       __raw_writel(val[0], dest);
-       __raw_writel(val[1], dest + 4);
+       __raw_writel((__force u32) val[0], dest);
+       __raw_writel((__force u32) val[1], dest + 4);
         spin_unlock_irqrestore(doorbell_lock, flags);
  }
  
-static inline void mthca_write_db_rec(u32 val[2], u32 *db)
+static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
  {
         db[0] = val[0];
         wmb();
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c

index cbcf2b4722e4598e7f520add89dbb6868329fde7..18f0981eb0c15e977c587ccf052d590f51f1ed85 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -51,18 +52,18 @@ enum {
   * Must be packed because start is 64 bits but only aligned to 32 bits.
   */
  struct mthca_eq_context {
-       u32 flags;
-       u64 start;
-       u32 logsize_usrpage;
-       u32 tavor_pd;           /* reserved for Arbel */
-       u8  reserved1[3];
-       u8  intr;
-       u32 arbel_pd;           /* lost_count for Tavor */
-       u32 lkey;
-       u32 reserved2[2];
-       u32 consumer_index;
-       u32 producer_index;
-       u32 reserved3[4];
+       __be32 flags;
+       __be64 start;
+       __be32 logsize_usrpage;
+       __be32 tavor_pd;        /* reserved for Arbel */
+       u8     reserved1[3];
+       u8     intr;
+       __be32 arbel_pd;        /* lost_count for Tavor */
+       __be32 lkey;
+       u32    reserved2[2];
+       __be32 consumer_index;
+       __be32 producer_index;
+       u32    reserved3[4];
  } __attribute__((packed));
  
  #define MTHCA_EQ_STATUS_OK          ( 0 << 28)
@@ -127,28 +128,28 @@ struct mthca_eqe {
         union {
                 u32 raw[6];
                 struct {
-                       u32 cqn;
+                       __be32 cqn;
                 } __attribute__((packed)) comp;
                 struct {
-                       u16 reserved1;
-                       u16 token;
-                       u32 reserved2;
-                       u8  reserved3[3];
-                       u8  status;
-                       u64 out_param;
+                       u16    reserved1;
+                       __be16 token;
+                       u32    reserved2;
+                       u8     reserved3[3];
+                       u8     status;
+                       __be64 out_param;
                 } __attribute__((packed)) cmd;
                 struct {
-                       u32 qpn;
+                       __be32 qpn;
                 } __attribute__((packed)) qp;
                 struct {
-                       u32 cqn;
-                       u32 reserved1;
-                       u8  reserved2[3];
-                       u8  syndrome;
+                       __be32 cqn;
+                       u32    reserved1;
+                       u8     reserved2[3];
+                       u8     syndrome;
                 } __attribute__((packed)) cq_err;
                 struct {
-                       u32 reserved1[2];
-                       u32 port;
+                       u32    reserved1[2];
+                       __be32 port;
                 } __attribute__((packed)) port_change;
         } event;
         u8 reserved3[3];
@@ -167,7 +168,7 @@ static inline u64 async_mask(struct mthca_dev *dev)
  
  static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
  {
-       u32 doorbell[2];
+       __be32 doorbell[2];
  
         doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
         doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));
@@ -190,8 +191,8 @@ static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u
  {
         /* See comment in tavor_set_eq_ci() above. */
         wmb();
-       __raw_writel(cpu_to_be32(ci), dev->eq_regs.arbel.eq_set_ci_base +
-                    eq->eqn * 8);
+       __raw_writel((__force u32) cpu_to_be32(ci),
+                    dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8);
         /* We still want ordering, just not swabbing, so add a barrier */
         mb();
  }
@@ -206,7 +207,7 @@ static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
  
  static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
  {
-       u32 doorbell[2];
+       __be32 doorbell[2];
  
         doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn);
         doorbell[1] = 0;
@@ -224,7 +225,7 @@ static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
  static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
  {
         if (!mthca_is_memfree(dev)) {
-               u32 doorbell[2];
+               __be32 doorbell[2];
  
                 doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn);
                 doorbell[1] = cpu_to_be32(cqn);
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c

index 7df2236420155eaa687d22122b98b2c9fd46cc4e..9804174f7f3c5b9594f337c4dcdf16a4554deef5 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -1,5 +1,7 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -32,9 +34,9 @@
   * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $
   */
  
-#include <ib_verbs.h>
-#include <ib_mad.h>
-#include <ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
  
  #include "mthca_dev.h"
  #include "mthca_cmd.h"
@@ -192,7 +194,7 @@ int mthca_process_mad(struct ib_device *ibdev,
  {
         int err;
         u8 status;
-       u16 slid = in_wc ? in_wc->slid : IB_LID_PERMISSIVE;
+       u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
  
         /* Forward locally generated traps to the SM */
         if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP &&
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c

index 2ef916859e1752427864a973df28ac27f8c006b9..3241d6c9dc11d2f2a959224f22cd58070449d0b5 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1,6 +1,7 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -34,7 +35,6 @@
   */
  
  #include <linux/config.h>
-#include <linux/version.h>
  #include <linux/module.h>
  #include <linux/init.h>
  #include <linux/errno.h>
@@ -171,6 +171,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
         mdev->limits.reserved_mrws      = dev_lim->reserved_mrws;
         mdev->limits.reserved_uars      = dev_lim->reserved_uars;
         mdev->limits.reserved_pds       = dev_lim->reserved_pds;
+       mdev->limits.port_width_cap     = dev_lim->max_port_width;
  
         /* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
            May be doable since hardware supports it for SRQ.
@@ -212,7 +213,6 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev)
         struct mthca_dev_lim        dev_lim;
         struct mthca_profile        profile;
         struct mthca_init_hca_param init_hca;
-       struct mthca_adapter        adapter;
  
         err = mthca_SYS_EN(mdev, &status);
         if (err) {
@@ -253,6 +253,8 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev)
         profile = default_profile;
         profile.num_uar   = dev_lim.uar_size / PAGE_SIZE;
         profile.uarc_size = 0;
+       if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+               profile.num_srq = dev_lim.max_srqs;
  
         err = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
         if (err < 0)
@@ -270,26 +272,8 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev)
                 goto err_disable;
         }
  
-       err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
-       if (err) {
-               mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
-               goto err_close;
-       }
-       if (status) {
-               mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
-                         "aborting.\n", status);
-               err = -EINVAL;
-               goto err_close;
-       }
-
-       mdev->eq_table.inta_pin = adapter.inta_pin;
-       mdev->rev_id            = adapter.revision_id;
-
         return 0;
  
-err_close:
-       mthca_CLOSE_HCA(mdev, 0, &status);
-
  err_disable:
         mthca_SYS_DIS(mdev, &status);
  
@@ -442,15 +426,29 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev,
         }
  
         mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
-                                                    dev_lim->cqc_entry_sz,
-                                                    mdev->limits.num_cqs,
-                                                    mdev->limits.reserved_cqs, 0);
+                                                   dev_lim->cqc_entry_sz,
+                                                   mdev->limits.num_cqs,
+                                                   mdev->limits.reserved_cqs, 0);
         if (!mdev->cq_table.table) {
                 mthca_err(mdev, "Failed to map CQ context memory, aborting.\n");
                 err = -ENOMEM;
                 goto err_unmap_rdb;
         }
  
+       if (mdev->mthca_flags & MTHCA_FLAG_SRQ) {
+               mdev->srq_table.table =
+                       mthca_alloc_icm_table(mdev, init_hca->srqc_base,
+                                             dev_lim->srq_entry_sz,
+                                             mdev->limits.num_srqs,
+                                             mdev->limits.reserved_srqs, 0);
+               if (!mdev->srq_table.table) {
+                       mthca_err(mdev, "Failed to map SRQ context memory, "
+                                 "aborting.\n");
+                       err = -ENOMEM;
+                       goto err_unmap_cq;
+               }
+       }
+
         /*
          * It's not strictly required, but for simplicity just map the
          * whole multicast group table now.  The table isn't very big
@@ -466,11 +464,15 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev,
         if (!mdev->mcg_table.table) {
                 mthca_err(mdev, "Failed to map MCG context memory, aborting.\n");
                 err = -ENOMEM;
-               goto err_unmap_cq;
+               goto err_unmap_srq;
         }
  
         return 0;
  
+err_unmap_srq:
+       if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+               mthca_free_icm_table(mdev, mdev->srq_table.table);
+
  err_unmap_cq:
         mthca_free_icm_table(mdev, mdev->cq_table.table);
  
@@ -506,7 +508,6 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
         struct mthca_dev_lim        dev_lim;
         struct mthca_profile        profile;
         struct mthca_init_hca_param init_hca;
-       struct mthca_adapter        adapter;
         u64 icm_size;
         u8 status;
         int err;
@@ -551,6 +552,8 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
         profile = default_profile;
         profile.num_uar  = dev_lim.uar_size / PAGE_SIZE;
         profile.num_udav = 0;
+       if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+               profile.num_srq = dev_lim.max_srqs;
  
         icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
         if ((int) icm_size < 0) {
@@ -574,24 +577,11 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
                 goto err_free_icm;
         }
  
-       err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
-       if (err) {
-               mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
-               goto err_free_icm;
-       }
-       if (status) {
-               mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
-                         "aborting.\n", status);
-               err = -EINVAL;
-               goto err_free_icm;
-       }
-
-       mdev->eq_table.inta_pin = adapter.inta_pin;
-       mdev->rev_id            = adapter.revision_id;
-
         return 0;
  
  err_free_icm:
+       if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+               mthca_free_icm_table(mdev, mdev->srq_table.table);
         mthca_free_icm_table(mdev, mdev->cq_table.table);
         mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
         mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
@@ -614,12 +604,70 @@ err_disable:
         return err;
  }
  
+static void mthca_close_hca(struct mthca_dev *mdev)
+{
+       u8 status;
+
+       mthca_CLOSE_HCA(mdev, 0, &status);
+
+       if (mthca_is_memfree(mdev)) {
+               if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+                       mthca_free_icm_table(mdev, mdev->srq_table.table);
+               mthca_free_icm_table(mdev, mdev->cq_table.table);
+               mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
+               mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
+               mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
+               mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
+               mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
+               mthca_unmap_eq_icm(mdev);
+
+               mthca_UNMAP_ICM_AUX(mdev, &status);
+               mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+
+               mthca_UNMAP_FA(mdev, &status);
+               mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+
+               if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
+                       mthca_DISABLE_LAM(mdev, &status);
+       } else
+               mthca_SYS_DIS(mdev, &status);
+}
+
  static int __devinit mthca_init_hca(struct mthca_dev *mdev)
  {
+       u8 status;
+       int err;
+       struct mthca_adapter adapter;
+
         if (mthca_is_memfree(mdev))
-               return mthca_init_arbel(mdev);
+               err = mthca_init_arbel(mdev);
         else
-               return mthca_init_tavor(mdev);
+               err = mthca_init_tavor(mdev);
+
+       if (err)
+               return err;
+
+       err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
+       if (err) {
+               mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
+               goto err_close;
+       }
+       if (status) {
+               mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
+                         "aborting.\n", status);
+               err = -EINVAL;
+               goto err_close;
+       }
+
+       mdev->eq_table.inta_pin = adapter.inta_pin;
+       mdev->rev_id            = adapter.revision_id;
+       memcpy(mdev->board_id, adapter.board_id, sizeof mdev->board_id);
+
+       return 0;
+
+err_close:
+       mthca_close_hca(mdev);
+       return err;
  }
  
  static int __devinit mthca_setup_hca(struct mthca_dev *dev)
@@ -709,11 +757,18 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev)
                 goto err_cmd_poll;
         }
  
+       err = mthca_init_srq_table(dev);
+       if (err) {
+               mthca_err(dev, "Failed to initialize "
+                         "shared receive queue table, aborting.\n");
+               goto err_cq_table_free;
+       }
+
         err = mthca_init_qp_table(dev);
         if (err) {
                 mthca_err(dev, "Failed to initialize "
                           "queue pair table, aborting.\n");
-               goto err_cq_table_free;
+               goto err_srq_table_free;
         }
  
         err = mthca_init_av_table(dev);
@@ -738,6 +793,9 @@ err_av_table_free:
  err_qp_table_free:
         mthca_cleanup_qp_table(dev);
  
+err_srq_table_free:
+       mthca_cleanup_srq_table(dev);
+
  err_cq_table_free:
         mthca_cleanup_cq_table(dev);
  
@@ -844,33 +902,6 @@ static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev)
         return 0;
  }
  
-static void mthca_close_hca(struct mthca_dev *mdev)
-{
-       u8 status;
-
-       mthca_CLOSE_HCA(mdev, 0, &status);
-
-       if (mthca_is_memfree(mdev)) {
-               mthca_free_icm_table(mdev, mdev->cq_table.table);
-               mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
-               mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
-               mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
-               mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
-               mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
-               mthca_unmap_eq_icm(mdev);
-
-               mthca_UNMAP_ICM_AUX(mdev, &status);
-               mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
-
-               mthca_UNMAP_FA(mdev, &status);
-               mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
-
-               if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
-                       mthca_DISABLE_LAM(mdev, &status);
-       } else
-               mthca_SYS_DIS(mdev, &status);
-}
-
  /* Types of supported HCA */
  enum {
         TAVOR,                  /* MT23108                        */
@@ -887,9 +918,9 @@ static struct {
         int is_memfree;
         int is_pcie;
  } mthca_hca_table[] = {
-       [TAVOR]        = { .latest_fw = MTHCA_FW_VER(3, 3, 2), .is_memfree = 0, .is_pcie = 0 },
-       [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 6, 2), .is_memfree = 0, .is_pcie = 1 },
-       [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 0, 1), .is_memfree = 1, .is_pcie = 1 },
+       [TAVOR]        = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0 },
+       [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1 },
+       [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1 },
         [SINAI]        = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 }
  };
  
@@ -1051,6 +1082,7 @@ err_cleanup:
         mthca_cleanup_mcg_table(mdev);
         mthca_cleanup_av_table(mdev);
         mthca_cleanup_qp_table(mdev);
+       mthca_cleanup_srq_table(mdev);
         mthca_cleanup_cq_table(mdev);
         mthca_cmd_use_polling(mdev);
         mthca_cleanup_eq_table(mdev);
@@ -1100,6 +1132,7 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev)
                 mthca_cleanup_mcg_table(mdev);
                 mthca_cleanup_av_table(mdev);
                 mthca_cleanup_qp_table(mdev);
+               mthca_cleanup_srq_table(mdev);
                 mthca_cleanup_cq_table(mdev);
                 mthca_cmd_use_polling(mdev);
                 mthca_cleanup_eq_table(mdev);
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c

index 5be7d949dbf61b74e06dc2a73931a05cf27fd38c..a2707605f4c8ba3319644452f2deb48004e001e9 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -42,10 +42,10 @@ enum {
  };
  
  struct mthca_mgm {
-       u32 next_gid_index;
-       u32 reserved[3];
-       u8  gid[16];
-       u32 qp[MTHCA_QP_PER_MGM];
+       __be32 next_gid_index;
+       u32    reserved[3];
+       u8     gid[16];
+       __be32 qp[MTHCA_QP_PER_MGM];
  };
  
  static const u8 zero_gid[16];  /* automatically initialized to 0 */
@@ -94,10 +94,14 @@ static int find_mgm(struct mthca_dev *dev,
         if (0)
                 mthca_dbg(dev, "Hash for %04x:%04x:%04x:%04x:"
                           "%04x:%04x:%04x:%04x is %04x\n",
-                         be16_to_cpu(((u16 *) gid)[0]), be16_to_cpu(((u16 *) gid)[1]),
-                         be16_to_cpu(((u16 *) gid)[2]), be16_to_cpu(((u16 *) gid)[3]),
-                         be16_to_cpu(((u16 *) gid)[4]), be16_to_cpu(((u16 *) gid)[5]),
-                         be16_to_cpu(((u16 *) gid)[6]), be16_to_cpu(((u16 *) gid)[7]),
+                         be16_to_cpu(((__be16 *) gid)[0]),
+                         be16_to_cpu(((__be16 *) gid)[1]),
+                         be16_to_cpu(((__be16 *) gid)[2]),
+                         be16_to_cpu(((__be16 *) gid)[3]),
+                         be16_to_cpu(((__be16 *) gid)[4]),
+                         be16_to_cpu(((__be16 *) gid)[5]),
+                         be16_to_cpu(((__be16 *) gid)[6]),
+                         be16_to_cpu(((__be16 *) gid)[7]),
                           *hash);
  
         *index = *hash;
@@ -258,14 +262,14 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
         if (index == -1) {
                 mthca_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
                           "not found\n",
-                         be16_to_cpu(((u16 *) gid->raw)[0]),
-                         be16_to_cpu(((u16 *) gid->raw)[1]),
-                         be16_to_cpu(((u16 *) gid->raw)[2]),
-                         be16_to_cpu(((u16 *) gid->raw)[3]),
-                         be16_to_cpu(((u16 *) gid->raw)[4]),
-                         be16_to_cpu(((u16 *) gid->raw)[5]),
-                         be16_to_cpu(((u16 *) gid->raw)[6]),
-                         be16_to_cpu(((u16 *) gid->raw)[7]));
+                         be16_to_cpu(((__be16 *) gid->raw)[0]),
+                         be16_to_cpu(((__be16 *) gid->raw)[1]),
+                         be16_to_cpu(((__be16 *) gid->raw)[2]),
+                         be16_to_cpu(((__be16 *) gid->raw)[3]),
+                         be16_to_cpu(((__be16 *) gid->raw)[4]),
+                         be16_to_cpu(((__be16 *) gid->raw)[5]),
+                         be16_to_cpu(((__be16 *) gid->raw)[6]),
+                         be16_to_cpu(((__be16 *) gid->raw)[7]));
                 err = -EINVAL;
                 goto out;
         }
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c

index 2a864615035510fa33d515cc5e6e406a086e42f4..1827400f189be87a61c5c7c062147a978a16b133 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -1,6 +1,7 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -285,6 +286,7 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
  {
         struct mthca_icm_table *table;
         int num_icm;
+       unsigned chunk_size;
         int i;
         u8 status;
  
@@ -305,7 +307,11 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
                 table->icm[i] = NULL;
  
         for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
-               table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
+               chunk_size = MTHCA_TABLE_CHUNK_SIZE;
+               if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size)
+                       chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE;
+
+               table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
                                                 (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
                                                 __GFP_NOWARN);
                 if (!table->icm[i])
@@ -481,7 +487,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
         }
  }
  
-int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
+int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db)
  {
         int group;
         int start, end, dir;
@@ -564,7 +570,7 @@ found:
  
         page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5));
  
-       *db = (u32 *) &page->db_rec[j];
+       *db = (__be32 *) &page->db_rec[j];
  
  out:
         up(&dev->db_tab->mutex);
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h

index 4761d844cb5f93bbacb9f960b34e70d8cad8402c..bafa51544aa39db8db19cf2626361550fec7ea89 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -1,6 +1,7 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -137,7 +138,7 @@ enum {
  
  struct mthca_db_page {
         DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE);
-       u64       *db_rec;
+       __be64    *db_rec;
         dma_addr_t mapping;
  };
  
@@ -172,7 +173,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
  
  int mthca_init_db_tab(struct mthca_dev *dev);
  void mthca_cleanup_db_tab(struct mthca_dev *dev);
-int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db);
+int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db);
  void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
  
  #endif /* MTHCA_MEMFREE_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c

index cbe50feaf68073967c76b0e58569fde0de16e513..1f97a44477f5d8f31e37ebffb8e3c59bab779ddc 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -50,18 +51,18 @@ struct mthca_mtt {
   * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
   */
  struct mthca_mpt_entry {
-       u32 flags;
-       u32 page_size;
-       u32 key;
-       u32 pd;
-       u64 start;
-       u64 length;
-       u32 lkey;
-       u32 window_count;
-       u32 window_count_limit;
-       u64 mtt_seg;
-       u32 mtt_sz;             /* Arbel only */
-       u32 reserved[2];
+       __be32 flags;
+       __be32 page_size;
+       __be32 key;
+       __be32 pd;
+       __be64 start;
+       __be64 length;
+       __be32 lkey;
+       __be32 window_count;
+       __be32 window_count_limit;
+       __be64 mtt_seg;
+       __be32 mtt_sz;          /* Arbel only */
+       u32    reserved[2];
  } __attribute__((packed));
  
  #define MTHCA_MPT_FLAG_SW_OWNS       (0xfUL << 28)
@@ -247,7 +248,7 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
                     int start_index, u64 *buffer_list, int list_len)
  {
         struct mthca_mailbox *mailbox;
-       u64 *mtt_entry;
+       __be64 *mtt_entry;
         int err = 0;
         u8 status;
         int i;
@@ -389,7 +390,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
                 for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
                         if (i % 4 == 0)
                                 printk("[%02x] ", i * 4);
-                       printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
+                       printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
                         if ((i + 1) % 4 == 0)
                                 printk("\n");
                 }
@@ -458,7 +459,7 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
  static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
  {
         mthca_table_put(dev, dev->mr_table.mpt_table,
-                       arbel_key_to_hw_index(lkey));
+                       key_to_hw_index(dev, lkey));
  
         mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
  }
@@ -562,7 +563,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
                 for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
                         if (i % 4 == 0)
                                 printk("[%02x] ", i * 4);
-                       printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
+                       printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
                         if ((i + 1) % 4 == 0)
                                 printk("\n");
                 }
@@ -669,7 +670,7 @@ int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
         mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
         mpt_entry.start  = cpu_to_be64(iova);
  
-       writel(mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
+       __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
         memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
                     offsetof(struct mthca_mpt_entry, window_count) -
                     offsetof(struct mthca_mpt_entry, start));
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c

index c2c899844e98b2ab36389c095a53aa24a6181885..3dbf06a6e6f4e097f84aceaa71a56104849ffeca 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_pd.c
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -1,6 +1,7 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c

index 4fedc32d5871a4dfa54a9b2de2dc031c41dd6771..0576056b34f463742b7979ac9791b5bde002d8d0 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -101,6 +102,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
         profile[MTHCA_RES_UARC].size = request->uarc_size;
  
         profile[MTHCA_RES_QP].num    = request->num_qp;
+       profile[MTHCA_RES_SRQ].num   = request->num_srq;
         profile[MTHCA_RES_EQP].num   = request->num_qp;
         profile[MTHCA_RES_RDB].num   = request->num_qp * request->rdb_per_qp;
         profile[MTHCA_RES_CQ].num    = request->num_cq;
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h

index 17aef3357661cbf2f35f0ed50e43c36ee99031f1..94641808f97f5cff4ef7cf93dd323d1ab4a5f584 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_profile.h
+++ b/drivers/infiniband/hw/mthca/mthca_profile.h
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -41,6 +42,7 @@
  struct mthca_profile {
         int num_qp;
         int rdb_per_qp;
+       int num_srq;
         int num_cq;
         int num_mcg;
         int num_mpt;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c

index 81919a7b4935d97983f00170a302e0c6e0995460..1c1c2e230871202fe4b57bcf26aa602e6b505586 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -2,6 +2,8 @@
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -34,7 +36,7 @@
   * $Id: mthca_provider.c 1397 2004-12-28 05:09:00Z roland $
   */
  
-#include <ib_smi.h>
+#include <rdma/ib_smi.h>
  #include <linux/mm.h>
  
  #include "mthca_dev.h"
@@ -79,10 +81,10 @@ static int mthca_query_device(struct ib_device *ibdev,
         }
  
         props->device_cap_flags    = mdev->device_cap_flags;
-       props->vendor_id           = be32_to_cpup((u32 *) (out_mad->data + 36)) &
+       props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
                 0xffffff;
-       props->vendor_part_id      = be16_to_cpup((u16 *) (out_mad->data + 30));
-       props->hw_ver              = be16_to_cpup((u16 *) (out_mad->data + 32));
+       props->vendor_part_id      = be16_to_cpup((__be16 *) (out_mad->data + 30));
+       props->hw_ver              = be16_to_cpup((__be16 *) (out_mad->data + 32));
         memcpy(&props->sys_image_guid, out_mad->data +  4, 8);
         memcpy(&props->node_guid,      out_mad->data + 12, 8);
  
@@ -118,6 +120,8 @@ static int mthca_query_port(struct ib_device *ibdev,
         if (!in_mad || !out_mad)
                 goto out;
  
+       memset(props, 0, sizeof *props);
+
         memset(in_mad, 0, sizeof *in_mad);
         in_mad->base_version       = 1;
         in_mad->mgmt_class         = IB_MGMT_CLASS_SUBN_LID_ROUTED;
@@ -136,16 +140,17 @@ static int mthca_query_port(struct ib_device *ibdev,
                 goto out;
         }
  
-       props->lid               = be16_to_cpup((u16 *) (out_mad->data + 16));
+       props->lid               = be16_to_cpup((__be16 *) (out_mad->data + 16));
         props->lmc               = out_mad->data[34] & 0x7;
-       props->sm_lid            = be16_to_cpup((u16 *) (out_mad->data + 18));
+       props->sm_lid            = be16_to_cpup((__be16 *) (out_mad->data + 18));
         props->sm_sl             = out_mad->data[36] & 0xf;
         props->state             = out_mad->data[32] & 0xf;
         props->phys_state        = out_mad->data[33] >> 4;
-       props->port_cap_flags    = be32_to_cpup((u32 *) (out_mad->data + 20));
+       props->port_cap_flags    = be32_to_cpup((__be32 *) (out_mad->data + 20));
         props->gid_tbl_len       = to_mdev(ibdev)->limits.gid_table_len;
+       props->max_msg_sz        = 0x80000000;
         props->pkey_tbl_len      = to_mdev(ibdev)->limits.pkey_table_len;
-       props->qkey_viol_cntr    = be16_to_cpup((u16 *) (out_mad->data + 48));
+       props->qkey_viol_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 48));
         props->active_width      = out_mad->data[31] & 0xf;
         props->active_speed      = out_mad->data[35] >> 4;
  
@@ -221,7 +226,7 @@ static int mthca_query_pkey(struct ib_device *ibdev,
                 goto out;
         }
  
-       *pkey = be16_to_cpu(((u16 *) out_mad->data)[index % 32]);
+       *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
  
   out:
         kfree(in_mad);
@@ -420,6 +425,77 @@ static int mthca_ah_destroy(struct ib_ah *ah)
         return 0;
  }
  
+static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
+                                      struct ib_srq_init_attr *init_attr,
+                                      struct ib_udata *udata)
+{
+       struct mthca_create_srq ucmd;
+       struct mthca_ucontext *context = NULL;
+       struct mthca_srq *srq;
+       int err;
+
+       srq = kmalloc(sizeof *srq, GFP_KERNEL);
+       if (!srq)
+               return ERR_PTR(-ENOMEM);
+
+       if (pd->uobject) {
+               context = to_mucontext(pd->uobject->context);
+
+               if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+                       return ERR_PTR(-EFAULT);
+
+               err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+                                       context->db_tab, ucmd.db_index,
+                                       ucmd.db_page);
+
+               if (err)
+                       goto err_free;
+
+               srq->mr.ibmr.lkey = ucmd.lkey;
+               srq->db_index     = ucmd.db_index;
+       }
+
+       err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
+                             &init_attr->attr, srq);
+
+       if (err && pd->uobject)
+               mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
+                                   context->db_tab, ucmd.db_index);
+
+       if (err)
+               goto err_free;
+
+       if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) {
+               mthca_free_srq(to_mdev(pd->device), srq);
+               err = -EFAULT;
+               goto err_free;
+       }
+
+       return &srq->ibsrq;
+
+err_free:
+       kfree(srq);
+
+       return ERR_PTR(err);
+}
+
+static int mthca_destroy_srq(struct ib_srq *srq)
+{
+       struct mthca_ucontext *context;
+
+       if (srq->uobject) {
+               context = to_mucontext(srq->uobject->context);
+
+               mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
+                                   context->db_tab, to_msrq(srq)->db_index);
+       }
+
+       mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
+       kfree(srq);
+
+       return 0;
+}
+
  static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
                                      struct ib_qp_init_attr *init_attr,
                                      struct ib_udata *udata)
@@ -956,14 +1032,22 @@ static ssize_t show_hca(struct class_device *cdev, char *buf)
         }
  }
  
+static ssize_t show_board(struct class_device *cdev, char *buf)
+{
+       struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
+       return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
+}
+
  static CLASS_DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
  static CLASS_DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
  static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
+static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
  
  static struct class_device_attribute *mthca_class_attributes[] = {
         &class_device_attr_hw_rev,
         &class_device_attr_fw_ver,
-       &class_device_attr_hca_type
+       &class_device_attr_hca_type,
+       &class_device_attr_board_id
  };
  
  int mthca_register_device(struct mthca_dev *dev)
@@ -990,6 +1074,17 @@ int mthca_register_device(struct mthca_dev *dev)
         dev->ib_dev.dealloc_pd           = mthca_dealloc_pd;
         dev->ib_dev.create_ah            = mthca_ah_create;
         dev->ib_dev.destroy_ah           = mthca_ah_destroy;
+
+       if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
+               dev->ib_dev.create_srq           = mthca_create_srq;
+               dev->ib_dev.destroy_srq          = mthca_destroy_srq;
+
+               if (mthca_is_memfree(dev))
+                       dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
+               else
+                       dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
+       }
+
         dev->ib_dev.create_qp            = mthca_create_qp;
         dev->ib_dev.modify_qp            = mthca_modify_qp;
         dev->ib_dev.destroy_qp           = mthca_destroy_qp;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h

index 1d032791cc8b67a3ad217dc3a235ead72dab1f6b..bcd4b01a339cb1f7abae1d82f582b754df0cb4bc 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -1,6 +1,7 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -36,8 +37,8 @@
  #ifndef MTHCA_PROVIDER_H
  #define MTHCA_PROVIDER_H
  
-#include <ib_verbs.h>
-#include <ib_pack.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
  
  #define MTHCA_MPT_FLAG_ATOMIC        (1 << 14)
  #define MTHCA_MPT_FLAG_REMOTE_WRITE  (1 << 13)
@@ -50,6 +51,11 @@ struct mthca_buf_list {
         DECLARE_PCI_UNMAP_ADDR(mapping)
  };
  
+union mthca_buf {
+       struct mthca_buf_list direct;
+       struct mthca_buf_list *page_list;
+};
+
  struct mthca_uar {
         unsigned long pfn;
         int           index;
@@ -181,19 +187,39 @@ struct mthca_cq {
  
         /* Next fields are Arbel only */
         int                    set_ci_db_index;
-       u32                   *set_ci_db;
+       __be32                *set_ci_db;
         int                    arm_db_index;
-       u32                   *arm_db;
+       __be32                *arm_db;
         int                    arm_sn;
  
-       union {
-               struct mthca_buf_list direct;
-               struct mthca_buf_list *page_list;
-       }                      queue;
+       union mthca_buf        queue;
         struct mthca_mr        mr;
         wait_queue_head_t      wait;
  };
  
+struct mthca_srq {
+       struct ib_srq           ibsrq;
+       spinlock_t              lock;
+       atomic_t                refcount;
+       int                     srqn;
+       int                     max;
+       int                     max_gs;
+       int                     wqe_shift;
+       int                     first_free;
+       int                     last_free;
+       u16                     counter;  /* Arbel only */
+       int                     db_index; /* Arbel only */
+       __be32                 *db;       /* Arbel only */
+       void                   *last;
+
+       int                     is_direct;
+       u64                    *wrid;
+       union mthca_buf         queue;
+       struct mthca_mr         mr;
+
+       wait_queue_head_t       wait;
+};
+
  struct mthca_wq {
         spinlock_t lock;
         int        max;
@@ -206,7 +232,7 @@ struct mthca_wq {
         int        wqe_shift;
  
         int        db_index;    /* Arbel only */
-       u32       *db;
+       __be32    *db;
  };
  
  struct mthca_qp {
@@ -227,10 +253,7 @@ struct mthca_qp {
         int                    send_wqe_offset;
  
         u64                   *wrid;
-       union {
-               struct mthca_buf_list direct;
-               struct mthca_buf_list *page_list;
-       }                      queue;
+       union mthca_buf        queue;
  
         wait_queue_head_t      wait;
  };
@@ -277,6 +300,11 @@ static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq)
         return container_of(ibcq, struct mthca_cq, ibcq);
  }
  
+static inline struct mthca_srq *to_msrq(struct ib_srq *ibsrq)
+{
+       return container_of(ibsrq, struct mthca_srq, ibsrq);
+}
+
  static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp)
  {
         return container_of(ibqp, struct mthca_qp, ibqp);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c

index f7126b14d5aeac2ac06f7d91bd2c0b3523375ff9..0164b84d4ec643062892122cf6ebf567d92ec5f1 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1,6 +1,8 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
   * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -35,13 +37,14 @@
  
  #include <linux/init.h>
  
-#include <ib_verbs.h>
-#include <ib_cache.h>
-#include <ib_pack.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_pack.h>
  
  #include "mthca_dev.h"
  #include "mthca_cmd.h"
  #include "mthca_memfree.h"
+#include "mthca_wqe.h"
  
  enum {
         MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
@@ -95,62 +98,62 @@ enum {
  };
  
  struct mthca_qp_path {
-       u32 port_pkey;
-       u8  rnr_retry;
-       u8  g_mylmc;
-       u16 rlid;
-       u8  ackto;
-       u8  mgid_index;
-       u8  static_rate;
-       u8  hop_limit;
-       u32 sl_tclass_flowlabel;
-       u8  rgid[16];
+       __be32 port_pkey;
+       u8     rnr_retry;
+       u8     g_mylmc;
+       __be16 rlid;
+       u8     ackto;
+       u8     mgid_index;
+       u8     static_rate;
+       u8     hop_limit;
+       __be32 sl_tclass_flowlabel;
+       u8     rgid[16];
  } __attribute__((packed));
  
  struct mthca_qp_context {
-       u32 flags;
-       u32 tavor_sched_queue;  /* Reserved on Arbel */
-       u8  mtu_msgmax;
-       u8  rq_size_stride;     /* Reserved on Tavor */
-       u8  sq_size_stride;     /* Reserved on Tavor */
-       u8  rlkey_arbel_sched_queue;    /* Reserved on Tavor */
-       u32 usr_page;
-       u32 local_qpn;
-       u32 remote_qpn;
-       u32 reserved1[2];
+       __be32 flags;
+       __be32 tavor_sched_queue; /* Reserved on Arbel */
+       u8     mtu_msgmax;
+       u8     rq_size_stride;  /* Reserved on Tavor */
+       u8     sq_size_stride;  /* Reserved on Tavor */
+       u8     rlkey_arbel_sched_queue; /* Reserved on Tavor */
+       __be32 usr_page;
+       __be32 local_qpn;
+       __be32 remote_qpn;
+       u32    reserved1[2];
         struct mthca_qp_path pri_path;
         struct mthca_qp_path alt_path;
-       u32 rdd;
-       u32 pd;
-       u32 wqe_base;
-       u32 wqe_lkey;
-       u32 params1;
-       u32 reserved2;
-       u32 next_send_psn;
-       u32 cqn_snd;
-       u32 snd_wqe_base_l;     /* Next send WQE on Tavor */
-       u32 snd_db_index;       /* (debugging only entries) */
-       u32 last_acked_psn;
-       u32 ssn;
-       u32 params2;
-       u32 rnr_nextrecvpsn;
-       u32 ra_buff_indx;
-       u32 cqn_rcv;
-       u32 rcv_wqe_base_l;     /* Next recv WQE on Tavor */
-       u32 rcv_db_index;       /* (debugging only entries) */
-       u32 qkey;
-       u32 srqn;
-       u32 rmsn;
-       u16 rq_wqe_counter;     /* reserved on Tavor */
-       u16 sq_wqe_counter;     /* reserved on Tavor */
-       u32 reserved3[18];
+       __be32 rdd;
+       __be32 pd;
+       __be32 wqe_base;
+       __be32 wqe_lkey;
+       __be32 params1;
+       __be32 reserved2;
+       __be32 next_send_psn;
+       __be32 cqn_snd;
+       __be32 snd_wqe_base_l;  /* Next send WQE on Tavor */
+       __be32 snd_db_index;    /* (debugging only entries) */
+       __be32 last_acked_psn;
+       __be32 ssn;
+       __be32 params2;
+       __be32 rnr_nextrecvpsn;
+       __be32 ra_buff_indx;
+       __be32 cqn_rcv;
+       __be32 rcv_wqe_base_l;  /* Next recv WQE on Tavor */
+       __be32 rcv_db_index;    /* (debugging only entries) */
+       __be32 qkey;
+       __be32 srqn;
+       __be32 rmsn;
+       __be16 rq_wqe_counter;  /* reserved on Tavor */
+       __be16 sq_wqe_counter;  /* reserved on Tavor */
+       u32    reserved3[18];
  } __attribute__((packed));
  
  struct mthca_qp_param {
-       u32 opt_param_mask;
-       u32 reserved1;
+       __be32 opt_param_mask;
+       u32    reserved1;
         struct mthca_qp_context context;
-       u32 reserved2[62];
+       u32    reserved2[62];
  } __attribute__((packed));
  
  enum {
@@ -173,80 +176,6 @@ enum {
         MTHCA_QP_OPTPAR_SCHED_QUEUE       = 1 << 16
  };
  
-enum {
-       MTHCA_NEXT_DBD       = 1 << 7,
-       MTHCA_NEXT_FENCE     = 1 << 6,
-       MTHCA_NEXT_CQ_UPDATE = 1 << 3,
-       MTHCA_NEXT_EVENT_GEN = 1 << 2,
-       MTHCA_NEXT_SOLICIT   = 1 << 1,
-
-       MTHCA_MLX_VL15       = 1 << 17,
-       MTHCA_MLX_SLR        = 1 << 16
-};
-
-enum {
-       MTHCA_INVAL_LKEY = 0x100
-};
-
-struct mthca_next_seg {
-       u32 nda_op;             /* [31:6] next WQE [4:0] next opcode */
-       u32 ee_nds;             /* [31:8] next EE  [7] DBD [6] F [5:0] next WQE size */
-       u32 flags;              /* [3] CQ [2] Event [1] Solicit */
-       u32 imm;                /* immediate data */
-};
-
-struct mthca_tavor_ud_seg {
-       u32 reserved1;
-       u32 lkey;
-       u64 av_addr;
-       u32 reserved2[4];
-       u32 dqpn;
-       u32 qkey;
-       u32 reserved3[2];
-};
-
-struct mthca_arbel_ud_seg {
-       u32 av[8];
-       u32 dqpn;
-       u32 qkey;
-       u32 reserved[2];
-};
-
-struct mthca_bind_seg {
-       u32 flags;              /* [31] Atomic [30] rem write [29] rem read */
-       u32 reserved;
-       u32 new_rkey;
-       u32 lkey;
-       u64 addr;
-       u64 length;
-};
-
-struct mthca_raddr_seg {
-       u64 raddr;
-       u32 rkey;
-       u32 reserved;
-};
-
-struct mthca_atomic_seg {
-       u64 swap_add;
-       u64 compare;
-};
-
-struct mthca_data_seg {
-       u32 byte_count;
-       u32 lkey;
-       u64 addr;
-};
-
-struct mthca_mlx_seg {
-       u32 nda_op;
-       u32 nds;
-       u32 flags;              /* [17] VL15 [16] SLR [14:12] static rate
-                                  [11:8] SL [3] C [2] E */
-       u16 rlid;
-       u16 vcrc;
-};
-
  static const u8 mthca_opcode[] = {
         [IB_WR_SEND]                 = MTHCA_OPCODE_SEND,
         [IB_WR_SEND_WITH_IMM]        = MTHCA_OPCODE_SEND_IMM,
@@ -573,12 +502,11 @@ static void init_port(struct mthca_dev *dev, int port)
  
         memset(&param, 0, sizeof param);
  
-       param.enable_1x = 1;
-       param.enable_4x = 1;
-       param.vl_cap    = dev->limits.vl_cap;
-       param.mtu_cap   = dev->limits.mtu_cap;
-       param.gid_cap   = dev->limits.gid_table_len;
-       param.pkey_cap  = dev->limits.pkey_table_len;
+       param.port_width = dev->limits.port_width_cap;
+       param.vl_cap     = dev->limits.vl_cap;
+       param.mtu_cap    = dev->limits.mtu_cap;
+       param.gid_cap    = dev->limits.gid_table_len;
+       param.pkey_cap   = dev->limits.pkey_table_len;
  
         err = mthca_INIT_IB(dev, &param, port, &status);
         if (err)
@@ -684,10 +612,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
                 qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
  
         if (mthca_is_memfree(dev)) {
-               qp_context->rq_size_stride =
-                       ((ffs(qp->rq.max) - 1) << 3) | (qp->rq.wqe_shift - 4);
-               qp_context->sq_size_stride =
-                       ((ffs(qp->sq.max) - 1) << 3) | (qp->sq.wqe_shift - 4);
+               if (qp->rq.max)
+                       qp_context->rq_size_stride = long_log2(qp->rq.max) << 3;
+               qp_context->rq_size_stride |= qp->rq.wqe_shift - 4;
+
+               if (qp->sq.max)
+                       qp_context->sq_size_stride = long_log2(qp->sq.max) << 3;
+               qp_context->sq_size_stride |= qp->sq.wqe_shift - 4;
         }
  
         /* leave arbel_sched_queue as 0 */
@@ -856,6 +787,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
  
         qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
  
+       if (ibqp->srq)
+               qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RIC);
+
         if (attr_mask & IB_QP_MIN_RNR_TIMER) {
                 qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
                 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);
@@ -878,6 +812,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
                 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY);
         }
  
+       if (ibqp->srq)
+               qp_context->srqn = cpu_to_be32(1 << 24 |
+                                              to_msrq(ibqp->srq)->srqn);
+
         err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
                               qp->qpn, 0, mailbox, 0, &status);
         if (status) {
@@ -925,10 +863,6 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
                                struct mthca_qp *qp)
  {
         int size;
-       int i;
-       int npages, shift;
-       dma_addr_t t;
-       u64 *dma_list = NULL;
         int err = -ENOMEM;
  
         size = sizeof (struct mthca_next_seg) +
@@ -978,116 +912,24 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
         if (!qp->wrid)
                 goto err_out;
  
-       if (size <= MTHCA_MAX_DIRECT_QP_SIZE) {
-               qp->is_direct = 1;
-               npages = 1;
-               shift = get_order(size) + PAGE_SHIFT;
-
-               if (0)
-                       mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n",
-                                 size, shift);
-
-               qp->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev, size,
-                                                         &t, GFP_KERNEL);
-               if (!qp->queue.direct.buf)
-                       goto err_out;
-
-               pci_unmap_addr_set(&qp->queue.direct, mapping, t);
-
-               memset(qp->queue.direct.buf, 0, size);
-
-               while (t & ((1 << shift) - 1)) {
-                       --shift;
-                       npages *= 2;
-               }
-
-               dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
-               if (!dma_list)
-                       goto err_out_free;
-
-               for (i = 0; i < npages; ++i)
-                       dma_list[i] = t + i * (1 << shift);
-       } else {
-               qp->is_direct = 0;
-               npages = size / PAGE_SIZE;
-               shift = PAGE_SHIFT;
-
-               if (0)
-                       mthca_dbg(dev, "Creating indirect QP with %d pages\n", npages);
-
-               dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
-               if (!dma_list)
-                       goto err_out;
-
-               qp->queue.page_list = kmalloc(npages *
-                                             sizeof *qp->queue.page_list,
-                                             GFP_KERNEL);
-               if (!qp->queue.page_list)
-                       goto err_out;
-
-               for (i = 0; i < npages; ++i) {
-                       qp->queue.page_list[i].buf =
-                               dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
-                                                  &t, GFP_KERNEL);
-                       if (!qp->queue.page_list[i].buf)
-                               goto err_out_free;
-
-                       memset(qp->queue.page_list[i].buf, 0, PAGE_SIZE);
-
-                       pci_unmap_addr_set(&qp->queue.page_list[i], mapping, t);
-                       dma_list[i] = t;
-               }
-       }
-
-       err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift,
-                                 npages, 0, size,
-                                 MTHCA_MPT_FLAG_LOCAL_READ,
-                                 &qp->mr);
+       err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE,
+                             &qp->queue, &qp->is_direct, pd, 0, &qp->mr);
         if (err)
-               goto err_out_free;
+               goto err_out;
  
-       kfree(dma_list);
         return 0;
  
- err_out_free:
-       if (qp->is_direct) {
-               dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf,
-                                 pci_unmap_addr(&qp->queue.direct, mapping));
-       } else
-               for (i = 0; i < npages; ++i) {
-                       if (qp->queue.page_list[i].buf)
-                               dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
-                                                 qp->queue.page_list[i].buf,
-                                                 pci_unmap_addr(&qp->queue.page_list[i],
-                                                                mapping));
-
-               }
-
- err_out:
+err_out:
         kfree(qp->wrid);
-       kfree(dma_list);
         return err;
  }
  
  static void mthca_free_wqe_buf(struct mthca_dev *dev,
                                struct mthca_qp *qp)
  {
-       int i;
-       int size = PAGE_ALIGN(qp->send_wqe_offset +
-                             (qp->sq.max << qp->sq.wqe_shift));
-
-       if (qp->is_direct) {
-               dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf,
-                                 pci_unmap_addr(&qp->queue.direct, mapping));
-       } else {
-               for (i = 0; i < size / PAGE_SIZE; ++i) {
-                       dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
-                                         qp->queue.page_list[i].buf,
-                                         pci_unmap_addr(&qp->queue.page_list[i],
-                                                        mapping));
-               }
-       }
-
+       mthca_buf_free(dev, PAGE_ALIGN(qp->send_wqe_offset +
+                                      (qp->sq.max << qp->sq.wqe_shift)),
+                      &qp->queue, qp->is_direct, &qp->mr);
         kfree(qp->wrid);
  }
  
@@ -1428,11 +1270,12 @@ void mthca_free_qp(struct mthca_dev *dev,
          * unref the mem-free tables and free the QPN in our table.
          */
         if (!qp->ibqp.uobject) {
-               mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
+               mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn,
+                              qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
                 if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-                       mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
+                       mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn,
+                                      qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
  
-               mthca_free_mr(dev, &qp->mr);
                 mthca_free_memfree(dev, qp);
                 mthca_free_wqe_buf(dev, qp);
         }
@@ -1457,6 +1300,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
  {
         int header_size;
         int err;
+       u16 pkey;
  
         ib_ud_header_init(256, /* assume a MAD */
                           sqp->ud_header.grh_present,
@@ -1467,8 +1311,8 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
                 return err;
         mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
         mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
-                                 (sqp->ud_header.lrh.destination_lid == 0xffff ?
-                                  MTHCA_MLX_SLR : 0) |
+                                 (sqp->ud_header.lrh.destination_lid ==
+                                  IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) |
                                   (sqp->ud_header.lrh.service_level << 8));
         mlx->rlid = sqp->ud_header.lrh.destination_lid;
         mlx->vcrc = 0;
@@ -1488,18 +1332,16 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
         }
  
         sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
-       if (sqp->ud_header.lrh.destination_lid == 0xffff)
-               sqp->ud_header.lrh.source_lid = 0xffff;
+       if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
+               sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
         sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
         if (!sqp->qp.ibqp.qp_num)
                 ib_get_cached_pkey(&dev->ib_dev, sqp->port,
-                                  sqp->pkey_index,
-                                  &sqp->ud_header.bth.pkey);
+                                  sqp->pkey_index, &pkey);
         else
                 ib_get_cached_pkey(&dev->ib_dev, sqp->port,
-                                  wr->wr.ud.pkey_index,
-                                  &sqp->ud_header.bth.pkey);
-       cpu_to_be16s(&sqp->ud_header.bth.pkey);
+                                  wr->wr.ud.pkey_index, &pkey);
+       sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
         sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
         sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
         sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
@@ -1742,7 +1584,7 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
  
  out:
         if (likely(nreq)) {
-               u32 doorbell[2];
+               __be32 doorbell[2];
  
                 doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
                                            qp->send_wqe_offset) | f0 | op0);
@@ -1843,7 +1685,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
  
  out:
         if (likely(nreq)) {
-               u32 doorbell[2];
+               __be32 doorbell[2];
  
                 doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
                 doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
@@ -2064,7 +1906,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
  
  out:
         if (likely(nreq)) {
-               u32 doorbell[2];
+               __be32 doorbell[2];
  
                 doorbell[0] = cpu_to_be32((nreq << 24)                  |
                                           ((qp->sq.head & 0xffff) << 8) |
@@ -2174,19 +2016,25 @@ out:
  }
  
  int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
-                      int index, int *dbd, u32 *new_wqe)
+                      int index, int *dbd, __be32 *new_wqe)
  {
         struct mthca_next_seg *next;
  
+       /*
+        * For SRQs, all WQEs generate a CQE, so we're always at the
+        * end of the doorbell chain.
+        */
+       if (qp->ibqp.srq) {
+               *new_wqe = 0;
+               return 0;
+       }
+
         if (is_send)
                 next = get_send_wqe(qp, index);
         else
                 next = get_recv_wqe(qp, index);
  
-       if (mthca_is_memfree(dev))
-               *dbd = 1;
-       else
-               *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));
+       *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));
         if (next->ee_nds & cpu_to_be32(0x3f))
                 *new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) |
                         (next->ee_nds & cpu_to_be32(0x3f));
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c

new file mode 100644 (file)

index 0000000..75cd2d8
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $
+ */
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+#include "mthca_memfree.h"
+#include "mthca_wqe.h"
+
+/*
+ * Size limit handed to mthca_buf_alloc() when the SRQ work queue buffer
+ * is allocated.  NOTE(review): presumably the largest buffer that is
+ * still allocated as one direct chunk instead of a page list -- confirm
+ * against mthca_buf_alloc().
+ */
+enum {
+       MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE
+};
+
+/*
+ * SRQ context in the layout written by mthca_tavor_init_srq_context().
+ * mthca_alloc_srq() builds it in a command mailbox and passes that
+ * mailbox to mthca_SW2HW_SRQ().  This file only sets wqe_base_ds,
+ * state_pd, lkey and uar; every other field is left zeroed.
+ */
+struct mthca_tavor_srq_context {
+       __be64 wqe_base_ds;     /* low 6 bits is descriptor size */
+       __be32 state_pd;        /* this file stores the PD number here */
+       __be32 lkey;            /* L_Key of the MR covering the WQE buffer */
+       __be32 uar;             /* index of the UAR used for this SRQ */
+       __be32 wqe_cnt;
+       u32    reserved[2];
+};
+
+/*
+ * SRQ context in the layout written by mthca_arbel_init_srq_context().
+ * mthca_alloc_srq() builds it in a command mailbox and passes that
+ * mailbox to mthca_SW2HW_SRQ().  This file only sets
+ * state_logsize_srqn, lkey, db_index, logstride_usrpage and eq_pd;
+ * every other field is left zeroed.
+ */
+struct mthca_arbel_srq_context {
+       __be32 state_logsize_srqn;      /* (log2 of queue size in bytes) << 24 | SRQ number */
+       __be32 lkey;                    /* L_Key of the MR covering the WQE buffer */
+       __be32 db_index;                /* index of the SRQ doorbell record */
+       __be32 logstride_usrpage;       /* (wqe_shift - 4) << 29 | UAR index */
+       __be64 wqe_base;
+       __be32 eq_pd;                   /* MTHCA_EQ_ASYNC << 24 | PD number */
+       __be16 limit_watermark;
+       __be16 wqe_cnt;
+       u16    reserved1;
+       __be16 wqe_counter;
+       u32    reserved2[3];
+};
+
+/*
+ * Return the address of WQE number n inside the SRQ's work queue
+ * buffer, for both the direct and the page-list buffer layout.
+ */
+static void *get_wqe(struct mthca_srq *srq, int n)
+{
+       /* Byte offset of WQE n from the start of the queue. */
+       int offset = n << srq->wqe_shift;
+
+       /* Page-list buffer: select the page, then the offset within it. */
+       if (!srq->is_direct)
+               return srq->queue.page_list[offset >> PAGE_SHIFT].buf +
+                       (offset & (PAGE_SIZE - 1));
+
+       return srq->queue.direct.buf + offset;
+}
+
+/*
+ * Return a pointer to the location within a WQE that we're using as a
+ * link when the WQE is in the free list.  We use an offset of 4
+ * because in the Tavor case, posting a WQE may overwrite the first
+ * four bytes of the previous WQE.  The offset avoids corrupting our
+ * free list if the WQE has already completed and been put on the free
+ * list when we post the next WQE.
+ *
+ * The link holds the index of the next free WQE, or -1 at the end of
+ * the list.  Offset 4 is also where struct mthca_next_seg keeps its
+ * ee_nds field, so the link has to be read before a WQE's header is
+ * rewritten for posting.
+ */
+static inline int *wqe_to_link(void *wqe)
+{
+       return (int *) (wqe + 4);
+}
+
+/*
+ * Fill in a zeroed Tavor SRQ context for srq: descriptor size, PD
+ * number, the buffer's L_Key and the UAR index.  The UAR comes from the
+ * PD's user context when the PD has a uobject, otherwise from the
+ * driver's own UAR.
+ */
+static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
+                                        struct mthca_pd *pd,
+                                        struct mthca_srq *srq,
+                                        struct mthca_tavor_srq_context *context)
+{
+       memset(context, 0, sizeof *context);
+
+       context->uar = cpu_to_be32(pd->ibpd.uobject ?
+                                  to_mucontext(pd->ibpd.uobject->context)->uar.index :
+                                  dev->driver_uar.index);
+
+       context->lkey        = cpu_to_be32(srq->mr.ibmr.lkey);
+       context->state_pd    = cpu_to_be32(pd->pd_num);
+       /* Descriptor size is stored in units of 16 bytes. */
+       context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4));
+}
+
+/*
+ * Fill in a zeroed Arbel (mem-free) SRQ context for srq: log2 of the
+ * queue size in bytes together with the SRQ number, the buffer's
+ * L_Key, the doorbell record index, the WQE stride together with the
+ * UAR index, and the async EQ together with the PD number.
+ */
+static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
+                                        struct mthca_pd *pd,
+                                        struct mthca_srq *srq,
+                                        struct mthca_arbel_srq_context *context)
+{
+       /* log2 of the whole queue in bytes: entries times stride. */
+       int log_bytes = long_log2(srq->max) + srq->wqe_shift;
+
+       memset(context, 0, sizeof *context);
+
+       context->eq_pd    = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num);
+       context->db_index = cpu_to_be32(srq->db_index);
+       context->lkey     = cpu_to_be32(srq->mr.ibmr.lkey);
+
+       context->state_logsize_srqn = cpu_to_be32(log_bytes << 24 | srq->srqn);
+
+       /* Stride goes in the top bits, the UAR index is OR-ed in below. */
+       context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29);
+       context->logstride_usrpage |=
+               cpu_to_be32(pd->ibpd.uobject ?
+                           to_mucontext(pd->ibpd.uobject->context)->uar.index :
+                           dev->driver_uar.index);
+}
+
+/*
+ * Release what mthca_alloc_srq_buf() set up for a kernel SRQ: the
+ * wr_id array and the work queue buffer together with its MR.
+ */
+static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq)
+{
+       kfree(srq->wrid);
+
+       mthca_buf_free(dev, srq->max << srq->wqe_shift,
+                      &srq->queue, srq->is_direct, &srq->mr);
+}
+
+/*
+ * Allocate the wr_id array and the work queue buffer for an SRQ and
+ * put every WQE on the free list.
+ *
+ * Nothing is allocated when the PD has a uobject attached; the function
+ * then simply returns 0.  Returns 0 on success, -ENOMEM if the wr_id
+ * array cannot be allocated, or the error from mthca_buf_alloc().
+ */
+static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
+                              struct mthca_srq *srq)
+{
+       struct mthca_data_seg *seg;
+       void *wqe_end;
+       void *wqe;
+       int ret;
+       int n;
+
+       if (pd->ibpd.uobject)
+               return 0;
+
+       srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL);
+       if (!srq->wrid)
+               return -ENOMEM;
+
+       ret = mthca_buf_alloc(dev, srq->max << srq->wqe_shift,
+                             MTHCA_MAX_DIRECT_SRQ_SIZE,
+                             &srq->queue, &srq->is_direct, pd, 1, &srq->mr);
+       if (ret) {
+               kfree(srq->wrid);
+               return ret;
+       }
+
+       /*
+        * Chain WQE n to WQE n + 1 so that the whole buffer forms the
+        * initial free list (the last WQE is terminated with -1), and
+        * stamp every scatter entry with the invalid L_Key 0x100.
+        */
+       for (n = 0; n < srq->max; ++n) {
+               wqe     = get_wqe(srq, n);
+               wqe_end = wqe + (1 << srq->wqe_shift);
+
+               if (n < srq->max - 1)
+                       *wqe_to_link(wqe) = n + 1;
+               else
+                       *wqe_to_link(wqe) = -1;
+
+               /* Scatter entries start right after the next segment. */
+               seg = wqe + sizeof (struct mthca_next_seg);
+               while ((void *) seg < wqe_end) {
+                       seg->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
+                       ++seg;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Create a shared receive queue.
+ *
+ * Sizes the queue from attr, allocates an SRQ number, the mem-free
+ * table reference and doorbell record (mem-free HCAs only), the WQE
+ * buffer (skipped when the PD has a uobject), hands the SRQ context to
+ * the HCA with SW2HW_SRQ and finally publishes the SRQ in
+ * dev->srq_table.
+ *
+ * @dev:  device the SRQ is created on
+ * @pd:   protection domain of the SRQ
+ * @attr: requested max_wr / max_sge
+ * @srq:  SRQ structure to initialize
+ *
+ * Returns 0 on success or a negative errno.  On failure everything
+ * acquired so far is released again.
+ */
+int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
+                   struct ib_srq_attr *attr, struct mthca_srq *srq)
+{
+       struct mthca_mailbox *mailbox;
+       u8 status;
+       int ds;
+       int err;
+       int hw_err;
+
+       /* Sanity check SRQ size before proceeding */
+       if (attr->max_wr > 16 << 20 || attr->max_sge > 64)
+               return -EINVAL;
+
+       srq->max      = attr->max_wr;
+       srq->max_gs   = attr->max_sge;
+       srq->last     = NULL;
+       srq->counter  = 0;
+
+       if (mthca_is_memfree(dev))
+               srq->max = roundup_pow_of_two(srq->max + 1);
+
+       /*
+        * Every WQE has to hold the next segment plus max_gs data
+        * segments, so the stride must be at least that large; 64 bytes
+        * is the lower bound, not a cap.  (Taking the minimum here would
+        * limit the stride to 64 bytes and let the post functions write
+        * past the end of a WQE as soon as max_gs exceeds 3.)
+        */
+       ds = max(64UL,
+                roundup_pow_of_two(sizeof (struct mthca_next_seg) +
+                                   srq->max_gs * sizeof (struct mthca_data_seg)));
+       srq->wqe_shift = long_log2(ds);
+
+       srq->srqn = mthca_alloc(&dev->srq_table.alloc);
+       if (srq->srqn == -1)
+               return -ENOMEM;
+
+       if (mthca_is_memfree(dev)) {
+               err = mthca_table_get(dev, dev->srq_table.table, srq->srqn);
+               if (err)
+                       goto err_out;
+
+               if (!pd->ibpd.uobject) {
+                       srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ,
+                                                      srq->srqn, &srq->db);
+                       if (srq->db_index < 0) {
+                               err = -ENOMEM;
+                               goto err_out_icm;
+                       }
+               }
+       }
+
+       mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+       if (IS_ERR(mailbox)) {
+               err = PTR_ERR(mailbox);
+               goto err_out_db;
+       }
+
+       err = mthca_alloc_srq_buf(dev, pd, srq);
+       if (err)
+               goto err_out_mailbox;
+
+       spin_lock_init(&srq->lock);
+       /* Initial reference; dropped again in mthca_free_srq(). */
+       atomic_set(&srq->refcount, 1);
+       init_waitqueue_head(&srq->wait);
+
+       if (mthca_is_memfree(dev))
+               mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf);
+       else
+               mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf);
+
+       err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn, &status);
+
+       if (err) {
+               mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err);
+               goto err_out_free_buf;
+       }
+       if (status) {
+               mthca_warn(dev, "SW2HW_SRQ returned status 0x%02x\n",
+                          status);
+               err = -EINVAL;
+               goto err_out_free_buf;
+       }
+
+       spin_lock_irq(&dev->srq_table.lock);
+       if (mthca_array_set(&dev->srq_table.srq,
+                           srq->srqn & (dev->limits.num_srqs - 1),
+                           srq)) {
+               spin_unlock_irq(&dev->srq_table.lock);
+               /*
+                * err is still 0 from the successful SW2HW_SRQ above, so
+                * the failure has to be recorded explicitly here.
+                */
+               err = -ENOMEM;
+               goto err_out_free_srq;
+       }
+       spin_unlock_irq(&dev->srq_table.lock);
+
+       mthca_free_mailbox(dev, mailbox);
+
+       srq->first_free = 0;
+       srq->last_free  = srq->max - 1;
+
+       return 0;
+
+err_out_free_srq:
+       /*
+        * Take the SRQ back from the HCA.  The result is only logged and
+        * kept in hw_err so that it cannot overwrite the error code that
+        * is returned to the caller.
+        */
+       hw_err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status);
+       if (hw_err)
+               mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", hw_err);
+       else if (status)
+               mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status);
+
+err_out_free_buf:
+       if (!pd->ibpd.uobject)
+               mthca_free_srq_buf(dev, srq);
+
+err_out_mailbox:
+       mthca_free_mailbox(dev, mailbox);
+
+err_out_db:
+       if (!pd->ibpd.uobject && mthca_is_memfree(dev))
+               mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
+
+err_out_icm:
+       mthca_table_put(dev, dev->srq_table.table, srq->srqn);
+
+err_out:
+       mthca_free(&dev->srq_table.alloc, srq->srqn);
+
+       return err;
+}
+
+/*
+ * Destroy an SRQ created by mthca_alloc_srq().
+ *
+ * Takes the SRQ back from the HCA, removes it from dev->srq_table,
+ * waits for all outstanding references to go away and then releases
+ * the buffer, doorbell record, table reference and SRQ number.
+ */
+void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
+{
+       struct mthca_mailbox *mailbox;
+       int err;
+       u8 status;
+
+       mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+       if (IS_ERR(mailbox)) {
+               /* NOTE(review): returning here releases none of the SRQ's resources. */
+               mthca_warn(dev, "No memory for mailbox to free SRQ.\n");
+               return;
+       }
+
+       /* A HW2SW_SRQ failure is only logged; teardown continues regardless. */
+       err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status);
+       if (err)
+               mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
+       else if (status)
+               mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status);
+
+       /* Unpublish the SRQ so that mthca_srq_event() can no longer look it up. */
+       spin_lock_irq(&dev->srq_table.lock);
+       mthca_array_clear(&dev->srq_table.srq,
+                         srq->srqn & (dev->limits.num_srqs - 1));
+       spin_unlock_irq(&dev->srq_table.lock);
+
+       /*
+        * Drop the initial reference taken in mthca_alloc_srq() and wait
+        * until event handlers that still hold a reference have dropped
+        * theirs (mthca_srq_event() wakes srq->wait on the last put).
+        */
+       atomic_dec(&srq->refcount);
+       wait_event(srq->wait, !atomic_read(&srq->refcount));
+
+       /* Buffer and doorbell record were only allocated when there is no uobject. */
+       if (!srq->ibsrq.uobject) {
+               mthca_free_srq_buf(dev, srq);
+               if (mthca_is_memfree(dev))
+                       mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
+       }
+
+       mthca_table_put(dev, dev->srq_table.table, srq->srqn);
+       mthca_free(&dev->srq_table.alloc, srq->srqn);
+       mthca_free_mailbox(dev, mailbox);
+}
+
+/*
+ * Deliver an asynchronous event for SRQ number srqn to the consumer's
+ * event handler, if one is registered.
+ *
+ * The SRQ is looked up in dev->srq_table and pinned with a reference
+ * for the duration of the callback, so mthca_free_srq() cannot release
+ * it underneath the handler.
+ */
+void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
+                    enum ib_event_type event_type)
+{
+       struct mthca_srq *srq;
+       struct ib_event event;
+
+       /*
+        * NOTE(review): plain spin_lock() here while the other users of
+        * srq_table.lock use spin_lock_irq(); presumably this runs with
+        * interrupts already disabled -- confirm against the caller.
+        */
+       spin_lock(&dev->srq_table.lock);
+       srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1));
+       if (srq)
+               atomic_inc(&srq->refcount);
+       spin_unlock(&dev->srq_table.lock);
+
+       if (!srq) {
+               mthca_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
+               return;
+       }
+
+       /* No handler registered: just drop the reference again. */
+       if (!srq->ibsrq.event_handler)
+               goto out;
+
+       event.device      = &dev->ib_dev;
+       event.event       = event_type;
+       event.element.srq  = &srq->ibsrq;
+       srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);
+
+out:
+       /* Last reference gone: wake up mthca_free_srq() waiting on srq->wait. */
+       if (atomic_dec_and_test(&srq->refcount))
+               wake_up(&srq->wait);
+}
+
+/*
+ * Put the WQE at byte offset wqe_addr within the SRQ buffer back at
+ * the tail of the SRQ's free list.
+ *
+ * This function must be called with IRQs disabled.
+ */
+void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
+{
+       int ind;
+
+       /* Convert the byte offset into a WQE index. */
+       ind = wqe_addr >> srq->wqe_shift;
+
+       spin_lock(&srq->lock);
+
+       /* Non-empty list: append behind the current tail; empty list: ind becomes the head. */
+       if (likely(srq->first_free >= 0))
+               *wqe_to_link(get_wqe(srq, srq->last_free)) = ind;
+       else
+               srq->first_free = ind;
+
+       /* ind is the new tail, so its link terminates the list. */
+       *wqe_to_link(get_wqe(srq, ind)) = -1;
+       srq->last_free = ind;
+
+       spin_unlock(&srq->lock);
+}
+
+/*
+ * Post a chain of receive work requests to a Tavor-mode SRQ.
+ *
+ * Each request takes the WQE at the head of the free list, fills in
+ * its scatter list, chains it behind the previously posted WQE and
+ * records the caller's wr_id.  After the chain has been processed one
+ * receive doorbell is rung for everything that was queued.
+ *
+ * @ibsrq:  SRQ to post to
+ * @wr:     first work request of the chain
+ * @bad_wr: on failure, set to the first request that was not posted
+ *
+ * Returns 0 on success, -ENOMEM when the SRQ has no free WQE left and
+ * -EINVAL when a request has more SGEs than the SRQ supports.
+ * Requests ahead of *bad_wr remain posted.
+ */
+int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+                             struct ib_recv_wr **bad_wr)
+{
+       struct mthca_dev *dev = to_mdev(ibsrq->device);
+       struct mthca_srq *srq = to_msrq(ibsrq);
+       unsigned long flags;
+       int err = 0;
+       int first_ind;
+       int ind;
+       int next_ind;
+       int nreq;
+       int i;
+       void *wqe;
+       void *prev_wqe;
+
+       spin_lock_irqsave(&srq->lock, flags);
+
+       first_ind = srq->first_free;
+
+       /*
+        * Error exits below must leave the loop with "break" so that the
+        * doorbell is still rung for the requests already queued and the
+        * lock is released on the common exit path.
+        */
+       for (nreq = 0; wr; ++nreq, wr = wr->next) {
+               ind = srq->first_free;
+
+               if (ind < 0) {
+                       mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+                       err = -ENOMEM;
+                       *bad_wr = wr;
+                       break;
+               }
+
+               /*
+                * Validate the request before touching the WQE: the
+                * free-list link lives at offset 4, which is the ee_nds
+                * field cleared below, so a WQE that stays on the free
+                * list must not have its header rewritten.
+                */
+               if (unlikely(wr->num_sge > srq->max_gs)) {
+                       err = -EINVAL;
+                       *bad_wr = wr;
+                       break;
+               }
+
+               wqe       = get_wqe(srq, ind);
+               next_ind  = *wqe_to_link(wqe);
+               prev_wqe  = srq->last;
+               srq->last = wqe;
+
+               ((struct mthca_next_seg *) wqe)->nda_op = 0;
+               ((struct mthca_next_seg *) wqe)->ee_nds = 0;
+               /* flags field will always remain 0 */
+
+               wqe += sizeof (struct mthca_next_seg);
+
+               for (i = 0; i < wr->num_sge; ++i) {
+                       ((struct mthca_data_seg *) wqe)->byte_count =
+                               cpu_to_be32(wr->sg_list[i].length);
+                       ((struct mthca_data_seg *) wqe)->lkey =
+                               cpu_to_be32(wr->sg_list[i].lkey);
+                       ((struct mthca_data_seg *) wqe)->addr =
+                               cpu_to_be64(wr->sg_list[i].addr);
+                       wqe += sizeof (struct mthca_data_seg);
+               }
+
+               /* Terminate a short scatter list with an invalid-L_Key entry. */
+               if (i < srq->max_gs) {
+                       ((struct mthca_data_seg *) wqe)->byte_count = 0;
+                       ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
+                       ((struct mthca_data_seg *) wqe)->addr = 0;
+               }
+
+               /*
+                * Link the previously posted WQE to this one.  The next
+                * address is written first and made visible before the
+                * DBD bit is set in ee_nds.
+                */
+               if (likely(prev_wqe)) {
+                       ((struct mthca_next_seg *) prev_wqe)->nda_op =
+                               cpu_to_be32((ind << srq->wqe_shift) | 1);
+                       wmb();
+                       ((struct mthca_next_seg *) prev_wqe)->ee_nds =
+                               cpu_to_be32(MTHCA_NEXT_DBD);
+               }
+
+               srq->wrid[ind]  = wr->wr_id;
+               srq->first_free = next_ind;
+       }
+
+       if (likely(nreq)) {
+               __be32 doorbell[2];
+
+               doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
+               doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq);
+
+               /*
+                * Make sure that descriptors are written before
+                * doorbell is rung.
+                */
+               wmb();
+
+               mthca_write64(doorbell,
+                             dev->kar + MTHCA_RECEIVE_DOORBELL,
+                             MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+       }
+
+       spin_unlock_irqrestore(&srq->lock, flags);
+       return err;
+}
+
+/*
+ * Post a chain of receive work requests to an Arbel (mem-free) SRQ.
+ *
+ * Each request takes the WQE at the head of the free list, points its
+ * next segment at the following free WQE, fills in the scatter list
+ * and records the caller's wr_id.  After the chain has been processed
+ * the SRQ counter is advanced and written to the doorbell record.
+ *
+ * @ibsrq:  SRQ to post to
+ * @wr:     first work request of the chain
+ * @bad_wr: on failure, set to the first request that was not posted
+ *
+ * Returns 0 on success, -ENOMEM when the SRQ has no free WQE left and
+ * -EINVAL when a request has more SGEs than the SRQ supports.
+ * Requests ahead of *bad_wr remain posted.
+ */
+int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+                             struct ib_recv_wr **bad_wr)
+{
+       struct mthca_dev *dev = to_mdev(ibsrq->device);
+       struct mthca_srq *srq = to_msrq(ibsrq);
+       unsigned long flags;
+       int err = 0;
+       int ind;
+       int next_ind;
+       int nreq;
+       int i;
+       void *wqe;
+
+       spin_lock_irqsave(&srq->lock, flags);
+
+       /*
+        * Error exits below must leave the loop with "break" so that the
+        * doorbell record is still updated for the requests already
+        * queued and the lock is released on the common exit path.
+        */
+       for (nreq = 0; wr; ++nreq, wr = wr->next) {
+               ind = srq->first_free;
+
+               if (ind < 0) {
+                       mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+                       err = -ENOMEM;
+                       *bad_wr = wr;
+                       break;
+               }
+
+               /*
+                * Validate the request before touching the WQE: the
+                * free-list link lives at offset 4, which is the ee_nds
+                * field cleared below, so a WQE that stays on the free
+                * list must not have its header rewritten.
+                */
+               if (unlikely(wr->num_sge > srq->max_gs)) {
+                       err = -EINVAL;
+                       *bad_wr = wr;
+                       break;
+               }
+
+               wqe       = get_wqe(srq, ind);
+               next_ind  = *wqe_to_link(wqe);
+
+               ((struct mthca_next_seg *) wqe)->nda_op =
+                       cpu_to_be32((next_ind << srq->wqe_shift) | 1);
+               ((struct mthca_next_seg *) wqe)->ee_nds = 0;
+               /* flags field will always remain 0 */
+
+               wqe += sizeof (struct mthca_next_seg);
+
+               for (i = 0; i < wr->num_sge; ++i) {
+                       ((struct mthca_data_seg *) wqe)->byte_count =
+                               cpu_to_be32(wr->sg_list[i].length);
+                       ((struct mthca_data_seg *) wqe)->lkey =
+                               cpu_to_be32(wr->sg_list[i].lkey);
+                       ((struct mthca_data_seg *) wqe)->addr =
+                               cpu_to_be64(wr->sg_list[i].addr);
+                       wqe += sizeof (struct mthca_data_seg);
+               }
+
+               /* Terminate a short scatter list with an invalid-L_Key entry. */
+               if (i < srq->max_gs) {
+                       ((struct mthca_data_seg *) wqe)->byte_count = 0;
+                       ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
+                       ((struct mthca_data_seg *) wqe)->addr = 0;
+               }
+
+               srq->wrid[ind]  = wr->wr_id;
+               srq->first_free = next_ind;
+       }
+
+       if (likely(nreq)) {
+               srq->counter += nreq;
+
+               /*
+                * Make sure that descriptors are written before
+                * we write doorbell record.
+                */
+               wmb();
+               *srq->db = cpu_to_be32(srq->counter);
+       }
+
+       spin_unlock_irqrestore(&srq->lock, flags);
+       return err;
+}
+
+/*
+ * Set up the per-device SRQ table: its lock, the SRQ number allocator
+ * and the number-to-SRQ lookup array.  Does nothing and returns 0 when
+ * the device does not have MTHCA_FLAG_SRQ set.  Returns 0 on success or
+ * the error from the failing initializer.
+ */
+int __devinit mthca_init_srq_table(struct mthca_dev *dev)
+{
+       int ret;
+
+       if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
+               return 0;
+
+       spin_lock_init(&dev->srq_table.lock);
+
+       ret = mthca_alloc_init(&dev->srq_table.alloc,
+                              dev->limits.num_srqs,
+                              dev->limits.num_srqs - 1,
+                              dev->limits.reserved_srqs);
+       if (ret)
+               return ret;
+
+       ret = mthca_array_init(&dev->srq_table.srq,
+                              dev->limits.num_srqs);
+       if (!ret)
+               return 0;
+
+       /* The lookup array could not be set up: undo the allocator. */
+       mthca_alloc_cleanup(&dev->srq_table.alloc);
+       return ret;
+}
+
+/*
+ * Tear down what mthca_init_srq_table() set up.  Does nothing when the
+ * device does not have MTHCA_FLAG_SRQ set.
+ */
+void __devexit mthca_cleanup_srq_table(struct mthca_dev *dev)
+{
+       if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
+               mthca_array_cleanup(&dev->srq_table.srq, dev->limits.num_srqs);
+               mthca_alloc_cleanup(&dev->srq_table.alloc);
+       }
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h

index 3024c1b4547d790e3f9cf0bca4c87bf01c736fb6..41613ec8a04e63c92fdfdbfe5d82e897c63c6a8b 100644 (file)
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -69,6 +69,17 @@ struct mthca_create_cq_resp {
         __u32 reserved;
  };
  
+/*
+ * NOTE(review): presumably the driver-specific data userspace supplies
+ * when creating an SRQ (its users are not visible here) -- confirm
+ * against the create_srq verb in mthca_provider.c.
+ */
+struct mthca_create_srq {
+       __u32 lkey;
+       __u32 db_index;
+       __u64 db_page;
+};
+
+/*
+ * NOTE(review): presumably the driver-specific response returned to
+ * userspace after creating an SRQ (its users are not visible here) --
+ * confirm against the create_srq verb in mthca_provider.c.
+ */
+struct mthca_create_srq_resp {
+       __u32 srqn;
+       __u32 reserved;
+};
+
  struct mthca_create_qp {
         __u32 lkey;
         __u32 reserved;
diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h

new file mode 100644 (file)

index 0000000..1f4c0ff
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_wqe.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_wqe.h 3047 2005-08-10 03:59:35Z roland $
+ */
+
+#ifndef MTHCA_WQE_H
+#define MTHCA_WQE_H
+
+#include <linux/types.h>
+
+/*
+ * Bit flags for WQE control fields.  The positions match the bit
+ * layouts documented on struct mthca_next_seg (ee_nds and flags) and
+ * on struct mthca_mlx_seg (flags).
+ */
+enum {
+       MTHCA_NEXT_DBD       = 1 << 7,  /* ee_nds [7] */
+       MTHCA_NEXT_FENCE     = 1 << 6,  /* ee_nds [6] */
+       MTHCA_NEXT_CQ_UPDATE = 1 << 3,  /* flags  [3] */
+       MTHCA_NEXT_EVENT_GEN = 1 << 2,  /* flags  [2] */
+       MTHCA_NEXT_SOLICIT   = 1 << 1,  /* flags  [1] */
+
+       MTHCA_MLX_VL15       = 1 << 17, /* mlx flags [17] */
+       MTHCA_MLX_SLR        = 1 << 16  /* mlx flags [16] */
+};
+
+/*
+ * Sentinel L_Key written into scatter entries that carry no buffer:
+ * the SRQ code stamps it on every entry of a fresh WQE and on the
+ * entry following a short scatter list.
+ */
+enum {
+       MTHCA_INVAL_LKEY = 0x100
+};
+
+/*
+ * Segment at the start of a WQE; the SRQ code places the data segments
+ * directly behind it.  All fields are big-endian.
+ */
+struct mthca_next_seg {
+       __be32 nda_op;          /* [31:6] next WQE [4:0] next opcode */
+       __be32 ee_nds;          /* [31:8] next EE  [7] DBD [6] F [5:0] next WQE size */
+       __be32 flags;           /* [3] CQ [2] Event [1] Solicit */
+       __be32 imm;             /* immediate data */
+};
+
+/*
+ * NOTE(review): presumably the UD (datagram) segment of a Tavor send
+ * WQE; its users are not visible here -- confirm against mthca_qp.c.
+ * Non-reserved fields are big-endian.
+ */
+struct mthca_tavor_ud_seg {
+       u32    reserved1;
+       __be32 lkey;
+       __be64 av_addr;
+       u32    reserved2[4];
+       __be32 dqpn;
+       __be32 qkey;
+       u32    reserved3[2];
+};
+
+/*
+ * NOTE(review): presumably the UD (datagram) segment of an Arbel send
+ * WQE; its users are not visible here -- confirm against mthca_qp.c.
+ * Non-reserved fields are big-endian.
+ */
+struct mthca_arbel_ud_seg {
+       __be32 av[8];
+       __be32 dqpn;
+       __be32 qkey;
+       u32    reserved[2];
+};
+
+/*
+ * NOTE(review): presumably the WQE segment describing a memory window
+ * bind; its users are not visible here -- confirm against mthca_qp.c.
+ * Non-reserved fields are big-endian.
+ */
+struct mthca_bind_seg {
+       __be32 flags;           /* [31] Atomic [30] rem write [29] rem read */
+       u32    reserved;
+       __be32 new_rkey;
+       __be32 lkey;
+       __be64 addr;
+       __be64 length;
+};
+
+/*
+ * NOTE(review): presumably the remote address segment of an RDMA or
+ * atomic WQE; its users are not visible here -- confirm against
+ * mthca_qp.c.  Non-reserved fields are big-endian.
+ */
+struct mthca_raddr_seg {
+       __be64 raddr;
+       __be32 rkey;
+       u32    reserved;
+};
+
+/*
+ * NOTE(review): presumably the operand segment of an atomic WQE; its
+ * users are not visible here -- confirm against mthca_qp.c.  Both
+ * fields are big-endian.
+ */
+struct mthca_atomic_seg {
+       __be64 swap_add;
+       __be64 compare;
+};
+
+/*
+ * One scatter/gather entry of a WQE.  The SRQ post functions fill one
+ * of these per SGE from the work request's length, lkey and addr,
+ * converting each to big-endian.
+ */
+struct mthca_data_seg {
+       __be32 byte_count;
+       __be32 lkey;
+       __be64 addr;
+};
+
+/*
+ * Control segment filled in by build_mlx_header(), which sets the
+ * VL15/SLR/SL bits in flags, copies the destination LID into rlid and
+ * clears vcrc.  All fields are big-endian.
+ */
+struct mthca_mlx_seg {
+       __be32 nda_op;
+       __be32 nds;
+       __be32 flags;           /* [17] VL15 [16] SLR [14:12] static rate
+                                  [11:8] SL [3] C [2] E */
+       __be16 rlid;
+       __be16 vcrc;
+};
+
+#endif /* MTHCA_WQE_H */
diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile

index 394bc08abc6fc318677f049bd3d0fc639931a69f..8935e74ae3f8e9608ced0be86d2e3818db268dd8 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/Makefile
+++ b/drivers/infiniband/ulp/ipoib/Makefile
@@ -1,5 +1,3 @@
-EXTRA_CFLAGS += -Idrivers/infiniband/include
-
  obj-$(CONFIG_INFINIBAND_IPOIB)                 += ib_ipoib.o
  
  ib_ipoib-y                                     := ipoib_main.o \
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h

index 04c98f54e9c4dbc4740fc724a571a0978670c834..bea960b8191feffff376dd7b64a26c2d9586d6f6 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -1,5 +1,7 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -49,9 +51,9 @@
  #include <asm/atomic.h>
  #include <asm/semaphore.h>
  
-#include <ib_verbs.h>
-#include <ib_pack.h>
-#include <ib_sa.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_sa.h>
  
  /* constants */
  
@@ -88,8 +90,8 @@ enum {
  /* structs */
  
  struct ipoib_header {
-       u16 proto;
-       u16 reserved;
+       __be16  proto;
+       u16     reserved;
  };
  
  struct ipoib_pseudoheader {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c

index a84e5fe0f1933d8e06acacd0b3ea86b4b1612381..38b150f775e7fdf931315869b2ce5e23335d6573 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -97,7 +97,7 @@ static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr)
  
                 for (n = 0, i = 0; i < sizeof mgid / 2; ++i) {
                         n += sprintf(gid_buf + n, "%x",
-                                    be16_to_cpu(((u16 *)mgid.raw)[i]));
+                                    be16_to_cpu(((__be16 *) mgid.raw)[i]));
                         if (i < sizeof mgid / 2 - 1)
                                 gid_buf[n++] = ':';
                 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c

index eee82363167da841c3aad9b56443bac922bceeb0..ef0e3894863c023c08082c4c201da070fb393871 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1,5 +1,8 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -35,7 +38,7 @@
  #include <linux/delay.h>
  #include <linux/dma-mapping.h>
  
-#include <ib_cache.h>
+#include <rdma/ib_cache.h>
  
  #include "ipoib.h"
  
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c

index 6f60abbaebd5602e472f625edc144af0d465aca8..0e8ac138e355bd4c71e2c21f58a809eb7d0f190e 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1,5 +1,7 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -34,7 +36,6 @@
  
  #include "ipoib.h"
  
-#include <linux/version.h>
  #include <linux/module.h>
  
  #include <linux/init.h>
@@ -600,14 +601,15 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
  
                         ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb);
                 } else {
-                       /* unicast GID -- should be ARP reply */
+                       /* unicast GID -- should be ARP or RARP reply */
  
-                       if (be16_to_cpup((u16 *) skb->data) != ETH_P_ARP) {
+                       if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
+                           (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
                                 ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
                                            IPOIB_GID_FMT "\n",
                                            skb->dst ? "neigh" : "dst",
-                                          be16_to_cpup((u16 *) skb->data),
-                                          be32_to_cpup((u32 *) phdr->hwaddr),
+                                          be16_to_cpup((__be16 *) skb->data),
+                                          be32_to_cpup((__be32 *) phdr->hwaddr),
                                            IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4)));
                                 dev_kfree_skb_any(skb);
                                 ++priv->stats.tx_dropped;
@@ -670,7 +672,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
  
-       schedule_work(&priv->restart_task);
+       queue_work(ipoib_workqueue, &priv->restart_task);
  }
  
  static void ipoib_neigh_destructor(struct neighbour *n)
@@ -779,15 +781,11 @@ void ipoib_dev_cleanup(struct net_device *dev)
  
         ipoib_ib_dev_cleanup(dev);
  
-       if (priv->rx_ring) {
-               kfree(priv->rx_ring);
-               priv->rx_ring = NULL;
-       }
+       kfree(priv->rx_ring);
+       kfree(priv->tx_ring);
  
-       if (priv->tx_ring) {
-               kfree(priv->tx_ring);
-               priv->tx_ring = NULL;
-       }
+       priv->rx_ring = NULL;
+       priv->tx_ring = NULL;
  }
  
  static void ipoib_setup(struct net_device *dev)
@@ -885,6 +883,12 @@ static ssize_t create_child(struct class_device *cdev,
         if (pkey < 0 || pkey > 0xffff)
                 return -EINVAL;
  
+       /*
+        * Set the full membership bit, so that we join the right
+        * broadcast group, etc.
+        */
+       pkey |= 0x8000;
+
         ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev),
                              pkey);
  
@@ -937,6 +941,12 @@ static struct net_device *ipoib_add_port(const char *format,
                 goto alloc_mem_failed;
         }
  
+       /*
+        * Set the full membership bit, so that we join the right
+        * broadcast group, etc.
+        */
+       priv->pkey |= 0x8000;
+
         priv->dev->broadcast[8] = priv->pkey >> 8;
         priv->dev->broadcast[9] = priv->pkey & 0xff;
  
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c

index 70208c3d21e28d415a8f9a82f886e62b64c4590d..aca7aea18a69006ed0b403a439611c66dd0f9781 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -1,5 +1,7 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -357,7 +359,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
  
         rec.mgid     = mcast->mcmember.mgid;
         rec.port_gid = priv->local_gid;
-       rec.pkey     = be16_to_cpu(priv->pkey);
+       rec.pkey     = cpu_to_be16(priv->pkey);
  
         ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec,
                                      IB_SA_MCMEMBER_REC_MGID            |
@@ -457,7 +459,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
  
         rec.mgid     = mcast->mcmember.mgid;
         rec.port_gid = priv->local_gid;
-       rec.pkey     = be16_to_cpu(priv->pkey);
+       rec.pkey     = cpu_to_be16(priv->pkey);
  
         comp_mask =
                 IB_SA_MCMEMBER_REC_MGID         |
@@ -646,7 +648,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
  
         rec.mgid     = mcast->mcmember.mgid;
         rec.port_gid = priv->local_gid;
-       rec.pkey     = be16_to_cpu(priv->pkey);
+       rec.pkey     = cpu_to_be16(priv->pkey);
  
         /* Remove ourselves from the multicast group */
         ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c

index 4933edf062c2bbc95890fd33da01484aea48a981..79f59d0563edc04b04a1e9d211ce6f6bbfa4c538 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -32,7 +33,7 @@
   * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
   */
  
-#include <ib_cache.h>
+#include <rdma/ib_cache.h>
  
  #include "ipoib.h"
  
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c

index 94b8ea812feff35bcf3bdee04ee85b2df1937f80..332d730e60c23a81e3576ad9e27c00fd43f1cd7e 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -32,7 +32,6 @@
   * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $
   */
  
-#include <linux/version.h>
  #include <linux/module.h>
  
  #include <linux/init.h>
diff --git a/drivers/input/gameport/ns558.c b/drivers/input/gameport/ns558.c

index 1ab5f2dc8a2a4a8fde91f56847165fa39312d9a1..70f051894a3cfa4d50f5dda7268c6af26e63b96f 100644 (file)
--- a/drivers/input/gameport/ns558.c
+++ b/drivers/input/gameport/ns558.c
@@ -275,9 +275,9 @@ static int __init ns558_init(void)
  
  static void __exit ns558_exit(void)
  {
-       struct ns558 *ns558;
+       struct ns558 *ns558, *safe;
  
-       list_for_each_entry(ns558, &ns558_list, node) {
+       list_for_each_entry_safe(ns558, safe, &ns558_list, node) {
                 gameport_unregister_port(ns558->gameport);
                 release_region(ns558->io & ~(ns558->size - 1), ns558->size);
                 kfree(ns558);
diff --git a/drivers/isdn/act2000/capi.c b/drivers/isdn/act2000/capi.c

index afa46681f9833c576e3afd7b54f9a74debdac2bc..6ae6eb32211141a93c299a65f15dd8bd14e20a79 100644 (file)
--- a/drivers/isdn/act2000/capi.c
+++ b/drivers/isdn/act2000/capi.c
@@ -606,7 +606,7 @@ handle_ack(act2000_card *card, act2000_chan *chan, __u8 blocknr) {
                  if ((((m->msg.data_b3_req.fakencci >> 8) & 0xff) == chan->ncci) &&
                     (m->msg.data_b3_req.blocknr == blocknr)) {
                         /* found corresponding DATA_B3_REQ */
-                        skb_unlink(tmp);
+                        skb_unlink(tmp, &card->ackq);
                         chan->queued -= m->msg.data_b3_req.datalen;
                         if (m->msg.data_b3_req.flags)
                                 ret = m->msg.data_b3_req.datalen;
diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c

index f8570fd9d2abb6ec707d007a4080d91cd5b2fceb..3abd7fc6e5ef3e0331cc080a61a214b2be27066f 100644 (file)
--- a/drivers/isdn/capi/capifs.c
+++ b/drivers/isdn/capi/capifs.c
@@ -191,8 +191,10 @@ static int __init capifs_init(void)
         err = register_filesystem(&capifs_fs_type);
         if (!err) {
                 capifs_mnt = kern_mount(&capifs_fs_type);
-               if (IS_ERR(capifs_mnt))
+               if (IS_ERR(capifs_mnt)) {
                         err = PTR_ERR(capifs_mnt);
+                       unregister_filesystem(&capifs_fs_type);
+               }
         }
         if (!err)
                 printk(KERN_NOTICE "capifs: Rev %s\n", rev);
diff --git a/drivers/isdn/hisax/Kconfig b/drivers/isdn/hisax/Kconfig

index 6c7b8bffc6fdb10da595c30dd04ecb4154418685..801c98f30e5c7c767e1a39822033d68b81540626 100644 (file)
--- a/drivers/isdn/hisax/Kconfig
+++ b/drivers/isdn/hisax/Kconfig
@@ -134,6 +134,7 @@ config HISAX_AVM_A1
  
  config HISAX_FRITZPCI
         bool "AVM PnP/PCI (Fritz!PnP/PCI)"
+       depends on BROKEN || !PPC64
         help
           This enables HiSax support for the AVM "Fritz!PnP" and "Fritz!PCI".
           See <file:Documentation/isdn/README.HiSax> on how to configure it.
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c

index f30e8e63ae0dedf9802a044d5d3c6eedd3679721..96c115e13389cd16b2d18a34bd5bc65e19d4fd47 100644 (file)
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -1786,7 +1786,6 @@ isdn_net_receive(struct net_device *ndev, struct sk_buff *skb)
                 lp->stats.rx_bytes += skb->len;
         }
         skb->dev = ndev;
-       skb->input_dev = ndev;
         skb->pkt_type = PACKET_HOST;
         skb->mac.raw = skb->data;
  #ifdef ISDN_DEBUG_NET_DUMP
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c

index 260a323a96d38c07003f80b92c2b3dad5c2c025a..d97a9be5469c45deb6f8d5965a3a84125c446314 100644 (file)
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1177,7 +1177,6 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
                 mlp->huptimer = 0;
  #endif /* CONFIG_IPPP_FILTER */
         skb->dev = dev;
-       skb->input_dev = dev;
         skb->mac.raw = skb->data;
         netif_rx(skb);
         /* net_dev->local->stats.rx_packets++; done in isdn_net.c */
diff --git a/drivers/isdn/icn/icn.c b/drivers/isdn/icn/icn.c

index e0d1b01cc74c5b5badfa64cd2ca1a4a1457b7c68..386df71eee7473f0c7582c26331bdee63c623b68 100644 (file)
--- a/drivers/isdn/icn/icn.c
+++ b/drivers/isdn/icn/icn.c
@@ -1650,7 +1650,7 @@ static void __exit icn_exit(void)
  {
         isdn_ctrl cmd;
         icn_card *card = cards;
-       icn_card *last;
+       icn_card *last, *tmpcard;
         int i;
         unsigned long flags;
  
@@ -1670,8 +1670,9 @@ static void __exit icn_exit(void)
                         for (i = 0; i < ICN_BCH; i++)
                                 icn_free_queue(card, i);
                 }
-               card = card->next;
+               tmpcard = card->next;
                 spin_unlock_irqrestore(&card->lock, flags);
+               card = tmpcard;
         }
         card = cards;
         cards = NULL;
diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig

index 65ab64c43b3e59ea16d59844064178bb4f62b2b7..bc3e096d84f75a5660e1c98ba9db99d955a1a075 100644 (file)
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -103,7 +103,7 @@ config PMAC_MEDIABAY
  # on non-powerbook machines (but only on PMU based ones AFAIK)
  config PMAC_BACKLIGHT
         bool "Backlight control for LCD screens"
-       depends on ADB_PMU
+       depends on ADB_PMU && (BROKEN || !PPC64)
         help
           Say Y here to build in code to manage the LCD backlight on a
           Macintosh PowerBook.  With this code, the backlight will be turned
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c

index 70bca955e0de86c4003cdb919885518d4243b2d3..41df4cda66e2f27b668a98988aaf80121fb16236 100644 (file)
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -818,8 +818,7 @@ int bitmap_unplug(struct bitmap *bitmap)
         return 0;
  }
  
-static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
-       unsigned long sectors, int in_sync);
+static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset);
  /* * bitmap_init_from_disk -- called at bitmap_create time to initialize
   * the in-memory bitmap from the on-disk bitmap -- also, sets up the
   * memory mapping of the bitmap file
@@ -828,7 +827,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
   *   previously kicked from the array, we mark all the bits as
   *   1's in order to cause a full resync.
   */
-static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
+static int bitmap_init_from_disk(struct bitmap *bitmap)
  {
         unsigned long i, chunks, index, oldindex, bit;
         struct page *page = NULL, *oldpage = NULL;
@@ -929,8 +928,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
                 }
                 if (test_bit(bit, page_address(page))) {
                         /* if the disk bit is set, set the memory bit */
-                       bitmap_set_memory_bits(bitmap,
-                                       i << CHUNK_BLOCK_SHIFT(bitmap), 1, in_sync);
+                       bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap));
                         bit_cnt++;
                 }
         }
@@ -1426,35 +1424,53 @@ void bitmap_close_sync(struct bitmap *bitmap)
         }
  }
  
-static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
-                                  unsigned long sectors, int in_sync)
+static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset)
  {
         /* For each chunk covered by any of these sectors, set the
-        * counter to 1 and set resync_needed unless in_sync.  They should all
+        * counter to 1 and set resync_needed.  They should all
          * be 0 at this point
          */
-       while (sectors) {
-               int secs;
-               bitmap_counter_t *bmc;
-               spin_lock_irq(&bitmap->lock);
-               bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
-               if (!bmc) {
-                       spin_unlock_irq(&bitmap->lock);
-                       return;
-               }
-               if (! *bmc) {
-                       struct page *page;
-                       *bmc = 1 | (in_sync? 0 : NEEDED_MASK);
-                       bitmap_count_page(bitmap, offset, 1);
-                       page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
-                       set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
-               }
+
+       int secs;
+       bitmap_counter_t *bmc;
+       spin_lock_irq(&bitmap->lock);
+       bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
+       if (!bmc) {
                 spin_unlock_irq(&bitmap->lock);
-               if (sectors > secs)
-                       sectors -= secs;
-               else
-                       sectors = 0;
+               return;
+       }
+       if (! *bmc) {
+               struct page *page;
+               *bmc = 1 | NEEDED_MASK;
+               bitmap_count_page(bitmap, offset, 1);
+               page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
+               set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
         }
+       spin_unlock_irq(&bitmap->lock);
+
+}
+
+/*
+ * flush out any pending updates
+ */
+void bitmap_flush(mddev_t *mddev)
+{
+       struct bitmap *bitmap = mddev->bitmap;
+       int sleep;
+
+       if (!bitmap) /* there was no bitmap */
+               return;
+
+       /* run the daemon_work three times to ensure everything is flushed
+        * that can be
+        */
+       sleep = bitmap->daemon_sleep;
+       bitmap->daemon_sleep = 0;
+       bitmap_daemon_work(bitmap);
+       bitmap_daemon_work(bitmap);
+       bitmap_daemon_work(bitmap);
+       bitmap->daemon_sleep = sleep;
+       bitmap_update_sb(bitmap);
  }
  
  /*
@@ -1565,7 +1581,8 @@ int bitmap_create(mddev_t *mddev)
  
         /* now that we have some pages available, initialize the in-memory
          * bitmap from the on-disk bitmap */
-       err = bitmap_init_from_disk(bitmap, mddev->recovery_cp == MaxSector);
+       err = bitmap_init_from_disk(bitmap);
+
         if (err)
                 return err;
  
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c

index 12031c9d3f1e1394379ee211ae49bf61ff093949..b08df8b9b2cad8994a8d5c739c3943f1548cc17d 100644 (file)
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1230,7 +1230,7 @@ static int __init dm_mirror_init(void)
         if (r)
                 return r;
  
-       _kmirrord_wq = create_workqueue("kmirrord");
+       _kmirrord_wq = create_singlethread_workqueue("kmirrord");
         if (!_kmirrord_wq) {
                 DMERR("couldn't start kmirrord");
                 dm_dirty_log_exit();
diff --git a/drivers/md/md.c b/drivers/md/md.c

index 6580e0fa4a4781a0ae54378a2c57f7ccbda6ad7d..20ca80b7dc20611de86d418faf8da69140fd764a 100644 (file)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -256,8 +256,7 @@ static inline void mddev_unlock(mddev_t * mddev)
  {
         up(&mddev->reconfig_sem);
  
-       if (mddev->thread)
-               md_wakeup_thread(mddev->thread);
+       md_wakeup_thread(mddev->thread);
  }
  
  mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
@@ -623,6 +622,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                 mddev->raid_disks = sb->raid_disks;
                 mddev->size = sb->size;
                 mddev->events = md_event(sb);
+               mddev->bitmap_offset = 0;
  
                 if (sb->state & (1<<MD_SB_CLEAN))
                         mddev->recovery_cp = MaxSector;
@@ -938,6 +938,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
                 mddev->size = le64_to_cpu(sb->size)/2;
                 mddev->events = le64_to_cpu(sb->events);
+               mddev->bitmap_offset = 0;
                 
                 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
                 memcpy(mddev->uuid, sb->set_uuid, 16);
@@ -1688,6 +1689,7 @@ static int do_md_run(mddev_t * mddev)
         mddev->pers = pers[pnum];
         spin_unlock(&pers_lock);
  
+       mddev->recovery = 0;
         mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
  
         /* before we start the array running, initialise the bitmap */
@@ -1712,6 +1714,7 @@ static int do_md_run(mddev_t * mddev)
         mddev->in_sync = 1;
         
         set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+       md_wakeup_thread(mddev->thread);
         
         if (mddev->sb_dirty)
                 md_update_sb(mddev);
@@ -1798,6 +1801,8 @@ static int do_md_stop(mddev_t * mddev, int ro)
                                 goto out;
                         mddev->ro = 1;
                 } else {
+                       bitmap_flush(mddev);
+                       wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
                         if (mddev->ro)
                                 set_disk_ro(disk, 0);
                         blk_queue_make_request(mddev->queue, md_fail_request);
@@ -1822,6 +1827,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
                 fput(mddev->bitmap_file);
                 mddev->bitmap_file = NULL;
         }
+       mddev->bitmap_offset = 0;
  
         /*
          * Free resources if final stop
@@ -2231,8 +2237,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
                         export_rdev(rdev);
  
                 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-               if (mddev->thread)
-                       md_wakeup_thread(mddev->thread);
+               md_wakeup_thread(mddev->thread);
                 return err;
         }
  
@@ -3484,7 +3489,6 @@ static void md_do_sync(mddev_t *mddev)
                         goto skip;
                 }
                 ITERATE_MDDEV(mddev2,tmp) {
-                       printk(".");
                         if (mddev2 == mddev)
                                 continue;
                         if (mddev2->curr_resync && 
@@ -4007,3 +4011,5 @@ EXPORT_SYMBOL(md_wakeup_thread);
  EXPORT_SYMBOL(md_print_devices);
  EXPORT_SYMBOL(md_check_recovery);
  MODULE_LICENSE("GPL");
+MODULE_ALIAS("md");
+MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c

index d3a64a04a6d857558e16b54e3794fb7830e5f2ec..51d9645ed09c5e8ff79aa9e7a6604e94dc274a54 100644 (file)
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -893,7 +893,6 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
         if (!uptodate) {
                 md_error(r1_bio->mddev,
                          conf->mirrors[r1_bio->read_disk].rdev);
-               set_bit(R1BIO_Degraded, &r1_bio->state);
         } else
                 set_bit(R1BIO_Uptodate, &r1_bio->state);
         rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev);
@@ -918,10 +917,9 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
                         mirror = i;
                         break;
                 }
-       if (!uptodate) {
+       if (!uptodate)
                 md_error(mddev, conf->mirrors[mirror].rdev);
-               set_bit(R1BIO_Degraded, &r1_bio->state);
-       }
+
         update_head_pos(mirror, r1_bio);
  
         if (atomic_dec_and_test(&r1_bio->remaining)) {
@@ -1109,6 +1107,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
         int i;
         int write_targets = 0;
         int sync_blocks;
+       int still_degraded = 0;
  
         if (!conf->r1buf_pool)
         {
@@ -1137,7 +1136,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                 return 0;
         }
  
-       if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, mddev->degraded) &&
+       /* before building a request, check if we can skip these blocks..
+        * This call to bitmap_start_sync doesn't actually record anything
+        */
+       if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
             !conf->fullsync) {
                 /* We can skip this block, and probably several more */
                 *skipped = 1;
@@ -1203,24 +1205,23 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                 if (i == disk) {
                         bio->bi_rw = READ;
                         bio->bi_end_io = end_sync_read;
-               } else if (conf->mirrors[i].rdev &&
-                          !conf->mirrors[i].rdev->faulty &&
-                          (!conf->mirrors[i].rdev->in_sync ||
-                           sector_nr + RESYNC_SECTORS > mddev->recovery_cp)) {
+               } else if (conf->mirrors[i].rdev == NULL ||
+                          conf->mirrors[i].rdev->faulty) {
+                       still_degraded = 1;
+                       continue;
+               } else if (!conf->mirrors[i].rdev->in_sync ||
+                          sector_nr + RESYNC_SECTORS > mddev->recovery_cp) {
                         bio->bi_rw = WRITE;
                         bio->bi_end_io = end_sync_write;
                         write_targets ++;
                 } else
+                       /* no need to read or write here */
                         continue;
                 bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset;
                 bio->bi_bdev = conf->mirrors[i].rdev->bdev;
                 bio->bi_private = r1_bio;
         }
  
-       if (write_targets + 1 < conf->raid_disks)
-               /* array degraded, can't clear bitmap */
-               set_bit(R1BIO_Degraded, &r1_bio->state);
-
         if (write_targets == 0) {
                 /* There is nowhere to write, so all non-sync
                  * drives must be failed - so we are finished
@@ -1243,7 +1244,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                         break;
                 if (sync_blocks == 0) {
                         if (!bitmap_start_sync(mddev->bitmap, sector_nr,
-                                       &sync_blocks, mddev->degraded) &&
+                                       &sync_blocks, still_degraded) &&
                                         !conf->fullsync)
                                 break;
                         if (sync_blocks < (PAGE_SIZE>>9))
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index 4698d5f79575dd00a4bd03c355e86881654d0654..43f231a467d5cb0137f15bd5568de80993a0bb08 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1653,6 +1653,7 @@ static int run (mddev_t *mddev)
  
         /* device size must be a multiple of chunk size */
         mddev->size &= ~(mddev->chunk_size/1024 -1);
+       mddev->resync_max_sectors = mddev->size << 1;
  
         if (!conf->chunk_size || conf->chunk_size % 4) {
                 printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c

index f5ee16805111b9eea157c72f2a2e5b20c9ea2f47..495dee1d1e8335bcef634be427e0132bb596413e 100644 (file)
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -1813,6 +1813,7 @@ static int run (mddev_t *mddev)
  
         /* device size must be a multiple of chunk size */
         mddev->size &= ~(mddev->chunk_size/1024 -1);
+       mddev->resync_max_sectors = mddev->size << 1;
  
         if (conf->raid_disks < 4) {
                 printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
diff --git a/drivers/media/dvb/dvb-usb/dibusb-common.c b/drivers/media/dvb/dvb-usb/dibusb-common.c

index 63b626f70c81e1fc2769d91c3141e26d6c89d2d7..9b9d6f8ee74eb768d919297c82a9818cbeac8987 100644 (file)
--- a/drivers/media/dvb/dvb-usb/dibusb-common.c
+++ b/drivers/media/dvb/dvb-usb/dibusb-common.c
@@ -70,13 +70,22 @@ EXPORT_SYMBOL(dibusb_power_ctrl);
  
  int dibusb2_0_streaming_ctrl(struct dvb_usb_device *d, int onoff)
  {
-       u8 b[2];
-       b[0] = DIBUSB_REQ_SET_IOCTL;
-       b[1] = onoff ? DIBUSB_IOCTL_CMD_ENABLE_STREAM : DIBUSB_IOCTL_CMD_DISABLE_STREAM;
+       u8 b[3] = { 0 };
+       int ret;
+
+       if ((ret = dibusb_streaming_ctrl(d,onoff)) < 0)
+               return ret;
  
-       dvb_usb_generic_write(d,b,3);
+       if (onoff) {
+               b[0] = DIBUSB_REQ_SET_STREAMING_MODE;
+               b[1] = 0x00;
+               if ((ret = dvb_usb_generic_write(d,b,2)) < 0)
+                       return ret;
+       }
  
-       return dibusb_streaming_ctrl(d,onoff);
+       b[0] = DIBUSB_REQ_SET_IOCTL;
+       b[1] = onoff ? DIBUSB_IOCTL_CMD_ENABLE_STREAM : DIBUSB_IOCTL_CMD_DISABLE_STREAM;
+       return dvb_usb_generic_write(d,b,3);
  }
  EXPORT_SYMBOL(dibusb2_0_streaming_ctrl);
  
diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-dvb.c b/drivers/media/dvb/dvb-usb/dvb-usb-dvb.c

index 3491ff40885c0f4ff7c0d550196a477f240b05a3..6fa92100248b8291b7353b070dc7347bfea67d38 100644 (file)
--- a/drivers/media/dvb/dvb-usb/dvb-usb-dvb.c
+++ b/drivers/media/dvb/dvb-usb/dvb-usb-dvb.c
@@ -23,12 +23,12 @@ static int dvb_usb_ctrl_feed(struct dvb_demux_feed *dvbdmxfeed, int onoff)
          */
         if (newfeedcount == 0) {
                 deb_ts("stop feeding\n");
+               dvb_usb_urb_kill(d);
  
                 if (d->props.streaming_ctrl != NULL)
                         if ((ret = d->props.streaming_ctrl(d,0)))
                                 err("error while stopping stream.");
  
-               dvb_usb_urb_kill(d);
         }
  
         d->feedcount = newfeedcount;
@@ -44,6 +44,8 @@ static int dvb_usb_ctrl_feed(struct dvb_demux_feed *dvbdmxfeed, int onoff)
          * for reception.
          */
         if (d->feedcount == onoff && d->feedcount > 0) {
+               deb_ts("submitting all URBs\n");
+               dvb_usb_urb_submit(d);
  
                 deb_ts("controlling pid parser\n");
                 if (d->props.caps & DVB_USB_HAS_PID_FILTER &&
@@ -59,7 +61,6 @@ static int dvb_usb_ctrl_feed(struct dvb_demux_feed *dvbdmxfeed, int onoff)
                                 return -ENODEV;
                         }
  
-               dvb_usb_urb_submit(d);
         }
         return 0;
  }
diff --git a/drivers/media/dvb/frontends/Kconfig b/drivers/media/dvb/frontends/Kconfig

index e83256d0fd14be453e3029b52c941b8ac44826c7..a50a41f6f79d4db95e92efb5844c577b82066b29 100644 (file)
--- a/drivers/media/dvb/frontends/Kconfig
+++ b/drivers/media/dvb/frontends/Kconfig
@@ -188,7 +188,7 @@ config DVB_BCM3510
           support this frontend.
  
  config DVB_LGDT330X
-       tristate "LGDT3302 or LGDT3303 based (DViCO FusionHDTV Gold)"
+       tristate "LG Electronics LGDT3302/LGDT3303 based"
         depends on DVB_CORE
         help
           An ATSC 8VSB and QAM64/256 tuner module. Say Y when you want
diff --git a/drivers/media/dvb/frontends/dvb-pll.c b/drivers/media/dvb/frontends/dvb-pll.c

index 5264310c070e616f6c48553b37fbdc376833acd5..536c35d969b7dca1c19a2d1bc3052f1cf8c241d4 100644 (file)
--- a/drivers/media/dvb/frontends/dvb-pll.c
+++ b/drivers/media/dvb/frontends/dvb-pll.c
@@ -225,6 +225,22 @@ struct dvb_pll_desc dvb_pll_tua6034 = {
  };
  EXPORT_SYMBOL(dvb_pll_tua6034);
  
+/* Infineon TUA6034
+ * used in LG Innotek TDVS-H062F
+ */
+struct dvb_pll_desc dvb_pll_tdvs_tua6034 = {
+       .name  = "LG/Infineon TUA6034",
+       .min   =  54000000,
+       .max   = 863000000,
+       .count = 3,
+       .entries = {
+               {  160000000, 44000000, 62500, 0xce, 0x01 },
+               {  455000000, 44000000, 62500, 0xce, 0x02 },
+               {  999999999, 44000000, 62500, 0xce, 0x04 },
+       },
+};
+EXPORT_SYMBOL(dvb_pll_tdvs_tua6034);
+
  /* Philips FMD1216ME
   * used in Medion Hybrid PCMCIA card and USB Box
   */
diff --git a/drivers/media/dvb/frontends/dvb-pll.h b/drivers/media/dvb/frontends/dvb-pll.h

index cb794759d89ede20b028816bd5c40c6140391a5a..205b2d1a88520a476a2fd7e05847f7debf05b8d4 100644 (file)
--- a/drivers/media/dvb/frontends/dvb-pll.h
+++ b/drivers/media/dvb/frontends/dvb-pll.h
@@ -31,6 +31,7 @@ extern struct dvb_pll_desc dvb_pll_unknown_1;
  extern struct dvb_pll_desc dvb_pll_tua6010xs;
  extern struct dvb_pll_desc dvb_pll_env57h1xd5;
  extern struct dvb_pll_desc dvb_pll_tua6034;
+extern struct dvb_pll_desc dvb_pll_tdvs_tua6034;
  extern struct dvb_pll_desc dvb_pll_tda665x;
  extern struct dvb_pll_desc dvb_pll_fmd1216me;
  extern struct dvb_pll_desc dvb_pll_tded4;
diff --git a/drivers/media/dvb/frontends/lgdt330x.c b/drivers/media/dvb/frontends/lgdt330x.c

index e94dee50eecdf96b5ed19774fe9714d028918441..1f1cd7a8d500846c07e8fd4fa49c0d8335cd7443 100644 (file)
--- a/drivers/media/dvb/frontends/lgdt330x.c
+++ b/drivers/media/dvb/frontends/lgdt330x.c
@@ -1,11 +1,8 @@
  /*
- *    Support for LGDT3302 & LGDT3303 (DViCO FusionHDTV Gold) - VSB/QAM
+ *    Support for LGDT3302 and LGDT3303 - VSB/QAM
   *
   *    Copyright (C) 2005 Wilson Michaels <wilsonmichaels@earthlink.net>
   *
- *    Based on code from  Kirk Lapray <kirk_lapray@bigfoot.com>
- *                           Copyright (C) 2005
- *
   *    This program is free software; you can redistribute it and/or modify
   *    it under the terms of the GNU General Public License as published by
   *    the Free Software Foundation; either version 2 of the License, or
@@ -25,11 +22,13 @@
  /*
   *                      NOTES ABOUT THIS DRIVER
   *
- * This driver supports DViCO FusionHDTV Gold under Linux.
+ * This Linux driver supports:
+ *   DViCO FusionHDTV 3 Gold-Q
+ *   DViCO FusionHDTV 3 Gold-T
+ *   DViCO FusionHDTV 5 Gold
   *
   * TODO:
- * BER and signal strength always return 0.
- * Include support for LGDT3303
+ * signal strength always returns 0.
   *
   */
  
@@ -41,7 +40,6 @@
  #include <asm/byteorder.h>
  
  #include "dvb_frontend.h"
-#include "dvb-pll.h"
  #include "lgdt330x_priv.h"
  #include "lgdt330x.h"
  
@@ -70,55 +68,37 @@ struct lgdt330x_state
         u32 current_frequency;
  };
  
-static int i2c_writebytes (struct lgdt330x_state* state,
-                          u8 addr, /* demod_address or pll_address */
+static int i2c_write_demod_bytes (struct lgdt330x_state* state,
                            u8 *buf, /* data bytes to send */
                            int len  /* number of bytes to send */ )
  {
-       u8 tmp[] = { buf[0], buf[1] };
         struct i2c_msg msg =
-               { .addr = addr, .flags = 0, .buf = tmp,  .len = 2 };
-       int err;
+               { .addr = state->config->demod_address,
+                 .flags = 0,
+                 .buf = buf,
+                 .len = 2 };
         int i;
+       int err;
  
-       for (i=1; i<len; i++) {
-               tmp[1] = buf[i];
+       for (i=0; i<len-1; i+=2){
                 if ((err = i2c_transfer(state->i2c, &msg, 1)) != 1) {
-                       printk(KERN_WARNING "lgdt330x: %s error (addr %02x <- %02x, err == %i)\n", __FUNCTION__, addr, buf[0], err);
+                       printk(KERN_WARNING "lgdt330x: %s error (addr %02x <- %02x, err = %i)\n", __FUNCTION__, msg.buf[0], msg.buf[1], err);
                         if (err < 0)
                                 return err;
                         else
                                 return -EREMOTEIO;
                 }
-               tmp[0]++;
+               msg.buf += 2;
         }
         return 0;
  }
  
-#if 0
-static int i2c_readbytes (struct lgdt330x_state* state,
-                         u8 addr, /* demod_address or pll_address */
-                         u8 *buf, /* holds data bytes read */
-                         int len  /* number of bytes to read */ )
-{
-       struct i2c_msg msg =
-               { .addr = addr, .flags = I2C_M_RD, .buf = buf,  .len = len };
-       int err;
-
-       if ((err = i2c_transfer(state->i2c, &msg, 1)) != 1) {
-               printk(KERN_WARNING "lgdt330x: %s error (addr %02x, err == %i)\n", __FUNCTION__, addr, err);
-               return -EREMOTEIO;
-       }
-       return 0;
-}
-#endif
-
  /*
   * This routine writes the register (reg) to the demod bus
   * then reads the data returned for (len) bytes.
   */
  
-static u8 i2c_selectreadbytes (struct lgdt330x_state* state,
+static u8 i2c_read_demod_bytes (struct lgdt330x_state* state,
                                enum I2C_REG reg, u8* buf, int len)
  {
         u8 wr [] = { reg };
@@ -139,7 +119,7 @@ static u8 i2c_selectreadbytes (struct lgdt330x_state* state,
  }
  
  /* Software reset */
-int lgdt330x_SwReset(struct lgdt330x_state* state)
+static int lgdt3302_SwReset(struct lgdt330x_state* state)
  {
         u8 ret;
         u8 reset[] = {
@@ -148,23 +128,51 @@ int lgdt330x_SwReset(struct lgdt330x_state* state)
                       * bits 5-0 are 1 to mask interrupts */
         };
  
-       ret = i2c_writebytes(state,
-                            state->config->demod_address,
+       ret = i2c_write_demod_bytes(state,
+                            reset, sizeof(reset));
+       if (ret == 0) {
+
+               /* force reset high (inactive) and unmask interrupts */
+               reset[1] = 0x7f;
+               ret = i2c_write_demod_bytes(state,
+                                    reset, sizeof(reset));
+       }
+       return ret;
+}
+
+static int lgdt3303_SwReset(struct lgdt330x_state* state)
+{
+       u8 ret;
+       u8 reset[] = {
+               0x02,
+               0x00 /* bit 0 is active low software reset */
+       };
+
+       ret = i2c_write_demod_bytes(state,
                              reset, sizeof(reset));
         if (ret == 0) {
-               /* spec says reset takes 100 ns why wait */
-               /* mdelay(100);    */ /* keep low for 100mS */
-               reset[1] = 0x7f;      /* force reset high (inactive)
-                                      * and unmask interrupts */
-               ret = i2c_writebytes(state,
-                                    state->config->demod_address,
+
+               /* force reset high (inactive) */
+               reset[1] = 0x01;
+               ret = i2c_write_demod_bytes(state,
                                      reset, sizeof(reset));
         }
-       /* Spec does not indicate a need for this either */
-       /*mdelay(5); */               /* wait 5 msec before doing more */
         return ret;
  }
  
+/* Software-reset the demod with the routine matching config->demod_chip. */
+static int lgdt330x_SwReset(struct lgdt330x_state* state)
+{
+       const int chip = state->config->demod_chip;
+
+       if (chip == LGDT3302)
+               return lgdt3302_SwReset(state);
+       if (chip == LGDT3303)
+               return lgdt3303_SwReset(state);
+       return -ENODEV; /* neither supported chip type was configured */
+}
+
+
  static int lgdt330x_init(struct dvb_frontend* fe)
  {
         /* Hardware reset is done using gpio[0] of cx23880x chip.
@@ -173,22 +181,98 @@ static int lgdt330x_init(struct dvb_frontend* fe)
          * Maybe there needs to be a callable function in cx88-core or
          * the caller of this function needs to do it. */
  
-       dprintk("%s entered\n", __FUNCTION__);
-       return lgdt330x_SwReset((struct lgdt330x_state*) fe->demodulator_priv);
+       /*
+        * Array of byte pairs <address, value>
+        * to initialize each different chip
+        */
+       static u8 lgdt3302_init_data[] = {
+               /* Use 50MHz parameter values from spec sheet since xtal is 50 */
+               /* Change the value of NCOCTFV[25:0] of carrier
+                  recovery center frequency register */
+               VSB_CARRIER_FREQ0, 0x00,
+               VSB_CARRIER_FREQ1, 0x87,
+               VSB_CARRIER_FREQ2, 0x8e,
+               VSB_CARRIER_FREQ3, 0x01,
+               /* Change the TPCLK pin polarity
+                  data is valid on falling clock */
+               DEMUX_CONTROL, 0xfb,
+               /* Change the value of IFBW[11:0] of
+                  AGC IF/RF loop filter bandwidth register */
+               AGC_RF_BANDWIDTH0, 0x40,
+               AGC_RF_BANDWIDTH1, 0x93,
+               AGC_RF_BANDWIDTH2, 0x00,
+               /* Change the value of bit 6, 'nINAGCBY' and
+                  'NSSEL[1:0]' of AGC function control register 2 */
+               AGC_FUNC_CTRL2, 0xc6,
+               /* Change the value of bit 6 'RFFIX'
+                  of AGC function control register 3 */
+               AGC_FUNC_CTRL3, 0x40,
+               /* Set the value of 'INLVTHD' register 0x2a/0x2c
+                  to 0x7fe */
+               AGC_DELAY0, 0x07,
+               AGC_DELAY2, 0xfe,
+               /* Change the value of IAGCBW[15:8]
+                  of inner AGC loop filter bandwidth */
+               AGC_LOOP_BANDWIDTH0, 0x08,
+               AGC_LOOP_BANDWIDTH1, 0x9a
+       };
+
+       static u8 lgdt3303_init_data[] = {
+               0x4c, 0x14
+       };
+
+       struct lgdt330x_state* state = fe->demodulator_priv;
+       char  *chip_name;
+       int    err;
+
+       switch (state->config->demod_chip) {
+       case LGDT3302:
+               chip_name = "LGDT3302";
+               err = i2c_write_demod_bytes(state, lgdt3302_init_data,
+                                                                       sizeof(lgdt3302_init_data));
+               break;
+       case LGDT3303:
+               chip_name = "LGDT3303";
+               err = i2c_write_demod_bytes(state, lgdt3303_init_data,
+                                                                       sizeof(lgdt3303_init_data));
+               break;
+       default:
+               chip_name = "undefined";
+               printk (KERN_WARNING "Only LGDT3302 and LGDT3303 are supported chips.\n");
+               err = -ENODEV;
+       }
+       dprintk("%s entered as %s\n", __FUNCTION__, chip_name);
+       if (err < 0)
+               return err;
+       return lgdt330x_SwReset(state);
  }
  
  static int lgdt330x_read_ber(struct dvb_frontend* fe, u32* ber)
  {
-       *ber = 0; /* Dummy out for now */
+       *ber = 0; /* Not supplied by the demod chips */
         return 0;
  }
  
  static int lgdt330x_read_ucblocks(struct dvb_frontend* fe, u32* ucblocks)
  {
-       struct lgdt330x_state* state = (struct lgdt330x_state*) fe->demodulator_priv;
+       struct lgdt330x_state* state = fe->demodulator_priv;
+       int err;
         u8 buf[2];
  
-       i2c_selectreadbytes(state, PACKET_ERR_COUNTER1, buf, sizeof(buf));
+       switch (state->config->demod_chip) {
+       case LGDT3302:
+               err = i2c_read_demod_bytes(state, LGDT3302_PACKET_ERR_COUNTER1,
+                                                                 buf, sizeof(buf));
+               break;
+       case LGDT3303:
+               err = i2c_read_demod_bytes(state, LGDT3303_PACKET_ERR_COUNTER1,
+                                                                 buf, sizeof(buf));
+               break;
+       default:
+               printk(KERN_WARNING
+                          "Only LGDT3302 and LGDT3303 are supported chips.\n");
+               err = -ENODEV;
+       }
  
         *ucblocks = (buf[0] << 8) | buf[1];
         return 0;
@@ -197,123 +281,113 @@ static int lgdt330x_read_ucblocks(struct dvb_frontend* fe, u32* ucblocks)
  static int lgdt330x_set_parameters(struct dvb_frontend* fe,
                                    struct dvb_frontend_parameters *param)
  {
-       struct lgdt330x_state* state =
-               (struct lgdt330x_state*) fe->demodulator_priv;
+       /*
+        * Array of byte pairs <address, value>
+        * to initialize 8VSB for lgdt3303 chip 50 MHz IF
+        */
+       static u8 lgdt3303_8vsb_44_data[] = {
+               0x04, 0x00,
+               0x0d, 0x40,
+        0x0e, 0x87,
+        0x0f, 0x8e,
+        0x10, 0x01,
+        0x47, 0x8b };
+
+       /*
+        * Array of byte pairs <address, value>
+        * to initialize QAM for lgdt3303 chip
+        */
+       static u8 lgdt3303_qam_data[] = {
+               0x04, 0x00,
+               0x0d, 0x00,
+               0x0e, 0x00,
+               0x0f, 0x00,
+               0x10, 0x00,
+               0x51, 0x63,
+               0x47, 0x66,
+               0x48, 0x66,
+               0x4d, 0x1a,
+               0x49, 0x08,
+               0x4a, 0x9b };
+
+       struct lgdt330x_state* state = fe->demodulator_priv;
  
-       /* Use 50MHz parameter values from spec sheet since xtal is 50 */
         static u8 top_ctrl_cfg[]   = { TOP_CONTROL, 0x03 };
-       static u8 vsb_freq_cfg[]   = { VSB_CARRIER_FREQ0, 0x00, 0x87, 0x8e, 0x01 };
-       static u8 demux_ctrl_cfg[] = { DEMUX_CONTROL, 0xfb };
-       static u8 agc_rf_cfg[]     = { AGC_RF_BANDWIDTH0, 0x40, 0x93, 0x00 };
-       static u8 agc_ctrl_cfg[]   = { AGC_FUNC_CTRL2, 0xc6, 0x40 };
-       static u8 agc_delay_cfg[]  = { AGC_DELAY0, 0x07, 0x00, 0xfe };
-       static u8 agc_loop_cfg[]   = { AGC_LOOP_BANDWIDTH0, 0x08, 0x9a };
  
+       int err;
         /* Change only if we are actually changing the modulation */
         if (state->current_modulation != param->u.vsb.modulation) {
                 switch(param->u.vsb.modulation) {
                 case VSB_8:
                         dprintk("%s: VSB_8 MODE\n", __FUNCTION__);
  
-                       /* Select VSB mode and serial MPEG interface */
-                       top_ctrl_cfg[1] = 0x07;
+                       /* Select VSB mode */
+                       top_ctrl_cfg[1] = 0x03;
  
                         /* Select ANT connector if supported by card */
                         if (state->config->pll_rf_set)
                                 state->config->pll_rf_set(fe, 1);
+
+                       if (state->config->demod_chip == LGDT3303) {
+                               err = i2c_write_demod_bytes(state, lgdt3303_8vsb_44_data,
+                                                                                       sizeof(lgdt3303_8vsb_44_data));
+                       }
                         break;
  
                 case QAM_64:
                         dprintk("%s: QAM_64 MODE\n", __FUNCTION__);
  
-                       /* Select QAM_64 mode and serial MPEG interface */
-                       top_ctrl_cfg[1] = 0x04;
+                       /* Select QAM_64 mode */
+                       top_ctrl_cfg[1] = 0x00;
  
                         /* Select CABLE connector if supported by card */
                         if (state->config->pll_rf_set)
                                 state->config->pll_rf_set(fe, 0);
+
+                       if (state->config->demod_chip == LGDT3303) {
+                               err = i2c_write_demod_bytes(state, lgdt3303_qam_data,
+                                                                                       sizeof(lgdt3303_qam_data));
+                       }
                         break;
  
                 case QAM_256:
                         dprintk("%s: QAM_256 MODE\n", __FUNCTION__);
  
-                       /* Select QAM_256 mode and serial MPEG interface */
-                       top_ctrl_cfg[1] = 0x05;
+                       /* Select QAM_256 mode */
+                       top_ctrl_cfg[1] = 0x01;
  
                         /* Select CABLE connector if supported by card */
                         if (state->config->pll_rf_set)
                                 state->config->pll_rf_set(fe, 0);
+
+                       if (state->config->demod_chip == LGDT3303) {
+                               err = i2c_write_demod_bytes(state, lgdt3303_qam_data,
+                                                                                       sizeof(lgdt3303_qam_data));
+                       }
                         break;
                 default:
                         printk(KERN_WARNING "lgdt330x: %s: Modulation type(%d) UNSUPPORTED\n", __FUNCTION__, param->u.vsb.modulation);
                         return -1;
                 }
-               /* Initializations common to all modes */
+               /*
+                * Select serial or parallel MPEG hardware interface
+                * Serial:   0x04 for LGDT3302 or 0x40 for LGDT3303
+                * Parallel: 0x00
+                */
+               top_ctrl_cfg[1] |= state->config->serial_mpeg;
  
                 /* Select the requested mode */
-               i2c_writebytes(state, state->config->demod_address,
-                              top_ctrl_cfg, sizeof(top_ctrl_cfg));
-
-               /* Change the value of IFBW[11:0]
-                  of AGC IF/RF loop filter bandwidth register */
-               i2c_writebytes(state, state->config->demod_address,
-                              agc_rf_cfg, sizeof(agc_rf_cfg));
-
-               /* Change the value of bit 6, 'nINAGCBY' and
-                  'NSSEL[1:0] of ACG function control register 2 */
-               /* Change the value of bit 6 'RFFIX'
-                  of AGC function control register 3 */
-               i2c_writebytes(state, state->config->demod_address,
-                              agc_ctrl_cfg, sizeof(agc_ctrl_cfg));
-
-               /* Change the TPCLK pin polarity
-                  data is valid on falling clock */
-               i2c_writebytes(state, state->config->demod_address,
-                              demux_ctrl_cfg, sizeof(demux_ctrl_cfg));
-
-               /* Change the value of NCOCTFV[25:0] of carrier
-                  recovery center frequency register */
-               i2c_writebytes(state, state->config->demod_address,
-                                      vsb_freq_cfg, sizeof(vsb_freq_cfg));
-
-               /* Set the value of 'INLVTHD' register 0x2a/0x2c to 0x7fe */
-               i2c_writebytes(state, state->config->demod_address,
-                              agc_delay_cfg, sizeof(agc_delay_cfg));
-
-               /* Change the value of IAGCBW[15:8]
-                  of inner AGC loop filter bandwith */
-               i2c_writebytes(state, state->config->demod_address,
-                              agc_loop_cfg, sizeof(agc_loop_cfg));
-
+               i2c_write_demod_bytes(state, top_ctrl_cfg,
+                                                         sizeof(top_ctrl_cfg));
                 state->config->set_ts_params(fe, 0);
                 state->current_modulation = param->u.vsb.modulation;
         }
  
         /* Change only if we are actually changing the channel */
         if (state->current_frequency != param->frequency) {
-               u8 buf[5];
-               struct i2c_msg msg = { .flags = 0, .buf = &buf[1], .len = 4 };
-               int err;
-
-               state->config->pll_set(fe, param, buf);
-               msg.addr = buf[0];
-
-               dprintk("%s: tuner at 0x%02x bytes: 0x%02x 0x%02x "
-                       "0x%02x 0x%02x\n", __FUNCTION__,
-                       buf[0],buf[1],buf[2],buf[3],buf[4]);
-               if ((err = i2c_transfer(state->i2c, &msg, 1)) != 1) {
-                       printk(KERN_WARNING "lgdt330x: %s error (addr %02x <- %02x, err = %i)\n", __FUNCTION__, buf[0], buf[1], err);
-                       if (err < 0)
-                               return err;
-                       else
-                               return -EREMOTEIO;
-               }
-#if 0
-               /* Check the status of the tuner pll */
-               i2c_readbytes(state, buf[0], &buf[1], 1);
-               dprintk("%s: tuner status byte = 0x%02x\n", __FUNCTION__, buf[1]);
-#endif
-               /* Update current frequency */
+               /* Tune to the new frequency */
+               state->config->pll_set(fe, param);
+               /* Keep track of the new frequency */
                 state->current_frequency = param->frequency;
         }
         lgdt330x_SwReset(state);
@@ -328,21 +402,15 @@ static int lgdt330x_get_frontend(struct dvb_frontend* fe,
         return 0;
  }
  
-static int lgdt330x_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int lgdt3302_read_status(struct dvb_frontend* fe, fe_status_t* status)
  {
-       struct lgdt330x_state* state = (struct lgdt330x_state*) fe->demodulator_priv;
+       struct lgdt330x_state* state = fe->demodulator_priv;
         u8 buf[3];
  
         *status = 0; /* Reset status result */
  
-       /*
-        * You must set the Mask bits to 1 in the IRQ_MASK in order
-        * to see that status bit in the IRQ_STATUS register.
-        * This is done in SwReset();
-        */
-
         /* AGC status register */
-       i2c_selectreadbytes(state, AGC_STATUS, buf, 1);
+       i2c_read_demod_bytes(state, AGC_STATUS, buf, 1);
         dprintk("%s: AGC_STATUS = 0x%02x\n", __FUNCTION__, buf[0]);
         if ((buf[0] & 0x0c) == 0x8){
                 /* Test signal does not exist flag */
@@ -353,16 +421,15 @@ static int lgdt330x_read_status(struct dvb_frontend* fe, fe_status_t* status)
                 return 0;
         }
  
+       /*
+        * You must set the Mask bits to 1 in the IRQ_MASK in order
+        * to see that status bit in the IRQ_STATUS register.
+        * This is done in SwReset();
+        */
         /* signal status */
-       i2c_selectreadbytes(state, TOP_CONTROL, buf, sizeof(buf));
+       i2c_read_demod_bytes(state, TOP_CONTROL, buf, sizeof(buf));
         dprintk("%s: TOP_CONTROL = 0x%02x, IRO_MASK = 0x%02x, IRQ_STATUS = 0x%02x\n", __FUNCTION__, buf[0], buf[1], buf[2]);
  
-#if 0
-       /* Alternative method to check for a signal */
-       /* using the SNR good/bad interrupts.   */
-       if ((buf[2] & 0x30) == 0x10)
-               *status |= FE_HAS_SIGNAL;
-#endif
  
         /* sync status */
         if ((buf[2] & 0x03) == 0x01) {
@@ -376,7 +443,7 @@ static int lgdt330x_read_status(struct dvb_frontend* fe, fe_status_t* status)
         }
  
         /* Carrier Recovery Lock Status Register */
-       i2c_selectreadbytes(state, CARRIER_LOCK, buf, 1);
+       i2c_read_demod_bytes(state, CARRIER_LOCK, buf, 1);
         dprintk("%s: CARRIER_LOCK = 0x%02x\n", __FUNCTION__, buf[0]);
         switch (state->current_modulation) {
         case QAM_256:
@@ -396,13 +463,75 @@ static int lgdt330x_read_status(struct dvb_frontend* fe, fe_status_t* status)
         return 0;
  }
  
+/* Report LGDT3303 status: FE_HAS_SIGNAL from the AGC status register, then
+ * carrier/sync/lock/viterbi bits for the current modulation.
+ * Returns 0, or -EREMOTEIO if the AGC status register cannot be read. */
+static int lgdt3303_read_status(struct dvb_frontend* fe, fe_status_t* status)
+{
+       struct lgdt330x_state* state = fe->demodulator_priv;
+       int err;
+       u8 buf[3];
+
+       *status = 0; /* Reset status result */
+       /* lgdt3303 AGC status register */
+       err = i2c_read_demod_bytes(state, 0x58, buf, 1);
+       /* The helper returns u8, so failure is nonzero, never negative */
+       if (err)
+               return -EREMOTEIO;
+
+       dprintk("%s: AGC_STATUS = 0x%02x\n", __FUNCTION__, buf[0]);
+       if ((buf[0] & 0x21) == 0x01){
+               /* Test the "input signal does not exist" flag and the AGC lock flag */
+               *status |= FE_HAS_SIGNAL;
+       } else {
+               /* Without a signal all other status bits are meaningless */
+               return 0;
+       }
+
+       /* Carrier Recovery Lock Status Register */
+       i2c_read_demod_bytes(state, CARRIER_LOCK, buf, 1);
+       dprintk("%s: CARRIER_LOCK = 0x%02x\n", __FUNCTION__, buf[0]);
+       switch (state->current_modulation) {
+       case QAM_256:
+       case QAM_64:
+               /* Need to understand why there are 3 lock levels here */
+               if ((buf[0] & 0x07) == 0x07)
+                       *status |= FE_HAS_CARRIER;
+               else
+                       break;
+               i2c_read_demod_bytes(state, 0x8a, buf, 1);
+               if ((buf[0] & 0x04) == 0x04)
+                       *status |= FE_HAS_SYNC;
+               if ((buf[0] & 0x01) == 0x01)
+                       *status |= FE_HAS_LOCK;
+               if ((buf[0] & 0x08) == 0x08)
+                       *status |= FE_HAS_VITERBI;
+               break;
+       case VSB_8:
+               if ((buf[0] & 0x80) == 0x80)
+                       *status |= FE_HAS_CARRIER;
+               else
+                       break;
+               i2c_read_demod_bytes(state, 0x38, buf, 1);
+               if ((buf[0] & 0x02) == 0x00)
+                       *status |= FE_HAS_SYNC;
+               if ((buf[0] & 0x01) == 0x01)
+                       *status |= FE_HAS_LOCK | FE_HAS_VITERBI;
+               break;
+       default:
+               printk(KERN_WARNING "lgdt330x: %s: Modulation set to unsupported value\n", __FUNCTION__);
+       }
+       return 0;
+}
+
  static int lgdt330x_read_signal_strength(struct dvb_frontend* fe, u16* strength)
  {
         /* not directly available. */
+       *strength = 0;
         return 0;
  }
  
-static int lgdt330x_read_snr(struct dvb_frontend* fe, u16* snr)
+static int lgdt3302_read_snr(struct dvb_frontend* fe, u16* snr)
  {
  #ifdef SNR_IN_DB
         /*
@@ -451,7 +580,7 @@ static int lgdt330x_read_snr(struct dvb_frontend* fe, u16* snr)
                   91,    115,    144,    182,    229,    288, 362,   456,   574,   722,
                   909,   1144,   1440,   1813,   2282,   2873, 3617,  4553,  5732,  7216,
                   9084,  11436,  14396,  18124,  22817,  28724,  36161, 45524, 57312, 72151,
-                 90833, 114351, 143960, 181235, 228161, 0x040000
+                 90833, 114351, 143960, 181235, 228161, 0x080000
                 };
  
         static u8 buf[5];/* read data buffer */
@@ -459,8 +588,8 @@ static int lgdt330x_read_snr(struct dvb_frontend* fe, u16* snr)
         static u32 snr_db;  /* index into SNR_EQ[] */
         struct lgdt330x_state* state = (struct lgdt330x_state*) fe->demodulator_priv;
  
-       /* read both equalizer and pase tracker noise data */
-       i2c_selectreadbytes(state, EQPH_ERR0, buf, sizeof(buf));
+       /* read both equalizer and phase tracker noise data */
+       i2c_read_demod_bytes(state, EQPH_ERR0, buf, sizeof(buf));
  
         if (state->current_modulation == VSB_8) {
                 /* Equalizer Mean-Square Error Register for VSB */
@@ -496,19 +625,20 @@ static int lgdt330x_read_snr(struct dvb_frontend* fe, u16* snr)
         struct lgdt330x_state* state = (struct lgdt330x_state*) fe->demodulator_priv;
  
         /* read both equalizer and pase tracker noise data */
-       i2c_selectreadbytes(state, EQPH_ERR0, buf, sizeof(buf));
+       i2c_read_demod_bytes(state, EQPH_ERR0, buf, sizeof(buf));
  
         if (state->current_modulation == VSB_8) {
-               /* Equalizer Mean-Square Error Register for VSB */
-               noise = ((buf[0] & 7) << 16) | (buf[1] << 8) | buf[2];
-       } else {
-               /* Phase Tracker Mean-Square Error Register for QAM */
+               /* Phase Tracker Mean-Square Error Register for VSB */
                 noise = ((buf[0] & 7<<3) << 13) | (buf[3] << 8) | buf[4];
+       } else {
+
+               /* Carrier Recovery Mean-Square Error for QAM */
+               i2c_read_demod_bytes(state, 0x1a, buf, 2);
+               noise = ((buf[0] & 3) << 8) | buf[1];
         }
  
         /* Small values for noise mean signal is better so invert noise */
-       /* Noise is 19 bit value so discard 3 LSB*/
-       *snr = ~noise>>3;
+       *snr = ~noise;
  #endif
  
         dprintk("%s: noise = 0x%05x, snr = %idb\n",__FUNCTION__, noise, *snr);
@@ -516,6 +646,32 @@ static int lgdt330x_read_snr(struct dvb_frontend* fe, u16* snr)
         return 0;
  }
  
+static int lgdt3303_read_snr(struct dvb_frontend* fe, u16* snr)
+{
+       /* Store the bitwise-inverted raw noise value in *snr; always returns 0 */
+       static u8 buf[5];/* read data buffer; static, so it keeps its contents between calls */
+       static u32 noise;   /* noise value; only its low 16 bits fit in *snr */
+       struct lgdt330x_state* state = (struct lgdt330x_state*) fe->demodulator_priv;
+
+       if (state->current_modulation == VSB_8) {
+               /* NOTE(review): nothing is read into buf on this path before it is used -- confirm which register should be read here */
+               /* Phase Tracker Mean-Square Error Register for VSB */
+               noise = ((buf[0] & 7) << 16) | (buf[3] << 8) | buf[4];
+       } else {
+
+               /* Carrier Recovery Mean-Square Error for QAM */
+               i2c_read_demod_bytes(state, 0x1a, buf, 2);
+               noise = (buf[0] << 8) | buf[1];
+       }
+
+       /* Small values for noise mean signal is better so invert noise */
+       *snr = ~noise;
+
+       dprintk("%s: noise = 0x%05x, snr = %idb\n",__FUNCTION__, noise, *snr);
+
+       return 0;
+}
+
  static int lgdt330x_get_tune_settings(struct dvb_frontend* fe, struct dvb_frontend_tune_settings* fe_tune_settings)
  {
         /* I have no idea about this - it may not be needed */
@@ -531,7 +687,8 @@ static void lgdt330x_release(struct dvb_frontend* fe)
         kfree(state);
  }
  
-static struct dvb_frontend_ops lgdt330x_ops;
+static struct dvb_frontend_ops lgdt3302_ops;
+static struct dvb_frontend_ops lgdt3303_ops;
  
  struct dvb_frontend* lgdt330x_attach(const struct lgdt330x_config* config,
                                      struct i2c_adapter* i2c)
@@ -548,9 +705,19 @@ struct dvb_frontend* lgdt330x_attach(const struct lgdt330x_config* config,
         /* Setup the state */
         state->config = config;
         state->i2c = i2c;
-       memcpy(&state->ops, &lgdt330x_ops, sizeof(struct dvb_frontend_ops));
+       switch (config->demod_chip) {
+       case LGDT3302:
+               memcpy(&state->ops, &lgdt3302_ops, sizeof(struct dvb_frontend_ops));
+               break;
+       case LGDT3303:
+               memcpy(&state->ops, &lgdt3303_ops, sizeof(struct dvb_frontend_ops));
+               break;
+       default:
+               goto error;
+       }
+
         /* Verify communication with demod chip */
-       if (i2c_selectreadbytes(state, 2, buf, 1))
+       if (i2c_read_demod_bytes(state, 2, buf, 1))
                 goto error;
  
         state->current_frequency = -1;
@@ -568,9 +735,33 @@ error:
         return NULL;
  }
  
-static struct dvb_frontend_ops lgdt330x_ops = {
+static struct dvb_frontend_ops lgdt3302_ops = { /* copied into state->ops by lgdt330x_attach() for LGDT3302 */
+       .info = {
+               .name= "LG Electronics LGDT3302 VSB/QAM Frontend",
+               .type = FE_ATSC,
+               .frequency_min= 54000000,
+               .frequency_max= 858000000,
+               .frequency_stepsize= 62500,
+               /* Symbol rate applies to all VSB modes; the QAM rates still need to be checked */
+               .symbol_rate_min    = 10762000,
+               .symbol_rate_max    = 10762000,
+               .caps = FE_CAN_QAM_64 | FE_CAN_QAM_256 | FE_CAN_8VSB
+       },
+       .init                 = lgdt330x_init,
+       .set_frontend         = lgdt330x_set_parameters,
+       .get_frontend         = lgdt330x_get_frontend,
+       .get_tune_settings    = lgdt330x_get_tune_settings,
+       .read_status          = lgdt3302_read_status, /* LGDT3302-specific handler */
+       .read_ber             = lgdt330x_read_ber,
+       .read_signal_strength = lgdt330x_read_signal_strength,
+       .read_snr             = lgdt3302_read_snr, /* LGDT3302-specific handler */
+       .read_ucblocks        = lgdt330x_read_ucblocks,
+       .release              = lgdt330x_release,
+};
+
+static struct dvb_frontend_ops lgdt3303_ops = {
         .info = {
-               .name= "LG Electronics lgdt330x VSB/QAM Frontend",
+               .name= "LG Electronics LGDT3303 VSB/QAM Frontend",
                 .type = FE_ATSC,
                 .frequency_min= 54000000,
                 .frequency_max= 858000000,
@@ -584,15 +775,15 @@ static struct dvb_frontend_ops lgdt330x_ops = {
         .set_frontend         = lgdt330x_set_parameters,
         .get_frontend         = lgdt330x_get_frontend,
         .get_tune_settings    = lgdt330x_get_tune_settings,
-       .read_status          = lgdt330x_read_status,
+       .read_status          = lgdt3303_read_status,
         .read_ber             = lgdt330x_read_ber,
         .read_signal_strength = lgdt330x_read_signal_strength,
-       .read_snr             = lgdt330x_read_snr,
+       .read_snr             = lgdt3303_read_snr,
         .read_ucblocks        = lgdt330x_read_ucblocks,
         .release              = lgdt330x_release,
  };
  
-MODULE_DESCRIPTION("lgdt330x [DViCO FusionHDTV 3 Gold] (ATSC 8VSB & ITU-T J.83 AnnexB 64/256 QAM) Demodulator Driver");
+MODULE_DESCRIPTION("LGDT330X (ATSC 8VSB & ITU-T J.83 AnnexB 64/256 QAM) Demodulator Driver");
  MODULE_AUTHOR("Wilson Michaels");
  MODULE_LICENSE("GPL");
  
@@ -601,6 +792,5 @@ EXPORT_SYMBOL(lgdt330x_attach);
  /*
   * Local variables:
   * c-basic-offset: 8
- * compile-command: "make DVB=1"
   * End:
   */
diff --git a/drivers/media/dvb/frontends/lgdt330x.h b/drivers/media/dvb/frontends/lgdt330x.h

index 04986f8e7565a1d56a7e8405f456f191e316fc1a..e209ba1e47c5cb3bc9ecff6052e93cfa1eb4e158 100644 (file)
--- a/drivers/media/dvb/frontends/lgdt330x.h
+++ b/drivers/media/dvb/frontends/lgdt330x.h
@@ -1,5 +1,5 @@
  /*
- *    Support for LGDT3302 & LGDT3303 (DViCO FustionHDTV Gold) - VSB/QAM
+ *    Support for LGDT3302 and LGDT3303 - VSB/QAM
   *
   *    Copyright (C) 2005 Wilson Michaels <wilsonmichaels@earthlink.net>
   *
@@ -24,14 +24,26 @@
  
  #include <linux/dvb/frontend.h>
  
+typedef enum lg_chip_t { /* demodulator chip selector, stored in lgdt330x_config.demod_chip */
+               UNDEFINED, /* first enumerator (value 0); not one of the supported chips */
+               LGDT3302,
+               LGDT3303
+}lg_chip_type;
+
  struct lgdt330x_config
  {
         /* The demodulator's i2c address */
         u8 demod_address;
  
+       /* LG demodulator chip LGDT3302 or LGDT3303 */
+       lg_chip_type demod_chip;
+
+       /* MPEG hardware interface - 0:parallel, else serial: 0x04 (LGDT3302) or 0x40 (LGDT3303) */
+       int serial_mpeg;
+
         /* PLL interface */
         int (*pll_rf_set) (struct dvb_frontend* fe, int index);
-       int (*pll_set)(struct dvb_frontend* fe, struct dvb_frontend_parameters* params, u8* pll_address);
+       int (*pll_set)(struct dvb_frontend* fe, struct dvb_frontend_parameters* params);
  
         /* Need to set device param for start_dma */
         int (*set_ts_params)(struct dvb_frontend* fe, int is_punctured);
diff --git a/drivers/media/dvb/frontends/lgdt330x_priv.h b/drivers/media/dvb/frontends/lgdt330x_priv.h

index 4143ce8f1a954608f257b06c10e07ecb208d15ee..59b7c5b9012d6e89e4ac688eb3bfb5e3783a2d55 100644 (file)
--- a/drivers/media/dvb/frontends/lgdt330x_priv.h
+++ b/drivers/media/dvb/frontends/lgdt330x_priv.h
@@ -1,5 +1,5 @@
  /*
- *    Support for LGDT3302 & LGDT3303 (DViCO FustionHDTV Gold) - VSB/QAM
+ *    Support for LGDT3302 and LGDT3303 - VSB/QAM
   *
   *    Copyright (C) 2005 Wilson Michaels <wilsonmichaels@earthlink.net>
   *
@@ -57,8 +57,10 @@ enum I2C_REG {
         PH_ERR1= 0x4a,
         PH_ERR2= 0x4b,
         DEMUX_CONTROL= 0x66,
-       PACKET_ERR_COUNTER1= 0x6a,
-       PACKET_ERR_COUNTER2= 0x6b,
+       LGDT3302_PACKET_ERR_COUNTER1= 0x6a,
+       LGDT3302_PACKET_ERR_COUNTER2= 0x6b,
+       LGDT3303_PACKET_ERR_COUNTER1= 0x8b,
+       LGDT3303_PACKET_ERR_COUNTER2= 0x8c,
  };
  
  #endif /* _LGDT330X_PRIV_ */
diff --git a/drivers/media/dvb/frontends/tda80xx.c b/drivers/media/dvb/frontends/tda80xx.c

index 88e125079ca15579acf8eea92cf06d480e79d003..d1cabb6a0a139247ff0dc0f0190707edd8446b69 100644 (file)
--- a/drivers/media/dvb/frontends/tda80xx.c
+++ b/drivers/media/dvb/frontends/tda80xx.c
@@ -30,6 +30,7 @@
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/slab.h>
+#include <asm/irq.h>
  #include <asm/div64.h>
  
  #include "dvb_frontend.h"
diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig

index ac81e5e01a9a7b135a6aef6f155492c08333d4b2..3f5742396096dd61cbc49648c63d763bb530b84e 100644 (file)
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -356,7 +356,7 @@ config VIDEO_M32R_AR
  
  config VIDEO_M32R_AR_M64278
         tristate "Use Colour AR module M64278(VGA)"
-       depends on VIDEO_M32R_AR
+       depends on VIDEO_M32R_AR && PLAT_M32700UT
         ---help---
           Say Y here to use the Renesas M64278E-800 camera module,
           which supports VGA(640x480 pixcels) size of images.
diff --git a/drivers/media/video/bttv-cards.c b/drivers/media/video/bttv-cards.c

index 6c52fd0bb7df63ad644bfee1cd5b8e54149072d3..a97b9b958ed6d4ca3858519dec5750c6bc8198b0 100644 (file)
--- a/drivers/media/video/bttv-cards.c
+++ b/drivers/media/video/bttv-cards.c
@@ -95,7 +95,7 @@ static int __devinit pvr_boot(struct bttv *btv);
  static unsigned int triton1=0;
  static unsigned int vsfx=0;
  static unsigned int latency = UNSET;
-static unsigned int no_overlay=-1;
+int no_overlay=-1;
  
  static unsigned int card[BTTV_MAX]   = { [ 0 ... (BTTV_MAX-1) ] = UNSET };
  static unsigned int pll[BTTV_MAX]    = { [ 0 ... (BTTV_MAX-1) ] = UNSET };
@@ -4296,9 +4296,11 @@ void __devinit bttv_check_chipset(void)
                 printk(KERN_INFO "bttv: Host bridge needs VSFX enabled.\n");
         if (pcipci_fail) {
                 printk(KERN_WARNING "bttv: BT848 and your chipset may not work together.\n");
-               if (UNSET == no_overlay) {
-                       printk(KERN_WARNING "bttv: going to disable overlay.\n");
+               if (!no_overlay) {
+                       printk(KERN_WARNING "bttv: overlay will be disabled.\n");
                         no_overlay = 1;
+               } else {
+                       printk(KERN_WARNING "bttv: overlay forced. Use this option at your own risk.\n");
                 }
         }
         if (UNSET != latency)
diff --git a/drivers/media/video/bttv-driver.c b/drivers/media/video/bttv-driver.c

index 51a0f6d68e73d0dc74599ff06940bc0b81a963f2..eee9322ce21b06efb02a6bf51e2b662231983189 100644 (file)
--- a/drivers/media/video/bttv-driver.c
+++ b/drivers/media/video/bttv-driver.c
@@ -1,5 +1,5 @@
  /*
-    $Id: bttv-driver.c,v 1.42 2005/07/05 17:37:35 nsh Exp $
+    $Id: bttv-driver.c,v 1.52 2005/08/04 00:55:16 mchehab Exp $
  
      bttv - Bt848 frame grabber driver
  
@@ -80,6 +80,7 @@ static unsigned int irq_iswitch = 0;
  static unsigned int uv_ratio    = 50;
  static unsigned int full_luma_range = 0;
  static unsigned int coring      = 0;
+extern int no_overlay;
  
  /* API features (turn on/off stuff for testing) */
  static unsigned int v4l2        = 1;
@@ -2151,6 +2152,10 @@ static int bttv_s_fmt(struct bttv_fh *fh, struct bttv *btv,
                 return 0;
         }
         case V4L2_BUF_TYPE_VIDEO_OVERLAY:
+               if (no_overlay > 0) {
+                       printk ("V4L2_BUF_TYPE_VIDEO_OVERLAY: no_overlay\n");
+                       return -EINVAL;
+               }
                 return setup_window(fh, btv, &f->fmt.win, 1);
         case V4L2_BUF_TYPE_VBI_CAPTURE:
                 retval = bttv_switch_type(fh,f->type);
@@ -2224,9 +2229,11 @@ static int bttv_do_ioctl(struct inode *inode, struct file *file,
                         /* others */
                         cap->type = VID_TYPE_CAPTURE|
                                 VID_TYPE_TUNER|
-                               VID_TYPE_OVERLAY|
                                 VID_TYPE_CLIPPING|
                                 VID_TYPE_SCALES;
+                       if (no_overlay <= 0)
+                               cap->type |= VID_TYPE_OVERLAY;
+
                         cap->maxwidth  = bttv_tvnorms[btv->tvnorm].swidth;
                         cap->maxheight = bttv_tvnorms[btv->tvnorm].sheight;
                         cap->minwidth  = 48;
@@ -2302,6 +2309,11 @@ static int bttv_do_ioctl(struct inode *inode, struct file *file,
                 struct video_window *win = arg;
                 struct v4l2_window w2;
  
+               if (no_overlay > 0) {
+                       printk ("VIDIOCSWIN: no_overlay\n");
+                       return -EINVAL;
+               }
+
                 w2.field = V4L2_FIELD_ANY;
                 w2.w.left    = win->x;
                 w2.w.top     = win->y;
@@ -2577,10 +2589,12 @@ static int bttv_do_ioctl(struct inode *inode, struct file *file,
                 cap->version = BTTV_VERSION_CODE;
                 cap->capabilities =
                         V4L2_CAP_VIDEO_CAPTURE |
-                       V4L2_CAP_VIDEO_OVERLAY |
                         V4L2_CAP_VBI_CAPTURE |
                         V4L2_CAP_READWRITE |
                         V4L2_CAP_STREAMING;
+               if (no_overlay <= 0)
+                       cap->capabilities |= V4L2_CAP_VIDEO_OVERLAY;
+
                 if (bttv_tvcards[btv->c.type].tuner != UNSET &&
                     bttv_tvcards[btv->c.type].tuner != TUNER_ABSENT)
                         cap->capabilities |= V4L2_CAP_TUNER;
@@ -3076,7 +3090,7 @@ static struct file_operations bttv_fops =
  static struct video_device bttv_video_template =
  {
         .name     = "UNSET",
-       .type     = VID_TYPE_CAPTURE|VID_TYPE_TUNER|VID_TYPE_OVERLAY|
+       .type     = VID_TYPE_CAPTURE|VID_TYPE_TUNER|
                     VID_TYPE_CLIPPING|VID_TYPE_SCALES,
         .hardware = VID_HARDWARE_BT848,
         .fops     = &bttv_fops,
@@ -3756,6 +3770,12 @@ static void bttv_unregister_video(struct bttv *btv)
  /* register video4linux devices */
  static int __devinit bttv_register_video(struct bttv *btv)
  {
+       if (no_overlay <= 0) {
+               bttv_video_template.type |= VID_TYPE_OVERLAY;
+       } else {
+               printk("bttv: Overlay support disabled.\n");
+       }
+
         /* video */
         btv->video_dev = vdev_init(btv, &bttv_video_template, "video");
          if (NULL == btv->video_dev)
@@ -3869,11 +3889,6 @@ static int __devinit bttv_probe(struct pci_dev *dev,
          pci_set_master(dev);
         pci_set_command(dev);
         pci_set_drvdata(dev,btv);
-       if (!pci_dma_supported(dev,0xffffffff)) {
-               printk("bttv%d: Oops: no 32bit PCI DMA ???\n", btv->c.nr);
-               result = -EIO;
-               goto fail1;
-       }
  
          pci_read_config_byte(dev, PCI_CLASS_REVISION, &btv->revision);
          pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
diff --git a/drivers/media/video/bttv.h b/drivers/media/video/bttv.h

index 191eaf1714ba52294d1398f87af3c167875dcbfe..f2af9e1454f08f8039b83587a6acd843975d9584 100644 (file)
--- a/drivers/media/video/bttv.h
+++ b/drivers/media/video/bttv.h
@@ -1,5 +1,5 @@
  /*
- * $Id: bttv.h,v 1.18 2005/05/24 23:41:42 nsh Exp $
+ * $Id: bttv.h,v 1.22 2005/07/28 18:41:21 mchehab Exp $
   *
   *  bttv - Bt848 frame grabber driver
   *
@@ -135,7 +135,9 @@
  #define BTTV_DVICO_DVBT_LITE  0x80
  #define BTTV_TIBET_CS16  0x83
  #define BTTV_KODICOM_4400R  0x84
-#define BTTV_ADLINK_RTV24   0x85
+#define BTTV_ADLINK_RTV24   0x86
+#define BTTV_DVICO_FUSIONHDTV_5_LITE 0x87
+#define BTTV_ACORP_Y878F   0x88
  
  /* i2c address list */
  #define I2C_TSA5522        0xc2
diff --git a/drivers/media/video/bttvp.h b/drivers/media/video/bttvp.h

index f3293e4a15ad3f6137a06d03120dc76d73eb68dc..aab094bc243dd17b383e1d11c7e8c6db19bbcf4a 100644 (file)
--- a/drivers/media/video/bttvp.h
+++ b/drivers/media/video/bttvp.h
@@ -1,5 +1,5 @@
  /*
-    $Id: bttvp.h,v 1.19 2005/06/16 21:38:45 nsh Exp $
+    $Id: bttvp.h,v 1.21 2005/07/15 21:44:14 mchehab Exp $
  
      bttv - Bt848 frame grabber driver
  
@@ -27,7 +27,7 @@
  #define _BTTVP_H_
  
  #include <linux/version.h>
-#define BTTV_VERSION_CODE KERNEL_VERSION(0,9,15)
+#define BTTV_VERSION_CODE KERNEL_VERSION(0,9,16)
  
  #include <linux/types.h>
  #include <linux/wait.h>
diff --git a/drivers/media/video/cx88/cx88-cards.c b/drivers/media/video/cx88/cx88-cards.c

index 3d0c784b376f04beb525222dedc7e076b18ff639..ebf02a7f81e801eaa9497fd91689630ecb4c48f9 100644 (file)
--- a/drivers/media/video/cx88/cx88-cards.c
+++ b/drivers/media/video/cx88/cx88-cards.c
@@ -1,5 +1,5 @@
  /*
- * $Id: cx88-cards.c,v 1.86 2005/07/14 03:06:43 mchehab Exp $
+ * $Id: cx88-cards.c,v 1.90 2005/07/28 02:47:42 mkrufky Exp $
   *
   * device driver for Conexant 2388x based TV cards
   * card-specific stuff.
@@ -90,6 +90,9 @@ struct cx88_board cx88_boards[] = {
                 .input          = {{
                         .type   = CX88_VMUX_TELEVISION,
                         .vmux   = 0,
+               },{
+                       .type   = CX88_VMUX_SVIDEO,
+                       .vmux   = 2,
                 }},
         },
         [CX88_BOARD_PIXELVIEW] = {
@@ -496,6 +499,9 @@ struct cx88_board cx88_boards[] = {
                 .input          = {{
                          .type   = CX88_VMUX_DVB,
                          .vmux   = 0,
+               },{
+                       .type   = CX88_VMUX_SVIDEO,
+                       .vmux   = 2,
                  }},
                 .dvb            = 1,
         },
@@ -753,6 +759,27 @@ struct cx88_board cx88_boards[] = {
                 }},
                 .dvb            = 1,
         },
+       [CX88_BOARD_DVICO_FUSIONHDTV_5_GOLD] = {
+               .name           = "DViCO FusionHDTV 5 Gold",
+               .tuner_type     = TUNER_LG_TDVS_H062F,
+               .radio_type     = UNSET,
+               .tuner_addr     = ADDR_UNSET,
+               .radio_addr     = ADDR_UNSET,
+               /*  See DViCO FusionHDTV 3 Gold-Q for GPIO documentation.  */
+               .input          = {{
+                        .type   = CX88_VMUX_TELEVISION,
+                        .vmux   = 0,
+                        .gpio0  = 0x0f0d,
+                },{
+                        .type   = CX88_VMUX_COMPOSITE1,
+                        .vmux   = 1,
+                        .gpio0  = 0x0f00,
+                },{
+                        .type   = CX88_VMUX_SVIDEO,
+                        .vmux   = 2,
+                        .gpio0  = 0x0f00,
+                }},
+       },
  };
  const unsigned int cx88_bcount = ARRAY_SIZE(cx88_boards);
  
@@ -880,6 +907,10 @@ struct cx88_subid cx88_subids[] = {
                 .subvendor = 0x153b,
                 .subdevice = 0x1166,
                 .card      = CX88_BOARD_TERRATEC_CINERGY_1400_DVB_T1,
+       },{
+               .subvendor = 0x18ac,
+               .subdevice = 0xd500,
+               .card      = CX88_BOARD_DVICO_FUSIONHDTV_5_GOLD,
         },
  };
  const unsigned int cx88_idcount = ARRAY_SIZE(cx88_subids);
diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c

index ef0e9a85c3598fe32408cb7f0fbf309f8a4f473c..78d223257a6888650238e1a660bdf8d27be321d6 100644 (file)
--- a/drivers/media/video/cx88/cx88-dvb.c
+++ b/drivers/media/video/cx88/cx88-dvb.c
@@ -1,5 +1,5 @@
  /*
- * $Id: cx88-dvb.c,v 1.54 2005/07/25 05:13:50 mkrufky Exp $
+ * $Id: cx88-dvb.c,v 1.58 2005/08/07 09:24:08 mkrufky Exp $
   *
   * device driver for Conexant 2388x based TV cards
   * MPEG Transport Stream (DVB) routines
@@ -208,14 +208,26 @@ static struct or51132_config pchdtv_hd3000 = {
  
  #ifdef HAVE_LGDT330X
  static int lgdt330x_pll_set(struct dvb_frontend* fe,
-                           struct dvb_frontend_parameters* params,
-                           u8* pllbuf)
+                           struct dvb_frontend_parameters* params)
  {
         struct cx8802_dev *dev= fe->dvb->priv;
+       u8 buf[4];
+       struct i2c_msg msg =
+               { .addr = dev->core->pll_addr, .flags = 0, .buf = buf, .len = 4 };
+       int err;
  
-       pllbuf[0] = dev->core->pll_addr;
-       dvb_pll_configure(dev->core->pll_desc, &pllbuf[1],
-                         params->frequency, 0);
+       dvb_pll_configure(dev->core->pll_desc, buf, params->frequency, 0);
+       dprintk(1, "%s: tuner at 0x%02x bytes: 0x%02x 0x%02x 0x%02x 0x%02x\n",
+                       __FUNCTION__, msg.addr, buf[0],buf[1],buf[2],buf[3]);
+       if ((err = i2c_transfer(&dev->core->i2c_adap, &msg, 1)) != 1) {
+               printk(KERN_WARNING "cx88-dvb: %s error "
+                          "(addr %02x <- %02x, err = %i)\n",
+                          __FUNCTION__, buf[0], buf[1], err);
+               if (err < 0)
+                       return err;
+               else
+                       return -EREMOTEIO;
+       }
         return 0;
  }
  
@@ -244,6 +256,8 @@ static int lgdt330x_set_ts_param(struct dvb_frontend* fe, int is_punctured)
  
  static struct lgdt330x_config fusionhdtv_3_gold = {
         .demod_address    = 0x0e,
+       .demod_chip       = LGDT3302,
+       .serial_mpeg      = 0x04, /* TPSERIAL for 3302 in TOP_CONTROL */
         .pll_set          = lgdt330x_pll_set,
         .set_ts_params    = lgdt330x_set_ts_param,
  };
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c

index 5588a3aeecb4f605953f442526714cc116e3d30d..5f58c103198afb02047d90012dcdd158fa58ddcd 100644 (file)
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -1,5 +1,5 @@
  /*
- * $Id: cx88-video.c,v 1.80 2005/07/13 08:49:08 mchehab Exp $
+ * $Id: cx88-video.c,v 1.82 2005/07/22 05:13:34 mkrufky Exp $
   *
   * device driver for Conexant 2388x based TV cards
   * video4linux video interface
@@ -758,10 +758,10 @@ static int video_open(struct inode *inode, struct file *file)
                 struct cx88_core *core = dev->core;
                 int board = core->board;
                 dprintk(1,"video_open: setting radio device\n");
+               cx_write(MO_GP3_IO, cx88_boards[board].radio.gpio3);
                 cx_write(MO_GP0_IO, cx88_boards[board].radio.gpio0);
                 cx_write(MO_GP1_IO, cx88_boards[board].radio.gpio1);
                 cx_write(MO_GP2_IO, cx88_boards[board].radio.gpio2);
-               cx_write(MO_GP3_IO, cx88_boards[board].radio.gpio3);
                 dev->core->tvaudio = WW_FM;
                 cx88_set_tvaudio(core);
                 cx88_set_stereo(core,V4L2_TUNER_MODE_STEREO,1);
diff --git a/drivers/media/video/cx88/cx88.h b/drivers/media/video/cx88/cx88.h

index b008f7db6dfdef4907a720d09cbfa5b58fe41936..da65dc92787cdd6218a5a9fc34e3f611a868b5e8 100644 (file)
--- a/drivers/media/video/cx88/cx88.h
+++ b/drivers/media/video/cx88/cx88.h
@@ -1,5 +1,5 @@
  /*
- * $Id: cx88.h,v 1.69 2005/07/13 17:25:25 mchehab Exp $
+ * $Id: cx88.h,v 1.70 2005/07/24 17:44:09 mkrufky Exp $
   *
   * v4l2 device driver for cx2388x based TV cards
   *
@@ -171,6 +171,7 @@ extern struct sram_channel cx88_sram_channels[];
  #define CX88_BOARD_DVICO_FUSIONHDTV_3_GOLD_T  28
  #define CX88_BOARD_ADSTECH_DVB_T_PCI          29
  #define CX88_BOARD_TERRATEC_CINERGY_1400_DVB_T1  30
+#define CX88_BOARD_DVICO_FUSIONHDTV_5_GOLD 31
  
  enum cx88_itype {
         CX88_VMUX_COMPOSITE1 = 1,
diff --git a/drivers/media/video/msp3400.c b/drivers/media/video/msp3400.c

index 6239254db27ef79507e0e13bc807874bdc489b6f..62f1b8ddb98b8bc153602271c7d84337c6e67efe 100644 (file)
--- a/drivers/media/video/msp3400.c
+++ b/drivers/media/video/msp3400.c
@@ -741,11 +741,9 @@ static int msp34xx_sleep(struct msp3400c *msp, int timeout)
                         schedule_timeout(msecs_to_jiffies(timeout));
                 }
         }
-       if (current->flags & PF_FREEZE) {
-               refrigerator ();
-       }
  
         remove_wait_queue(&msp->wq, &wait);
+       try_to_freeze();
         return msp->restart;
  }
  
diff --git a/drivers/media/video/saa7134/saa7134-i2c.c b/drivers/media/video/saa7134/saa7134-i2c.c

index 93dd61978541d48badf4588aa7310426267d6fd2..1203b93a572c4a80da2a872a80e5a9be7c0d58cd 100644 (file)
--- a/drivers/media/video/saa7134/saa7134-i2c.c
+++ b/drivers/media/video/saa7134/saa7134-i2c.c
@@ -1,5 +1,5 @@
  /*
- * $Id: saa7134-i2c.c,v 1.19 2005/07/07 01:49:30 mkrufky Exp $
+ * $Id: saa7134-i2c.c,v 1.22 2005/07/22 04:09:41 mkrufky Exp $
   *
   * device driver for philips saa7134 based TV cards
   * i2c interface support
@@ -300,6 +300,8 @@ static int saa7134_i2c_xfer(struct i2c_adapter *i2c_adap,
         status = i2c_get_status(dev);
         if (i2c_is_error(status))
                 goto err;
+       /* ensure that the bus is idle for at least one bit slot */
+       msleep(1);
  
         d1printk("\n");
         return num;
diff --git a/drivers/media/video/saa7134/saa7134.h b/drivers/media/video/saa7134/saa7134.h

index 6836c07794fcc541f8fcb708874e12b54c2391aa..2af0cb2a731b77b551752a79430dace51c0f1816 100644 (file)
--- a/drivers/media/video/saa7134/saa7134.h
+++ b/drivers/media/video/saa7134/saa7134.h
@@ -1,5 +1,5 @@
  /*
- * $Id: saa7134.h,v 1.48 2005/07/01 08:22:24 nsh Exp $
+ * $Id: saa7134.h,v 1.49 2005/07/13 17:25:25 mchehab Exp $
   *
   * v4l2 device driver for philips saa7134 based TV cards
   *
@@ -21,7 +21,7 @@
   */
  
  #include <linux/version.h>
-#define SAA7134_VERSION_CODE KERNEL_VERSION(0,2,13)
+#define SAA7134_VERSION_CODE KERNEL_VERSION(0,2,14)
  
  #include <linux/pci.h>
  #include <linux/i2c.h>
diff --git a/drivers/media/video/tea5767.c b/drivers/media/video/tea5767.c

index 4d27ac1b7fb8f4cf700b8ae39d6e8951d4964569..cebcc1fa68d12968e69dbd9297853fd9bb6f64bf 100644 (file)
--- a/drivers/media/video/tea5767.c
+++ b/drivers/media/video/tea5767.c
@@ -2,7 +2,7 @@
   * For Philips TEA5767 FM Chip used on some TV Cards like Prolink Pixelview
   * I2C address is allways 0xC0.
   *
- * $Id: tea5767.c,v 1.21 2005/07/14 03:06:43 mchehab Exp $
+ * $Id: tea5767.c,v 1.27 2005/07/31 12:10:56 mchehab Exp $
   *
   * Copyright (c) 2005 Mauro Carvalho Chehab (mchehab@brturbo.com.br)
   * This code is placed under the terms of the GNU General Public License
@@ -15,7 +15,6 @@
  #include <linux/videodev.h>
  #include <linux/delay.h>
  #include <media/tuner.h>
-#include <media/tuner.h>
  
  #define PREFIX "TEA5767 "
  
@@ -293,16 +292,16 @@ static int tea5767_stereo(struct i2c_client *c)
  
  int tea5767_autodetection(struct i2c_client *c)
  {
-       unsigned char buffer[5] = { 0xff, 0xff, 0xff, 0xff, 0xff };
+       unsigned char buffer[7] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
         int rc;
         struct tuner *t = i2c_get_clientdata(c);
  
-       if (5 != (rc = i2c_master_recv(c, buffer, 5))) {
+       if (7 != (rc = i2c_master_recv(c, buffer, 7))) {
                 tuner_warn("It is not a TEA5767. Received %i bytes.\n", rc);
                 return EINVAL;
         }
  
-       /* If all bytes are the same then it's a TV tuner and not a tea5767 chip. */
+       /* If all bytes are the same then it's a TV tuner and not a tea5767 */
         if (buffer[0] == buffer[1] && buffer[0] == buffer[2] &&
             buffer[0] == buffer[3] && buffer[0] == buffer[4]) {
                 tuner_warn("All bytes are equal. It is not a TEA5767\n");
@@ -318,6 +317,11 @@ int tea5767_autodetection(struct i2c_client *c)
                 tuner_warn("Chip ID is not zero. It is not a TEA5767\n");
                 return EINVAL;
         }
+       /* It seems that tea5767 returns 0xff after the 5th byte */
+       if ((buffer[5] != 0xff) || (buffer[6] != 0xff)) {
+               tuner_warn("Returned more than 5 bytes. It is not a TEA5767\n");
+               return EINVAL;
+       }
  
         tuner_warn("TEA5767 detected.\n");
         return 0;
@@ -327,10 +331,8 @@ int tea5767_tuner_init(struct i2c_client *c)
  {
         struct tuner *t = i2c_get_clientdata(c);
  
-       if (tea5767_autodetection(c) == EINVAL)
-               return EINVAL;
-
-       tuner_info("type set to %d (%s)\n", t->type, "Philips TEA5767HN FM Radio");
+       tuner_info("type set to %d (%s)\n", t->type,
+                       "Philips TEA5767HN FM Radio");
         strlcpy(c->name, "tea5767", sizeof(c->name));
  
         t->tv_freq = set_tv_freq;
diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c

index b25a9c08ac022fe7adaf21670781770a2d9af46a..f0a579827a2416d0c132320ac8842118a220ae45 100644 (file)
--- a/drivers/media/video/tuner-core.c
+++ b/drivers/media/video/tuner-core.c
@@ -1,5 +1,5 @@
  /*
- * $Id: tuner-core.c,v 1.58 2005/07/14 03:06:43 mchehab Exp $
+ * $Id: tuner-core.c,v 1.63 2005/07/28 18:19:55 mchehab Exp $
   *
   * i2c tv tuner chip device driver
   * core core, i.e. kernel interfaces, registering and so on
@@ -23,6 +23,8 @@
  #include <media/tuner.h>
  #include <media/audiochip.h>
  
+#include "msp3400.h"
+
  #define UNSET (-1U)
  
  /* standard i2c insmod options */
@@ -42,6 +44,9 @@ module_param(addr, int, 0444);
  static unsigned int no_autodetect = 0;
  module_param(no_autodetect, int, 0444);
  
+static unsigned int show_i2c = 0;
+module_param(show_i2c, int, 0444);
+
  /* insmod options used at runtime => read/write */
  unsigned int tuner_debug = 0;
  module_param(tuner_debug, int, 0644);
@@ -320,6 +325,17 @@ static int tuner_attach(struct i2c_adapter *adap, int addr, int kind)
  
         tuner_info("chip found @ 0x%x (%s)\n", addr << 1, adap->name);
  
+       if (show_i2c) {
+               unsigned char buffer[16];
+               int i,rc;
+
+               memset(buffer, 0, sizeof(buffer));
+               rc = i2c_master_recv(&t->i2c, buffer, sizeof(buffer));
+               printk("tuner-%04x I2C RECV = ",addr);
+               for (i=0;i<rc;i++)
+                       printk("%02x ",buffer[i]);
+               printk("\n");
+       }
         /* TEA5767 autodetection code - only for addr = 0xc0 */
         if (!no_autodetect) {
                 if (addr == 0x60) {
@@ -451,6 +467,17 @@ static int tuner_command(struct i2c_client *client, unsigned int cmd, void *arg)
                         break;
                 }
                 break;
+       case VIDIOCSAUDIO:
+               if (check_mode(t, "VIDIOCSAUDIO") == EINVAL)
+                       return 0;
+               if (check_v4l2(t) == EINVAL)
+                       return 0;
+
+               /* Should be implemented, since bttv calls it */
+               tuner_dbg("VIDIOCSAUDIO not implemented.\n");
+
+               break;
+       case MSP_SET_MATRIX:
         case TDA9887_SET_CONFIG:
                 break;
         /* --- v4l ioctls --- */
diff --git a/drivers/media/video/tuner-simple.c b/drivers/media/video/tuner-simple.c

index a3f8e83f53147fe3dd06f694837576f1a4a212d6..de0c93aeb75d8411835e8c9118f8eb7005002564 100644 (file)
--- a/drivers/media/video/tuner-simple.c
+++ b/drivers/media/video/tuner-simple.c
@@ -1,5 +1,5 @@
  /*
- * $Id: tuner-simple.c,v 1.39 2005/07/07 01:49:30 mkrufky Exp $
+ * $Id: tuner-simple.c,v 1.43 2005/07/28 18:41:21 mchehab Exp $
   *
   * i2c tv tuner chip device driver
   * controls all those simple 4-control-bytes style tuners.
@@ -245,6 +245,12 @@ static struct tunertype tuners[] = {
            /* see tea5767.c for details */},
         { "Philips FMD1216ME MK3 Hybrid Tuner", Philips, PAL,
           16*160.00,16*442.00,0x51,0x52,0x54,0x86,623 },
+
+       { "LG TDVS-H062F/TUA6034", LGINNOTEK, NTSC,
+         16*160.00,16*455.00,0x01,0x02,0x04,0x8e,732},
+
+       { "Ymec TVF66T5-B/DFF", Philips, PAL,
+          16*160.25,16*464.25,0x01,0x02,0x08,0x8e,623},
  };
  
  unsigned const int tuner_count = ARRAY_SIZE(tuners);
diff --git a/drivers/media/video/tveeprom.c b/drivers/media/video/tveeprom.c

index 62b03ef091e0dc985b0c5f9cbce5d8f9a35c805c..127ec38ebd60822d4b48954a6331705c8f3ffd3e 100644 (file)
--- a/drivers/media/video/tveeprom.c
+++ b/drivers/media/video/tveeprom.c
@@ -189,7 +189,7 @@ hauppauge_tuner[] =
         { TUNER_ABSENT,        "Philips FQ1236 MK3"},
         { TUNER_ABSENT,        "Samsung TCPN 2121P30A"},
         { TUNER_ABSENT,        "Samsung TCPE 4121P30A"},
-       { TUNER_ABSENT,        "TCL MFPE05 2"},
+       { TUNER_PHILIPS_FM1216ME_MK3, "TCL MFPE05 2"},
         /* 90-99 */
         { TUNER_ABSENT,        "LG TALN H202T"},
         { TUNER_PHILIPS_FQ1216AME_MK4, "Philips FQ1216AME MK4"},
diff --git a/drivers/message/i2o/Kconfig b/drivers/message/i2o/Kconfig

index 06e8eb19a05c5df2fab8386cd9a68a8ab242e394..43a942a29c2e0fec852c217d3e9867deb8d4d8e4 100644 (file)
--- a/drivers/message/i2o/Kconfig
+++ b/drivers/message/i2o/Kconfig
@@ -53,6 +53,9 @@ config I2O_CONFIG
           To compile this support as a module, choose M here: the
           module will be called i2o_config.
  
+         Note: If you want to use the new API you have to download the
+         i2o_config patch from http://i2o.shadowconnect.com/
+
  config I2O_CONFIG_OLD_IOCTL
         bool "Enable ioctls (OBSOLETE)"
         depends on I2O_CONFIG
diff --git a/drivers/message/i2o/config-osm.c b/drivers/message/i2o/config-osm.c

index fe2e7afc9eae25d0cc52ea46a5d96120e4771958..af32ab4e90cd28eff940016f57f9e2c06e50cf6d 100644 (file)
--- a/drivers/message/i2o/config-osm.c
+++ b/drivers/message/i2o/config-osm.c
@@ -30,503 +30,9 @@
  
  static struct i2o_driver i2o_config_driver;
  
-/* Special file operations for sysfs */
-struct fops_attribute {
-       struct bin_attribute bin;
-       struct file_operations fops;
-};
-
-/**
- *     sysfs_read_dummy
- */
-static ssize_t sysfs_read_dummy(struct kobject *kobj, char *buf, loff_t offset,
-                               size_t count)
-{
-       return 0;
-};
-
-/**
- *     sysfs_write_dummy
- */
-static ssize_t sysfs_write_dummy(struct kobject *kobj, char *buf, loff_t offset,
-                                size_t count)
-{
-       return 0;
-};
-
-/**
- *     sysfs_create_fops_file - Creates attribute with special file operations
- *     @kobj: kobject which should contains the attribute
- *     @attr: attributes which should be used to create file
- *
- *     First creates attribute @attr in kobject @kobj. If it is the first time
- *     this function is called, merge old fops from sysfs with new one and
- *     write it back. Afterwords the new fops will be set for the created
- *     attribute.
- *
- *     Returns 0 on success or negative error code on failure.
- */
-static int sysfs_create_fops_file(struct kobject *kobj,
-                                 struct fops_attribute *attr)
-{
-       struct file_operations tmp, *fops;
-       struct dentry *d;
-       struct qstr qstr;
-       int rc;
-
-       fops = &attr->fops;
-
-       if (fops->read)
-               attr->bin.read = sysfs_read_dummy;
-
-       if (fops->write)
-               attr->bin.write = sysfs_write_dummy;
-
-       if ((rc = sysfs_create_bin_file(kobj, &attr->bin)))
-               return rc;
-
-       qstr.name = attr->bin.attr.name;
-       qstr.len = strlen(qstr.name);
-       qstr.hash = full_name_hash(qstr.name, qstr.len);
-
-       if ((d = lookup_hash(&qstr, kobj->dentry))) {
-               if (!fops->owner) {
-                       memcpy(&tmp, d->d_inode->i_fop, sizeof(tmp));
-                       if (fops->read)
-                               tmp.read = fops->read;
-                       if (fops->write)
-                               tmp.write = fops->write;
-                       memcpy(fops, &tmp, sizeof(tmp));
-               }
-
-               d->d_inode->i_fop = fops;
-       } else
-               sysfs_remove_bin_file(kobj, &attr->bin);
-
-       return -ENOENT;
-};
-
-/**
- *     sysfs_remove_fops_file - Remove attribute with special file operations
- *     @kobj: kobject which contains the attribute
- *     @attr: attributes which are used to create file
- *
- *     Only wrapper arround sysfs_remove_bin_file()
- *
- *     Returns 0 on success or negative error code on failure.
- */
-static inline int sysfs_remove_fops_file(struct kobject *kobj,
-                                        struct fops_attribute *attr)
-{
-       return sysfs_remove_bin_file(kobj, &attr->bin);
-};
-
-/**
- *     i2o_config_read_hrt - Returns the HRT of the controller
- *     @kob: kernel object handle
- *     @buf: buffer into which the HRT should be copied
- *     @off: file offset
- *     @count: number of bytes to read
- *
- *     Put @count bytes starting at @off into @buf from the HRT of the I2O
- *     controller corresponding to @kobj.
- *
- *     Returns number of bytes copied into buffer.
- */
-static ssize_t i2o_config_read_hrt(struct kobject *kobj, char *buf,
-                                  loff_t offset, size_t count)
-{
-       struct i2o_controller *c = kobj_to_i2o_device(kobj)->iop;
-       i2o_hrt *hrt = c->hrt.virt;
-
-       u32 size = (hrt->num_entries * hrt->entry_len + 2) * 4;
-
-       if (offset > size)
-               return 0;
-
-       if (offset + count > size)
-               count = size - offset;
-
-       memcpy(buf, (u8 *) hrt + offset, count);
-
-       return count;
-};
-
-/**
- *     i2o_config_read_lct - Returns the LCT of the controller
- *     @kob: kernel object handle
- *     @buf: buffer into which the LCT should be copied
- *     @off: file offset
- *     @count: number of bytes to read
- *
- *     Put @count bytes starting at @off into @buf from the LCT of the I2O
- *     controller corresponding to @kobj.
- *
- *     Returns number of bytes copied into buffer.
- */
-static ssize_t i2o_config_read_lct(struct kobject *kobj, char *buf,
-                                  loff_t offset, size_t count)
-{
-       struct i2o_controller *c = kobj_to_i2o_device(kobj)->iop;
-       u32 size = c->lct->table_size * 4;
-
-       if (offset > size)
-               return 0;
-
-       if (offset + count > size)
-               count = size - offset;
-
-       memcpy(buf, (u8 *) c->lct + offset, count);
-
-       return count;
-};
-
-#define I2O_CONFIG_SW_ATTR(_name,_mode,_type,_swid) \
-static ssize_t i2o_config_##_name##_read(struct file *file, char __user *buf, size_t count, loff_t * offset) { \
-       return i2o_config_sw_read(file, buf, count, offset, _type, _swid); \
-};\
-\
-static ssize_t i2o_config_##_name##_write(struct file *file, const char __user *buf, size_t count, loff_t * offset) { \
-       return i2o_config_sw_write(file, buf, count, offset, _type, _swid); \
-}; \
-\
-static struct fops_attribute i2o_config_attr_##_name = { \
-       .bin = { .attr = { .name = __stringify(_name), .mode = _mode, \
-                          .owner = THIS_MODULE }, \
-                .size = 0, }, \
-       .fops = { .write = i2o_config_##_name##_write, \
-                 .read = i2o_config_##_name##_read} \
-};
-
-#ifdef CONFIG_I2O_EXT_ADAPTEC
-
-/**
- *     i2o_config_dpt_reagion - Converts type and id to flash region
- *     @swtype: type of software module reading
- *     @swid: id of software which should be read
- *
- *     Converts type and id from I2O spec to the matching region for DPT /
- *     Adaptec controllers.
- *
- *     Returns region which match type and id or -1 on error.
- */
-static u32 i2o_config_dpt_region(u8 swtype, u8 swid)
-{
-       switch (swtype) {
-       case I2O_SOFTWARE_MODULE_IRTOS:
-               /*
-                * content: operation firmware
-                * region size:
-                *      0xbc000 for 2554, 3754, 2564, 3757
-                *      0x170000 for 2865
-                *      0x17c000 for 3966
-                */
-               if (!swid)
-                       return 0;
-
-               break;
-
-       case I2O_SOFTWARE_MODULE_IOP_PRIVATE:
-               /*
-                * content: BIOS and SMOR
-                * BIOS size: first 0x8000 bytes
-                * region size:
-                *      0x40000 for 2554, 3754, 2564, 3757
-                *      0x80000 for 2865, 3966
-                */
-               if (!swid)
-                       return 1;
-
-               break;
-
-       case I2O_SOFTWARE_MODULE_IOP_CONFIG:
-               switch (swid) {
-               case 0:
-                       /*
-                        * content: NVRAM defaults
-                        * region size: 0x2000 bytes
-                        */
-                       return 2;
-               case 1:
-                       /*
-                        * content: serial number
-                        * region size: 0x2000 bytes
-                        */
-                       return 3;
-               }
-               break;
-       }
-
-       return -1;
-};
-
-#endif
-
-/**
- *     i2o_config_sw_read - Read a software module from controller
- *     @file: file pointer
- *     @buf: buffer into which the data should be copied
- *     @count: number of bytes to read
- *     @off: file offset
- *     @swtype: type of software module reading
- *     @swid: id of software which should be read
- *
- *     Transfers @count bytes at offset @offset from IOP into buffer using
- *     type @swtype and id @swid as described in I2O spec.
- *
- *     Returns number of bytes copied into buffer or error code on failure.
- */
-static ssize_t i2o_config_sw_read(struct file *file, char __user * buf,
-                                 size_t count, loff_t * offset, u8 swtype,
-                                 u32 swid)
-{
-       struct sysfs_dirent *sd = file->f_dentry->d_parent->d_fsdata;
-       struct kobject *kobj = sd->s_element;
-       struct i2o_controller *c = kobj_to_i2o_device(kobj)->iop;
-       u32 m, function = I2O_CMD_SW_UPLOAD;
-       struct i2o_dma buffer;
-       struct i2o_message __iomem *msg;
-       u32 __iomem *mptr;
-       int rc, status;
-
-       m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-       if (m == I2O_QUEUE_EMPTY)
-               return -EBUSY;
-
-       mptr = &msg->body[3];
-
-       if ((rc = i2o_dma_alloc(&c->pdev->dev, &buffer, count, GFP_KERNEL))) {
-               i2o_msg_nop(c, m);
-               return rc;
-       }
-#ifdef CONFIG_I2O_EXT_ADAPTEC
-       if (c->adaptec) {
-               mptr = &msg->body[4];
-               function = I2O_CMD_PRIVATE;
-
-               writel(TEN_WORD_MSG_SIZE | SGL_OFFSET_8, &msg->u.head[0]);
-
-               writel(I2O_VENDOR_DPT << 16 | I2O_DPT_FLASH_READ,
-                      &msg->body[0]);
-               writel(i2o_config_dpt_region(swtype, swid), &msg->body[1]);
-               writel(*offset, &msg->body[2]);
-               writel(count, &msg->body[3]);
-       } else
-#endif
-               writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_7, &msg->u.head[0]);
-
-       writel(0xD0000000 | count, mptr++);
-       writel(buffer.phys, mptr);
-
-       writel(function << 24 | HOST_TID << 12 | ADAPTER_TID, &msg->u.head[1]);
-       writel(i2o_config_driver.context, &msg->u.head[2]);
-       writel(0, &msg->u.head[3]);
-
-#ifdef CONFIG_I2O_EXT_ADAPTEC
-       if (!c->adaptec)
-#endif
-       {
-               writel((u32) swtype << 16 | (u32) 1 << 8, &msg->body[0]);
-               writel(0, &msg->body[1]);
-               writel(swid, &msg->body[2]);
-       }
-
-       status = i2o_msg_post_wait_mem(c, m, 60, &buffer);
-
-       if (status == I2O_POST_WAIT_OK) {
-               if (!(rc = copy_to_user(buf, buffer.virt, count))) {
-                       rc = count;
-                       *offset += count;
-               }
-       } else
-               rc = -EIO;
-
-       if (status != -ETIMEDOUT)
-               i2o_dma_free(&c->pdev->dev, &buffer);
-
-       return rc;
-};
-
-/**
- *     i2o_config_sw_write - Write a software module to controller
- *     @file: file pointer
- *     @buf: buffer into which the data should be copied
- *     @count: number of bytes to read
- *     @off: file offset
- *     @swtype: type of software module writing
- *     @swid: id of software which should be written
- *
- *     Transfers @count bytes at offset @offset from buffer to IOP using
- *     type @swtype and id @swid as described in I2O spec.
- *
- *     Returns number of bytes copied from buffer or error code on failure.
- */
-static ssize_t i2o_config_sw_write(struct file *file, const char __user * buf,
-                                  size_t count, loff_t * offset, u8 swtype,
-                                  u32 swid)
-{
-       struct sysfs_dirent *sd = file->f_dentry->d_parent->d_fsdata;
-       struct kobject *kobj = sd->s_element;
-       struct i2o_controller *c = kobj_to_i2o_device(kobj)->iop;
-       u32 m, function = I2O_CMD_SW_DOWNLOAD;
-       struct i2o_dma buffer;
-       struct i2o_message __iomem *msg;
-       u32 __iomem *mptr;
-       int rc, status;
-
-       m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-       if (m == I2O_QUEUE_EMPTY)
-               return -EBUSY;
-
-       mptr = &msg->body[3];
-
-       if ((rc = i2o_dma_alloc(&c->pdev->dev, &buffer, count, GFP_KERNEL)))
-               goto nop_msg;
-
-       if ((rc = copy_from_user(buffer.virt, buf, count)))
-               goto free_buffer;
-
-#ifdef CONFIG_I2O_EXT_ADAPTEC
-       if (c->adaptec) {
-               mptr = &msg->body[4];
-               function = I2O_CMD_PRIVATE;
-
-               writel(TEN_WORD_MSG_SIZE | SGL_OFFSET_8, &msg->u.head[0]);
-
-               writel(I2O_VENDOR_DPT << 16 | I2O_DPT_FLASH_WRITE,
-                      &msg->body[0]);
-               writel(i2o_config_dpt_region(swtype, swid), &msg->body[1]);
-               writel(*offset, &msg->body[2]);
-               writel(count, &msg->body[3]);
-       } else
-#endif
-               writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_7, &msg->u.head[0]);
-
-       writel(0xD4000000 | count, mptr++);
-       writel(buffer.phys, mptr);
-
-       writel(function << 24 | HOST_TID << 12 | ADAPTER_TID, &msg->u.head[1]);
-       writel(i2o_config_driver.context, &msg->u.head[2]);
-       writel(0, &msg->u.head[3]);
-
-#ifdef CONFIG_I2O_EXT_ADAPTEC
-       if (!c->adaptec)
-#endif
-       {
-               writel((u32) swtype << 16 | (u32) 1 << 8, &msg->body[0]);
-               writel(0, &msg->body[1]);
-               writel(swid, &msg->body[2]);
-       }
-
-       status = i2o_msg_post_wait_mem(c, m, 60, &buffer);
-
-       if (status != -ETIMEDOUT)
-               i2o_dma_free(&c->pdev->dev, &buffer);
-
-       if (status != I2O_POST_WAIT_OK)
-               return -EIO;
-
-       *offset += count;
-
-       return count;
-
-      free_buffer:
-       i2o_dma_free(&c->pdev->dev, &buffer);
-
-      nop_msg:
-       i2o_msg_nop(c, m);
-
-       return rc;
-};
-
-/* attribute for HRT in sysfs */
-static struct bin_attribute i2o_config_hrt_attr = {
-       .attr = {
-                .name = "hrt",
-                .mode = S_IRUGO,
-                .owner = THIS_MODULE},
-       .size = 0,
-       .read = i2o_config_read_hrt
-};
-
-/* attribute for LCT in sysfs */
-static struct bin_attribute i2o_config_lct_attr = {
-       .attr = {
-                .name = "lct",
-                .mode = S_IRUGO,
-                .owner = THIS_MODULE},
-       .size = 0,
-       .read = i2o_config_read_lct
-};
-
-/* IRTOS firmware access */
-I2O_CONFIG_SW_ATTR(irtos, S_IWRSR, I2O_SOFTWARE_MODULE_IRTOS, 0);
-
-#ifdef CONFIG_I2O_EXT_ADAPTEC
-
-/*
- * attribute for BIOS / SMOR, nvram and serial number access on DPT / Adaptec
- * controllers
- */
-I2O_CONFIG_SW_ATTR(bios, S_IWRSR, I2O_SOFTWARE_MODULE_IOP_PRIVATE, 0);
-I2O_CONFIG_SW_ATTR(nvram, S_IWRSR, I2O_SOFTWARE_MODULE_IOP_CONFIG, 0);
-I2O_CONFIG_SW_ATTR(serial, S_IWRSR, I2O_SOFTWARE_MODULE_IOP_CONFIG, 1);
-
-#endif
-
-/**
- *     i2o_config_notify_controller_add - Notify of added controller
- *     @c: the controller which was added
- *
- *     If a I2O controller is added, we catch the notification to add sysfs
- *     entries.
- */
-static void i2o_config_notify_controller_add(struct i2o_controller *c)
-{
-       struct kobject *kobj = &c->exec->device.kobj;
-
-       sysfs_create_bin_file(kobj, &i2o_config_hrt_attr);
-       sysfs_create_bin_file(kobj, &i2o_config_lct_attr);
-
-       sysfs_create_fops_file(kobj, &i2o_config_attr_irtos);
-#ifdef CONFIG_I2O_EXT_ADAPTEC
-       if (c->adaptec) {
-               sysfs_create_fops_file(kobj, &i2o_config_attr_bios);
-               sysfs_create_fops_file(kobj, &i2o_config_attr_nvram);
-               sysfs_create_fops_file(kobj, &i2o_config_attr_serial);
-       }
-#endif
-};
-
-/**
- *     i2o_config_notify_controller_remove - Notify of removed controller
- *     @c: the controller which was removed
- *
- *     If a I2O controller is removed, we catch the notification to remove the
- *     sysfs entries.
- */
-static void i2o_config_notify_controller_remove(struct i2o_controller *c)
-{
-       struct kobject *kobj = &c->exec->device.kobj;
-
-#ifdef CONFIG_I2O_EXT_ADAPTEC
-       if (c->adaptec) {
-               sysfs_remove_fops_file(kobj, &i2o_config_attr_serial);
-               sysfs_remove_fops_file(kobj, &i2o_config_attr_nvram);
-               sysfs_remove_fops_file(kobj, &i2o_config_attr_bios);
-       }
-#endif
-       sysfs_remove_fops_file(kobj, &i2o_config_attr_irtos);
-
-       sysfs_remove_bin_file(kobj, &i2o_config_lct_attr);
-       sysfs_remove_bin_file(kobj, &i2o_config_hrt_attr);
-};
-
  /* Config OSM driver struct */
  static struct i2o_driver i2o_config_driver = {
         .name = OSM_NAME,
-       .notify_controller_add = i2o_config_notify_controller_add,
-       .notify_controller_remove = i2o_config_notify_controller_remove
  };
  
  #ifdef CONFIG_I2O_CONFIG_OLD_IOCTL
diff --git a/drivers/message/i2o/pci.c b/drivers/message/i2o/pci.c

index 7a60fd7be8ad4aa3f334a1885e3eaa439305b26a..66c03e8825703a408b21c239ff5e4781fab71ffc 100644 (file)
--- a/drivers/message/i2o/pci.c
+++ b/drivers/message/i2o/pci.c
@@ -32,6 +32,8 @@
  #include <linux/i2o.h>
  #include "core.h"
  
+#define OSM_DESCRIPTION        "I2O-subsystem"
+
  /* PCI device id table for all I2O controllers */
  static struct pci_device_id __devinitdata i2o_pci_ids[] = {
         {PCI_DEVICE_CLASS(PCI_CLASS_INTELLIGENT_I2O << 8, 0xffff00)},
@@ -66,6 +68,8 @@ static void i2o_pci_free(struct i2o_controller *c)
  
         if (c->base.virt)
                 iounmap(c->base.virt);
+
+       pci_release_regions(c->pdev);
  }
  
  /**
@@ -84,6 +88,11 @@ static int __devinit i2o_pci_alloc(struct i2o_controller *c)
         struct device *dev = &pdev->dev;
         int i;
  
+       if (pci_request_regions(pdev, OSM_DESCRIPTION)) {
+               printk(KERN_ERR "%s: device already claimed\n", c->name);
+               return -ENODEV;
+       }
+
         for (i = 0; i < 6; i++) {
                 /* Skip I/O spaces */
                 if (!(pci_resource_flags(pdev, i) & IORESOURCE_IO)) {
@@ -138,6 +147,7 @@ static int __devinit i2o_pci_alloc(struct i2o_controller *c)
         c->base.virt = ioremap_nocache(c->base.phys, c->base.len);
         if (!c->base.virt) {
                 printk(KERN_ERR "%s: Unable to map controller.\n", c->name);
+               i2o_pci_free(c);
                 return -ENOMEM;
         }
  
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig

new file mode 100644 (file)

index 0000000..1588a59
--- /dev/null
+++ b/drivers/mfd/Kconfig
@@ -0,0 +1,16 @@
+#
+# Multifunction miscellaneous devices
+#
+
+menu "Multimedia Communications Port drivers"
+
+config MCP
+       tristate
+
+# Interface drivers
+config MCP_SA11X0
+       tristate "Support SA11x0 MCP interface"
+       depends on ARCH_SA1100
+       select MCP
+
+endmenu
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile

new file mode 100644 (file)

index 0000000..98bdd6a
--- /dev/null
+++ b/drivers/mfd/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for multifunction miscellaneous devices
+#
+
+obj-$(CONFIG_MCP)              += mcp-core.o
+obj-$(CONFIG_MCP_SA11X0)       += mcp-sa11x0.o
diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c

new file mode 100644 (file)

index 0000000..c75d713
--- /dev/null
+++ b/drivers/mfd/mcp-core.c
@@ -0,0 +1,255 @@
+/*
+ *  linux/drivers/mfd/mcp-core.c
+ *
+ *  Copyright (C) 2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ *  Generic MCP (Multimedia Communications Port) layer.  All MCP locking
+ *  is solely held within this file.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/smp.h>
+#include <linux/device.h>
+
+#include <asm/dma.h>
+#include <asm/system.h>
+
+#include "mcp.h"
+
+#define to_mcp(d)              container_of(d, struct mcp, attached_device)
+#define to_mcp_driver(d)       container_of(d, struct mcp_driver, drv)
+
+static int mcp_bus_match(struct device *dev, struct device_driver *drv)
+{
+       return 1;
+}
+
+static int mcp_bus_probe(struct device *dev)
+{
+       struct mcp *mcp = to_mcp(dev);
+       struct mcp_driver *drv = to_mcp_driver(dev->driver);
+
+       return drv->probe(mcp);
+}
+
+static int mcp_bus_remove(struct device *dev)
+{
+       struct mcp *mcp = to_mcp(dev);
+       struct mcp_driver *drv = to_mcp_driver(dev->driver);
+
+       drv->remove(mcp);
+       return 0;
+}
+
+/*
+ * Bus-level suspend hook: forward the request to the MCP driver bound
+ * to this device.  Returns 0 when no driver is bound or the driver has
+ * no suspend callback, otherwise the value returned by the driver.
+ */
+static int mcp_bus_suspend(struct device *dev, pm_message_t state)
+{
+       struct mcp *mcp = to_mcp(dev);
+       int ret = 0;
+
+       if (dev->driver) {
+               struct mcp_driver *drv = to_mcp_driver(dev->driver);
+
+               /* The callback is optional; never call through NULL. */
+               if (drv->suspend)
+                       ret = drv->suspend(mcp, state);
+       }
+       return ret;
+}
+
+/*
+ * Bus-level resume hook: forward the request to the MCP driver bound
+ * to this device.  Returns 0 when no driver is bound or the driver has
+ * no resume callback, otherwise the value returned by the driver.
+ */
+static int mcp_bus_resume(struct device *dev)
+{
+       struct mcp *mcp = to_mcp(dev);
+       int ret = 0;
+
+       if (dev->driver) {
+               struct mcp_driver *drv = to_mcp_driver(dev->driver);
+
+               /* The callback is optional; never call through NULL. */
+               if (drv->resume)
+                       ret = drv->resume(mcp);
+       }
+       return ret;
+}
+
+static struct bus_type mcp_bus_type = {
+       .name           = "mcp",
+       .match          = mcp_bus_match,
+       .suspend        = mcp_bus_suspend,
+       .resume         = mcp_bus_resume,
+};
+
+/**
+ *     mcp_set_telecom_divisor - set the telecom divisor
+ *     @mcp: MCP interface structure
+ *     @div: SIB clock divisor
+ *
+ *     Set the telecom divisor on the MCP interface.  The resulting
+ *     sample rate is SIBCLOCK/div.
+ */
+void mcp_set_telecom_divisor(struct mcp *mcp, unsigned int div)
+{
+       spin_lock_irq(&mcp->lock);
+       mcp->ops->set_telecom_divisor(mcp, div);
+       spin_unlock_irq(&mcp->lock);
+}
+EXPORT_SYMBOL(mcp_set_telecom_divisor);
+
+/**
+ *     mcp_set_audio_divisor - set the audio divisor
+ *     @mcp: MCP interface structure
+ *     @div: SIB clock divisor
+ *
+ *     Set the audio divisor on the MCP interface.
+ */
+void mcp_set_audio_divisor(struct mcp *mcp, unsigned int div)
+{
+       spin_lock_irq(&mcp->lock);
+       mcp->ops->set_audio_divisor(mcp, div);
+       spin_unlock_irq(&mcp->lock);
+}
+EXPORT_SYMBOL(mcp_set_audio_divisor);
+
+/**
+ *     mcp_reg_write - write a device register
+ *     @mcp: MCP interface structure
+ *     @reg: 4-bit register index
+ *     @val: 16-bit data value
+ *
+ *     Write a device register.  The MCP interface must be enabled
+ *     to prevent this function hanging.
+ */
+void mcp_reg_write(struct mcp *mcp, unsigned int reg, unsigned int val)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&mcp->lock, flags);
+       mcp->ops->reg_write(mcp, reg, val);
+       spin_unlock_irqrestore(&mcp->lock, flags);
+}
+EXPORT_SYMBOL(mcp_reg_write);
+
+/**
+ *     mcp_reg_read - read a device register
+ *     @mcp: MCP interface structure
+ *     @reg: 4-bit register index
+ *
+ *     Read a device register and return its value.  The MCP interface
+ *     must be enabled to prevent this function hanging.
+ */
+unsigned int mcp_reg_read(struct mcp *mcp, unsigned int reg)
+{
+       unsigned long flags;
+       unsigned int val;
+
+       spin_lock_irqsave(&mcp->lock, flags);
+       val = mcp->ops->reg_read(mcp, reg);
+       spin_unlock_irqrestore(&mcp->lock, flags);
+
+       return val;
+}
+EXPORT_SYMBOL(mcp_reg_read);
+
+/**
+ *     mcp_enable - enable the MCP interface
+ *     @mcp: MCP interface to enable
+ *
+ *     Enable the MCP interface.  Each call to mcp_enable will need
+ *     a corresponding call to mcp_disable to disable the interface.
+ *     The enable operation is only invoked when the use count goes
+ *     from zero to one.  The caller's interrupt state is saved and
+ *     restored, as it is in mcp_disable.
+ */
+void mcp_enable(struct mcp *mcp)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&mcp->lock, flags);
+       if (mcp->use_count++ == 0)
+               mcp->ops->enable(mcp);
+       spin_unlock_irqrestore(&mcp->lock, flags);
+}
+EXPORT_SYMBOL(mcp_enable);
+
+/**
+ *     mcp_disable - disable the MCP interface
+ *     @mcp: MCP interface to disable
+ *
+ *     Disable the MCP interface.  The MCP interface will only be
+ *     disabled once the number of calls to mcp_enable matches the
+ *     number of calls to mcp_disable.
+ */
+void mcp_disable(struct mcp *mcp)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&mcp->lock, flags);
+       if (--mcp->use_count == 0)
+               mcp->ops->disable(mcp);
+       spin_unlock_irqrestore(&mcp->lock, flags);
+}
+EXPORT_SYMBOL(mcp_disable);
+
+static void mcp_release(struct device *dev)
+{
+       struct mcp *mcp = container_of(dev, struct mcp, attached_device);
+
+       kfree(mcp);
+}
+
+/**
+ *     mcp_host_alloc - allocate and initialise an MCP host structure
+ *     @parent: parent device; its dma_mask is inherited
+ *     @size: number of extra bytes to allocate after struct mcp
+ *
+ *     Allocates a zeroed struct mcp followed by @size bytes, initialises
+ *     the lock and fills in the embedded device (parent, bus, dma_mask
+ *     and release callback).  @parent is dereferenced, so it must not
+ *     be NULL.  The release callback frees the structure with kfree().
+ *
+ *     Returns the new structure, or NULL if the allocation failed.
+ */
+struct mcp *mcp_host_alloc(struct device *parent, size_t size)
+{
+       struct mcp *mcp;
+
+       mcp = kmalloc(sizeof(struct mcp) + size, GFP_KERNEL);
+       if (mcp) {
+               memset(mcp, 0, sizeof(struct mcp) + size);
+               spin_lock_init(&mcp->lock);
+               mcp->attached_device.parent = parent;
+               mcp->attached_device.bus = &mcp_bus_type;
+               mcp->attached_device.dma_mask = parent->dma_mask;
+               mcp->attached_device.release = mcp_release;
+       }
+       return mcp;
+}
+EXPORT_SYMBOL(mcp_host_alloc);
+
+/**
+ *     mcp_host_register - register an MCP host with the driver core
+ *     @mcp: MCP interface structure to register
+ *
+ *     Names the embedded device "mcp0" and registers it.  The name is
+ *     fixed, so a second host would presumably clash with the first;
+ *     TODO confirm whether more than one host is ever expected.
+ *
+ *     Returns the result of device_register().
+ */
+int mcp_host_register(struct mcp *mcp)
+{
+       strcpy(mcp->attached_device.bus_id, "mcp0");
+       return device_register(&mcp->attached_device);
+}
+EXPORT_SYMBOL(mcp_host_register);
+
+void mcp_host_unregister(struct mcp *mcp)
+{
+       device_unregister(&mcp->attached_device);
+}
+EXPORT_SYMBOL(mcp_host_unregister);
+
+int mcp_driver_register(struct mcp_driver *mcpdrv)
+{
+       mcpdrv->drv.bus = &mcp_bus_type;
+       mcpdrv->drv.probe = mcp_bus_probe;
+       mcpdrv->drv.remove = mcp_bus_remove;
+       return driver_register(&mcpdrv->drv);
+}
+EXPORT_SYMBOL(mcp_driver_register);
+
+void mcp_driver_unregister(struct mcp_driver *mcpdrv)
+{
+       driver_unregister(&mcpdrv->drv);
+}
+EXPORT_SYMBOL(mcp_driver_unregister);
+
+static int __init mcp_init(void)
+{
+       return bus_register(&mcp_bus_type);
+}
+
+static void __exit mcp_exit(void)
+{
+       bus_unregister(&mcp_bus_type);
+}
+
+module_init(mcp_init);
+module_exit(mcp_exit);
+
+MODULE_AUTHOR("Russell King <rmk@arm.linux.org.uk>");
+MODULE_DESCRIPTION("Core multimedia communications port driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c

new file mode 100644 (file)

index 0000000..e9806fb
--- /dev/null
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -0,0 +1,275 @@
+/*
+ *  linux/drivers/mfd/mcp-sa11x0.c
+ *
+ *  Copyright (C) 2001-2005 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ *  SA11x0 MCP (Multimedia Communications Port) driver.
+ *
+ *  MCP read/write timeouts from Jordi Colomer, rehacked by rmk.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+
+#include <asm/dma.h>
+#include <asm/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/system.h>
+#include <asm/arch/mcp.h>
+
+#include <asm/arch/assabet.h>
+
+#include "mcp.h"
+
+struct mcp_sa11x0 {
+       u32     mccr0;
+       u32     mccr1;
+};
+
+#define priv(mcp)      ((struct mcp_sa11x0 *)mcp_priv(mcp))
+
+static void
+mcp_sa11x0_set_telecom_divisor(struct mcp *mcp, unsigned int divisor)
+{
+       unsigned int mccr0;
+
+       divisor /= 32;
+
+       mccr0 = Ser4MCCR0 & ~0x00007f00;
+       mccr0 |= divisor << 8;
+       Ser4MCCR0 = mccr0;
+}
+
+static void
+mcp_sa11x0_set_audio_divisor(struct mcp *mcp, unsigned int divisor)
+{
+       unsigned int mccr0;
+
+       divisor /= 32;
+
+       mccr0 = Ser4MCCR0 & ~0x0000007f;
+       mccr0 |= divisor;
+       Ser4MCCR0 = mccr0;
+}
+
+/*
+ * Write data to the device.  The bit should be set after 3 subframe
+ * times (each frame is 64 clocks).  We wait a maximum of 6 subframes.
+ * We really should try doing something more productive while we
+ * wait.
+ */
+static void
+mcp_sa11x0_write(struct mcp *mcp, unsigned int reg, unsigned int val)
+{
+       int ret = -ETIME;
+       int i;
+
+       Ser4MCDR2 = reg << 17 | MCDR2_Wr | (val & 0xffff);
+
+       for (i = 0; i < 2; i++) {
+               udelay(mcp->rw_timeout);
+               if (Ser4MCSR & MCSR_CWC) {
+                       ret = 0;
+                       break;
+               }
+       }
+
+       if (ret < 0)
+               printk(KERN_WARNING "mcp: write timed out\n");
+}
+
+/*
+ * Read data from the device.  The bit should be set after 3 subframe
+ * times (each frame is 64 clocks).  We wait a maximum of 6 subframes.
+ * We really should try doing something more productive while we
+ * wait.
+ *
+ * Returns the low 16 bits of the data register on success.  On
+ * timeout a warning is logged and -ETIME is returned, converted to
+ * unsigned int because that is the return type; it is therefore not
+ * a negative value from the caller's point of view.
+ */
+static unsigned int
+mcp_sa11x0_read(struct mcp *mcp, unsigned int reg)
+{
+       int ret = -ETIME;
+       int i;
+
+       /* Start the read cycle for register 'reg'. */
+       Ser4MCDR2 = reg << 17 | MCDR2_Rd;
+
+       /* Poll twice, one rw_timeout delay (in us) before each check. */
+       for (i = 0; i < 2; i++) {
+               udelay(mcp->rw_timeout);
+               if (Ser4MCSR & MCSR_CRC) {
+                       ret = Ser4MCDR2 & 0xffff;
+                       break;
+               }
+       }
+
+       /* A successful read is masked to 16 bits, so it is never < 0. */
+       if (ret < 0)
+               printk(KERN_WARNING "mcp: read timed out\n");
+
+       return ret;
+}
+
+static void mcp_sa11x0_enable(struct mcp *mcp)
+{
+       Ser4MCSR = -1;
+       Ser4MCCR0 |= MCCR0_MCE;
+}
+
+static void mcp_sa11x0_disable(struct mcp *mcp)
+{
+       Ser4MCCR0 &= ~MCCR0_MCE;
+}
+
+/*
+ * Our methods.
+ */
+static struct mcp_ops mcp_sa11x0 = {
+       .set_telecom_divisor    = mcp_sa11x0_set_telecom_divisor,
+       .set_audio_divisor      = mcp_sa11x0_set_audio_divisor,
+       .reg_write              = mcp_sa11x0_write,
+       .reg_read               = mcp_sa11x0_read,
+       .enable                 = mcp_sa11x0_enable,
+       .disable                = mcp_sa11x0_disable,
+};
+
+/*
+ * Probe the SA11x0 MCP platform device: claim the register window,
+ * allocate the MCP host, program the PPC pins and MCP control
+ * registers from the platform data, then register the host.
+ *
+ * Returns 0 on success, -ENODEV without platform data, -EBUSY if the
+ * register window is already claimed, -ENOMEM if the host cannot be
+ * allocated, or the error from mcp_host_register().
+ */
+static int mcp_sa11x0_probe(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct mcp_plat_data *data = pdev->dev.platform_data;
+       struct mcp *mcp;
+       int ret;
+
+       if (!data)
+               return -ENODEV;
+
+       if (!request_mem_region(0x80060000, 0x60, "sa11x0-mcp"))
+               return -EBUSY;
+
+       mcp = mcp_host_alloc(&pdev->dev, sizeof(struct mcp_sa11x0));
+       if (!mcp) {
+               ret = -ENOMEM;
+               goto release;
+       }
+
+       mcp->owner              = THIS_MODULE;
+       mcp->ops                = &mcp_sa11x0;
+       mcp->sclk_rate          = data->sclk_rate;
+       mcp->dma_audio_rd       = DMA_Ser4MCP0Rd;
+       mcp->dma_audio_wr       = DMA_Ser4MCP0Wr;
+       mcp->dma_telco_rd       = DMA_Ser4MCP1Rd;
+       mcp->dma_telco_wr       = DMA_Ser4MCP1Wr;
+
+       dev_set_drvdata(dev, mcp);
+
+       if (machine_is_assabet()) {
+               ASSABET_BCR_set(ASSABET_BCR_CODEC_RST);
+       }
+
+       /*
+        * Setup the PPC unit correctly.
+        */
+       PPDR &= ~PPC_RXD4;
+       PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM;
+       PSDR |= PPC_RXD4;
+       PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+       PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+
+       /*
+        * Initialise device.  Note that we initially
+        * set the sampling rate to minimum.
+        */
+       Ser4MCSR = -1;
+       Ser4MCCR1 = data->mccr1;
+       Ser4MCCR0 = data->mccr0 | 0x7f7f;
+
+       /*
+        * Calculate the read/write timeout (us) from the bit clock
+        * rate.  This is the period for 3 64-bit frames.  Always
+        * round this time up.
+        */
+       mcp->rw_timeout = (64 * 3 * 1000000 + mcp->sclk_rate - 1) /
+                         mcp->sclk_rate;
+
+       ret = mcp_host_register(mcp);
+       if (ret == 0)
+               goto out;
+
+       /*
+        * Registration failed.  Drop the device reference so that the
+        * release callback installed by mcp_host_alloc() frees the
+        * host structure instead of leaking it.
+        */
+       put_device(&mcp->attached_device);
+
+ release:
+       release_mem_region(0x80060000, 0x60);
+       dev_set_drvdata(dev, NULL);
+
+ out:
+       return ret;
+}
+
+static int mcp_sa11x0_remove(struct device *dev)
+{
+       struct mcp *mcp = dev_get_drvdata(dev);
+
+       dev_set_drvdata(dev, NULL);
+       mcp_host_unregister(mcp);
+       release_mem_region(0x80060000, 0x60);
+
+       return 0;
+}
+
+static int mcp_sa11x0_suspend(struct device *dev, pm_message_t state, u32 level)
+{
+       struct mcp *mcp = dev_get_drvdata(dev);
+
+       if (level == SUSPEND_DISABLE) {
+               priv(mcp)->mccr0 = Ser4MCCR0;
+               priv(mcp)->mccr1 = Ser4MCCR1;
+               Ser4MCCR0 &= ~MCCR0_MCE;
+       }
+       return 0;
+}
+
+static int mcp_sa11x0_resume(struct device *dev, u32 level)
+{
+       struct mcp *mcp = dev_get_drvdata(dev);
+
+       if (level == RESUME_RESTORE_STATE) {
+               Ser4MCCR1 = priv(mcp)->mccr1;
+               Ser4MCCR0 = priv(mcp)->mccr0;
+       }
+       return 0;
+}
+
+/*
+ * The driver for the SA11x0 MCP port.
+ */
+static struct device_driver mcp_sa11x0_driver = {
+       .name           = "sa11x0-mcp",
+       .bus            = &platform_bus_type,
+       .probe          = mcp_sa11x0_probe,
+       .remove         = mcp_sa11x0_remove,
+       .suspend        = mcp_sa11x0_suspend,
+       .resume         = mcp_sa11x0_resume,
+};
+
+/*
+ * This needs re-working
+ */
+static int __init mcp_sa11x0_init(void)
+{
+       return driver_register(&mcp_sa11x0_driver);
+}
+
+static void __exit mcp_sa11x0_exit(void)
+{
+       driver_unregister(&mcp_sa11x0_driver);
+}
+
+module_init(mcp_sa11x0_init);
+module_exit(mcp_sa11x0_exit);
+
+MODULE_AUTHOR("Russell King <rmk@arm.linux.org.uk>");
+MODULE_DESCRIPTION("SA11x0 multimedia communications port driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/mcp.h b/drivers/mfd/mcp.h

new file mode 100644 (file)

index 0000000..c093a93
--- /dev/null
+++ b/drivers/mfd/mcp.h
@@ -0,0 +1,66 @@
+/*
+ *  linux/drivers/mfd/mcp.h
+ *
+ *  Copyright (C) 2001 Russell King, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+#ifndef MCP_H
+#define MCP_H
+
+struct mcp_ops;
+
+struct mcp {
+       struct module   *owner;
+       struct mcp_ops  *ops;
+       spinlock_t      lock;
+       int             use_count;
+       unsigned int    sclk_rate;
+       unsigned int    rw_timeout;
+       dma_device_t    dma_audio_rd;
+       dma_device_t    dma_audio_wr;
+       dma_device_t    dma_telco_rd;
+       dma_device_t    dma_telco_wr;
+       struct device   attached_device;
+};
+
+struct mcp_ops {
+       void            (*set_telecom_divisor)(struct mcp *, unsigned int);
+       void            (*set_audio_divisor)(struct mcp *, unsigned int);
+       void            (*reg_write)(struct mcp *, unsigned int, unsigned int);
+       unsigned int    (*reg_read)(struct mcp *, unsigned int);
+       void            (*enable)(struct mcp *);
+       void            (*disable)(struct mcp *);
+};
+
+void mcp_set_telecom_divisor(struct mcp *, unsigned int);
+void mcp_set_audio_divisor(struct mcp *, unsigned int);
+void mcp_reg_write(struct mcp *, unsigned int, unsigned int);
+unsigned int mcp_reg_read(struct mcp *, unsigned int);
+void mcp_enable(struct mcp *);
+void mcp_disable(struct mcp *);
+#define mcp_get_sclk_rate(mcp) ((mcp)->sclk_rate)
+
+struct mcp *mcp_host_alloc(struct device *, size_t);
+int mcp_host_register(struct mcp *);
+void mcp_host_unregister(struct mcp *);
+
+struct mcp_driver {
+       struct device_driver drv;
+       int (*probe)(struct mcp *);
+       void (*remove)(struct mcp *);
+       int (*suspend)(struct mcp *, pm_message_t);
+       int (*resume)(struct mcp *);
+};
+
+int mcp_driver_register(struct mcp_driver *);
+void mcp_driver_unregister(struct mcp_driver *);
+
+#define mcp_get_drvdata(mcp)   dev_get_drvdata(&(mcp)->attached_device)
+#define mcp_set_drvdata(mcp,d) dev_set_drvdata(&(mcp)->attached_device, d)
+
+/*
+ * Pointer to the area immediately following struct mcp, i.e. the extra
+ * bytes requested through the size argument of mcp_host_alloc().
+ */
+#define mcp_priv(mcp)          ((void *)((mcp)+1))
+
+#endif
diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c

index eeb9f6668e69167044b3b41bf6559479f30dd5ab..3c5904834fe8289db5672fae41a543d9bc7c6059 100644 (file)
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -361,7 +361,7 @@ static void mmc_decode_cid(struct mmc_card *card)
  
         default:
                 printk("%s: card has unknown MMCA version %d\n",
-                       card->host->host_name, card->csd.mmca_vsn);
+                       mmc_hostname(card->host), card->csd.mmca_vsn);
                 mmc_card_set_bad(card);
                 break;
         }
@@ -383,7 +383,7 @@ static void mmc_decode_csd(struct mmc_card *card)
         csd_struct = UNSTUFF_BITS(resp, 126, 2);
         if (csd_struct != 1 && csd_struct != 2) {
                 printk("%s: unrecognised CSD structure version %d\n",
-                       card->host->host_name, csd_struct);
+                       mmc_hostname(card->host), csd_struct);
                 mmc_card_set_bad(card);
                 return;
         }
@@ -551,7 +551,7 @@ static void mmc_discover_cards(struct mmc_host *host)
                 }
                 if (err != MMC_ERR_NONE) {
                         printk(KERN_ERR "%s: error requesting CID: %d\n",
-                               host->host_name, err);
+                               mmc_hostname(host), err);
                         break;
                 }
  
@@ -796,17 +796,13 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
  {
         struct mmc_host *host;
  
-       host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL);
+       host = mmc_alloc_host_sysfs(extra, dev);
         if (host) {
-               memset(host, 0, sizeof(struct mmc_host) + extra);
-
                 spin_lock_init(&host->lock);
                 init_waitqueue_head(&host->wq);
                 INIT_LIST_HEAD(&host->cards);
                 INIT_WORK(&host->detect, mmc_rescan, host);
  
-               host->dev = dev;
-
                 /*
                  * By default, hosts do not support SGIO or large requests.
                  * They have to set these according to their abilities.
@@ -828,15 +824,15 @@ EXPORT_SYMBOL(mmc_alloc_host);
   */
  int mmc_add_host(struct mmc_host *host)
  {
-       static unsigned int host_num;
+       int ret;
  
-       snprintf(host->host_name, sizeof(host->host_name),
-                "mmc%d", host_num++);
-
-       mmc_power_off(host);
-       mmc_detect_change(host);
+       ret = mmc_add_host_sysfs(host);
+       if (ret == 0) {
+               mmc_power_off(host);
+               mmc_detect_change(host);
+       }
  
-       return 0;
+       return ret;
  }
  
  EXPORT_SYMBOL(mmc_add_host);
@@ -859,6 +855,7 @@ void mmc_remove_host(struct mmc_host *host)
         }
  
         mmc_power_off(host);
+       mmc_remove_host_sysfs(host);
  }
  
  EXPORT_SYMBOL(mmc_remove_host);
@@ -872,7 +869,7 @@ EXPORT_SYMBOL(mmc_remove_host);
  void mmc_free_host(struct mmc_host *host)
  {
         flush_scheduled_work();
-       kfree(host);
+       mmc_free_host_sysfs(host);
  }
  
  EXPORT_SYMBOL(mmc_free_host);
diff --git a/drivers/mmc/mmc.h b/drivers/mmc/mmc.h

index b498dffe0b118c48d392b6d4c39730dfb03ab67a..97bae00292fafdfdfc7d785d9b4725603e5583d5 100644 (file)
--- a/drivers/mmc/mmc.h
+++ b/drivers/mmc/mmc.h
@@ -13,4 +13,9 @@
  void mmc_init_card(struct mmc_card *card, struct mmc_host *host);
  int mmc_register_card(struct mmc_card *card);
  void mmc_remove_card(struct mmc_card *card);
+
+struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev);
+int mmc_add_host_sysfs(struct mmc_host *host);
+void mmc_remove_host_sysfs(struct mmc_host *host);
+void mmc_free_host_sysfs(struct mmc_host *host);
  #endif
diff --git a/drivers/mmc/mmc_sysfs.c b/drivers/mmc/mmc_sysfs.c

index 5556cd3b555917768f7fc4d341e4b8c93f63db80..ad8949810fc5cbeb79de505265ed57fb1aeba6b9 100644 (file)
--- a/drivers/mmc/mmc_sysfs.c
+++ b/drivers/mmc/mmc_sysfs.c
@@ -12,6 +12,7 @@
  #include <linux/module.h>
  #include <linux/init.h>
  #include <linux/device.h>
+#include <linux/idr.h>
  
  #include <linux/mmc/card.h>
  #include <linux/mmc/host.h>
@@ -20,6 +21,7 @@
  
  #define dev_to_mmc_card(d)     container_of(d, struct mmc_card, dev)
  #define to_mmc_driver(d)       container_of(d, struct mmc_driver, drv)
+#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev)
  
  #define MMC_ATTR(name, fmt, args...)                                   \
  static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf)        \
@@ -206,7 +208,7 @@ void mmc_init_card(struct mmc_card *card, struct mmc_host *host)
  int mmc_register_card(struct mmc_card *card)
  {
         snprintf(card->dev.bus_id, sizeof(card->dev.bus_id),
-                "%s:%04x", card->host->host_name, card->rca);
+                "%s:%04x", mmc_hostname(card->host), card->rca);
  
         return device_add(&card->dev);
  }
@@ -224,13 +226,97 @@ void mmc_remove_card(struct mmc_card *card)
  }
  
  
+static void mmc_host_classdev_release(struct class_device *dev)
+{      /* Installed as mmc_host_class.release; frees the host that embeds 'dev'. */
+       struct mmc_host *host = cls_dev_to_mmc_host(dev);       /* container_of() on the class_dev member */
+       kfree(host);    /* releases the kmalloc() made in mmc_alloc_host_sysfs() */
+}
+
+static struct class mmc_host_class = { /* registered in mmc_init(), unregistered in mmc_exit() */
+       .name           = "mmc_host",
+       .release        = mmc_host_classdev_release,    /* kfree()s the enclosing struct mmc_host */
+};
+
+static DEFINE_IDR(mmc_host_idr);       /* hands out host->index values in mmc_add_host_sysfs() */
+static DEFINE_SPINLOCK(mmc_host_lock); /* held around idr_get_new()/idr_remove() on mmc_host_idr */
+
+/*
+ * Internal function. Allocate a new MMC host.
+ *
+ * Allocates sizeof(struct mmc_host) + 'extra' bytes with GFP_KERNEL and
+ * zeroes the whole allocation.  'dev' is recorded both as host->dev and
+ * as the device behind the embedded class device, which is bound to
+ * mmc_host_class and initialised (but not yet added; see
+ * mmc_add_host_sysfs()).
+ *
+ * Returns the new host, or NULL if the allocation failed.
+ */
+struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev)
+{
+       struct mmc_host *host;
+
+       host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL);
+       if (host) {
+               memset(host, 0, sizeof(struct mmc_host) + extra);
+
+               host->dev = dev;
+               host->class_dev.dev = host->dev;
+               host->class_dev.class = &mmc_host_class;        /* release hook is mmc_host_classdev_release() */
+               class_device_initialize(&host->class_dev);      /* mmc_free_host_sysfs() later drops this with class_device_put() */
+       }
+
+       return host;
+}
+
+/*
+ * Internal function. Register a new MMC host with the MMC class.
+ *
+ * Obtains an index from mmc_host_idr (stored in host->index), names the
+ * class device "mmc<index>" and adds it with class_device_add().
+ *
+ * Returns -ENOMEM if idr_pre_get() fails, the error from idr_get_new()
+ * if that fails, and otherwise whatever class_device_add() returns
+ * (the caller mmc_add_host() treats 0 as success).
+ *
+ * NOTE(review): if class_device_add() fails, the index obtained above is
+ * not removed from mmc_host_idr in this function - confirm that callers
+ * deal with this.
+ */
+int mmc_add_host_sysfs(struct mmc_host *host)
+{
+       int err;
+
+       if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL))    /* done before mmc_host_lock is taken */
+               return -ENOMEM;
+
+       spin_lock(&mmc_host_lock);
+       err = idr_get_new(&mmc_host_idr, host, &host->index);
+       spin_unlock(&mmc_host_lock);
+       if (err)
+               return err;
+
+       snprintf(host->class_dev.class_id, BUS_ID_SIZE,
+                "mmc%d", host->index);
+
+       return class_device_add(&host->class_dev);
+}
+
+/*
+ * Internal function. Unregister a MMC host with the MMC class.
+ *
+ * Deletes the host's class device first, then removes host->index from
+ * mmc_host_idr while holding mmc_host_lock.  The host structure itself
+ * is not freed here; see mmc_free_host_sysfs().
+ */
+void mmc_remove_host_sysfs(struct mmc_host *host)
+{
+       class_device_del(&host->class_dev);
+
+       spin_lock(&mmc_host_lock);
+       idr_remove(&mmc_host_idr, host->index);
+       spin_unlock(&mmc_host_lock);
+}
+
+/*
+ * Internal function. Free a MMC host.
+ *
+ * Does not kfree() the host directly: it drops a reference on the
+ * embedded class device.  The memory is released by
+ * mmc_host_classdev_release(), the class's release hook - presumably
+ * once the last reference is gone (TODO confirm against the class
+ * device core).
+ */
+void mmc_free_host_sysfs(struct mmc_host *host)
+{
+       class_device_put(&host->class_dev);
+}
+
+
  static int __init mmc_init(void)
  {
-       return bus_register(&mmc_bus_type);
+       int ret = bus_register(&mmc_bus_type);
+       if (ret == 0) {
+               ret = class_register(&mmc_host_class);
+               if (ret)
+                       bus_unregister(&mmc_bus_type);
+       }
+       return ret;
  }
  
  static void __exit mmc_exit(void)
  {
+       class_unregister(&mmc_host_class);
         bus_unregister(&mmc_bus_type);
  }
  
diff --git a/drivers/mmc/mmci.c b/drivers/mmc/mmci.c

index 7a42966d755bd00a7defabfe47e8d69fe4820aac..716c4ef4faf6f8c46c9582d5211d5fba3a332255 100644 (file)
--- a/drivers/mmc/mmci.c
+++ b/drivers/mmc/mmci.c
@@ -34,7 +34,7 @@
  
  #ifdef CONFIG_MMC_DEBUG
  #define DBG(host,fmt,args...)  \
-       pr_debug("%s: %s: " fmt, host->mmc->host_name, __func__ , args)
+       pr_debug("%s: %s: " fmt, mmc_hostname(host->mmc), __func__ , args)
  #else
  #define DBG(host,fmt,args...)  do { } while (0)
  #endif
@@ -541,7 +541,7 @@ static int mmci_probe(struct amba_device *dev, void *id)
         mmc_add_host(mmc);
  
         printk(KERN_INFO "%s: MMCI rev %x cfg %02x at 0x%08lx irq %d,%d\n",
-               mmc->host_name, amba_rev(dev), amba_config(dev),
+               mmc_hostname(mmc), amba_rev(dev), amba_config(dev),
                 dev->res.start, dev->irq[0], dev->irq[1]);
  
         init_timer(&host->timer);
diff --git a/drivers/mmc/wbsd.c b/drivers/mmc/wbsd.c

index 8b487ed1069c990d8a57d43041d80c768c0da611..402c2d661fb2814d06983d6a6f0e3c89c110e89f 100644 (file)
--- a/drivers/mmc/wbsd.c
+++ b/drivers/mmc/wbsd.c
@@ -42,7 +42,7 @@
  #include "wbsd.h"
  
  #define DRIVER_NAME "wbsd"
-#define DRIVER_VERSION "1.2"
+#define DRIVER_VERSION "1.3"
  
  #ifdef CONFIG_MMC_DEBUG
  #define DBG(x...) \
@@ -1796,7 +1796,7 @@ static int __devinit wbsd_init(struct device* dev, int base, int irq, int dma,
         
         mmc_add_host(mmc);
  
-       printk(KERN_INFO "%s: W83L51xD", mmc->host_name);
+       printk(KERN_INFO "%s: W83L51xD", mmc_hostname(mmc));
         if (host->chip_id != 0)
                 printk(" id %x", (int)host->chip_id);
         printk(" at 0x%x irq %d", (int)host->base, (int)host->irq);
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c

index 7b293f01c9ed1309b67d0523c18191e91c5fc1ba..34b80de34faec69fa86927f501214377279d5c83 100644 (file)
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -1897,6 +1897,7 @@ static int cp_resume (struct pci_dev *pdev)
  {
         struct net_device *dev;
         struct cp_private *cp;
+       unsigned long flags;
  
         dev = pci_get_drvdata (pdev);
         cp  = netdev_priv(dev);
@@ -1910,6 +1911,12 @@ static int cp_resume (struct pci_dev *pdev)
         
         cp_init_hw (cp);
         netif_start_queue (dev);
+
+       spin_lock_irqsave (&cp->lock, flags);
+
+       mii_check_media(&cp->mii_if, netif_msg_link(cp), FALSE);
+
+       spin_unlock_irqrestore (&cp->lock, flags);
         
         return 0;
  }
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig

index 8a835eb58808726eb8c7c478f6bfd3c3a450db6f..79e8aa6f2b9edfc215e43b1a1b0a934274a680f4 100644 (file)
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -131,6 +131,8 @@ config NET_SB1000
  
         source "drivers/net/arcnet/Kconfig"
  
+source "drivers/net/phy/Kconfig"
+
  #
  #      Ethernet
  #
@@ -1145,7 +1147,7 @@ config IBMVETH
           be called ibmveth.
  
  config IBM_EMAC
-       tristate "IBM PPC4xx EMAC driver support"
+       bool "IBM PPC4xx EMAC driver support"
         depends on 4xx
         select CRC32
         ---help---
@@ -1154,7 +1156,7 @@ config IBM_EMAC
  
  config IBM_EMAC_ERRMSG
         bool "Verbose error messages"
-       depends on IBM_EMAC
+       depends on IBM_EMAC && BROKEN
  
  config IBM_EMAC_RXB
         int "Number of receive buffers"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile

index 63c6d1e6d4d982937ada0fdaca07003ec29ef9e6..a369ae284a9a23cd5aaa7590ea584573df6dbe78 100644 (file)
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -65,6 +65,7 @@ obj-$(CONFIG_ADAPTEC_STARFIRE) += starfire.o
  #
  
  obj-$(CONFIG_MII) += mii.o
+obj-$(CONFIG_PHYLIB) += phy/
  
  obj-$(CONFIG_SUNDANCE) += sundance.o
  obj-$(CONFIG_HAMACHI) += hamachi.o
diff --git a/drivers/net/Space.c b/drivers/net/Space.c

index 3707df6b0cfaf5a734ef7de6e636fd8a1aecaa76..60304f7e7e5b9703a6973a733cb452ff44517b18 100644 (file)
--- a/drivers/net/Space.c
+++ b/drivers/net/Space.c
@@ -87,7 +87,6 @@ extern struct net_device *mvme147lance_probe(int unit);
  extern struct net_device *tc515_probe(int unit);
  extern struct net_device *lance_probe(int unit);
  extern struct net_device *mace_probe(int unit);
-extern struct net_device *macsonic_probe(int unit);
  extern struct net_device *mac8390_probe(int unit);
  extern struct net_device *mac89x0_probe(int unit);
  extern struct net_device *mc32_probe(int unit);
@@ -284,9 +283,6 @@ static struct devprobe2 m68k_probes[] __initdata = {
  #ifdef CONFIG_MACMACE          /* Mac 68k Quadra AV builtin Ethernet */
         {mace_probe, 0},
  #endif
-#ifdef CONFIG_MACSONIC         /* Mac SONIC-based Ethernet of all sorts */ 
-       {macsonic_probe, 0},
-#endif
  #ifdef CONFIG_MAC8390           /* NuBus NS8390-based cards */
         {mac8390_probe, 0},
  #endif
@@ -318,17 +314,9 @@ static void __init ethif_probe2(int unit)
  #ifdef CONFIG_TR
  /* Token-ring device probe */
  extern int ibmtr_probe_card(struct net_device *);
-extern struct net_device *sk_isa_probe(int unit);
-extern struct net_device *proteon_probe(int unit);
  extern struct net_device *smctr_probe(int unit);
  
  static struct devprobe2 tr_probes2[] __initdata = {
-#ifdef CONFIG_SKISA
-       {sk_isa_probe, 0},
-#endif
-#ifdef CONFIG_PROTEON
-       {proteon_probe, 0},
-#endif
  #ifdef CONFIG_SMCTR
         {smctr_probe, 0},
  #endif
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c

index 8acc655ec1e82e911c88cea2cbe8340cb2ab89d2..7babf6af4e28dd1113b467dd47907e191aad9b02 100644 (file)
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -14,8 +14,8 @@
  
  #define DRV_MODULE_NAME                "bnx2"
  #define PFX DRV_MODULE_NAME    ": "
-#define DRV_MODULE_VERSION     "1.2.19"
-#define DRV_MODULE_RELDATE     "May 23, 2005"
+#define DRV_MODULE_VERSION     "1.2.20"
+#define DRV_MODULE_RELDATE     "August 22, 2005"
  
  #define RUN_AT(x) (jiffies + (x))
  
@@ -52,7 +52,6 @@ static struct {
         { "HP NC370i Multifunction Gigabit Server Adapter" },
         { "Broadcom NetXtreme II BCM5706 1000Base-SX" },
         { "HP NC370F Multifunction Gigabit Server Adapter" },
-       { 0 },
         };
  
  static struct pci_device_id bnx2_pci_tbl[] = {
@@ -108,6 +107,15 @@ static struct flash_spec flash_table[] =
  
  MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl);
  
+static inline u32 bnx2_tx_avail(struct bnx2 *bp)
+{      /* Returns bp->tx_ring_size minus the distance from tx_cons to tx_prod. */
+       u32 diff = TX_RING_IDX(bp->tx_prod) - TX_RING_IDX(bp->tx_cons); /* unsigned: becomes very large when prod index < cons index */
+
+       if (diff > MAX_TX_DESC_CNT)     /* only reachable via the unsigned wrap above, assuming TX_RING_IDX() <= MAX_TX_DESC_CNT - TODO confirm */
+               diff = (diff & MAX_TX_DESC_CNT) - 1;    /* NOTE(review): the -1 presumably skips a non-data ring entry; confirm against TX_RING_IDX/NEXT_TX_BD */
+       return (bp->tx_ring_size - diff);
+}
+
  static u32
  bnx2_reg_rd_ind(struct bnx2 *bp, u32 offset)
  {
@@ -807,7 +815,19 @@ bnx2_setup_serdes_phy(struct bnx2 *bp)
                 bnx2_write_phy(bp, MII_ADVERTISE, new_adv);
                 bnx2_write_phy(bp, MII_BMCR, bmcr | BMCR_ANRESTART |
                         BMCR_ANENABLE);
-               bp->serdes_an_pending = SERDES_AN_TIMEOUT / bp->timer_interval;
+               if (CHIP_NUM(bp) == CHIP_NUM_5706) {
+                       /* Speed up link-up time when the link partner
+                        * does not autonegotiate which is very common
+                        * in blade servers. Some blade servers use
+                        * IPMI for keyboard input and it's important
+                        * to minimize link disruptions. Autoneg. involves
+                        * exchanging base pages plus 3 next pages and
+                        * normally completes in about 120 msec.
+                        */
+                       bp->current_interval = SERDES_AN_TIMEOUT;
+                       bp->serdes_an_pending = 1;
+                       mod_timer(&bp->timer, jiffies + bp->current_interval);
+               }
         }
  
         return 0;
@@ -1327,22 +1347,17 @@ bnx2_tx_int(struct bnx2 *bp)
                 }
         }
  
-       atomic_add(tx_free_bd, &bp->tx_avail_bd);
+       bp->tx_cons = sw_cons;
  
         if (unlikely(netif_queue_stopped(bp->dev))) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&bp->tx_lock, flags);
+               spin_lock(&bp->tx_lock);
                 if ((netif_queue_stopped(bp->dev)) &&
-                       (atomic_read(&bp->tx_avail_bd) > MAX_SKB_FRAGS)) {
+                   (bnx2_tx_avail(bp) > MAX_SKB_FRAGS)) {
  
                         netif_wake_queue(bp->dev);
                 }
-               spin_unlock_irqrestore(&bp->tx_lock, flags);
+               spin_unlock(&bp->tx_lock);
         }
-
-       bp->tx_cons = sw_cons;
-
  }
  
  static inline void
@@ -1523,15 +1538,12 @@ bnx2_msi(int irq, void *dev_instance, struct pt_regs *regs)
                 BNX2_PCICFG_INT_ACK_CMD_MASK_INT);
  
         /* Return here if interrupt is disabled. */
-       if (unlikely(atomic_read(&bp->intr_sem) != 0)) {
-               return IRQ_RETVAL(1);
-       }
+       if (unlikely(atomic_read(&bp->intr_sem) != 0))
+               return IRQ_HANDLED;
  
-       if (netif_rx_schedule_prep(dev)) {
-               __netif_rx_schedule(dev);
-       }
+       netif_rx_schedule(dev);
  
-       return IRQ_RETVAL(1);
+       return IRQ_HANDLED;
  }
  
  static irqreturn_t
@@ -1549,22 +1561,19 @@ bnx2_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
         if ((bp->status_blk->status_idx == bp->last_status_idx) ||
             (REG_RD(bp, BNX2_PCICFG_MISC_STATUS) &
              BNX2_PCICFG_MISC_STATUS_INTA_VALUE))
-               return IRQ_RETVAL(0);
+               return IRQ_NONE;
  
         REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
                 BNX2_PCICFG_INT_ACK_CMD_USE_INT_HC_PARAM |
                 BNX2_PCICFG_INT_ACK_CMD_MASK_INT);
  
         /* Return here if interrupt is shared and is disabled. */
-       if (unlikely(atomic_read(&bp->intr_sem) != 0)) {
-               return IRQ_RETVAL(1);
-       }
+       if (unlikely(atomic_read(&bp->intr_sem) != 0))
+               return IRQ_HANDLED;
  
-       if (netif_rx_schedule_prep(dev)) {
-               __netif_rx_schedule(dev);
-       }
+       netif_rx_schedule(dev);
  
-       return IRQ_RETVAL(1);
+       return IRQ_HANDLED;
  }
  
  static int
@@ -1581,11 +1590,9 @@ bnx2_poll(struct net_device *dev, int *budget)
                 (bp->status_blk->status_attn_bits_ack &
                 STATUS_ATTN_BITS_LINK_STATE)) {
  
-               unsigned long flags;
-
-               spin_lock_irqsave(&bp->phy_lock, flags);
+               spin_lock(&bp->phy_lock);
                 bnx2_phy_int(bp);
-               spin_unlock_irqrestore(&bp->phy_lock, flags);
+               spin_unlock(&bp->phy_lock);
         }
  
         if (bp->status_blk->status_tx_quick_consumer_index0 != bp->tx_cons) {
@@ -1628,9 +1635,8 @@ bnx2_set_rx_mode(struct net_device *dev)
         struct bnx2 *bp = dev->priv;
         u32 rx_mode, sort_mode;
         int i;
-       unsigned long flags;
  
-       spin_lock_irqsave(&bp->phy_lock, flags);
+       spin_lock_bh(&bp->phy_lock);
  
         rx_mode = bp->rx_mode & ~(BNX2_EMAC_RX_MODE_PROMISCUOUS |
                                   BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG);
@@ -1691,7 +1697,7 @@ bnx2_set_rx_mode(struct net_device *dev)
         REG_WR(bp, BNX2_RPM_SORT_USER0, sort_mode);
         REG_WR(bp, BNX2_RPM_SORT_USER0, sort_mode | BNX2_RPM_SORT_USER0_ENA);
  
-       spin_unlock_irqrestore(&bp->phy_lock, flags);
+       spin_unlock_bh(&bp->phy_lock);
  }
  
  static void
@@ -2960,7 +2966,6 @@ bnx2_init_tx_ring(struct bnx2 *bp)
         bp->tx_prod = 0;
         bp->tx_cons = 0;
         bp->tx_prod_bseq = 0;
-       atomic_set(&bp->tx_avail_bd, bp->tx_ring_size);
         
         val = BNX2_L2CTX_TYPE_TYPE_L2;
         val |= BNX2_L2CTX_TYPE_SIZE_L2;
@@ -3507,11 +3512,11 @@ bnx2_test_registers(struct bnx2 *bp)
                 rw_mask = reg_tbl[i].rw_mask;
                 ro_mask = reg_tbl[i].ro_mask;
  
-               save_val = readl((u8 *) bp->regview + offset);
+               save_val = readl(bp->regview + offset);
  
-               writel(0, (u8 *) bp->regview + offset);
+               writel(0, bp->regview + offset);
  
-               val = readl((u8 *) bp->regview + offset);
+               val = readl(bp->regview + offset);
                 if ((val & rw_mask) != 0) {
                         goto reg_test_err;
                 }
@@ -3520,9 +3525,9 @@ bnx2_test_registers(struct bnx2 *bp)
                         goto reg_test_err;
                 }
  
-               writel(0xffffffff, (u8 *) bp->regview + offset);
+               writel(0xffffffff, bp->regview + offset);
  
-               val = readl((u8 *) bp->regview + offset);
+               val = readl(bp->regview + offset);
                 if ((val & rw_mask) != rw_mask) {
                         goto reg_test_err;
                 }
@@ -3531,11 +3536,11 @@ bnx2_test_registers(struct bnx2 *bp)
                         goto reg_test_err;
                 }
  
-               writel(save_val, (u8 *) bp->regview + offset);
+               writel(save_val, bp->regview + offset);
                 continue;
  
  reg_test_err:
-               writel(save_val, (u8 *) bp->regview + offset);
+               writel(save_val, bp->regview + offset);
                 ret = -ENODEV;
                 break;
         }
@@ -3752,10 +3757,10 @@ bnx2_test_link(struct bnx2 *bp)
  {
         u32 bmsr;
  
-       spin_lock_irq(&bp->phy_lock);
+       spin_lock_bh(&bp->phy_lock);
         bnx2_read_phy(bp, MII_BMSR, &bmsr);
         bnx2_read_phy(bp, MII_BMSR, &bmsr);
-       spin_unlock_irq(&bp->phy_lock);
+       spin_unlock_bh(&bp->phy_lock);
                 
         if (bmsr & BMSR_LSTATUS) {
                 return 0;
@@ -3801,6 +3806,9 @@ bnx2_timer(unsigned long data)
         struct bnx2 *bp = (struct bnx2 *) data;
         u32 msg;
  
+       if (!netif_running(bp->dev))
+               return;
+
         if (atomic_read(&bp->intr_sem) != 0)
                 goto bnx2_restart_timer;
  
@@ -3809,15 +3817,16 @@ bnx2_timer(unsigned long data)
  
         if ((bp->phy_flags & PHY_SERDES_FLAG) &&
             (CHIP_NUM(bp) == CHIP_NUM_5706)) {
-               unsigned long flags;
  
-               spin_lock_irqsave(&bp->phy_lock, flags);
+               spin_lock(&bp->phy_lock);
                 if (bp->serdes_an_pending) {
                         bp->serdes_an_pending--;
                 }
                 else if ((bp->link_up == 0) && (bp->autoneg & AUTONEG_SPEED)) {
                         u32 bmcr;
  
+                       bp->current_interval = bp->timer_interval;
+
                         bnx2_read_phy(bp, MII_BMCR, &bmcr);
  
                         if (bmcr & BMCR_ANENABLE) {
@@ -3860,14 +3869,14 @@ bnx2_timer(unsigned long data)
  
                         }
                 }
+               else
+                       bp->current_interval = bp->timer_interval;
  
-               spin_unlock_irqrestore(&bp->phy_lock, flags);
+               spin_unlock(&bp->phy_lock);
         }
  
  bnx2_restart_timer:
-       bp->timer.expires = RUN_AT(bp->timer_interval);
-
-       add_timer(&bp->timer);
+       mod_timer(&bp->timer, jiffies + bp->current_interval);
  }
  
  /* Called with rtnl_lock */
@@ -3920,12 +3929,7 @@ bnx2_open(struct net_device *dev)
                 return rc;
         }
         
-       init_timer(&bp->timer);
-
-       bp->timer.expires = RUN_AT(bp->timer_interval);
-       bp->timer.data = (unsigned long) bp;
-       bp->timer.function = bnx2_timer;
-       add_timer(&bp->timer);
+       mod_timer(&bp->timer, jiffies + bp->current_interval);
  
         atomic_set(&bp->intr_sem, 0);
  
@@ -3976,12 +3980,17 @@ bnx2_reset_task(void *data)
  {
         struct bnx2 *bp = data;
  
+       if (!netif_running(bp->dev))
+               return;
+
+       bp->in_reset_task = 1;
         bnx2_netif_stop(bp);
  
         bnx2_init_nic(bp);
  
         atomic_set(&bp->intr_sem, 1);
         bnx2_netif_start(bp);
+       bp->in_reset_task = 0;
  }
  
  static void
@@ -4041,9 +4050,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
         u16 prod, ring_prod;
         int i;
  
-       if (unlikely(atomic_read(&bp->tx_avail_bd) <
-               (skb_shinfo(skb)->nr_frags + 1))) {
-
+       if (unlikely(bnx2_tx_avail(bp) < (skb_shinfo(skb)->nr_frags + 1))) {
                 netif_stop_queue(dev);
                 printk(KERN_ERR PFX "%s: BUG! Tx ring full when queue awake!\n",
                         dev->name);
@@ -4140,8 +4147,6 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
         prod = NEXT_TX_BD(prod);
         bp->tx_prod_bseq += skb->len;
  
-       atomic_sub(last_frag + 1, &bp->tx_avail_bd);
-
         REG_WR16(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BIDX, prod);
         REG_WR(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BSEQ, bp->tx_prod_bseq);
  
@@ -4150,17 +4155,13 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
         bp->tx_prod = prod;
         dev->trans_start = jiffies;
  
-       if (unlikely(atomic_read(&bp->tx_avail_bd) <= MAX_SKB_FRAGS)) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&bp->tx_lock, flags);
-               if (atomic_read(&bp->tx_avail_bd) <= MAX_SKB_FRAGS) {
-                       netif_stop_queue(dev);
-
-                       if (atomic_read(&bp->tx_avail_bd) > MAX_SKB_FRAGS)
-                               netif_wake_queue(dev);
-               }
-               spin_unlock_irqrestore(&bp->tx_lock, flags);
+       if (unlikely(bnx2_tx_avail(bp) <= MAX_SKB_FRAGS)) {
+               spin_lock(&bp->tx_lock);
+               netif_stop_queue(dev);
+               
+               if (bnx2_tx_avail(bp) > MAX_SKB_FRAGS)
+                       netif_wake_queue(dev);
+               spin_unlock(&bp->tx_lock);
         }
  
         return NETDEV_TX_OK;
@@ -4173,7 +4174,13 @@ bnx2_close(struct net_device *dev)
         struct bnx2 *bp = dev->priv;
         u32 reset_code;
  
-       flush_scheduled_work();
+       /* Calling flush_scheduled_work() may deadlock because
+        * linkwatch_event() may be on the workqueue and it will try to get
+        * the rtnl_lock which we are holding.
+        */
+       while (bp->in_reset_task)
+               msleep(1);
+
         bnx2_netif_stop(bp);
         del_timer_sync(&bp->timer);
         if (bp->wol)
@@ -4390,11 +4397,11 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
         bp->req_line_speed = req_line_speed;
         bp->req_duplex = req_duplex;
  
-       spin_lock_irq(&bp->phy_lock);
+       spin_lock_bh(&bp->phy_lock);
  
         bnx2_setup_phy(bp);
  
-       spin_unlock_irq(&bp->phy_lock);
+       spin_unlock_bh(&bp->phy_lock);
  
         return 0;
  }
@@ -4464,19 +4471,20 @@ bnx2_nway_reset(struct net_device *dev)
                 return -EINVAL;
         }
  
-       spin_lock_irq(&bp->phy_lock);
+       spin_lock_bh(&bp->phy_lock);
  
         /* Force a link down visible on the other side */
         if (bp->phy_flags & PHY_SERDES_FLAG) {
                 bnx2_write_phy(bp, MII_BMCR, BMCR_LOOPBACK);
-               spin_unlock_irq(&bp->phy_lock);
+               spin_unlock_bh(&bp->phy_lock);
  
                 msleep(20);
  
-               spin_lock_irq(&bp->phy_lock);
+               spin_lock_bh(&bp->phy_lock);
                 if (CHIP_NUM(bp) == CHIP_NUM_5706) {
-                       bp->serdes_an_pending = SERDES_AN_TIMEOUT /
-                               bp->timer_interval;
+                       bp->current_interval = SERDES_AN_TIMEOUT;
+                       bp->serdes_an_pending = 1;
+                       mod_timer(&bp->timer, jiffies + bp->current_interval);
                 }
         }
  
@@ -4484,7 +4492,7 @@ bnx2_nway_reset(struct net_device *dev)
         bmcr &= ~BMCR_LOOPBACK;
         bnx2_write_phy(bp, MII_BMCR, bmcr | BMCR_ANRESTART | BMCR_ANENABLE);
  
-       spin_unlock_irq(&bp->phy_lock);
+       spin_unlock_bh(&bp->phy_lock);
  
         return 0;
  }
@@ -4670,11 +4678,11 @@ bnx2_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause)
                 bp->autoneg &= ~AUTONEG_FLOW_CTRL;
         }
  
-       spin_lock_irq(&bp->phy_lock);
+       spin_lock_bh(&bp->phy_lock);
  
         bnx2_setup_phy(bp);
  
-       spin_unlock_irq(&bp->phy_lock);
+       spin_unlock_bh(&bp->phy_lock);
  
         return 0;
  }
@@ -4698,7 +4706,7 @@ bnx2_set_rx_csum(struct net_device *dev, u32 data)
  
  #define BNX2_NUM_STATS 45
  
-struct {
+static struct {
         char string[ETH_GSTRING_LEN];
  } bnx2_stats_str_arr[BNX2_NUM_STATS] = {
         { "rx_bytes" },
@@ -4750,7 +4758,7 @@ struct {
  
  #define STATS_OFFSET32(offset_name) (offsetof(struct statistics_block, offset_name) / 4)
  
-unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = {
+static unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = {
      STATS_OFFSET32(stat_IfHCInOctets_hi),
      STATS_OFFSET32(stat_IfHCInBadOctets_hi),
      STATS_OFFSET32(stat_IfHCOutOctets_hi),
@@ -4801,7 +4809,7 @@ unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = {
  /* stat_IfHCInBadOctets and stat_Dot3StatsCarrierSenseErrors are
   * skipped because of errata.
   */               
-u8 bnx2_5706_stats_len_arr[BNX2_NUM_STATS] = {
+static u8 bnx2_5706_stats_len_arr[BNX2_NUM_STATS] = {
         8,0,8,8,8,8,8,8,8,8,
         4,0,4,4,4,4,4,4,4,4,
         4,4,4,4,4,4,4,4,4,4,
@@ -4811,7 +4819,7 @@ u8 bnx2_5706_stats_len_arr[BNX2_NUM_STATS] = {
  
  #define BNX2_NUM_TESTS 6
  
-struct {
+static struct {
         char string[ETH_GSTRING_LEN];
  } bnx2_tests_str_arr[BNX2_NUM_TESTS] = {
         { "register_test (offline)" },
@@ -4910,7 +4918,7 @@ bnx2_get_ethtool_stats(struct net_device *dev,
         struct bnx2 *bp = dev->priv;
         int i;
         u32 *hw_stats = (u32 *) bp->stats_blk;
-       u8 *stats_len_arr = 0;
+       u8 *stats_len_arr = NULL;
  
         if (hw_stats == NULL) {
                 memset(buf, 0, sizeof(u64) * BNX2_NUM_STATS);
@@ -5012,7 +5020,7 @@ static struct ethtool_ops bnx2_ethtool_ops = {
  static int
  bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
  {
-       struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr->ifr_data;
+       struct mii_ioctl_data *data = if_mii(ifr);
         struct bnx2 *bp = dev->priv;
         int err;
  
@@ -5024,9 +5032,9 @@ bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
         case SIOCGMIIREG: {
                 u32 mii_regval;
  
-               spin_lock_irq(&bp->phy_lock);
+               spin_lock_bh(&bp->phy_lock);
                 err = bnx2_read_phy(bp, data->reg_num & 0x1f, &mii_regval);
-               spin_unlock_irq(&bp->phy_lock);
+               spin_unlock_bh(&bp->phy_lock);
  
                 data->val_out = mii_regval;
  
@@ -5037,9 +5045,9 @@ bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                 if (!capable(CAP_NET_ADMIN))
                         return -EPERM;
  
-               spin_lock_irq(&bp->phy_lock);
+               spin_lock_bh(&bp->phy_lock);
                 err = bnx2_write_phy(bp, data->reg_num & 0x1f, data->val_in);
-               spin_unlock_irq(&bp->phy_lock);
+               spin_unlock_bh(&bp->phy_lock);
  
                 return err;
  
@@ -5057,6 +5065,9 @@ bnx2_change_mac_addr(struct net_device *dev, void *p)
         struct sockaddr *addr = p;
         struct bnx2 *bp = dev->priv;
  
+       if (!is_valid_ether_addr(addr->sa_data))
+               return -EINVAL;
+
         memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
         if (netif_running(dev))
                 bnx2_set_mac_addr(bp);
@@ -5305,6 +5316,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
         bp->stats_ticks = 1000000 & 0xffff00;
  
         bp->timer_interval =  HZ;
+       bp->current_interval =  HZ;
  
         /* Disable WOL support if we are running on a SERDES chip. */
         if (CHIP_BOND_ID(bp) & CHIP_BOND_ID_SERDES_BIT) {
@@ -5328,6 +5340,15 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
         bp->req_line_speed = 0;
         if (bp->phy_flags & PHY_SERDES_FLAG) {
                 bp->advertising = ETHTOOL_ALL_FIBRE_SPEED | ADVERTISED_Autoneg;
+
+               reg = REG_RD_IND(bp, HOST_VIEW_SHMEM_BASE +
+                                BNX2_PORT_HW_CFG_CONFIG);
+               reg &= BNX2_PORT_HW_CFG_CFG_DFLT_LINK_MASK;
+               if (reg == BNX2_PORT_HW_CFG_CFG_DFLT_LINK_1G) {
+                       bp->autoneg = 0;
+                       bp->req_line_speed = bp->line_speed = SPEED_1000;
+                       bp->req_duplex = DUPLEX_FULL;
+               }
         }
         else {
                 bp->advertising = ETHTOOL_ALL_COPPER_SPEED | ADVERTISED_Autoneg;
@@ -5335,11 +5356,17 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
  
         bp->req_flow_ctrl = FLOW_CTRL_RX | FLOW_CTRL_TX;
  
+       init_timer(&bp->timer);
+       bp->timer.expires = RUN_AT(bp->timer_interval);
+       bp->timer.data = (unsigned long) bp;
+       bp->timer.function = bnx2_timer;
+
         return 0;
  
  err_out_unmap:
         if (bp->regview) {
                 iounmap(bp->regview);
+               bp->regview = NULL;
         }
  
  err_out_release:
@@ -5454,6 +5481,8 @@ bnx2_remove_one(struct pci_dev *pdev)
         struct net_device *dev = pci_get_drvdata(pdev);
         struct bnx2 *bp = dev->priv;
  
+       flush_scheduled_work();
+
         unregister_netdev(dev);
  
         if (bp->regview)
@@ -5505,12 +5534,12 @@ bnx2_resume(struct pci_dev *pdev)
  }
  
  static struct pci_driver bnx2_pci_driver = {
-       name:           DRV_MODULE_NAME,
-       id_table:       bnx2_pci_tbl,
-       probe:          bnx2_init_one,
-       remove:         __devexit_p(bnx2_remove_one),
-       suspend:        bnx2_suspend,
-       resume:         bnx2_resume,
+       .name           = DRV_MODULE_NAME,
+       .id_table       = bnx2_pci_tbl,
+       .probe          = bnx2_init_one,
+       .remove         = __devexit_p(bnx2_remove_one),
+       .suspend        = bnx2_suspend,
+       .resume         = bnx2_resume,
  };
  
  static int __init bnx2_init(void)
diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h

index 8214a2853d0df92a71545348bb70a506fce23ee4..9ad3f5740cd8ee450a3aa96b2996b4d8c8882fca 100644 (file)
--- a/drivers/net/bnx2.h
+++ b/drivers/net/bnx2.h
@@ -3841,12 +3841,12 @@ struct bnx2 {
         struct status_block     *status_blk;
         u32                     last_status_idx;
  
-       atomic_t                tx_avail_bd;
         struct tx_bd            *tx_desc_ring;
         struct sw_bd            *tx_buf_ring;
         u32                     tx_prod_bseq;
         u16                     tx_prod;
         u16                     tx_cons;
+       int                     tx_ring_size;
  
  #ifdef BCM_VLAN 
         struct                  vlan_group *vlgrp;
@@ -3872,8 +3872,10 @@ struct bnx2 {
         char                    *name;
  
         int                     timer_interval;
+       int                     current_interval;
         struct                  timer_list timer;
         struct work_struct      reset_task;
+       int                     in_reset_task;
  
         /* Used to synchronize phy accesses. */
         spinlock_t              phy_lock;
@@ -3927,7 +3929,6 @@ struct bnx2 {
         u16                     fw_wr_seq;
         u16                     fw_drv_pulse_wr_seq;
  
-       int                     tx_ring_size;
         dma_addr_t              tx_desc_mapping;
  
  
@@ -3985,7 +3986,7 @@ struct bnx2 {
  #define PHY_LOOPBACK           2
  
         u8                      serdes_an_pending;
-#define SERDES_AN_TIMEOUT      (2 * HZ)
+#define SERDES_AN_TIMEOUT      (HZ / 3)
  
         u8                      mac_addr[8];
  
@@ -4171,6 +4172,9 @@ struct fw_info {
  
  #define BNX2_PORT_HW_CFG_MAC_LOWER             0x00000054
  #define BNX2_PORT_HW_CFG_CONFIG                        0x00000058
+#define BNX2_PORT_HW_CFG_CFG_DFLT_LINK_MASK     0x001f0000 /* bits 16-20; NOTE(review): presumably a field of the BNX2_PORT_HW_CFG_CONFIG word - confirm */
+#define BNX2_PORT_HW_CFG_CFG_DFLT_LINK_AN       0x00000000 /* field value 0; presumably "autonegotiate" - confirm against the user in bnx2.c */
+#define BNX2_PORT_HW_CFG_CFG_DFLT_LINK_1G       0x00030000 /* field value 3 (3 << 16); presumably "1 Gb/s" - confirm against the user in bnx2.c */
  
  #define BNX2_PORT_HW_CFG_IMD_MAC_A_UPPER       0x00000068
  #define BNX2_PORT_HW_CFG_IMD_MAC_A_LOWER       0x0000006c
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c

index a2e8dda5afac6b30aa0a608f521789415fb637ea..d2f34d5a80835c786122a4ba359164d524b5bde6 100644 (file)
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2419,22 +2419,19 @@ out:
         return 0;
  }
  
-int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype)
+int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev)
  {
         struct bonding *bond = dev->priv;
         struct slave *slave = NULL;
         int ret = NET_RX_DROP;
  
-       if (!(dev->flags & IFF_MASTER)) {
+       if (!(dev->flags & IFF_MASTER))
                 goto out;
-       }
  
         read_lock(&bond->lock);
-       slave = bond_get_slave_by_dev((struct bonding *)dev->priv,
-                                     skb->real_dev);
-       if (slave == NULL) {
+       slave = bond_get_slave_by_dev((struct bonding *)dev->priv, orig_dev);
+       if (!slave)
                 goto out_unlock;
-       }
  
         bond_3ad_rx_indication((struct lacpdu *) skb->data, slave, skb->len);
  
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h

index f46823894187fbb117bd79e661376154deb8006d..673a30af5660f58a096f1253c34fb10bd40bcbec 100644 (file)
--- a/drivers/net/bonding/bond_3ad.h
+++ b/drivers/net/bonding/bond_3ad.h
@@ -295,6 +295,6 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave);
  void bond_3ad_handle_link_change(struct slave *slave, char link);
  int  bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info);
  int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev);
-int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype);
+int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev);
  #endif //__BOND_3AD_H__
  
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c

index 5ce606d9dc03f9b145c3024abecfca20ec65fd9d..f8fce39611972f18fefdd8524368955425d84466 100644 (file)
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -354,15 +354,14 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
         _unlock_rx_hashtbl(bond);
  }
  
-static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype)
+static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev)
  {
         struct bonding *bond = bond_dev->priv;
         struct arp_pkt *arp = (struct arp_pkt *)skb->data;
         int res = NET_RX_DROP;
  
-       if (!(bond_dev->flags & IFF_MASTER)) {
+       if (!(bond_dev->flags & IFF_MASTER))
                 goto out;
-       }
  
         if (!arp) {
                 dprintk("Packet has no ARP data\n");
@@ -1106,18 +1105,13 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav
                         }
                 }
  
-               if (found) {
-                       /* a slave was found that is using the mac address
-                        * of the new slave
-                        */
-                       printk(KERN_ERR DRV_NAME
-                              ": Error: the hw address of slave %s is not "
-                              "unique - cannot enslave it!",
-                              slave->dev->name);
-                       return -EINVAL;
-               }
+               if (!found)
+                       return 0;
  
-               return 0;
+               /* Try setting slave mac to bond address and fall-through
+                  to code handling that situation below... */
+               alb_set_slave_mac_addr(slave, bond->dev->dev_addr,
+                                      bond->alb_info.rlb_enabled);
         }
  
         /* The slave's address is equal to the address of the bond.
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c

index 2c930da90a854d957f881cff3b3032ce0b1ff921..94c9f68dd16bacf4686eac5cb7da66062bf8df33 100644 (file)
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1604,6 +1604,44 @@ static int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_
         return 0;
  }
  
+#define BOND_INTERSECT_FEATURES \
+       (NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
+
+/*
+ * Recompute bond->dev->features.  Bits outside BOND_INTERSECT_FEATURES
+ * come from bond->bond_features; with at least one slave, the bits
+ * inside it are exactly those every slave has set (minus NETIF_F_SG if
+ * no checksum feature survives).  Call after slave or feature changes.
+ */
+static int bond_compute_features(struct bonding *bond)
+{
+       const int csum_caps = NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
+                             NETIF_F_HW_CSUM;
+       int features = bond->bond_features;
+       struct slave *slave;
+       int i;
+
+       bond_for_each_slave(bond, slave, i) {
+               int missing = ~slave->dev->features & BOND_INTERSECT_FEATURES;
+
+               /*
+                * The first slave seeds the candidate set; every slave
+                * (the first included) then strips the bits it lacks.
+                */
+               if (i == 0)
+                       features |= BOND_INTERSECT_FEATURES;
+               features &= ~missing;
+       }
+
+       /* turn off NETIF_F_SG if we need a csum and h/w can't do it */
+       if (!(features & csum_caps))
+               features &= ~NETIF_F_SG;
+
+       bond->dev->features = features;
+
+       return 0;       /* no failure path: always 0 */
+}
+
  /* enslave device <slave> to bond device <master> */
  static int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
  {
@@ -1811,6 +1849,8 @@ static int bond_enslave(struct net_device *bond_dev, struct net_device *slave_de
         new_slave->delay = 0;
         new_slave->link_failure_count = 0;
  
+       bond_compute_features(bond);
+
         if (bond->params.miimon && !bond->params.use_carrier) {
                 link_reporting = bond_check_dev_link(bond, slave_dev, 1);
  
@@ -2015,7 +2055,7 @@ err_free:
  
  err_undo_flags:
         bond_dev->features = old_features;
-
+ 
         return res;
  }
  
@@ -2100,6 +2140,8 @@ static int bond_release(struct net_device *bond_dev, struct net_device *slave_de
         /* release the slave from its bond */
         bond_detach_slave(bond, slave);
  
+       bond_compute_features(bond);
+
         if (bond->primary_slave == slave) {
                 bond->primary_slave = NULL;
         }
@@ -2243,6 +2285,8 @@ static int bond_release_all(struct net_device *bond_dev)
                         bond_alb_deinit_slave(bond, slave);
                 }
  
+               bond_compute_features(bond);
+
                 /* now that the slave is detached, unlock and perform
                  * all the undo steps that should not be called from
                  * within a lock.
@@ -3588,6 +3632,7 @@ static int bond_master_netdev_event(unsigned long event, struct net_device *bond
  static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev)
  {
         struct net_device *bond_dev = slave_dev->master;
+       struct bonding *bond = bond_dev->priv;
  
         switch (event) {
         case NETDEV_UNREGISTER:
@@ -3626,6 +3671,9 @@ static int bond_slave_netdev_event(unsigned long event, struct net_device *slave
                  * TODO: handle changing the primary's name
                  */
                 break;
+       case NETDEV_FEAT_CHANGE:
+               bond_compute_features(bond);
+               break;
         default:
                 break;
         }
@@ -4526,6 +4574,11 @@ static inline void bond_set_mode_ops(struct bonding *bond, int mode)
         }
  }
  
+static struct ethtool_ops bond_ethtool_ops = { /* query-only: just the two getters below are wired up, no setters */
+       .get_tx_csum            = ethtool_op_get_tx_csum, /* generic helper; NOTE(review): presumably derived from dev->features - confirm */
+       .get_sg                 = ethtool_op_get_sg, /* generic helper; NOTE(review): presumably reports NETIF_F_SG - confirm */
+};
+
  /*
   * Does not allocate but creates a /proc entry.
   * Allowed to fail.
@@ -4555,6 +4608,7 @@ static int __init bond_init(struct net_device *bond_dev, struct bond_params *par
         bond_dev->stop = bond_close;
         bond_dev->get_stats = bond_get_stats;
         bond_dev->do_ioctl = bond_do_ioctl;
+       bond_dev->ethtool_ops = &bond_ethtool_ops;
         bond_dev->set_multicast_list = bond_set_multicast_list;
         bond_dev->change_mtu = bond_change_mtu;
         bond_dev->set_mac_address = bond_set_mac_address;
@@ -4591,6 +4645,8 @@ static int __init bond_init(struct net_device *bond_dev, struct bond_params *par
                                NETIF_F_HW_VLAN_RX |
                                NETIF_F_HW_VLAN_FILTER);
  
+       bond->bond_features = bond_dev->features;
+
  #ifdef CONFIG_PROC_FS
         bond_create_proc_entry(bond);
  #endif
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h

index d27f377b3eeb52bf2f8a47c3f3b6a5a61463faad..3881969808627cebcb7ed6a25f6b9b3557b6a1ec 100644 (file)
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -211,6 +211,9 @@ struct bonding {
         struct   bond_params params;
         struct   list_head vlan_list;
         struct   vlan_group *vlgrp;
+       /* the features the bonding device supports, independently 
+        * of any slaves */
+       int      bond_features; 
  };
  
  /**
diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c

index 2c6dc24c37288d3136880efd7cbea02a982e6fa4..b780307093eb1aadb1d312b186b7168e945753a2 100644 (file)
--- a/drivers/net/cs89x0.c
+++ b/drivers/net/cs89x0.c
@@ -417,6 +417,7 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
         struct net_local *lp = netdev_priv(dev);
         static unsigned version_printed;
         int i;
+       int tmp;
         unsigned rev_type = 0;
         int eeprom_buff[CHKSUM_LEN];
         int retval;
@@ -492,14 +493,17 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
                                 goto out2;
                         }
         }
-printk("PP_addr=0x%x\n", inw(ioaddr + ADD_PORT));
+       printk(KERN_DEBUG "PP_addr at %x: 0x%x\n",
+                       ioaddr + ADD_PORT, inw(ioaddr + ADD_PORT));
  
         ioaddr &= ~3;
         outw(PP_ChipID, ioaddr + ADD_PORT);
  
-       if (inw(ioaddr + DATA_PORT) != CHIP_EISA_ID_SIG) {
-               printk(KERN_ERR "%s: incorrect signature 0x%x\n",
-                       dev->name, inw(ioaddr + DATA_PORT));
+       tmp = inw(ioaddr + DATA_PORT);
+       if (tmp != CHIP_EISA_ID_SIG) {
+               printk(KERN_DEBUG "%s: incorrect signature at %x: 0x%x!="
+                       CHIP_EISA_ID_SIG_STR "\n",
+                       dev->name, ioaddr + DATA_PORT, tmp);
                 retval = -ENODEV;
                 goto out2;
         }
diff --git a/drivers/net/cs89x0.h b/drivers/net/cs89x0.h

index bd3ad8e6cce9c46e02a5da048fb78a1ce7f954cd..decea264f1214c615b557e2678f34ce6225b8643 100644 (file)
--- a/drivers/net/cs89x0.h
+++ b/drivers/net/cs89x0.h
@@ -93,6 +93,7 @@
  #endif
  
  #define CHIP_EISA_ID_SIG 0x630E   /*  Product ID Code for Crystal Chip (CS8900 spec 4.3) */
+#define CHIP_EISA_ID_SIG_STR "0x630E" /* CHIP_EISA_ID_SIG as a string literal, for pasting into printk format strings; keep the two in sync */
  
  #ifdef IBMEIPKT
  #define EISA_ID_SIG 0x4D24     /*  IBM */
diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c

index 5fddc0ff887822fde5cbf904e3db8e46b4becca1..6440a892bb813ca4a08e0a1878cf0dedb43b254d 100644 (file)
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -48,6 +48,10 @@
   *                        net_device_stats
   *                      * introduced tx_timeout function
   *                      * reworked locking
+ *
+ *       01-Jul-2005   Ben Dooks <ben@simtec.co.uk>
+ *                     * fixed spinlock call without pointer
+ *                     * ensure spinlock is initialised
   */
  
  #include <linux/module.h>
@@ -148,7 +152,6 @@ static int dm9000_probe(struct device *);
  static int dm9000_open(struct net_device *);
  static int dm9000_start_xmit(struct sk_buff *, struct net_device *);
  static int dm9000_stop(struct net_device *);
-static int dm9000_do_ioctl(struct net_device *, struct ifreq *, int);
  
  
  static void dm9000_timer(unsigned long);
@@ -322,7 +325,7 @@ static void dm9000_timeout(struct net_device *dev)
  
         /* Save previous register address */
         reg_save = readb(db->io_addr);
-       spin_lock_irqsave(db->lock,flags);
+       spin_lock_irqsave(&db->lock,flags);
  
         netif_stop_queue(dev);
         dm9000_reset(db);
@@ -333,7 +336,7 @@ static void dm9000_timeout(struct net_device *dev)
  
         /* Restore previous register address */
         writeb(reg_save, db->io_addr);
-       spin_unlock_irqrestore(db->lock,flags);
+       spin_unlock_irqrestore(&db->lock,flags);
  }
  
  
@@ -387,8 +390,6 @@ dm9000_probe(struct device *dev)
         int i;
         u32 id_val;
  
-       printk(KERN_INFO "%s Ethernet Driver\n", CARDNAME);
-
         /* Init network device */
         ndev = alloc_etherdev(sizeof (struct board_info));
         if (!ndev) {
@@ -405,6 +406,8 @@ dm9000_probe(struct device *dev)
         db = (struct board_info *) ndev->priv;
         memset(db, 0, sizeof (*db));
  
+       spin_lock_init(&db->lock);
+
         if (pdev->num_resources < 2) {
                 ret = -ENODEV;
                 goto out;
@@ -541,7 +544,6 @@ dm9000_probe(struct device *dev)
         ndev->stop               = &dm9000_stop;
         ndev->get_stats          = &dm9000_get_stats;
         ndev->set_multicast_list = &dm9000_hash_table;
-       ndev->do_ioctl           = &dm9000_do_ioctl;
  
  #ifdef DM9000_PROGRAM_EEPROM
         program_eeprom(db);
@@ -612,7 +614,7 @@ dm9000_open(struct net_device *dev)
  
         /* set and active a timer process */
         init_timer(&db->timer);
-       db->timer.expires  = DM9000_TIMER_WUT * 2;
+       db->timer.expires  = DM9000_TIMER_WUT;
         db->timer.data     = (unsigned long) dev;
         db->timer.function = &dm9000_timer;
         add_timer(&db->timer);
@@ -845,15 +847,6 @@ dm9000_get_stats(struct net_device *dev)
         return &db->stats;
  }
  
-/*
- *  Process the upper socket ioctl command
- */
-static int
-dm9000_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-{
-       PRINTK1("entering %s\n",__FUNCTION__);
-       return 0;
-}
  
  /*
   *  A periodic timer routine
@@ -864,21 +857,11 @@ dm9000_timer(unsigned long data)
  {
         struct net_device *dev = (struct net_device *) data;
         board_info_t *db = (board_info_t *) dev->priv;
-       u8 reg_save;
-       unsigned long flags;
  
         PRINTK3("dm9000_timer()\n");
  
-       spin_lock_irqsave(db->lock,flags);
-       /* Save previous register address */
-       reg_save = readb(db->io_addr);
-
         mii_check_media(&db->mii, netif_msg_link(db), 0);
  
-       /* Restore previous register address */
-       writeb(reg_save, db->io_addr);
-       spin_unlock_irqrestore(db->lock,flags);
-
         /* Set timer again */
         db->timer.expires = DM9000_TIMER_WUT;
         add_timer(&db->timer);
@@ -1098,9 +1081,14 @@ dm9000_phy_read(struct net_device *dev, int phy_reg_unused, int reg)
  {
         board_info_t *db = (board_info_t *) dev->priv;
         unsigned long flags;
+       unsigned int reg_save;
         int ret;
  
         spin_lock_irqsave(&db->lock,flags);
+
+       /* Save previous register address */
+       reg_save = readb(db->io_addr);
+
         /* Fill the phyxcer register into REG_0C */
         iow(db, DM9000_EPAR, DM9000_PHY | reg);
  
@@ -1111,6 +1099,9 @@ dm9000_phy_read(struct net_device *dev, int phy_reg_unused, int reg)
         /* The read data keeps on REG_0D & REG_0E */
         ret = (ior(db, DM9000_EPDRH) << 8) | ior(db, DM9000_EPDRL);
  
+       /* restore the previous address */
+       writeb(reg_save, db->io_addr);
+
         spin_unlock_irqrestore(&db->lock,flags);
  
         return ret;
@@ -1124,9 +1115,13 @@ dm9000_phy_write(struct net_device *dev, int phyaddr_unused, int reg, int value)
  {
         board_info_t *db = (board_info_t *) dev->priv;
         unsigned long flags;
+       unsigned long reg_save;
  
         spin_lock_irqsave(&db->lock,flags);
  
+       /* Save previous register address */
+       reg_save = readb(db->io_addr);
+
         /* Fill the phyxcer register into REG_0C */
         iow(db, DM9000_EPAR, DM9000_PHY | reg);
  
@@ -1138,6 +1133,9 @@ dm9000_phy_write(struct net_device *dev, int phyaddr_unused, int reg, int value)
         udelay(500);            /* Wait write complete */
         iow(db, DM9000_EPCR, 0x0);      /* Clear phyxcer write command */
  
+       /* restore the previous address */
+       writeb(reg_save, db->io_addr);
+
         spin_unlock_irqrestore(&db->lock,flags);
  }
  
@@ -1202,6 +1200,8 @@ static struct device_driver dm9000_driver = {
  static int __init
  dm9000_init(void)
  {
+       printk(KERN_INFO "%s Ethernet Driver\n", CARDNAME);
+
         return driver_register(&dm9000_driver); /* search board and register */
  }
  
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c

index 5e5d2c3c7ce4a130fd0b72c014f72f903b5c597b..9b596e0bbf95cc04d73448a38098e9dc3f003bae 100644 (file)
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2767,7 +2767,7 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter)
                                         "  next_to_use          <%x>\n"
                                         "  next_to_clean        <%x>\n"
                                         "buffer_info[next_to_clean]\n"
-                                       "  dma                  <%zx>\n"
+                                       "  dma                  <%llx>\n"
                                         "  time_stamp           <%lx>\n"
                                         "  next_to_watch        <%x>\n"
                                         "  jiffies              <%lx>\n"
@@ -2776,7 +2776,7 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter)
                                 E1000_READ_REG(&adapter->hw, TDT),
                                 tx_ring->next_to_use,
                                 i,
-                               tx_ring->buffer_info[i].dma,
+                               (unsigned long long)tx_ring->buffer_info[i].dma,
                                 tx_ring->buffer_info[i].time_stamp,
                                 eop,
                                 jiffies,
@@ -3789,6 +3789,7 @@ e1000_netpoll(struct net_device *netdev)
         struct e1000_adapter *adapter = netdev_priv(netdev);
         disable_irq(adapter->pdev->irq);
         e1000_intr(adapter->pdev->irq, netdev, NULL);
+       e1000_clean_tx_irq(adapter);
         enable_irq(adapter->pdev->irq);
  }
  #endif
diff --git a/drivers/net/eepro100.c b/drivers/net/eepro100.c

index 1795425f512e1f789dbccea4492cc1d659ed2111..8c62ced2c9b2304a99808f07a26ecb61ee0ce169 100644 (file)
--- a/drivers/net/eepro100.c
+++ b/drivers/net/eepro100.c
@@ -1263,8 +1263,8 @@ speedo_init_rx_ring(struct net_device *dev)
         for (i = 0; i < RX_RING_SIZE; i++) {
                 struct sk_buff *skb;
                 skb = dev_alloc_skb(PKT_BUF_SZ + sizeof(struct RxFD));
-               /* XXX: do we really want to call this before the NULL check? --hch */
-               rx_align(skb);                  /* Align IP on 16 byte boundary */
+               if (skb)
+                       rx_align(skb);        /* Align IP on 16 byte boundary */
                 sp->rx_skbuff[i] = skb;
                 if (skb == NULL)
                         break;                  /* OK.  Just initially short of Rx bufs. */
@@ -1654,8 +1654,8 @@ static inline struct RxFD *speedo_rx_alloc(struct net_device *dev, int entry)
         struct sk_buff *skb;
         /* Get a fresh skbuff to replace the consumed one. */
         skb = dev_alloc_skb(PKT_BUF_SZ + sizeof(struct RxFD));
-       /* XXX: do we really want to call this before the NULL check? --hch */
-       rx_align(skb);                          /* Align IP on 16 byte boundary */
+       if (skb)
+               rx_align(skb);          /* Align IP on 16 byte boundary */
         sp->rx_skbuff[entry] = skb;
         if (skb == NULL) {
                 sp->rx_ringp[entry] = NULL;
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c

index 64f0f697c958ad399317fe7cc628bf93d9f73018..7d93948aec83410e2ba2100111b4e88bd74a4909 100644 (file)
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -85,6 +85,16 @@
   *     0.33: 16 May 2005: Support for MCP51 added.
   *     0.34: 18 Jun 2005: Add DEV_NEED_LINKTIMER to all nForce nics.
   *     0.35: 26 Jun 2005: Support for MCP55 added.
+ *     0.36: 28 Jun 2005: Add jumbo frame support.
+ *     0.37: 10 Jul 2005: Additional ethtool support, cleanup of pci id list
+ *     0.38: 16 Jul 2005: tx irq rewrite: Use global flags instead of
+ *                        per-packet flags.
+ *     0.39: 18 Jul 2005: Add 64bit descriptor support.
+ *     0.40: 19 Jul 2005: Add support for mac address change.
+ *     0.41: 30 Jul 2005: Write back original MAC in nv_close instead
+ *                        of nv_remove
+ *     0.42: 06 Aug 2005: Fix lack of link speed initialization
+ *                        in the second (and later) nv_open call
   *
   * Known bugs:
   * We suspect that on some hardware no TX done interrupts are generated.
@@ -96,7 +106,7 @@
   * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few
   * superfluous timer interrupts from the nic.
   */
-#define FORCEDETH_VERSION              "0.35"
+#define FORCEDETH_VERSION              "0.42" /* must name the newest changelog entry in the header comment */
  #define DRV_NAME                       "forcedeth"
  
  #include <linux/module.h>
@@ -131,11 +141,10 @@
   * Hardware access:
   */
  
-#define DEV_NEED_LASTPACKET1   0x0001  /* set LASTPACKET1 in tx flags */
-#define DEV_IRQMASK_1          0x0002  /* use NVREG_IRQMASK_WANTED_1 for irq mask */
-#define DEV_IRQMASK_2          0x0004  /* use NVREG_IRQMASK_WANTED_2 for irq mask */
-#define DEV_NEED_TIMERIRQ      0x0008  /* set the timer irq flag in the irq mask */
-#define DEV_NEED_LINKTIMER     0x0010  /* poll link settings. Relies on the timer irq */
+#define DEV_NEED_TIMERIRQ      0x0001  /* set the timer irq flag in the irq mask */
+#define DEV_NEED_LINKTIMER     0x0002  /* poll link settings. Relies on the timer irq */
+#define DEV_HAS_LARGEDESC      0x0004  /* device supports jumbo frames and needs packet format 2 */
+#define DEV_HAS_HIGH_DMA        0x0008  /* device supports 64bit dma */
  
  enum {
         NvRegIrqStatus = 0x000,
@@ -146,13 +155,16 @@ enum {
  #define NVREG_IRQ_RX                   0x0002
  #define NVREG_IRQ_RX_NOBUF             0x0004
  #define NVREG_IRQ_TX_ERR               0x0008
-#define NVREG_IRQ_TX2                  0x0010
+#define NVREG_IRQ_TX_OK                        0x0010
  #define NVREG_IRQ_TIMER                        0x0020
  #define NVREG_IRQ_LINK                 0x0040
+#define NVREG_IRQ_TX_ERROR             0x0080
  #define NVREG_IRQ_TX1                  0x0100
-#define NVREG_IRQMASK_WANTED_1         0x005f
-#define NVREG_IRQMASK_WANTED_2         0x0147
-#define NVREG_IRQ_UNKNOWN              (~(NVREG_IRQ_RX_ERROR|NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|NVREG_IRQ_TX_ERR|NVREG_IRQ_TX2|NVREG_IRQ_TIMER|NVREG_IRQ_LINK|NVREG_IRQ_TX1))
+#define NVREG_IRQMASK_WANTED           0x00df
+
+#define NVREG_IRQ_UNKNOWN      (~(NVREG_IRQ_RX_ERROR|NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|NVREG_IRQ_TX_ERR| \
+                                       NVREG_IRQ_TX_OK|NVREG_IRQ_TIMER|NVREG_IRQ_LINK|NVREG_IRQ_TX_ERROR| \
+                                       NVREG_IRQ_TX1))
  
         NvRegUnknownSetupReg6 = 0x008,
  #define NVREG_UNKSETUP6_VAL            3
@@ -286,6 +298,18 @@ struct ring_desc {
         u32 FlagLen;
  };
  
+struct ring_desc_ex { /* descriptor layout used when desc_ver is neither DESC_VER_1 nor DESC_VER_2 */
+       u32 PacketBufferHigh; /* filled from the buffer's DMA address shifted right by 32 */
+       u32 PacketBufferLow; /* filled from the low 32 bits of the buffer's DMA address */
+       u32 Reserved; /* not written by any code visible in this patch */
+       u32 FlagLen; /* flag bits ORed with a length; stored via cpu_to_le32, read via le32_to_cpu */
+};
+
+typedef union _ring_type { /* one ring base pointer, viewed through either descriptor layout */
+       struct ring_desc* orig; /* view used when desc_ver is DESC_VER_1 or DESC_VER_2 */
+       struct ring_desc_ex* ex; /* view used for every other desc_ver */
+} ring_type;
+
  #define FLAG_MASK_V1 0xffff0000
  #define FLAG_MASK_V2 0xffffc000
  #define LEN_MASK_V1 (0xffffffff ^ FLAG_MASK_V1)
@@ -293,7 +317,7 @@ struct ring_desc {
  
  #define NV_TX_LASTPACKET       (1<<16)
  #define NV_TX_RETRYERROR       (1<<19)
-#define NV_TX_LASTPACKET1      (1<<24)
+#define NV_TX_FORCED_INTERRUPT (1<<24)
  #define NV_TX_DEFERRED         (1<<26)
  #define NV_TX_CARRIERLOST      (1<<27)
  #define NV_TX_LATECOLLISION    (1<<28)
@@ -303,7 +327,7 @@ struct ring_desc {
  
  #define NV_TX2_LASTPACKET      (1<<29)
  #define NV_TX2_RETRYERROR      (1<<18)
-#define NV_TX2_LASTPACKET1     (1<<23)
+#define NV_TX2_FORCED_INTERRUPT        (1<<30)
  #define NV_TX2_DEFERRED                (1<<25)
  #define NV_TX2_CARRIERLOST     (1<<26)
  #define NV_TX2_LATECOLLISION   (1<<27)
@@ -379,9 +403,13 @@ struct ring_desc {
  #define TX_LIMIT_START 62
  
  /* rx/tx mac addr + type + vlan + align + slack*/
-#define RX_NIC_BUFSIZE         (ETH_DATA_LEN + 64)
-/* even more slack */
-#define RX_ALLOC_BUFSIZE       (ETH_DATA_LEN + 128)
+#define NV_RX_HEADERS          (64)
+/* even more slack. */
+#define NV_RX_ALLOC_PAD                (64)
+
+/* maximum mtu size */
+#define NV_PKTLIMIT_1  ETH_DATA_LEN    /* hard limit not known */
+#define NV_PKTLIMIT_2  9100    /* Actual limit according to NVidia: 9202 */
  
  #define OOM_REFILL     (1+HZ/20)
  #define POLL_WAIT      (1+HZ/100)
@@ -396,6 +424,7 @@ struct ring_desc {
   */
  #define DESC_VER_1     0x0
  #define DESC_VER_2     (0x02100|NVREG_TXRXCTL_RXCHECK)
+#define DESC_VER_3      (0x02200|NVREG_TXRXCTL_RXCHECK)
  
  /* PHY defines */
  #define PHY_OUI_MARVELL        0x5043
@@ -468,11 +497,12 @@ struct fe_priv {
         /* rx specific fields.
          * Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
          */
-       struct ring_desc *rx_ring;
+       ring_type rx_ring;
         unsigned int cur_rx, refill_rx;
         struct sk_buff *rx_skbuff[RX_RING];
         dma_addr_t rx_dma[RX_RING];
         unsigned int rx_buf_sz;
+       unsigned int pkt_limit;
         struct timer_list oom_kick;
         struct timer_list nic_poll;
  
@@ -484,7 +514,7 @@ struct fe_priv {
         /*
          * tx specific fields.
          */
-       struct ring_desc *tx_ring;
+       ring_type tx_ring;
         unsigned int next_tx, nic_tx;
         struct sk_buff *tx_skbuff[TX_RING];
         dma_addr_t tx_dma[TX_RING];
@@ -519,6 +549,11 @@ static inline u32 nv_descr_getlength(struct ring_desc *prd, u32 v)
                 & ((v == DESC_VER_1) ? LEN_MASK_V1 : LEN_MASK_V2);
  }
  
+static inline u32 nv_descr_getlength_ex(struct ring_desc_ex *prd, u32 v) /* length bits of an extended descriptor's FlagLen, in CPU byte order */
+{
+       return le32_to_cpu(prd->FlagLen) & LEN_MASK_V2; /* 'v' is unused: the V2 length mask is applied unconditionally */
+}
+
  static int reg_delay(struct net_device *dev, int offset, u32 mask, u32 target,
                                 int delay, int delaymax, const char *msg)
  {
@@ -792,7 +827,7 @@ static int nv_alloc_rx(struct net_device *dev)
                 nr = refill_rx % RX_RING;
                 if (np->rx_skbuff[nr] == NULL) {
  
-                       skb = dev_alloc_skb(RX_ALLOC_BUFSIZE);
+                       skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
                         if (!skb)
                                 break;
  
@@ -803,9 +838,16 @@ static int nv_alloc_rx(struct net_device *dev)
                 }
                 np->rx_dma[nr] = pci_map_single(np->pci_dev, skb->data, skb->len,
                                                 PCI_DMA_FROMDEVICE);
-               np->rx_ring[nr].PacketBuffer = cpu_to_le32(np->rx_dma[nr]);
-               wmb();
-               np->rx_ring[nr].FlagLen = cpu_to_le32(RX_NIC_BUFSIZE | NV_RX_AVAIL);
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+                       np->rx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->rx_dma[nr]);
+                       wmb();
+                       np->rx_ring.orig[nr].FlagLen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
+               } else {
+                       np->rx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->rx_dma[nr]) >> 32;
+                       np->rx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->rx_dma[nr]) & 0x0FFFFFFFF;
+                       wmb();
+                       np->rx_ring.ex[nr].FlagLen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL);
+               }
                 dprintk(KERN_DEBUG "%s: nv_alloc_rx: Packet %d marked as Available\n",
                                         dev->name, refill_rx);
                 refill_rx++;
@@ -831,19 +873,37 @@ static void nv_do_rx_refill(unsigned long data)
         enable_irq(dev->irq);
  }
  
-static int nv_init_ring(struct net_device *dev)
+static void nv_init_rx(struct net_device *dev) 
  {
         struct fe_priv *np = get_nvpriv(dev);
         int i;
  
-       np->next_tx = np->nic_tx = 0;
-       for (i = 0; i < TX_RING; i++)
-               np->tx_ring[i].FlagLen = 0;
-
         np->cur_rx = RX_RING;
         np->refill_rx = 0;
         for (i = 0; i < RX_RING; i++)
-               np->rx_ring[i].FlagLen = 0;
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+                       np->rx_ring.orig[i].FlagLen = 0;
+               else
+                       np->rx_ring.ex[i].FlagLen = 0;
+}
+
+static void nv_init_tx(struct net_device *dev)
+{
+       struct fe_priv *np = get_nvpriv(dev);
+       int slot, legacy = (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2);
+
+       np->nic_tx = np->next_tx = 0;   /* rewind both tx ring indices */
+       for (slot = 0; slot < TX_RING; slot++) {
+               u32 *flaglen = legacy ? &np->tx_ring.orig[slot].FlagLen
+                                     : &np->tx_ring.ex[slot].FlagLen;
+               *flaglen = 0;           /* clear flags and length in the layout in use */
+       }
+}
+
+static int nv_init_ring(struct net_device *dev)
+{
+       nv_init_tx(dev);
+       nv_init_rx(dev);
         return nv_alloc_rx(dev);
  }
  
@@ -852,7 +912,10 @@ static void nv_drain_tx(struct net_device *dev)
         struct fe_priv *np = get_nvpriv(dev);
         int i;
         for (i = 0; i < TX_RING; i++) {
-               np->tx_ring[i].FlagLen = 0;
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+                       np->tx_ring.orig[i].FlagLen = 0;
+               else
+                       np->tx_ring.ex[i].FlagLen = 0;
                 if (np->tx_skbuff[i]) {
                         pci_unmap_single(np->pci_dev, np->tx_dma[i],
                                                 np->tx_skbuff[i]->len,
@@ -869,7 +932,10 @@ static void nv_drain_rx(struct net_device *dev)
         struct fe_priv *np = get_nvpriv(dev);
         int i;
         for (i = 0; i < RX_RING; i++) {
-               np->rx_ring[i].FlagLen = 0;
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+                       np->rx_ring.orig[i].FlagLen = 0;
+               else
+                       np->rx_ring.ex[i].FlagLen = 0;
                 wmb();
                 if (np->rx_skbuff[i]) {
                         pci_unmap_single(np->pci_dev, np->rx_dma[i],
@@ -900,11 +966,19 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
         np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data,skb->len,
                                         PCI_DMA_TODEVICE);
  
-       np->tx_ring[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]);
+       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]);
+       else {
+               np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32;
+               np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF;
+       }
  
         spin_lock_irq(&np->lock);
         wmb();
-       np->tx_ring[nr].FlagLen = cpu_to_le32( (skb->len-1) | np->tx_flags );
+       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               np->tx_ring.orig[nr].FlagLen = cpu_to_le32( (skb->len-1) | np->tx_flags );
+       else
+               np->tx_ring.ex[nr].FlagLen = cpu_to_le32( (skb->len-1) | np->tx_flags );
         dprintk(KERN_DEBUG "%s: nv_start_xmit: packet packet %d queued for transmission.\n",
                                 dev->name, np->next_tx);
         {
@@ -942,7 +1016,10 @@ static void nv_tx_done(struct net_device *dev)
         while (np->nic_tx != np->next_tx) {
                 i = np->nic_tx % TX_RING;
  
-               Flags = le32_to_cpu(np->tx_ring[i].FlagLen);
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+                       Flags = le32_to_cpu(np->tx_ring.orig[i].FlagLen);
+               else
+                       Flags = le32_to_cpu(np->tx_ring.ex[i].FlagLen);
  
                 dprintk(KERN_DEBUG "%s: nv_tx_done: looking at packet %d, Flags 0x%x.\n",
                                         dev->name, np->nic_tx, Flags);
@@ -993,9 +1070,56 @@ static void nv_tx_timeout(struct net_device *dev)
         struct fe_priv *np = get_nvpriv(dev);
         u8 __iomem *base = get_hwbase(dev);
  
-       dprintk(KERN_DEBUG "%s: Got tx_timeout. irq: %08x\n", dev->name,
+       printk(KERN_INFO "%s: Got tx_timeout. irq: %08x\n", dev->name,
                         readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK);
  
+       {
+               int i;
+
+               printk(KERN_INFO "%s: Ring at %lx: next %d nic %d\n",
+                               dev->name, (unsigned long)np->ring_addr,
+                               np->next_tx, np->nic_tx);
+               printk(KERN_INFO "%s: Dumping tx registers\n", dev->name);
+               for (i=0;i<0x400;i+= 32) {
+                       printk(KERN_INFO "%3x: %08x %08x %08x %08x %08x %08x %08x %08x\n",
+                                       i,
+                                       readl(base + i + 0), readl(base + i + 4),
+                                       readl(base + i + 8), readl(base + i + 12),
+                                       readl(base + i + 16), readl(base + i + 20),
+                                       readl(base + i + 24), readl(base + i + 28));
+               }
+               printk(KERN_INFO "%s: Dumping tx ring\n", dev->name);
+               for (i=0;i<TX_RING;i+= 4) {
+                       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+                               printk(KERN_INFO "%03x: %08x %08x // %08x %08x // %08x %08x // %08x %08x\n",
+                                      i, 
+                                      le32_to_cpu(np->tx_ring.orig[i].PacketBuffer),
+                                      le32_to_cpu(np->tx_ring.orig[i].FlagLen),
+                                      le32_to_cpu(np->tx_ring.orig[i+1].PacketBuffer),
+                                      le32_to_cpu(np->tx_ring.orig[i+1].FlagLen),
+                                      le32_to_cpu(np->tx_ring.orig[i+2].PacketBuffer),
+                                      le32_to_cpu(np->tx_ring.orig[i+2].FlagLen),
+                                      le32_to_cpu(np->tx_ring.orig[i+3].PacketBuffer),
+                                      le32_to_cpu(np->tx_ring.orig[i+3].FlagLen));
+                       } else {
+                               printk(KERN_INFO "%03x: %08x %08x %08x // %08x %08x %08x // %08x %08x %08x // %08x %08x %08x\n",
+                                      i, 
+                                      le32_to_cpu(np->tx_ring.ex[i].PacketBufferHigh),
+                                      le32_to_cpu(np->tx_ring.ex[i].PacketBufferLow),
+                                      le32_to_cpu(np->tx_ring.ex[i].FlagLen),
+                                      le32_to_cpu(np->tx_ring.ex[i+1].PacketBufferHigh),
+                                      le32_to_cpu(np->tx_ring.ex[i+1].PacketBufferLow),
+                                      le32_to_cpu(np->tx_ring.ex[i+1].FlagLen),
+                                      le32_to_cpu(np->tx_ring.ex[i+2].PacketBufferHigh),
+                                      le32_to_cpu(np->tx_ring.ex[i+2].PacketBufferLow),
+                                      le32_to_cpu(np->tx_ring.ex[i+2].FlagLen),
+                                      le32_to_cpu(np->tx_ring.ex[i+3].PacketBufferHigh),
+                                      le32_to_cpu(np->tx_ring.ex[i+3].PacketBufferLow),
+                                      le32_to_cpu(np->tx_ring.ex[i+3].FlagLen));
+                       }
+               }
+       }
+
         spin_lock_irq(&np->lock);
  
         /* 1) stop tx engine */
@@ -1009,7 +1133,10 @@ static void nv_tx_timeout(struct net_device *dev)
                 printk(KERN_DEBUG "%s: tx_timeout: dead entries!\n", dev->name);
                 nv_drain_tx(dev);
                 np->next_tx = np->nic_tx = 0;
-               writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr);
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+                       writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr);
+               else
+                       writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr);
                 netif_wake_queue(dev);
         }
  
@@ -1084,8 +1211,13 @@ static void nv_rx_process(struct net_device *dev)
                         break;  /* we scanned the whole ring - do not continue */
  
                 i = np->cur_rx % RX_RING;
-               Flags = le32_to_cpu(np->rx_ring[i].FlagLen);
-               len = nv_descr_getlength(&np->rx_ring[i], np->desc_ver);
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+                       Flags = le32_to_cpu(np->rx_ring.orig[i].FlagLen);
+                       len = nv_descr_getlength(&np->rx_ring.orig[i], np->desc_ver);
+               } else {
+                       Flags = le32_to_cpu(np->rx_ring.ex[i].FlagLen);
+                       len = nv_descr_getlength_ex(&np->rx_ring.ex[i], np->desc_ver);
+               }
  
                 dprintk(KERN_DEBUG "%s: nv_rx_process: looking at packet %d, Flags 0x%x.\n",
                                         dev->name, np->cur_rx, Flags);
@@ -1207,15 +1339,133 @@ next_pkt:
         }
  }
  
+static void set_bufsize(struct net_device *dev)
+{
+       struct fe_priv *np = netdev_priv(dev);
+
+       if (dev->mtu <= ETH_DATA_LEN)
+               np->rx_buf_sz = ETH_DATA_LEN + NV_RX_HEADERS;
+       else
+               np->rx_buf_sz = dev->mtu + NV_RX_HEADERS;
+}
+
  /*
   * nv_change_mtu: dev->change_mtu function
   * Called with dev_base_lock held for read.
   */
  static int nv_change_mtu(struct net_device *dev, int new_mtu)
  {
-       if (new_mtu > ETH_DATA_LEN)
+       struct fe_priv *np = get_nvpriv(dev);
+       int old_mtu;
+
+       if (new_mtu < 64 || new_mtu > np->pkt_limit)
                 return -EINVAL;
+
+       old_mtu = dev->mtu;
         dev->mtu = new_mtu;
+
+       /* return early if the buffer sizes will not change */
+       if (old_mtu <= ETH_DATA_LEN && new_mtu <= ETH_DATA_LEN)
+               return 0;
+       if (old_mtu == new_mtu)
+               return 0;
+
+       /* synchronized against open : rtnl_lock() held by caller */
+       if (netif_running(dev)) {
+               u8 *base = get_hwbase(dev);
+               /*
+                * It seems that the nic preloads valid ring entries into an
+                * internal buffer. The procedure for flushing everything is
+                * guessed, there is probably a simpler approach.
+                * Changing the MTU is a rare event, it shouldn't matter.
+                */
+               disable_irq(dev->irq);
+               spin_lock_bh(&dev->xmit_lock);
+               spin_lock(&np->lock);
+               /* stop engines */
+               nv_stop_rx(dev);
+               nv_stop_tx(dev);
+               nv_txrx_reset(dev);
+               /* drain rx and tx queues */
+               nv_drain_rx(dev);
+               nv_drain_tx(dev);
+               /* reinit driver view of the rx and tx queues */
+               nv_init_rx(dev);
+               nv_init_tx(dev);
+               /* alloc new rx buffers */
+               set_bufsize(dev);
+               if (nv_alloc_rx(dev)) {
+                       if (!np->in_shutdown)
+                               mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
+               }
+               /* reinit nic view of the rx and tx queues */
+               writel(np->rx_buf_sz, base + NvRegOffloadConfig);
+               writel((u32) np->ring_addr, base + NvRegRxRingPhysAddr);
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+                       writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr);
+               else
+                       writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr);
+               writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT),
+                       base + NvRegRingSizes);
+               pci_push(base);
+               writel(NVREG_TXRXCTL_KICK|np->desc_ver, get_hwbase(dev) + NvRegTxRxControl);
+               pci_push(base);
+
+               /* restart rx and tx engines */
+               nv_start_rx(dev);
+               nv_start_tx(dev);
+               spin_unlock(&np->lock);
+               spin_unlock_bh(&dev->xmit_lock);
+               enable_irq(dev->irq);
+       }
+       return 0;
+}
+
+static void nv_copy_mac_to_hw(struct net_device *dev)
+{
+       u8 *base = get_hwbase(dev);
+       u32 mac[2];
+
+       mac[0] = (dev->dev_addr[0] << 0) + (dev->dev_addr[1] << 8) +
+                       (dev->dev_addr[2] << 16) + (dev->dev_addr[3] << 24);
+       mac[1] = (dev->dev_addr[4] << 0) + (dev->dev_addr[5] << 8);
+
+       writel(mac[0], base + NvRegMacAddrA);
+       writel(mac[1], base + NvRegMacAddrB);
+}
+
+/*
+ * nv_set_mac_address: dev->set_mac_address function
+ * Called with rtnl_lock() held.
+ */
+static int nv_set_mac_address(struct net_device *dev, void *addr)
+{
+       struct fe_priv *np = get_nvpriv(dev);
+       struct sockaddr *macaddr = (struct sockaddr*)addr;
+
+       if(!is_valid_ether_addr(macaddr->sa_data))
+               return -EADDRNOTAVAIL;
+
+       /* synchronized against open : rtnl_lock() held by caller */
+       memcpy(dev->dev_addr, macaddr->sa_data, ETH_ALEN);
+
+       if (netif_running(dev)) {
+               spin_lock_bh(&dev->xmit_lock);
+               spin_lock_irq(&np->lock);
+
+               /* stop rx engine */
+               nv_stop_rx(dev);
+
+               /* set mac address */
+               nv_copy_mac_to_hw(dev);
+
+               /* restart rx engine */
+               nv_start_rx(dev);
+               spin_unlock_irq(&np->lock);
+               spin_unlock_bh(&dev->xmit_lock);
+       } else {
+               nv_copy_mac_to_hw(dev);
+       }
         return 0;
  }
  
@@ -1470,7 +1720,7 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs)
                 if (!(events & np->irqmask))
                         break;
  
-               if (events & (NVREG_IRQ_TX1|NVREG_IRQ_TX2|NVREG_IRQ_TX_ERR)) {
+               if (events & (NVREG_IRQ_TX1|NVREG_IRQ_TX_OK|NVREG_IRQ_TX_ERROR|NVREG_IRQ_TX_ERR)) {
                         spin_lock(&np->lock);
                         nv_tx_done(dev);
                         spin_unlock(&np->lock);
@@ -1761,6 +2011,50 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
         return 0;
  }
  
+#define FORCEDETH_REGS_VER     1
+#define FORCEDETH_REGS_SIZE    0x400 /* 256 32-bit registers */
+
+static int nv_get_regs_len(struct net_device *dev)
+{
+       return FORCEDETH_REGS_SIZE;
+}
+
+static void nv_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *buf)
+{
+       struct fe_priv *np = get_nvpriv(dev);
+       u8 __iomem *base = get_hwbase(dev);
+       u32 *rbuf = buf;
+       int i;
+
+       regs->version = FORCEDETH_REGS_VER;
+       spin_lock_irq(&np->lock);
+       for (i=0;i<FORCEDETH_REGS_SIZE/sizeof(u32);i++)
+               rbuf[i] = readl(base + i*sizeof(u32));
+       spin_unlock_irq(&np->lock);
+}
+
+static int nv_nway_reset(struct net_device *dev)
+{
+       struct fe_priv *np = get_nvpriv(dev);
+       int ret;
+
+       spin_lock_irq(&np->lock);
+       if (np->autoneg) {
+               int bmcr;
+
+               bmcr = mii_rw(dev, np->phyaddr, MII_BMCR, MII_READ);
+               bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
+               mii_rw(dev, np->phyaddr, MII_BMCR, bmcr);
+
+               ret = 0;
+       } else {
+               ret = -EINVAL;
+       }
+       spin_unlock_irq(&np->lock);
+
+       return ret;
+}
+
  static struct ethtool_ops ops = {
         .get_drvinfo = nv_get_drvinfo,
         .get_link = ethtool_op_get_link,
@@ -1768,6 +2062,9 @@ static struct ethtool_ops ops = {
         .set_wol = nv_set_wol,
         .get_settings = nv_get_settings,
         .set_settings = nv_set_settings,
+       .get_regs_len = nv_get_regs_len,
+       .get_regs = nv_get_regs,
+       .nway_reset = nv_nway_reset,
  };
  
  static int nv_open(struct net_device *dev)
@@ -1792,6 +2089,7 @@ static int nv_open(struct net_device *dev)
         writel(0, base + NvRegAdapterControl);
  
         /* 2) initialize descriptor rings */
+       set_bufsize(dev);
         oom = nv_init_ring(dev);
  
         writel(0, base + NvRegLinkSpeed);
@@ -1802,20 +2100,14 @@ static int nv_open(struct net_device *dev)
         np->in_shutdown = 0;
  
         /* 3) set mac address */
-       {
-               u32 mac[2];
-
-               mac[0] = (dev->dev_addr[0] << 0) + (dev->dev_addr[1] << 8) +
-                               (dev->dev_addr[2] << 16) + (dev->dev_addr[3] << 24);
-               mac[1] = (dev->dev_addr[4] << 0) + (dev->dev_addr[5] << 8);
-
-               writel(mac[0], base + NvRegMacAddrA);
-               writel(mac[1], base + NvRegMacAddrB);
-       }
+       nv_copy_mac_to_hw(dev);
  
         /* 4) give hw rings */
         writel((u32) np->ring_addr, base + NvRegRxRingPhysAddr);
-       writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr);
+       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr);
+       else
+               writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr);
         writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT),
                 base + NvRegRingSizes);
  
@@ -1837,7 +2129,7 @@ static int nv_open(struct net_device *dev)
         writel(NVREG_MISC1_FORCE | NVREG_MISC1_HD, base + NvRegMisc1);
         writel(readl(base + NvRegTransmitterStatus), base + NvRegTransmitterStatus);
         writel(NVREG_PFF_ALWAYS, base + NvRegPacketFilterFlags);
-       writel(NVREG_OFFLOAD_NORMAL, base + NvRegOffloadConfig);
+       writel(np->rx_buf_sz, base + NvRegOffloadConfig);
  
         writel(readl(base + NvRegReceiverStatus), base + NvRegReceiverStatus);
         get_random_bytes(&i, sizeof(i));
@@ -1888,6 +2180,9 @@ static int nv_open(struct net_device *dev)
                 writel(NVREG_MIISTAT_MASK, base + NvRegMIIStatus);
                 dprintk(KERN_INFO "startup: got 0x%08x.\n", miistat);
         }
+       /* set linkspeed to invalid value, thus force nv_update_linkspeed
+        * to init hw */
+       np->linkspeed = 0;
         ret = nv_update_linkspeed(dev);
         nv_start_rx(dev);
         nv_start_tx(dev);
@@ -1942,6 +2237,12 @@ static int nv_close(struct net_device *dev)
         if (np->wolenabled)
                 nv_start_rx(dev);
  
+       /* special op: write back the misordered MAC address - otherwise
+        * the next nv_probe would see a wrong address.
+        */
+       writel(np->orig_mac[0], base + NvRegMacAddrA);
+       writel(np->orig_mac[1], base + NvRegMacAddrB);
+
         /* FIXME: power down nic */
  
         return 0;
@@ -2006,32 +2307,55 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
         }
  
         /* handle different descriptor versions */
-       if (pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_1 ||
-               pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_2 ||
-               pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_3 ||    
-               pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_12 ||
-               pci_dev->device == PCI_DEVICE_ID_NVIDIA_NVENET_13)
-               np->desc_ver = DESC_VER_1;
-       else
+       if (id->driver_data & DEV_HAS_HIGH_DMA) {
+               /* packet format 3: supports 40-bit addressing */
+               np->desc_ver = DESC_VER_3;
+               if (pci_set_dma_mask(pci_dev, 0x0000007fffffffffULL)) {
+                       printk(KERN_INFO "forcedeth: 64-bit DMA failed, using 32-bit addressing for device %s.\n",
+                                       pci_name(pci_dev));
+               }
+       } else if (id->driver_data & DEV_HAS_LARGEDESC) {
+               /* packet format 2: supports jumbo frames */
                 np->desc_ver = DESC_VER_2;
+       } else {
+               /* original packet format */
+               np->desc_ver = DESC_VER_1;
+       }
+
+       np->pkt_limit = NV_PKTLIMIT_1;
+       if (id->driver_data & DEV_HAS_LARGEDESC)
+               np->pkt_limit = NV_PKTLIMIT_2;
  
         err = -ENOMEM;
         np->base = ioremap(addr, NV_PCI_REGSZ);
         if (!np->base)
                 goto out_relreg;
         dev->base_addr = (unsigned long)np->base;
+
         dev->irq = pci_dev->irq;
-       np->rx_ring = pci_alloc_consistent(pci_dev, sizeof(struct ring_desc) * (RX_RING + TX_RING),
-                                               &np->ring_addr);
-       if (!np->rx_ring)
-               goto out_unmap;
-       np->tx_ring = &np->rx_ring[RX_RING];
+
+       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+               np->rx_ring.orig = pci_alloc_consistent(pci_dev,
+                                       sizeof(struct ring_desc) * (RX_RING + TX_RING),
+                                       &np->ring_addr);
+               if (!np->rx_ring.orig)
+                       goto out_unmap;
+               np->tx_ring.orig = &np->rx_ring.orig[RX_RING];
+       } else {
+               np->rx_ring.ex = pci_alloc_consistent(pci_dev,
+                                       sizeof(struct ring_desc_ex) * (RX_RING + TX_RING),
+                                       &np->ring_addr);
+               if (!np->rx_ring.ex)
+                       goto out_unmap;
+               np->tx_ring.ex = &np->rx_ring.ex[RX_RING];
+       }
  
         dev->open = nv_open;
         dev->stop = nv_close;
         dev->hard_start_xmit = nv_start_xmit;
         dev->get_stats = nv_get_stats;
         dev->change_mtu = nv_change_mtu;
+       dev->set_mac_address = nv_set_mac_address;
         dev->set_multicast_list = nv_set_multicast;
  #ifdef CONFIG_NET_POLL_CONTROLLER
         dev->poll_controller = nv_poll_controller;
@@ -2080,17 +2404,10 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
  
         if (np->desc_ver == DESC_VER_1) {
                 np->tx_flags = NV_TX_LASTPACKET|NV_TX_VALID;
-               if (id->driver_data & DEV_NEED_LASTPACKET1)
-                       np->tx_flags |= NV_TX_LASTPACKET1;
         } else {
                 np->tx_flags = NV_TX2_LASTPACKET|NV_TX2_VALID;
-               if (id->driver_data & DEV_NEED_LASTPACKET1)
-                       np->tx_flags |= NV_TX2_LASTPACKET1;
         }
-       if (id->driver_data & DEV_IRQMASK_1)
-               np->irqmask = NVREG_IRQMASK_WANTED_1;
-       if (id->driver_data & DEV_IRQMASK_2)
-               np->irqmask = NVREG_IRQMASK_WANTED_2;
+       np->irqmask = NVREG_IRQMASK_WANTED;
         if (id->driver_data & DEV_NEED_TIMERIRQ)
                 np->irqmask |= NVREG_IRQ_TIMER;
         if (id->driver_data & DEV_NEED_LINKTIMER) {
@@ -2155,8 +2472,12 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
         return 0;
  
  out_freering:
-       pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (RX_RING + TX_RING),
-                               np->rx_ring, np->ring_addr);
+       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (RX_RING + TX_RING),
+                                   np->rx_ring.orig, np->ring_addr);
+       else
+               pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (RX_RING + TX_RING),
+                                   np->rx_ring.ex, np->ring_addr);
         pci_set_drvdata(pci_dev, NULL);
  out_unmap:
         iounmap(get_hwbase(dev));
@@ -2174,18 +2495,14 @@ static void __devexit nv_remove(struct pci_dev *pci_dev)
  {
         struct net_device *dev = pci_get_drvdata(pci_dev);
         struct fe_priv *np = get_nvpriv(dev);
-       u8 __iomem *base = get_hwbase(dev);
  
         unregister_netdev(dev);
  
-       /* special op: write back the misordered MAC address - otherwise
-        * the next nv_probe would see a wrong address.
-        */
-       writel(np->orig_mac[0], base + NvRegMacAddrA);
-       writel(np->orig_mac[1], base + NvRegMacAddrB);
-
         /* free all structures */
-       pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (RX_RING + TX_RING), np->rx_ring, np->ring_addr);
+       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (RX_RING + TX_RING), np->rx_ring.orig, np->ring_addr);
+       else
+               pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (RX_RING + TX_RING), np->rx_ring.ex, np->ring_addr);
         iounmap(get_hwbase(dev));
         pci_release_regions(pci_dev);
         pci_disable_device(pci_dev);
@@ -2195,109 +2512,64 @@ static void __devexit nv_remove(struct pci_dev *pci_dev)
  
  static struct pci_device_id pci_tbl[] = {
         {       /* nForce Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_1,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_IRQMASK_1|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_1),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
         },
         {       /* nForce2 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_2,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_2),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
         },
         {       /* nForce3 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_3,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_3),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
         },
         {       /* nForce3 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_4,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_4),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC,
         },
         {       /* nForce3 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_5,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_5),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC,
         },
         {       /* nForce3 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_6,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_6),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC,
         },
         {       /* nForce3 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_7,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_7),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC,
         },
         {       /* CK804 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_8,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_8),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
         },
         {       /* CK804 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_9,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_9),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
         },
         {       /* MCP04 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_10,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_10),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
         },
         {       /* MCP04 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_11,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_11),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
         },
         {       /* MCP51 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_12,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_12),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA,
         },
         {       /* MCP51 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_13,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_13),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA,
         },
         {       /* MCP55 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_14,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_14),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
         },
         {       /* MCP55 Ethernet Controller */
-               .vendor = PCI_VENDOR_ID_NVIDIA,
-               .device = PCI_DEVICE_ID_NVIDIA_NVENET_15,
-               .subvendor = PCI_ANY_ID,
-               .subdevice = PCI_ANY_ID,
-               .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+               PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15),
+               .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
         },
         {0,},
  };
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c

index e44f8e9055ef0c6e82a1d908ae70ecbfa3f678d3..0b230222bfeab592fa23ec7836accddc79df7a62 100644 (file)
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -130,12 +130,11 @@ struct sixpack {
  
  #define AX25_6PACK_HEADER_LEN 0
  
-static void sp_start_tx_timer(struct sixpack *);
  static void sixpack_decode(struct sixpack *, unsigned char[], int);
  static int encode_sixpack(unsigned char *, unsigned char *, int, unsigned char);
  
  /*
- * perform the persistence/slottime algorithm for CSMA access. If the
+ * Perform the persistence/slottime algorithm for CSMA access. If the
   * persistence check was successful, write the data to the serial driver.
   * Note that in case of DAMA operation, the data is not sent here.
   */
@@ -143,7 +142,7 @@ static int encode_sixpack(unsigned char *, unsigned char *, int, unsigned char);
  static void sp_xmit_on_air(unsigned long channel)
  {
         struct sixpack *sp = (struct sixpack *) channel;
-       int actual;
+       int actual, when = sp->slottime;
         static unsigned char random;
  
         random = random * 17 + 41;
@@ -159,20 +158,10 @@ static void sp_xmit_on_air(unsigned long channel)
                 sp->tty->driver->write(sp->tty, &sp->led_state, 1);
                 sp->status2 = 0;
         } else
-               sp_start_tx_timer(sp);
+               mod_timer(&sp->tx_t, jiffies + ((when + 1) * HZ) / 100);
  }
  
  /* ----> 6pack timer interrupt handler and friends. <---- */
-static void sp_start_tx_timer(struct sixpack *sp)
-{
-       int when = sp->slottime;
-
-       del_timer(&sp->tx_t);
-       sp->tx_t.data = (unsigned long) sp;
-       sp->tx_t.function = sp_xmit_on_air;
-       sp->tx_t.expires = jiffies + ((when + 1) * HZ) / 100;
-       add_timer(&sp->tx_t);
-}
  
  /* Encapsulate one AX.25 frame and stuff into a TTY queue. */
  static void sp_encaps(struct sixpack *sp, unsigned char *icp, int len)
@@ -243,8 +232,7 @@ static void sp_encaps(struct sixpack *sp, unsigned char *icp, int len)
                 sp->xleft = count;
                 sp->xhead = sp->xbuff;
                 sp->status2 = count;
-               if (sp->duplex == 0)
-                       sp_start_tx_timer(sp);
+               sp_xmit_on_air((unsigned long)sp);
         }
  
         return;
@@ -320,12 +308,6 @@ static int sp_set_mac_address(struct net_device *dev, void *addr)
  {
         struct sockaddr_ax25 *sa = addr;
  
-       if (sa->sax25_family != AF_AX25)
-               return -EINVAL;
-
-       if (!sa->sax25_ndigis)
-               return -EINVAL;
-
         spin_lock_irq(&dev->xmit_lock);
         memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN);
         spin_unlock_irq(&dev->xmit_lock);
@@ -680,6 +662,9 @@ static int sixpack_open(struct tty_struct *tty)
         netif_start_queue(dev);
  
         init_timer(&sp->tx_t);
+       sp->tx_t.function = sp_xmit_on_air;
+       sp->tx_t.data = (unsigned long) sp;
+
         init_timer(&sp->resync_t);
  
         spin_unlock_bh(&sp->lock);
diff --git a/drivers/net/hamradio/Kconfig b/drivers/net/hamradio/Kconfig

index 7cdebe1a0b6198264f06cbb13020132d41ca40d4..de087cd609d98fcf916c638187ed0df9b0e9eb59 100644 (file)
--- a/drivers/net/hamradio/Kconfig
+++ b/drivers/net/hamradio/Kconfig
@@ -1,6 +1,6 @@
  config MKISS
         tristate "Serial port KISS driver"
-       depends on AX25 && BROKEN_ON_SMP
+       depends on AX25
         ---help---
           KISS is a protocol used for the exchange of data between a computer
           and a Terminal Node Controller (a small embedded system commonly
@@ -17,7 +17,7 @@ config MKISS
  
  config 6PACK
         tristate "Serial port 6PACK driver"
-       depends on AX25 && BROKEN_ON_SMP
+       depends on AX25
         ---help---
           6pack is a transmission protocol for the data exchange between your
           PC and your TNC (the Terminal Node Controller acts as a kind of
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c

index a7f15d9f13e5ccd04faa36b6dda826c20e750fbb..5298096afbdb6efc7a9031ec2d746a0e3438772f 100644 (file)
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -54,6 +54,7 @@
  #include <linux/kmod.h>
  #include <linux/hdlcdrv.h>
  #include <linux/baycom.h>
+#include <linux/jiffies.h>
  #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
  /* prototypes for ax25_encapsulate and ax25_rebuild_header */
  #include <net/ax25.h> 
@@ -287,7 +288,7 @@ static inline void baycom_int_freq(struct baycom_state *bc)
          * measure the interrupt frequency
          */
         bc->debug_vals.cur_intcnt++;
-       if ((cur_jiffies - bc->debug_vals.last_jiffies) >= HZ) {
+       if (time_after_eq(cur_jiffies, bc->debug_vals.last_jiffies + HZ)) {
                 bc->debug_vals.last_jiffies = cur_jiffies;
                 bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt;
                 bc->debug_vals.cur_intcnt = 0;
diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c

index 612ad452bee03b32d0dc0188907b37face5c7a9a..3b1bef1ee21507ec9a453382ed94810d0d5053ce 100644 (file)
--- a/drivers/net/hamradio/baycom_par.c
+++ b/drivers/net/hamradio/baycom_par.c
@@ -84,6 +84,7 @@
  #include <linux/baycom.h>
  #include <linux/parport.h>
  #include <linux/bitops.h>
+#include <linux/jiffies.h>
  
  #include <asm/bug.h>
  #include <asm/system.h>
@@ -165,7 +166,7 @@ static void __inline__ baycom_int_freq(struct baycom_state *bc)
          * measure the interrupt frequency
          */
         bc->debug_vals.cur_intcnt++;
-       if ((cur_jiffies - bc->debug_vals.last_jiffies) >= HZ) {
+       if (time_after_eq(cur_jiffies, bc->debug_vals.last_jiffies + HZ)) {
                 bc->debug_vals.last_jiffies = cur_jiffies;
                 bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt;
                 bc->debug_vals.cur_intcnt = 0;
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c

index 25f270b053788cbcb7b87265cab9036ca8add64a..232793d2ce6b0a46bf7d4a1a04e6683ac7624d14 100644 (file)
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ b/drivers/net/hamradio/baycom_ser_fdx.c
@@ -79,6 +79,7 @@
  #include <asm/io.h>
  #include <linux/hdlcdrv.h>
  #include <linux/baycom.h>
+#include <linux/jiffies.h>
  
  /* --------------------------------------------------------------------- */
  
@@ -159,7 +160,7 @@ static inline void baycom_int_freq(struct baycom_state *bc)
          * measure the interrupt frequency
          */
         bc->debug_vals.cur_intcnt++;
-       if ((cur_jiffies - bc->debug_vals.last_jiffies) >= HZ) {
+       if (time_after_eq(cur_jiffies, bc->debug_vals.last_jiffies + HZ)) {
                 bc->debug_vals.last_jiffies = cur_jiffies;
                 bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt;
                 bc->debug_vals.cur_intcnt = 0;
diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c

index eead85d009627980818e9c66b8420deca995ec46..be596a3eb3fd9b164b6107410498d4dde7a76ace 100644 (file)
--- a/drivers/net/hamradio/baycom_ser_hdx.c
+++ b/drivers/net/hamradio/baycom_ser_hdx.c
@@ -69,6 +69,7 @@
  #include <asm/io.h>
  #include <linux/hdlcdrv.h>
  #include <linux/baycom.h>
+#include <linux/jiffies.h>
  
  /* --------------------------------------------------------------------- */
  
@@ -150,7 +151,7 @@ static inline void baycom_int_freq(struct baycom_state *bc)
          * measure the interrupt frequency
          */
         bc->debug_vals.cur_intcnt++;
-       if ((cur_jiffies - bc->debug_vals.last_jiffies) >= HZ) {
+       if (time_after_eq(cur_jiffies, bc->debug_vals.last_jiffies + HZ)) {
                 bc->debug_vals.last_jiffies = cur_jiffies;
                 bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt;
                 bc->debug_vals.cur_intcnt = 0;
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c

index ba9f0580e1f9be97d9398bcfdccc6d7aa1754789..2946e037a9b1f1e4bab304c2982a0575bf07c69d 100644 (file)
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -98,7 +98,7 @@ static char bcast_addr[6]={0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
  
  static char bpq_eth_addr[6];
  
-static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *);
+static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
  static int bpq_device_event(struct notifier_block *, unsigned long, void *);
  static const char *bpq_print_ethaddr(const unsigned char *);
  
@@ -165,7 +165,7 @@ static inline int dev_is_ethdev(struct net_device *dev)
  /*
   *     Receive an AX.25 frame via an ethernet interface.
   */
-static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype)
+static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev)
  {
         int len;
         char * ptr;
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c

index 3035422f5ad8c867dfdfaeb151f7e05647c572ca..63b1a2b86acb19650fdee9e84efb2e86e7971bed 100644 (file)
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -1,30 +1,19 @@
  /*
- *     MKISS Driver
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
   *
- *     This module:
- *             This module is free software; you can redistribute it and/or
- *             modify it under the terms of the GNU General Public License
- *             as published by the Free Software Foundation; either version
- *             2 of the License, or (at your option) any later version.
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
   *
- *             This module implements the AX.25 protocol for kernel-based
- *             devices like TTYs. It interfaces between a raw TTY, and the
- *             kernel's AX.25 protocol layers, just like slip.c.
- *             AX.25 needs to be separated from slip.c while slip.c is no
- *             longer a static kernel device since it is a module.
- *             This method clears the way to implement other kiss protocols
- *             like mkiss smack g8bpq ..... so far only mkiss is implemented.
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
   *
- * Hans Alblas <hans@esrac.ele.tue.nl>
- *
- *     History
- *     Jonathan (G4KLX)        Fixed to match Linux networking changes - 2.1.15.
- *     Matthias (DG2FEF)       Added support for FlexNet CRC (on special request)
- *                              Fixed bug in ax25_close(): dev_lock_wait() was
- *                              called twice, causing a deadlock.
- *     Jeroen (PE1RXQ)         Removed old MKISS_MAGIC stuff and calls to
- *                             MOD_*_USE_COUNT
- *                             Remove cli() and fix rtnl lock usage.
+ * Copyright (C) Hans Alblas PE1AYX <hans@esrac.ele.tue.nl>
+ * Copyright (C) 2004, 05 Ralf Baechle DL5RB <ralf@linux-mips.org>
   */
  
  #include <linux/config.h>
@@ -46,177 +35,300 @@
  #include <linux/etherdevice.h>
  #include <linux/skbuff.h>
  #include <linux/if_arp.h>
+#include <linux/jiffies.h>
  
  #include <net/ax25.h>
  
-#include "mkiss.h"
-
  #ifdef CONFIG_INET
  #include <linux/ip.h>
  #include <linux/tcp.h>
  #endif
  
-static char banner[] __initdata = KERN_INFO "mkiss: AX.25 Multikiss, Hans Albas PE1AYX\n";
-
-typedef struct ax25_ctrl {
-       struct ax_disp ctrl;    /*                              */
-       struct net_device  dev; /* the device                   */
-} ax25_ctrl_t;
-
-static ax25_ctrl_t **ax25_ctrls;
-
-int ax25_maxdev = AX25_MAXDEV;         /* Can be overridden with insmod! */
-
-static struct tty_ldisc        ax_ldisc;
-
-static int ax25_init(struct net_device *);
-static int kiss_esc(unsigned char *, unsigned char *, int);
-static int kiss_esc_crc(unsigned char *, unsigned char *, unsigned short, int);
-static void kiss_unesc(struct ax_disp *, unsigned char);
+#define AX_MTU         236
+
+/* SLIP/KISS protocol characters. */
+#define END             0300           /* indicates end of frame       */
+#define ESC             0333           /* indicates byte stuffing      */
+#define ESC_END         0334           /* ESC ESC_END means END 'data' */
+#define ESC_ESC         0335           /* ESC ESC_ESC means ESC 'data' */
+
+struct mkiss {                         /* state of one mkiss channel   */
+       struct tty_struct       *tty;   /* ptr to TTY structure         */
+       struct net_device       *dev;   /* easy for intr handling       */
+
+       /* These are pointers to the kmalloc()ed frame buffers. */
+       spinlock_t              buflock;/* lock for rbuff and xbuff */
+       unsigned char           *rbuff; /* receiver buffer              */
+       int                     rcount; /* received chars counter       */
+       unsigned char           *xbuff; /* transmitter buffer           */
+       unsigned char           *xhead; /* pointer to next byte to XMIT */
+       int                     xleft;  /* bytes left in XMIT queue     */
+
+       struct net_device_stats stats;
+
+       /* MTU bookkeeping and buffer size. */
+       int             mtu;            /* Our mtu (to spot changes!)   */
+       int             buffsize;       /* Max buffer size              */
+
+       unsigned long   flags;          /* Flag values/ mode etc        */
+                                       /* long req'd: used by set_bit --RR */
+#define AXF_INUSE      0               /* Channel in use               */
+#define AXF_ESCAPE     1               /* ESC received                 */
+#define AXF_ERROR      2               /* Parity, etc. error           */
+#define AXF_KEEPTEST   3               /* Keepalive test flag          */
+#define AXF_OUTWAIT    4               /* is outpacket was flag        */
+
+       int             mode;
+        int            crcmode;        /* MW: for FlexNet, SMACK etc.  */
+#define CRC_MODE_NONE   0
+#define CRC_MODE_FLEX   1
+#define CRC_MODE_SMACK  2
+
+       atomic_t                refcnt;
+       struct semaphore        dead_sem;
+};
  
  /*---------------------------------------------------------------------------*/
  
-static const unsigned short Crc_flex_table[] = {
-  0x0f87, 0x1e0e, 0x2c95, 0x3d1c, 0x49a3, 0x582a, 0x6ab1, 0x7b38,
-  0x83cf, 0x9246, 0xa0dd, 0xb154, 0xc5eb, 0xd462, 0xe6f9, 0xf770,
-  0x1f06, 0x0e8f, 0x3c14, 0x2d9d, 0x5922, 0x48ab, 0x7a30, 0x6bb9,
-  0x934e, 0x82c7, 0xb05c, 0xa1d5, 0xd56a, 0xc4e3, 0xf678, 0xe7f1,
-  0x2e85, 0x3f0c, 0x0d97, 0x1c1e, 0x68a1, 0x7928, 0x4bb3, 0x5a3a,
-  0xa2cd, 0xb344, 0x81df, 0x9056, 0xe4e9, 0xf560, 0xc7fb, 0xd672,
-  0x3e04, 0x2f8d, 0x1d16, 0x0c9f, 0x7820, 0x69a9, 0x5b32, 0x4abb,
-  0xb24c, 0xa3c5, 0x915e, 0x80d7, 0xf468, 0xe5e1, 0xd77a, 0xc6f3,
-  0x4d83, 0x5c0a, 0x6e91, 0x7f18, 0x0ba7, 0x1a2e, 0x28b5, 0x393c,
-  0xc1cb, 0xd042, 0xe2d9, 0xf350, 0x87ef, 0x9666, 0xa4fd, 0xb574,
-  0x5d02, 0x4c8b, 0x7e10, 0x6f99, 0x1b26, 0x0aaf, 0x3834, 0x29bd,
-  0xd14a, 0xc0c3, 0xf258, 0xe3d1, 0x976e, 0x86e7, 0xb47c, 0xa5f5,
-  0x6c81, 0x7d08, 0x4f93, 0x5e1a, 0x2aa5, 0x3b2c, 0x09b7, 0x183e,
-  0xe0c9, 0xf140, 0xc3db, 0xd252, 0xa6ed, 0xb764, 0x85ff, 0x9476,
-  0x7c00, 0x6d89, 0x5f12, 0x4e9b, 0x3a24, 0x2bad, 0x1936, 0x08bf,
-  0xf048, 0xe1c1, 0xd35a, 0xc2d3, 0xb66c, 0xa7e5, 0x957e, 0x84f7,
-  0x8b8f, 0x9a06, 0xa89d, 0xb914, 0xcdab, 0xdc22, 0xeeb9, 0xff30,
-  0x07c7, 0x164e, 0x24d5, 0x355c, 0x41e3, 0x506a, 0x62f1, 0x7378,
-  0x9b0e, 0x8a87, 0xb81c, 0xa995, 0xdd2a, 0xcca3, 0xfe38, 0xefb1,
-  0x1746, 0x06cf, 0x3454, 0x25dd, 0x5162, 0x40eb, 0x7270, 0x63f9,
-  0xaa8d, 0xbb04, 0x899f, 0x9816, 0xeca9, 0xfd20, 0xcfbb, 0xde32,
-  0x26c5, 0x374c, 0x05d7, 0x145e, 0x60e1, 0x7168, 0x43f3, 0x527a,
-  0xba0c, 0xab85, 0x991e, 0x8897, 0xfc28, 0xeda1, 0xdf3a, 0xceb3,
-  0x3644, 0x27cd, 0x1556, 0x04df, 0x7060, 0x61e9, 0x5372, 0x42fb,
-  0xc98b, 0xd802, 0xea99, 0xfb10, 0x8faf, 0x9e26, 0xacbd, 0xbd34,
-  0x45c3, 0x544a, 0x66d1, 0x7758, 0x03e7, 0x126e, 0x20f5, 0x317c,
-  0xd90a, 0xc883, 0xfa18, 0xeb91, 0x9f2e, 0x8ea7, 0xbc3c, 0xadb5,
-  0x5542, 0x44cb, 0x7650, 0x67d9, 0x1366, 0x02ef, 0x3074, 0x21fd,
-  0xe889, 0xf900, 0xcb9b, 0xda12, 0xaead, 0xbf24, 0x8dbf, 0x9c36,
-  0x64c1, 0x7548, 0x47d3, 0x565a, 0x22e5, 0x336c, 0x01f7, 0x107e,
-  0xf808, 0xe981, 0xdb1a, 0xca93, 0xbe2c, 0xafa5, 0x9d3e, 0x8cb7,
-  0x7440, 0x65c9, 0x5752, 0x46db, 0x3264, 0x23ed, 0x1176, 0x00ff
+static const unsigned short crc_flex_table[] = {
+       0x0f87, 0x1e0e, 0x2c95, 0x3d1c, 0x49a3, 0x582a, 0x6ab1, 0x7b38,
+       0x83cf, 0x9246, 0xa0dd, 0xb154, 0xc5eb, 0xd462, 0xe6f9, 0xf770,
+       0x1f06, 0x0e8f, 0x3c14, 0x2d9d, 0x5922, 0x48ab, 0x7a30, 0x6bb9,
+       0x934e, 0x82c7, 0xb05c, 0xa1d5, 0xd56a, 0xc4e3, 0xf678, 0xe7f1,
+       0x2e85, 0x3f0c, 0x0d97, 0x1c1e, 0x68a1, 0x7928, 0x4bb3, 0x5a3a,
+       0xa2cd, 0xb344, 0x81df, 0x9056, 0xe4e9, 0xf560, 0xc7fb, 0xd672,
+       0x3e04, 0x2f8d, 0x1d16, 0x0c9f, 0x7820, 0x69a9, 0x5b32, 0x4abb,
+       0xb24c, 0xa3c5, 0x915e, 0x80d7, 0xf468, 0xe5e1, 0xd77a, 0xc6f3,
+       0x4d83, 0x5c0a, 0x6e91, 0x7f18, 0x0ba7, 0x1a2e, 0x28b5, 0x393c,
+       0xc1cb, 0xd042, 0xe2d9, 0xf350, 0x87ef, 0x9666, 0xa4fd, 0xb574,
+       0x5d02, 0x4c8b, 0x7e10, 0x6f99, 0x1b26, 0x0aaf, 0x3834, 0x29bd,
+       0xd14a, 0xc0c3, 0xf258, 0xe3d1, 0x976e, 0x86e7, 0xb47c, 0xa5f5,
+       0x6c81, 0x7d08, 0x4f93, 0x5e1a, 0x2aa5, 0x3b2c, 0x09b7, 0x183e,
+       0xe0c9, 0xf140, 0xc3db, 0xd252, 0xa6ed, 0xb764, 0x85ff, 0x9476,
+       0x7c00, 0x6d89, 0x5f12, 0x4e9b, 0x3a24, 0x2bad, 0x1936, 0x08bf,
+       0xf048, 0xe1c1, 0xd35a, 0xc2d3, 0xb66c, 0xa7e5, 0x957e, 0x84f7,
+       0x8b8f, 0x9a06, 0xa89d, 0xb914, 0xcdab, 0xdc22, 0xeeb9, 0xff30,
+       0x07c7, 0x164e, 0x24d5, 0x355c, 0x41e3, 0x506a, 0x62f1, 0x7378,
+       0x9b0e, 0x8a87, 0xb81c, 0xa995, 0xdd2a, 0xcca3, 0xfe38, 0xefb1,
+       0x1746, 0x06cf, 0x3454, 0x25dd, 0x5162, 0x40eb, 0x7270, 0x63f9,
+       0xaa8d, 0xbb04, 0x899f, 0x9816, 0xeca9, 0xfd20, 0xcfbb, 0xde32,
+       0x26c5, 0x374c, 0x05d7, 0x145e, 0x60e1, 0x7168, 0x43f3, 0x527a,
+       0xba0c, 0xab85, 0x991e, 0x8897, 0xfc28, 0xeda1, 0xdf3a, 0xceb3,
+       0x3644, 0x27cd, 0x1556, 0x04df, 0x7060, 0x61e9, 0x5372, 0x42fb,
+       0xc98b, 0xd802, 0xea99, 0xfb10, 0x8faf, 0x9e26, 0xacbd, 0xbd34,
+       0x45c3, 0x544a, 0x66d1, 0x7758, 0x03e7, 0x126e, 0x20f5, 0x317c,
+       0xd90a, 0xc883, 0xfa18, 0xeb91, 0x9f2e, 0x8ea7, 0xbc3c, 0xadb5,
+       0x5542, 0x44cb, 0x7650, 0x67d9, 0x1366, 0x02ef, 0x3074, 0x21fd,
+       0xe889, 0xf900, 0xcb9b, 0xda12, 0xaead, 0xbf24, 0x8dbf, 0x9c36,
+       0x64c1, 0x7548, 0x47d3, 0x565a, 0x22e5, 0x336c, 0x01f7, 0x107e,
+       0xf808, 0xe981, 0xdb1a, 0xca93, 0xbe2c, 0xafa5, 0x9d3e, 0x8cb7,
+       0x7440, 0x65c9, 0x5752, 0x46db, 0x3264, 0x23ed, 0x1176, 0x00ff
  };
  
-/*---------------------------------------------------------------------------*/
-
  static unsigned short calc_crc_flex(unsigned char *cp, int size)
  {
-    unsigned short crc = 0xffff;
-    
-    while (size--)
-       crc = (crc << 8) ^ Crc_flex_table[((crc >> 8) ^ *cp++) & 0xff];
+       unsigned short crc = 0xffff;
  
-    return crc;
-}
+       while (size--)
+               crc = (crc << 8) ^ crc_flex_table[((crc >> 8) ^ *cp++) & 0xff];
  
-/*---------------------------------------------------------------------------*/
+       return crc;
+}
  
  static int check_crc_flex(unsigned char *cp, int size)
  {
-  unsigned short crc = 0xffff;
+       unsigned short crc = 0xffff;
  
-  if (size < 3)
-      return -1;
+       if (size < 3)
+               return -1;
  
-  while (size--)
-      crc = (crc << 8) ^ Crc_flex_table[((crc >> 8) ^ *cp++) & 0xff];
+       while (size--)
+               crc = (crc << 8) ^ crc_flex_table[((crc >> 8) ^ *cp++) & 0xff];
  
-  if ((crc & 0xffff) != 0x7070) 
-      return -1;
+       if ((crc & 0xffff) != 0x7070)
+               return -1;
  
-  return 0;
+       return 0;
  }
  
-/*---------------------------------------------------------------------------*/
+/*
+ * Standard encapsulation
+ */
  
-/* Find a free channel, and link in this `tty' line. */
-static inline struct ax_disp *ax_alloc(void)
+static int kiss_esc(unsigned char *s, unsigned char *d, int len)
  {
-       ax25_ctrl_t *axp=NULL;
-       int i;
+       unsigned char *ptr = d;
+       unsigned char c;
  
-       for (i = 0; i < ax25_maxdev; i++) {
-               axp = ax25_ctrls[i];
+       /*
+        * Send an initial END character to flush out any data that may have
+        * accumulated in the receiver due to line noise.
+        */
  
-               /* Not allocated ? */
-               if (axp == NULL)
-                       break;
+       *ptr++ = END;
  
-               /* Not in use ? */
-               if (!test_and_set_bit(AXF_INUSE, &axp->ctrl.flags))
+       while (len-- > 0) {
+               switch (c = *s++) {
+               case END:
+                       *ptr++ = ESC;
+                       *ptr++ = ESC_END;
                         break;
+               case ESC:
+                       *ptr++ = ESC;
+                       *ptr++ = ESC_ESC;
+                       break;
+               default:
+                       *ptr++ = c;
+                       break;
+               }
         }
  
-       /* Sorry, too many, all slots in use */
-       if (i >= ax25_maxdev)
-               return NULL;
+       *ptr++ = END;
+
+       return ptr - d;
+}
+
+/*
+ * MW:
+ * OK, it's ugly, but tell me a better solution without copying the
+ * packet to a temporary buffer :-)
+ */
+static int kiss_esc_crc(unsigned char *s, unsigned char *d, unsigned short crc,
+       int len)
+{
+       unsigned char *ptr = d;
+       unsigned char c=0;
+
+       *ptr++ = END;
+       while (len > 0) {
+               if (len > 2)
+                       c = *s++;
+               else if (len > 1)
+                       c = crc >> 8;
+               else if (len > 0)
+                       c = crc & 0xff;
+
+               len--;
  
-       /* If no channels are available, allocate one */
-       if (axp == NULL && (ax25_ctrls[i] = kmalloc(sizeof(ax25_ctrl_t), GFP_KERNEL)) != NULL) {
-               axp = ax25_ctrls[i];
+               switch (c) {
+               case END:
+                       *ptr++ = ESC;
+                       *ptr++ = ESC_END;
+                       break;
+               case ESC:
+                       *ptr++ = ESC;
+                       *ptr++ = ESC_ESC;
+                       break;
+               default:
+                       *ptr++ = c;
+                       break;
+               }
         }
-       memset(axp, 0, sizeof(ax25_ctrl_t));
-
-       /* Initialize channel control data */
-       set_bit(AXF_INUSE, &axp->ctrl.flags);
-       sprintf(axp->dev.name, "ax%d", i++);
-       axp->ctrl.tty      = NULL;
-       axp->dev.base_addr = i;
-       axp->dev.priv      = (void *)&axp->ctrl;
-       axp->dev.next      = NULL;
-       axp->dev.init      = ax25_init;
-
-       if (axp != NULL) {
-               /*
-                * register device so that it can be ifconfig'ed
-                * ax25_init() will be called as a side-effect
-                * SIDE-EFFECT WARNING: ax25_init() CLEARS axp->ctrl !
-                */
-               if (register_netdev(&axp->dev) == 0) {
-                       /* (Re-)Set the INUSE bit.   Very Important! */
-                       set_bit(AXF_INUSE, &axp->ctrl.flags);
-                       axp->ctrl.dev = &axp->dev;
-                       axp->dev.priv = (void *) &axp->ctrl;
-
-                       return &axp->ctrl;
-               } else {
-                       clear_bit(AXF_INUSE,&axp->ctrl.flags);
-                       printk(KERN_ERR "mkiss: ax_alloc() - register_netdev() failure.\n");
+       *ptr++ = END;
+
+       return ptr - d;
+}
+
+/*
+ * Send one completely decapsulated AX.25 packet to the AX.25 layer.
+ *
+ * If the first byte of rbuff is above 0x0f and has bit 0x20 set, the frame
+ * carries a FlexNet CRC: the CRC is verified, the two trailing CRC bytes
+ * are dropped and the 0x20 bit is cleared.  A frame that fails the check is
+ * counted in rx_errors and discarded.  The remaining rcount bytes are copied
+ * into a freshly allocated skb and handed to netif_rx().
+ */
+static void ax_bump(struct mkiss *ax)
+{
+       struct sk_buff *skb;
+       int count;
+
+       spin_lock_bh(&ax->buflock);
+       if (ax->rbuff[0] > 0x0f) {
+               if (ax->rbuff[0] & 0x20) {
+                       ax->crcmode = CRC_MODE_FLEX;
+                       if (check_crc_flex(ax->rbuff, ax->rcount) < 0) {
+                               ax->stats.rx_errors++;
+                               /* never leave with buflock still held */
+                               spin_unlock_bh(&ax->buflock);
+                               return;
+                       }
+                       ax->rcount -= 2;
+                        /* dl9sau bugfix: the trailing two bytes flexnet crc
+                         * will not be passed to the kernel. thus we have
+                         * to correct the kissparm signature, because it
+                         * indicates a crc but there's none
+                        */
+                        *ax->rbuff &= ~0x20;
                 }
+       }
+       spin_unlock_bh(&ax->buflock);
+
+       count = ax->rcount;
+
+       if ((skb = dev_alloc_skb(count)) == NULL) {
+               printk(KERN_ERR "mkiss: %s: memory squeeze, dropping packet.\n",
+                      ax->dev->name);
+               ax->stats.rx_dropped++;
+               return;
         }
  
-       return NULL;
+       spin_lock_bh(&ax->buflock);
+       memcpy(skb_put(skb,count), ax->rbuff, count);
+       spin_unlock_bh(&ax->buflock);
+       skb->protocol = ax25_type_trans(skb, ax->dev);
+       netif_rx(skb);
+       ax->dev->last_rx = jiffies;
+       ax->stats.rx_packets++;
+       ax->stats.rx_bytes += count;
  }
  
-/* Free an AX25 channel. */
-static inline void ax_free(struct ax_disp *ax)
+static void kiss_unesc(struct mkiss *ax, unsigned char s)
  {
-       /* Free all AX25 frame buffers. */
-       if (ax->rbuff)
-               kfree(ax->rbuff);
-       ax->rbuff = NULL;
-       if (ax->xbuff)
-               kfree(ax->xbuff);
-       ax->xbuff = NULL;
-       if (!test_and_clear_bit(AXF_INUSE, &ax->flags))
-               printk(KERN_ERR "mkiss: %s: ax_free for already free unit.\n", ax->dev->name);
+       switch (s) {
+       case END:
+               /* drop keeptest bit = VSV */
+               if (test_bit(AXF_KEEPTEST, &ax->flags))
+                       clear_bit(AXF_KEEPTEST, &ax->flags);
+
+               if (!test_and_clear_bit(AXF_ERROR, &ax->flags) && (ax->rcount > 2))
+                       ax_bump(ax);
+
+               clear_bit(AXF_ESCAPE, &ax->flags);
+               ax->rcount = 0;
+               return;
+
+       case ESC:
+               set_bit(AXF_ESCAPE, &ax->flags);
+               return;
+       case ESC_ESC:
+               if (test_and_clear_bit(AXF_ESCAPE, &ax->flags))
+                       s = ESC;
+               break;
+       case ESC_END:
+               if (test_and_clear_bit(AXF_ESCAPE, &ax->flags))
+                       s = END;
+               break;
+       }
+
+       spin_lock_bh(&ax->buflock);
+       if (!test_bit(AXF_ERROR, &ax->flags)) {
+               if (ax->rcount < ax->buffsize) {
+                       ax->rbuff[ax->rcount++] = s;
+                       spin_unlock_bh(&ax->buflock);
+                       return;
+               }
+
+               ax->stats.rx_over_errors++;
+               set_bit(AXF_ERROR, &ax->flags);
+       }
+       spin_unlock_bh(&ax->buflock);
+}
+
+static int ax_set_mac_address(struct net_device *dev, void *addr)
+{
+       struct sockaddr_ax25 *sa = addr;
+
+       spin_lock_irq(&dev->xmit_lock);
+       memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN);
+       spin_unlock_irq(&dev->xmit_lock);
+
+       return 0;
  }
  
-static void ax_changedmtu(struct ax_disp *ax)
+/*---------------------------------------------------------------------------*/
+
+static void ax_changedmtu(struct mkiss *ax)
  {
         struct net_device *dev = ax->dev;
         unsigned char *xbuff, *rbuff, *oxbuff, *orbuff;
@@ -236,7 +348,8 @@ static void ax_changedmtu(struct ax_disp *ax)
         rbuff = kmalloc(len + 4, GFP_ATOMIC);
  
         if (xbuff == NULL || rbuff == NULL)  {
-               printk(KERN_ERR "mkiss: %s: unable to grow ax25 buffers, MTU change cancelled.\n",
+               printk(KERN_ERR "mkiss: %s: unable to grow ax25 buffers, "
+                      "MTU change cancelled.\n",
                        ax->dev->name);
                 dev->mtu = ax->mtu;
                 if (xbuff != NULL)
@@ -258,7 +371,7 @@ static void ax_changedmtu(struct ax_disp *ax)
                         memcpy(ax->xbuff, ax->xhead, ax->xleft);
                 } else  {
                         ax->xleft = 0;
-                       ax->tx_dropped++;
+                       ax->stats.tx_dropped++;
                 }
         }
  
@@ -269,7 +382,7 @@ static void ax_changedmtu(struct ax_disp *ax)
                         memcpy(ax->rbuff, orbuff, ax->rcount);
                 } else  {
                         ax->rcount = 0;
-                       ax->rx_over_errors++;
+                       ax->stats.rx_over_errors++;
                         set_bit(AXF_ERROR, &ax->flags);
                 }
         }
@@ -279,72 +392,14 @@ static void ax_changedmtu(struct ax_disp *ax)
  
         spin_unlock_bh(&ax->buflock);
  
-       if (oxbuff != NULL)
-               kfree(oxbuff);
-       if (orbuff != NULL)
-               kfree(orbuff);
-}
-
-
-/* Set the "sending" flag.  This must be atomic. */
-static inline void ax_lock(struct ax_disp *ax)
-{
-       netif_stop_queue(ax->dev);
-}
-
-
-/* Clear the "sending" flag.  This must be atomic. */
-static inline void ax_unlock(struct ax_disp *ax)
-{
-       netif_start_queue(ax->dev);
-}
-
-/* Send one completely decapsulated AX.25 packet to the AX.25 layer. */
-static void ax_bump(struct ax_disp *ax)
-{
-       struct sk_buff *skb;
-       int count;
-
-       spin_lock_bh(&ax->buflock);
-       if (ax->rbuff[0] > 0x0f) {
-               if (ax->rbuff[0] & 0x20) {
-                       ax->crcmode = CRC_MODE_FLEX;
-                       if (check_crc_flex(ax->rbuff, ax->rcount) < 0) {
-                               ax->rx_errors++;
-                               return;
-                       }
-                       ax->rcount -= 2;
-                        /* dl9sau bugfix: the trailling two bytes flexnet crc
-                         * will not be passed to the kernel. thus we have
-                         * to correct the kissparm signature, because it
-                         * indicates a crc but there's none
-                        */
-                        *ax->rbuff &= ~0x20;
-               }
-       }
-       spin_unlock_bh(&ax->buflock);
-
-       count = ax->rcount;
-
-       if ((skb = dev_alloc_skb(count)) == NULL) {
-               printk(KERN_ERR "mkiss: %s: memory squeeze, dropping packet.\n", ax->dev->name);
-               ax->rx_dropped++;
-               return;
-       }
-
-       spin_lock_bh(&ax->buflock);
-       memcpy(skb_put(skb,count), ax->rbuff, count);
-       spin_unlock_bh(&ax->buflock);
-       skb->protocol = ax25_type_trans(skb, ax->dev);
-       netif_rx(skb);
-       ax->dev->last_rx = jiffies;
-       ax->rx_packets++;
-       ax->rx_bytes+=count;
+       kfree(oxbuff);
+       kfree(orbuff);
  }
  
  /* Encapsulate one AX.25 packet and stuff into a TTY queue. */
-static void ax_encaps(struct ax_disp *ax, unsigned char *icp, int len)
+static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
  {
+       struct mkiss *ax = netdev_priv(dev);
         unsigned char *p;
         int actual, count;
  
@@ -354,8 +409,8 @@ static void ax_encaps(struct ax_disp *ax, unsigned char *icp, int len)
         if (len > ax->mtu) {            /* Sigh, shouldn't occur BUT ... */
                 len = ax->mtu;
                 printk(KERN_ERR "mkiss: %s: truncating oversized transmit packet!\n", ax->dev->name);
-               ax->tx_dropped++;
-               ax_unlock(ax);
+               ax->stats.tx_dropped++;
+               netif_start_queue(dev);
                 return;
         }
  
@@ -376,10 +431,11 @@ static void ax_encaps(struct ax_disp *ax, unsigned char *icp, int len)
                  break;
         }
         
-       ax->tty->flags |= (1 << TTY_DO_WRITE_WAKEUP);
+       set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags);
         actual = ax->tty->driver->write(ax->tty, ax->xbuff, count);
-       ax->tx_packets++;
-       ax->tx_bytes+=actual;
+       ax->stats.tx_packets++;
+       ax->stats.tx_bytes += actual;
+
         ax->dev->trans_start = jiffies;
         ax->xleft = count - actual;
         ax->xhead = ax->xbuff + actual;
@@ -387,37 +443,10 @@ static void ax_encaps(struct ax_disp *ax, unsigned char *icp, int len)
         spin_unlock_bh(&ax->buflock);
  }
  
-/*
- * Called by the driver when there's room for more data.  If we have
- * more packets to send, we send them here.
- */
-static void ax25_write_wakeup(struct tty_struct *tty)
-{
-       int actual;
-       struct ax_disp *ax = (struct ax_disp *) tty->disc_data;
-
-       /* First make sure we're connected. */
-       if (ax == NULL || ax->magic != AX25_MAGIC || !netif_running(ax->dev))
-               return;
-       if (ax->xleft <= 0)  {
-               /* Now serial buffer is almost free & we can start
-                * transmission of another packet
-                */
-               tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP);
-
-               netif_wake_queue(ax->dev);
-               return;
-       }
-
-       actual = tty->driver->write(tty, ax->xhead, ax->xleft);
-       ax->xleft -= actual;
-       ax->xhead += actual;
-}
-
  /* Encapsulate an AX.25 packet and kick it into a TTY queue. */
  static int ax_xmit(struct sk_buff *skb, struct net_device *dev)
  {
-       struct ax_disp *ax = netdev_priv(dev);
+       struct mkiss *ax = netdev_priv(dev);
  
         if (!netif_running(dev))  {
                 printk(KERN_ERR "mkiss: %s: xmit call when iface is down\n", dev->name);
@@ -429,7 +458,7 @@ static int ax_xmit(struct sk_buff *skb, struct net_device *dev)
                  * May be we must check transmitter timeout here ?
                  *      14 Oct 1994 Dmitry Gorodchanin.
                  */
-               if (jiffies - dev->trans_start  < 20 * HZ) {
+               if (time_before(jiffies, dev->trans_start + 20 * HZ)) {
                         /* 20 sec timeout not reached */
                         return 1;
                 }
@@ -439,20 +468,30 @@ static int ax_xmit(struct sk_buff *skb, struct net_device *dev)
                        "bad line quality" : "driver error");
  
                 ax->xleft = 0;
-               ax->tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP);
-               ax_unlock(ax);
+               clear_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags);
+               netif_start_queue(dev);
         }
  
         /* We were not busy, so we are now... :-) */
         if (skb != NULL) {
-               ax_lock(ax);
-               ax_encaps(ax, skb->data, skb->len);
+               netif_stop_queue(dev);
+               ax_encaps(dev, skb->data, skb->len);
                 kfree_skb(skb);
         }
  
         return 0;
  }
  
+static int ax_open_dev(struct net_device *dev)
+{
+       struct mkiss *ax = netdev_priv(dev);
+
+       if (ax->tty == NULL)
+               return -ENODEV;
+
+       return 0;
+}
+
  #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
  
  /* Return the frame type ID */
@@ -481,7 +520,7 @@ static int ax_rebuild_header(struct sk_buff *skb)
  /* Open the low-level part of the AX25 channel. Easy! */
  static int ax_open(struct net_device *dev)
  {
-       struct ax_disp *ax = netdev_priv(dev);
+       struct mkiss *ax = netdev_priv(dev);
         unsigned long len;
  
         if (ax->tty == NULL)
@@ -518,7 +557,6 @@ static int ax_open(struct net_device *dev)
  
         spin_lock_init(&ax->buflock);
  
-       netif_start_queue(dev);
         return 0;
  
  noxbuff:
@@ -532,68 +570,100 @@ norbuff:
  /* Close the low-level part of the AX25 channel. Easy! */
  static int ax_close(struct net_device *dev)
  {
-       struct ax_disp *ax = netdev_priv(dev);
+       struct mkiss *ax = netdev_priv(dev);
  
-       if (ax->tty == NULL)
-               return -EBUSY;
-
-       ax->tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP);
+       if (ax->tty)
+               clear_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags);
  
         netif_stop_queue(dev);
  
         return 0;
  }
  
-static int ax25_receive_room(struct tty_struct *tty)
+static struct net_device_stats *ax_get_stats(struct net_device *dev)
  {
-       return 65536;  /* We can handle an infinite amount of data. :-) */
+       struct mkiss *ax = netdev_priv(dev);
+
+       return &ax->stats;
+}
+
+static void ax_setup(struct net_device *dev)
+{
+       static char ax25_bcast[AX25_ADDR_LEN] =
+               {'Q'<<1,'S'<<1,'T'<<1,' '<<1,' '<<1,' '<<1,'0'<<1};
+       static char ax25_test[AX25_ADDR_LEN] =
+               {'L'<<1,'I'<<1,'N'<<1,'U'<<1,'X'<<1,' '<<1,'1'<<1};
+
+       /* Finish setting up the DEVICE info. */
+       dev->mtu             = AX_MTU;
+       dev->hard_start_xmit = ax_xmit;
+       dev->open            = ax_open_dev;
+       dev->stop            = ax_close;
+       dev->get_stats       = ax_get_stats;
+       dev->set_mac_address = ax_set_mac_address;
+       dev->hard_header_len = 0;
+       dev->addr_len        = 0;
+       dev->type            = ARPHRD_AX25;
+       dev->tx_queue_len    = 10;
+       dev->hard_header     = ax_header;
+       dev->rebuild_header  = ax_rebuild_header;
+
+       memcpy(dev->broadcast, ax25_bcast, AX25_ADDR_LEN);
+       memcpy(dev->dev_addr,  ax25_test,  AX25_ADDR_LEN);
+
+       dev->flags      = IFF_BROADCAST | IFF_MULTICAST;
  }
  
  /*
- * Handle the 'receiver data ready' interrupt.
- * This function is called by the 'tty_io' module in the kernel when
- * a block of data has been received, which can now be decapsulated
- * and sent on to the AX.25 layer for further processing.
+ * We have a potential race on dereferencing tty->disc_data, because the tty
+ * layer provides no locking at all - thus one cpu could be running
+ * mkiss_receive_buf while another calls mkiss_close, which zeroes
+ * tty->disc_data and frees the memory that mkiss_receive_buf is using.  The
+ * best way to fix this is to use a rwlock in the tty struct, but for now we
+ * use a single global rwlock for all ttys in the mkiss line discipline.
   */
-static void ax25_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count)
+static rwlock_t disc_data_lock = RW_LOCK_UNLOCKED;
+
+static struct mkiss *mkiss_get(struct tty_struct *tty)
  {
-       struct ax_disp *ax = (struct ax_disp *) tty->disc_data;
+       struct mkiss *ax;
  
-       if (ax == NULL || ax->magic != AX25_MAGIC || !netif_running(ax->dev))
-               return;
+       read_lock(&disc_data_lock);
+       ax = tty->disc_data;
+       if (ax)
+               atomic_inc(&ax->refcnt);
+       read_unlock(&disc_data_lock);
  
-       /*
-        * Argh! mtu change time! - costs us the packet part received
-        * at the change
-        */
-       if (ax->mtu != ax->dev->mtu + 73)
-               ax_changedmtu(ax);
-
-       /* Read the characters out of the buffer */
-       while (count--) {
-               if (fp != NULL && *fp++) {
-                       if (!test_and_set_bit(AXF_ERROR, &ax->flags))
-                               ax->rx_errors++;
-                       cp++;
-                       continue;
-               }
+       return ax;
+}
  
-               kiss_unesc(ax, *cp++);
-       }
+static void mkiss_put(struct mkiss *ax)
+{
+       if (atomic_dec_and_test(&ax->refcnt))
+               up(&ax->dead_sem);
  }
  
-static int ax25_open(struct tty_struct *tty)
+static int mkiss_open(struct tty_struct *tty)
  {
-       struct ax_disp *ax = (struct ax_disp *) tty->disc_data;
+       struct net_device *dev;
+       struct mkiss *ax;
         int err;
  
-       /* First make sure we're not already connected. */
-       if (ax && ax->magic == AX25_MAGIC)
-               return -EEXIST;
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
  
-       /* OK.  Find a free AX25 channel to use. */
-       if ((ax = ax_alloc()) == NULL)
-               return -ENFILE;
+       dev = alloc_netdev(sizeof(struct mkiss), "ax%d", ax_setup);
+       if (!dev) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       ax = netdev_priv(dev);
+       ax->dev = dev;
+
+       spin_lock_init(&ax->buflock);
+       atomic_set(&ax->refcnt, 1);
+       init_MUTEX_LOCKED(&ax->dead_sem);
  
         ax->tty = tty;
         tty->disc_data = ax;
@@ -602,283 +672,212 @@ static int ax25_open(struct tty_struct *tty)
                 tty->driver->flush_buffer(tty);
  
         /* Restore default settings */
-       ax->dev->type = ARPHRD_AX25;
+       dev->type = ARPHRD_AX25;
  
         /* Perform the low-level AX25 initialization. */
-       if ((err = ax_open(ax->dev)))
-               return err;
+       if ((err = ax_open(ax->dev))) {
+               goto out_free_netdev;
+       }
  
-       /* Done.  We have linked the TTY line to a channel. */
-       return ax->dev->base_addr;
-}
+       if (register_netdev(dev))
+               goto out_free_buffers;
  
-static void ax25_close(struct tty_struct *tty)
-{
-       struct ax_disp *ax = (struct ax_disp *) tty->disc_data;
+       netif_start_queue(dev);
  
-       /* First make sure we're connected. */
-       if (ax == NULL || ax->magic != AX25_MAGIC)
-               return;
+       /* Done.  We have linked the TTY line to a channel. */
+       return 0;
  
-       unregister_netdev(ax->dev);
+out_free_buffers:
+       kfree(ax->rbuff);
+       kfree(ax->xbuff);
  
-       tty->disc_data = NULL;
-       ax->tty        = NULL;
+out_free_netdev:
+       free_netdev(dev);
  
-       ax_free(ax);
+out:
+       return err;
  }
  
-
-static struct net_device_stats *ax_get_stats(struct net_device *dev)
+static void mkiss_close(struct tty_struct *tty)
  {
-       static struct net_device_stats stats;
-       struct ax_disp *ax = netdev_priv(dev);
-
-       memset(&stats, 0, sizeof(struct net_device_stats));
-
-       stats.rx_packets     = ax->rx_packets;
-       stats.tx_packets     = ax->tx_packets;
-       stats.rx_bytes       = ax->rx_bytes;
-       stats.tx_bytes       = ax->tx_bytes;
-       stats.rx_dropped     = ax->rx_dropped;
-       stats.tx_dropped     = ax->tx_dropped;
-       stats.tx_errors      = ax->tx_errors;
-       stats.rx_errors      = ax->rx_errors;
-       stats.rx_over_errors = ax->rx_over_errors;
-
-       return &stats;
-}
+       struct mkiss *ax;
  
+       write_lock(&disc_data_lock);
+       ax = tty->disc_data;
+       tty->disc_data = NULL;
+       write_unlock(&disc_data_lock);
  
-/************************************************************************
- *                        STANDARD ENCAPSULATION                        *
- ************************************************************************/
-
-static int kiss_esc(unsigned char *s, unsigned char *d, int len)
-{
-       unsigned char *ptr = d;
-       unsigned char c;
+       if (ax == 0)
+               return;
  
         /*
-        * Send an initial END character to flush out any
-        * data that may have accumulated in the receiver
-        * due to line noise.
+        * We have now ensured that nobody can start using ax from now on, but
+        * we have to wait for all existing users to finish.
          */
+       if (!atomic_dec_and_test(&ax->refcnt))
+               down(&ax->dead_sem);
  
-       *ptr++ = END;
-
-       while (len-- > 0) {
-               switch (c = *s++) {
-                       case END:
-                               *ptr++ = ESC;
-                               *ptr++ = ESC_END;
-                               break;
-                       case ESC:
-                               *ptr++ = ESC;
-                               *ptr++ = ESC_ESC;
-                               break;
-                       default:
-                               *ptr++ = c;
-                               break;
-               }
-       }
+       unregister_netdev(ax->dev);
  
-       *ptr++ = END;
+       /* Free all AX25 frame buffers. */
+       kfree(ax->rbuff);
+       kfree(ax->xbuff);
  
-       return ptr - d;
+       ax->tty = NULL;
  }
  
-/*
- * MW:
- * OK its ugly, but tell me a better solution without copying the
- * packet to a temporary buffer :-)
- */
-static int kiss_esc_crc(unsigned char *s, unsigned char *d, unsigned short crc, int len)
+/* Perform I/O control on an active ax25 channel. */
+static int mkiss_ioctl(struct tty_struct *tty, struct file *file,
+       unsigned int cmd, unsigned long arg)
  {
-       unsigned char *ptr = d;
-       unsigned char c=0;
-
-       *ptr++ = END;
-       while (len > 0) {
-               if (len > 2) 
-                       c = *s++;
-               else if (len > 1)
-                       c = crc >> 8;
-               else if (len > 0)
-                       c = crc & 0xff;
+       struct mkiss *ax = mkiss_get(tty);
+       struct net_device *dev = ax->dev;
+       unsigned int tmp, err;
  
-               len--;
+       /* First make sure we're connected. */
+       if (ax == NULL)
+               return -ENXIO;
  
-               switch (c) {
-                        case END:
-                                *ptr++ = ESC;
-                                *ptr++ = ESC_END;
-                                break;
-                        case ESC:
-                                *ptr++ = ESC;
-                                *ptr++ = ESC_ESC;
-                                break;
-                        default:
-                                *ptr++ = c;
-                                break;
+       switch (cmd) {
+       case SIOCGIFNAME:
+               err = copy_to_user((void __user *) arg, ax->dev->name,
+                                  strlen(ax->dev->name) + 1) ? -EFAULT : 0;
+               break;
+
+       case SIOCGIFENCAP:
+               err = put_user(4, (int __user *) arg);
+               break;
+
+       case SIOCSIFENCAP:
+               if (get_user(tmp, (int __user *) arg)) {
+                       err = -EFAULT;
+                       break;
                 }
-       }
-       *ptr++ = END;
-       return ptr - d;         
-}
  
-static void kiss_unesc(struct ax_disp *ax, unsigned char s)
-{
-       switch (s) {
-               case END:
-                       /* drop keeptest bit = VSV */
-                       if (test_bit(AXF_KEEPTEST, &ax->flags))
-                               clear_bit(AXF_KEEPTEST, &ax->flags);
+               ax->mode = tmp;
+               dev->addr_len        = AX25_ADDR_LEN;
+               dev->hard_header_len = AX25_KISS_HEADER_LEN +
+                                      AX25_MAX_HEADER_LEN + 3;
+               dev->type            = ARPHRD_AX25;
  
-                       if (!test_and_clear_bit(AXF_ERROR, &ax->flags) && (ax->rcount > 2))
-                               ax_bump(ax);
+               err = 0;
+               break;
  
-                       clear_bit(AXF_ESCAPE, &ax->flags);
-                       ax->rcount = 0;
-                       return;
+       case SIOCSIFHWADDR: {
+               char addr[AX25_ADDR_LEN];
+printk(KERN_INFO "In SIOCSIFHWADDR");
  
-               case ESC:
-                       set_bit(AXF_ESCAPE, &ax->flags);
-                       return;
-               case ESC_ESC:
-                       if (test_and_clear_bit(AXF_ESCAPE, &ax->flags))
-                               s = ESC;
+               if (copy_from_user(&addr,
+                                  (void __user *) arg, AX25_ADDR_LEN)) {
+                       err = -EFAULT;
                         break;
-               case ESC_END:
-                       if (test_and_clear_bit(AXF_ESCAPE, &ax->flags))
-                               s = END;
-                       break;
-       }
-
-       spin_lock_bh(&ax->buflock);
-       if (!test_bit(AXF_ERROR, &ax->flags)) {
-               if (ax->rcount < ax->buffsize) {
-                       ax->rbuff[ax->rcount++] = s;
-                       spin_unlock_bh(&ax->buflock);
-                       return;
                 }
  
-               ax->rx_over_errors++;
-               set_bit(AXF_ERROR, &ax->flags);
+               spin_lock_irq(&dev->xmit_lock);
+               memcpy(dev->dev_addr, addr, AX25_ADDR_LEN);
+               spin_unlock_irq(&dev->xmit_lock);
+
+               err = 0;
+               break;
+       }
+       default:
+               err = -ENOIOCTLCMD;
         }
-       spin_unlock_bh(&ax->buflock);
-}
  
+       mkiss_put(ax);
  
-static int ax_set_mac_address(struct net_device *dev, void __user *addr)
-{
-       if (copy_from_user(dev->dev_addr, addr, AX25_ADDR_LEN))
-               return -EFAULT;
-       return 0;
+       return err;
  }
  
-static int ax_set_dev_mac_address(struct net_device *dev, void *addr)
+/*
+ * Handle the 'receiver data ready' interrupt.
+ * This function is called by the 'tty_io' module in the kernel when
+ * a block of data has been received, which can now be decapsulated
+ * and sent on to the AX.25 layer for further processing.
+ */
+static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp,
+       char *fp, int count)
  {
-       struct sockaddr *sa = addr;
-
-       memcpy(dev->dev_addr, sa->sa_data, AX25_ADDR_LEN);
+       struct mkiss *ax = mkiss_get(tty);
  
-       return 0;
-}
-
-
-/* Perform I/O control on an active ax25 channel. */
-static int ax25_disp_ioctl(struct tty_struct *tty, void *file, int cmd, void __user *arg)
-{
-       struct ax_disp *ax = (struct ax_disp *) tty->disc_data;
-       unsigned int tmp;
+       if (!ax)
+               return;
  
-       /* First make sure we're connected. */
-       if (ax == NULL || ax->magic != AX25_MAGIC)
-               return -EINVAL;
+       /*
+        * Argh! mtu change time! - costs us the packet part received
+        * at the change
+        */
+       if (ax->mtu != ax->dev->mtu + 73)
+               ax_changedmtu(ax);
  
-       switch (cmd) {
-               case SIOCGIFNAME:
-                       if (copy_to_user(arg, ax->dev->name, strlen(ax->dev->name) + 1))
-                               return -EFAULT;
-                       return 0;
-
-               case SIOCGIFENCAP:
-                       return put_user(4, (int __user *)arg);
-
-               case SIOCSIFENCAP:
-                       if (get_user(tmp, (int __user *)arg))
-                               return -EFAULT;
-                       ax->mode = tmp;
-                       ax->dev->addr_len        = AX25_ADDR_LEN;         /* sizeof an AX.25 addr */
-                       ax->dev->hard_header_len = AX25_KISS_HEADER_LEN + AX25_MAX_HEADER_LEN + 3;
-                       ax->dev->type            = ARPHRD_AX25;
-                       return 0;
-
-                case SIOCSIFHWADDR:
-                       return ax_set_mac_address(ax->dev, arg);
+       /* Read the characters out of the buffer */
+       while (count--) {
+               if (fp != NULL && *fp++) {
+                       if (!test_and_set_bit(AXF_ERROR, &ax->flags))
+                               ax->stats.rx_errors++;
+                       cp++;
+                       continue;
+               }
  
-               default:
-                       return -ENOIOCTLCMD;
+               kiss_unesc(ax, *cp++);
         }
+
+       mkiss_put(ax);
+       if (test_and_clear_bit(TTY_THROTTLED, &tty->flags)
+           && tty->driver->unthrottle)
+               tty->driver->unthrottle(tty);
  }
  
-static int ax_open_dev(struct net_device *dev)
+static int mkiss_receive_room(struct tty_struct *tty)
  {
-       struct ax_disp *ax = netdev_priv(dev);
-
-       if (ax->tty == NULL)
-               return -ENODEV;
-
-       return 0;
+       return 65536;  /* We can handle an infinite amount of data. :-) */
  }
  
-
-/* Initialize the driver.  Called by network startup. */
-static int ax25_init(struct net_device *dev)
+/*
+ * Called by the driver when there's room for more data.  If we have
+ * more packets to send, we send them here.
+ */
+static void mkiss_write_wakeup(struct tty_struct *tty)
  {
-       struct ax_disp *ax = netdev_priv(dev);
-
-       static char ax25_bcast[AX25_ADDR_LEN] =
-               {'Q'<<1,'S'<<1,'T'<<1,' '<<1,' '<<1,' '<<1,'0'<<1};
-       static char ax25_test[AX25_ADDR_LEN] =
-               {'L'<<1,'I'<<1,'N'<<1,'U'<<1,'X'<<1,' '<<1,'1'<<1};
-
-       if (ax == NULL)         /* Allocation failed ?? */
-               return -ENODEV;
+       struct mkiss *ax = mkiss_get(tty);
+       int actual;
  
-       /* Set up the "AX25 Control Block". (And clear statistics) */
-       memset(ax, 0, sizeof (struct ax_disp));
-       ax->magic  = AX25_MAGIC;
-       ax->dev    = dev;
+       if (!ax)
+               return;
  
-       /* Finish setting up the DEVICE info. */
-       dev->mtu             = AX_MTU;
-       dev->hard_start_xmit = ax_xmit;
-       dev->open            = ax_open_dev;
-       dev->stop            = ax_close;
-       dev->get_stats       = ax_get_stats;
-       dev->set_mac_address = ax_set_dev_mac_address;
-       dev->hard_header_len = 0;
-       dev->addr_len        = 0;
-       dev->type            = ARPHRD_AX25;
-       dev->tx_queue_len    = 10;
-       dev->hard_header     = ax_header;
-       dev->rebuild_header  = ax_rebuild_header;
+       if (ax->xleft <= 0)  {
+               /* Now serial buffer is almost free & we can start
+                * transmission of another packet
+                */
+               clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
  
-       memcpy(dev->broadcast, ax25_bcast, AX25_ADDR_LEN);
-       memcpy(dev->dev_addr,  ax25_test,  AX25_ADDR_LEN);
+               netif_wake_queue(ax->dev);
+               goto out;
+       }
  
-       /* New-style flags. */
-       dev->flags      = IFF_BROADCAST | IFF_MULTICAST;
+       actual = tty->driver->write(tty, ax->xhead, ax->xleft);
+       ax->xleft -= actual;
+       ax->xhead += actual;
  
-       return 0;
+out:
+       mkiss_put(ax);
  }
  
+static struct tty_ldisc ax_ldisc = {
+       .magic          = TTY_LDISC_MAGIC,
+       .name           = "mkiss",
+       .open           = mkiss_open,
+       .close          = mkiss_close,
+       .ioctl          = mkiss_ioctl,
+       .receive_buf    = mkiss_receive_buf,
+       .receive_room   = mkiss_receive_room,
+       .write_wakeup   = mkiss_write_wakeup
+};
  
-/* ******************************************************************** */
-/* *                   Init MKISS driver                             * */
-/* ******************************************************************** */
+static char banner[] __initdata = KERN_INFO \
+       "mkiss: AX.25 Multikiss, Hans Albas PE1AYX\n";
+static char msg_regfail[] __initdata = KERN_ERR \
+       "mkiss: can't register line discipline (err = %d)\n";
  
  static int __init mkiss_init_driver(void)
  {
@@ -886,64 +885,27 @@ static int __init mkiss_init_driver(void)
  
         printk(banner);
  
-       if (ax25_maxdev < 4)
-         ax25_maxdev = 4; /* Sanity */
+       if ((status = tty_register_ldisc(N_AX25, &ax_ldisc)) != 0)
+               printk(msg_regfail);
  
-       if ((ax25_ctrls = kmalloc(sizeof(void *) * ax25_maxdev, GFP_KERNEL)) == NULL) {
-               printk(KERN_ERR "mkiss: Can't allocate ax25_ctrls[] array!\n");
-               return -ENOMEM;
-       }
-
-       /* Clear the pointer array, we allocate devices when we need them */
-       memset(ax25_ctrls, 0, sizeof(void*) * ax25_maxdev); /* Pointers */
-
-       /* Fill in our line protocol discipline, and register it */
-       ax_ldisc.magic          = TTY_LDISC_MAGIC;
-       ax_ldisc.name           = "mkiss";
-       ax_ldisc.open           = ax25_open;
-       ax_ldisc.close          = ax25_close;
-       ax_ldisc.ioctl          = (int (*)(struct tty_struct *, struct file *,
-                                       unsigned int, unsigned long))ax25_disp_ioctl;
-       ax_ldisc.receive_buf    = ax25_receive_buf;
-       ax_ldisc.receive_room   = ax25_receive_room;
-       ax_ldisc.write_wakeup   = ax25_write_wakeup;
-
-       if ((status = tty_register_ldisc(N_AX25, &ax_ldisc)) != 0) {
-               printk(KERN_ERR "mkiss: can't register line discipline (err = %d)\n", status);
-               kfree(ax25_ctrls);
-       }
         return status;
  }
  
+static const char msg_unregfail[] __exitdata = KERN_ERR \
+       "mkiss: can't unregister line discipline (err = %d)\n";
+
  static void __exit mkiss_exit_driver(void)
  {
-       int i;
-
-       for (i = 0; i < ax25_maxdev; i++) {
-               if (ax25_ctrls[i]) {
-                       /*
-                       * VSV = if dev->start==0, then device
-                       * unregistered while close proc.
-                       */
-                       if (netif_running(&ax25_ctrls[i]->dev))
-                               unregister_netdev(&ax25_ctrls[i]->dev);
-                       kfree(ax25_ctrls[i]);
-               }
-       }
+       int ret;
  
-       kfree(ax25_ctrls);
-       ax25_ctrls = NULL;
-
-       if ((i = tty_unregister_ldisc(N_AX25)))
-               printk(KERN_ERR "mkiss: can't unregister line discipline (err = %d)\n", i);
+       if ((ret = tty_unregister_ldisc(N_AX25)))
+               printk(msg_unregfail, ret);
  }
  
-MODULE_AUTHOR("Hans Albas PE1AYX <hans@esrac.ele.tue.nl>");
+MODULE_AUTHOR("Ralf Baechle DL5RB <ralf@linux-mips.org>");
  MODULE_DESCRIPTION("KISS driver for AX.25 over TTYs");
-MODULE_PARM(ax25_maxdev, "i");
-MODULE_PARM_DESC(ax25_maxdev, "number of MKISS devices");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_LDISC(N_AX25);
+
  module_init(mkiss_init_driver);
  module_exit(mkiss_exit_driver);
-
diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c

index 6482d994d4899aef539f2639bd70703c306d3c90..0de3bb9061741bca16cf853d89941b27355354a9 100644 (file)
--- a/drivers/net/ibm_emac/ibm_emac_core.c
+++ b/drivers/net/ibm_emac/ibm_emac_core.c
@@ -1253,7 +1253,7 @@ static int emac_init_tah(struct ocp_enet_private *fep)
                  TAH_MR_CVR | TAH_MR_ST_768 | TAH_MR_TFS_10KB | TAH_MR_DTFP |
                  TAH_MR_DIG);
  
-       iounmap(&tahp);
+       iounmap(tahp);
  
         return 0;
  }
@@ -1712,11 +1712,10 @@ struct mal_commac_ops emac_commac_ops = {
  };
  
  #ifdef CONFIG_NET_POLL_CONTROLLER
-static int emac_netpoll(struct net_device *ndev)
+static void emac_netpoll(struct net_device *ndev)
  {
         emac_rxeob_dev((void *)ndev, 0);
         emac_txeob_dev((void *)ndev, 0);
-       return 0;
  }
  #endif
  
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c

index c39b0609742a3394754b2cf70e2841305bc2a8c8..32d5fabd4b1019a2c26378f72dbd7d6d518fb2e4 100644 (file)
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -1144,7 +1144,7 @@ static void ibmveth_proc_unregister_driver(void)
  
  static struct vio_device_id ibmveth_device_table[] __devinitdata= {
         { "network", "IBM,l-lan"},
-       { 0,}
+       { "", "" }
  };
  
  MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c

index d520b5920d6cc866053dc23b3df7887839f52f4f..49e5467bdd7336a34dc9cb5bd5928291c683b408 100644 (file)
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -499,7 +499,7 @@ static int ioc3_mdio_read(struct net_device *dev, int phy, int reg)
         ioc3_w_micr((phy << MICR_PHYADDR_SHIFT) | reg | MICR_READTRIG);
         while (ioc3_r_micr() & MICR_BUSY);
  
-       return ioc3_r_micr() & MIDR_DATA_MASK;
+       return ioc3_r_midr_r() & MIDR_DATA_MASK;
  }
  
  static void ioc3_mdio_write(struct net_device *dev, int phy, int reg, int data)
@@ -1291,7 +1291,6 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
         dev->features           = NETIF_F_IP_CSUM;
  #endif
  
-       ioc3_setup_duplex(ip);
         sw_physid1 = ioc3_mdio_read(dev, ip->mii.phy_id, MII_PHYSID1);
         sw_physid2 = ioc3_mdio_read(dev, ip->mii.phy_id, MII_PHYSID2);
  
@@ -1300,6 +1299,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                 goto out_stop;
  
         mii_check_media(&ip->mii, 1, 1);
+       ioc3_setup_duplex(ip);
  
         vendor = (sw_physid1 << 12) | (sw_physid2 >> 4);
         model  = (sw_physid2 >> 4) & 0x3f;
@@ -1524,7 +1524,7 @@ static void ioc3_get_drvinfo (struct net_device *dev,
         struct ethtool_drvinfo *info)
  {
         struct ioc3_private *ip = netdev_priv(dev);
-                                                                                
+
          strcpy (info->driver, IOC3_NAME);
          strcpy (info->version, IOC3_VERSION);
          strcpy (info->bus_info, pci_name(ip->pdev));
@@ -1550,7 +1550,7 @@ static int ioc3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
         spin_lock_irq(&ip->ioc3_lock);
         rc = mii_ethtool_sset(&ip->mii, cmd);
         spin_unlock_irq(&ip->ioc3_lock);
-                                                                        
+
         return rc;
  }
  
diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c

index 55af32e9bf082ad7f8d88b96b2e068eb0e35304f..183ba97785b0dbadeff6745838d254845fa5a6b1 100644 (file)
--- a/drivers/net/iseries_veth.c
+++ b/drivers/net/iseries_veth.c
@@ -1370,7 +1370,7 @@ static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id)
   */
  static struct vio_device_id veth_device_table[] __devinitdata = {
         { "vlan", "" },
-       { NULL, NULL }
+       { "", "" }
  };
  MODULE_DEVICE_TABLE(vio, veth_device_table);
  
diff --git a/drivers/net/ixgb/ixgb.h b/drivers/net/ixgb/ixgb.h

index f8d3385c7842d3fe6f34b6d672da43353ce139a4..c83271b386215cefef859df73572f34bc535fa17 100644 (file)
--- a/drivers/net/ixgb/ixgb.h
+++ b/drivers/net/ixgb/ixgb.h
@@ -119,7 +119,7 @@ struct ixgb_adapter;
   * so a DMA handle can be stored along with the buffer */
  struct ixgb_buffer {
         struct sk_buff *skb;
-       uint64_t dma;
+       dma_addr_t dma;
         unsigned long time_stamp;
         uint16_t length;
         uint16_t next_to_watch;
diff --git a/drivers/net/ixgb/ixgb_ee.c b/drivers/net/ixgb/ixgb_ee.c

index 3aae110c55606ced13769acddf7d87c2649d1a4f..661a46b95a61e4a73a88f1774d8a3cc3564d8734 100644 (file)
--- a/drivers/net/ixgb/ixgb_ee.c
+++ b/drivers/net/ixgb/ixgb_ee.c
@@ -565,24 +565,6 @@ ixgb_get_ee_mac_addr(struct ixgb_hw *hw,
         }
  }
  
-/******************************************************************************
- * return the compatibility flags from EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *          compatibility flags if EEPROM contents are valid, 0 otherwise
- ******************************************************************************/
-uint16_t
-ixgb_get_ee_compatibility(struct ixgb_hw *hw)
-{
-       struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-       if(ixgb_check_and_get_eeprom_data(hw) == TRUE)
-               return (le16_to_cpu(ee_map->compatibility));
-
-       return(0);
-}
  
  /******************************************************************************
   * return the Printed Board Assembly number from EEPROM
@@ -602,81 +584,6 @@ ixgb_get_ee_pba_number(struct ixgb_hw *hw)
         return(0);
  }
  
-/******************************************************************************
- * return the Initialization Control Word 1 from EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *          Initialization Control Word 1 if EEPROM contents are valid, 0 otherwise
- ******************************************************************************/
-uint16_t
-ixgb_get_ee_init_ctrl_reg_1(struct ixgb_hw *hw)
-{
-       struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-       if(ixgb_check_and_get_eeprom_data(hw) == TRUE)
-               return (le16_to_cpu(ee_map->init_ctrl_reg_1));
-
-       return(0);
-}
-
-/******************************************************************************
- * return the Initialization Control Word 2 from EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *          Initialization Control Word 2 if EEPROM contents are valid, 0 otherwise
- ******************************************************************************/
-uint16_t
-ixgb_get_ee_init_ctrl_reg_2(struct ixgb_hw *hw)
-{
-       struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-       if(ixgb_check_and_get_eeprom_data(hw) == TRUE)
-               return (le16_to_cpu(ee_map->init_ctrl_reg_2));
-
-       return(0);
-}
-
-/******************************************************************************
- * return the Subsystem Id from EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *          Subsystem Id if EEPROM contents are valid, 0 otherwise
- ******************************************************************************/
-uint16_t
-ixgb_get_ee_subsystem_id(struct ixgb_hw *hw)
-{
-       struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-       if(ixgb_check_and_get_eeprom_data(hw) == TRUE)
-               return (le16_to_cpu(ee_map->subsystem_id));
-
-       return(0);
-}
-
-/******************************************************************************
- * return the Sub Vendor Id from EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *          Sub Vendor Id if EEPROM contents are valid, 0 otherwise
- ******************************************************************************/
-uint16_t
-ixgb_get_ee_subvendor_id(struct ixgb_hw *hw)
-{
-       struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-       if(ixgb_check_and_get_eeprom_data(hw) == TRUE)
-               return (le16_to_cpu(ee_map->subvendor_id));
-
-       return(0);
-}
  
  /******************************************************************************
   * return the Device Id from EEPROM
@@ -694,81 +601,6 @@ ixgb_get_ee_device_id(struct ixgb_hw *hw)
         if(ixgb_check_and_get_eeprom_data(hw) == TRUE)
                 return (le16_to_cpu(ee_map->device_id));
  
-       return(0);
-}
-
-/******************************************************************************
- * return the Vendor Id from EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *          Device Id if EEPROM contents are valid, 0 otherwise
- ******************************************************************************/
-uint16_t
-ixgb_get_ee_vendor_id(struct ixgb_hw *hw)
-{
-       struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-       if(ixgb_check_and_get_eeprom_data(hw) == TRUE)
-               return (le16_to_cpu(ee_map->vendor_id));
-
-       return(0);
-}
-
-/******************************************************************************
- * return the Software Defined Pins Register from EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *          SDP Register if EEPROM contents are valid, 0 otherwise
- ******************************************************************************/
-uint16_t
-ixgb_get_ee_swdpins_reg(struct ixgb_hw *hw)
-{
-       struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-       if(ixgb_check_and_get_eeprom_data(hw) == TRUE)
-               return (le16_to_cpu(ee_map->swdpins_reg));
-
-       return(0);
+       return (0);
  }
  
-/******************************************************************************
- * return the D3 Power Management Bits from EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *          D3 Power Management Bits if EEPROM contents are valid, 0 otherwise
- ******************************************************************************/
-uint8_t
-ixgb_get_ee_d3_power(struct ixgb_hw *hw)
-{
-       struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-       if(ixgb_check_and_get_eeprom_data(hw) == TRUE)
-               return (le16_to_cpu(ee_map->d3_power));
-
-       return(0);
-}
-
-/******************************************************************************
- * return the D0 Power Management Bits from EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *          D0 Power Management Bits if EEPROM contents are valid, 0 otherwise
- ******************************************************************************/
-uint8_t
-ixgb_get_ee_d0_power(struct ixgb_hw *hw)
-{
-       struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-       if(ixgb_check_and_get_eeprom_data(hw) == TRUE)
-               return (le16_to_cpu(ee_map->d0_power));
-
-       return(0);
-}
diff --git a/drivers/net/ixgb/ixgb_ethtool.c b/drivers/net/ixgb/ixgb_ethtool.c

index 3fa113854eebc3d27c8cda25e0d5600a30a48e0e..9d026ed77ddd4250660ad117d76c2e319e6d3eda 100644 (file)
--- a/drivers/net/ixgb/ixgb_ethtool.c
+++ b/drivers/net/ixgb/ixgb_ethtool.c
@@ -98,10 +98,10 @@ static struct ixgb_stats ixgb_gstrings_stats[] = {
  static int
  ixgb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
  
         ecmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE);
-       ecmd->advertising = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE);
+       ecmd->advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE);
         ecmd->port = PORT_FIBRE;
         ecmd->transceiver = XCVR_EXTERNAL;
  
@@ -120,7 +120,7 @@ ixgb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
  static int
  ixgb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
  
         if(ecmd->autoneg == AUTONEG_ENABLE ||
            ecmd->speed + ecmd->duplex != SPEED_10000 + DUPLEX_FULL)
@@ -130,6 +130,12 @@ ixgb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
                 ixgb_down(adapter, TRUE);
                 ixgb_reset(adapter);
                 ixgb_up(adapter);
+               /* be optimistic about our link, since we were up before */
+               adapter->link_speed = 10000;
+               adapter->link_duplex = FULL_DUPLEX;
+               netif_carrier_on(netdev);
+               netif_wake_queue(netdev);
+               
         } else
                 ixgb_reset(adapter);
  
@@ -140,7 +146,7 @@ static void
  ixgb_get_pauseparam(struct net_device *netdev,
                          struct ethtool_pauseparam *pause)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         struct ixgb_hw *hw = &adapter->hw;
         
         pause->autoneg = AUTONEG_DISABLE;
@@ -159,7 +165,7 @@ static int
  ixgb_set_pauseparam(struct net_device *netdev,
                          struct ethtool_pauseparam *pause)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         struct ixgb_hw *hw = &adapter->hw;
         
         if(pause->autoneg == AUTONEG_ENABLE)
@@ -177,6 +183,11 @@ ixgb_set_pauseparam(struct net_device *netdev,
         if(netif_running(adapter->netdev)) {
                 ixgb_down(adapter, TRUE);
                 ixgb_up(adapter);
+               /* be optimistic about our link, since we were up before */
+               adapter->link_speed = 10000;
+               adapter->link_duplex = FULL_DUPLEX;
+               netif_carrier_on(netdev);
+               netif_wake_queue(netdev);
         } else
                 ixgb_reset(adapter);
                 
@@ -186,19 +197,26 @@ ixgb_set_pauseparam(struct net_device *netdev,
  static uint32_t
  ixgb_get_rx_csum(struct net_device *netdev)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
+
         return adapter->rx_csum;
  }
  
  static int
  ixgb_set_rx_csum(struct net_device *netdev, uint32_t data)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
+
         adapter->rx_csum = data;
  
         if(netif_running(netdev)) {
                 ixgb_down(adapter,TRUE);
                 ixgb_up(adapter);
+               /* be optimistic about our link, since we were up before */
+               adapter->link_speed = 10000;
+               adapter->link_duplex = FULL_DUPLEX;
+               netif_carrier_on(netdev);
+               netif_wake_queue(netdev);
         } else
                 ixgb_reset(adapter);
         return 0;
@@ -246,14 +264,15 @@ static void
  ixgb_get_regs(struct net_device *netdev,
                    struct ethtool_regs *regs, void *p)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         struct ixgb_hw *hw = &adapter->hw;
         uint32_t *reg = p;
         uint32_t *reg_start = reg;
         uint8_t i;
  
         /* the 1 (one) below indicates an attempt at versioning, if the
-        * interface in ethtool or the driver this 1 should be incremented */
+        * interface in ethtool or the driver changes, this 1 should be
+        * incremented */
         regs->version = (1<<24) | hw->revision_id << 16 | hw->device_id;
  
         /* General Registers */
@@ -283,7 +302,8 @@ ixgb_get_regs(struct net_device *netdev,
         *reg++ = IXGB_READ_REG(hw, RAIDC);      /*  19 */
         *reg++ = IXGB_READ_REG(hw, RXCSUM);     /*  20 */
  
-       for (i = 0; i < IXGB_RAR_ENTRIES; i++) {
+       /* there are 16 RAR entries in hardware, we only use 3 */
+       for(i = 0; i < 16; i++) {
                 *reg++ = IXGB_READ_REG_ARRAY(hw, RAL, (i << 1)); /*21,...,51 */
                 *reg++ = IXGB_READ_REG_ARRAY(hw, RAH, (i << 1)); /*22,...,52 */
         }
@@ -391,7 +411,7 @@ static int
  ixgb_get_eeprom(struct net_device *netdev,
                   struct ethtool_eeprom *eeprom, uint8_t *bytes)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         struct ixgb_hw *hw = &adapter->hw;
         uint16_t *eeprom_buff;
         int i, max_len, first_word, last_word;
@@ -439,7 +459,7 @@ static int
  ixgb_set_eeprom(struct net_device *netdev,
                   struct ethtool_eeprom *eeprom, uint8_t *bytes)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         struct ixgb_hw *hw = &adapter->hw;
         uint16_t *eeprom_buff;
         void *ptr;
@@ -497,7 +517,7 @@ static void
  ixgb_get_drvinfo(struct net_device *netdev,
                    struct ethtool_drvinfo *drvinfo)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
  
         strncpy(drvinfo->driver,  ixgb_driver_name, 32);
         strncpy(drvinfo->version, ixgb_driver_version, 32);
@@ -512,7 +532,7 @@ static void
  ixgb_get_ringparam(struct net_device *netdev,
                 struct ethtool_ringparam *ring)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         struct ixgb_desc_ring *txdr = &adapter->tx_ring;
         struct ixgb_desc_ring *rxdr = &adapter->rx_ring;
  
@@ -530,7 +550,7 @@ static int
  ixgb_set_ringparam(struct net_device *netdev,
                 struct ethtool_ringparam *ring)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         struct ixgb_desc_ring *txdr = &adapter->tx_ring;
         struct ixgb_desc_ring *rxdr = &adapter->rx_ring;
         struct ixgb_desc_ring tx_old, tx_new, rx_old, rx_new;
@@ -573,6 +593,11 @@ ixgb_set_ringparam(struct net_device *netdev,
                 adapter->tx_ring = tx_new;
                 if((err = ixgb_up(adapter)))
                         return err;
+               /* be optimistic about our link, since we were up before */
+               adapter->link_speed = 10000;
+               adapter->link_duplex = FULL_DUPLEX;
+               netif_carrier_on(netdev);
+               netif_wake_queue(netdev);
         }
  
         return 0;
@@ -607,7 +632,7 @@ ixgb_led_blink_callback(unsigned long data)
  static int
  ixgb_phys_id(struct net_device *netdev, uint32_t data)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
  
         if(!data || data > (uint32_t)(MAX_SCHEDULE_TIMEOUT / HZ))
                 data = (uint32_t)(MAX_SCHEDULE_TIMEOUT / HZ);
@@ -643,7 +668,7 @@ static void
  ixgb_get_ethtool_stats(struct net_device *netdev, 
                 struct ethtool_stats *stats, uint64_t *data)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         int i;
  
         ixgb_update_stats(adapter);
diff --git a/drivers/net/ixgb/ixgb_hw.h b/drivers/net/ixgb/ixgb_hw.h

index 97898efe7cc8075698ee51f7f35e7cd6a4cd63d1..8bcf31ed10c221ad5609f57f6fecc155d5fbc81a 100644 (file)
--- a/drivers/net/ixgb/ixgb_hw.h
+++ b/drivers/net/ixgb/ixgb_hw.h
@@ -822,17 +822,8 @@ extern void ixgb_clear_vfta(struct ixgb_hw *hw);
  
  /* Access functions to eeprom data */
  void ixgb_get_ee_mac_addr(struct ixgb_hw *hw, uint8_t *mac_addr);
-uint16_t ixgb_get_ee_compatibility(struct ixgb_hw *hw);
  uint32_t ixgb_get_ee_pba_number(struct ixgb_hw *hw);
-uint16_t ixgb_get_ee_init_ctrl_reg_1(struct ixgb_hw *hw);
-uint16_t ixgb_get_ee_init_ctrl_reg_2(struct ixgb_hw *hw);
-uint16_t ixgb_get_ee_subsystem_id(struct ixgb_hw *hw);
-uint16_t ixgb_get_ee_subvendor_id(struct ixgb_hw *hw);
  uint16_t ixgb_get_ee_device_id(struct ixgb_hw *hw);
-uint16_t ixgb_get_ee_vendor_id(struct ixgb_hw *hw);
-uint16_t ixgb_get_ee_swdpins_reg(struct ixgb_hw *hw);
-uint8_t ixgb_get_ee_d3_power(struct ixgb_hw *hw);
-uint8_t ixgb_get_ee_d0_power(struct ixgb_hw *hw);
  boolean_t ixgb_get_eeprom_data(struct ixgb_hw *hw);
  uint16_t ixgb_get_eeprom_word(struct ixgb_hw *hw, uint16_t index);
  
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c

index 097b90ccf575b2d17e8f6f67458bcd6003dba96c..5c555373adbe5802054b2ed3558219269d0e9ca5 100644 (file)
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -29,6 +29,11 @@
  #include "ixgb.h"
  
  /* Change Log
+ * 1.0.96 04/19/05
+ * - Make needlessly global code static -- bunk@stusta.de
+ * - ethtool cleanup -- shemminger@osdl.org
+ * - Support for MODULE_VERSION -- linville@tuxdriver.com
+ * - add skb_header_cloned check to the tso path -- herbert@apana.org.au
   * 1.0.88 01/05/05
   * - include fix to the condition that determines when to quit NAPI - Robert Olsson
   * - use netif_poll_{disable/enable} to synchronize between NAPI and i/f up/down
@@ -47,10 +52,9 @@ char ixgb_driver_string[] = "Intel(R) PRO/10GbE Network Driver";
  #else
  #define DRIVERNAPI "-NAPI"
  #endif
-
-#define DRV_VERSION "1.0.95-k2"DRIVERNAPI
+#define DRV_VERSION            "1.0.100-k2"DRIVERNAPI
  char ixgb_driver_version[] = DRV_VERSION;
-char ixgb_copyright[] = "Copyright (c) 1999-2005 Intel Corporation.";
+static char ixgb_copyright[] = "Copyright (c) 1999-2005 Intel Corporation.";
  
  /* ixgb_pci_tbl - PCI Device ID Table
   *
@@ -145,10 +149,12 @@ MODULE_LICENSE("GPL");
  MODULE_VERSION(DRV_VERSION);
  
  /* some defines for controlling descriptor fetches in h/w */
-#define RXDCTL_PTHRESH_DEFAULT 128     /* chip considers prefech below this */
-#define RXDCTL_HTHRESH_DEFAULT 16      /* chip will only prefetch if tail is 
-                                          pushed this many descriptors from head */
  #define RXDCTL_WTHRESH_DEFAULT 16      /* chip writes back at this many or RXT0 */
+#define RXDCTL_PTHRESH_DEFAULT 0               /* chip considers prefech below
+                                                * this */
+#define RXDCTL_HTHRESH_DEFAULT 0               /* chip will only prefetch if tail
+                                                * is pushed this many descriptors
+                                                * from head */
  
  /**
   * ixgb_init_module - Driver Registration Routine
@@ -376,7 +382,7 @@ ixgb_probe(struct pci_dev *pdev,
         SET_NETDEV_DEV(netdev, &pdev->dev);
  
         pci_set_drvdata(pdev, netdev);
-       adapter = netdev->priv;
+       adapter = netdev_priv(netdev);
         adapter->netdev = netdev;
         adapter->pdev = pdev;
         adapter->hw.back = adapter;
@@ -512,7 +518,7 @@ static void __devexit
  ixgb_remove(struct pci_dev *pdev)
  {
         struct net_device *netdev = pci_get_drvdata(pdev);
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
  
         unregister_netdev(netdev);
  
@@ -583,7 +589,7 @@ ixgb_sw_init(struct ixgb_adapter *adapter)
  static int
  ixgb_open(struct net_device *netdev)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         int err;
  
         /* allocate transmit descriptors */
@@ -626,7 +632,7 @@ err_setup_tx:
  static int
  ixgb_close(struct net_device *netdev)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
  
         ixgb_down(adapter, TRUE);
  
@@ -1017,7 +1023,7 @@ ixgb_clean_rx_ring(struct ixgb_adapter *adapter)
  static int
  ixgb_set_mac(struct net_device *netdev, void *p)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         struct sockaddr *addr = p;
  
         if(!is_valid_ether_addr(addr->sa_data))
@@ -1043,7 +1049,7 @@ ixgb_set_mac(struct net_device *netdev, void *p)
  static void
  ixgb_set_multi(struct net_device *netdev)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         struct ixgb_hw *hw = &adapter->hw;
         struct dev_mc_list *mc_ptr;
         uint32_t rctl;
@@ -1371,7 +1377,7 @@ ixgb_tx_queue(struct ixgb_adapter *adapter, int count, int vlan_id,int tx_flags)
  static int
  ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         unsigned int first;
         unsigned int tx_flags = 0;
         unsigned long flags;
@@ -1425,7 +1431,7 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  static void
  ixgb_tx_timeout(struct net_device *netdev)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
  
         /* Do the reset outside of interrupt context */
         schedule_work(&adapter->tx_timeout_task);
@@ -1434,7 +1440,7 @@ ixgb_tx_timeout(struct net_device *netdev)
  static void
  ixgb_tx_timeout_task(struct net_device *netdev)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
  
         ixgb_down(adapter, TRUE);
         ixgb_up(adapter);
@@ -1451,7 +1457,7 @@ ixgb_tx_timeout_task(struct net_device *netdev)
  static struct net_device_stats *
  ixgb_get_stats(struct net_device *netdev)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
  
         return &adapter->net_stats;
  }
@@ -1467,7 +1473,7 @@ ixgb_get_stats(struct net_device *netdev)
  static int
  ixgb_change_mtu(struct net_device *netdev, int new_mtu)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         int max_frame = new_mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH;
         int old_max_frame = netdev->mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH;
  
@@ -1522,7 +1528,8 @@ ixgb_update_stats(struct ixgb_adapter *adapter)
  
                 multi |= ((u64)IXGB_READ_REG(&adapter->hw, MPRCH) << 32);
                 /* fix up multicast stats by removing broadcasts */
-               multi -= bcast;
+               if(multi >= bcast)
+                       multi -= bcast;
                 
                 adapter->stats.mprcl += (multi & 0xFFFFFFFF);
                 adapter->stats.mprch += (multi >> 32);
@@ -1641,7 +1648,7 @@ static irqreturn_t
  ixgb_intr(int irq, void *data, struct pt_regs *regs)
  {
         struct net_device *netdev = data;
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         struct ixgb_hw *hw = &adapter->hw;
         uint32_t icr = IXGB_READ_REG(hw, ICR);
  #ifndef CONFIG_IXGB_NAPI
@@ -1688,7 +1695,7 @@ ixgb_intr(int irq, void *data, struct pt_regs *regs)
  static int
  ixgb_clean(struct net_device *netdev, int *budget)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         int work_to_do = min(*budget, netdev->quota);
         int tx_cleaned;
         int work_done = 0;
@@ -2017,7 +2024,7 @@ ixgb_alloc_rx_buffers(struct ixgb_adapter *adapter)
  static void
  ixgb_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         uint32_t ctrl, rctl;
  
         ixgb_irq_disable(adapter);
@@ -2055,7 +2062,7 @@ ixgb_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp)
  static void
  ixgb_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         uint32_t vfta, index;
  
         /* add VID to filter table */
@@ -2069,7 +2076,7 @@ ixgb_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid)
  static void
  ixgb_vlan_rx_kill_vid(struct net_device *netdev, uint16_t vid)
  {
-       struct ixgb_adapter *adapter = netdev->priv;
+       struct ixgb_adapter *adapter = netdev_priv(netdev);
         uint32_t vfta, index;
  
         ixgb_irq_disable(adapter);
diff --git a/drivers/net/jazzsonic.c b/drivers/net/jazzsonic.c

index 7fec613e1675496167be6fb2c7777209d7e0fe84..8423cb6875f06d75ac38f18881199a5f2b8acb41 100644 (file)
--- a/drivers/net/jazzsonic.c
+++ b/drivers/net/jazzsonic.c
@@ -1,5 +1,10 @@
  /*
- * sonic.c
+ * jazzsonic.c
+ *
+ * (C) 2005 Finn Thain
+ *
+ * Converted to DMA API, and (from the mac68k project) introduced
+ * dhd's support for 16-bit cards.
   *
   * (C) 1996,1998 by Thomas Bogendoerfer (tsbogend@alpha.franken.de)
   * 
@@ -28,8 +33,8 @@
  #include <linux/netdevice.h>
  #include <linux/etherdevice.h>
  #include <linux/skbuff.h>
-#include <linux/bitops.h>
  #include <linux/device.h>
+#include <linux/dma-mapping.h>
  
  #include <asm/bootinfo.h>
  #include <asm/system.h>
@@ -44,22 +49,20 @@ static struct platform_device *jazz_sonic_device;
  
  #define SONIC_MEM_SIZE 0x100
  
-#define SREGS_PAD(n)    u16 n;
-
  #include "sonic.h"
  
  /*
   * Macros to access SONIC registers
   */
-#define SONIC_READ(reg) (*((volatile unsigned int *)base_addr+reg))
+#define SONIC_READ(reg) (*((volatile unsigned int *)dev->base_addr+reg))
  
  #define SONIC_WRITE(reg,val)                                           \
  do {                                                                   \
-       *((volatile unsigned int *)base_addr+(reg)) = (val);            \
+       *((volatile unsigned int *)dev->base_addr+(reg)) = (val);               \
  } while (0)
  
  
-/* use 0 for production, 1 for verification, >2 for debug */
+/* use 0 for production, 1 for verification, >1 for debug */
  #ifdef SONIC_DEBUG
  static unsigned int sonic_debug = SONIC_DEBUG;
  #else 
@@ -85,18 +88,18 @@ static unsigned short known_revisions[] =
         0xffff                  /* end of list */
  };
  
-static int __init sonic_probe1(struct net_device *dev, unsigned long base_addr,
-                               unsigned int irq)
+static int __init sonic_probe1(struct net_device *dev)
  {
         static unsigned version_printed;
         unsigned int silicon_revision;
         unsigned int val;
-       struct sonic_local *lp;
+       struct sonic_local *lp = netdev_priv(dev);
         int err = -ENODEV;
         int i;
  
-       if (!request_mem_region(base_addr, SONIC_MEM_SIZE, jazz_sonic_string))
+       if (!request_mem_region(dev->base_addr, SONIC_MEM_SIZE, jazz_sonic_string))
                 return -EBUSY;
+
         /*
          * get the Silicon Revision ID. If this is one of the known
          * one assume that we found a SONIC ethernet controller at
@@ -120,11 +123,7 @@ static int __init sonic_probe1(struct net_device *dev, unsigned long base_addr,
         if (sonic_debug  &&  version_printed++ == 0)
                 printk(version);
  
-       printk("%s: Sonic ethernet found at 0x%08lx, ", dev->name, base_addr);
-
-       /* Fill in the 'dev' fields. */
-       dev->base_addr = base_addr;
-       dev->irq = irq;
+       printk(KERN_INFO "%s: Sonic ethernet found at 0x%08lx, ", lp->device->bus_id, dev->base_addr);
  
         /*
          * Put the sonic into software reset, then
@@ -138,84 +137,44 @@ static int __init sonic_probe1(struct net_device *dev, unsigned long base_addr,
                 dev->dev_addr[i*2+1] = val >> 8;
         }
  
-       printk("HW Address ");
-       for (i = 0; i < 6; i++) {
-               printk("%2.2x", dev->dev_addr[i]);
-               if (i<5)
-                       printk(":");
-       }
-
-       printk(" IRQ %d\n", irq);
-
         err = -ENOMEM;
      
         /* Initialize the device structure. */
-       if (dev->priv == NULL) {
-               /*
-                * the memory be located in the same 64kb segment
-                */
-               lp = NULL;
-               i = 0;
-               do {
-                       lp = kmalloc(sizeof(*lp), GFP_KERNEL);
-                       if ((unsigned long) lp >> 16
-                           != ((unsigned long)lp + sizeof(*lp) ) >> 16) {
-                               /* FIXME, free the memory later */
-                               kfree(lp);
-                               lp = NULL;
-                       }
-               } while (lp == NULL && i++ < 20);
-
-               if (lp == NULL) {
-                       printk("%s: couldn't allocate memory for descriptors\n",
-                              dev->name);
-                       goto out;
-               }
  
-               memset(lp, 0, sizeof(struct sonic_local));
-
-               /* get the virtual dma address */
-               lp->cda_laddr = vdma_alloc(CPHYSADDR(lp),sizeof(*lp));
-               if (lp->cda_laddr == ~0UL) {
-                       printk("%s: couldn't get DMA page entry for "
-                              "descriptors\n", dev->name);
-                       goto out1;
-               }
-
-               lp->tda_laddr = lp->cda_laddr + sizeof (lp->cda);
-               lp->rra_laddr = lp->tda_laddr + sizeof (lp->tda);
-               lp->rda_laddr = lp->rra_laddr + sizeof (lp->rra);
-       
-               /* allocate receive buffer area */
-               /* FIXME, maybe we should use skbs */
-               lp->rba = kmalloc(SONIC_NUM_RRS * SONIC_RBSIZE, GFP_KERNEL);
-               if (!lp->rba) {
-                       printk("%s: couldn't allocate receive buffers\n",
-                              dev->name);
-                       goto out2;
-               }
+       lp->dma_bitmode = SONIC_BITMODE32;
  
-               /* get virtual dma address */
-               lp->rba_laddr = vdma_alloc(CPHYSADDR(lp->rba),
-                                          SONIC_NUM_RRS * SONIC_RBSIZE);
-               if (lp->rba_laddr == ~0UL) {
-                       printk("%s: couldn't get DMA page entry for receive "
-                              "buffers\n",dev->name);
-                       goto out3;
-               }
-
-               /* now convert pointer to KSEG1 pointer */
-               lp->rba = (char *)KSEG1ADDR(lp->rba);
-               flush_cache_all();
-               dev->priv = (struct sonic_local *)KSEG1ADDR(lp);
+       /* Allocate the entire chunk of memory for the descriptors.
+           Note that this cannot cross a 64K boundary. */
+       if ((lp->descriptors = dma_alloc_coherent(lp->device,
+                               SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+                               &lp->descriptors_laddr, GFP_KERNEL)) == NULL) {
+               printk(KERN_ERR "%s: couldn't alloc DMA memory for descriptors.\n", lp->device->bus_id);
+               goto out;
         }
  
-       lp = (struct sonic_local *)dev->priv;
+       /* Now set up the pointers to point to the appropriate places */
+       lp->cda = lp->descriptors;
+       lp->tda = lp->cda + (SIZEOF_SONIC_CDA
+                            * SONIC_BUS_SCALE(lp->dma_bitmode));
+       lp->rda = lp->tda + (SIZEOF_SONIC_TD * SONIC_NUM_TDS
+                            * SONIC_BUS_SCALE(lp->dma_bitmode));
+       lp->rra = lp->rda + (SIZEOF_SONIC_RD * SONIC_NUM_RDS
+                            * SONIC_BUS_SCALE(lp->dma_bitmode));
+
+       lp->cda_laddr = lp->descriptors_laddr;
+       lp->tda_laddr = lp->cda_laddr + (SIZEOF_SONIC_CDA
+                            * SONIC_BUS_SCALE(lp->dma_bitmode));
+       lp->rda_laddr = lp->tda_laddr + (SIZEOF_SONIC_TD * SONIC_NUM_TDS
+                            * SONIC_BUS_SCALE(lp->dma_bitmode));
+       lp->rra_laddr = lp->rda_laddr + (SIZEOF_SONIC_RD * SONIC_NUM_RDS
+                            * SONIC_BUS_SCALE(lp->dma_bitmode));
+
         dev->open = sonic_open;
         dev->stop = sonic_close;
         dev->hard_start_xmit = sonic_send_packet;
-       dev->get_stats  = sonic_get_stats;
+       dev->get_stats = sonic_get_stats;
         dev->set_multicast_list = &sonic_multicast_list;
+       dev->tx_timeout = sonic_tx_timeout;
         dev->watchdog_timeo = TX_TIMEOUT;
  
         /*
@@ -226,14 +185,8 @@ static int __init sonic_probe1(struct net_device *dev, unsigned long base_addr,
         SONIC_WRITE(SONIC_MPT,0xffff);
  
         return 0;
-out3:
-       kfree(lp->rba);
-out2:
-       vdma_free(lp->cda_laddr);
-out1:
-       kfree(lp);
  out:
-       release_region(base_addr, SONIC_MEM_SIZE);
+       release_region(dev->base_addr, SONIC_MEM_SIZE);
         return err;
  }
  
@@ -245,7 +198,6 @@ static int __init jazz_sonic_probe(struct device *device)
  {
         struct net_device *dev;
         struct sonic_local *lp;
-       unsigned long base_addr;
         int err = 0;
         int i;
  
@@ -255,21 +207,26 @@ static int __init jazz_sonic_probe(struct device *device)
         if (mips_machgroup != MACH_GROUP_JAZZ)
                 return -ENODEV;
  
-       dev = alloc_etherdev(0);
+       dev = alloc_etherdev(sizeof(struct sonic_local));
         if (!dev)
                 return -ENOMEM;
  
+       lp = netdev_priv(dev);
+       lp->device = device;
+       SET_NETDEV_DEV(dev, device);
+       SET_MODULE_OWNER(dev);
+
         netdev_boot_setup_check(dev);
-       base_addr = dev->base_addr;
  
-       if (base_addr >= KSEG0) { /* Check a single specified location. */
-               err = sonic_probe1(dev, base_addr, dev->irq);
-       } else if (base_addr != 0) { /* Don't probe at all. */
+       if (dev->base_addr >= KSEG0) { /* Check a single specified location. */
+               err = sonic_probe1(dev);
+       } else if (dev->base_addr != 0) { /* Don't probe at all. */
                 err = -ENXIO;
         } else {
                 for (i = 0; sonic_portlist[i].port; i++) {
-                       int io = sonic_portlist[i].port;
-                       if (sonic_probe1(dev, io, sonic_portlist[i].irq) == 0)
+                       dev->base_addr = sonic_portlist[i].port;
+                       dev->irq = sonic_portlist[i].irq;
+                       if (sonic_probe1(dev) == 0)
                                 break;
                 }
                 if (!sonic_portlist[i].port)
@@ -281,14 +238,17 @@ static int __init jazz_sonic_probe(struct device *device)
         if (err)
                 goto out1;
  
+       printk("%s: MAC ", dev->name);
+       for (i = 0; i < 6; i++) {
+               printk("%2.2x", dev->dev_addr[i]);
+               if (i < 5)
+                       printk(":");
+       }
+       printk(" IRQ %d\n", dev->irq);
+
         return 0;
  
  out1:
-       lp = dev->priv;
-       vdma_free(lp->rba_laddr);
-       kfree(lp->rba);
-       vdma_free(lp->cda_laddr);
-       kfree(lp);
         release_region(dev->base_addr, SONIC_MEM_SIZE);
  out:
         free_netdev(dev);
@@ -296,21 +256,22 @@ out:
         return err;
  }
  
-/*
- *      SONIC uses a normal IRQ
- */
-#define sonic_request_irq       request_irq
-#define sonic_free_irq          free_irq
+MODULE_DESCRIPTION("Jazz SONIC ethernet driver");
+module_param(sonic_debug, int, 0);
+MODULE_PARM_DESC(sonic_debug, "jazzsonic debug level (1-4)");
  
-#define sonic_chiptomem(x)      KSEG1ADDR(vdma_log2phys(x))
+#define SONIC_IRQ_FLAG SA_INTERRUPT
  
  #include "sonic.c"
  
  static int __devexit jazz_sonic_device_remove (struct device *device)
  {
         struct net_device *dev = device->driver_data;
+       struct sonic_local* lp = netdev_priv(dev);
  
         unregister_netdev (dev);
+       dma_free_coherent(lp->device, SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+                         lp->descriptors, lp->descriptors_laddr);
         release_region (dev->base_addr, SONIC_MEM_SIZE);
         free_netdev (dev);
  
@@ -323,7 +284,7 @@ static struct device_driver jazz_sonic_driver = {
         .probe  = jazz_sonic_probe,
         .remove = __devexit_p(jazz_sonic_device_remove),
  };
-                                                                                
+
  static void jazz_sonic_platform_release (struct device *device)
  {
         struct platform_device *pldev;
@@ -336,10 +297,11 @@ static void jazz_sonic_platform_release (struct device *device)
  static int __init jazz_sonic_init_module(void)
  {
         struct platform_device *pldev;
+       int err;
  
-       if (driver_register(&jazz_sonic_driver)) {
+       if ((err = driver_register(&jazz_sonic_driver))) {
                 printk(KERN_ERR "Driver registration failed\n");
-               return -ENOMEM;
+               return err;
         }
  
         jazz_sonic_device = NULL;
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c

index b33111e2131310e1d6b891772bcca697b2be2435..690a1aae0b34705c6d04cb08bc31f1c3dbc72c0d 100644 (file)
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -68,6 +68,7 @@ static DEFINE_PER_CPU(struct net_device_stats, loopback_stats);
   * of largesending device modulo TCP checksum, which is ignored for loopback.
   */
  
+#ifdef LOOPBACK_TSO
  static void emulate_large_send_offload(struct sk_buff *skb)
  {
         struct iphdr *iph = skb->nh.iph;
@@ -119,6 +120,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
  
         dev_kfree_skb(skb);
  }
+#endif /* LOOPBACK_TSO */
  
  /*
   * The higher levels take care of making this non-reentrant (it's
@@ -130,12 +132,13 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
  
         skb_orphan(skb);
  
-       skb->protocol=eth_type_trans(skb,dev);
-       skb->dev=dev;
+       skb->protocol = eth_type_trans(skb,dev);
+       skb->dev = dev;
  #ifndef LOOPBACK_MUST_CHECKSUM
         skb->ip_summed = CHECKSUM_UNNECESSARY;
  #endif
  
+#ifdef LOOPBACK_TSO
         if (skb_shinfo(skb)->tso_size) {
                 BUG_ON(skb->protocol != htons(ETH_P_IP));
                 BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP);
@@ -143,14 +146,14 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
                 emulate_large_send_offload(skb);
                 return 0;
         }
-
+#endif
         dev->last_rx = jiffies;
  
         lb_stats = &per_cpu(loopback_stats, get_cpu());
         lb_stats->rx_bytes += skb->len;
-       lb_stats->tx_bytes += skb->len;
+       lb_stats->tx_bytes = lb_stats->rx_bytes;
         lb_stats->rx_packets++;
-       lb_stats->tx_packets++;
+       lb_stats->tx_packets = lb_stats->rx_packets;
         put_cpu();
  
         netif_rx(skb);
@@ -208,13 +211,16 @@ struct net_device loopback_dev = {
         .type                   = ARPHRD_LOOPBACK,      /* 0x0001*/
         .rebuild_header         = eth_rebuild_header,
         .flags                  = IFF_LOOPBACK,
-       .features               = NETIF_F_SG|NETIF_F_FRAGLIST
-                                 |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA
-                                 |NETIF_F_LLTX,
+       .features               = NETIF_F_SG | NETIF_F_FRAGLIST
+#ifdef LOOPBACK_TSO
+                                 | NETIF_F_TSO
+#endif
+                                 | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA
+                                 | NETIF_F_LLTX,
         .ethtool_ops            = &loopback_ethtool_ops,
  };
  
-/* Setup and register the of the LOOPBACK device. */
+/* Setup and register the loopback device. */
  int __init loopback_init(void)
  {
         struct net_device_stats *stats;
diff --git a/drivers/net/macsonic.c b/drivers/net/macsonic.c

index be28c65de729e5b4682b80788a70d48ebf3d9c7d..405e18365edef4c353b681937ebf63eb3971def7 100644 (file)
--- a/drivers/net/macsonic.c
+++ b/drivers/net/macsonic.c
@@ -1,6 +1,12 @@
  /*
   * macsonic.c
   *
+ * (C) 2005 Finn Thain
+ *
+ * Converted to DMA API, converted to unified driver model, made it work as
+ * a module again, and from the mac68k project, introduced more 32-bit cards
+ * and dhd's support for 16-bit cards.
+ *
   * (C) 1998 Alan Cox
   *
   * Debugging Andreas Ehliar, Michael Schmitz
@@ -26,8 +32,8 @@
   */
  
  #include <linux/kernel.h>
+#include <linux/module.h>
  #include <linux/types.h>
-#include <linux/ctype.h>
  #include <linux/fcntl.h>
  #include <linux/interrupt.h>
  #include <linux/init.h>
@@ -41,8 +47,8 @@
  #include <linux/netdevice.h>
  #include <linux/etherdevice.h>
  #include <linux/skbuff.h>
-#include <linux/module.h>
-#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
  
  #include <asm/bootinfo.h>
  #include <asm/system.h>
@@ -54,25 +60,28 @@
  #include <asm/macints.h>
  #include <asm/mac_via.h>
  
-#define SREGS_PAD(n)    u16 n;
+static char mac_sonic_string[] = "macsonic";
+static struct platform_device *mac_sonic_device;
  
  #include "sonic.h"
  
-#define SONIC_READ(reg) \
-       nubus_readl(base_addr+(reg))
-#define SONIC_WRITE(reg,val) \
-       nubus_writel((val), base_addr+(reg))
-#define sonic_read(dev, reg) \
-       nubus_readl((dev)->base_addr+(reg))
-#define sonic_write(dev, reg, val) \
-       nubus_writel((val), (dev)->base_addr+(reg))
-
+/* These should basically be bus-size and endian independent (since
+   the SONIC is at least smart enough that it uses the same endianness
+   as the host, unlike certain less enlightened Macintosh NICs) */
+#define SONIC_READ(reg) (nubus_readw(dev->base_addr + (reg * 4) \
+             + lp->reg_offset))
+#define SONIC_WRITE(reg,val) (nubus_writew(val, dev->base_addr + (reg * 4) \
+             + lp->reg_offset))
+
+/* use 0 for production, 1 for verification, >1 for debug */
+#ifdef SONIC_DEBUG
+static unsigned int sonic_debug = SONIC_DEBUG;
+#else 
+static unsigned int sonic_debug = 1;
+#endif
  
-static int sonic_debug;
  static int sonic_version_printed;
  
-static int reg_offset;
-
  extern int mac_onboard_sonic_probe(struct net_device* dev);
  extern int mac_nubus_sonic_probe(struct net_device* dev);
  
@@ -108,40 +117,6 @@ enum macsonic_type {
  
  #define SONIC_READ_PROM(addr) nubus_readb(prom_addr+addr)
  
-struct net_device * __init macsonic_probe(int unit)
-{
-       struct net_device *dev = alloc_etherdev(0);
-       int err;
-
-       if (!dev)
-               return ERR_PTR(-ENOMEM);
-
-       if (unit >= 0)
-               sprintf(dev->name, "eth%d", unit);
-
-       SET_MODULE_OWNER(dev);
-
-       /* This will catch fatal stuff like -ENOMEM as well as success */
-       err = mac_onboard_sonic_probe(dev);
-       if (err == 0)
-               goto found;
-       if (err != -ENODEV)
-               goto out;
-       err = mac_nubus_sonic_probe(dev);
-       if (err)
-               goto out;
-found:
-       err = register_netdev(dev);
-       if (err)
-               goto out1;
-       return dev;
-out1:
-       kfree(dev->priv);
-out:
-       free_netdev(dev);
-       return ERR_PTR(err);
-}
-
  /*
   * For reversing the PROM address
   */
@@ -160,103 +135,55 @@ static inline void bit_reverse_addr(unsigned char addr[6])
  
  int __init macsonic_init(struct net_device* dev)
  {
-       struct sonic_local* lp = NULL;
-       int i;
+       struct sonic_local* lp = netdev_priv(dev);
  
         /* Allocate the entire chunk of memory for the descriptors.
             Note that this cannot cross a 64K boundary. */
-       for (i = 0; i < 20; i++) {
-               unsigned long desc_base, desc_top;
-               if((lp = kmalloc(sizeof(struct sonic_local), GFP_KERNEL | GFP_DMA)) == NULL) {
-                       printk(KERN_ERR "%s: couldn't allocate descriptor buffers\n", dev->name);
-                       return -ENOMEM;
-               }
-
-               desc_base = (unsigned long) lp;
-               desc_top = desc_base + sizeof(struct sonic_local);
-               if ((desc_top & 0xffff) >= (desc_base & 0xffff))
-                       break;
-               /* Hmm. try again (FIXME: does this actually work?) */
-               kfree(lp);
-               printk(KERN_DEBUG
-                      "%s: didn't get continguous chunk [%08lx - %08lx], trying again\n",
-                      dev->name, desc_base, desc_top);
-       }
-
-       if (lp == NULL) {
-               printk(KERN_ERR "%s: tried 20 times to allocate descriptor buffers, giving up.\n",
-                      dev->name);
+       if ((lp->descriptors = dma_alloc_coherent(lp->device,
+                   SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+                   &lp->descriptors_laddr, GFP_KERNEL)) == NULL) {
+               printk(KERN_ERR "%s: couldn't alloc DMA memory for descriptors.\n", lp->device->bus_id);
                 return -ENOMEM;
-       }                      
-
-       dev->priv = lp;
-
-#if 0
-       /* this code is only here as a curiousity...   mainly, where the 
-          fuck did SONIC_BUS_SCALE come from, and what was it supposed
-          to do?  the normal allocation works great for 32 bit stuffs..  */
+       }
  
         /* Now set up the pointers to point to the appropriate places */
-       lp->cda = lp->sonic_desc;
-       lp->tda = lp->cda + (SIZEOF_SONIC_CDA * SONIC_BUS_SCALE(lp->dma_bitmode));
+       lp->cda = lp->descriptors;
+       lp->tda = lp->cda + (SIZEOF_SONIC_CDA
+                            * SONIC_BUS_SCALE(lp->dma_bitmode));
         lp->rda = lp->tda + (SIZEOF_SONIC_TD * SONIC_NUM_TDS
-                            * SONIC_BUS_SCALE(lp->dma_bitmode));
+                            * SONIC_BUS_SCALE(lp->dma_bitmode));
         lp->rra = lp->rda + (SIZEOF_SONIC_RD * SONIC_NUM_RDS
-                            * SONIC_BUS_SCALE(lp->dma_bitmode));
+                            * SONIC_BUS_SCALE(lp->dma_bitmode));
  
-#endif
-       
-       memset(lp, 0, sizeof(struct sonic_local));
-
-       lp->cda_laddr = (unsigned int)&(lp->cda);
-       lp->tda_laddr = (unsigned int)lp->tda;
-       lp->rra_laddr = (unsigned int)lp->rra;
-       lp->rda_laddr = (unsigned int)lp->rda;
-
-       /* FIXME, maybe we should use skbs */
-       if ((lp->rba = (char *)
-            kmalloc(SONIC_NUM_RRS * SONIC_RBSIZE, GFP_KERNEL | GFP_DMA)) == NULL) {
-               printk(KERN_ERR "%s: couldn't allocate receive buffers\n", dev->name);
-               dev->priv = NULL;
-               kfree(lp);
-               return -ENOMEM;
-       }
-
-       lp->rba_laddr = (unsigned int)lp->rba;
-
-       {
-               int rs, ds;
-
-               /* almost always 12*4096, but let's not take chances */
-               rs = ((SONIC_NUM_RRS * SONIC_RBSIZE + 4095) / 4096) * 4096;
-               /* almost always under a page, but let's not take chances */
-               ds = ((sizeof(struct sonic_local) + 4095) / 4096) * 4096;
-               kernel_set_cachemode(lp->rba, rs, IOMAP_NOCACHE_SER);
-               kernel_set_cachemode(lp, ds, IOMAP_NOCACHE_SER);
-       }
-       
-#if 0
-       flush_cache_all();
-#endif
+       lp->cda_laddr = lp->descriptors_laddr;
+       lp->tda_laddr = lp->cda_laddr + (SIZEOF_SONIC_CDA
+                            * SONIC_BUS_SCALE(lp->dma_bitmode));
+       lp->rda_laddr = lp->tda_laddr + (SIZEOF_SONIC_TD * SONIC_NUM_TDS
+                            * SONIC_BUS_SCALE(lp->dma_bitmode));
+       lp->rra_laddr = lp->rda_laddr + (SIZEOF_SONIC_RD * SONIC_NUM_RDS
+                            * SONIC_BUS_SCALE(lp->dma_bitmode));
  
         dev->open = sonic_open;
         dev->stop = sonic_close;
         dev->hard_start_xmit = sonic_send_packet;
         dev->get_stats = sonic_get_stats;
         dev->set_multicast_list = &sonic_multicast_list;
+       dev->tx_timeout = sonic_tx_timeout;
+       dev->watchdog_timeo = TX_TIMEOUT;
  
         /*
          * clear tally counter
          */
-       sonic_write(dev, SONIC_CRCT, 0xffff);
-       sonic_write(dev, SONIC_FAET, 0xffff);
-       sonic_write(dev, SONIC_MPT, 0xffff);
+       SONIC_WRITE(SONIC_CRCT, 0xffff);
+       SONIC_WRITE(SONIC_FAET, 0xffff);
+       SONIC_WRITE(SONIC_MPT, 0xffff);
  
         return 0;
  }
  
  int __init mac_onboard_sonic_ethernet_addr(struct net_device* dev)
  {
+       struct sonic_local *lp = netdev_priv(dev);
         const int prom_addr = ONBOARD_SONIC_PROM_BASE;
         int i;
  
@@ -270,6 +197,7 @@ int __init mac_onboard_sonic_ethernet_addr(struct net_device* dev)
            why this is so. */
         if (memcmp(dev->dev_addr, "\x08\x00\x07", 3) &&
             memcmp(dev->dev_addr, "\x00\xA0\x40", 3) &&
+           memcmp(dev->dev_addr, "\x00\x80\x19", 3) &&
             memcmp(dev->dev_addr, "\x00\x05\x02", 3))
                 bit_reverse_addr(dev->dev_addr);
         else
@@ -281,22 +209,23 @@ int __init mac_onboard_sonic_ethernet_addr(struct net_device* dev)
             the card... */
         if (memcmp(dev->dev_addr, "\x08\x00\x07", 3) &&
             memcmp(dev->dev_addr, "\x00\xA0\x40", 3) &&
+           memcmp(dev->dev_addr, "\x00\x80\x19", 3) &&
             memcmp(dev->dev_addr, "\x00\x05\x02", 3))
         {
                 unsigned short val;
  
                 printk(KERN_INFO "macsonic: PROM seems to be wrong, trying CAM entry 15\n");
                 
-               sonic_write(dev, SONIC_CMD, SONIC_CR_RST);
-               sonic_write(dev, SONIC_CEP, 15);
+               SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
+               SONIC_WRITE(SONIC_CEP, 15);
  
-               val = sonic_read(dev, SONIC_CAP2);
+               val = SONIC_READ(SONIC_CAP2);
                 dev->dev_addr[5] = val >> 8;
                 dev->dev_addr[4] = val & 0xff;
-               val = sonic_read(dev, SONIC_CAP1);
+               val = SONIC_READ(SONIC_CAP1);
                 dev->dev_addr[3] = val >> 8;
                 dev->dev_addr[2] = val & 0xff;
-               val = sonic_read(dev, SONIC_CAP0);
+               val = SONIC_READ(SONIC_CAP0);
                 dev->dev_addr[1] = val >> 8;
                 dev->dev_addr[0] = val & 0xff;
                 
@@ -311,6 +240,7 @@ int __init mac_onboard_sonic_ethernet_addr(struct net_device* dev)
  
         if (memcmp(dev->dev_addr, "\x08\x00\x07", 3) &&
             memcmp(dev->dev_addr, "\x00\xA0\x40", 3) &&
+           memcmp(dev->dev_addr, "\x00\x80\x19", 3) &&
             memcmp(dev->dev_addr, "\x00\x05\x02", 3))
         {
                 /*
@@ -325,8 +255,9 @@ int __init mac_onboard_sonic_probe(struct net_device* dev)
  {
         /* Bwahahaha */
         static int once_is_more_than_enough;
-       int i;
-       int dma_bitmode;
+       struct sonic_local* lp = netdev_priv(dev);
+       int sr;
+       int commslot = 0;
         
         if (once_is_more_than_enough)
                 return -ENODEV;
@@ -335,20 +266,18 @@ int __init mac_onboard_sonic_probe(struct net_device* dev)
         if (!MACH_IS_MAC)
                 return -ENODEV;
  
-       printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. ");
-
         if (macintosh_config->ether_type != MAC_ETHER_SONIC)
-       {
-               printk("none.\n");
                 return -ENODEV;
-       }
-
+       
+       printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. ");
+       
         /* Bogus probing, on the models which may or may not have
            Ethernet (BTW, the Ethernet *is* always at the same
            address, and nothing else lives there, at least if Apple's
            documentation is to be believed) */
         if (macintosh_config->ident == MAC_MODEL_Q630 ||
             macintosh_config->ident == MAC_MODEL_P588 ||
+           macintosh_config->ident == MAC_MODEL_P575 ||
             macintosh_config->ident == MAC_MODEL_C610) {
                 unsigned long flags;
                 int card_present;
@@ -361,13 +290,13 @@ int __init mac_onboard_sonic_probe(struct net_device* dev)
                         printk("none.\n");
                         return -ENODEV;
                 }
+               commslot = 1;
         }
  
         printk("yes\n");        
  
-       /* Danger!  My arms are flailing wildly!  You *must* set this
-           before using sonic_read() */
-
+       /* Danger!  My arms are flailing wildly!  You *must* set lp->reg_offset
+        * and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */
         dev->base_addr = ONBOARD_SONIC_REGISTERS;
         if (via_alt_mapping)
                 dev->irq = IRQ_AUTO_3;
@@ -379,84 +308,66 @@ int __init mac_onboard_sonic_probe(struct net_device* dev)
                 sonic_version_printed = 1;
         }
         printk(KERN_INFO "%s: onboard / comm-slot SONIC at 0x%08lx\n",
-              dev->name, dev->base_addr);
-
-       /* Now do a song and dance routine in an attempt to determine
-           the bus width */
+              lp->device->bus_id, dev->base_addr);
  
         /* The PowerBook's SONIC is 16 bit always. */
         if (macintosh_config->ident == MAC_MODEL_PB520) {
-               reg_offset = 0;
-               dma_bitmode = 0;
-       } else if (macintosh_config->ident == MAC_MODEL_C610) {
-               reg_offset = 0;
-               dma_bitmode = 1;
-       } else {
+               lp->reg_offset = 0;
+               lp->dma_bitmode = SONIC_BITMODE16;
+               sr = SONIC_READ(SONIC_SR);
+       } else if (commslot) {
                 /* Some of the comm-slot cards are 16 bit.  But some
-                   of them are not.  The 32-bit cards use offset 2 and
-                   pad with zeroes or sometimes ones (I think...)
-                   Therefore, if we try offset 0 and get a silicon
-                   revision of 0, we assume 16 bit. */
-               int sr;
-
-               /* Technically this is not necessary since we zeroed
-                   it above */
-               reg_offset = 0;
-               dma_bitmode = 0;
-               sr = sonic_read(dev, SONIC_SR);
-               if (sr == 0 || sr == 0xffff) {
-                       reg_offset = 2;
-                       /* 83932 is 0x0004, 83934 is 0x0100 or 0x0101 */
-                       sr = sonic_read(dev, SONIC_SR);
-                       dma_bitmode = 1;
-                       
+                  of them are not.  The 32-bit cards use offset 2 and
+                  have known revisions.  We try reading the revision
+                  register at offset 2; if we don't get a known revision,
+                  we assume 16 bit at offset 0.  */
+               lp->reg_offset = 2;
+               lp->dma_bitmode = SONIC_BITMODE16;
+
+               sr = SONIC_READ(SONIC_SR);
+               if (sr == 0x0004 || sr == 0x0006 || sr == 0x0100 || sr == 0x0101) 
+                       /* 83932 is 0x0004 or 0x0006, 83934 is 0x0100 or 0x0101 */
+                       lp->dma_bitmode = SONIC_BITMODE32;
+               else {
+                       lp->dma_bitmode = SONIC_BITMODE16;
+                       lp->reg_offset = 0;
+                       sr = SONIC_READ(SONIC_SR);
                 }
-               printk(KERN_INFO
-                      "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
-                      dev->name, sr, dma_bitmode?32:16, reg_offset);
+       } else {
+               /* All onboard cards are at offset 2 with 32 bit DMA. */
+               lp->reg_offset = 2;
+               lp->dma_bitmode = SONIC_BITMODE32;
+               sr = SONIC_READ(SONIC_SR);
         }
-       
+       printk(KERN_INFO
+              "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
+              lp->device->bus_id, sr, lp->dma_bitmode?32:16, lp->reg_offset);
  
-       /* this carries my sincere apologies -- by the time I got to updating
-          the driver, support for "reg_offsets" appeares nowhere in the sonic
-          code, going back for over a year.  Fortunately, my Mac does't seem
-          to use whatever this was.
+#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
+       printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", lp->device->bus_id,
+              SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
+#endif
  
-          If you know how this is supposed to be implemented, either fix it,
-          or contact me (sammy@oh.verio.com) to explain what it is. --Sam */
-          
-       if(reg_offset) {
-               printk("%s: register offset unsupported.  please fix this if you know what it is.\n", dev->name);
-               return -ENODEV;
-       }
-       
         /* Software reset, then initialize control registers. */
-       sonic_write(dev, SONIC_CMD, SONIC_CR_RST);
-       sonic_write(dev, SONIC_DCR, SONIC_DCR_BMS |
-                   SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_EXBUS |
-                   (dma_bitmode ? SONIC_DCR_DW : 0));
+       SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
+
+       SONIC_WRITE(SONIC_DCR, SONIC_DCR_EXBUS | SONIC_DCR_BMS |
+                              SONIC_DCR_RFT1  | SONIC_DCR_TFT0 |
+                              (lp->dma_bitmode ? SONIC_DCR_DW : 0));
  
         /* This *must* be written back to in order to restore the
-           extended programmable output bits */
-       sonic_write(dev, SONIC_DCR2, 0);
+        * extended programmable output bits, as it may not have been
+        * initialised since the hardware reset. */
+       SONIC_WRITE(SONIC_DCR2, 0);
  
         /* Clear *and* disable interrupts to be on the safe side */
-       sonic_write(dev, SONIC_ISR,0x7fff);
-       sonic_write(dev, SONIC_IMR,0);
+       SONIC_WRITE(SONIC_IMR, 0);
+       SONIC_WRITE(SONIC_ISR, 0x7fff);
  
         /* Now look for the MAC address. */
         if (mac_onboard_sonic_ethernet_addr(dev) != 0)
                 return -ENODEV;
  
-       printk(KERN_INFO "MAC ");
-       for (i = 0; i < 6; i++) {
-               printk("%2.2x", dev->dev_addr[i]);
-               if (i < 5)
-                       printk(":");
-       }
-
-       printk(" IRQ %d\n", dev->irq);
-
         /* Shared init code */
         return macsonic_init(dev);
  }
@@ -468,8 +379,10 @@ int __init mac_nubus_sonic_ethernet_addr(struct net_device* dev,
         int i;
         for(i = 0; i < 6; i++)
                 dev->dev_addr[i] = SONIC_READ_PROM(i);
-       /* For now we are going to assume that they're all bit-reversed */
-       bit_reverse_addr(dev->dev_addr);
+
+       /* Some of the addresses are bit-reversed */
+       if (id != MACSONIC_DAYNA)
+               bit_reverse_addr(dev->dev_addr);
  
         return 0;
  }
@@ -487,6 +400,15 @@ int __init macsonic_ident(struct nubus_dev* ndev)
                 else
                         return MACSONIC_APPLE;
         }
+       
+       if (ndev->dr_hw == NUBUS_DRHW_SMC9194 &&
+           ndev->dr_sw == NUBUS_DRSW_DAYNA)
+               return MACSONIC_DAYNA;
+       
+       if (ndev->dr_hw == NUBUS_DRHW_SONIC_LC &&
+           ndev->dr_sw == 0) { /* huh? */
+               return MACSONIC_APPLE16;
+       }
         return -1;
  }
  
@@ -494,12 +416,12 @@ int __init mac_nubus_sonic_probe(struct net_device* dev)
  {
         static int slots;
         struct nubus_dev* ndev = NULL;
+       struct sonic_local* lp = netdev_priv(dev);
         unsigned long base_addr, prom_addr;
         u16 sonic_dcr;
-       int id;
-       int i;
-       int dma_bitmode;
-
+       int id = -1;
+       int reg_offset, dma_bitmode;
+       
         /* Find the first SONIC that hasn't been initialized already */
         while ((ndev = nubus_find_type(NUBUS_CAT_NETWORK,
                                        NUBUS_TYPE_ETHERNET, ndev)) != NULL)
@@ -521,51 +443,52 @@ int __init mac_nubus_sonic_probe(struct net_device* dev)
         case MACSONIC_DUODOCK:
                 base_addr = ndev->board->slot_addr + DUODOCK_SONIC_REGISTERS;
                 prom_addr = ndev->board->slot_addr + DUODOCK_SONIC_PROM_BASE;
-               sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1
-                       | SONIC_DCR_TFT0;
+               sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1 |
+                           SONIC_DCR_TFT0;
                 reg_offset = 2;
-               dma_bitmode = 1;
+               dma_bitmode = SONIC_BITMODE32;
                 break;
         case MACSONIC_APPLE:
                 base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
                 prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
                 sonic_dcr = SONIC_DCR_BMS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0;
                 reg_offset = 0;
-               dma_bitmode = 1;
+               dma_bitmode = SONIC_BITMODE32;
                 break;
         case MACSONIC_APPLE16:
                 base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
                 prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
-               sonic_dcr = SONIC_DCR_EXBUS
-                       | SONIC_DCR_RFT1 | SONIC_DCR_TFT0
-                       | SONIC_DCR_PO1 | SONIC_DCR_BMS; 
+               sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
+                           SONIC_DCR_PO1 | SONIC_DCR_BMS; 
                 reg_offset = 0;
-               dma_bitmode = 0;
+               dma_bitmode = SONIC_BITMODE16;
                 break;
         case MACSONIC_DAYNALINK:
                 base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
                 prom_addr = ndev->board->slot_addr + DAYNALINK_PROM_BASE;
-               sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0
-                       | SONIC_DCR_PO1 | SONIC_DCR_BMS; 
+               sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
+                           SONIC_DCR_PO1 | SONIC_DCR_BMS; 
                 reg_offset = 0;
-               dma_bitmode = 0;
+               dma_bitmode = SONIC_BITMODE16;
                 break;
         case MACSONIC_DAYNA:
                 base_addr = ndev->board->slot_addr + DAYNA_SONIC_REGISTERS;
                 prom_addr = ndev->board->slot_addr + DAYNA_SONIC_MAC_ADDR;
-               sonic_dcr = SONIC_DCR_BMS
-                       | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1;
+               sonic_dcr = SONIC_DCR_BMS |
+                           SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1;
                 reg_offset = 0;
-               dma_bitmode = 0;
+               dma_bitmode = SONIC_BITMODE16;
                 break;
         default:
                 printk(KERN_ERR "macsonic: WTF, id is %d\n", id);
                 return -ENODEV;
         }
  
-       /* Danger!  My arms are flailing wildly!  You *must* set this
-           before using sonic_read() */
+       /* Danger!  My arms are flailing wildly!  You *must* set lp->reg_offset
+        * and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */
         dev->base_addr = base_addr;
+       lp->reg_offset = reg_offset;
+       lp->dma_bitmode = dma_bitmode;
         dev->irq = SLOT2IRQ(ndev->board->slot);
  
         if (!sonic_version_printed) {
@@ -573,29 +496,66 @@ int __init mac_nubus_sonic_probe(struct net_device* dev)
                 sonic_version_printed = 1;
         }
         printk(KERN_INFO "%s: %s in slot %X\n",
-              dev->name, ndev->board->name, ndev->board->slot);
+              lp->device->bus_id, ndev->board->name, ndev->board->slot);
         printk(KERN_INFO "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
-              dev->name, sonic_read(dev, SONIC_SR), dma_bitmode?32:16, reg_offset);
+              lp->device->bus_id, SONIC_READ(SONIC_SR), dma_bitmode?32:16, reg_offset);
  
-       if(reg_offset) {
-               printk("%s: register offset unsupported.  please fix this if you know what it is.\n", dev->name);
-               return -ENODEV;
-       }
+#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
+       printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", lp->device->bus_id,
+              SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
+#endif
  
         /* Software reset, then initialize control registers. */
-       sonic_write(dev, SONIC_CMD, SONIC_CR_RST);
-       sonic_write(dev, SONIC_DCR, sonic_dcr
-                   | (dma_bitmode ? SONIC_DCR_DW : 0));
+       SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
+       SONIC_WRITE(SONIC_DCR, sonic_dcr | (dma_bitmode ? SONIC_DCR_DW : 0));
+       /* This *must* be written back to in order to restore the
+        * extended programmable output bits, since it may not have been
+        * initialised since the hardware reset. */
+       SONIC_WRITE(SONIC_DCR2, 0);
  
         /* Clear *and* disable interrupts to be on the safe side */
-       sonic_write(dev, SONIC_ISR,0x7fff);
-       sonic_write(dev, SONIC_IMR,0);
+       SONIC_WRITE(SONIC_IMR, 0);
+       SONIC_WRITE(SONIC_ISR, 0x7fff);
  
         /* Now look for the MAC address. */
         if (mac_nubus_sonic_ethernet_addr(dev, prom_addr, id) != 0)
                 return -ENODEV;
  
-       printk(KERN_INFO "MAC ");
+       /* Shared init code */
+       return macsonic_init(dev);
+}
+
+static int __init mac_sonic_probe(struct device *device)
+{
+       struct net_device *dev;
+       struct sonic_local *lp;
+       int err;
+       int i;
+
+       dev = alloc_etherdev(sizeof(struct sonic_local));
+       if (!dev)
+               return -ENOMEM;
+
+       lp = netdev_priv(dev);
+       lp->device = device;
+       SET_NETDEV_DEV(dev, device);
+       SET_MODULE_OWNER(dev);
+
+       /* This will catch fatal stuff like -ENOMEM as well as success */
+       err = mac_onboard_sonic_probe(dev);
+       if (err == 0)
+               goto found;
+       if (err != -ENODEV)
+               goto out;
+       err = mac_nubus_sonic_probe(dev);
+       if (err)
+               goto out;
+found:
+       err = register_netdev(dev);
+       if (err)
+               goto out;
+
+       printk("%s: MAC ", dev->name);
         for (i = 0; i < 6; i++) {
                 printk("%2.2x", dev->dev_addr[i]);
                 if (i < 5)
@@ -603,55 +563,95 @@ int __init mac_nubus_sonic_probe(struct net_device* dev)
         }
         printk(" IRQ %d\n", dev->irq);
  
-       /* Shared init code */
-       return macsonic_init(dev);
-}
+       return 0;
  
-#ifdef MODULE
-static struct net_device *dev_macsonic;
+out:
+       free_netdev(dev);
  
-MODULE_PARM(sonic_debug, "i");
+       return err;
+}
+
+MODULE_DESCRIPTION("Macintosh SONIC ethernet driver");
+module_param(sonic_debug, int, 0);
  MODULE_PARM_DESC(sonic_debug, "macsonic debug level (1-4)");
  
-int
-init_module(void)
+#define SONIC_IRQ_FLAG IRQ_FLG_FAST
+
+#include "sonic.c"
+
+static int __devexit mac_sonic_device_remove (struct device *device)
  {
-        dev_macsonic = macsonic_probe(-1);
-       if (IS_ERR(dev_macsonic)) {
-                printk(KERN_WARNING "macsonic.c: No card found\n");
-               return PTR_ERR(dev_macsonic);
-       }
+       struct net_device *dev = device->driver_data;   /* NOTE(review): confirm the probe path stores the net_device in driver_data */
+       struct sonic_local* lp = netdev_priv(dev);
+       /* Unregister first, then free the descriptor area, then the net device. */
+       unregister_netdev (dev);
+       dma_free_coherent(lp->device, SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+                         lp->descriptors, lp->descriptors_laddr);
+       free_netdev (dev);      /* lp was obtained from netdev_priv(dev); presumably invalid after this */
+
         return 0;
  }
  
-void
-cleanup_module(void)
+static struct device_driver mac_sonic_driver = {
+       .name   = mac_sonic_string,     /* same string is used as the platform device name at module init */
+       .bus    = &platform_bus_type,
+       .probe  = mac_sonic_probe,
+       .remove = __devexit_p(mac_sonic_device_remove),
+};
+
+static void mac_sonic_platform_release(struct device *device)
  {
-       unregister_netdev(dev_macsonic);
-       kfree(dev_macsonic->priv);
-       free_netdev(dev_macsonic);
+       /*
+        * Release callback of the platform device: map the embedded
+        * struct device back to its platform_device and kfree() that.
+        */
+       kfree(to_platform_device(device));
  }
-#endif /* MODULE */
  
+static int __init mac_sonic_init_module(void)
+{
+       struct platform_device *pldev;
+       int err;
  
-#define vdma_alloc(foo, bar) ((u32)foo)
-#define vdma_free(baz)
-#define sonic_chiptomem(bat) (bat)
-#define PHYSADDR(quux) (quux)
-#define CPHYSADDR(quux) (quux)
+       if ((err = driver_register(&mac_sonic_driver))) {
+               printk(KERN_ERR "Driver registration failed\n");
+               return err;
+       }
  
-#define sonic_request_irq       request_irq
-#define sonic_free_irq          free_irq
+       mac_sonic_device = NULL;
  
-#include "sonic.c"
+       /* Allocate the single platform device the driver will bind to. */
+       if (!(pldev = kmalloc (sizeof (*pldev), GFP_KERNEL)))
+               goto out_unregister;
  
-/*
- * Local variables:
- *  compile-command: "m68k-linux-gcc -D__KERNEL__ -I../../include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -pipe -fno-strength-reduce -ffixed-a2 -DMODULE -DMODVERSIONS -include ../../include/linux/modversions.h   -c -o macsonic.o macsonic.c"
- *  version-control: t
- *  kept-new-versions: 5
- *  c-indent-level: 8
- *  tab-width: 8
- * End:
- *
- */
+       memset(pldev, 0, sizeof (*pldev));
+       pldev->name             = mac_sonic_string;
+       pldev->id               = 0;
+       pldev->dev.release      = mac_sonic_platform_release;
+       mac_sonic_device        = pldev;
+
+       /* A failed device registration is not fatal: init still returns 0. */
+       if (platform_device_register (pldev)) {
+               kfree(pldev);
+               mac_sonic_device = NULL;
+       }
+       return 0;
+
+out_unregister:
+       /* Only reached when kmalloc failed, so pldev is NULL: undo the driver. */
+       driver_unregister(&mac_sonic_driver);
+       return -ENOMEM;
+}
+
+/* Module exit: drop the driver, then the platform device if one is recorded. */
+static void __exit mac_sonic_cleanup_module(void)
+{
+       driver_unregister(&mac_sonic_driver);
+       if (!mac_sonic_device)
+               return;
+       platform_device_unregister(mac_sonic_device);
+       mac_sonic_device = NULL;
+}
+
+module_init(mac_sonic_init_module);
+module_exit(mac_sonic_cleanup_module);
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c

index 0405e1f0d3df183fe004173091aed6ca103751e4..fb6b232069d6c43066fbb6f9d7b7244b8770d496 100644 (file)
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1157,16 +1157,20 @@ static int mv643xx_eth_start_xmit(struct sk_buff *skb, struct net_device *dev)
         if (!skb_shinfo(skb)->nr_frags) {
  linear:
                 if (skb->ip_summed != CHECKSUM_HW) {
+                       /* Errata BTS #50, IHL must be 5 if no HW checksum */
                         pkt_info.cmd_sts = ETH_TX_ENABLE_INTERRUPT |
-                                       ETH_TX_FIRST_DESC | ETH_TX_LAST_DESC;
+                                          ETH_TX_FIRST_DESC |
+                                          ETH_TX_LAST_DESC |
+                                          5 << ETH_TX_IHL_SHIFT;
                         pkt_info.l4i_chk = 0;
                 } else {
-                       u32 ipheader = skb->nh.iph->ihl << 11;
  
                         pkt_info.cmd_sts = ETH_TX_ENABLE_INTERRUPT |
-                                       ETH_TX_FIRST_DESC | ETH_TX_LAST_DESC |
-                                       ETH_GEN_TCP_UDP_CHECKSUM |
-                                       ETH_GEN_IP_V_4_CHECKSUM | ipheader;
+                                          ETH_TX_FIRST_DESC |
+                                          ETH_TX_LAST_DESC |
+                                          ETH_GEN_TCP_UDP_CHECKSUM |
+                                          ETH_GEN_IP_V_4_CHECKSUM |
+                                          skb->nh.iph->ihl << ETH_TX_IHL_SHIFT;
                         /* CPU already calculated pseudo header checksum. */
                         if (skb->nh.iph->protocol == IPPROTO_UDP) {
                                 pkt_info.cmd_sts |= ETH_UDP_FRAME;
@@ -1193,7 +1197,6 @@ linear:
                 stats->tx_bytes += pkt_info.byte_cnt;
         } else {
                 unsigned int frag;
-               u32 ipheader;
  
                 /* Since hardware can't handle unaligned fragments smaller
                  * than 9 bytes, if we find any, we linearize the skb
@@ -1222,12 +1225,16 @@ linear:
                                                         DMA_TO_DEVICE);
                 pkt_info.l4i_chk = 0;
                 pkt_info.return_info = 0;
-               pkt_info.cmd_sts = ETH_TX_FIRST_DESC;
  
-               if (skb->ip_summed == CHECKSUM_HW) {
-                       ipheader = skb->nh.iph->ihl << 11;
-                       pkt_info.cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM |
-                                       ETH_GEN_IP_V_4_CHECKSUM | ipheader;
+               if (skb->ip_summed != CHECKSUM_HW)
+                       /* Errata BTS #50, IHL must be 5 if no HW checksum */
+                       pkt_info.cmd_sts = ETH_TX_FIRST_DESC |
+                                          5 << ETH_TX_IHL_SHIFT;
+               else {
+                       pkt_info.cmd_sts = ETH_TX_FIRST_DESC |
+                                          ETH_GEN_TCP_UDP_CHECKSUM |
+                                          ETH_GEN_IP_V_4_CHECKSUM |
+                                          skb->nh.iph->ihl << ETH_TX_IHL_SHIFT;
                         /* CPU already calculated pseudo header checksum. */
                         if (skb->nh.iph->protocol == IPPROTO_UDP) {
                                 pkt_info.cmd_sts |= ETH_UDP_FRAME;
diff --git a/drivers/net/mv643xx_eth.h b/drivers/net/mv643xx_eth.h

index 57c4f8fbfdb62648cee570b4dab6553c4f4c8da9..7678b59c29523456454b37fc4eede501a82f96e4 100644 (file)
--- a/drivers/net/mv643xx_eth.h
+++ b/drivers/net/mv643xx_eth.h
@@ -49,7 +49,7 @@
  /* Checksum offload for Tx works for most packets, but
   * fails if previous packet sent did not use hw csum
   */
-#undef MV643XX_CHECKSUM_OFFLOAD_TX
+#define        MV643XX_CHECKSUM_OFFLOAD_TX
  #define        MV643XX_NAPI
  #define        MV643XX_TX_FAST_REFILL
  #undef MV643XX_RX_QUEUE_FILL_ON_TASK   /* Does not work, yet */
@@ -217,6 +217,8 @@
  #define ETH_TX_ENABLE_INTERRUPT                        (BIT23)
  #define ETH_AUTO_MODE                          (BIT30)
  
+#define ETH_TX_IHL_SHIFT                       11
+
  /* typedefs */
  
  typedef enum _eth_func_ret_status {
diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c

index 4a391ea0f58aaa4771975a5eff2e0ce338e9dd54..a1ac4bd1696eae7d909729aaca014edbf919cedf 100644 (file)
--- a/drivers/net/pci-skeleton.c
+++ b/drivers/net/pci-skeleton.c
@@ -486,9 +486,9 @@ struct netdrv_private {
  MODULE_AUTHOR ("Jeff Garzik <jgarzik@pobox.com>");
  MODULE_DESCRIPTION ("Skeleton for a PCI Fast Ethernet driver");
  MODULE_LICENSE("GPL");
-MODULE_PARM (multicast_filter_limit, "i");
-MODULE_PARM (max_interrupt_work, "i");
-MODULE_PARM (media, "1-" __MODULE_STRING(8) "i");
+module_param(multicast_filter_limit, int, 0);
+module_param(max_interrupt_work, int, 0);
+module_param_array(media, int, NULL, 0);
  MODULE_PARM_DESC (multicast_filter_limit, "pci-skeleton maximum number of filtered multicast addresses");
  MODULE_PARM_DESC (max_interrupt_work, "pci-skeleton maximum events handled per interrupt");
  MODULE_PARM_DESC (media, "pci-skeleton: Bits 0-3: media type, bit 17: full duplex");
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c

index 9d8197bb293ac74b39c315ca42302d2c9d051300..384a736a0d2f820f5c6a7fcce3306d8e6ff04264 100644 (file)
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -134,7 +134,7 @@ typedef struct local_info_t {
      u_char mc_filter[8];
  } local_info_t;
  
-#define MC_FILTERBREAK 64
+#define MC_FILTERBREAK 8
  
  /*====================================================================*/
  /* 
@@ -1012,7 +1012,7 @@ static void fjn_reset(struct net_device *dev)
         outb(BANK_1U, ioaddr + CONFIG_1);
  
      /* set the multicast table to accept none. */
-    for (i = 0; i < 6; i++) 
+    for (i = 0; i < 8; i++) 
          outb(0x00, ioaddr + MAR_ADR + i);
  
      /* Switch to bank 2 (runtime mode) */
@@ -1269,6 +1269,16 @@ static void set_rx_mode(struct net_device *dev)
      u_long flags;
      int i;
      
+    int saved_config_0 = inb(ioaddr + CONFIG_0);
+     
+    local_irq_save(flags); 
+
+    /* Disable Tx and Rx */
+    if (sram_config == 0) 
+       outb(CONFIG0_RST, ioaddr + CONFIG_0);
+    else
+       outb(CONFIG0_RST_1, ioaddr + CONFIG_0);
+
      if (dev->flags & IFF_PROMISC) {
         /* Unconditionally log net taps. */
         printk("%s: Promiscuous mode enabled.\n", dev->name);
@@ -1290,20 +1300,23 @@ static void set_rx_mode(struct net_device *dev)
         for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
              i++, mclist = mclist->next) {
             unsigned int bit =
-               ether_crc_le(ETH_ALEN, mclist->dmi_addr) & 0x3f;
-           mc_filter[bit >> 3] |= (1 << bit);
+               ether_crc_le(ETH_ALEN, mclist->dmi_addr) >> 26;
+           mc_filter[bit >> 3] |= (1 << (bit & 7));
         }
+       outb(2, ioaddr + RX_MODE);      /* Use normal mode. */
      }
  
-    local_irq_save(flags); 
      if (memcmp(mc_filter, lp->mc_filter, sizeof(mc_filter))) {
         int saved_bank = inb(ioaddr + CONFIG_1);
         /* Switch to bank 1 and set the multicast table. */
         outb(0xe4, ioaddr + CONFIG_1);
         for (i = 0; i < 8; i++)
-           outb(mc_filter[i], ioaddr + 8 + i);
+           outb(mc_filter[i], ioaddr + MAR_ADR + i);
         memcpy(lp->mc_filter, mc_filter, sizeof(mc_filter));
         outb(saved_bank, ioaddr + CONFIG_1);
      }
+
+    outb(saved_config_0, ioaddr + CONFIG_0);
+
      local_irq_restore(flags);
  }
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig

new file mode 100644 (file)

index 0000000..6a2fe35
--- /dev/null
+++ b/drivers/net/phy/Kconfig
@@ -0,0 +1,57 @@
+#
+# PHY Layer Configuration
+#
+
+menu "PHY device support"
+
+config PHYLIB
+       tristate "PHY Device support and infrastructure"
+       depends on NET_ETHERNET
+       help
+         Ethernet controllers are usually attached to PHY
+         devices.  This option provides infrastructure for
+         managing PHY devices.
+
+config PHYCONTROL
+       bool "  Support for automatically handling PHY state changes"
+       depends on PHYLIB
+       help
+         Adds code to perform all the work for keeping PHY link
+         state (speed/duplex/etc) up-to-date.  Also handles
+         interrupts.
+
+comment "MII PHY device drivers"
+       depends on PHYLIB
+
+config MARVELL_PHY
+       tristate "Drivers for Marvell PHYs"
+       depends on PHYLIB
+       ---help---
+         Currently has a driver for the 88E1011S
+       
+config DAVICOM_PHY
+       tristate "Drivers for Davicom PHYs"
+       depends on PHYLIB
+       ---help---
+         Currently supports dm9161e and dm9131
+
+config QSEMI_PHY
+       tristate "Drivers for Quality Semiconductor PHYs"
+       depends on PHYLIB
+       ---help---
+         Currently supports the qs6612
+
+config LXT_PHY
+       tristate "Drivers for the Intel LXT PHYs"
+       depends on PHYLIB
+       ---help---
+         Currently supports the lxt970, lxt971
+
+config CICADA_PHY
+       tristate "Drivers for the Cicada PHYs"
+       depends on PHYLIB
+       ---help---
+         Currently supports the cis8204
+
+endmenu
+
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile

new file mode 100644 (file)

index 0000000..e4116a5
--- /dev/null
+++ b/drivers/net/phy/Makefile
@@ -0,0 +1,10 @@
+# Makefile for Linux PHY drivers
+
+libphy-objs                    := phy.o phy_device.o mdio_bus.o
+
+obj-$(CONFIG_PHYLIB)           += libphy.o
+obj-$(CONFIG_MARVELL_PHY)      += marvell.o
+obj-$(CONFIG_DAVICOM_PHY)      += davicom.o
+obj-$(CONFIG_CICADA_PHY)       += cicada.o
+obj-$(CONFIG_LXT_PHY)          += lxt.o
+obj-$(CONFIG_QSEMI_PHY)                += qsemi.o
diff --git a/drivers/net/phy/cicada.c b/drivers/net/phy/cicada.c

new file mode 100644 (file)

index 0000000..c47fb2e
--- /dev/null
+++ b/drivers/net/phy/cicada.c
@@ -0,0 +1,134 @@
+/*
+ * drivers/net/phy/cicada.c
+ *
+ * Driver for Cicada PHYs
+ *
+ * Author: Andy Fleming
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+
+/* Cicada Extended Control Register 1 */
+#define MII_CIS8201_EXT_CON1           0x17
+#define MII_CIS8201_EXTCON1_INIT       0x0000
+
+/* Cicada Interrupt Mask Register */
+#define MII_CIS8201_IMASK              0x19
+#define MII_CIS8201_IMASK_IEN          0x8000
+#define MII_CIS8201_IMASK_SPEED        0x4000
+#define MII_CIS8201_IMASK_LINK         0x2000
+#define MII_CIS8201_IMASK_DUPLEX       0x1000
+#define MII_CIS8201_IMASK_MASK         0xf000
+
+/* Cicada Interrupt Status Register */
+#define MII_CIS8201_ISTAT              0x1a
+#define MII_CIS8201_ISTAT_STATUS       0x8000
+#define MII_CIS8201_ISTAT_SPEED        0x4000
+#define MII_CIS8201_ISTAT_LINK         0x2000
+#define MII_CIS8201_ISTAT_DUPLEX       0x1000
+
+/* Cicada Auxiliary Control/Status Register */
+#define MII_CIS8201_AUX_CONSTAT        0x1c
+#define MII_CIS8201_AUXCONSTAT_INIT    0x0004
+#define MII_CIS8201_AUXCONSTAT_DUPLEX  0x0020
+#define MII_CIS8201_AUXCONSTAT_SPEED   0x0018
+#define MII_CIS8201_AUXCONSTAT_GBIT    0x0010
+#define MII_CIS8201_AUXCONSTAT_100     0x0008
+
+MODULE_DESCRIPTION("Cicada PHY driver");
+MODULE_AUTHOR("Andy Fleming");
+MODULE_LICENSE("GPL");
+
+/* Initial register setup; returns the first negative or the last phy_write() result. */
+static int cis820x_config_init(struct phy_device *phydev)
+{
+       int rc;
+
+       /* Auxiliary control/status register first. */
+       rc = phy_write(phydev, MII_CIS8201_AUX_CONSTAT,
+                      MII_CIS8201_AUXCONSTAT_INIT);
+       if (rc < 0)
+               return rc;
+
+       /* Then extended control register 1; its result is passed back. */
+       return phy_write(phydev, MII_CIS8201_EXT_CON1,
+                        MII_CIS8201_EXTCON1_INIT);
+}
+
+/* Read the interrupt status register; only a negative result is reported. */
+static int cis820x_ack_interrupt(struct phy_device *phydev)
+{
+       int status = phy_read(phydev, MII_CIS8201_ISTAT);
+       return status < 0 ? status : 0;
+}
+
+/* Write the interrupt mask register:
+ * MII_CIS8201_IMASK_MASK when interrupts are enabled on the PHY,
+ * zero otherwise.  The phy_write() result is returned unchanged. */
+static int cis820x_config_intr(struct phy_device *phydev)
+{
+       int mask = 0;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+               mask = MII_CIS8201_IMASK_MASK;
+
+       return phy_write(phydev, MII_CIS8201_IMASK, mask);
+}
+
+/* Cicada 820x */
+static struct phy_driver cis8204_driver = {
+       .name           = "Cicada Cis8204",
+       .phy_id         = 0x000fc440,
+       .phy_id_mask    = 0x000fffc0,
+       .features       = PHY_GBIT_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .config_init    = cis820x_config_init,
+       .config_intr    = cis820x_config_intr,
+       .ack_interrupt  = cis820x_ack_interrupt,
+       .config_aneg    = genphy_config_aneg,
+       .read_status    = genphy_read_status,
+       .driver         = { .owner = THIS_MODULE, },
+};
+
+static int __init cis8204_init(void)
+{
+       return phy_driver_register(&cis8204_driver);    /* registration result is the init result */
+}
+
+static void __exit cis8204_exit(void)
+{
+       phy_driver_unregister(&cis8204_driver); /* counterpart of the registration in cis8204_init() */
+}
+
+module_init(cis8204_init);
+module_exit(cis8204_exit);
diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c

new file mode 100644 (file)

index 0000000..6caf499
--- /dev/null
+++ b/drivers/net/phy/davicom.c
@@ -0,0 +1,195 @@
+/*
+ * drivers/net/phy/davicom.c
+ *
+ * Driver for Davicom PHYs
+ *
+ * Author: Andy Fleming
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+
+#define MII_DM9161_SCR         0x10
+#define MII_DM9161_SCR_INIT    0x0610
+
+/* DM9161 Interrupt Register */
+#define MII_DM9161_INTR        0x15
+#define MII_DM9161_INTR_PEND           0x8000
+#define MII_DM9161_INTR_DPLX_MASK      0x0800
+#define MII_DM9161_INTR_SPD_MASK       0x0400
+#define MII_DM9161_INTR_LINK_MASK      0x0200
+#define MII_DM9161_INTR_MASK           0x0100
+#define MII_DM9161_INTR_DPLX_CHANGE    0x0010
+#define MII_DM9161_INTR_SPD_CHANGE     0x0008
+#define MII_DM9161_INTR_LINK_CHANGE    0x0004
+#define MII_DM9161_INTR_INIT           0x0000
+#define MII_DM9161_INTR_STOP   \
+(MII_DM9161_INTR_DPLX_MASK | MII_DM9161_INTR_SPD_MASK \
+ | MII_DM9161_INTR_LINK_MASK | MII_DM9161_INTR_MASK)
+
+/* DM9161 10BT Configuration/Status */
+#define MII_DM9161_10BTCSR     0x12
+#define MII_DM9161_10BTCSR_INIT        0x7800
+
+MODULE_DESCRIPTION("Davicom PHY driver");
+MODULE_AUTHOR("Andy Fleming");
+MODULE_LICENSE("GPL");
+
+
+#define DM9161_DELAY 1
+/* Read-modify-write of the DM9161 interrupt register.
+ * The MII_DM9161_INTR_STOP bits are cleared when interrupts are
+ * enabled on the PHY and set otherwise.  A negative phy_read()
+ * result is returned as is, else the phy_write() result. */
+static int dm9161_config_intr(struct phy_device *phydev)
+{
+       int reg = phy_read(phydev, MII_DM9161_INTR);
+
+       if (reg < 0)
+               return reg;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+               reg &= ~(MII_DM9161_INTR_STOP);
+       else
+               reg |= MII_DM9161_INTR_STOP;
+
+       return phy_write(phydev, MII_DM9161_INTR, reg);
+}
+
+/* Isolate the PHY, then let genphy_config_aneg() apply the new
+ * settings.
+ * A negative result from either step is returned as is; any other
+ * outcome is reported as 0. */
+static int dm9161_config_aneg(struct phy_device *phydev)
+{
+       int rc;
+
+       /* Isolate the PHY */
+       rc = phy_write(phydev, MII_BMCR, BMCR_ISOLATE);
+       if (rc < 0)
+               return rc;
+
+       /* Configure the new settings */
+       rc = genphy_config_aneg(phydev);
+
+       return (rc < 0) ? rc : 0;
+}
+
+/* Initial register programming for the DM9161.
+ * The writes in init_seq are issued in order; the first negative
+ * phy_write() result aborts the sequence and is returned, otherwise
+ * the function returns 0. */
+static int dm9161_config_init(struct phy_device *phydev)
+{
+       static const struct {
+               int reg;
+               int val;
+       } init_seq[] = {
+               /* Isolate the PHY */
+               { MII_BMCR, BMCR_ISOLATE },
+               /* Do not bypass the scrambler/descrambler */
+               { MII_DM9161_SCR, MII_DM9161_SCR_INIT },
+               /* Clear 10BTCSR to default */
+               { MII_DM9161_10BTCSR, MII_DM9161_10BTCSR_INIT },
+               /* Reconnect the PHY, and enable Autonegotiation */
+               { MII_BMCR, BMCR_ANENABLE },
+       };
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(init_seq); i++) {
+               int err = phy_write(phydev, init_seq[i].reg, init_seq[i].val);
+
+               if (err < 0)
+                       return err;
+       }
+
+       return 0;
+}
+
+/* Read the DM9161 interrupt register; only a negative result is reported. */
+static int dm9161_ack_interrupt(struct phy_device *phydev)
+{
+       int status = phy_read(phydev, MII_DM9161_INTR);
+       return status < 0 ? status : 0;
+}
+
+static struct phy_driver dm9161_driver = {     /* NOTE(review): no PHY_HAS_INTERRUPT or interrupt callbacks here, although dm9161_* helpers exist; confirm intended */
+       .phy_id         = 0x0181b880,
+       .name           = "Davicom DM9161E",
+       .phy_id_mask    = 0x0ffffff0,
+       .features       = PHY_BASIC_FEATURES,
+       .config_init    = dm9161_config_init,
+       .config_aneg    = dm9161_config_aneg,
+       .read_status    = genphy_read_status,
+       .driver         = { .owner = THIS_MODULE,},
+};
+
+static struct phy_driver dm9131_driver = {
+       .phy_id         = 0x00181b80,
+       .name           = "Davicom DM9131",
+       .phy_id_mask    = 0x0ffffff0,
+       .features       = PHY_BASIC_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .config_aneg    = genphy_config_aneg,
+       .read_status    = genphy_read_status,
+       .ack_interrupt  = dm9161_ack_interrupt,        /* the DM9161 interrupt helpers are used for this entry */
+       .config_intr    = dm9161_config_intr,
+       .driver         = { .owner = THIS_MODULE,},
+};
+
+/* Register both Davicom PHY drivers.
+ * If the second registration fails, the first driver is
+ * unregistered again before the error is returned.
+ * Returns 0 on success. */
+static int __init davicom_init(void)
+{
+       int ret = phy_driver_register(&dm9161_driver);
+
+       if (ret)
+               return ret;
+
+       ret = phy_driver_register(&dm9131_driver);
+       /* roll back the DM9161E registration on failure */
+       if (ret)
+               phy_driver_unregister(&dm9161_driver);
+
+       return ret;
+}
+
+static void __exit davicom_exit(void)
+{
+       phy_driver_unregister(&dm9161_driver);  /* both drivers registered by davicom_init() are removed */
+       phy_driver_unregister(&dm9131_driver);
+}
+
+module_init(davicom_init);
+module_exit(davicom_exit);
diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c

new file mode 100644 (file)

index 0000000..4c84044
--- /dev/null
+++ b/drivers/net/phy/lxt.c
@@ -0,0 +1,179 @@
+/*
+ * drivers/net/phy/lxt.c
+ *
+ * Driver for Intel LXT PHYs
+ *
+ * Author: Andy Fleming
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+
+/* The Level one LXT970 is used by many boards                              */
+
+#define MII_LXT970_IER       17  /* Interrupt Enable Register */
+
+#define MII_LXT970_IER_IEN     0x0002
+
+#define MII_LXT970_ISR       18  /* Interrupt Status Register */
+
+#define MII_LXT970_CONFIG    19  /* Configuration Register    */
+
+/* ------------------------------------------------------------------------- */
+/* The Level one LXT971 is used on some of my custom boards                  */
+
+/* register definitions for the 971 */
+#define MII_LXT971_IER         18  /* Interrupt Enable Register */
+#define MII_LXT971_IER_IEN     0x00f2
+
+#define MII_LXT971_ISR         19  /* Interrupt Status Register */
+
+
+MODULE_DESCRIPTION("Intel LXT PHY driver");
+MODULE_AUTHOR("Andy Fleming");
+MODULE_LICENSE("GPL");
+
+/* Read MII_BMSR and then the LXT970 interrupt status register.
+ * A negative result from either read is returned as is; otherwise
+ * the function returns 0. */
+static int lxt970_ack_interrupt(struct phy_device *phydev)
+{
+       int rc = phy_read(phydev, MII_BMSR);
+
+       if (rc < 0)
+               return rc;
+
+       rc = phy_read(phydev, MII_LXT970_ISR);
+       if (rc < 0)
+               return rc;
+
+       return 0;
+}
+
+/* Write MII_LXT970_IER_IEN to the interrupt enable register when
+ * interrupts are enabled on the PHY, zero otherwise. */
+static int lxt970_config_intr(struct phy_device *phydev)
+{
+       int ier = 0;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+               ier = MII_LXT970_IER_IEN;
+
+       return phy_write(phydev, MII_LXT970_IER, ier);
+}
+
+/*
+ * Write zero to the LXT970 configuration register; the
+ * phy_write() result is returned unchanged.
+ */
+static int lxt970_config_init(struct phy_device *phydev)
+{
+       return phy_write(phydev, MII_LXT970_CONFIG, 0);
+}
+
+
+/* Read the LXT971 interrupt status register.
+ * A negative read result is returned as is; any other value is
+ * reported as 0. */
+static int lxt971_ack_interrupt(struct phy_device *phydev)
+{
+       int rc = phy_read(phydev, MII_LXT971_ISR);
+
+       return (rc < 0) ? rc : 0;
+}
+
+/* Write MII_LXT971_IER_IEN to the interrupt enable register when
+ * interrupts are enabled on the PHY, zero otherwise. */
+static int lxt971_config_intr(struct phy_device *phydev)
+{
+       int ier = 0;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+               ier = MII_LXT971_IER_IEN;
+
+       return phy_write(phydev, MII_LXT971_IER, ier);
+}
+
+static struct phy_driver lxt970_driver = {
+       .name           = "LXT970",
+       .phy_id         = 0x07810000,
+       .phy_id_mask    = 0x0fffffff,
+       .features       = PHY_BASIC_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .config_init    = lxt970_config_init,
+       .config_intr    = lxt970_config_intr,
+       .ack_interrupt  = lxt970_ack_interrupt,
+       .config_aneg    = genphy_config_aneg,
+       .read_status    = genphy_read_status,
+       .driver         = { .owner = THIS_MODULE, },
+};
+
+static struct phy_driver lxt971_driver = {
+       .name           = "LXT971",
+       .phy_id         = 0x0001378e,
+       .phy_id_mask    = 0x0fffffff,
+       .features       = PHY_BASIC_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .config_intr    = lxt971_config_intr,
+       .ack_interrupt  = lxt971_ack_interrupt,
+       .config_aneg    = genphy_config_aneg,
+       .read_status    = genphy_read_status,
+       .driver         = { .owner = THIS_MODULE, },
+};
+
+/* Register both LXT PHY drivers.
+ * If the second registration fails, the first driver is
+ * unregistered again before the error is returned.
+ * Returns 0 on success. */
+static int __init lxt_init(void)
+{
+       int ret = phy_driver_register(&lxt970_driver);
+
+       if (ret)
+               return ret;
+
+       ret = phy_driver_register(&lxt971_driver);
+       /* roll back the LXT970 registration on failure */
+       if (ret)
+               phy_driver_unregister(&lxt970_driver);
+
+       return ret;
+}
+
+static void __exit lxt_exit(void)
+{
+       phy_driver_unregister(&lxt970_driver);  /* both drivers registered by lxt_init() are removed */
+       phy_driver_unregister(&lxt971_driver);
+}
+
+module_init(lxt_init);
+module_exit(lxt_exit);
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c

new file mode 100644 (file)

index 0000000..4a72b02
--- /dev/null
+++ b/drivers/net/phy/marvell.c
@@ -0,0 +1,140 @@
+/*
+ * drivers/net/phy/marvell.c
+ *
+ * Driver for Marvell PHYs
+ *
+ * Author: Andy Fleming
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+
+#define MII_M1011_IEVENT               0x13
+#define MII_M1011_IEVENT_CLEAR         0x0000
+
+#define MII_M1011_IMASK                        0x12
+#define MII_M1011_IMASK_INIT           0x6400
+#define MII_M1011_IMASK_CLEAR          0x0000
+
+MODULE_DESCRIPTION("Marvell PHY driver");
+MODULE_AUTHOR("Andy Fleming");
+MODULE_LICENSE("GPL");
+
+/* Acknowledge by reading MII_M1011_IEVENT.
+ * A negative read result is returned as is, anything else as 0. */
+static int marvell_ack_interrupt(struct phy_device *phydev)
+{
+       /* Clear the interrupts by reading the reg */
+       int events = phy_read(phydev, MII_M1011_IEVENT);
+
+       if (events < 0)
+               return events;
+
+       return 0;
+}
+
+/* Write MII_M1011_IMASK_INIT to the interrupt mask register when
+ * interrupts are enabled on the PHY, MII_M1011_IMASK_CLEAR otherwise. */
+static int marvell_config_intr(struct phy_device *phydev)
+{
+       int imask = MII_M1011_IMASK_CLEAR;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+               imask = MII_M1011_IMASK_INIT;
+
+       return phy_write(phydev, MII_M1011_IMASK, imask);
+}
+
+/* Autonegotiation setup for the Marvell PHY.
+ *
+ * The Marvell PHY has an errata which requires that certain
+ * registers get written, in order, to restart autonegotiation.
+ * The first negative phy_write() result aborts the sequence and
+ * is returned; otherwise the genphy_config_aneg() result is
+ * returned. */
+static int marvell_config_aneg(struct phy_device *phydev)
+{
+       unsigned int i;
+
+       static const struct {
+               int reg;
+               int val;
+       } errata_seq[] = {
+               /* reset bit of the MII_BMCR register first */
+               { MII_BMCR, BMCR_RESET },
+               /* register/value pairs exactly as required by the errata */
+               { 0x1d, 0x1f },
+               { 0x1e, 0x200c },
+               { 0x1d, 0x5 },
+               { 0x1e, 0 },
+               { 0x1e, 0x100 },
+       };
+
+       /* Stop at the first write that fails. */
+       for (i = 0; i < ARRAY_SIZE(errata_seq); i++) {
+               int err = phy_write(phydev, errata_seq[i].reg,
+                                   errata_seq[i].val);
+
+               if (err < 0)
+                       return err;
+       }
+
+       /* Hand over to the generic autonegotiation setup. */
+       return genphy_config_aneg(phydev);
+}
+
+
+static struct phy_driver m88e1101_driver = {
+       .name           = "Marvell 88E1101",
+       .phy_id         = 0x01410c00,
+       .phy_id_mask    = 0xffffff00,
+       .features       = PHY_GBIT_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .config_intr    = marvell_config_intr,
+       .ack_interrupt  = marvell_ack_interrupt,
+       .config_aneg    = marvell_config_aneg,
+       .read_status    = genphy_read_status,
+       .driver         = { .owner = THIS_MODULE, },
+};
+
+static int __init marvell_init(void)
+{
+       return phy_driver_register(&m88e1101_driver);   /* registration result is the init result */
+}
+
+static void __exit marvell_exit(void)
+{
+       phy_driver_unregister(&m88e1101_driver);        /* counterpart of the registration in marvell_init() */
+}
+
+module_init(marvell_init);
+module_exit(marvell_exit);
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c

new file mode 100644 (file)

index 0000000..41f62c0
--- /dev/null
+++ b/drivers/net/phy/mdio_bus.c
@@ -0,0 +1,176 @@
+/*
+ * drivers/net/phy/mdio_bus.c
+ *
+ * MDIO Bus interface
+ *
+ * Author: Andy Fleming
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+
+/* mdiobus_register
+ *
+ * description: Called by a bus driver to bring up all the PHYs on a
+ *   given bus, and attach them to the bus.  Returns -EINVAL, before
+ *   anything in bus is written, if bus/name/read/write is NULL. */
+int mdiobus_register(struct mii_bus *bus)
+{
+       int i;
+       int err = 0;
+
+       if (NULL == bus || NULL == bus->name ||
+                       NULL == bus->read ||
+                       NULL == bus->write)
+               return -EINVAL;
+
+       spin_lock_init(&bus->mdio_lock);
+
+       if (bus->reset)
+               bus->reset(bus);
+
+       for (i = 0; i < PHY_MAX_ADDR; i++) {
+               struct phy_device *phydev;
+
+               phydev = get_phy_device(bus, i);
+
+               if (IS_ERR(phydev))
+                       return PTR_ERR(phydev);
+
+               /* There's a PHY at this address
+                * We need to set:
+                * 1) IRQ
+                * 2) bus_id
+                * 3) parent
+                * 4) bus
+                * 5) mii_bus
+                * And, we need to register it */
+               if (phydev) {
+                       phydev->irq = bus->irq[i];
+
+                       phydev->dev.parent = bus->dev;
+                       phydev->dev.bus = &mdio_bus_type;
+                       sprintf(phydev->dev.bus_id, "phy%d:%d", bus->id, i);
+
+                       phydev->bus = bus;
+
+                       err = device_register(&phydev->dev);
+
+                       if (err)
+                               printk(KERN_ERR "phy %d failed to register\n",
+                                               i);
+               }
+
+               bus->phy_map[i] = phydev;
+       }
+
+       pr_info("%s: probed\n", bus->name);
+
+       return err;
+}
+EXPORT_SYMBOL(mdiobus_register);
+
+void mdiobus_unregister(struct mii_bus *bus)
+{
+       int i;
+
+       for (i = 0; i < PHY_MAX_ADDR; i++) {
+               if (bus->phy_map[i]) {
+                       device_unregister(&bus->phy_map[i]->dev);
+                       kfree(bus->phy_map[i]);
+               }
+       }
+}
+EXPORT_SYMBOL(mdiobus_unregister);
+
+/* mdio_bus_match
+ *
+ * description: Given a PHY device, and a PHY driver, return 1 if
+ *   the driver supports the device.  Otherwise, return 0
+ */
+static int mdio_bus_match(struct device *dev, struct device_driver *drv)
+{
+       struct phy_device *phydev = to_phy_device(dev);
+       struct phy_driver *phydrv = to_phy_driver(drv);
+
+       return (phydrv->phy_id == (phydev->phy_id & phydrv->phy_id_mask));
+}
+
+/* Suspend and resume.  Copied from platform_suspend and
+ * platform_resume
+ */
+static int mdio_bus_suspend(struct device * dev, u32 state)
+{
+       int ret = 0;
+       struct device_driver *drv = dev->driver;
+
+       if (drv && drv->suspend) {
+               ret = drv->suspend(dev, state, SUSPEND_DISABLE);
+               if (ret == 0)
+                       ret = drv->suspend(dev, state, SUSPEND_SAVE_STATE);
+               if (ret == 0)
+                       ret = drv->suspend(dev, state, SUSPEND_POWER_DOWN);
+       }
+       return ret;
+}
+
+static int mdio_bus_resume(struct device * dev)
+{
+       int ret = 0;
+       struct device_driver *drv = dev->driver;
+
+       if (drv && drv->resume) {
+               ret = drv->resume(dev, RESUME_POWER_ON);
+               if (ret == 0)
+                       ret = drv->resume(dev, RESUME_RESTORE_STATE);
+               if (ret == 0)
+                       ret = drv->resume(dev, RESUME_ENABLE);
+       }
+       return ret;
+}
+
+struct bus_type mdio_bus_type = {
+       .name           = "mdio_bus",
+       .match          = mdio_bus_match,
+       .suspend        = mdio_bus_suspend,
+       .resume         = mdio_bus_resume,
+};
+
+int __init mdio_bus_init(void)
+{
+       return bus_register(&mdio_bus_type);
+}
+
+void __exit mdio_bus_exit(void)
+{
+       bus_unregister(&mdio_bus_type);
+}
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c

new file mode 100644 (file)

index 0000000..d9e11f9
--- /dev/null
+++ b/drivers/net/phy/phy.c
@@ -0,0 +1,871 @@
+/*
+ * drivers/net/phy/phy.c
+ *
+ * Framework for configuring and reading PHY devices
+ * Based on code in sungem_phy.c and gianfar_phy.c
+ *
+ * Author: Andy Fleming
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+
+/* Convenience function to print out the current phy status
+ */
+void phy_print_status(struct phy_device *phydev)
+{
+       pr_info("%s: Link is %s", phydev->dev.bus_id,
+                       phydev->link ? "Up" : "Down");
+       if (phydev->link)
+               printk(" - %d/%s", phydev->speed,
+                               DUPLEX_FULL == phydev->duplex ?
+                               "Full" : "Half");
+
+       printk("\n");
+}
+EXPORT_SYMBOL(phy_print_status);
+
+
+/* Convenience functions for reading/writing a given PHY
+ * register. They MUST NOT be called from interrupt context,
+ * because the bus read/write functions may wait for an interrupt
+ * to conclude the operation. */
+int phy_read(struct phy_device *phydev, u16 regnum)
+{
+       int retval;
+       struct mii_bus *bus = phydev->bus;
+
+       spin_lock_bh(&bus->mdio_lock);
+       retval = bus->read(bus, phydev->addr, regnum);
+       spin_unlock_bh(&bus->mdio_lock);
+
+       return retval;
+}
+EXPORT_SYMBOL(phy_read);
+
+int phy_write(struct phy_device *phydev, u16 regnum, u16 val)
+{
+       int err;
+       struct mii_bus *bus = phydev->bus;
+
+       spin_lock_bh(&bus->mdio_lock);
+       err = bus->write(bus, phydev->addr, regnum, val);
+       spin_unlock_bh(&bus->mdio_lock);
+
+       return err;
+}
+EXPORT_SYMBOL(phy_write);
+
+
+int phy_clear_interrupt(struct phy_device *phydev)
+{
+       int err = 0;
+
+       if (phydev->drv->ack_interrupt)
+               err = phydev->drv->ack_interrupt(phydev);
+
+       return err;
+}
+
+
+int phy_config_interrupt(struct phy_device *phydev, u32 interrupts)
+{
+       int err = 0;
+
+       phydev->interrupts = interrupts;
+       if (phydev->drv->config_intr)
+               err = phydev->drv->config_intr(phydev);
+
+       return err;
+}
+
+
+/* phy_aneg_done
+ *
+ * description: Reads the status register and returns 0 if
+ *   auto-negotiation is incomplete, or the negative value from
+ *   phy_read() on error.  Returns BMSR_ANEGCOMPLETE if it is done.
+ */
+static inline int phy_aneg_done(struct phy_device *phydev)
+{
+       int retval;
+
+       retval = phy_read(phydev, MII_BMSR);
+
+       return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE);
+}
+
+/* A structure for mapping a particular speed and duplex
+ * combination to a particular SUPPORTED and ADVERTISED value */
+struct phy_setting {
+       int speed;
+       int duplex;
+       u32 setting;
+};
+
+/* A mapping of all SUPPORTED settings to speed/duplex */
+static struct phy_setting settings[] = {
+       {
+               .speed = 10000,
+               .duplex = DUPLEX_FULL,
+               .setting = SUPPORTED_10000baseT_Full,
+       },
+       {
+               .speed = SPEED_1000,
+               .duplex = DUPLEX_FULL,
+               .setting = SUPPORTED_1000baseT_Full,
+       },
+       {
+               .speed = SPEED_1000,
+               .duplex = DUPLEX_HALF,
+               .setting = SUPPORTED_1000baseT_Half,
+       },
+       {
+               .speed = SPEED_100,
+               .duplex = DUPLEX_FULL,
+               .setting = SUPPORTED_100baseT_Full,
+       },
+       {
+               .speed = SPEED_100,
+               .duplex = DUPLEX_HALF,
+               .setting = SUPPORTED_100baseT_Half,
+       },
+       {
+               .speed = SPEED_10,
+               .duplex = DUPLEX_FULL,
+               .setting = SUPPORTED_10baseT_Full,
+       },
+       {
+               .speed = SPEED_10,
+               .duplex = DUPLEX_HALF,
+               .setting = SUPPORTED_10baseT_Half,
+       },
+};
+
+#define MAX_NUM_SETTINGS (sizeof(settings)/sizeof(struct phy_setting))
+
+/* phy_find_setting
+ *
+ * description: Walks the settings array looking for the entry
+ *   whose speed and duplex both equal the requested values, and
+ *   returns that entry's index.  If no entry matches, the index
+ *   of the final entry in the array is returned instead.
+ */
+static inline int phy_find_setting(int speed, int duplex)
+{
+       int i;
+
+       for (i = 0; i < MAX_NUM_SETTINGS; i++)
+               if (settings[i].speed == speed &&
+                               settings[i].duplex == duplex)
+                       return i;
+
+       return MAX_NUM_SETTINGS - 1;
+}
+
+/* phy_find_valid
+ * idx: The first index in settings[] to search
+ * features: A mask of the valid settings
+ *
+ * description: Returns the index of the first valid setting less
+ *   than or equal to the one pointed to by idx, as determined by
+ *   the mask in features.  Returns the index of the last setting
+ *   if nothing else matches.
+ */
+static inline int phy_find_valid(int idx, u32 features)
+{
+       while (idx < MAX_NUM_SETTINGS && !(settings[idx].setting & features))
+               idx++;
+
+       return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1;
+}
+
+/* phy_sanitize_settings
+ *
+ * description: Make sure the PHY is set to supported speeds and
+ *   duplexes.  Drop down by one in this order:  1000/FULL,
+ *   1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF
+ */
+void phy_sanitize_settings(struct phy_device *phydev)
+{
+       u32 features = phydev->supported;
+       int idx;
+
+       /* Sanitize settings based on PHY capabilities */
+       if ((features & SUPPORTED_Autoneg) == 0)
+               phydev->autoneg = 0;
+
+       idx = phy_find_valid(phy_find_setting(phydev->speed, phydev->duplex),
+                       features);
+
+       phydev->speed = settings[idx].speed;
+       phydev->duplex = settings[idx].duplex;
+}
+EXPORT_SYMBOL(phy_sanitize_settings);
+
+/* phy_ethtool_sset:
+ * A generic ethtool sset function.  Handles all the details
+ *
+ * A few notes about parameter checking:
+ * - We don't set port or transceiver, so we don't care what they
+ *   were set to.
+ * - phy_start_aneg() will make sure forced settings are sane, and
+ *   choose the next best ones from the ones selected, so we don't
+ *   care if ethtool tries to give us bad values
+ *
+ * A note about the PHYCONTROL Layer.  If you turn off
+ * CONFIG_PHYCONTROL, you will need to read the PHY status
+ * registers after this function completes, and update your
+ * controller manually.
+ */
+int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd)
+{
+       if (cmd->phy_address != phydev->addr)
+               return -EINVAL;
+
+       /* We make sure that we don't pass unsupported
+        * values in to the PHY */
+       cmd->advertising &= phydev->supported;
+
+       /* Verify the settings we care about. */
+       if (cmd->autoneg != AUTONEG_ENABLE && cmd->autoneg != AUTONEG_DISABLE)
+               return -EINVAL;
+
+       if (cmd->autoneg == AUTONEG_ENABLE && cmd->advertising == 0)
+               return -EINVAL;
+
+       if (cmd->autoneg == AUTONEG_DISABLE
+                       && ((cmd->speed != SPEED_1000
+                                       && cmd->speed != SPEED_100
+                                       && cmd->speed != SPEED_10)
+                               || (cmd->duplex != DUPLEX_HALF
+                                       && cmd->duplex != DUPLEX_FULL)))
+               return -EINVAL;
+
+       phydev->autoneg = cmd->autoneg;
+
+       phydev->speed = cmd->speed;
+
+       phydev->advertising = cmd->advertising;
+
+       if (AUTONEG_ENABLE == cmd->autoneg)
+               phydev->advertising |= ADVERTISED_Autoneg;
+       else
+               phydev->advertising &= ~ADVERTISED_Autoneg;
+
+       phydev->duplex = cmd->duplex;
+
+       /* Restart the PHY */
+       phy_start_aneg(phydev);
+
+       return 0;
+}
+
+int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd)
+{
+       cmd->supported = phydev->supported;
+
+       cmd->advertising = phydev->advertising;
+
+       cmd->speed = phydev->speed;
+       cmd->duplex = phydev->duplex;
+       cmd->port = PORT_MII;
+       cmd->phy_address = phydev->addr;
+       cmd->transceiver = XCVR_EXTERNAL;
+       cmd->autoneg = phydev->autoneg;
+
+       return 0;
+}
+
+
+/* Handles SIOCGMIIPHY, SIOCGMIIREG and SIOCSMIIREG for a PHY.  Note that
+ * this function is currently incompatible with the PHYCONTROL layer.  It
+ * changes registers without regard to current state.  Use at own risk.
+ * A BMCR write with neither RESET nor ANENABLE set disables autoneg. */
+int phy_mii_ioctl(struct phy_device *phydev,
+               struct mii_ioctl_data *mii_data, int cmd)
+{
+       u16 val = mii_data->val_in;
+
+       switch (cmd) {
+       case SIOCGMIIPHY:
+               mii_data->phy_id = phydev->addr;
+               break;
+       case SIOCGMIIREG:
+               mii_data->val_out = phy_read(phydev, mii_data->reg_num);
+               break;
+
+       case SIOCSMIIREG:
+               if (!capable(CAP_NET_ADMIN))
+                       return -EPERM;
+
+               if (mii_data->phy_id == phydev->addr) {
+                       switch(mii_data->reg_num) {
+                       case MII_BMCR:
+                               if ((val & (BMCR_RESET|BMCR_ANENABLE)) == 0)
+                                       phydev->autoneg = AUTONEG_DISABLE;
+                               else
+                                       phydev->autoneg = AUTONEG_ENABLE;
+                               if ((!phydev->autoneg) && (val & BMCR_FULLDPLX))
+                                       phydev->duplex = DUPLEX_FULL;
+                               else
+                                       phydev->duplex = DUPLEX_HALF;
+                               break;
+                       case MII_ADVERTISE:
+                               phydev->advertising = val;
+                               break;
+                       default:
+                               /* do nothing */
+                               break;
+                       }
+               }
+
+               phy_write(phydev, mii_data->reg_num, val);
+
+               if (mii_data->reg_num == MII_BMCR
+                               && val & BMCR_RESET
+                               && phydev->drv->config_init)
+                       phydev->drv->config_init(phydev);
+               break;
+       }
+
+       return 0;
+}
+
+/* phy_start_aneg
+ *
+ * description: Sanitizes the settings (if we're not
+ *   autonegotiating them), and then calls the driver's
+ *   config_aneg function.  If the PHYCONTROL Layer is operating,
+ *   we change the state to reflect the beginning of
+ *   Auto-negotiation or forcing.
+ */
+int phy_start_aneg(struct phy_device *phydev)
+{
+       int err;
+
+       spin_lock(&phydev->lock);
+
+       if (AUTONEG_DISABLE == phydev->autoneg)
+               phy_sanitize_settings(phydev);
+
+       err = phydev->drv->config_aneg(phydev);
+
+#ifdef CONFIG_PHYCONTROL
+       if (err < 0)
+               goto out_unlock;
+
+       if (phydev->state != PHY_HALTED) {
+               if (AUTONEG_ENABLE == phydev->autoneg) {
+                       phydev->state = PHY_AN;
+                       phydev->link_timeout = PHY_AN_TIMEOUT;
+               } else {
+                       phydev->state = PHY_FORCING;
+                       phydev->link_timeout = PHY_FORCE_TIMEOUT;
+               }
+       }
+
+out_unlock:
+#endif
+       spin_unlock(&phydev->lock);
+       return err;
+}
+EXPORT_SYMBOL(phy_start_aneg);
+
+
+#ifdef CONFIG_PHYCONTROL
+static void phy_change(void *data);
+static void phy_timer(unsigned long data);
+
+/* phy_start_machine:
+ *
+ * description: The PHY infrastructure can run a state machine
+ *   which tracks whether the PHY is starting up, negotiating,
+ *   etc.  This function starts the timer which tracks the state
+ *   of the PHY.  If you want to be notified when the state
+ *   changes, pass in the callback, otherwise, pass NULL.  If you
+ *   want to maintain your own state machine, do not call this
+ *   function. */
+void phy_start_machine(struct phy_device *phydev,
+               void (*handler)(struct net_device *))
+{
+       phydev->adjust_state = handler;
+
+       init_timer(&phydev->phy_timer);
+       phydev->phy_timer.function = &phy_timer;
+       phydev->phy_timer.data = (unsigned long) phydev;
+       mod_timer(&phydev->phy_timer, jiffies + HZ);
+}
+
+/* phy_stop_machine
+ *
+ * description: Stops the state machine timer, sets the state to
+ *   UP (unless it wasn't up yet), and then frees the interrupt,
+ *   if it is in use. This function must be called BEFORE
+ *   phy_detach.
+ */
+void phy_stop_machine(struct phy_device *phydev)
+{
+       del_timer_sync(&phydev->phy_timer);
+
+       spin_lock(&phydev->lock);
+       if (phydev->state > PHY_UP)
+               phydev->state = PHY_UP;
+       spin_unlock(&phydev->lock);
+
+       if (phydev->irq != PHY_POLL)
+               phy_stop_interrupts(phydev);
+
+       phydev->adjust_state = NULL;
+}
+
+/* phy_force_reduction
+ *
+ * description: Reduces the speed/duplex settings by
+ *   one notch.  The order is so:
+ *   1000/FULL, 1000/HALF, 100/FULL, 100/HALF,
+ *   10/FULL, 10/HALF.  The function bottoms out at 10/HALF.
+ */
+static void phy_force_reduction(struct phy_device *phydev)
+{
+       int idx;
+
+       idx = phy_find_setting(phydev->speed, phydev->duplex);
+       
+       idx++;
+
+       idx = phy_find_valid(idx, phydev->supported);
+
+       phydev->speed = settings[idx].speed;
+       phydev->duplex = settings[idx].duplex;
+
+       pr_info("Trying %d/%s\n", phydev->speed,
+                       DUPLEX_FULL == phydev->duplex ?
+                       "FULL" : "HALF");
+}
+
+
+/* phy_error:
+ *
+ * Moves the PHY to the HALTED state in response to a read
+ * or write error, and tells the controller the link is down.
+ * Must not be called from interrupt context, or while the
+ * phydev->lock is held.
+ */
+void phy_error(struct phy_device *phydev)
+{
+       spin_lock(&phydev->lock);
+       phydev->state = PHY_HALTED;
+       spin_unlock(&phydev->lock);
+}
+
+/* phy_interrupt
+ *
+ * description: When a PHY interrupt occurs, the handler disables
+ * interrupts, and schedules a work task to clear the interrupt.
+ */
+static irqreturn_t phy_interrupt(int irq, void *phy_dat, struct pt_regs *regs)
+{
+       struct phy_device *phydev = phy_dat;
+
+       /* The MDIO bus is not allowed to be written in interrupt
+        * context, so we need to disable the irq here.  A work
+        * queue will write the PHY to disable and clear the
+        * interrupt, and then reenable the irq line. */
+       disable_irq_nosync(irq);
+
+       schedule_work(&phydev->phy_queue);
+
+       return IRQ_HANDLED;
+}
+
+/* Enable the interrupts from the PHY side */
+int phy_enable_interrupts(struct phy_device *phydev)
+{
+       int err;
+
+       err = phy_clear_interrupt(phydev);
+
+       if (err < 0)
+               return err;
+
+       err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED);
+
+       return err;
+}
+EXPORT_SYMBOL(phy_enable_interrupts);
+
+/* Disable the PHY interrupts from the PHY side */
+int phy_disable_interrupts(struct phy_device *phydev)
+{
+       int err;
+
+       /* Disable PHY interrupts */
+       err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
+
+       if (err)
+               goto phy_err;
+
+       /* Clear the interrupt */
+       err = phy_clear_interrupt(phydev);
+
+       if (err)
+               goto phy_err;
+
+       return 0;
+
+phy_err:
+       phy_error(phydev);
+
+       return err;
+}
+EXPORT_SYMBOL(phy_disable_interrupts);
+
+/* phy_start_interrupts
+ *
+ * description: Request the interrupt for the given PHY.  If
+ *   this fails, then we set irq to PHY_POLL.
+ *   Otherwise, we enable the interrupts in the PHY.
+ *   Returns 0 on success.
+ *   This should only be called with a valid IRQ number.
+ */
+int phy_start_interrupts(struct phy_device *phydev)
+{
+       int err = 0;
+
+       INIT_WORK(&phydev->phy_queue, phy_change, phydev);
+
+       if (request_irq(phydev->irq, phy_interrupt,
+                               SA_SHIRQ,
+                               "phy_interrupt",
+                               phydev) < 0) {
+               printk(KERN_WARNING "%s: Can't get IRQ %d (PHY)\n",
+                               phydev->bus->name,
+                               phydev->irq);
+               phydev->irq = PHY_POLL;
+               return 0;
+       }
+
+       err = phy_enable_interrupts(phydev);
+
+       return err;
+}
+EXPORT_SYMBOL(phy_start_interrupts);
+
+int phy_stop_interrupts(struct phy_device *phydev)
+{
+       int err;
+
+       err = phy_disable_interrupts(phydev);
+
+       if (err)
+               phy_error(phydev);
+
+       free_irq(phydev->irq, phydev);
+
+       return err;
+}
+EXPORT_SYMBOL(phy_stop_interrupts);
+
+
+/* Scheduled by the phy_interrupt/timer to handle PHY changes */
+static void phy_change(void *data)
+{
+       int err;
+       struct phy_device *phydev = data;
+
+       err = phy_disable_interrupts(phydev);
+
+       if (err)
+               goto phy_err;
+
+       spin_lock(&phydev->lock);
+       if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state))
+               phydev->state = PHY_CHANGELINK;
+       spin_unlock(&phydev->lock);
+
+       enable_irq(phydev->irq);
+
+       /* Reenable interrupts */
+       err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED);
+
+       if (err)
+               goto irq_enable_err;
+
+       return;
+
+irq_enable_err:
+       disable_irq(phydev->irq);
+phy_err:
+       phy_error(phydev);
+}
+
+/* Bring down the PHY link, and stop checking the status. */
+void phy_stop(struct phy_device *phydev)
+{
+       spin_lock(&phydev->lock);
+
+       if (PHY_HALTED == phydev->state)
+               goto out_unlock;
+
+       if (phydev->irq != PHY_POLL) {
+               /* Clear any pending interrupts */
+               phy_clear_interrupt(phydev);
+
+               /* Disable PHY Interrupts */
+               phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
+       }
+
+       phydev->state = PHY_HALTED;
+
+out_unlock:
+       spin_unlock(&phydev->lock);
+}
+
+
+/* phy_start
+ *
+ * description: Indicates the attached device's readiness to
+ *   handle PHY-related work.  Used during startup to start the
+ *   PHY, and after a call to phy_stop() to resume operation.
+ *   Also used to indicate the MDIO bus has cleared an error
+ *   condition.
+ */
+void phy_start(struct phy_device *phydev)
+{
+       spin_lock(&phydev->lock);
+
+       switch (phydev->state) {
+               case PHY_STARTING:
+                       phydev->state = PHY_PENDING;
+                       break;
+               case PHY_READY:
+                       phydev->state = PHY_UP;
+                       break;
+               case PHY_HALTED:
+                       phydev->state = PHY_RESUMING;
+               default:
+                       break;
+       }
+       spin_unlock(&phydev->lock);
+}
+EXPORT_SYMBOL(phy_stop);
+EXPORT_SYMBOL(phy_start);
+
+/* PHY timer which handles the state machine */
+static void phy_timer(unsigned long data)
+{
+       struct phy_device *phydev = (struct phy_device *)data;
+       int needs_aneg = 0;
+       int err = 0;
+
+       spin_lock(&phydev->lock);
+
+       if (phydev->adjust_state)
+               phydev->adjust_state(phydev->attached_dev);
+
+       switch(phydev->state) {
+               case PHY_DOWN:
+               case PHY_STARTING:
+               case PHY_READY:
+               case PHY_PENDING:
+                       break;
+               case PHY_UP:
+                       needs_aneg = 1;
+
+                       phydev->link_timeout = PHY_AN_TIMEOUT;
+
+                       break;
+               case PHY_AN:
+                       /* Check if negotiation is done.  Break
+                        * if there's an error */
+                       err = phy_aneg_done(phydev);
+                       if (err < 0)
+                               break;
+
+                       /* If auto-negotiation is done, we change to
+                        * either RUNNING, or NOLINK */
+                       if (err > 0) {
+                               err = phy_read_status(phydev);
+
+                               if (err)
+                                       break;
+
+                               if (phydev->link) {
+                                       phydev->state = PHY_RUNNING;
+                                       netif_carrier_on(phydev->attached_dev);
+                               } else {
+                                       phydev->state = PHY_NOLINK;
+                                       netif_carrier_off(phydev->attached_dev);
+                               }
+
+                               phydev->adjust_link(phydev->attached_dev);
+
+                       } else if (0 == phydev->link_timeout--) {
+                               /* The counter expired, so either we
+                                * switch to forced mode, or the
+                                * magic_aneg bit exists, and we try aneg
+                                * again */
+                               if (!(phydev->drv->flags & PHY_HAS_MAGICANEG)) {
+                                       int idx;
+
+                                       /* We'll start from the
+                                        * fastest speed, and work
+                                        * our way down */
+                                       idx = phy_find_valid(0,
+                                                       phydev->supported);
+
+                                       phydev->speed = settings[idx].speed;
+                                       phydev->duplex = settings[idx].duplex;
+                                       
+                                       phydev->autoneg = AUTONEG_DISABLE;
+                                       phydev->state = PHY_FORCING;
+                                       phydev->link_timeout =
+                                               PHY_FORCE_TIMEOUT;
+
+                                       pr_info("Trying %d/%s\n",
+                                                       phydev->speed,
+                                                       DUPLEX_FULL ==
+                                                       phydev->duplex ?
+                                                       "FULL" : "HALF");
+                               }
+
+                               needs_aneg = 1;
+                       }
+                       break;
+               case PHY_NOLINK:
+                       err = phy_read_status(phydev);
+
+                       if (err)
+                               break;
+
+                       if (phydev->link) {
+                               phydev->state = PHY_RUNNING;
+                               netif_carrier_on(phydev->attached_dev);
+                               phydev->adjust_link(phydev->attached_dev);
+                       }
+                       break;
+               case PHY_FORCING:
+                       err = phy_read_status(phydev);
+
+                       if (err)
+                               break;
+
+                       if (phydev->link) {
+                               phydev->state = PHY_RUNNING;
+                               netif_carrier_on(phydev->attached_dev);
+                       } else {
+                               if (0 == phydev->link_timeout--) {
+                                       phy_force_reduction(phydev);
+                                       needs_aneg = 1;
+                               }
+                       }
+
+                       phydev->adjust_link(phydev->attached_dev);
+                       break;
+               case PHY_RUNNING:
+                       /* Only register a CHANGE if we are
+                        * polling */
+                       if (PHY_POLL == phydev->irq)
+                               phydev->state = PHY_CHANGELINK;
+                       break;
+               case PHY_CHANGELINK:
+                       err = phy_read_status(phydev);
+
+                       if (err)
+                               break;
+
+                       if (phydev->link) {
+                               phydev->state = PHY_RUNNING;
+                               netif_carrier_on(phydev->attached_dev);
+                       } else {
+                               phydev->state = PHY_NOLINK;
+                               netif_carrier_off(phydev->attached_dev);
+                       }
+
+                       phydev->adjust_link(phydev->attached_dev);
+
+                       if (PHY_POLL != phydev->irq)
+                               err = phy_config_interrupt(phydev,
+                                               PHY_INTERRUPT_ENABLED);
+                       break;
+               case PHY_HALTED:
+                       if (phydev->link) {
+                               phydev->link = 0;
+                               netif_carrier_off(phydev->attached_dev);
+                               phydev->adjust_link(phydev->attached_dev);
+                       }
+                       break;
+               case PHY_RESUMING:
+
+                       err = phy_clear_interrupt(phydev);
+
+                       if (err)
+                               break;
+
+                       err = phy_config_interrupt(phydev,
+                                       PHY_INTERRUPT_ENABLED);
+
+                       if (err)
+                               break;
+
+                       if (AUTONEG_ENABLE == phydev->autoneg) {
+                               err = phy_aneg_done(phydev);
+                               if (err < 0)
+                                       break;
+
+                               /* err > 0 if AN is done.
+                                * Otherwise, it's 0, and we're
+                                * still waiting for AN */
+                               if (err > 0) {
+                                       phydev->state = PHY_RUNNING;
+                               } else {
+                                       phydev->state = PHY_AN;
+                                       phydev->link_timeout = PHY_AN_TIMEOUT;
+                               }
+                       } else
+                               phydev->state = PHY_RUNNING;
+                       break;
+       }
+
+       spin_unlock(&phydev->lock);
+
+       if (needs_aneg)
+               err = phy_start_aneg(phydev);
+
+       if (err < 0)
+               phy_error(phydev);
+
+       mod_timer(&phydev->phy_timer, jiffies + PHY_STATE_TIME * HZ);
+}
+
+#endif /* CONFIG_PHYCONTROL */
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c

new file mode 100644 (file)

index 0000000..33f7bdb
--- /dev/null
+++ b/drivers/net/phy/phy_device.c
@@ -0,0 +1,696 @@
+/*
+ * drivers/net/phy/phy_device.c
+ *
+ * Framework for finding and configuring PHYs.
+ * Also contains generic PHY driver
+ *
+ * Author: Andy Fleming
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+
+static struct phy_driver genphy_driver;
+extern int mdio_bus_init(void);
+extern void mdio_bus_exit(void);
+
+/* get_phy_device
+ *
+ * description: Probes address addr on the given MII bus by reading
+ *   the two PHY identifier registers.  When a device answers, a
+ *   phy_device describing it is allocated, filled in with default
+ *   values and handed back to the caller.
+ *
+ * returns: the new phy_device on success, NULL when the combined ID
+ *   reads back as all ones (nothing at that address), or an
+ *   ERR_PTR() carrying the bus read error or -ENOMEM.
+ */
+struct phy_device * get_phy_device(struct mii_bus *bus, int addr)
+{
+       struct phy_device *phydev;
+       u32 id;
+       int reg;
+
+       /* PHYSID1 supplies the upper 16 bits of the ID */
+       reg = bus->read(bus, addr, MII_PHYSID1);
+       if (reg < 0)
+               return ERR_PTR(reg);
+
+       id = (reg & 0xffff) << 16;
+
+       /* PHYSID2 supplies the lower 16 bits of the ID */
+       reg = bus->read(bus, addr, MII_PHYSID2);
+       if (reg < 0)
+               return ERR_PTR(reg);
+
+       id |= (reg & 0xffff);
+
+       /* An ID of all Fs means nothing answered at this address */
+       if (id == 0xffffffff)
+               return NULL;
+
+       phydev = kcalloc(1, sizeof(*phydev), GFP_KERNEL);
+       if (!phydev)
+               return ERR_PTR(-ENOMEM);
+
+       /* Where the PHY lives and what it identified itself as */
+       phydev->bus = bus;
+       phydev->addr = addr;
+       phydev->phy_id = id;
+
+       /* Default link parameters until the PHY is configured */
+       phydev->autoneg = AUTONEG_ENABLE;
+       phydev->speed = 0;
+       phydev->duplex = -1;
+       phydev->pause = 0;
+       phydev->asym_pause = 0;
+       phydev->link = 1;
+
+       phydev->state = PHY_DOWN;
+
+       spin_lock_init(&phydev->lock);
+
+       return phydev;
+}
+
+#ifdef CONFIG_PHYCONTROL
+/* phy_prepare_link:
+ *
+ * description: Tells the PHY infrastructure to handle the
+ *   gory details on monitoring link status (whether through
+ *   polling or an interrupt), and to call back to the
+ *   connected device driver when the link status changes.
+ *   If you want to monitor your own link state, don't call
+ *   this function.
+ *
+ *   phydev:  the PHY whose adjust_link hook is being set
+ *   handler: callback stored in phydev->adjust_link; it takes the
+ *            attached net_device as its only argument
+ */
+void phy_prepare_link(struct phy_device *phydev,
+               void (*handler)(struct net_device *))
+{
+       phydev->adjust_link = handler;
+}
+
+/* phy_connect:
+ *
+ * description: One-call helper for ethernet drivers that want the
+ *   PHY layer to do all the work.  It attaches dev to the PHY named
+ *   phy_id, registers handler as the link-change callback, starts
+ *   the PHY state machine and, when the PHY has an irq greater than
+ *   zero, starts interrupt handling as well.  Drivers that cannot
+ *   use all of this may call the individual functions instead.
+ *
+ * returns: the attached phy_device, or the ERR_PTR() produced by
+ *   phy_attach(), in which case nothing has been started.
+ */
+struct phy_device * phy_connect(struct net_device *dev, const char *phy_id,
+               void (*handler)(struct net_device *), u32 flags)
+{
+       struct phy_device *phydev = phy_attach(dev, phy_id, flags);
+
+       if (!IS_ERR(phydev)) {
+               phy_prepare_link(phydev, handler);
+
+               phy_start_machine(phydev, NULL);
+
+               if (phydev->irq > 0)
+                       phy_start_interrupts(phydev);
+       }
+
+       return phydev;
+}
+EXPORT_SYMBOL(phy_connect);
+
+/* phy_disconnect:
+ *
+ * description: Tears down, in order, the pieces started by
+ *   phy_connect(): interrupt handling (only when the PHY has an
+ *   irq greater than zero), the state machine, the link-change
+ *   callback, and finally the attachment to the net_device.
+ */
+void phy_disconnect(struct phy_device *phydev)
+{
+       if (phydev->irq > 0)
+               phy_stop_interrupts(phydev);
+
+       phy_stop_machine(phydev);
+       
+       /* No more link-change callbacks once the machine is stopped */
+       phydev->adjust_link = NULL;
+
+       phy_detach(phydev);
+}
+EXPORT_SYMBOL(phy_disconnect);
+
+#endif /* CONFIG_PHYCONTROL */
+
+/* phy_compare_id:
+ *
+ *   description: Match callback handed to bus_find_device().
+ *     Returns 1 when the device's bus_id equals the string passed
+ *     in data, and 0 otherwise.
+ */
+static int phy_compare_id(struct device *dev, void *data)
+{
+       return strcmp((char *)data, dev->bus_id) ? 0 : 1;
+}
+
+/* phy_attach:
+ *
+ *   description: Called by drivers to attach to a particular PHY
+ *     device. The phy_device named phy_id is looked up on the
+ *     mdio bus.  If no driver is bound to it, the genphy_driver
+ *     is probed and bound.  The phy_device is then given a ptr to
+ *     the attaching device and the caller's flags, and is
+ *     returned to the attaching driver.
+ *
+ *   returns: the phy_device on success, or an ERR_PTR() holding
+ *     -ENODEV (no such PHY), -EBUSY (PHY already attached to a
+ *     net_device) or the error from probing the generic driver.
+ */
+struct phy_device *phy_attach(struct net_device *dev,
+               const char *phy_id, u32 flags)
+{
+       struct bus_type *bus = &mdio_bus_type;
+       struct phy_device *phydev;
+       struct device *d;
+
+       /* Search the list of PHY devices on the mdio bus for the
+        * PHY with the requested name */
+       d = bus_find_device(bus, NULL, (void *)phy_id, phy_compare_id);
+
+       if (d) {
+               phydev = to_phy_device(d);
+       } else {
+               printk(KERN_ERR "%s not found\n", phy_id);
+               return ERR_PTR(-ENODEV);
+       }
+
+       /* Assume that if there is no driver, that it doesn't
+        * exist, and we should use the genphy driver. */
+       if (NULL == d->driver) {
+               int err;
+
+               down_write(&d->bus->subsys.rwsem);
+               d->driver = &genphy_driver.driver;
+
+               err = d->driver->probe(d);
+
+               if (err < 0) {
+                       /* The probe failed, so the device is not
+                        * bound: forget the generic driver again and
+                        * drop the bus rwsem before bailing out,
+                        * otherwise the semaphore stays write-locked
+                        * forever */
+                       d->driver = NULL;
+                       up_write(&d->bus->subsys.rwsem);
+                       return ERR_PTR(err);
+               }
+
+               device_bind_driver(d);
+               up_write(&d->bus->subsys.rwsem);
+       }
+
+       if (phydev->attached_dev) {
+               printk(KERN_ERR "%s: %s already attached\n",
+                               dev->name, phy_id);
+               return ERR_PTR(-EBUSY);
+       }
+
+       phydev->attached_dev = dev;
+
+       phydev->dev_flags = flags;
+
+       return phydev;
+}
+EXPORT_SYMBOL(phy_attach);
+
+/* phy_detach:
+ *
+ *   description: Breaks the link between the PHY and its
+ *     net_device.  A PHY that was running on the generic driver is
+ *     also released from it, so that a real driver loaded later
+ *     gets a chance to bind.
+ */
+void phy_detach(struct phy_device *phydev)
+{
+       struct device *d = &phydev->dev;
+
+       phydev->attached_dev = NULL;
+
+       /* Anything other than the generic driver stays bound */
+       if (d->driver != &genphy_driver.driver)
+               return;
+
+       down_write(&d->bus->subsys.rwsem);
+       device_release_driver(d);
+       up_write(&d->bus->subsys.rwsem);
+}
+EXPORT_SYMBOL(phy_detach);
+
+
+/* Generic PHY support and helper functions */
+
+/* genphy_config_advert
+ *
+ * description: Trims phydev->advertising down to what the PHY
+ *   supports, then programs MII_ADVERTISE (and MII_CTRL1000 when a
+ *   1000baseT mode is supported) to match.
+ *
+ * returns: a negative error from phy_read()/phy_write(), otherwise
+ *   the value of the last advertisement register that was written.
+ */
+int genphy_config_advert(struct phy_device *phydev)
+{
+       u32 modes;
+       int reg;
+       int ret;
+
+       /* Never advertise a mode the PHY cannot do */
+       phydev->advertising &= phydev->supported;
+       modes = phydev->advertising;
+
+       /* Rebuild the 10/100 and pause bits of MII_ADVERTISE */
+       reg = phy_read(phydev, MII_ADVERTISE);
+       if (reg < 0)
+               return reg;
+
+       reg &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 |
+                ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM);
+
+       reg |= (modes & ADVERTISED_10baseT_Half) ? ADVERTISE_10HALF : 0;
+       reg |= (modes & ADVERTISED_10baseT_Full) ? ADVERTISE_10FULL : 0;
+       reg |= (modes & ADVERTISED_100baseT_Half) ? ADVERTISE_100HALF : 0;
+       reg |= (modes & ADVERTISED_100baseT_Full) ? ADVERTISE_100FULL : 0;
+       reg |= (modes & ADVERTISED_Pause) ? ADVERTISE_PAUSE_CAP : 0;
+       reg |= (modes & ADVERTISED_Asym_Pause) ? ADVERTISE_PAUSE_ASYM : 0;
+
+       ret = phy_write(phydev, MII_ADVERTISE, reg);
+       if (ret < 0)
+               return ret;
+
+       /* Without any gigabit mode there is nothing more to program */
+       if (!(phydev->supported & (SUPPORTED_1000baseT_Half |
+                               SUPPORTED_1000baseT_Full)))
+               return reg;
+
+       /* Rebuild the gigabit bits of MII_CTRL1000 */
+       reg = phy_read(phydev, MII_CTRL1000);
+       if (reg < 0)
+               return reg;
+
+       reg &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
+
+       reg |= (modes & SUPPORTED_1000baseT_Half) ? ADVERTISE_1000HALF : 0;
+       reg |= (modes & SUPPORTED_1000baseT_Full) ? ADVERTISE_1000FULL : 0;
+
+       ret = phy_write(phydev, MII_CTRL1000, reg);
+       if (ret < 0)
+               return ret;
+
+       return reg;
+}
+EXPORT_SYMBOL(genphy_config_advert);
+
+/* genphy_setup_forced
+ *
+ * description: Writes MII_BMCR with the reset bit plus the speed
+ *   and duplex bits taken from phydev, then re-runs the driver's
+ *   config_init hook if it has one.  The values in phydev are
+ *   assumed to be valid; please see phy_sanitize_settings().
+ *
+ * returns: a negative error from the register write, otherwise the
+ *   result of config_init (or of the write when there is no hook).
+ */
+int genphy_setup_forced(struct phy_device *phydev)
+{
+       int bmcr = BMCR_RESET;
+       int ret;
+
+       /* Pause is not used with forced settings */
+       phydev->pause = phydev->asym_pause = 0;
+
+       switch (phydev->speed) {
+       case SPEED_1000:
+               bmcr |= BMCR_SPEED1000;
+               break;
+       case SPEED_100:
+               bmcr |= BMCR_SPEED100;
+               break;
+       default:
+               /* no speed bit is set for anything else */
+               break;
+       }
+
+       if (phydev->duplex == DUPLEX_FULL)
+               bmcr |= BMCR_FULLDPLX;
+
+       ret = phy_write(phydev, MII_BMCR, bmcr);
+       if (ret < 0)
+               return ret;
+
+       /* The device was just reset, so redo whatever setup the PHY
+        * needs in order to operate */
+       if (phydev->drv->config_init)
+               ret = phydev->drv->config_init(phydev);
+
+       return ret;
+}
+
+
+/* genphy_restart_aneg
+ *
+ * description: Enables and restarts autonegotiation by setting
+ *   BMCR_ANENABLE and BMCR_ANRESTART in MII_BMCR.  BMCR_ISOLATE is
+ *   cleared in the same write.
+ *
+ * returns: a negative error from the read, otherwise the result of
+ *   the register write.
+ */
+int genphy_restart_aneg(struct phy_device *phydev)
+{
+       int bmcr = phy_read(phydev, MII_BMCR);
+
+       if (bmcr < 0)
+               return bmcr;
+
+       bmcr |= BMCR_ANENABLE | BMCR_ANRESTART;
+
+       /* The PHY must not be isolated while it negotiates */
+       bmcr &= ~BMCR_ISOLATE;
+
+       return phy_write(phydev, MII_BMCR, bmcr);
+}
+
+
+/* genphy_config_aneg
+ *
+ * description: With auto-negotiation enabled, the advertisement
+ *   registers are programmed and negotiation is restarted.  With it
+ *   disabled, the forced speed/duplex is written to the BMCR.
+ *
+ * returns: the result of the last helper that was called; negative
+ *   values are errors.
+ */
+int genphy_config_aneg(struct phy_device *phydev)
+{
+       int ret;
+
+       if (AUTONEG_ENABLE != phydev->autoneg)
+               return genphy_setup_forced(phydev);
+
+       ret = genphy_config_advert(phydev);
+       if (ret < 0)
+               return ret;
+
+       return genphy_restart_aneg(phydev);
+}
+EXPORT_SYMBOL(genphy_config_aneg);
+
+/* genphy_update_link
+ *
+ * description: Refreshes phydev->link from the BMSR_LSTATUS bit of
+ *   the status register.  The register is read twice and only the
+ *   second value is used.
+ *
+ * returns: 0 on success, or the negative error from either read.
+ */
+int genphy_update_link(struct phy_device *phydev)
+{
+       int bmsr;
+
+       /* First read: the value is thrown away */
+       bmsr = phy_read(phydev, MII_BMSR);
+       if (bmsr < 0)
+               return bmsr;
+
+       /* Second read: this one carries the link status we keep */
+       bmsr = phy_read(phydev, MII_BMSR);
+       if (bmsr < 0)
+               return bmsr;
+
+       phydev->link = (bmsr & BMSR_LSTATUS) ? 1 : 0;
+
+       return 0;
+}
+
+/* genphy_read_status
+ *
+ * description: Check the link, then figure out the current state
+ *   by comparing what we advertise with what the link partner
+ *   advertises.  Start by checking the gigabit possibilities,
+ *   then move on to 10/100.  When autonegotiation is disabled,
+ *   speed and duplex are taken from MII_BMCR instead.
+ *
+ * returns: 0 on success, or the negative error from the first
+ *   register read that failed.  On success phydev->link, speed,
+ *   duplex, pause and asym_pause have been updated.
+ */
+int genphy_read_status(struct phy_device *phydev)
+{
+       int adv;
+       int err;
+       int lpa;
+       int lpagb = 0;
+
+       /* Update the link, but return if there
+        * was an error */
+       err = genphy_update_link(phydev);
+       if (err)
+               return err;
+
+       if (AUTONEG_ENABLE == phydev->autoneg) {
+               /* Only touch the gigabit registers on PHYs that
+                * support a 1000baseT mode; otherwise lpagb stays 0 */
+               if (phydev->supported & (SUPPORTED_1000baseT_Half
+                                       | SUPPORTED_1000baseT_Full)) {
+                       lpagb = phy_read(phydev, MII_STAT1000);
+
+                       if (lpagb < 0)
+                               return lpagb;
+
+                       adv = phy_read(phydev, MII_CTRL1000);
+
+                       if (adv < 0)
+                               return adv;
+
+                       /* Keep only the partner's gigabit bits that we
+                        * advertise too.  NOTE(review): the shift by 2
+                        * presumably lines the MII_CTRL1000 bits up
+                        * with the LPA_1000* positions - confirm
+                        * against linux/mii.h */
+                       lpagb &= adv << 2;
+               }
+
+               lpa = phy_read(phydev, MII_LPA);
+
+               if (lpa < 0)
+                       return lpa;
+
+               adv = phy_read(phydev, MII_ADVERTISE);
+
+               if (adv < 0)
+                       return adv;
+
+               /* Abilities common to both ends of the link */
+               lpa &= adv;
+
+               /* Start from the lowest mode and upgrade below */
+               phydev->speed = SPEED_10;
+               phydev->duplex = DUPLEX_HALF;
+               phydev->pause = phydev->asym_pause = 0;
+
+               if (lpagb & (LPA_1000FULL | LPA_1000HALF)) {
+                       phydev->speed = SPEED_1000;
+
+                       if (lpagb & LPA_1000FULL)
+                               phydev->duplex = DUPLEX_FULL;
+               } else if (lpa & (LPA_100FULL | LPA_100HALF)) {
+                       phydev->speed = SPEED_100;
+                       
+                       if (lpa & LPA_100FULL)
+                               phydev->duplex = DUPLEX_FULL;
+               } else
+                       if (lpa & LPA_10FULL)
+                               phydev->duplex = DUPLEX_FULL;
+
+               /* Pause settings are only reported for full duplex;
+                * they come from the already-masked lpa value */
+               if (phydev->duplex == DUPLEX_FULL){
+                       phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0;
+                       phydev->asym_pause = lpa & LPA_PAUSE_ASYM ? 1 : 0;
+               }
+       } else {
+               /* Forced mode: report what the control register says */
+               int bmcr = phy_read(phydev, MII_BMCR);
+               if (bmcr < 0)
+                       return bmcr;
+
+               if (bmcr & BMCR_FULLDPLX)
+                       phydev->duplex = DUPLEX_FULL;
+               else
+                       phydev->duplex = DUPLEX_HALF;
+
+               if (bmcr & BMCR_SPEED1000)
+                       phydev->speed = SPEED_1000;
+               else if (bmcr & BMCR_SPEED100)
+                       phydev->speed = SPEED_100;
+               else
+                       phydev->speed = SPEED_10;
+
+               phydev->pause = phydev->asym_pause = 0;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(genphy_read_status);
+
+/* genphy_config_init
+ *
+ * description: Works out what the PHY can do from its status
+ *   registers and stores the result in both phydev->supported and
+ *   phydev->advertising.  The 10/100 and autoneg abilities come
+ *   from MII_BMSR; the 1000baseT abilities come from MII_ESTATUS,
+ *   which is only read when BMSR_ESTATEN is set.
+ *
+ * returns: 0 on success, or the negative error from phy_read().
+ */
+static int genphy_config_init(struct phy_device *phydev)
+{
+       /* Must be signed: phy_read() reports failure as a negative
+        * value, which an unsigned variable could never show */
+       int val;
+       u32 features;
+
+       /* For now, I'll claim that the generic driver supports
+        * all possible port types */
+       features = (SUPPORTED_TP | SUPPORTED_MII
+                       | SUPPORTED_AUI | SUPPORTED_FIBRE |
+                       SUPPORTED_BNC);
+
+       /* Do we support autonegotiation? */
+       val = phy_read(phydev, MII_BMSR);
+
+       if (val < 0)
+               return val;
+
+       if (val & BMSR_ANEGCAPABLE)
+               features |= SUPPORTED_Autoneg;
+
+       if (val & BMSR_100FULL)
+               features |= SUPPORTED_100baseT_Full;
+       if (val & BMSR_100HALF)
+               features |= SUPPORTED_100baseT_Half;
+       if (val & BMSR_10FULL)
+               features |= SUPPORTED_10baseT_Full;
+       if (val & BMSR_10HALF)
+               features |= SUPPORTED_10baseT_Half;
+
+       /* The gigabit abilities live in the extended status register */
+       if (val & BMSR_ESTATEN) {
+               val = phy_read(phydev, MII_ESTATUS);
+
+               if (val < 0)
+                       return val;
+
+               if (val & ESTATUS_1000_TFULL)
+                       features |= SUPPORTED_1000baseT_Full;
+               if (val & ESTATUS_1000_THALF)
+                       features |= SUPPORTED_1000baseT_Half;
+       }
+
+       phydev->supported = features;
+       phydev->advertising = features;
+
+       return 0;
+}
+
+
+/* phy_probe
+ *
+ * description: Driver-model probe hook installed by
+ *   phy_driver_register().  Links the phy_device to its phy_driver,
+ *   falls back to polling when the driver lacks PHY_HAS_INTERRUPT,
+ *   copies the driver's feature mask into supported/advertising and
+ *   sets the state to READY (the driver's init function should set
+ *   it to STARTING if needed).  The driver's own probe hook runs
+ *   under phydev->lock; config_init runs after the lock is dropped.
+ *
+ * returns: a negative error from the driver's probe hook, otherwise
+ *   the result of config_init (0 when the driver has neither hook).
+ */
+static int phy_probe(struct device *dev)
+{
+       struct phy_device *phydev = to_phy_device(dev);
+       struct device_driver *drv;
+       struct phy_driver *phydrv;
+       int ret = 0;
+
+       /* Make sure the driver is held.
+        * XXX -- Is this correct? */
+       drv = get_driver(phydev->dev.driver);
+       phydrv = to_phy_driver(drv);
+       phydev->drv = phydrv;
+
+       /* A driver without interrupt support means we have to poll */
+       if (!(phydrv->flags & PHY_HAS_INTERRUPT))
+               phydev->irq = PHY_POLL;
+
+       spin_lock(&phydev->lock);
+
+       /* Begin with everything the driver claims.  A controller
+        * that attaches later may narrow either of these masks */
+       phydev->supported = phydrv->features;
+       phydev->advertising = phydrv->features;
+
+       /* READY is the default state */
+       phydev->state = PHY_READY;
+
+       if (phydev->drv->probe)
+               ret = phydev->drv->probe(phydev);
+
+       spin_unlock(&phydev->lock);
+
+       if (ret >= 0 && phydev->drv->config_init)
+               ret = phydev->drv->config_init(phydev);
+
+       return ret;
+}
+
+/* phy_remove
+ *
+ * description: Driver-model remove hook installed by
+ *   phy_driver_register().  Marks the PHY as DOWN under its lock,
+ *   runs the driver's remove hook if there is one, drops the
+ *   driver reference and clears phydev->drv.  Always returns 0.
+ */
+static int phy_remove(struct device *dev)
+{
+       struct phy_device *phydev = to_phy_device(dev);
+
+       spin_lock(&phydev->lock);
+       phydev->state = PHY_DOWN;
+       spin_unlock(&phydev->lock);
+
+       if (phydev->drv->remove)
+               phydev->drv->remove(phydev);
+
+       /* Let go of the driver and forget about it */
+       put_driver(dev->driver);
+       phydev->drv = NULL;
+
+       return 0;
+}
+
+/* phy_driver_register
+ *
+ * description: Fills in the embedded struct device_driver of
+ *   new_driver (name, mdio bus, probe and remove hooks) and
+ *   registers it with the driver core.  The owner that the PHY
+ *   driver put in its static initializer is kept; every other
+ *   field of the embedded driver starts out zeroed.
+ *
+ * returns: 0 on success, or the error from driver_register().
+ */
+int phy_driver_register(struct phy_driver *new_driver)
+{
+       /* PHY drivers set .driver.owner = THIS_MODULE statically;
+        * remember it so the memset() below does not throw it away */
+       struct module *owner = new_driver->driver.owner;
+       int retval;
+
+       memset(&new_driver->driver, 0, sizeof(new_driver->driver));
+       new_driver->driver.owner = owner;
+       new_driver->driver.name = new_driver->name;
+       new_driver->driver.bus = &mdio_bus_type;
+       new_driver->driver.probe = phy_probe;
+       new_driver->driver.remove = phy_remove;
+
+       retval = driver_register(&new_driver->driver);
+
+       if (retval) {
+               printk(KERN_ERR "%s: Error %d in registering driver\n",
+                               new_driver->name, retval);
+
+               return retval;
+       }
+
+       pr_info("%s: Registered new driver\n", new_driver->name);
+
+       return 0;
+}
+EXPORT_SYMBOL(phy_driver_register);
+
+/* phy_driver_unregister
+ *
+ * description: Removes the embedded device_driver of drv from the
+ *   driver core; the counterpart of phy_driver_register().
+ */
+void phy_driver_unregister(struct phy_driver *drv)
+{
+       driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL(phy_driver_unregister);
+
+/* The generic PHY driver.  phy_attach() binds it by hand to any PHY
+ * that has no driver of its own.  Its feature mask starts out empty
+ * and is filled in from the PHY's registers by genphy_config_init(). */
+static struct phy_driver genphy_driver = {
+       .name           = "Generic PHY",
+       .phy_id         = 0xffffffff,
+       .phy_id_mask    = 0xffffffff,
+       .features       = 0,
+       .config_init    = genphy_config_init,
+       .config_aneg    = genphy_config_aneg,
+       .read_status    = genphy_read_status,
+       .driver         = { .owner = THIS_MODULE, },
+};
+
+/* phy_init
+ *
+ * description: Brings up the mdio bus and then registers the
+ *   generic PHY driver on it.  If the driver cannot be registered
+ *   the bus is torn down again.
+ *
+ * returns: 0 on success, or the error from whichever step failed.
+ */
+static int __init phy_init(void)
+{
+       int rc;
+
+       rc = mdio_bus_init();
+       if (!rc) {
+               rc = phy_driver_register(&genphy_driver);
+
+               /* Undo the bus setup when the driver did not register */
+               if (rc)
+                       mdio_bus_exit();
+       }
+
+       return rc;
+}
+
+/* phy_exit
+ *
+ * description: Undoes phy_init() in reverse order: the generic
+ *   driver is unregistered first, then the mdio bus is torn down.
+ */
+static void __exit phy_exit(void)
+{
+       phy_driver_unregister(&genphy_driver);
+       mdio_bus_exit();
+}
+
+/* phy_init is registered through subsys_initcall rather than
+ * module_init */
+subsys_initcall(phy_init);
+module_exit(phy_exit);
diff --git a/drivers/net/phy/qsemi.c b/drivers/net/phy/qsemi.c

new file mode 100644 (file)

index 0000000..d461ba4
--- /dev/null
+++ b/drivers/net/phy/qsemi.c
@@ -0,0 +1,143 @@
+/*
+ * drivers/net/phy/qsemi.c
+ *
+ * Driver for Quality Semiconductor PHYs
+ *
+ * Author: Andy Fleming
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+
+/* ------------------------------------------------------------------------- */
+/* The Quality Semiconductor QS6612 is used on the RPX CLLF                  */
+
+/* register definitions */
+
+#define MII_QS6612_MCR         17  /* Mode Control Register      */
+#define MII_QS6612_FTR         27  /* Factory Test Register      */
+#define MII_QS6612_MCO         28  /* Misc. Control Register     */
+#define MII_QS6612_ISR         29  /* Interrupt Source Register  */
+#define MII_QS6612_IMR         30  /* Interrupt Mask Register    */
+/* Value written to the IMR when interrupts are enabled */
+#define MII_QS6612_IMR_INIT    0x003a
+#define MII_QS6612_PCR         31  /* 100BaseTx PHY Control Reg. */
+
+/* Bit masks named after the PCR; NOTE(review): presumably the bit
+ * layout of MII_QS6612_PCR - confirm against the data sheet.  None
+ * of them is referenced by the code in this file. */
+#define QS6612_PCR_AN_COMPLETE 0x1000
+#define QS6612_PCR_RLBEN       0x0200
+#define QS6612_PCR_DCREN       0x0100
+#define QS6612_PCR_4B5BEN      0x0040
+#define QS6612_PCR_TX_ISOLATE  0x0020
+#define QS6612_PCR_MLT3_DIS    0x0002
+#define QS6612_PCR_SCRM_DESCRM 0x0001
+
+MODULE_DESCRIPTION("Quality Semiconductor PHY driver");
+MODULE_AUTHOR("Andy Fleming");
+MODULE_LICENSE("GPL");
+
+/* qs6612_config_init
+ *
+ * description: Writes 0x0dc0 to the PHY control register so that
+ *   the PHY, which powers up isolated on the RPX, is allowed to
+ *   operate.
+ *
+ * returns: the result of the register write (0 unless there is a
+ *   write error).
+ */
+static int qs6612_config_init(struct phy_device *phydev)
+{
+       /* XXX - My docs indicate this should be 0x0940
+        * ...or something.  The current value sets three
+        * reserved bits: bit 11, which the docs say should be
+        * set to one, bit 10, which they say should be set
+        * to 0, and bit 7, for which they give no value.
+        * However, my docs are preliminary, and I will leave it
+        * like this until someone more knowledgeable corrects
+        * me or it.
+        * -- Andy Fleming
+        */
+       return phy_write(phydev, MII_QS6612_PCR, 0x0dc0);
+}
+
+/* qs6612_ack_interrupt
+ *
+ * description: Reads the interrupt source register, then MII_BMSR,
+ *   then MII_EXPANSION, stopping at the first read that fails.  The
+ *   values read are discarded.
+ *
+ * returns: 0 when all three reads succeed, otherwise the negative
+ *   error from the failing read.
+ */
+static int qs6612_ack_interrupt(struct phy_device *phydev)
+{
+       int ret;
+
+       ret = phy_read(phydev, MII_QS6612_ISR);
+
+       if (ret >= 0)
+               ret = phy_read(phydev, MII_BMSR);
+
+       if (ret >= 0)
+               ret = phy_read(phydev, MII_EXPANSION);
+
+       return (ret < 0) ? ret : 0;
+}
+
+/* qs6612_config_intr
+ *
+ * description: Programs the interrupt mask register to match
+ *   phydev->interrupts: MII_QS6612_IMR_INIT when interrupts are
+ *   enabled, 0 otherwise.
+ *
+ * returns: the result of the register write.
+ */
+static int qs6612_config_intr(struct phy_device *phydev)
+{
+       if (phydev->interrupts != PHY_INTERRUPT_ENABLED)
+               return phy_write(phydev, MII_QS6612_IMR, 0);
+
+       return phy_write(phydev, MII_QS6612_IMR, MII_QS6612_IMR_INIT);
+}
+
+/* Driver description for the QS6612.  The low four bits of the PHY
+ * ID are masked out when matching.  Autonegotiation setup and
+ * status reading are left to the generic helpers. */
+static struct phy_driver qs6612_driver = {
+       .name           = "QS6612",
+       .phy_id         = 0x00181440,
+       .phy_id_mask    = 0xfffffff0,
+       .features       = PHY_BASIC_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .config_init    = qs6612_config_init,
+       .config_intr    = qs6612_config_intr,
+       .ack_interrupt  = qs6612_ack_interrupt,
+       .config_aneg    = genphy_config_aneg,
+       .read_status    = genphy_read_status,
+       .driver         = { .owner = THIS_MODULE, },
+};
+
+/* Module entry point: registers the QS6612 driver with the PHY
+ * layer and returns the result of that registration. */
+static int __init qs6612_init(void)
+{
+       return phy_driver_register(&qs6612_driver);
+}
+
+/* Module exit point: unregisters the QS6612 driver again. */
+static void __exit qs6612_exit(void)
+{
+       phy_driver_unregister(&qs6612_driver);
+}
+
+/* Hook the two functions above up as the module's init and exit */
+module_init(qs6612_init);
+module_exit(qs6612_exit);
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c

index a32668e88e09fc2a157e3ac5500c94ae158c9529..bb71638a7c4484a6dfe5b6ba19ad7512ea4e911d 100644 (file)
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1657,7 +1657,6 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
                         skb->dev = ppp->dev;
                         skb->protocol = htons(npindex_to_ethertype[npi]);
                         skb->mac.raw = skb->data;
-                       skb->input_dev = ppp->dev;
                         netif_rx(skb);
                         ppp->dev->last_rx = jiffies;
                 }
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c

index ce1a9bf7b9a76ee1d2d0172c4092de0473a653c7..82f236cc3b9b31e4aa9df662a639456d519ea103 100644 (file)
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -377,7 +377,8 @@ abort_kfree:
   ***********************************************************************/
  static int pppoe_rcv(struct sk_buff *skb,
                      struct net_device *dev,
-                    struct packet_type *pt)
+                    struct packet_type *pt,
+                    struct net_device *orig_dev)
  
  {
         struct pppoe_hdr *ph;
@@ -426,7 +427,8 @@ out:
   ***********************************************************************/
  static int pppoe_disc_rcv(struct sk_buff *skb,
                           struct net_device *dev,
-                         struct packet_type *pt)
+                         struct packet_type *pt,
+                         struct net_device *orig_dev)
  
  {
         struct pppoe_hdr *ph;
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c

index d5afe05cd8267de3b9e3e308f28bf711d5efc093..f0471d102e3c61bcfc6089c483009e52b2427cfb 100644 (file)
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -187,6 +187,7 @@ static struct pci_device_id rtl8169_pci_tbl[] = {
         { PCI_DEVICE(PCI_VENDOR_ID_REALTEK,     0x8169), },
         { PCI_DEVICE(PCI_VENDOR_ID_DLINK,       0x4300), },
         { PCI_DEVICE(0x16ec,                    0x0116), },
+       { PCI_VENDOR_ID_LINKSYS,                0x1032, PCI_ANY_ID, 0x0024, },
         {0,},
  };
  
diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c

index 12a86f96d973d42af9c52a0916a90cfbc86d84d3..ec1a18d189a129a816c2c194bb56e489b20245d6 100644 (file)
--- a/drivers/net/rrunner.c
+++ b/drivers/net/rrunner.c
@@ -1429,6 +1429,7 @@ static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev)
  {
         struct rr_private *rrpriv = netdev_priv(dev);
         struct rr_regs __iomem *regs = rrpriv->regs;
+       struct hippi_cb *hcb = (struct hippi_cb *) skb->cb;
         struct ring_ctrl *txctrl;
         unsigned long flags;
         u32 index, len = skb->len;
@@ -1460,7 +1461,7 @@ static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev)
         ifield = (u32 *)skb_push(skb, 8);
  
         ifield[0] = 0;
-       ifield[1] = skb->private.ifield;
+       ifield[1] = hcb->ifield;
  
         /*
          * We don't need the lock before we are actually going to start
diff --git a/drivers/net/s2io-regs.h b/drivers/net/s2io-regs.h

index 7092ca6b277e31cace6f06cbfa4396a425576055..2234a8f05eb262a9f69bea7c2f47b4a7b053a321 100644 (file)
--- a/drivers/net/s2io-regs.h
+++ b/drivers/net/s2io-regs.h
@@ -62,6 +62,7 @@ typedef struct _XENA_dev_config {
  #define ADAPTER_STATUS_RMAC_REMOTE_FAULT   BIT(6)
  #define ADAPTER_STATUS_RMAC_LOCAL_FAULT    BIT(7)
  #define ADAPTER_STATUS_RMAC_PCC_IDLE       vBIT(0xFF,8,8)
+#define ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE  vBIT(0x0F,8,8)
  #define ADAPTER_STATUS_RC_PRC_QUIESCENT    vBIT(0xFF,16,8)
  #define ADAPTER_STATUS_MC_DRAM_READY       BIT(24)
  #define ADAPTER_STATUS_MC_QUEUES_READY     BIT(25)
@@ -77,21 +78,34 @@ typedef struct _XENA_dev_config {
  #define ADAPTER_ECC_EN                     BIT(55)
  
         u64 serr_source;
-#define SERR_SOURCE_PIC                                        BIT(0)
-#define SERR_SOURCE_TXDMA                              BIT(1)
-#define SERR_SOURCE_RXDMA                              BIT(2)
+#define SERR_SOURCE_PIC                        BIT(0)
+#define SERR_SOURCE_TXDMA              BIT(1)
+#define SERR_SOURCE_RXDMA              BIT(2)
  #define SERR_SOURCE_MAC                 BIT(3)
  #define SERR_SOURCE_MC                  BIT(4)
  #define SERR_SOURCE_XGXS                BIT(5)
-#define        SERR_SOURCE_ANY                                 (SERR_SOURCE_PIC                | \
-                                                                               SERR_SOURCE_TXDMA       | \
-                                                                               SERR_SOURCE_RXDMA       | \
-                                                                               SERR_SOURCE_MAC         | \
-                                                                               SERR_SOURCE_MC      | \
-                                                                               SERR_SOURCE_XGXS)
-
-
-       u8 unused_0[0x800 - 0x120];
+#define        SERR_SOURCE_ANY                 (SERR_SOURCE_PIC        | \
+                                       SERR_SOURCE_TXDMA       | \
+                                       SERR_SOURCE_RXDMA       | \
+                                       SERR_SOURCE_MAC         | \
+                                       SERR_SOURCE_MC          | \
+                                       SERR_SOURCE_XGXS)
+
+       u64 pci_mode;
+#define        GET_PCI_MODE(val)               ((val & vBIT(0xF, 0, 4)) >> 60)
+#define        PCI_MODE_PCI_33                 0
+#define        PCI_MODE_PCI_66                 0x1
+#define        PCI_MODE_PCIX_M1_66             0x2
+#define        PCI_MODE_PCIX_M1_100            0x3
+#define        PCI_MODE_PCIX_M1_133            0x4
+#define        PCI_MODE_PCIX_M2_66             0x5
+#define        PCI_MODE_PCIX_M2_100            0x6
+#define        PCI_MODE_PCIX_M2_133            0x7
+#define        PCI_MODE_UNSUPPORTED            BIT(0)
+#define        PCI_MODE_32_BITS                BIT(8)
+#define        PCI_MODE_UNKNOWN_MODE           BIT(9)
+
+       u8 unused_0[0x800 - 0x128];
  
  /* PCI-X Controller registers */
         u64 pic_int_status;
@@ -153,7 +167,11 @@ typedef struct _XENA_dev_config {
         u8 unused4[0x08];
  
         u64 gpio_int_reg;
+#define GPIO_INT_REG_LINK_DOWN                 BIT(1)
+#define GPIO_INT_REG_LINK_UP                   BIT(2)
         u64 gpio_int_mask;
+#define GPIO_INT_MASK_LINK_DOWN                BIT(1)
+#define GPIO_INT_MASK_LINK_UP                  BIT(2)
         u64 gpio_alarms;
  
         u8 unused5[0x38];
@@ -223,19 +241,16 @@ typedef struct _XENA_dev_config {
         u64 xmsi_data;
  
         u64 rx_mat;
+#define RX_MAT_SET(ring, msi)                  vBIT(msi, (8 * ring), 8)
  
         u8 unused6[0x8];
  
-       u64 tx_mat0_7;
-       u64 tx_mat8_15;
-       u64 tx_mat16_23;
-       u64 tx_mat24_31;
-       u64 tx_mat32_39;
-       u64 tx_mat40_47;
-       u64 tx_mat48_55;
-       u64 tx_mat56_63;
+       u64 tx_mat0_n[0x8];
+#define TX_MAT_SET(fifo, msi)                  vBIT(msi, (8 * fifo), 8)
  
-       u8 unused_1[0x10];
+       u8 unused_1[0x8];
+       u64 stat_byte_cnt;
+#define STAT_BC(n)                              vBIT(n,4,12)
  
         /* Automated statistics collection */
         u64 stat_cfg;
@@ -246,6 +261,7 @@ typedef struct _XENA_dev_config {
  #define STAT_TRSF_PER(n)           TBD
  #define        PER_SEC                                    0x208d5
  #define        SET_UPDT_PERIOD(n)                 vBIT((PER_SEC*n),32,32)
+#define        SET_UPDT_CLICKS(val)               vBIT(val, 32, 32)
  
         u64 stat_addr;
  
@@ -267,8 +283,15 @@ typedef struct _XENA_dev_config {
  
         u64 gpio_control;
  #define GPIO_CTRL_GPIO_0               BIT(8)
+       u64 misc_control;
+#define MISC_LINK_STABILITY_PRD(val)   vBIT(val,29,3)
+
+       u8 unused7_1[0x240 - 0x208];
+
+       u64 wreq_split_mask;
+#define        WREQ_SPLIT_MASK_SET_MASK(val)   vBIT(val, 52, 12)
  
-       u8 unused7[0x600];
+       u8 unused7_2[0x800 - 0x248];
  
  /* TxDMA registers */
         u64 txdma_int_status;
@@ -290,6 +313,7 @@ typedef struct _XENA_dev_config {
  
         u64 pcc_err_reg;
  #define PCC_FB_ECC_DB_ERR              vBIT(0xFF, 16, 8)
+#define PCC_ENABLE_FOUR                        vBIT(0x0F,0,8)
  
         u64 pcc_err_mask;
         u64 pcc_err_alarm;
@@ -468,6 +492,7 @@ typedef struct _XENA_dev_config {
  #define PRC_CTRL_NO_SNOOP                      (BIT(22)|BIT(23))
  #define PRC_CTRL_NO_SNOOP_DESC                 BIT(22)
  #define PRC_CTRL_NO_SNOOP_BUFF                 BIT(23)
+#define PRC_CTRL_BIMODAL_INTERRUPT             BIT(37)
  #define PRC_CTRL_RXD_BACKOFF_INTERVAL(val)     vBIT(val,40,24)
  
         u64 prc_alarm_action;
@@ -691,6 +716,10 @@ typedef struct _XENA_dev_config {
  #define MC_ERR_REG_MIRI_CRI_ERR_0          BIT(22)
  #define MC_ERR_REG_MIRI_CRI_ERR_1          BIT(23)
  #define MC_ERR_REG_SM_ERR                  BIT(31)
+#define MC_ERR_REG_ECC_ALL_SNG            (BIT(6) | \
+                                       BIT(7) | BIT(17) | BIT(19))
+#define MC_ERR_REG_ECC_ALL_DBL            (BIT(14) | \
+                                       BIT(15) | BIT(18) | BIT(20))
         u64 mc_err_mask;
         u64 mc_err_alarm;
  
@@ -736,7 +765,19 @@ typedef struct _XENA_dev_config {
         u64 mc_rldram_test_d1;
         u8 unused24[0x300 - 0x288];
         u64 mc_rldram_test_d2;
-       u8 unused25[0x700 - 0x308];
+
+       u8 unused24_1[0x360 - 0x308];
+       u64 mc_rldram_ctrl;
+#define        MC_RLDRAM_ENABLE_ODT            BIT(7)
+
+       u8 unused24_2[0x640 - 0x368];
+       u64 mc_rldram_ref_per_herc;
+#define        MC_RLDRAM_SET_REF_PERIOD(val)   vBIT(val, 0, 16)
+
+       u8 unused24_3[0x660 - 0x648];
+       u64 mc_rldram_mrs_herc;
+
+       u8 unused25[0x700 - 0x668];
         u64 mc_debug_ctrl;
  
         u8 unused26[0x3000 - 0x2f08];
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c

index ea638b162d3f09223cd16ca8b97f16073650f354..7ca78228b104f2fa165322a47826e056b4754314 100644 (file)
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -11,29 +11,28 @@
   * See the file COPYING in this distribution for more information.
   *
   * Credits:
- * Jeff Garzik         : For pointing out the improper error condition 
- *                       check in the s2io_xmit routine and also some 
- *                       issues in the Tx watch dog function. Also for
- *                       patiently answering all those innumerable 
+ * Jeff Garzik         : For pointing out the improper error condition
+ *                       check in the s2io_xmit routine and also some
+ *                       issues in the Tx watch dog function. Also for
+ *                       patiently answering all those innumerable
   *                       questions regaring the 2.6 porting issues.
   * Stephen Hemminger   : Providing proper 2.6 porting mechanism for some
   *                       macros available only in 2.6 Kernel.
- * Francois Romieu     : For pointing out all code part that were 
+ * Francois Romieu     : For pointing out all code part that were
   *                       deprecated and also styling related comments.
- * Grant Grundler      : For helping me get rid of some Architecture 
+ * Grant Grundler      : For helping me get rid of some Architecture
   *                       dependent code.
   * Christopher Hellwig : Some more 2.6 specific issues in the driver.
- *                             
+ *
   * The module loadable parameters that are supported by the driver and a brief
   * explaination of all the variables.
- * rx_ring_num : This can be used to program the number of receive rings used 
- * in the driver.                                      
- * rx_ring_len: This defines the number of descriptors each ring can have. This 
+ * rx_ring_num : This can be used to program the number of receive rings used
+ * in the driver.
+ * rx_ring_len: This defines the number of descriptors each ring can have. This
   * is also an array of size 8.
   * tx_fifo_num: This defines the number of Tx FIFOs thats used int the driver.
- * tx_fifo_len: This too is an array of 8. Each element defines the number of 
+ * tx_fifo_len: This too is an array of 8. Each element defines the number of
   * Tx descriptors that can be associated with each corresponding FIFO.
- * in PCI Configuration space.
   ************************************************************************/
  
  #include <linux/config.h>
@@ -56,27 +55,39 @@
  #include <linux/ethtool.h>
  #include <linux/version.h>
  #include <linux/workqueue.h>
+#include <linux/if_vlan.h>
  
-#include <asm/io.h>
  #include <asm/system.h>
  #include <asm/uaccess.h>
+#include <asm/io.h>
  
  /* local include */
  #include "s2io.h"
  #include "s2io-regs.h"
  
  /* S2io Driver name & version. */
-static char s2io_driver_name[] = "s2io";
-static char s2io_driver_version[] = "Version 1.7.7.1";
+static char s2io_driver_name[] = "Neterion";
+static char s2io_driver_version[] = "Version 2.0.3.1";
+
+static inline int RXD_IS_UP2DT(RxD_t *rxdp)
+{
+       int ret;
+
+       ret = ((!(rxdp->Control_1 & RXD_OWN_XENA)) &&
+               (GET_RXD_MARKER(rxdp->Control_2) != THE_RXD_MARK));
+
+       return ret;
+}
  
-/* 
+/*
   * Cards with following subsystem_id have a link state indication
   * problem, 600B, 600C, 600D, 640B, 640C and 640D.
   * macro below identifies these cards given the subsystem_id.
   */
-#define CARDS_WITH_FAULTY_LINK_INDICATORS(subid) \
-               (((subid >= 0x600B) && (subid <= 0x600D)) || \
-                ((subid >= 0x640B) && (subid <= 0x640D))) ? 1 : 0
+#define CARDS_WITH_FAULTY_LINK_INDICATORS(dev_type, subid) \
+       (dev_type == XFRAME_I_DEVICE) ?                 \
+               ((((subid >= 0x600B) && (subid <= 0x600D)) || \
+                ((subid >= 0x640B) && (subid <= 0x640D))) ? 1 : 0) : 0
  
  #define LINK_IS_UP(val64) (!(val64 & (ADAPTER_STATUS_RMAC_REMOTE_FAULT | \
                                       ADAPTER_STATUS_RMAC_LOCAL_FAULT)))
@@ -86,9 +97,12 @@ static char s2io_driver_version[] = "Version 1.7.7.1";
  static inline int rx_buffer_level(nic_t * sp, int rxb_size, int ring)
  {
         int level = 0;
-       if ((sp->pkt_cnt[ring] - rxb_size) > 16) {
+       mac_info_t *mac_control;
+
+       mac_control = &sp->mac_control;
+       if ((mac_control->rings[ring].pkt_cnt - rxb_size) > 16) {
                 level = LOW;
-               if ((sp->pkt_cnt[ring] - rxb_size) < MAX_RXDS_PER_BLOCK) {
+               if (rxb_size <= MAX_RXDS_PER_BLOCK) {
                         level = PANIC;
                 }
         }
@@ -145,6 +159,9 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = {
         {"rmac_pause_cnt"},
         {"rmac_accepted_ip"},
         {"rmac_err_tcp"},
+       {"\n DRIVER STATISTICS"},
+       {"single_bit_ecc_errs"},
+       {"double_bit_ecc_errs"},
  };
  
  #define S2IO_STAT_LEN sizeof(ethtool_stats_keys)/ ETH_GSTRING_LEN
@@ -153,8 +170,37 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = {
  #define S2IO_TEST_LEN  sizeof(s2io_gstrings) / ETH_GSTRING_LEN
  #define S2IO_STRINGS_LEN       S2IO_TEST_LEN * ETH_GSTRING_LEN
  
+#define S2IO_TIMER_CONF(timer, handle, arg, exp)               \
+                       init_timer(&timer);                     \
+                       timer.function = handle;                \
+                       timer.data = (unsigned long) arg;       \
+                       mod_timer(&timer, (jiffies + exp))      \
+
+/* Record the VLAN group for this device; the update is done under tx_lock */
+static void s2io_vlan_rx_register(struct net_device *dev,
+                                       struct vlan_group *grp)
+{
+       nic_t *nic = dev->priv;
+       unsigned long flags;
+
+       spin_lock_irqsave(&nic->tx_lock, flags);
+       nic->vlgrp = grp;
+       spin_unlock_irqrestore(&nic->tx_lock, flags);
+}
+
+/* Drop a VLAN id: clear its slot in the group's vlan_devices[] under tx_lock */
+static void s2io_vlan_rx_kill_vid(struct net_device *dev, unsigned long vid)
+{
+       nic_t *nic = dev->priv;
+       unsigned long flags;
+
+       spin_lock_irqsave(&nic->tx_lock, flags);
+       if (nic->vlgrp)
+               nic->vlgrp->vlan_devices[vid] = NULL;
+       spin_unlock_irqrestore(&nic->tx_lock, flags);
+}
  
-/* 
+/*
   * Constants to be programmed into the Xena's registers, to configure
   * the XAUI.
   */
@@ -162,7 +208,28 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = {
  #define SWITCH_SIGN    0xA5A5A5A5A5A5A5A5ULL
  #define        END_SIGN        0x0
  
-static u64 default_mdio_cfg[] = {
+static u64 herc_act_dtx_cfg[] = {
+       /* Set address */
+       0x8000051536750000ULL, 0x80000515367500E0ULL,
+       /* Write data */
+       0x8000051536750004ULL, 0x80000515367500E4ULL,
+       /* Set address */
+       0x80010515003F0000ULL, 0x80010515003F00E0ULL,
+       /* Write data */
+       0x80010515003F0004ULL, 0x80010515003F00E4ULL,
+       /* Set address */
+       0x801205150D440000ULL, 0x801205150D4400E0ULL,
+       /* Write data */
+       0x801205150D440004ULL, 0x801205150D4400E4ULL,
+       /* Set address */
+       0x80020515F2100000ULL, 0x80020515F21000E0ULL,
+       /* Write data */
+       0x80020515F2100004ULL, 0x80020515F21000E4ULL,
+       /* Done */
+       END_SIGN
+};
+
+static u64 xena_mdio_cfg[] = {
         /* Reset PMA PLL */
         0xC001010000000000ULL, 0xC0010100000000E0ULL,
         0xC0010100008000E4ULL,
@@ -172,7 +239,7 @@ static u64 default_mdio_cfg[] = {
         END_SIGN
  };
  
-static u64 default_dtx_cfg[] = {
+static u64 xena_dtx_cfg[] = {
         0x8000051500000000ULL, 0x80000515000000E0ULL,
         0x80000515D93500E4ULL, 0x8001051500000000ULL,
         0x80010515000000E0ULL, 0x80010515001E00E4ULL,
@@ -196,8 +263,7 @@ static u64 default_dtx_cfg[] = {
         END_SIGN
  };
  
-
-/* 
+/*
   * Constants for Fixing the MacAddress problem seen mostly on
   * Alpha machines.
   */
@@ -226,20 +292,25 @@ static unsigned int tx_fifo_len[MAX_TX_FIFOS] =
  static unsigned int rx_ring_num = 1;
  static unsigned int rx_ring_sz[MAX_RX_RINGS] =
      {[0 ...(MAX_RX_RINGS - 1)] = 0 };
-static unsigned int Stats_refresh_time = 4;
+static unsigned int rts_frm_len[MAX_RX_RINGS] =
+    {[0 ...(MAX_RX_RINGS - 1)] = 0 };
+static unsigned int use_continuous_tx_intrs = 1;
  static unsigned int rmac_pause_time = 65535;
  static unsigned int mc_pause_threshold_q0q3 = 187;
  static unsigned int mc_pause_threshold_q4q7 = 187;
  static unsigned int shared_splits;
  static unsigned int tmac_util_period = 5;
  static unsigned int rmac_util_period = 5;
+static unsigned int bimodal = 0;
  #ifndef CONFIG_S2IO_NAPI
  static unsigned int indicate_max_pkts;
  #endif
+/* Frequency of Rx desc syncs expressed as power of 2 */
+static unsigned int rxsync_frequency = 3;
  
-/* 
+/*
   * S2IO device table.
- * This table lists all the devices that this driver supports. 
+ * This table lists all the devices that this driver supports.
   */
  static struct pci_device_id s2io_tbl[] __devinitdata = {
         {PCI_VENDOR_ID_S2IO, PCI_DEVICE_ID_S2IO_WIN,
@@ -247,9 +318,9 @@ static struct pci_device_id s2io_tbl[] __devinitdata = {
         {PCI_VENDOR_ID_S2IO, PCI_DEVICE_ID_S2IO_UNI,
          PCI_ANY_ID, PCI_ANY_ID},
         {PCI_VENDOR_ID_S2IO, PCI_DEVICE_ID_HERC_WIN,
-        PCI_ANY_ID, PCI_ANY_ID},
-       {PCI_VENDOR_ID_S2IO, PCI_DEVICE_ID_HERC_UNI,
-        PCI_ANY_ID, PCI_ANY_ID},
+         PCI_ANY_ID, PCI_ANY_ID},
+        {PCI_VENDOR_ID_S2IO, PCI_DEVICE_ID_HERC_UNI,
+         PCI_ANY_ID, PCI_ANY_ID},
         {0,}
  };
  
@@ -268,8 +339,8 @@ static struct pci_driver s2io_driver = {
  /**
   * init_shared_mem - Allocation and Initialization of Memory
   * @nic: Device private variable.
- * Description: The function allocates all the memory areas shared 
- * between the NIC and the driver. This includes Tx descriptors, 
+ * Description: The function allocates all the memory areas shared
+ * between the NIC and the driver. This includes Tx descriptors,
   * Rx descriptors and the statistics block.
   */
  
@@ -279,11 +350,11 @@ static int init_shared_mem(struct s2io_nic *nic)
         void *tmp_v_addr, *tmp_v_addr_next;
         dma_addr_t tmp_p_addr, tmp_p_addr_next;
         RxD_block_t *pre_rxd_blk = NULL;
-       int i, j, blk_cnt;
+       int i, j, blk_cnt, rx_sz, tx_sz;
         int lst_size, lst_per_page;
         struct net_device *dev = nic->dev;
  #ifdef CONFIG_2BUFF_MODE
-       unsigned long tmp;
+       u64 tmp;
         buffAdd_t *ba;
  #endif
  
@@ -300,36 +371,41 @@ static int init_shared_mem(struct s2io_nic *nic)
                 size += config->tx_cfg[i].fifo_len;
         }
         if (size > MAX_AVAILABLE_TXDS) {
-               DBG_PRINT(ERR_DBG, "%s: Total number of Tx FIFOs ",
-                         dev->name);
-               DBG_PRINT(ERR_DBG, "exceeds the maximum value ");
-               DBG_PRINT(ERR_DBG, "that can be used\n");
+               DBG_PRINT(ERR_DBG, "%s: Requested TxDs too high, ",
+                         __FUNCTION__);
+               DBG_PRINT(ERR_DBG, "Requested: %d, max supported: 8192\n", size);
                 return FAILURE;
         }
  
         lst_size = (sizeof(TxD_t) * config->max_txds);
+       tx_sz = lst_size * size;
         lst_per_page = PAGE_SIZE / lst_size;
  
         for (i = 0; i < config->tx_fifo_num; i++) {
                 int fifo_len = config->tx_cfg[i].fifo_len;
                 int list_holder_size = fifo_len * sizeof(list_info_hold_t);
-               nic->list_info[i] = kmalloc(list_holder_size, GFP_KERNEL);
-               if (!nic->list_info[i]) {
+               mac_control->fifos[i].list_info = kmalloc(list_holder_size,
+                                                         GFP_KERNEL);
+               if (!mac_control->fifos[i].list_info) {
                         DBG_PRINT(ERR_DBG,
                                   "Malloc failed for list_info\n");
                         return -ENOMEM;
                 }
-               memset(nic->list_info[i], 0, list_holder_size);
+               memset(mac_control->fifos[i].list_info, 0, list_holder_size);
         }
         for (i = 0; i < config->tx_fifo_num; i++) {
                 int page_num = TXD_MEM_PAGE_CNT(config->tx_cfg[i].fifo_len,
                                                 lst_per_page);
-               mac_control->tx_curr_put_info[i].offset = 0;
-               mac_control->tx_curr_put_info[i].fifo_len =
+               mac_control->fifos[i].tx_curr_put_info.offset = 0;
+               mac_control->fifos[i].tx_curr_put_info.fifo_len =
                     config->tx_cfg[i].fifo_len - 1;
-               mac_control->tx_curr_get_info[i].offset = 0;
-               mac_control->tx_curr_get_info[i].fifo_len =
+               mac_control->fifos[i].tx_curr_get_info.offset = 0;
+               mac_control->fifos[i].tx_curr_get_info.fifo_len =
                     config->tx_cfg[i].fifo_len - 1;
+               mac_control->fifos[i].fifo_no = i;
+               mac_control->fifos[i].nic = nic;
+               mac_control->fifos[i].max_txds = MAX_SKB_FRAGS;
+
                 for (j = 0; j < page_num; j++) {
                         int k = 0;
                         dma_addr_t tmp_p;
@@ -345,16 +421,15 @@ static int init_shared_mem(struct s2io_nic *nic)
                         while (k < lst_per_page) {
                                 int l = (j * lst_per_page) + k;
                                 if (l == config->tx_cfg[i].fifo_len)
-                                       goto end_txd_alloc;
-                               nic->list_info[i][l].list_virt_addr =
+                                       break;
+                               mac_control->fifos[i].list_info[l].list_virt_addr =
                                     tmp_v + (k * lst_size);
-                               nic->list_info[i][l].list_phy_addr =
+                               mac_control->fifos[i].list_info[l].list_phy_addr =
                                     tmp_p + (k * lst_size);
                                 k++;
                         }
                 }
         }
-      end_txd_alloc:
  
         /* Allocation and initialization of RXDs in Rings */
         size = 0;
@@ -367,21 +442,26 @@ static int init_shared_mem(struct s2io_nic *nic)
                         return FAILURE;
                 }
                 size += config->rx_cfg[i].num_rxd;
-               nic->block_count[i] =
+               mac_control->rings[i].block_count =
                     config->rx_cfg[i].num_rxd / (MAX_RXDS_PER_BLOCK + 1);
-               nic->pkt_cnt[i] =
-                   config->rx_cfg[i].num_rxd - nic->block_count[i];
+               mac_control->rings[i].pkt_cnt =
+                   config->rx_cfg[i].num_rxd - mac_control->rings[i].block_count;
         }
+       size = (size * (sizeof(RxD_t)));
+       rx_sz = size;
  
         for (i = 0; i < config->rx_ring_num; i++) {
-               mac_control->rx_curr_get_info[i].block_index = 0;
-               mac_control->rx_curr_get_info[i].offset = 0;
-               mac_control->rx_curr_get_info[i].ring_len =
+               mac_control->rings[i].rx_curr_get_info.block_index = 0;
+               mac_control->rings[i].rx_curr_get_info.offset = 0;
+               mac_control->rings[i].rx_curr_get_info.ring_len =
                     config->rx_cfg[i].num_rxd - 1;
-               mac_control->rx_curr_put_info[i].block_index = 0;
-               mac_control->rx_curr_put_info[i].offset = 0;
-               mac_control->rx_curr_put_info[i].ring_len =
+               mac_control->rings[i].rx_curr_put_info.block_index = 0;
+               mac_control->rings[i].rx_curr_put_info.offset = 0;
+               mac_control->rings[i].rx_curr_put_info.ring_len =
                     config->rx_cfg[i].num_rxd - 1;
+               mac_control->rings[i].nic = nic;
+               mac_control->rings[i].ring_no = i;
+
                 blk_cnt =
                     config->rx_cfg[i].num_rxd / (MAX_RXDS_PER_BLOCK + 1);
                 /*  Allocating all the Rx blocks */
@@ -395,32 +475,36 @@ static int init_shared_mem(struct s2io_nic *nic)
                                                           &tmp_p_addr);
                         if (tmp_v_addr == NULL) {
                                 /*
-                                * In case of failure, free_shared_mem() 
-                                * is called, which should free any 
-                                * memory that was alloced till the 
+                                * In case of failure, free_shared_mem()
+                                * is called, which should free any
+                                * memory that was alloced till the
                                  * failure happened.
                                  */
-                               nic->rx_blocks[i][j].block_virt_addr =
+                               mac_control->rings[i].rx_blocks[j].block_virt_addr =
                                     tmp_v_addr;
                                 return -ENOMEM;
                         }
                         memset(tmp_v_addr, 0, size);
-                       nic->rx_blocks[i][j].block_virt_addr = tmp_v_addr;
-                       nic->rx_blocks[i][j].block_dma_addr = tmp_p_addr;
+                       mac_control->rings[i].rx_blocks[j].block_virt_addr =
+                               tmp_v_addr;
+                       mac_control->rings[i].rx_blocks[j].block_dma_addr =
+                               tmp_p_addr;
                 }
                 /* Interlinking all Rx Blocks */
                 for (j = 0; j < blk_cnt; j++) {
-                       tmp_v_addr = nic->rx_blocks[i][j].block_virt_addr;
+                       tmp_v_addr =
+                               mac_control->rings[i].rx_blocks[j].block_virt_addr;
                         tmp_v_addr_next =
-                           nic->rx_blocks[i][(j + 1) %
+                               mac_control->rings[i].rx_blocks[(j + 1) %
                                               blk_cnt].block_virt_addr;
-                       tmp_p_addr = nic->rx_blocks[i][j].block_dma_addr;
+                       tmp_p_addr =
+                               mac_control->rings[i].rx_blocks[j].block_dma_addr;
                         tmp_p_addr_next =
-                           nic->rx_blocks[i][(j + 1) %
+                               mac_control->rings[i].rx_blocks[(j + 1) %
                                               blk_cnt].block_dma_addr;
  
                         pre_rxd_blk = (RxD_block_t *) tmp_v_addr;
-                       pre_rxd_blk->reserved_1 = END_OF_BLOCK; /* last RxD 
+                       pre_rxd_blk->reserved_1 = END_OF_BLOCK; /* last RxD
                                                                  * marker.
                                                                  */
  #ifndef        CONFIG_2BUFF_MODE
@@ -433,43 +517,43 @@ static int init_shared_mem(struct s2io_nic *nic)
         }
  
  #ifdef CONFIG_2BUFF_MODE
-       /* 
+       /*
          * Allocation of Storages for buffer addresses in 2BUFF mode
          * and the buffers as well.
          */
         for (i = 0; i < config->rx_ring_num; i++) {
                 blk_cnt =
                     config->rx_cfg[i].num_rxd / (MAX_RXDS_PER_BLOCK + 1);
-               nic->ba[i] = kmalloc((sizeof(buffAdd_t *) * blk_cnt),
+               mac_control->rings[i].ba = kmalloc((sizeof(buffAdd_t *) * blk_cnt),
                                      GFP_KERNEL);
-               if (!nic->ba[i])
+               if (!mac_control->rings[i].ba)
                         return -ENOMEM;
                 for (j = 0; j < blk_cnt; j++) {
                         int k = 0;
-                       nic->ba[i][j] = kmalloc((sizeof(buffAdd_t) *
+                       mac_control->rings[i].ba[j] = kmalloc((sizeof(buffAdd_t) *
                                                  (MAX_RXDS_PER_BLOCK + 1)),
                                                 GFP_KERNEL);
-                       if (!nic->ba[i][j])
+                       if (!mac_control->rings[i].ba[j])
                                 return -ENOMEM;
                         while (k != MAX_RXDS_PER_BLOCK) {
-                               ba = &nic->ba[i][j][k];
+                               ba = &mac_control->rings[i].ba[j][k];
  
-                               ba->ba_0_org = kmalloc
+                               ba->ba_0_org = (void *) kmalloc
                                     (BUF0_LEN + ALIGN_SIZE, GFP_KERNEL);
                                 if (!ba->ba_0_org)
                                         return -ENOMEM;
-                               tmp = (unsigned long) ba->ba_0_org;
+                               tmp = (u64) ba->ba_0_org;
                                 tmp += ALIGN_SIZE;
-                               tmp &= ~((unsigned long) ALIGN_SIZE);
+                               tmp &= ~((u64) ALIGN_SIZE);
                                 ba->ba_0 = (void *) tmp;
  
-                               ba->ba_1_org = kmalloc
+                               ba->ba_1_org = (void *) kmalloc
                                     (BUF1_LEN + ALIGN_SIZE, GFP_KERNEL);
                                 if (!ba->ba_1_org)
                                         return -ENOMEM;
-                               tmp = (unsigned long) ba->ba_1_org;
+                               tmp = (u64) ba->ba_1_org;
                                 tmp += ALIGN_SIZE;
-                               tmp &= ~((unsigned long) ALIGN_SIZE);
+                               tmp &= ~((u64) ALIGN_SIZE);
                                 ba->ba_1 = (void *) tmp;
                                 k++;
                         }
@@ -483,9 +567,9 @@ static int init_shared_mem(struct s2io_nic *nic)
             (nic->pdev, size, &mac_control->stats_mem_phy);
  
         if (!mac_control->stats_mem) {
-               /* 
-                * In case of failure, free_shared_mem() is called, which 
-                * should free any memory that was alloced till the 
+               /*
+                * In case of failure, free_shared_mem() is called, which
+                * should free any memory that was alloced till the
                  * failure happened.
                  */
                 return -ENOMEM;
@@ -495,15 +579,14 @@ static int init_shared_mem(struct s2io_nic *nic)
         tmp_v_addr = mac_control->stats_mem;
         mac_control->stats_info = (StatInfo_t *) tmp_v_addr;
         memset(tmp_v_addr, 0, size);
-
         DBG_PRINT(INIT_DBG, "%s:Ring Mem PHY: 0x%llx\n", dev->name,
                   (unsigned long long) tmp_p_addr);
  
         return SUCCESS;
  }
  
-/**  
- * free_shared_mem - Free the allocated Memory 
+/**
+ * free_shared_mem - Free the allocated Memory
   * @nic:  Device private variable.
   * Description: This function is to free all memory locations allocated by
   * the init_shared_mem() function and return it to the kernel.
@@ -533,15 +616,19 @@ static void free_shared_mem(struct s2io_nic *nic)
                                                 lst_per_page);
                 for (j = 0; j < page_num; j++) {
                         int mem_blks = (j * lst_per_page);
-                       if (!nic->list_info[i][mem_blks].list_virt_addr)
+                       if ((!mac_control->fifos[i].list_info) ||
+                               (!mac_control->fifos[i].list_info[mem_blks].
+                                list_virt_addr))
                                 break;
                         pci_free_consistent(nic->pdev, PAGE_SIZE,
-                                           nic->list_info[i][mem_blks].
+                                           mac_control->fifos[i].
+                                           list_info[mem_blks].
                                             list_virt_addr,
-                                           nic->list_info[i][mem_blks].
+                                           mac_control->fifos[i].
+                                           list_info[mem_blks].
                                             list_phy_addr);
                 }
-               kfree(nic->list_info[i]);
+               kfree(mac_control->fifos[i].list_info);
         }
  
  #ifndef CONFIG_2BUFF_MODE
@@ -550,10 +637,12 @@ static void free_shared_mem(struct s2io_nic *nic)
         size = SIZE_OF_BLOCK;
  #endif
         for (i = 0; i < config->rx_ring_num; i++) {
-               blk_cnt = nic->block_count[i];
+               blk_cnt = mac_control->rings[i].block_count;
                 for (j = 0; j < blk_cnt; j++) {
-                       tmp_v_addr = nic->rx_blocks[i][j].block_virt_addr;
-                       tmp_p_addr = nic->rx_blocks[i][j].block_dma_addr;
+                       tmp_v_addr = mac_control->rings[i].rx_blocks[j].
+                               block_virt_addr;
+                       tmp_p_addr = mac_control->rings[i].rx_blocks[j].
+                               block_dma_addr;
                         if (tmp_v_addr == NULL)
                                 break;
                         pci_free_consistent(nic->pdev, size,
@@ -566,35 +655,21 @@ static void free_shared_mem(struct s2io_nic *nic)
         for (i = 0; i < config->rx_ring_num; i++) {
                 blk_cnt =
                     config->rx_cfg[i].num_rxd / (MAX_RXDS_PER_BLOCK + 1);
-               if (!nic->ba[i])
-                       goto end_free;
                 for (j = 0; j < blk_cnt; j++) {
                         int k = 0;
-                       if (!nic->ba[i][j]) {
-                               kfree(nic->ba[i]);
-                               goto end_free;
-                       }
+                       if (!mac_control->rings[i].ba[j])
+                               continue;
                         while (k != MAX_RXDS_PER_BLOCK) {
-                               buffAdd_t *ba = &nic->ba[i][j][k];
-                               if (!ba || !ba->ba_0_org || !ba->ba_1_org)
-                               {
-                                       kfree(nic->ba[i]);
-                                       kfree(nic->ba[i][j]);
-                                       if(ba->ba_0_org)
-                                               kfree(ba->ba_0_org);
-                                       if(ba->ba_1_org)
-                                               kfree(ba->ba_1_org);
-                                       goto end_free;
-                               }
+                               buffAdd_t *ba = &mac_control->rings[i].ba[j][k];
                                 kfree(ba->ba_0_org);
                                 kfree(ba->ba_1_org);
                                 k++;
                         }
-                       kfree(nic->ba[i][j]);
+                       kfree(mac_control->rings[i].ba[j]);
                 }
-               kfree(nic->ba[i]);
+               if (mac_control->rings[i].ba)
+                       kfree(mac_control->rings[i].ba);
         }
-end_free:
  #endif
  
         if (mac_control->stats_mem) {
@@ -605,12 +680,93 @@ end_free:
         }
  }
  
-/**  
- *  init_nic - Initialization of hardware 
+/**
+ * s2io_verify_pci_mode - read pci_mode; return the PCI mode, or -1 if unknown
+ */
+
+static int s2io_verify_pci_mode(nic_t *nic)
+{
+       XENA_dev_config_t *bar0 = (XENA_dev_config_t *) nic->bar0;
+       register u64 val64 = 0;
+       int     mode;
+
+       val64 = readq(&bar0->pci_mode);
+       mode = (u8)GET_PCI_MODE(val64);
+
+       if ( val64 & PCI_MODE_UNKNOWN_MODE)
+               return -1;      /* Unknown PCI mode */
+       return mode;
+}
+
+
+/**
+ * s2io_print_pci_mode - log bus width/speed, set config->bus_speed; return mode or -1
+ */
+static int s2io_print_pci_mode(nic_t *nic)
+{
+       XENA_dev_config_t *bar0 = (XENA_dev_config_t *) nic->bar0;
+       register u64 val64 = 0;
+       int     mode;
+       struct config_param *config = &nic->config;
+
+       val64 = readq(&bar0->pci_mode);
+       mode = (u8)GET_PCI_MODE(val64);
+
+       if ( val64 & PCI_MODE_UNKNOWN_MODE)
+               return -1;      /* Unknown PCI mode */
+
+       if (val64 & PCI_MODE_32_BITS) {
+               DBG_PRINT(ERR_DBG, "%s: Device is on 32 bit ", nic->dev->name);
+       } else {
+               DBG_PRINT(ERR_DBG, "%s: Device is on 64 bit ", nic->dev->name);
+       }
+
+       switch(mode) {
+               case PCI_MODE_PCI_33:
+                       DBG_PRINT(ERR_DBG, "33MHz PCI bus\n");
+                       config->bus_speed = 33;
+                       break;
+               case PCI_MODE_PCI_66:
+                       DBG_PRINT(ERR_DBG, "66MHz PCI bus\n");
+                       config->bus_speed = 133;
+                       break;
+               case PCI_MODE_PCIX_M1_66:
+                       DBG_PRINT(ERR_DBG, "66MHz PCIX(M1) bus\n");
+                       config->bus_speed = 133; /* Herc doubles the clock rate */
+                       break;
+               case PCI_MODE_PCIX_M1_100:
+                       DBG_PRINT(ERR_DBG, "100MHz PCIX(M1) bus\n");
+                       config->bus_speed = 200;
+                       break;
+               case PCI_MODE_PCIX_M1_133:
+                       DBG_PRINT(ERR_DBG, "133MHz PCIX(M1) bus\n");
+                       config->bus_speed = 266;
+                       break;
+               case PCI_MODE_PCIX_M2_66:
+                       DBG_PRINT(ERR_DBG, "133MHz PCIX(M2) bus\n");
+                       config->bus_speed = 133;
+                       break;
+               case PCI_MODE_PCIX_M2_100:
+                       DBG_PRINT(ERR_DBG, "200MHz PCIX(M2) bus\n");
+                       config->bus_speed = 200;
+                       break;
+               case PCI_MODE_PCIX_M2_133:
+                       DBG_PRINT(ERR_DBG, "266MHz PCIX(M2) bus\n");
+                       config->bus_speed = 266;
+                       break;
+               default:
+                       return -1;      /* Unsupported bus speed */
+       }
+
+       return mode;
+}
+
+/**
+ *  init_nic - Initialization of hardware
   *  @nic: device peivate variable
- *  Description: The function sequentially configures every block 
- *  of the H/W from their reset values. 
- *  Return Value:  SUCCESS on success and 
+ *  Description: The function sequentially configures every block
+ *  of the H/W from their reset values.
+ *  Return Value:  SUCCESS on success and
   *  '-1' on failure (endian settings incorrect).
   */
  
@@ -626,21 +782,32 @@ static int init_nic(struct s2io_nic *nic)
         struct config_param *config;
         int mdio_cnt = 0, dtx_cnt = 0;
         unsigned long long mem_share;
+       int mem_size;
  
         mac_control = &nic->mac_control;
         config = &nic->config;
  
-       /* Initialize swapper control register */
-       if (s2io_set_swapper(nic)) {
+       /* to set the swapper control on the card */
+       if(s2io_set_swapper(nic)) {
                 DBG_PRINT(ERR_DBG,"ERROR: Setting Swapper failed\n");
                 return -1;
         }
  
+       /*
+        * Herc requires EOI to be removed from reset before XGXS, so..
+        */
+       if (nic->device_type & XFRAME_II_DEVICE) {
+               val64 = 0xA500000000ULL;
+               writeq(val64, &bar0->sw_reset);
+               msleep(500);
+               val64 = readq(&bar0->sw_reset);
+       }
+
         /* Remove XGXS from reset state */
         val64 = 0;
         writeq(val64, &bar0->sw_reset);
-       val64 = readq(&bar0->sw_reset);
         msleep(500);
+       val64 = readq(&bar0->sw_reset);
  
         /*  Enable Receiving broadcasts */
         add = &bar0->mac_cfg;
@@ -660,48 +827,58 @@ static int init_nic(struct s2io_nic *nic)
         val64 = dev->mtu;
         writeq(vBIT(val64, 2, 14), &bar0->rmac_max_pyld_len);
  
-       /* 
-        * Configuring the XAUI Interface of Xena. 
+       /*
+        * Configuring the XAUI Interface of Xena.
          * ***************************************
-        * To Configure the Xena's XAUI, one has to write a series 
-        * of 64 bit values into two registers in a particular 
-        * sequence. Hence a macro 'SWITCH_SIGN' has been defined 
-        * which will be defined in the array of configuration values 
-        * (default_dtx_cfg & default_mdio_cfg) at appropriate places 
-        * to switch writing from one regsiter to another. We continue 
+        * To Configure the Xena's XAUI, one has to write a series
+        * of 64 bit values into two registers in a particular
+        * sequence. Hence a macro 'SWITCH_SIGN' has been defined
+        * which will be defined in the array of configuration values
+        * (xena_dtx_cfg & xena_mdio_cfg) at appropriate places
+        * to switch writing from one register to another. We continue
          * writing these values until we encounter the 'END_SIGN' macro.
-        * For example, After making a series of 21 writes into 
-        * dtx_control register the 'SWITCH_SIGN' appears and hence we 
+        * For example, After making a series of 21 writes into
+        * dtx_control register the 'SWITCH_SIGN' appears and hence we
          * start writing into mdio_control until we encounter END_SIGN.
          */
-       while (1) {
-             dtx_cfg:
-               while (default_dtx_cfg[dtx_cnt] != END_SIGN) {
-                       if (default_dtx_cfg[dtx_cnt] == SWITCH_SIGN) {
-                               dtx_cnt++;
-                               goto mdio_cfg;
-                       }
-                       SPECIAL_REG_WRITE(default_dtx_cfg[dtx_cnt],
+       if (nic->device_type & XFRAME_II_DEVICE) {
+               while (herc_act_dtx_cfg[dtx_cnt] != END_SIGN) {
+                       SPECIAL_REG_WRITE(herc_act_dtx_cfg[dtx_cnt],
                                           &bar0->dtx_control, UF);
-                       val64 = readq(&bar0->dtx_control);
+                       if (dtx_cnt & 0x1)
+                               msleep(1); /* Necessary!! */
                         dtx_cnt++;
                 }
-             mdio_cfg:
-               while (default_mdio_cfg[mdio_cnt] != END_SIGN) {
-                       if (default_mdio_cfg[mdio_cnt] == SWITCH_SIGN) {
+       } else {
+               while (1) {
+                     dtx_cfg:
+                       while (xena_dtx_cfg[dtx_cnt] != END_SIGN) {
+                               if (xena_dtx_cfg[dtx_cnt] == SWITCH_SIGN) {
+                                       dtx_cnt++;
+                                       goto mdio_cfg;
+                               }
+                               SPECIAL_REG_WRITE(xena_dtx_cfg[dtx_cnt],
+                                                 &bar0->dtx_control, UF);
+                               val64 = readq(&bar0->dtx_control);
+                               dtx_cnt++;
+                       }
+                     mdio_cfg:
+                       while (xena_mdio_cfg[mdio_cnt] != END_SIGN) {
+                               if (xena_mdio_cfg[mdio_cnt] == SWITCH_SIGN) {
+                                       mdio_cnt++;
+                                       goto dtx_cfg;
+                               }
+                               SPECIAL_REG_WRITE(xena_mdio_cfg[mdio_cnt],
+                                                 &bar0->mdio_control, UF);
+                               val64 = readq(&bar0->mdio_control);
                                 mdio_cnt++;
+                       }
+                       if ((xena_dtx_cfg[dtx_cnt] == END_SIGN) &&
+                           (xena_mdio_cfg[mdio_cnt] == END_SIGN)) {
+                               break;
+                       } else {
                                 goto dtx_cfg;
                         }
-                       SPECIAL_REG_WRITE(default_mdio_cfg[mdio_cnt],
-                                         &bar0->mdio_control, UF);
-                       val64 = readq(&bar0->mdio_control);
-                       mdio_cnt++;
-               }
-               if ((default_dtx_cfg[dtx_cnt] == END_SIGN) &&
-                   (default_mdio_cfg[mdio_cnt] == END_SIGN)) {
-                       break;
-               } else {
-                       goto dtx_cfg;
                 }
         }
  
@@ -748,12 +925,20 @@ static int init_nic(struct s2io_nic *nic)
         val64 |= BIT(0);        /* To enable the FIFO partition. */
         writeq(val64, &bar0->tx_fifo_partition_0);
  
+       /*
+        * Disable 4 PCCs for Xena1, 2 and 3 as per H/W bug
+        * SXE-008 TRANSMIT DMA ARBITRATION ISSUE.
+        */
+       if ((nic->device_type == XFRAME_I_DEVICE) &&
+               (get_xena_rev_id(nic->pdev) < 4))
+               writeq(PCC_ENABLE_FOUR, &bar0->pcc_enable);
+
         val64 = readq(&bar0->tx_fifo_partition_0);
         DBG_PRINT(INIT_DBG, "Fifo partition at: 0x%p is: 0x%llx\n",
                   &bar0->tx_fifo_partition_0, (unsigned long long) val64);
  
-       /* 
-        * Initialization of Tx_PA_CONFIG register to ignore packet 
+       /*
+        * Initialization of Tx_PA_CONFIG register to ignore packet
          * integrity checking.
          */
         val64 = readq(&bar0->tx_pa_cfg);
@@ -770,85 +955,304 @@ static int init_nic(struct s2io_nic *nic)
         }
         writeq(val64, &bar0->rx_queue_priority);
  
-       /* 
-        * Allocating equal share of memory to all the 
+       /*
+        * Allocating equal share of memory to all the
          * configured Rings.
          */
         val64 = 0;
+       if (nic->device_type & XFRAME_II_DEVICE)
+               mem_size = 32;
+       else
+               mem_size = 64;
+
         for (i = 0; i < config->rx_ring_num; i++) {
                 switch (i) {
                 case 0:
-                       mem_share = (64 / config->rx_ring_num +
-                                    64 % config->rx_ring_num);
+                       mem_share = (mem_size / config->rx_ring_num +
+                                    mem_size % config->rx_ring_num);
                         val64 |= RX_QUEUE_CFG_Q0_SZ(mem_share);
                         continue;
                 case 1:
-                       mem_share = (64 / config->rx_ring_num);
+                       mem_share = (mem_size / config->rx_ring_num);
                         val64 |= RX_QUEUE_CFG_Q1_SZ(mem_share);
                         continue;
                 case 2:
-                       mem_share = (64 / config->rx_ring_num);
+                       mem_share = (mem_size / config->rx_ring_num);
                         val64 |= RX_QUEUE_CFG_Q2_SZ(mem_share);
                         continue;
                 case 3:
-                       mem_share = (64 / config->rx_ring_num);
+                       mem_share = (mem_size / config->rx_ring_num);
                         val64 |= RX_QUEUE_CFG_Q3_SZ(mem_share);
                         continue;
                 case 4:
-                       mem_share = (64 / config->rx_ring_num);
+                       mem_share = (mem_size / config->rx_ring_num);
                         val64 |= RX_QUEUE_CFG_Q4_SZ(mem_share);
                         continue;
                 case 5:
-                       mem_share = (64 / config->rx_ring_num);
+                       mem_share = (mem_size / config->rx_ring_num);
                         val64 |= RX_QUEUE_CFG_Q5_SZ(mem_share);
                         continue;
                 case 6:
-                       mem_share = (64 / config->rx_ring_num);
+                       mem_share = (mem_size / config->rx_ring_num);
                         val64 |= RX_QUEUE_CFG_Q6_SZ(mem_share);
                         continue;
                 case 7:
-                       mem_share = (64 / config->rx_ring_num);
+                       mem_share = (mem_size / config->rx_ring_num);
                         val64 |= RX_QUEUE_CFG_Q7_SZ(mem_share);
                         continue;
                 }
         }
         writeq(val64, &bar0->rx_queue_cfg);
  
-       /* 
-        * Initializing the Tx round robin registers to 0.
-        * Filling Tx and Rx round robin registers as per the 
-        * number of FIFOs and Rings is still TODO.
-        */
-       writeq(0, &bar0->tx_w_round_robin_0);
-       writeq(0, &bar0->tx_w_round_robin_1);
-       writeq(0, &bar0->tx_w_round_robin_2);
-       writeq(0, &bar0->tx_w_round_robin_3);
-       writeq(0, &bar0->tx_w_round_robin_4);
-
-       /* 
-        * TODO
-        * Disable Rx steering. Hard coding all packets be steered to
-        * Queue 0 for now. 
+       /*
+        * Filling Tx round robin registers
+        * as per the number of FIFOs
          */
-       val64 = 0x8080808080808080ULL;
-       writeq(val64, &bar0->rts_qos_steering);
+       switch (config->tx_fifo_num) {
+       case 1:
+               val64 = 0x0000000000000000ULL;
+               writeq(val64, &bar0->tx_w_round_robin_0);
+               writeq(val64, &bar0->tx_w_round_robin_1);
+               writeq(val64, &bar0->tx_w_round_robin_2);
+               writeq(val64, &bar0->tx_w_round_robin_3);
+               writeq(val64, &bar0->tx_w_round_robin_4);
+               break;
+       case 2:
+               val64 = 0x0000010000010000ULL;
+               writeq(val64, &bar0->tx_w_round_robin_0);
+               val64 = 0x0100000100000100ULL;
+               writeq(val64, &bar0->tx_w_round_robin_1);
+               val64 = 0x0001000001000001ULL;
+               writeq(val64, &bar0->tx_w_round_robin_2);
+               val64 = 0x0000010000010000ULL;
+               writeq(val64, &bar0->tx_w_round_robin_3);
+               val64 = 0x0100000000000000ULL;
+               writeq(val64, &bar0->tx_w_round_robin_4);
+               break;
+       case 3:
+               val64 = 0x0001000102000001ULL;
+               writeq(val64, &bar0->tx_w_round_robin_0);
+               val64 = 0x0001020000010001ULL;
+               writeq(val64, &bar0->tx_w_round_robin_1);
+               val64 = 0x0200000100010200ULL;
+               writeq(val64, &bar0->tx_w_round_robin_2);
+               val64 = 0x0001000102000001ULL;
+               writeq(val64, &bar0->tx_w_round_robin_3);
+               val64 = 0x0001020000000000ULL;
+               writeq(val64, &bar0->tx_w_round_robin_4);
+               break;
+       case 4:
+               val64 = 0x0001020300010200ULL;
+               writeq(val64, &bar0->tx_w_round_robin_0);
+               val64 = 0x0100000102030001ULL;
+               writeq(val64, &bar0->tx_w_round_robin_1);
+               val64 = 0x0200010000010203ULL;
+               writeq(val64, &bar0->tx_w_round_robin_2);
+               val64 = 0x0001020001000001ULL;
+               writeq(val64, &bar0->tx_w_round_robin_3);
+               val64 = 0x0203000100000000ULL;
+               writeq(val64, &bar0->tx_w_round_robin_4);
+               break;
+       case 5:
+               val64 = 0x0001000203000102ULL;
+               writeq(val64, &bar0->tx_w_round_robin_0);
+               val64 = 0x0001020001030004ULL;
+               writeq(val64, &bar0->tx_w_round_robin_1);
+               val64 = 0x0001000203000102ULL;
+               writeq(val64, &bar0->tx_w_round_robin_2);
+               val64 = 0x0001020001030004ULL;
+               writeq(val64, &bar0->tx_w_round_robin_3);
+               val64 = 0x0001000000000000ULL;
+               writeq(val64, &bar0->tx_w_round_robin_4);
+               break;
+       case 6:
+               val64 = 0x0001020304000102ULL;
+               writeq(val64, &bar0->tx_w_round_robin_0);
+               val64 = 0x0304050001020001ULL;
+               writeq(val64, &bar0->tx_w_round_robin_1);
+               val64 = 0x0203000100000102ULL;
+               writeq(val64, &bar0->tx_w_round_robin_2);
+               val64 = 0x0304000102030405ULL;
+               writeq(val64, &bar0->tx_w_round_robin_3);
+               val64 = 0x0001000200000000ULL;
+               writeq(val64, &bar0->tx_w_round_robin_4);
+               break;
+       case 7:
+               val64 = 0x0001020001020300ULL;
+               writeq(val64, &bar0->tx_w_round_robin_0);
+               val64 = 0x0102030400010203ULL;
+               writeq(val64, &bar0->tx_w_round_robin_1);
+               val64 = 0x0405060001020001ULL;
+               writeq(val64, &bar0->tx_w_round_robin_2);
+               val64 = 0x0304050000010200ULL;
+               writeq(val64, &bar0->tx_w_round_robin_3);
+               val64 = 0x0102030000000000ULL;
+               writeq(val64, &bar0->tx_w_round_robin_4);
+               break;
+       case 8:
+               val64 = 0x0001020300040105ULL;
+               writeq(val64, &bar0->tx_w_round_robin_0);
+               val64 = 0x0200030106000204ULL;
+               writeq(val64, &bar0->tx_w_round_robin_1);
+               val64 = 0x0103000502010007ULL;
+               writeq(val64, &bar0->tx_w_round_robin_2);
+               val64 = 0x0304010002060500ULL;
+               writeq(val64, &bar0->tx_w_round_robin_3);
+               val64 = 0x0103020400000000ULL;
+               writeq(val64, &bar0->tx_w_round_robin_4);
+               break;
+       }
+
+       /* Filling the Rx round robin registers as per the
+        * number of Rings and steering based on QoS.
+         */
+       switch (config->rx_ring_num) {
+       case 1:
+               val64 = 0x8080808080808080ULL;
+               writeq(val64, &bar0->rts_qos_steering);
+               break;
+       case 2:
+               val64 = 0x0000010000010000ULL;
+               writeq(val64, &bar0->rx_w_round_robin_0);
+               val64 = 0x0100000100000100ULL;
+               writeq(val64, &bar0->rx_w_round_robin_1);
+               val64 = 0x0001000001000001ULL;
+               writeq(val64, &bar0->rx_w_round_robin_2);
+               val64 = 0x0000010000010000ULL;
+               writeq(val64, &bar0->rx_w_round_robin_3);
+               val64 = 0x0100000000000000ULL;
+               writeq(val64, &bar0->rx_w_round_robin_4);
+
+               val64 = 0x8080808040404040ULL;
+               writeq(val64, &bar0->rts_qos_steering);
+               break;
+       case 3:
+               val64 = 0x0001000102000001ULL;
+               writeq(val64, &bar0->rx_w_round_robin_0);
+               val64 = 0x0001020000010001ULL;
+               writeq(val64, &bar0->rx_w_round_robin_1);
+               val64 = 0x0200000100010200ULL;
+               writeq(val64, &bar0->rx_w_round_robin_2);
+               val64 = 0x0001000102000001ULL;
+               writeq(val64, &bar0->rx_w_round_robin_3);
+               val64 = 0x0001020000000000ULL;
+               writeq(val64, &bar0->rx_w_round_robin_4);
+
+               val64 = 0x8080804040402020ULL;
+               writeq(val64, &bar0->rts_qos_steering);
+               break;
+       case 4:
+               val64 = 0x0001020300010200ULL;
+               writeq(val64, &bar0->rx_w_round_robin_0);
+               val64 = 0x0100000102030001ULL;
+               writeq(val64, &bar0->rx_w_round_robin_1);
+               val64 = 0x0200010000010203ULL;
+               writeq(val64, &bar0->rx_w_round_robin_2);
+               val64 = 0x0001020001000001ULL;  
+               writeq(val64, &bar0->rx_w_round_robin_3);
+               val64 = 0x0203000100000000ULL;
+               writeq(val64, &bar0->rx_w_round_robin_4);
+
+               val64 = 0x8080404020201010ULL;
+               writeq(val64, &bar0->rts_qos_steering);
+               break;
+       case 5:
+               val64 = 0x0001000203000102ULL;
+               writeq(val64, &bar0->rx_w_round_robin_0);
+               val64 = 0x0001020001030004ULL;
+               writeq(val64, &bar0->rx_w_round_robin_1);
+               val64 = 0x0001000203000102ULL;
+               writeq(val64, &bar0->rx_w_round_robin_2);
+               val64 = 0x0001020001030004ULL;
+               writeq(val64, &bar0->rx_w_round_robin_3);
+               val64 = 0x0001000000000000ULL;
+               writeq(val64, &bar0->rx_w_round_robin_4);
+
+               val64 = 0x8080404020201008ULL;
+               writeq(val64, &bar0->rts_qos_steering);
+               break;
+       case 6:
+               val64 = 0x0001020304000102ULL;
+               writeq(val64, &bar0->rx_w_round_robin_0);
+               val64 = 0x0304050001020001ULL;
+               writeq(val64, &bar0->rx_w_round_robin_1);
+               val64 = 0x0203000100000102ULL;
+               writeq(val64, &bar0->rx_w_round_robin_2);
+               val64 = 0x0304000102030405ULL;
+               writeq(val64, &bar0->rx_w_round_robin_3);
+               val64 = 0x0001000200000000ULL;
+               writeq(val64, &bar0->rx_w_round_robin_4);
+
+               val64 = 0x8080404020100804ULL;
+               writeq(val64, &bar0->rts_qos_steering);
+               break;
+       case 7:
+               val64 = 0x0001020001020300ULL;
+               writeq(val64, &bar0->rx_w_round_robin_0);
+               val64 = 0x0102030400010203ULL;
+               writeq(val64, &bar0->rx_w_round_robin_1);
+               val64 = 0x0405060001020001ULL;
+               writeq(val64, &bar0->rx_w_round_robin_2);
+               val64 = 0x0304050000010200ULL;
+               writeq(val64, &bar0->rx_w_round_robin_3);
+               val64 = 0x0102030000000000ULL;
+               writeq(val64, &bar0->rx_w_round_robin_4);
+
+               val64 = 0x8080402010080402ULL;
+               writeq(val64, &bar0->rts_qos_steering);
+               break;
+       case 8:
+               val64 = 0x0001020300040105ULL;
+               writeq(val64, &bar0->rx_w_round_robin_0);
+               val64 = 0x0200030106000204ULL;
+               writeq(val64, &bar0->rx_w_round_robin_1);
+               val64 = 0x0103000502010007ULL;
+               writeq(val64, &bar0->rx_w_round_robin_2);
+               val64 = 0x0304010002060500ULL;
+               writeq(val64, &bar0->rx_w_round_robin_3);
+               val64 = 0x0103020400000000ULL;
+               writeq(val64, &bar0->rx_w_round_robin_4);
+
+               val64 = 0x8040201008040201ULL;
+               writeq(val64, &bar0->rts_qos_steering);
+               break;
+       }
  
         /* UDP Fix */
         val64 = 0;
-       for (i = 1; i < 8; i++)
+       for (i = 0; i < 8; i++)
                 writeq(val64, &bar0->rts_frm_len_n[i]);
  
-       /* Set rts_frm_len register for fifo 0 */
-       writeq(MAC_RTS_FRM_LEN_SET(dev->mtu + 22),
-              &bar0->rts_frm_len_n[0]);
+       /* Set the default rts frame length for the rings configured */
+       val64 = MAC_RTS_FRM_LEN_SET(dev->mtu+22);
+       for (i = 0 ; i < config->rx_ring_num ; i++)
+               writeq(val64, &bar0->rts_frm_len_n[i]);
+
+       /* Set the frame length for the configured rings
+        * desired by the user
+        */
+       for (i = 0; i < config->rx_ring_num; i++) {
+               /* If rts_frm_len[i] == 0 then it is assumed that the user
+                * has not specified frame length steering.
+                * If the user provides the frame length then program
+                * the rts_frm_len register for those values or else
+                * leave it as it is.
+                */
+               if (rts_frm_len[i] != 0) {
+                       writeq(MAC_RTS_FRM_LEN_SET(rts_frm_len[i]),
+                               &bar0->rts_frm_len_n[i]);
+               }
+       }
  
-       /* Enable statistics */
+       /* Program statistics memory */
         writeq(mac_control->stats_mem_phy, &bar0->stat_addr);
-       val64 = SET_UPDT_PERIOD(Stats_refresh_time) |
-           STAT_CFG_STAT_RO | STAT_CFG_STAT_EN;
-       writeq(val64, &bar0->stat_cfg);
  
-       /* 
+       if (nic->device_type == XFRAME_II_DEVICE) {
+               val64 = STAT_BC(0x320);
+               writeq(val64, &bar0->stat_byte_cnt);
+       }
+
+       /*
          * Initializing the sampling rate for the device to calculate the
          * bandwidth utilization.
          */
@@ -857,30 +1261,38 @@ static int init_nic(struct s2io_nic *nic)
         writeq(val64, &bar0->mac_link_util);
  
  
-       /* 
-        * Initializing the Transmit and Receive Traffic Interrupt 
+       /*
+        * Initializing the Transmit and Receive Traffic Interrupt
          * Scheme.
          */
-       /* TTI Initialization. Default Tx timer gets us about
+       /*
+        * TTI Initialization. Default Tx timer gets us about
          * 250 interrupts per sec. Continuous interrupts are enabled
          * by default.
          */
-       val64 = TTI_DATA1_MEM_TX_TIMER_VAL(0x2078) |
-           TTI_DATA1_MEM_TX_URNG_A(0xA) |
+       if (nic->device_type == XFRAME_II_DEVICE) {
+               int count = (nic->config.bus_speed * 125)/2;
+               val64 = TTI_DATA1_MEM_TX_TIMER_VAL(count);
+       } else {
+
+               val64 = TTI_DATA1_MEM_TX_TIMER_VAL(0x2078);
+       }
+       val64 |= TTI_DATA1_MEM_TX_URNG_A(0xA) |
             TTI_DATA1_MEM_TX_URNG_B(0x10) |
-           TTI_DATA1_MEM_TX_URNG_C(0x30) | TTI_DATA1_MEM_TX_TIMER_AC_EN |
-               TTI_DATA1_MEM_TX_TIMER_CI_EN;
+           TTI_DATA1_MEM_TX_URNG_C(0x30) | TTI_DATA1_MEM_TX_TIMER_AC_EN;
+               if (use_continuous_tx_intrs)
+                       val64 |= TTI_DATA1_MEM_TX_TIMER_CI_EN;
         writeq(val64, &bar0->tti_data1_mem);
  
         val64 = TTI_DATA2_MEM_TX_UFC_A(0x10) |
             TTI_DATA2_MEM_TX_UFC_B(0x20) |
-           TTI_DATA2_MEM_TX_UFC_C(0x40) | TTI_DATA2_MEM_TX_UFC_D(0x80);
+           TTI_DATA2_MEM_TX_UFC_C(0x70) | TTI_DATA2_MEM_TX_UFC_D(0x80);
         writeq(val64, &bar0->tti_data2_mem);
  
         val64 = TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE_NEW_CMD;
         writeq(val64, &bar0->tti_command_mem);
  
-       /* 
+       /*
          * Once the operation completes, the Strobe bit of the command
          * register will be reset. We poll for this particular condition
          * We wait for a maximum of 500ms for the operation to complete,
@@ -901,52 +1313,97 @@ static int init_nic(struct s2io_nic *nic)
                 time++;
         }
  
-       /* RTI Initialization */
-       val64 = RTI_DATA1_MEM_RX_TIMER_VAL(0xFFF) |
-           RTI_DATA1_MEM_RX_URNG_A(0xA) |
-           RTI_DATA1_MEM_RX_URNG_B(0x10) |
-           RTI_DATA1_MEM_RX_URNG_C(0x30) | RTI_DATA1_MEM_RX_TIMER_AC_EN;
+       if (nic->config.bimodal) {
+               int k = 0;
+               for (k = 0; k < config->rx_ring_num; k++) {
+                       val64 = TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE_NEW_CMD;
+                       val64 |= TTI_CMD_MEM_OFFSET(0x38+k);
+                       writeq(val64, &bar0->tti_command_mem);
+
+               /*
+                * Once the operation completes, the Strobe bit of the command
+                * register will be reset. We poll for this particular condition
+                * We wait for a maximum of 500ms for the operation to complete,
+                * if it's not complete by then we return error.
+               */
+                       time = 0;
+                       while (TRUE) {
+                               val64 = readq(&bar0->tti_command_mem);
+                               if (!(val64 & TTI_CMD_MEM_STROBE_NEW_CMD)) {
+                                       break;
+                               }
+                               if (time > 10) {
+                                       DBG_PRINT(ERR_DBG,
+                                               "%s: TTI init Failed\n",
+                                       dev->name);
+                                       return -1;
+                               }
+                               time++;
+                               msleep(50);
+                       }
+               }
+       } else {
  
-       writeq(val64, &bar0->rti_data1_mem);
+               /* RTI Initialization */
+               if (nic->device_type == XFRAME_II_DEVICE) {
+                       /*
+                        * Programmed to generate approx. 500 interrupts per
+                        * second
+                        */
+                       int count = (nic->config.bus_speed * 125)/4;
+                       val64 = RTI_DATA1_MEM_RX_TIMER_VAL(count);
+               } else {
+                       val64 = RTI_DATA1_MEM_RX_TIMER_VAL(0xFFF);
+               }
+               val64 |= RTI_DATA1_MEM_RX_URNG_A(0xA) |
+                   RTI_DATA1_MEM_RX_URNG_B(0x10) |
+                   RTI_DATA1_MEM_RX_URNG_C(0x30) | RTI_DATA1_MEM_RX_TIMER_AC_EN;
  
-       val64 = RTI_DATA2_MEM_RX_UFC_A(0x1) |
-           RTI_DATA2_MEM_RX_UFC_B(0x2) |
-           RTI_DATA2_MEM_RX_UFC_C(0x40) | RTI_DATA2_MEM_RX_UFC_D(0x80);
-       writeq(val64, &bar0->rti_data2_mem);
+               writeq(val64, &bar0->rti_data1_mem);
  
-       val64 = RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE_NEW_CMD;
-       writeq(val64, &bar0->rti_command_mem);
+               val64 = RTI_DATA2_MEM_RX_UFC_A(0x1) |
+                   RTI_DATA2_MEM_RX_UFC_B(0x2) |
+                   RTI_DATA2_MEM_RX_UFC_C(0x40) | RTI_DATA2_MEM_RX_UFC_D(0x80);
+               writeq(val64, &bar0->rti_data2_mem);
  
-       /* 
-        * Once the operation completes, the Strobe bit of the command
-        * register will be reset. We poll for this particular condition
-        * We wait for a maximum of 500ms for the operation to complete,
-        * if it's not complete by then we return error.
-        */
-       time = 0;
-       while (TRUE) {
-               val64 = readq(&bar0->rti_command_mem);
-               if (!(val64 & TTI_CMD_MEM_STROBE_NEW_CMD)) {
-                       break;
-               }
-               if (time > 10) {
-                       DBG_PRINT(ERR_DBG, "%s: RTI init Failed\n",
-                                 dev->name);
-                       return -1;
+               for (i = 0; i < config->rx_ring_num; i++) {
+                       val64 = RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE_NEW_CMD
+                                       | RTI_CMD_MEM_OFFSET(i);
+                       writeq(val64, &bar0->rti_command_mem);
+
+                       /*
+                        * Once the operation completes, the Strobe bit of the
+                        * command register will be reset. We poll for this
+                        * particular condition. We wait for a maximum of 500ms
+                        * for the operation to complete, if it's not complete
+                        * by then we return error.
+                        */
+                       time = 0;
+                       while (TRUE) {
+                               val64 = readq(&bar0->rti_command_mem);
+                               if (!(val64 & RTI_CMD_MEM_STROBE_NEW_CMD)) {
+                                       break;
+                               }
+                               if (time > 10) {
+                                       DBG_PRINT(ERR_DBG, "%s: RTI init Failed\n",
+                                                 dev->name);
+                                       return -1;
+                               }
+                               time++;
+                               msleep(50);
+                       }
                 }
-               time++;
-               msleep(50);
         }
  
-       /* 
-        * Initializing proper values as Pause threshold into all 
+       /*
+        * Initializing proper values as Pause threshold into all
          * the 8 Queues on Rx side.
          */
         writeq(0xffbbffbbffbbffbbULL, &bar0->mc_pause_thresh_q0q3);
         writeq(0xffbbffbbffbbffbbULL, &bar0->mc_pause_thresh_q4q7);
  
         /* Disable RMAC PAD STRIPPING */
-       add = &bar0->mac_cfg;
+       add = (void *) &bar0->mac_cfg;
         val64 = readq(&bar0->mac_cfg);
         val64 &= ~(MAC_CFG_RMAC_STRIP_PAD);
         writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
@@ -955,8 +1412,8 @@ static int init_nic(struct s2io_nic *nic)
         writel((u32) (val64 >> 32), (add + 4));
         val64 = readq(&bar0->mac_cfg);
  
-       /* 
-        * Set the time value to be inserted in the pause frame 
+       /*
+        * Set the time value to be inserted in the pause frame
          * generated by xena.
          */
         val64 = readq(&bar0->rmac_pause_cfg);
@@ -964,7 +1421,7 @@ static int init_nic(struct s2io_nic *nic)
         val64 |= RMAC_PAUSE_HG_PTIME(nic->mac_control.rmac_pause_time);
         writeq(val64, &bar0->rmac_pause_cfg);
  
-       /* 
+       /*
          * Set the Threshold Limit for Generating the pause frame
          * If the amount of data in any Queue exceeds ratio of
          * (mac_control.mc_pause_threshold_q0q3 or q4q7)/256
@@ -988,25 +1445,54 @@ static int init_nic(struct s2io_nic *nic)
         }
         writeq(val64, &bar0->mc_pause_thresh_q4q7);
  
-       /* 
-        * TxDMA will stop Read request if the number of read split has 
+       /*
+        * TxDMA will stop Read request if the number of read split has
          * exceeded the limit pointed by shared_splits
          */
         val64 = readq(&bar0->pic_control);
         val64 |= PIC_CNTL_SHARED_SPLITS(shared_splits);
         writeq(val64, &bar0->pic_control);
  
+       /*
+        * Programming the Herc to split every write transaction
+        * that does not start on an ADB to reduce disconnects.
+        */
+       if (nic->device_type == XFRAME_II_DEVICE) {
+               val64 = WREQ_SPLIT_MASK_SET_MASK(255);
+               writeq(val64, &bar0->wreq_split_mask);
+       }
+
+       /* Setting Link stability period to 64 ms */ 
+       if (nic->device_type == XFRAME_II_DEVICE) {
+               val64 = MISC_LINK_STABILITY_PRD(3);
+               writeq(val64, &bar0->misc_control);
+       }
+
         return SUCCESS;
  }
+#define LINK_UP_DOWN_INTERRUPT         1
+#define MAC_RMAC_ERR_TIMER             2
  
-/**  
- *  en_dis_able_nic_intrs - Enable or Disable the interrupts 
+#if defined(CONFIG_MSI_MODE) || defined(CONFIG_MSIX_MODE)
+#define s2io_link_fault_indication(x) MAC_RMAC_ERR_TIMER
+#else
+int s2io_link_fault_indication(nic_t *nic) /* link-fault indication by device type */
+{      /* compiled only when neither CONFIG_MSI_MODE nor CONFIG_MSIX_MODE is defined */
+       if (nic->device_type == XFRAME_II_DEVICE)
+               return LINK_UP_DOWN_INTERRUPT; /* Xframe II device */
+       else
+               return MAC_RMAC_ERR_TIMER; /* any other device type */
+}
+#endif
+
+/**
+ *  en_dis_able_nic_intrs - Enable or Disable the interrupts
   *  @nic: device private variable,
   *  @mask: A mask indicating which Intr block must be modified and,
   *  @flag: A flag indicating whether to enable or disable the Intrs.
   *  Description: This function will either disable or enable the interrupts
- *  depending on the flag argument. The mask argument can be used to 
- *  enable/disable any Intr block. 
+ *  depending on the flag argument. The mask argument can be used to
+ *  enable/disable any Intr block.
   *  Return Value: NONE.
   */
  
@@ -1024,20 +1510,31 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag)
                         temp64 = readq(&bar0->general_int_mask);
                         temp64 &= ~((u64) val64);
                         writeq(temp64, &bar0->general_int_mask);
-                       /*  
-                        * Disabled all PCIX, Flash, MDIO, IIC and GPIO
-                        * interrupts for now. 
-                        * TODO 
+                       /*
+                        * On a Hercules adapter enable the GPIO link-up
+                        * interrupt; otherwise disable all PCIX, Flash,
+                        * MDIO, IIC and GPIO interrupts for now.
+                        * TODO
                          */
-                       writeq(DISABLE_ALL_INTRS, &bar0->pic_int_mask);
-                       /* 
+                       if (s2io_link_fault_indication(nic) ==
+                                       LINK_UP_DOWN_INTERRUPT ) {
+                               temp64 = readq(&bar0->pic_int_mask);
+                               temp64 &= ~((u64) PIC_INT_GPIO);
+                               writeq(temp64, &bar0->pic_int_mask);
+                               temp64 = readq(&bar0->gpio_int_mask);
+                               temp64 &= ~((u64) GPIO_INT_MASK_LINK_UP);
+                               writeq(temp64, &bar0->gpio_int_mask);
+                       } else {
+                               writeq(DISABLE_ALL_INTRS, &bar0->pic_int_mask);
+                       }
+                       /*
                          * No MSI Support is available presently, so TTI and
                          * RTI interrupts are also disabled.
                          */
                 } else if (flag == DISABLE_INTRS) {
-                       /*  
-                        * Disable PIC Intrs in the general 
-                        * intr mask register 
+                       /*
+                        * Disable PIC Intrs in the general
+                        * intr mask register
                          */
                         writeq(DISABLE_ALL_INTRS, &bar0->pic_int_mask);
                         temp64 = readq(&bar0->general_int_mask);
@@ -1055,27 +1552,27 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag)
                         temp64 = readq(&bar0->general_int_mask);
                         temp64 &= ~((u64) val64);
                         writeq(temp64, &bar0->general_int_mask);
-                       /* 
-                        * Keep all interrupts other than PFC interrupt 
+                       /*
+                        * Keep all interrupts other than PFC interrupt
                          * and PCC interrupt disabled in DMA level.
                          */
                         val64 = DISABLE_ALL_INTRS & ~(TXDMA_PFC_INT_M |
                                                       TXDMA_PCC_INT_M);
                         writeq(val64, &bar0->txdma_int_mask);
-                       /* 
-                        * Enable only the MISC error 1 interrupt in PFC block 
+                       /*
+                        * Enable only the MISC error 1 interrupt in PFC block
                          */
                         val64 = DISABLE_ALL_INTRS & (~PFC_MISC_ERR_1);
                         writeq(val64, &bar0->pfc_err_mask);
-                       /* 
-                        * Enable only the FB_ECC error interrupt in PCC block 
+                       /*
+                        * Enable only the FB_ECC error interrupt in PCC block
                          */
                         val64 = DISABLE_ALL_INTRS & (~PCC_FB_ECC_ERR);
                         writeq(val64, &bar0->pcc_err_mask);
                 } else if (flag == DISABLE_INTRS) {
-                       /* 
-                        * Disable TxDMA Intrs in the general intr mask 
-                        * register 
+                       /*
+                        * Disable TxDMA Intrs in the general intr mask
+                        * register
                          */
                         writeq(DISABLE_ALL_INTRS, &bar0->txdma_int_mask);
                         writeq(DISABLE_ALL_INTRS, &bar0->pfc_err_mask);
@@ -1093,15 +1590,15 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag)
                         temp64 = readq(&bar0->general_int_mask);
                         temp64 &= ~((u64) val64);
                         writeq(temp64, &bar0->general_int_mask);
-                       /* 
-                        * All RxDMA block interrupts are disabled for now 
-                        * TODO 
+                       /*
+                        * All RxDMA block interrupts are disabled for now
+                        * TODO
                          */
                         writeq(DISABLE_ALL_INTRS, &bar0->rxdma_int_mask);
                 } else if (flag == DISABLE_INTRS) {
-                       /*  
-                        * Disable RxDMA Intrs in the general intr mask 
-                        * register 
+                       /*
+                        * Disable RxDMA Intrs in the general intr mask
+                        * register
                          */
                         writeq(DISABLE_ALL_INTRS, &bar0->rxdma_int_mask);
                         temp64 = readq(&bar0->general_int_mask);
@@ -1118,22 +1615,13 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag)
                         temp64 = readq(&bar0->general_int_mask);
                         temp64 &= ~((u64) val64);
                         writeq(temp64, &bar0->general_int_mask);
-                       /* 
-                        * All MAC block error interrupts are disabled for now 
-                        * except the link status change interrupt.
+                       /*
+                        * All MAC block error interrupts are disabled for now
                          * TODO
                          */
-                       val64 = MAC_INT_STATUS_RMAC_INT;
-                       temp64 = readq(&bar0->mac_int_mask);
-                       temp64 &= ~((u64) val64);
-                       writeq(temp64, &bar0->mac_int_mask);
-
-                       val64 = readq(&bar0->mac_rmac_err_mask);
-                       val64 &= ~((u64) RMAC_LINK_STATE_CHANGE_INT);
-                       writeq(val64, &bar0->mac_rmac_err_mask);
                 } else if (flag == DISABLE_INTRS) {
-                       /*  
-                        * Disable MAC Intrs in the general intr mask register 
+                       /*
+                        * Disable MAC Intrs in the general intr mask register
                          */
                         writeq(DISABLE_ALL_INTRS, &bar0->mac_int_mask);
                         writeq(DISABLE_ALL_INTRS,
@@ -1152,14 +1640,14 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag)
                         temp64 = readq(&bar0->general_int_mask);
                         temp64 &= ~((u64) val64);
                         writeq(temp64, &bar0->general_int_mask);
-                       /* 
+                       /*
                          * All XGXS block error interrupts are disabled for now
-                        * TODO 
+                        * TODO
                          */
                         writeq(DISABLE_ALL_INTRS, &bar0->xgxs_int_mask);
                 } else if (flag == DISABLE_INTRS) {
-                       /*  
-                        * Disable MC Intrs in the general intr mask register 
+                       /*
+                        * Disable MC Intrs in the general intr mask register
                          */
                         writeq(DISABLE_ALL_INTRS, &bar0->xgxs_int_mask);
                         temp64 = readq(&bar0->general_int_mask);
@@ -1175,11 +1663,11 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag)
                         temp64 = readq(&bar0->general_int_mask);
                         temp64 &= ~((u64) val64);
                         writeq(temp64, &bar0->general_int_mask);
-                       /* 
-                        * All MC block error interrupts are disabled for now
-                        * TODO 
+                       /*
+                        * Enable all MC Intrs.
                          */
-                       writeq(DISABLE_ALL_INTRS, &bar0->mc_int_mask);
+                       writeq(0x0, &bar0->mc_int_mask);
+                       writeq(0x0, &bar0->mc_err_mask);
                 } else if (flag == DISABLE_INTRS) {
                         /*
                          * Disable MC Intrs in the general intr mask register
@@ -1199,14 +1687,14 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag)
                         temp64 = readq(&bar0->general_int_mask);
                         temp64 &= ~((u64) val64);
                         writeq(temp64, &bar0->general_int_mask);
-                       /* 
+                       /*
                          * Enable all the Tx side interrupts
-                        * writing 0 Enables all 64 TX interrupt levels 
+                        * writing 0 Enables all 64 TX interrupt levels
                          */
                         writeq(0x0, &bar0->tx_traffic_mask);
                 } else if (flag == DISABLE_INTRS) {
-                       /* 
-                        * Disable Tx Traffic Intrs in the general intr mask 
+                       /*
+                        * Disable Tx Traffic Intrs in the general intr mask
                          * register.
                          */
                         writeq(DISABLE_ALL_INTRS, &bar0->tx_traffic_mask);
@@ -1226,8 +1714,8 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag)
                         /* writing 0 Enables all 8 RX interrupt levels */
                         writeq(0x0, &bar0->rx_traffic_mask);
                 } else if (flag == DISABLE_INTRS) {
-                       /*  
-                        * Disable Rx Traffic Intrs in the general intr mask 
+                       /*
+                        * Disable Rx Traffic Intrs in the general intr mask
                          * register.
                          */
                         writeq(DISABLE_ALL_INTRS, &bar0->rx_traffic_mask);
@@ -1238,24 +1726,66 @@ static void en_dis_able_nic_intrs(struct s2io_nic *nic, u16 mask, int flag)
         }
  }
  
-/**  
- *  verify_xena_quiescence - Checks whether the H/W is ready 
+/*
+ * check_prc_pcc_state - test the PCC and PRC fields of the adapter status
+ * @val64: adapter status value to examine
+ * @flag: FALSE selects the "no PCC idle bit, PRC quiescent" test, any
+ *        other value the "PCC idle" test
+ * @rev_id: adapter revision id, consulted only when @herc is zero
+ * @herc: non-zero when the caller is handling an Xframe II adapter
+ *
+ * Xframe II adapters and adapters with a revision id of 4 or more are
+ * checked against ADAPTER_STATUS_RMAC_PCC_IDLE, all others against
+ * ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE.
+ *
+ * Return: 1 when the selected test passes, 0 otherwise.
+ */
+static int check_prc_pcc_state(u64 val64, int flag, int rev_id, int herc)
+{
+       u64 prc = val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT;
+       u64 pcc_mask, pcc;
+
+       if (herc || (rev_id >= 4))
+               pcc_mask = ADAPTER_STATUS_RMAC_PCC_IDLE;
+       else
+               pcc_mask = ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE;
+       pcc = val64 & pcc_mask;
+
+       if (flag == FALSE) {
+               /* No PCC idle bit set and every PRC quiescent bit set */
+               if (pcc || (prc != ADAPTER_STATUS_RC_PRC_QUIESCENT))
+                       return 0;
+               return 1;
+       }
+
+       /* Every PCC idle bit set; PRC bits either all clear or all set */
+       if (pcc != pcc_mask)
+               return 0;
+       if (prc && (prc != ADAPTER_STATUS_RC_PRC_QUIESCENT))
+               return 0;
+
+       return 1;
+}
+/**
+ *  verify_xena_quiescence - Checks whether the H/W is ready
   *  @val64 :  Value read from adapter status register.
   *  @flag : indicates if the adapter enable bit was ever written once
   *  before.
   *  Description: Returns whether the H/W is ready to go or not. Depending
- *  on whether adapter enable bit was written or not the comparison 
+ *  on whether adapter enable bit was written or not the comparison
   *  differs and the calling function passes the input argument flag to
   *  indicate this.
- *  Return: 1 If xena is quiescence 
+ *  Return: 1 If xena is quiescence
   *          0 If Xena is not quiescence
   */
  
-static int verify_xena_quiescence(u64 val64, int flag)
+static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag)
  {
-       int ret = 0;
+       int ret = 0, herc;
         u64 tmp64 = ~((u64) val64);
+       int rev_id = get_xena_rev_id(sp->pdev);
  
+       herc = (sp->device_type == XFRAME_II_DEVICE);
         if (!
             (tmp64 &
              (ADAPTER_STATUS_TDMA_READY | ADAPTER_STATUS_RDMA_READY |
@@ -1263,25 +1793,7 @@ static int verify_xena_quiescence(u64 val64, int flag)
               ADAPTER_STATUS_PIC_QUIESCENT | ADAPTER_STATUS_MC_DRAM_READY |
               ADAPTER_STATUS_MC_QUEUES_READY | ADAPTER_STATUS_M_PLL_LOCK |
               ADAPTER_STATUS_P_PLL_LOCK))) {
-               if (flag == FALSE) {
-                       if (!(val64 & ADAPTER_STATUS_RMAC_PCC_IDLE) &&
-                           ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) ==
-                            ADAPTER_STATUS_RC_PRC_QUIESCENT)) {
-
-                               ret = 1;
-
-                       }
-               } else {
-                       if (((val64 & ADAPTER_STATUS_RMAC_PCC_IDLE) ==
-                            ADAPTER_STATUS_RMAC_PCC_IDLE) &&
-                           (!(val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) ||
-                            ((val64 & ADAPTER_STATUS_RC_PRC_QUIESCENT) ==
-                             ADAPTER_STATUS_RC_PRC_QUIESCENT))) {
-
-                               ret = 1;
-
-                       }
-               }
+               ret = check_prc_pcc_state(val64, flag, rev_id, herc);
         }
  
         return ret;
@@ -1290,12 +1802,12 @@ static int verify_xena_quiescence(u64 val64, int flag)
  /**
   * fix_mac_address -  Fix for Mac addr problem on Alpha platforms
   * @sp: Pointer to device specifc structure
- * Description : 
+ * Description :
   * New procedure to clear mac address reading  problems on Alpha platforms
   *
   */
  
-static void fix_mac_address(nic_t * sp)
+void fix_mac_address(nic_t * sp)
  {
         XENA_dev_config_t __iomem *bar0 = sp->bar0;
         u64 val64;
@@ -1303,20 +1815,21 @@ static void fix_mac_address(nic_t * sp)
  
         while (fix_mac[i] != END_SIGN) {
                 writeq(fix_mac[i++], &bar0->gpio_control);
+               udelay(10);
                 val64 = readq(&bar0->gpio_control);
         }
  }
  
  /**
- *  start_nic - Turns the device on   
+ *  start_nic - Turns the device on
   *  @nic : device private variable.
- *  Description: 
- *  This function actually turns the device on. Before this  function is 
- *  called,all Registers are configured from their reset states 
- *  and shared memory is allocated but the NIC is still quiescent. On 
+ *  Description:
+ *  This function actually turns the device on. Before this  function is
+ *  called,all Registers are configured from their reset states
+ *  and shared memory is allocated but the NIC is still quiescent. On
   *  calling this function, the device interrupts are cleared and the NIC is
   *  literally switched on by writing into the adapter control register.
- *  Return Value: 
+ *  Return Value:
   *  SUCCESS on success and -1 on failure.
   */
  
@@ -1325,8 +1838,8 @@ static int start_nic(struct s2io_nic *nic)
         XENA_dev_config_t __iomem *bar0 = nic->bar0;
         struct net_device *dev = nic->dev;
         register u64 val64 = 0;
-       u16 interruptible, i;
-       u16 subid;
+       u16 interruptible;
+       u16 subid, i;
         mac_info_t *mac_control;
         struct config_param *config;
  
@@ -1335,10 +1848,12 @@ static int start_nic(struct s2io_nic *nic)
  
         /*  PRC Initialization and configuration */
         for (i = 0; i < config->rx_ring_num; i++) {
-               writeq((u64) nic->rx_blocks[i][0].block_dma_addr,
+               writeq((u64) mac_control->rings[i].rx_blocks[0].block_dma_addr,
                        &bar0->prc_rxd0_n[i]);
  
                 val64 = readq(&bar0->prc_ctrl_n[i]);
+               if (nic->config.bimodal)
+                       val64 |= PRC_CTRL_BIMODAL_INTERRUPT;
  #ifndef CONFIG_2BUFF_MODE
                 val64 |= PRC_CTRL_RC_ENABLED;
  #else
@@ -1354,7 +1869,7 @@ static int start_nic(struct s2io_nic *nic)
         writeq(val64, &bar0->rx_pa_cfg);
  #endif
  
-       /* 
+       /*
          * Enabling MC-RLDRAM. After enabling the device, we timeout
          * for around 100ms, which is approximately the time required
          * for the device to be ready for operation.
@@ -1364,27 +1879,27 @@ static int start_nic(struct s2io_nic *nic)
         SPECIAL_REG_WRITE(val64, &bar0->mc_rldram_mrs, UF);
         val64 = readq(&bar0->mc_rldram_mrs);
  
-       msleep(100);                    /* Delay by around 100 ms. */
+       msleep(100);    /* Delay by around 100 ms. */
  
         /* Enabling ECC Protection. */
         val64 = readq(&bar0->adapter_control);
         val64 &= ~ADAPTER_ECC_EN;
         writeq(val64, &bar0->adapter_control);
  
-       /* 
-        * Clearing any possible Link state change interrupts that 
+       /*
+        * Clearing any possible Link state change interrupts that
          * could have popped up just before Enabling the card.
          */
         val64 = readq(&bar0->mac_rmac_err_reg);
         if (val64)
                 writeq(val64, &bar0->mac_rmac_err_reg);
  
-       /* 
-        * Verify if the device is ready to be enabled, if so enable 
+       /*
+        * Verify if the device is ready to be enabled, if so enable
          * it.
          */
         val64 = readq(&bar0->adapter_status);
-       if (!verify_xena_quiescence(val64, nic->device_enabled_once)) {
+       if (!verify_xena_quiescence(nic, val64, nic->device_enabled_once)) {
                 DBG_PRINT(ERR_DBG, "%s: device is not ready, ", dev->name);
                 DBG_PRINT(ERR_DBG, "Adapter status reads: 0x%llx\n",
                           (unsigned long long) val64);
@@ -1392,16 +1907,18 @@ static int start_nic(struct s2io_nic *nic)
         }
  
         /*  Enable select interrupts */
-       interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR | TX_MAC_INTR |
-           RX_MAC_INTR;
+       interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR;
+       interruptible |= TX_PIC_INTR | RX_PIC_INTR;
+       interruptible |= TX_MAC_INTR | RX_MAC_INTR;
+
         en_dis_able_nic_intrs(nic, interruptible, ENABLE_INTRS);
  
-       /* 
+       /*
          * With some switches, link might be already up at this point.
-        * Because of this weird behavior, when we enable laser, 
-        * we may not get link. We need to handle this. We cannot 
-        * figure out which switch is misbehaving. So we are forced to 
-        * make a global change. 
+        * Because of this weird behavior, when we enable laser,
+        * we may not get link. We need to handle this. We cannot
+        * figure out which switch is misbehaving. So we are forced to
+        * make a global change.
          */
  
         /* Enabling Laser. */
@@ -1411,44 +1928,30 @@ static int start_nic(struct s2io_nic *nic)
  
         /* SXE-002: Initialize link and activity LED */
         subid = nic->pdev->subsystem_device;
-       if ((subid & 0xFF) >= 0x07) {
+       if (((subid & 0xFF) >= 0x07) &&
+           (nic->device_type == XFRAME_I_DEVICE)) {
                 val64 = readq(&bar0->gpio_control);
                 val64 |= 0x0000800000000000ULL;
                 writeq(val64, &bar0->gpio_control);
                 val64 = 0x0411040400000000ULL;
-               writeq(val64, (void __iomem *) bar0 + 0x2700);
+               writeq(val64, (void __iomem *) ((u8 *) bar0 + 0x2700));
         }
  
-       /* 
-        * Don't see link state interrupts on certain switches, so 
+       /*
+        * Don't see link state interrupts on certain switches, so
          * directly scheduling a link state task from here.
          */
         schedule_work(&nic->set_link_task);
  
-       /* 
-        * Here we are performing soft reset on XGXS to 
-        * force link down. Since link is already up, we will get
-        * link state change interrupt after this reset
-        */
-       SPECIAL_REG_WRITE(0x80010515001E0000ULL, &bar0->dtx_control, UF);
-       val64 = readq(&bar0->dtx_control);
-       udelay(50);
-       SPECIAL_REG_WRITE(0x80010515001E00E0ULL, &bar0->dtx_control, UF);
-       val64 = readq(&bar0->dtx_control);
-       udelay(50);
-       SPECIAL_REG_WRITE(0x80070515001F00E4ULL, &bar0->dtx_control, UF);
-       val64 = readq(&bar0->dtx_control);
-       udelay(50);
-
         return SUCCESS;
  }
  
-/** 
- *  free_tx_buffers - Free all queued Tx buffers 
+/**
+ *  free_tx_buffers - Free all queued Tx buffers
   *  @nic : device private variable.
- *  Description: 
+ *  Description:
   *  Free all queued Tx buffers.
- *  Return Value: void 
+ *  Return Value: void
  */
  
  static void free_tx_buffers(struct s2io_nic *nic)
@@ -1459,39 +1962,61 @@ static void free_tx_buffers(struct s2io_nic *nic)
         int i, j;
         mac_info_t *mac_control;
         struct config_param *config;
-       int cnt = 0;
+       int cnt = 0, frg_cnt;
  
         mac_control = &nic->mac_control;
         config = &nic->config;
  
         for (i = 0; i < config->tx_fifo_num; i++) {
                 for (j = 0; j < config->tx_cfg[i].fifo_len - 1; j++) {
-                       txdp = (TxD_t *) nic->list_info[i][j].
+                       txdp = (TxD_t *) mac_control->fifos[i].list_info[j].
                             list_virt_addr;
                         skb =
                             (struct sk_buff *) ((unsigned long) txdp->
                                                 Host_Control);
                         if (skb == NULL) {
-                               memset(txdp, 0, sizeof(TxD_t));
+                               memset(txdp, 0, sizeof(TxD_t) *
+                                      config->max_txds);
                                 continue;
                         }
+                       frg_cnt = skb_shinfo(skb)->nr_frags;
+                       pci_unmap_single(nic->pdev, (dma_addr_t)
+                                        txdp->Buffer_Pointer,
+                                        skb->len - skb->data_len,
+                                        PCI_DMA_TODEVICE);
+                       if (frg_cnt) {
+                               /*
+                                * j is the outer list index; use frg here.
+                                */
+                               TxD_t *temp = txdp;
+                               int frg;
+                               txdp++;
+                               for (frg = 0; frg < frg_cnt; frg++, txdp++) {
+                                       skb_frag_t *frag =
+                                           &skb_shinfo(skb)->frags[frg];
+                                       pci_unmap_page(nic->pdev,
+                                           (dma_addr_t) txdp->Buffer_Pointer,
+                                           frag->size, PCI_DMA_TODEVICE);
+                               }
+                               txdp = temp;
+                       }
                         dev_kfree_skb(skb);
-                       memset(txdp, 0, sizeof(TxD_t));
+                       memset(txdp, 0, sizeof(TxD_t) * config->max_txds);
                         cnt++;
                 }
                 DBG_PRINT(INTR_DBG,
                           "%s:forcibly freeing %d skbs on FIFO%d\n",
                           dev->name, cnt, i);
-               mac_control->tx_curr_get_info[i].offset = 0;
-               mac_control->tx_curr_put_info[i].offset = 0;
+               mac_control->fifos[i].tx_curr_get_info.offset = 0;
+               mac_control->fifos[i].tx_curr_put_info.offset = 0;
         }
  }
  
-/**  
- *   stop_nic -  To stop the nic  
+/**
+ *   stop_nic -  To stop the nic
   *   @nic ; device private variable.
- *   Description: 
- *   This function does exactly the opposite of what the start_nic() 
+ *   Description:
+ *   This function does exactly the opposite of what the start_nic()
   *   function does. This function is called to stop the device.
   *   Return Value:
   *   void.
@@ -1509,8 +2034,9 @@ static void stop_nic(struct s2io_nic *nic)
         config = &nic->config;
  
         /*  Disable all interrupts */
-       interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR | TX_MAC_INTR |
-           RX_MAC_INTR;
+       interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR;
+       interruptible |= TX_PIC_INTR | RX_PIC_INTR;
+       interruptible |= TX_MAC_INTR | RX_MAC_INTR;
         en_dis_able_nic_intrs(nic, interruptible, DISABLE_INTRS);
  
         /*  Disable PRCs */
@@ -1521,11 +2047,11 @@ static void stop_nic(struct s2io_nic *nic)
         }
  }
  
-/**  
- *  fill_rx_buffers - Allocates the Rx side skbs 
+/**
+ *  fill_rx_buffers - Allocates the Rx side skbs
   *  @nic:  device private variable
- *  @ring_no: ring number 
- *  Description: 
+ *  @ring_no: ring number
+ *  Description:
   *  The function allocates Rx side skbs and puts the physical
   *  address of these buffers into the RxD buffer pointers, so that the NIC
   *  can DMA the received frame into these locations.
@@ -1533,8 +2059,8 @@ static void stop_nic(struct s2io_nic *nic)
   *  1. single buffer,
   *  2. three buffer and
   *  3. Five buffer modes.
- *  Each mode defines how many fragments the received frame will be split 
- *  up into by the NIC. The frame is split into L3 header, L4 Header, 
+ *  Each mode defines how many fragments the received frame will be split
+ *  up into by the NIC. The frame is split into L3 header, L4 Header,
   *  L4 payload in three buffer mode and in 5 buffer mode, L4 payload itself
   *  is split into 3 fragments. As of now only single buffer mode is
   *  supported.
@@ -1542,7 +2068,7 @@ static void stop_nic(struct s2io_nic *nic)
   *  SUCCESS on success or an appropriate -ve value on failure.
   */
  
-static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
+int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
  {
         struct net_device *dev = nic->dev;
         struct sk_buff *skb;
@@ -1550,34 +2076,35 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
         int off, off1, size, block_no, block_no1;
         int offset, offset1;
         u32 alloc_tab = 0;
-       u32 alloc_cnt = nic->pkt_cnt[ring_no] -
-           atomic_read(&nic->rx_bufs_left[ring_no]);
+       u32 alloc_cnt;
         mac_info_t *mac_control;
         struct config_param *config;
  #ifdef CONFIG_2BUFF_MODE
         RxD_t *rxdpnext;
         int nextblk;
-       unsigned long tmp;
+       u64 tmp;
         buffAdd_t *ba;
         dma_addr_t rxdpphys;
  #endif
  #ifndef CONFIG_S2IO_NAPI
         unsigned long flags;
  #endif
+       RxD_t *first_rxdp = NULL;
  
         mac_control = &nic->mac_control;
         config = &nic->config;
-
+       alloc_cnt = mac_control->rings[ring_no].pkt_cnt -
+           atomic_read(&nic->rx_bufs_left[ring_no]);
         size = dev->mtu + HEADER_ETHERNET_II_802_3_SIZE +
             HEADER_802_2_SIZE + HEADER_SNAP_SIZE;
  
         while (alloc_tab < alloc_cnt) {
-               block_no = mac_control->rx_curr_put_info[ring_no].
+               block_no = mac_control->rings[ring_no].rx_curr_put_info.
                     block_index;
-               block_no1 = mac_control->rx_curr_get_info[ring_no].
+               block_no1 = mac_control->rings[ring_no].rx_curr_get_info.
                     block_index;
-               off = mac_control->rx_curr_put_info[ring_no].offset;
-               off1 = mac_control->rx_curr_get_info[ring_no].offset;
+               off = mac_control->rings[ring_no].rx_curr_put_info.offset;
+               off1 = mac_control->rings[ring_no].rx_curr_get_info.offset;
  #ifndef CONFIG_2BUFF_MODE
                 offset = block_no * (MAX_RXDS_PER_BLOCK + 1) + off;
                 offset1 = block_no1 * (MAX_RXDS_PER_BLOCK + 1) + off1;
@@ -1586,7 +2113,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                 offset1 = block_no1 * (MAX_RXDS_PER_BLOCK) + off1;
  #endif
  
-               rxdp = nic->rx_blocks[ring_no][block_no].
+               rxdp = mac_control->rings[ring_no].rx_blocks[block_no].
                     block_virt_addr + off;
                 if ((offset == offset1) && (rxdp->Host_Control)) {
                         DBG_PRINT(INTR_DBG, "%s: Get and Put", dev->name);
@@ -1595,15 +2122,15 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                 }
  #ifndef        CONFIG_2BUFF_MODE
                 if (rxdp->Control_1 == END_OF_BLOCK) {
-                       mac_control->rx_curr_put_info[ring_no].
+                       mac_control->rings[ring_no].rx_curr_put_info.
                             block_index++;
-                       mac_control->rx_curr_put_info[ring_no].
-                           block_index %= nic->block_count[ring_no];
-                       block_no = mac_control->rx_curr_put_info
-                           [ring_no].block_index;
+                       mac_control->rings[ring_no].rx_curr_put_info.
+                           block_index %= mac_control->rings[ring_no].block_count;
+                       block_no = mac_control->rings[ring_no].rx_curr_put_info.
+                               block_index;
                         off++;
                         off %= (MAX_RXDS_PER_BLOCK + 1);
-                       mac_control->rx_curr_put_info[ring_no].offset =
+                       mac_control->rings[ring_no].rx_curr_put_info.offset =
                             off;
                         rxdp = (RxD_t *) ((unsigned long) rxdp->Control_2);
                         DBG_PRINT(INTR_DBG, "%s: Next block at: %p\n",
@@ -1611,30 +2138,30 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                 }
  #ifndef CONFIG_S2IO_NAPI
                 spin_lock_irqsave(&nic->put_lock, flags);
-               nic->put_pos[ring_no] =
+               mac_control->rings[ring_no].put_pos =
                     (block_no * (MAX_RXDS_PER_BLOCK + 1)) + off;
                 spin_unlock_irqrestore(&nic->put_lock, flags);
  #endif
  #else
                 if (rxdp->Host_Control == END_OF_BLOCK) {
-                       mac_control->rx_curr_put_info[ring_no].
+                       mac_control->rings[ring_no].rx_curr_put_info.
                             block_index++;
-                       mac_control->rx_curr_put_info[ring_no].
-                           block_index %= nic->block_count[ring_no];
-                       block_no = mac_control->rx_curr_put_info
-                           [ring_no].block_index;
+                       mac_control->rings[ring_no].rx_curr_put_info.block_index
+                           %= mac_control->rings[ring_no].block_count;
+                       block_no = mac_control->rings[ring_no].rx_curr_put_info
+                           .block_index;
                         off = 0;
                         DBG_PRINT(INTR_DBG, "%s: block%d at: 0x%llx\n",
                                   dev->name, block_no,
                                   (unsigned long long) rxdp->Control_1);
-                       mac_control->rx_curr_put_info[ring_no].offset =
+                       mac_control->rings[ring_no].rx_curr_put_info.offset =
                             off;
-                       rxdp = nic->rx_blocks[ring_no][block_no].
+                       rxdp = mac_control->rings[ring_no].rx_blocks[block_no].
                             block_virt_addr;
                 }
  #ifndef CONFIG_S2IO_NAPI
                 spin_lock_irqsave(&nic->put_lock, flags);
-               nic->put_pos[ring_no] = (block_no *
+               mac_control->rings[ring_no].put_pos = (block_no *
                                          (MAX_RXDS_PER_BLOCK + 1)) + off;
                 spin_unlock_irqrestore(&nic->put_lock, flags);
  #endif
@@ -1646,27 +2173,27 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                 if (rxdp->Control_2 & BIT(0))
  #endif
                 {
-                       mac_control->rx_curr_put_info[ring_no].
+                       mac_control->rings[ring_no].rx_curr_put_info.
                             offset = off;
                         goto end;
                 }
  #ifdef CONFIG_2BUFF_MODE
-               /* 
-                * RxDs Spanning cache lines will be replenished only 
-                * if the succeeding RxD is also owned by Host. It 
-                * will always be the ((8*i)+3) and ((8*i)+6) 
-                * descriptors for the 48 byte descriptor. The offending 
+               /*
+                * RxDs Spanning cache lines will be replenished only
+                * if the succeeding RxD is also owned by Host. It
+                * will always be the ((8*i)+3) and ((8*i)+6)
+                * descriptors for the 48 byte descriptor. The offending
-                * decsriptor is of-course the 3rd descriptor.
+                * descriptor is of course the 3rd descriptor.
                  */
-               rxdpphys = nic->rx_blocks[ring_no][block_no].
+               rxdpphys = mac_control->rings[ring_no].rx_blocks[block_no].
                     block_dma_addr + (off * sizeof(RxD_t));
                 if (((u64) (rxdpphys)) % 128 > 80) {
-                       rxdpnext = nic->rx_blocks[ring_no][block_no].
+                       rxdpnext = mac_control->rings[ring_no].rx_blocks[block_no].
                             block_virt_addr + (off + 1);
                         if (rxdpnext->Host_Control == END_OF_BLOCK) {
                                 nextblk = (block_no + 1) %
-                                   (nic->block_count[ring_no]);
-                               rxdpnext = nic->rx_blocks[ring_no]
+                                   (mac_control->rings[ring_no].block_count);
+                               rxdpnext = mac_control->rings[ring_no].rx_blocks
                                     [nextblk].block_virt_addr;
                         }
                         if (rxdpnext->Control_2 & BIT(0))
@@ -1682,6 +2209,10 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                 if (!skb) {
                         DBG_PRINT(ERR_DBG, "%s: Out of ", dev->name);
                         DBG_PRINT(ERR_DBG, "memory to allocate SKBs\n");
+                       if (first_rxdp) {
+                               wmb();
+                               first_rxdp->Control_1 |= RXD_OWN_XENA;
+                       }
                         return -ENOMEM;
                 }
  #ifndef        CONFIG_2BUFF_MODE
@@ -1692,12 +2223,13 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                 rxdp->Control_2 &= (~MASK_BUFFER0_SIZE);
                 rxdp->Control_2 |= SET_BUFFER0_SIZE(size);
                 rxdp->Host_Control = (unsigned long) (skb);
-               rxdp->Control_1 |= RXD_OWN_XENA;
+               if (alloc_tab & ((1 << rxsync_frequency) - 1))
+                       rxdp->Control_1 |= RXD_OWN_XENA;
                 off++;
                 off %= (MAX_RXDS_PER_BLOCK + 1);
-               mac_control->rx_curr_put_info[ring_no].offset = off;
+               mac_control->rings[ring_no].rx_curr_put_info.offset = off;
  #else
-               ba = &nic->ba[ring_no][block_no][off];
+               ba = &mac_control->rings[ring_no].ba[block_no][off];
                 skb_reserve(skb, BUF0_LEN);
                 tmp = ((unsigned long) skb->data & ALIGN_SIZE);
                 if (tmp)
@@ -1719,22 +2251,41 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                 rxdp->Control_2 |= SET_BUFFER1_SIZE(1); /* dummy. */
                 rxdp->Control_2 |= BIT(0);      /* Set Buffer_Empty bit. */
                 rxdp->Host_Control = (u64) ((unsigned long) (skb));
-               rxdp->Control_1 |= RXD_OWN_XENA;
+               if (alloc_tab & ((1 << rxsync_frequency) - 1))
+                       rxdp->Control_1 |= RXD_OWN_XENA;
                 off++;
-               mac_control->rx_curr_put_info[ring_no].offset = off;
+               mac_control->rings[ring_no].rx_curr_put_info.offset = off;
  #endif
+               rxdp->Control_2 |= SET_RXD_MARKER;
+
+               if (!(alloc_tab & ((1 << rxsync_frequency) - 1))) {
+                       if (first_rxdp) {
+                               wmb();
+                               first_rxdp->Control_1 |= RXD_OWN_XENA;
+                       }
+                       first_rxdp = rxdp;
+               }
                 atomic_inc(&nic->rx_bufs_left[ring_no]);
                 alloc_tab++;
         }
  
        end:
+       /* Transfer ownership of first descriptor to adapter just before
+        * exiting. Before that, use memory barrier so that ownership
+        * and other fields are seen by adapter correctly.
+        */
+       if (first_rxdp) {
+               wmb();
+               first_rxdp->Control_1 |= RXD_OWN_XENA;
+       }
+
         return SUCCESS;
  }
  
  /**
- *  free_rx_buffers - Frees all Rx buffers   
+ *  free_rx_buffers - Frees all Rx buffers
   *  @sp: device private variable.
- *  Description: 
+ *  Description:
   *  This function will free all Rx buffers allocated by host.
   *  Return Value:
   *  NONE.
@@ -1758,7 +2309,8 @@ static void free_rx_buffers(struct s2io_nic *sp)
         for (i = 0; i < config->rx_ring_num; i++) {
                 for (j = 0, blk = 0; j < config->rx_cfg[i].num_rxd; j++) {
                         off = j % (MAX_RXDS_PER_BLOCK + 1);
-                       rxdp = sp->rx_blocks[i][blk].block_virt_addr + off;
+                       rxdp = mac_control->rings[i].rx_blocks[blk].
+                               block_virt_addr + off;
  
  #ifndef CONFIG_2BUFF_MODE
                         if (rxdp->Control_1 == END_OF_BLOCK) {
@@ -1793,7 +2345,7 @@ static void free_rx_buffers(struct s2io_nic *sp)
                                                  HEADER_SNAP_SIZE,
                                                  PCI_DMA_FROMDEVICE);
  #else
-                               ba = &sp->ba[i][blk][off];
+                               ba = &mac_control->rings[i].ba[blk][off];
                                 pci_unmap_single(sp->pdev, (dma_addr_t)
                                                  rxdp->Buffer0_ptr,
                                                  BUF0_LEN,
@@ -1813,10 +2365,10 @@ static void free_rx_buffers(struct s2io_nic *sp)
                         }
                         memset(rxdp, 0, sizeof(RxD_t));
                 }
-               mac_control->rx_curr_put_info[i].block_index = 0;
-               mac_control->rx_curr_get_info[i].block_index = 0;
-               mac_control->rx_curr_put_info[i].offset = 0;
-               mac_control->rx_curr_get_info[i].offset = 0;
+               mac_control->rings[i].rx_curr_put_info.block_index = 0;
+               mac_control->rings[i].rx_curr_get_info.block_index = 0;
+               mac_control->rings[i].rx_curr_put_info.offset = 0;
+               mac_control->rings[i].rx_curr_get_info.offset = 0;
                 atomic_set(&sp->rx_bufs_left[i], 0);
                 DBG_PRINT(INIT_DBG, "%s:Freed 0x%x Rx Buffers on ring%d\n",
                           dev->name, buf_cnt, i);
@@ -1826,7 +2378,7 @@ static void free_rx_buffers(struct s2io_nic *sp)
  /**
   * s2io_poll - Rx interrupt handler for NAPI support
   * @dev : pointer to the device structure.
- * @budget : The number of packets that were budgeted to be processed 
+ * @budget : The number of packets that were budgeted to be processed
   * during  one pass through the 'Poll" function.
   * Description:
   * Comes into picture only if NAPI support has been incorporated. It does
@@ -1836,160 +2388,36 @@ static void free_rx_buffers(struct s2io_nic *sp)
   * 0 on success and 1 if there are No Rx packets to be processed.
   */
  
-#ifdef CONFIG_S2IO_NAPI
+#if defined(CONFIG_S2IO_NAPI)
  static int s2io_poll(struct net_device *dev, int *budget)
  {
         nic_t *nic = dev->priv;
-       XENA_dev_config_t __iomem *bar0 = nic->bar0;
-       int pkts_to_process = *budget, pkt_cnt = 0;
-       register u64 val64 = 0;
-       rx_curr_get_info_t get_info, put_info;
-       int i, get_block, put_block, get_offset, put_offset, ring_bufs;
-#ifndef CONFIG_2BUFF_MODE
-       u16 val16, cksum;
-#endif
-       struct sk_buff *skb;
-       RxD_t *rxdp;
+       int pkt_cnt = 0, org_pkts_to_process;
         mac_info_t *mac_control;
         struct config_param *config;
-#ifdef CONFIG_2BUFF_MODE
-       buffAdd_t *ba;
-#endif
+       XENA_dev_config_t *bar0 = (XENA_dev_config_t *) nic->bar0;
+       u64 val64;
+       int i;
  
+       atomic_inc(&nic->isr_cnt);
         mac_control = &nic->mac_control;
         config = &nic->config;
  
-       if (pkts_to_process > dev->quota)
-               pkts_to_process = dev->quota;
+       nic->pkts_to_process = *budget;
+       if (nic->pkts_to_process > dev->quota)
+               nic->pkts_to_process = dev->quota;
+       org_pkts_to_process = nic->pkts_to_process;
  
         val64 = readq(&bar0->rx_traffic_int);
         writeq(val64, &bar0->rx_traffic_int);
  
         for (i = 0; i < config->rx_ring_num; i++) {
-               get_info = mac_control->rx_curr_get_info[i];
-               get_block = get_info.block_index;
-               put_info = mac_control->rx_curr_put_info[i];
-               put_block = put_info.block_index;
-               ring_bufs = config->rx_cfg[i].num_rxd;
-               rxdp = nic->rx_blocks[i][get_block].block_virt_addr +
-                   get_info.offset;
-#ifndef        CONFIG_2BUFF_MODE
-               get_offset = (get_block * (MAX_RXDS_PER_BLOCK + 1)) +
-                   get_info.offset;
-               put_offset = (put_block * (MAX_RXDS_PER_BLOCK + 1)) +
-                   put_info.offset;
-               while ((!(rxdp->Control_1 & RXD_OWN_XENA)) &&
-                      (((get_offset + 1) % ring_bufs) != put_offset)) {
-                       if (--pkts_to_process < 0) {
-                               goto no_rx;
-                       }
-                       if (rxdp->Control_1 == END_OF_BLOCK) {
-                               rxdp =
-                                   (RxD_t *) ((unsigned long) rxdp->
-                                              Control_2);
-                               get_info.offset++;
-                               get_info.offset %=
-                                   (MAX_RXDS_PER_BLOCK + 1);
-                               get_block++;
-                               get_block %= nic->block_count[i];
-                               mac_control->rx_curr_get_info[i].
-                                   offset = get_info.offset;
-                               mac_control->rx_curr_get_info[i].
-                                   block_index = get_block;
-                               continue;
-                       }
-                       get_offset =
-                           (get_block * (MAX_RXDS_PER_BLOCK + 1)) +
-                           get_info.offset;
-                       skb =
-                           (struct sk_buff *) ((unsigned long) rxdp->
-                                               Host_Control);
-                       if (skb == NULL) {
-                               DBG_PRINT(ERR_DBG, "%s: The skb is ",
-                                         dev->name);
-                               DBG_PRINT(ERR_DBG, "Null in Rx Intr\n");
-                               goto no_rx;
-                       }
-                       val64 = RXD_GET_BUFFER0_SIZE(rxdp->Control_2);
-                       val16 = (u16) (val64 >> 48);
-                       cksum = RXD_GET_L4_CKSUM(rxdp->Control_1);
-                       pci_unmap_single(nic->pdev, (dma_addr_t)
-                                        rxdp->Buffer0_ptr,
-                                        dev->mtu +
-                                        HEADER_ETHERNET_II_802_3_SIZE +
-                                        HEADER_802_2_SIZE +
-                                        HEADER_SNAP_SIZE,
-                                        PCI_DMA_FROMDEVICE);
-                       rx_osm_handler(nic, val16, rxdp, i);
-                       pkt_cnt++;
-                       get_info.offset++;
-                       get_info.offset %= (MAX_RXDS_PER_BLOCK + 1);
-                       rxdp =
-                           nic->rx_blocks[i][get_block].block_virt_addr +
-                           get_info.offset;
-                       mac_control->rx_curr_get_info[i].offset =
-                           get_info.offset;
+               rx_intr_handler(&mac_control->rings[i]);
+               pkt_cnt = org_pkts_to_process - nic->pkts_to_process;
+               if (!nic->pkts_to_process) {
+                       /* Quota for the current iteration has been met */
+                       goto no_rx;
                 }
-#else
-               get_offset = (get_block * (MAX_RXDS_PER_BLOCK + 1)) +
-                   get_info.offset;
-               put_offset = (put_block * (MAX_RXDS_PER_BLOCK + 1)) +
-                   put_info.offset;
-               while (((!(rxdp->Control_1 & RXD_OWN_XENA)) &&
-                       !(rxdp->Control_2 & BIT(0))) &&
-                      (((get_offset + 1) % ring_bufs) != put_offset)) {
-                       if (--pkts_to_process < 0) {
-                               goto no_rx;
-                       }
-                       skb = (struct sk_buff *) ((unsigned long)
-                                                 rxdp->Host_Control);
-                       if (skb == NULL) {
-                               DBG_PRINT(ERR_DBG, "%s: The skb is ",
-                                         dev->name);
-                               DBG_PRINT(ERR_DBG, "Null in Rx Intr\n");
-                               goto no_rx;
-                       }
-
-                       pci_unmap_single(nic->pdev, (dma_addr_t)
-                                        rxdp->Buffer0_ptr,
-                                        BUF0_LEN, PCI_DMA_FROMDEVICE);
-                       pci_unmap_single(nic->pdev, (dma_addr_t)
-                                        rxdp->Buffer1_ptr,
-                                        BUF1_LEN, PCI_DMA_FROMDEVICE);
-                       pci_unmap_single(nic->pdev, (dma_addr_t)
-                                        rxdp->Buffer2_ptr,
-                                        dev->mtu + BUF0_LEN + 4,
-                                        PCI_DMA_FROMDEVICE);
-                       ba = &nic->ba[i][get_block][get_info.offset];
-
-                       rx_osm_handler(nic, rxdp, i, ba);
-
-                       get_info.offset++;
-                       mac_control->rx_curr_get_info[i].offset =
-                           get_info.offset;
-                       rxdp =
-                           nic->rx_blocks[i][get_block].block_virt_addr +
-                           get_info.offset;
-
-                       if (get_info.offset &&
-                           (!(get_info.offset % MAX_RXDS_PER_BLOCK))) {
-                               get_info.offset = 0;
-                               mac_control->rx_curr_get_info[i].
-                                   offset = get_info.offset;
-                               get_block++;
-                               get_block %= nic->block_count[i];
-                               mac_control->rx_curr_get_info[i].
-                                   block_index = get_block;
-                               rxdp =
-                                   nic->rx_blocks[i][get_block].
-                                   block_virt_addr;
-                       }
-                       get_offset =
-                           (get_block * (MAX_RXDS_PER_BLOCK + 1)) +
-                           get_info.offset;
-                       pkt_cnt++;
-               }
-#endif
         }
         if (!pkt_cnt)
                 pkt_cnt = 1;
@@ -2007,9 +2435,10 @@ static int s2io_poll(struct net_device *dev, int *budget)
         }
         /* Re enable the Rx interrupts. */
         en_dis_able_nic_intrs(nic, RX_TRAFFIC_INTR, ENABLE_INTRS);
+       atomic_dec(&nic->isr_cnt);
         return 0;
  
-      no_rx:
+no_rx:
         dev->quota -= pkt_cnt;
         *budget -= pkt_cnt;
  
@@ -2020,279 +2449,204 @@ static int s2io_poll(struct net_device *dev, int *budget)
                         break;
                 }
         }
+       atomic_dec(&nic->isr_cnt);
         return 1;
  }
-#else
-/**  
+#endif
+
+/**
   *  rx_intr_handler - Rx interrupt handler
- *  @nic: device private variable.
+ *  @ring_data: Rx ring to service (device private is ring_data->nic).
- *  Description: 
- *  If the interrupt is because of a received frame or if the 
+ *  Description:
+ *  If the interrupt is because of a received frame or if the
   *  receive ring contains fresh as yet un-processed frames,this function is
- *  called. It picks out the RxD at which place the last Rx processing had 
- *  stopped and sends the skb to the OSM's Rx handler and then increments 
+ *  called. It picks out the RxD at which place the last Rx processing had
+ *  stopped and sends the skb to the OSM's Rx handler and then increments
   *  the offset.
   *  Return Value:
   *  NONE.
   */
-
-static void rx_intr_handler(struct s2io_nic *nic)
+static void rx_intr_handler(ring_info_t *ring_data)
  {
+       nic_t *nic = ring_data->nic;
         struct net_device *dev = (struct net_device *) nic->dev;
-       XENA_dev_config_t *bar0 = (XENA_dev_config_t *) nic->bar0;
+       int get_block, get_offset, put_block, put_offset, ring_bufs;
         rx_curr_get_info_t get_info, put_info;
         RxD_t *rxdp;
         struct sk_buff *skb;
-#ifndef CONFIG_2BUFF_MODE
-       u16 val16, cksum;
-#endif
-       register u64 val64 = 0;
-       int get_block, get_offset, put_block, put_offset, ring_bufs;
-       int i, pkt_cnt = 0;
-       mac_info_t *mac_control;
-       struct config_param *config;
-#ifdef CONFIG_2BUFF_MODE
-       buffAdd_t *ba;
+#ifndef CONFIG_S2IO_NAPI
+       int pkt_cnt = 0;
  #endif
+       spin_lock(&nic->rx_lock);
+       if (atomic_read(&nic->card_state) == CARD_DOWN) {
+               DBG_PRINT(ERR_DBG, "%s: %s going down for reset\n",
+                         __FUNCTION__, dev->name);
+               /*
+                * rx_lock is released here, so we must leave now: falling
+                * through would walk the ring without the lock held and
+                * then unlock rx_lock a second time at the end of the
+                * function.
+                */
+               spin_unlock(&nic->rx_lock);
+               return;
+       }
  
-       mac_control = &nic->mac_control;
-       config = &nic->config;
-
-       /* 
-        * rx_traffic_int reg is an R1 register, hence we read and write back 
-        * the samevalue in the register to clear it.
-        */
-       val64 = readq(&bar0->rx_traffic_int);
-       writeq(val64, &bar0->rx_traffic_int);
-
-       for (i = 0; i < config->rx_ring_num; i++) {
-               get_info = mac_control->rx_curr_get_info[i];
-               get_block = get_info.block_index;
-               put_info = mac_control->rx_curr_put_info[i];
-               put_block = put_info.block_index;
-               ring_bufs = config->rx_cfg[i].num_rxd;
-               rxdp = nic->rx_blocks[i][get_block].block_virt_addr +
+       get_info = ring_data->rx_curr_get_info;
+       get_block = get_info.block_index;
+       put_info = ring_data->rx_curr_put_info;
+       put_block = put_info.block_index;
+       ring_bufs = get_info.ring_len+1;
+       rxdp = ring_data->rx_blocks[get_block].block_virt_addr +
                     get_info.offset;
-#ifndef        CONFIG_2BUFF_MODE
-               get_offset = (get_block * (MAX_RXDS_PER_BLOCK + 1)) +
-                   get_info.offset;
-               spin_lock(&nic->put_lock);
-               put_offset = nic->put_pos[i];
-               spin_unlock(&nic->put_lock);
-               while ((!(rxdp->Control_1 & RXD_OWN_XENA)) &&
-                      (((get_offset + 1) % ring_bufs) != put_offset)) {
-                       if (rxdp->Control_1 == END_OF_BLOCK) {
-                               rxdp = (RxD_t *) ((unsigned long)
-                                                 rxdp->Control_2);
-                               get_info.offset++;
-                               get_info.offset %=
-                                   (MAX_RXDS_PER_BLOCK + 1);
-                               get_block++;
-                               get_block %= nic->block_count[i];
-                               mac_control->rx_curr_get_info[i].
-                                   offset = get_info.offset;
-                               mac_control->rx_curr_get_info[i].
-                                   block_index = get_block;
-                               continue;
-                       }
-                       get_offset =
-                           (get_block * (MAX_RXDS_PER_BLOCK + 1)) +
-                           get_info.offset;
-                       skb = (struct sk_buff *) ((unsigned long)
-                                                 rxdp->Host_Control);
-                       if (skb == NULL) {
-                               DBG_PRINT(ERR_DBG, "%s: The skb is ",
-                                         dev->name);
-                               DBG_PRINT(ERR_DBG, "Null in Rx Intr\n");
-                               return;
-                       }
-                       val64 = RXD_GET_BUFFER0_SIZE(rxdp->Control_2);
-                       val16 = (u16) (val64 >> 48);
-                       cksum = RXD_GET_L4_CKSUM(rxdp->Control_1);
-                       pci_unmap_single(nic->pdev, (dma_addr_t)
-                                        rxdp->Buffer0_ptr,
-                                        dev->mtu +
-                                        HEADER_ETHERNET_II_802_3_SIZE +
-                                        HEADER_802_2_SIZE +
-                                        HEADER_SNAP_SIZE,
-                                        PCI_DMA_FROMDEVICE);
-                       rx_osm_handler(nic, val16, rxdp, i);
-                       get_info.offset++;
-                       get_info.offset %= (MAX_RXDS_PER_BLOCK + 1);
-                       rxdp =
-                           nic->rx_blocks[i][get_block].block_virt_addr +
-                           get_info.offset;
-                       mac_control->rx_curr_get_info[i].offset =
-                           get_info.offset;
-                       pkt_cnt++;
-                       if ((indicate_max_pkts)
-                           && (pkt_cnt > indicate_max_pkts))
-                               break;
+       get_offset = (get_block * (MAX_RXDS_PER_BLOCK + 1)) +
+               get_info.offset;
+#ifndef CONFIG_S2IO_NAPI
+       spin_lock(&nic->put_lock);
+       put_offset = ring_data->put_pos;
+       spin_unlock(&nic->put_lock);
+#else
+       put_offset = (put_block * (MAX_RXDS_PER_BLOCK + 1)) +
+               put_info.offset;
+#endif
+       while (RXD_IS_UP2DT(rxdp) &&
+              (((get_offset + 1) % ring_bufs) != put_offset)) {
+               skb = (struct sk_buff *) ((unsigned long)rxdp->Host_Control);
+               if (skb == NULL) {
+                       DBG_PRINT(ERR_DBG, "%s: The skb is ",
+                                 dev->name);
+                       DBG_PRINT(ERR_DBG, "Null in Rx Intr\n");
+                       spin_unlock(&nic->rx_lock);
+                       return;
                 }
+#ifndef CONFIG_2BUFF_MODE
+               pci_unmap_single(nic->pdev, (dma_addr_t)
+                                rxdp->Buffer0_ptr,
+                                dev->mtu +
+                                HEADER_ETHERNET_II_802_3_SIZE +
+                                HEADER_802_2_SIZE +
+                                HEADER_SNAP_SIZE,
+                                PCI_DMA_FROMDEVICE);
  #else
-               get_offset = (get_block * (MAX_RXDS_PER_BLOCK + 1)) +
+               pci_unmap_single(nic->pdev, (dma_addr_t)
+                                rxdp->Buffer0_ptr,
+                                BUF0_LEN, PCI_DMA_FROMDEVICE);
+               pci_unmap_single(nic->pdev, (dma_addr_t)
+                                rxdp->Buffer1_ptr,
+                                BUF1_LEN, PCI_DMA_FROMDEVICE);
+               pci_unmap_single(nic->pdev, (dma_addr_t)
+                                rxdp->Buffer2_ptr,
+                                dev->mtu + BUF0_LEN + 4,
+                                PCI_DMA_FROMDEVICE);
+#endif
+               rx_osm_handler(ring_data, rxdp);
+               get_info.offset++;
+               ring_data->rx_curr_get_info.offset =
                     get_info.offset;
-               spin_lock(&nic->put_lock);
-               put_offset = nic->put_pos[i];
-               spin_unlock(&nic->put_lock);
-               while (((!(rxdp->Control_1 & RXD_OWN_XENA)) &&
-                       !(rxdp->Control_2 & BIT(0))) &&
-                      (((get_offset + 1) % ring_bufs) != put_offset)) {
-                       skb = (struct sk_buff *) ((unsigned long)
-                                                 rxdp->Host_Control);
-                       if (skb == NULL) {
-                               DBG_PRINT(ERR_DBG, "%s: The skb is ",
-                                         dev->name);
-                               DBG_PRINT(ERR_DBG, "Null in Rx Intr\n");
-                               return;
-                       }
-
-                       pci_unmap_single(nic->pdev, (dma_addr_t)
-                                        rxdp->Buffer0_ptr,
-                                        BUF0_LEN, PCI_DMA_FROMDEVICE);
-                       pci_unmap_single(nic->pdev, (dma_addr_t)
-                                        rxdp->Buffer1_ptr,
-                                        BUF1_LEN, PCI_DMA_FROMDEVICE);
-                       pci_unmap_single(nic->pdev, (dma_addr_t)
-                                        rxdp->Buffer2_ptr,
-                                        dev->mtu + BUF0_LEN + 4,
-                                        PCI_DMA_FROMDEVICE);
-                       ba = &nic->ba[i][get_block][get_info.offset];
-
-                       rx_osm_handler(nic, rxdp, i, ba);
-
-                       get_info.offset++;
-                       mac_control->rx_curr_get_info[i].offset =
-                           get_info.offset;
-                       rxdp =
-                           nic->rx_blocks[i][get_block].block_virt_addr +
-                           get_info.offset;
+               rxdp = ring_data->rx_blocks[get_block].block_virt_addr +
+                   get_info.offset;
+               if (get_info.offset &&
+                   (!(get_info.offset % MAX_RXDS_PER_BLOCK))) {
+                       get_info.offset = 0;
+                       ring_data->rx_curr_get_info.offset
+                           = get_info.offset;
+                       get_block++;
+                       get_block %= ring_data->block_count;
+                       ring_data->rx_curr_get_info.block_index
+                           = get_block;
+                       rxdp = ring_data->rx_blocks[get_block].block_virt_addr;
+               }
  
-                       if (get_info.offset &&
-                           (!(get_info.offset % MAX_RXDS_PER_BLOCK))) {
-                               get_info.offset = 0;
-                               mac_control->rx_curr_get_info[i].
-                                   offset = get_info.offset;
-                               get_block++;
-                               get_block %= nic->block_count[i];
-                               mac_control->rx_curr_get_info[i].
-                                   block_index = get_block;
-                               rxdp =
-                                   nic->rx_blocks[i][get_block].
-                                   block_virt_addr;
-                       }
-                       get_offset =
-                           (get_block * (MAX_RXDS_PER_BLOCK + 1)) +
+               get_offset = (get_block * (MAX_RXDS_PER_BLOCK + 1)) +
                             get_info.offset;
-                       pkt_cnt++;
-                       if ((indicate_max_pkts)
-                           && (pkt_cnt > indicate_max_pkts))
-                               break;
-               }
-#endif
+#ifdef CONFIG_S2IO_NAPI
+               nic->pkts_to_process -= 1;
+               if (!nic->pkts_to_process)
+                       break;
+#else
+               pkt_cnt++;
                 if ((indicate_max_pkts) && (pkt_cnt > indicate_max_pkts))
                         break;
+#endif
         }
+       spin_unlock(&nic->rx_lock);
  }
-#endif
-/**  
+
+/**
   *  tx_intr_handler - Transmit interrupt handler
- *  @nic : device private variable
+ *  @fifo_data : Tx fifo to service (device private is fifo_data->nic)
- *  Description: 
- *  If an interrupt was raised to indicate DMA complete of the 
- *  Tx packet, this function is called. It identifies the last TxD 
- *  whose buffer was freed and frees all skbs whose data have already 
+ *  Description:
+ *  If an interrupt was raised to indicate DMA complete of the
+ *  Tx packet, this function is called. It identifies the last TxD
+ *  whose buffer was freed and frees all skbs whose data have already
   *  DMA'ed into the NICs internal memory.
   *  Return Value:
   *  NONE
   */
  
-static void tx_intr_handler(struct s2io_nic *nic)
+static void tx_intr_handler(fifo_info_t *fifo_data)
  {
-       XENA_dev_config_t __iomem *bar0 = nic->bar0;
+       nic_t *nic = fifo_data->nic;
         struct net_device *dev = (struct net_device *) nic->dev;
         tx_curr_get_info_t get_info, put_info;
         struct sk_buff *skb;
         TxD_t *txdlp;
-       register u64 val64 = 0;
-       int i;
         u16 j, frg_cnt;
-       mac_info_t *mac_control;
-       struct config_param *config;
-
-       mac_control = &nic->mac_control;
-       config = &nic->config;
  
-       /* 
-        * tx_traffic_int reg is an R1 register, hence we read and write 
-        * back the samevalue in the register to clear it.
-        */
-       val64 = readq(&bar0->tx_traffic_int);
-       writeq(val64, &bar0->tx_traffic_int);
-
-       for (i = 0; i < config->tx_fifo_num; i++) {
-               get_info = mac_control->tx_curr_get_info[i];
-               put_info = mac_control->tx_curr_put_info[i];
-               txdlp = (TxD_t *) nic->list_info[i][get_info.offset].
-                   list_virt_addr;
-               while ((!(txdlp->Control_1 & TXD_LIST_OWN_XENA)) &&
-                      (get_info.offset != put_info.offset) &&
-                      (txdlp->Host_Control)) {
-                       /* Check for TxD errors */
-                       if (txdlp->Control_1 & TXD_T_CODE) {
-                               unsigned long long err;
-                               err = txdlp->Control_1 & TXD_T_CODE;
-                               DBG_PRINT(ERR_DBG, "***TxD error %llx\n",
-                                         err);
-                       }
-
-                       skb = (struct sk_buff *) ((unsigned long)
-                                                 txdlp->Host_Control);
-                       if (skb == NULL) {
-                               DBG_PRINT(ERR_DBG, "%s: Null skb ",
-                                         dev->name);
-                               DBG_PRINT(ERR_DBG, "in Tx Free Intr\n");
-                               return;
-                       }
-                       nic->tx_pkt_count++;
+       get_info = fifo_data->tx_curr_get_info;
+       put_info = fifo_data->tx_curr_put_info;
+       txdlp = (TxD_t *) fifo_data->list_info[get_info.offset].
+           list_virt_addr;
+       while ((!(txdlp->Control_1 & TXD_LIST_OWN_XENA)) &&
+              (get_info.offset != put_info.offset) &&
+              (txdlp->Host_Control)) {
+               /* Check for TxD errors */
+               if (txdlp->Control_1 & TXD_T_CODE) {
+                       unsigned long long err;
+                       err = txdlp->Control_1 & TXD_T_CODE;
+                       DBG_PRINT(ERR_DBG, "***TxD error %llx\n",
+                                 err);
+               }
  
-                       frg_cnt = skb_shinfo(skb)->nr_frags;
+               skb = (struct sk_buff *) ((unsigned long)
+                               txdlp->Host_Control);
+               if (skb == NULL) {
+                       DBG_PRINT(ERR_DBG, "%s: Null skb ",
+                       __FUNCTION__);
+                       DBG_PRINT(ERR_DBG, "in Tx Free Intr\n");
+                       return;
+               }
  
-                       /*  For unfragmented skb */
-                       pci_unmap_single(nic->pdev, (dma_addr_t)
-                                        txdlp->Buffer_Pointer,
-                                        skb->len - skb->data_len,
-                                        PCI_DMA_TODEVICE);
-                       if (frg_cnt) {
-                               TxD_t *temp = txdlp;
-                               txdlp++;
-                               for (j = 0; j < frg_cnt; j++, txdlp++) {
-                                       skb_frag_t *frag =
-                                           &skb_shinfo(skb)->frags[j];
-                                       pci_unmap_page(nic->pdev,
-                                                      (dma_addr_t)
-                                                      txdlp->
-                                                      Buffer_Pointer,
-                                                      frag->size,
-                                                      PCI_DMA_TODEVICE);
-                               }
-                               txdlp = temp;
+               frg_cnt = skb_shinfo(skb)->nr_frags;
+               nic->tx_pkt_count++;
+
+               pci_unmap_single(nic->pdev, (dma_addr_t)
+                                txdlp->Buffer_Pointer,
+                                skb->len - skb->data_len,
+                                PCI_DMA_TODEVICE);
+               if (frg_cnt) {
+                       TxD_t *temp;
+                       temp = txdlp;
+                       txdlp++;
+                       for (j = 0; j < frg_cnt; j++, txdlp++) {
+                               skb_frag_t *frag =
+                                   &skb_shinfo(skb)->frags[j];
+                               if (!txdlp->Buffer_Pointer)
+                                       break;
+                               pci_unmap_page(nic->pdev,
+                                              (dma_addr_t)
+                                              txdlp->
+                                              Buffer_Pointer,
+                                              frag->size,
+                                              PCI_DMA_TODEVICE);
                         }
-                       memset(txdlp, 0,
-                              (sizeof(TxD_t) * config->max_txds));
-
-                       /* Updating the statistics block */
-                       nic->stats.tx_packets++;
-                       nic->stats.tx_bytes += skb->len;
-                       dev_kfree_skb_irq(skb);
-
-                       get_info.offset++;
-                       get_info.offset %= get_info.fifo_len + 1;
-                       txdlp = (TxD_t *) nic->list_info[i]
-                           [get_info.offset].list_virt_addr;
-                       mac_control->tx_curr_get_info[i].offset =
-                           get_info.offset;
+                       txdlp = temp;
                 }
+               memset(txdlp, 0,
+                      (sizeof(TxD_t) * fifo_data->max_txds));
+
+               /* Updating the statistics block */
+               nic->stats.tx_bytes += skb->len;
+               dev_kfree_skb_irq(skb);
+
+               get_info.offset++;
+               get_info.offset %= get_info.fifo_len + 1;
+               txdlp = (TxD_t *) fifo_data->list_info
+                   [get_info.offset].list_virt_addr;
+               fifo_data->tx_curr_get_info.offset =
+                   get_info.offset;
         }
  
         spin_lock(&nic->tx_lock);
@@ -2301,13 +2655,13 @@ static void tx_intr_handler(struct s2io_nic *nic)
         spin_unlock(&nic->tx_lock);
  }
  
-/**  
+/**
   *  alarm_intr_handler - Alarm Interrrupt handler
   *  @nic: device private variable
- *  Description: If the interrupt was neither because of Rx packet or Tx 
+ *  Description: If the interrupt was neither because of Rx packet or Tx
   *  complete, this function is called. If the interrupt was to indicate
- *  a loss of link, the OSM link status handler is invoked for any other 
- *  alarm interrupt the block that raised the interrupt is displayed 
+ *  a loss of link, the OSM link status handler is invoked for any other
+ *  alarm interrupt the block that raised the interrupt is displayed
   *  and a H/W reset is issued.
   *  Return Value:
   *  NONE
@@ -2320,10 +2674,32 @@ static void alarm_intr_handler(struct s2io_nic *nic)
         register u64 val64 = 0, err_reg = 0;
  
         /* Handling link status change error Intr */
-       err_reg = readq(&bar0->mac_rmac_err_reg);
-       writeq(err_reg, &bar0->mac_rmac_err_reg);
-       if (err_reg & RMAC_LINK_STATE_CHANGE_INT) {
-               schedule_work(&nic->set_link_task);
+       if (s2io_link_fault_indication(nic) == MAC_RMAC_ERR_TIMER) {
+               err_reg = readq(&bar0->mac_rmac_err_reg);
+               writeq(err_reg, &bar0->mac_rmac_err_reg);
+               if (err_reg & RMAC_LINK_STATE_CHANGE_INT) {
+                       schedule_work(&nic->set_link_task);
+               }
+       }
+
+       /* Handling Ecc errors */
+       val64 = readq(&bar0->mc_err_reg);
+       writeq(val64, &bar0->mc_err_reg);
+       if (val64 & (MC_ERR_REG_ECC_ALL_SNG | MC_ERR_REG_ECC_ALL_DBL)) {
+               if (val64 & MC_ERR_REG_ECC_ALL_DBL) {
+                       nic->mac_control.stats_info->sw_stat.
+                               double_ecc_errs++;
+                       DBG_PRINT(ERR_DBG, "%s: Device indicates ",
+                                 dev->name);
+                       DBG_PRINT(ERR_DBG, "double ECC error!!\n");
+                       if (nic->device_type != XFRAME_II_DEVICE) {
+                               netif_stop_queue(dev);
+                               schedule_work(&nic->rst_timer_task);
+                       }
+               } else {
+                       nic->mac_control.stats_info->sw_stat.
+                               single_ecc_errs++;
+               }
         }
  
         /* In case of a serious error, the device will be Reset. */
@@ -2338,7 +2714,7 @@ static void alarm_intr_handler(struct s2io_nic *nic)
         /*
          * Also as mentioned in the latest Errata sheets if the PCC_FB_ECC
          * Error occurs, the adapter will be recycled by disabling the
-        * adapter enable bit and enabling it again after the device 
+        * adapter enable bit and enabling it again after the device
          * becomes Quiescent.
          */
         val64 = readq(&bar0->pcc_err_reg);
@@ -2354,18 +2730,18 @@ static void alarm_intr_handler(struct s2io_nic *nic)
         /* Other type of interrupts are not being handled now,  TODO */
  }
  
-/** 
+/**
   *  wait_for_cmd_complete - waits for a command to complete.
- *  @sp : private member of the device structure, which is a pointer to the 
+ *  @sp : private member of the device structure, which is a pointer to the
   *  s2io_nic structure.
- *  Description: Function that waits for a command to Write into RMAC 
- *  ADDR DATA registers to be completed and returns either success or 
- *  error depending on whether the command was complete or not. 
+ *  Description: Function that waits for a command to Write into RMAC
+ *  ADDR DATA registers to be completed and returns either success or
+ *  error depending on whether the command was complete or not.
   *  Return value:
   *   SUCCESS on success and FAILURE on failure.
   */
  
-static int wait_for_cmd_complete(nic_t * sp)
+int wait_for_cmd_complete(nic_t * sp)
  {
         XENA_dev_config_t __iomem *bar0 = sp->bar0;
         int ret = FAILURE, cnt = 0;
@@ -2385,29 +2761,32 @@ static int wait_for_cmd_complete(nic_t * sp)
         return ret;
  }
  
-/** 
- *  s2io_reset - Resets the card. 
+/**
+ *  s2io_reset - Resets the card.
   *  @sp : private member of the device structure.
   *  Description: Function to Reset the card. This function then also
- *  restores the previously saved PCI configuration space registers as 
+ *  restores the previously saved PCI configuration space registers as
   *  the card reset also resets the configuration space.
   *  Return value:
   *  void.
   */
  
-static void s2io_reset(nic_t * sp)
+void s2io_reset(nic_t * sp)
  {
         XENA_dev_config_t __iomem *bar0 = sp->bar0;
         u64 val64;
-       u16 subid;
+       u16 subid, pci_cmd;
+
+       /* Back up the PCI-X CMD reg; don't want to lose MMRBC, OST settings */
+       pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, &(pci_cmd));
  
         val64 = SW_RESET_ALL;
         writeq(val64, &bar0->sw_reset);
  
-       /* 
-        * At this stage, if the PCI write is indeed completed, the 
-        * card is reset and so is the PCI Config space of the device. 
-        * So a read cannot be issued at this stage on any of the 
+       /*
+        * At this stage, if the PCI write is indeed completed, the
+        * card is reset and so is the PCI Config space of the device.
+        * So a read cannot be issued at this stage on any of the
          * registers to ensure the write into "sw_reset" register
          * has gone through.
          * Question: Is there any system call that will explicitly force
@@ -2418,42 +2797,72 @@ static void s2io_reset(nic_t * sp)
          */
         msleep(250);
  
-       /* Restore the PCI state saved during initializarion. */
+       /* Restore the PCI state saved during initialization. */
         pci_restore_state(sp->pdev);
+       pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER,
+                                    pci_cmd);
         s2io_init_pci(sp);
  
-       msleep(250);
+       msleep(250);
+
+       /* Set swapper to enable I/O register access */
+       s2io_set_swapper(sp);
+
+       /* Clear certain PCI/PCI-X fields after reset */
+       if (sp->device_type == XFRAME_II_DEVICE) {
+               /* Clear parity err detect bit */
+               pci_write_config_word(sp->pdev, PCI_STATUS, 0x8000);
+
+               /* Clearing PCIX Ecc status register */
+               pci_write_config_dword(sp->pdev, 0x68, 0x7C);
+
+               /* Clearing PCI_STATUS error reflected here */
+               writeq(BIT(62), &bar0->txpic_int_reg);
+       }
+
+       /* Reset device statistics maintained by OS */
+       memset(&sp->stats, 0, sizeof (struct net_device_stats));
  
         /* SXE-002: Configure link and activity LED to turn it off */
         subid = sp->pdev->subsystem_device;
-       if ((subid & 0xFF) >= 0x07) {
+       if (((subid & 0xFF) >= 0x07) &&
+           (sp->device_type == XFRAME_I_DEVICE)) {
                 val64 = readq(&bar0->gpio_control);
                 val64 |= 0x0000800000000000ULL;
                 writeq(val64, &bar0->gpio_control);
                 val64 = 0x0411040400000000ULL;
-               writeq(val64, (void __iomem *) bar0 + 0x2700);
+               writeq(val64, (void __iomem *) ((u8 *) bar0 + 0x2700));
+       }
+
+       /*
+        * Clear spurious ECC interrupts that would have occurred on
+        * XFRAME II cards after reset.
+        */
+       if (sp->device_type == XFRAME_II_DEVICE) {
+               val64 = readq(&bar0->pcc_err_reg);
+               writeq(val64, &bar0->pcc_err_reg);
         }
  
         sp->device_enabled_once = FALSE;
  }
  
  /**
- *  s2io_set_swapper - to set the swapper controle on the card 
- *  @sp : private member of the device structure, 
+ *  s2io_set_swapper - to set the swapper control on the card
+ *  @sp : private member of the device structure,
   *  pointer to the s2io_nic structure.
- *  Description: Function to set the swapper control on the card 
+ *  Description: Function to set the swapper control on the card
   *  correctly depending on the 'endianness' of the system.
   *  Return value:
   *  SUCCESS on success and FAILURE on failure.
   */
  
-static int s2io_set_swapper(nic_t * sp)
+int s2io_set_swapper(nic_t * sp)
  {
         struct net_device *dev = sp->dev;
         XENA_dev_config_t __iomem *bar0 = sp->bar0;
         u64 val64, valt, valr;
  
-       /* 
+       /*
          * Set proper endian settings and verify the same by reading
          * the PIF Feed-back register.
          */
@@ -2505,8 +2914,9 @@ static int s2io_set_swapper(nic_t * sp)
                         i++;
                 }
                 if(i == 4) {
+                       unsigned long long x = val64;
                         DBG_PRINT(ERR_DBG, "Write failed, Xmsi_addr ");
-                       DBG_PRINT(ERR_DBG, "reads:0x%llx\n",val64);
+                       DBG_PRINT(ERR_DBG, "reads:0x%llx\n", x);
                         return FAILURE;
                 }
         }
@@ -2514,8 +2924,8 @@ static int s2io_set_swapper(nic_t * sp)
         val64 &= 0xFFFF000000000000ULL;
  
  #ifdef  __BIG_ENDIAN
-       /* 
-        * The device by default set to a big endian format, so a 
+       /*
+        * The device by default set to a big endian format, so a
          * big endian driver need not set anything.
          */
         val64 |= (SWAPPER_CTRL_TXP_FE |
@@ -2531,9 +2941,9 @@ static int s2io_set_swapper(nic_t * sp)
                  SWAPPER_CTRL_STATS_FE | SWAPPER_CTRL_STATS_SE);
         writeq(val64, &bar0->swapper_ctrl);
  #else
-       /* 
+       /*
          * Initially we enable all bits to make it accessible by the
-        * driver, then we selectively enable only those bits that 
+        * driver, then we selectively enable only those bits that
          * we want to set.
          */
         val64 |= (SWAPPER_CTRL_TXP_FE |
@@ -2555,8 +2965,8 @@ static int s2io_set_swapper(nic_t * sp)
  #endif
         val64 = readq(&bar0->swapper_ctrl);
  
-       /* 
-        * Verifying if endian settings are accurate by reading a 
+       /*
+        * Verifying if endian settings are accurate by reading a
          * feedback register.
          */
         val64 = readq(&bar0->pif_rd_swapper_fb);
@@ -2576,55 +2986,63 @@ static int s2io_set_swapper(nic_t * sp)
   * Functions defined below concern the OS part of the driver *
   * ********************************************************* */
  
-/**  
+/**
   *  s2io_open - open entry point of the driver
   *  @dev : pointer to the device structure.
   *  Description:
   *  This function is the open entry point of the driver. It mainly calls a
   *  function to allocate Rx buffers and inserts them into the buffer
- *  descriptors and then enables the Rx part of the NIC. 
+ *  descriptors and then enables the Rx part of the NIC.
   *  Return value:
   *  0 on success and an appropriate (-)ve integer as defined in errno.h
   *   file on failure.
   */
  
-static int s2io_open(struct net_device *dev)
+int s2io_open(struct net_device *dev)
  {
         nic_t *sp = dev->priv;
         int err = 0;
  
-       /* 
-        * Make sure you have link off by default every time 
+       /*
+        * Make sure you have link off by default every time
          * Nic is initialized
          */
         netif_carrier_off(dev);
-       sp->last_link_state = LINK_DOWN;
+       sp->last_link_state = 0;
  
         /* Initialize H/W and enable interrupts */
         if (s2io_card_up(sp)) {
                 DBG_PRINT(ERR_DBG, "%s: H/W initialization failed\n",
                           dev->name);
-               return -ENODEV;
+               err = -ENODEV;
+               goto hw_init_failed;
         }
  
         /* After proper initialization of H/W, register ISR */
-       err = request_irq((int) sp->irq, s2io_isr, SA_SHIRQ,
+       err = request_irq((int) sp->pdev->irq, s2io_isr, SA_SHIRQ,
                           sp->name, dev);
         if (err) {
-               s2io_reset(sp);
                 DBG_PRINT(ERR_DBG, "%s: ISR registration failed\n",
                           dev->name);
-               return err;
+               goto isr_registration_failed;
         }
  
         if (s2io_set_mac_addr(dev, dev->dev_addr) == FAILURE) {
                 DBG_PRINT(ERR_DBG, "Set Mac Address Failed\n");
-               s2io_reset(sp);
-               return -ENODEV;
+               err = -ENODEV;
+               goto setting_mac_address_failed;
         }
  
         netif_start_queue(dev);
         return 0;
+
+setting_mac_address_failed:
+       free_irq(sp->pdev->irq, dev);
+isr_registration_failed:
+       del_timer_sync(&sp->alarm_timer);
+       s2io_reset(sp);
+hw_init_failed:
+       return err;
  }
  
  /**
@@ -2640,16 +3058,15 @@ static int s2io_open(struct net_device *dev)
   *  file on failure.
   */
  
-static int s2io_close(struct net_device *dev)
+int s2io_close(struct net_device *dev)
  {
         nic_t *sp = dev->priv;
-
         flush_scheduled_work();
         netif_stop_queue(dev);
         /* Reset card, kill tasklet and free Tx and Rx buffers. */
         s2io_card_down(sp);
  
-       free_irq(dev->irq, dev);
+       free_irq(sp->pdev->irq, dev);
         sp->device_close_flag = TRUE;   /* Device is shut down. */
         return 0;
  }
@@ -2667,7 +3084,7 @@ static int s2io_close(struct net_device *dev)
   *  0 on success & 1 on failure.
   */
  
-static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
+int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
  {
         nic_t *sp = dev->priv;
         u16 frg_cnt, frg_len, i, queue, queue_len, put_off, get_off;
@@ -2678,29 +3095,39 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
  #ifdef NETIF_F_TSO
         int mss;
  #endif
+       u16 vlan_tag = 0;
+       int vlan_priority = 0;
         mac_info_t *mac_control;
         struct config_param *config;
-       XENA_dev_config_t __iomem *bar0 = sp->bar0;
  
         mac_control = &sp->mac_control;
         config = &sp->config;
  
-       DBG_PRINT(TX_DBG, "%s: In S2IO Tx routine\n", dev->name);
+       DBG_PRINT(TX_DBG, "%s: In Neterion Tx routine\n", dev->name);
         spin_lock_irqsave(&sp->tx_lock, flags);
-
         if (atomic_read(&sp->card_state) == CARD_DOWN) {
-               DBG_PRINT(ERR_DBG, "%s: Card going down for reset\n",
+               DBG_PRINT(TX_DBG, "%s: Card going down for reset\n",
                           dev->name);
                 spin_unlock_irqrestore(&sp->tx_lock, flags);
-               return 1;
+               dev_kfree_skb(skb);
+               return 0;
         }
  
         queue = 0;
-       put_off = (u16) mac_control->tx_curr_put_info[queue].offset;
-       get_off = (u16) mac_control->tx_curr_get_info[queue].offset;
-       txdp = (TxD_t *) sp->list_info[queue][put_off].list_virt_addr;
  
-       queue_len = mac_control->tx_curr_put_info[queue].fifo_len + 1;
+       /* Get Fifo number to Transmit based on vlan priority */
+       if (sp->vlgrp && vlan_tx_tag_present(skb)) {
+               vlan_tag = vlan_tx_tag_get(skb);
+               vlan_priority = vlan_tag >> 13;
+               queue = config->fifo_mapping[vlan_priority];
+       }
+
+       put_off = (u16) mac_control->fifos[queue].tx_curr_put_info.offset;
+       get_off = (u16) mac_control->fifos[queue].tx_curr_get_info.offset;
+       txdp = (TxD_t *) mac_control->fifos[queue].list_info[put_off].
+               list_virt_addr;
+
+       queue_len = mac_control->fifos[queue].tx_curr_put_info.fifo_len + 1;
         /* Avoid "put" pointer going beyond "get" pointer */
         if (txdp->Host_Control || (((put_off + 1) % queue_len) == get_off)) {
                 DBG_PRINT(ERR_DBG, "Error in xmit, No free TXDs.\n");
@@ -2709,6 +3136,15 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
                 spin_unlock_irqrestore(&sp->tx_lock, flags);
                 return 0;
         }
+
+       /* A buffer with no data will be dropped */
+       if (!skb->len) {
+               DBG_PRINT(TX_DBG, "%s:Buffer has no data..\n", dev->name);
+               dev_kfree_skb(skb);
+               spin_unlock_irqrestore(&sp->tx_lock, flags);
+               return 0;
+       }
+
  #ifdef NETIF_F_TSO
         mss = skb_shinfo(skb)->tso_size;
         if (mss) {
@@ -2720,9 +3156,9 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
         frg_cnt = skb_shinfo(skb)->nr_frags;
         frg_len = skb->len - skb->data_len;
  
-       txdp->Host_Control = (unsigned long) skb;
         txdp->Buffer_Pointer = pci_map_single
             (sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE);
+       txdp->Host_Control = (unsigned long) skb;
         if (skb->ip_summed == CHECKSUM_HW) {
                 txdp->Control_2 |=
                     (TXD_TX_CKO_IPV4_EN | TXD_TX_CKO_TCP_EN |
@@ -2731,6 +3167,11 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
  
         txdp->Control_2 |= config->tx_intr_type;
  
+       if (sp->vlgrp && vlan_tx_tag_present(skb)) {
+               txdp->Control_2 |= TXD_VLAN_ENABLE;
+               txdp->Control_2 |= TXD_VLAN_TAG(vlan_tag);
+       }
+
         txdp->Control_1 |= (TXD_BUFFER0_SIZE(frg_len) |
                             TXD_GATHER_CODE_FIRST);
         txdp->Control_1 |= TXD_LIST_OWN_XENA;
@@ -2738,6 +3179,9 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
         /* For fragmented SKB. */
         for (i = 0; i < frg_cnt; i++) {
                 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+               /* A '0' length fragment will be ignored */
+               if (!frag->size)
+                       continue;
                 txdp++;
                 txdp->Buffer_Pointer = (u64) pci_map_page
                     (sp->pdev, frag->page, frag->page_offset,
@@ -2747,23 +3191,23 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
         txdp->Control_1 |= TXD_GATHER_CODE_LAST;
  
         tx_fifo = mac_control->tx_FIFO_start[queue];
-       val64 = sp->list_info[queue][put_off].list_phy_addr;
+       val64 = mac_control->fifos[queue].list_info[put_off].list_phy_addr;
         writeq(val64, &tx_fifo->TxDL_Pointer);
  
         val64 = (TX_FIFO_LAST_TXD_NUM(frg_cnt) | TX_FIFO_FIRST_LIST |
                  TX_FIFO_LAST_LIST);
+
  #ifdef NETIF_F_TSO
         if (mss)
                 val64 |= TX_FIFO_SPECIAL_FUNC;
  #endif
         writeq(val64, &tx_fifo->List_Control);
  
-       /* Perform a PCI read to flush previous writes */
-       val64 = readq(&bar0->general_int_status);
+       mmiowb();
  
         put_off++;
-       put_off %= mac_control->tx_curr_put_info[queue].fifo_len + 1;
-       mac_control->tx_curr_put_info[queue].offset = put_off;
+       put_off %= mac_control->fifos[queue].tx_curr_put_info.fifo_len + 1;
+       mac_control->fifos[queue].tx_curr_put_info.offset = put_off;
  
         /* Avoid "put" pointer going beyond "get" pointer */
         if (((put_off + 1) % queue_len) == get_off) {
@@ -2779,18 +3223,74 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
         return 0;
  }
  
+static void
+s2io_alarm_handle(unsigned long data)
+{
+       nic_t *sp = (nic_t *)data;
+
+       alarm_intr_handler(sp);
+       mod_timer(&sp->alarm_timer, jiffies + HZ / 2);
+}
+
+static void s2io_txpic_intr_handle(nic_t *sp)
+{
+       XENA_dev_config_t *bar0 = (XENA_dev_config_t *) sp->bar0;
+       u64 val64;
+
+       val64 = readq(&bar0->pic_int_status);
+       if (val64 & PIC_INT_GPIO) {
+               val64 = readq(&bar0->gpio_int_reg);
+               if ((val64 & GPIO_INT_REG_LINK_DOWN) &&
+                   (val64 & GPIO_INT_REG_LINK_UP)) {
+                       val64 |=  GPIO_INT_REG_LINK_DOWN;
+                       val64 |= GPIO_INT_REG_LINK_UP;
+                       writeq(val64, &bar0->gpio_int_reg);
+                       goto masking;
+               }
+
+               if (((sp->last_link_state == LINK_UP) &&
+                       (val64 & GPIO_INT_REG_LINK_DOWN)) ||
+               ((sp->last_link_state == LINK_DOWN) &&
+               (val64 & GPIO_INT_REG_LINK_UP))) {
+                       val64 = readq(&bar0->gpio_int_mask);
+                       val64 |=  GPIO_INT_MASK_LINK_DOWN;
+                       val64 |= GPIO_INT_MASK_LINK_UP;
+                       writeq(val64, &bar0->gpio_int_mask);
+                       s2io_set_link((unsigned long)sp);
+               }
+masking:
+               if (sp->last_link_state == LINK_UP) {
+                       /* enable link down interrupt */
+                       val64 = readq(&bar0->gpio_int_mask);
+                       /* unmasks link down intr */
+                       val64 &=  ~GPIO_INT_MASK_LINK_DOWN;
+                       /* masks link up intr */
+                       val64 |= GPIO_INT_MASK_LINK_UP;
+                       writeq(val64, &bar0->gpio_int_mask);
+               } else {
+                       /* enable link up interrupt */
+                       val64 = readq(&bar0->gpio_int_mask);
+                       /* unmasks link up interrupt */
+                       val64 &= ~GPIO_INT_MASK_LINK_UP;
+                       /* masks link down interrupt */
+                       val64 |=  GPIO_INT_MASK_LINK_DOWN;
+                       writeq(val64, &bar0->gpio_int_mask);
+               }
+       }
+}
+
  /**
   *  s2io_isr - ISR handler of the device .
   *  @irq: the irq of the device.
   *  @dev_id: a void pointer to the dev structure of the NIC.
   *  @pt_regs: pointer to the registers pushed on the stack.
- *  Description:  This function is the ISR handler of the device. It 
- *  identifies the reason for the interrupt and calls the relevant 
- *  service routines. As a contongency measure, this ISR allocates the 
+ *  Description:  This function is the ISR handler of the device. It
+ *  identifies the reason for the interrupt and calls the relevant
+ *  service routines. As a contingency measure, this ISR allocates the
   *  recv buffers, if their numbers are below the panic value which is
   *  presently set to 25% of the original number of rcv buffers allocated.
   *  Return value:
- *   IRQ_HANDLED: will be returned if IRQ was handled by this routine 
+ *   IRQ_HANDLED: will be returned if IRQ was handled by this routine
   *   IRQ_NONE: will be returned if interrupt is not from our device
   */
  static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs)
@@ -2798,40 +3298,31 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs)
         struct net_device *dev = (struct net_device *) dev_id;
         nic_t *sp = dev->priv;
         XENA_dev_config_t __iomem *bar0 = sp->bar0;
-#ifndef CONFIG_S2IO_NAPI
-       int i, ret;
-#endif
-       u64 reason = 0;
+       int i;
+       u64 reason = 0, val64;
         mac_info_t *mac_control;
         struct config_param *config;
  
+       atomic_inc(&sp->isr_cnt);
         mac_control = &sp->mac_control;
         config = &sp->config;
  
-       /* 
+       /*
          * Identify the cause for interrupt and call the appropriate
          * interrupt handler. Causes for the interrupt could be;
          * 1. Rx of packet.
          * 2. Tx complete.
          * 3. Link down.
-        * 4. Error in any functional blocks of the NIC. 
+        * 4. Error in any functional blocks of the NIC.
          */
         reason = readq(&bar0->general_int_status);
  
         if (!reason) {
                 /* The interrupt was not raised by Xena. */
+               atomic_dec(&sp->isr_cnt);
                 return IRQ_NONE;
         }
  
-       /* If Intr is because of Tx Traffic */
-       if (reason & GEN_INTR_TXTRAFFIC) {
-               tx_intr_handler(sp);
-       }
-
-       /* If Intr is because of an error */
-       if (reason & (GEN_ERROR_INTR))
-               alarm_intr_handler(sp);
-
  #ifdef CONFIG_S2IO_NAPI
         if (reason & GEN_INTR_RXTRAFFIC) {
                 if (netif_rx_schedule_prep(dev)) {
@@ -2843,17 +3334,43 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs)
  #else
         /* If Intr is because of Rx Traffic */
         if (reason & GEN_INTR_RXTRAFFIC) {
-               rx_intr_handler(sp);
+               /*
+                * rx_traffic_int reg is an R1 register, writing all 1's
+                * will ensure that the actual interrupt causing bit gets
+                * cleared and hence a read can be avoided.
+                */
+               val64 = 0xFFFFFFFFFFFFFFFFULL;
+               writeq(val64, &bar0->rx_traffic_int);
+               for (i = 0; i < config->rx_ring_num; i++) {
+                       rx_intr_handler(&mac_control->rings[i]);
+               }
         }
  #endif
  
-       /* 
-        * If the Rx buffer count is below the panic threshold then 
-        * reallocate the buffers from the interrupt handler itself, 
+       /* If Intr is because of Tx Traffic */
+       if (reason & GEN_INTR_TXTRAFFIC) {
+               /*
+                * tx_traffic_int reg is an R1 register, writing all 1's
+                * will ensure that the actual interrupt causing bit gets
+                * cleared and hence a read can be avoided.
+                */
+               val64 = 0xFFFFFFFFFFFFFFFFULL;
+               writeq(val64, &bar0->tx_traffic_int);
+
+               for (i = 0; i < config->tx_fifo_num; i++)
+                       tx_intr_handler(&mac_control->fifos[i]);
+       }
+
+       if (reason & GEN_INTR_TXPIC)
+               s2io_txpic_intr_handle(sp);
+       /*
+        * If the Rx buffer count is below the panic threshold then
+        * reallocate the buffers from the interrupt handler itself,
          * else schedule a tasklet to reallocate the buffers.
          */
  #ifndef CONFIG_S2IO_NAPI
         for (i = 0; i < config->rx_ring_num; i++) {
+               int ret;
                 int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
                 int level = rx_buffer_level(sp, rxb_size, i);
  
@@ -2865,6 +3382,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs)
                                           dev->name);
                                 DBG_PRINT(ERR_DBG, " in ISR!!\n");
                                 clear_bit(0, (&sp->tasklet_status));
+                               atomic_dec(&sp->isr_cnt);
                                 return IRQ_HANDLED;
                         }
                         clear_bit(0, (&sp->tasklet_status));
@@ -2874,33 +3392,69 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs)
         }
  #endif
  
+       atomic_dec(&sp->isr_cnt);
         return IRQ_HANDLED;
  }
  
  /**
- *  s2io_get_stats - Updates the device statistics structure. 
+ * s2io_updt_stats - trigger a one-shot statistics update and poll for it to finish
+ */
+static void s2io_updt_stats(nic_t *sp)
+{
+       XENA_dev_config_t __iomem *bar0 = sp->bar0;
+       u64 val64;
+       int cnt = 0;
+
+       if (atomic_read(&sp->card_state) == CARD_UP) {
+               /* Approx. 30us on a 133 MHz bus */
+               val64 = SET_UPDT_CLICKS(10) |
+                       STAT_CFG_ONE_SHOT_EN | STAT_CFG_STAT_EN;
+               writeq(val64, &bar0->stat_cfg);
+               do {
+                       udelay(100);
+                       val64 = readq(&bar0->stat_cfg);
+                       if (!(val64 & BIT(0)))
+                               break;
+                       cnt++;
+                       if (cnt == 5)
+                               break; /* Updt failed */
+               } while(1);
+       }
+}
+
+/**
+ *  s2io_get_stats - Updates the device statistics structure.
   *  @dev : pointer to the device structure.
   *  Description:
- *  This function updates the device statistics structure in the s2io_nic 
+ *  This function updates the device statistics structure in the s2io_nic
   *  structure and returns a pointer to the same.
   *  Return value:
   *  pointer to the updated net_device_stats structure.
   */
  
-static struct net_device_stats *s2io_get_stats(struct net_device *dev)
+struct net_device_stats *s2io_get_stats(struct net_device *dev)
  {
         nic_t *sp = dev->priv;
         mac_info_t *mac_control;
         struct config_param *config;
  
+
         mac_control = &sp->mac_control;
         config = &sp->config;
  
-       sp->stats.tx_errors = mac_control->stats_info->tmac_any_err_frms;
-       sp->stats.rx_errors = mac_control->stats_info->rmac_drop_frms;
-       sp->stats.multicast = mac_control->stats_info->rmac_vld_mcst_frms;
+       /* Configure stats for an immediate update */
+       s2io_updt_stats(sp);
+
+       sp->stats.tx_packets =
+               le32_to_cpu(mac_control->stats_info->tmac_frms);
+       sp->stats.tx_errors =
+               le32_to_cpu(mac_control->stats_info->tmac_any_err_frms);
+       sp->stats.rx_errors =
+               le32_to_cpu(mac_control->stats_info->rmac_drop_frms);
+       sp->stats.multicast =
+               le32_to_cpu(mac_control->stats_info->rmac_vld_mcst_frms);
         sp->stats.rx_length_errors =
-           mac_control->stats_info->rmac_long_frms;
+               le32_to_cpu(mac_control->stats_info->rmac_long_frms);
  
         return (&sp->stats);
  }
@@ -2909,8 +3463,8 @@ static struct net_device_stats *s2io_get_stats(struct net_device *dev)
   *  s2io_set_multicast - entry point for multicast address enable/disable.
   *  @dev : pointer to the device structure
   *  Description:
- *  This function is a driver entry point which gets called by the kernel 
- *  whenever multicast addresses must be enabled/disabled. This also gets 
+ *  This function is a driver entry point which gets called by the kernel
+ *  whenever multicast addresses must be enabled/disabled. This also gets
   *  called to set/reset promiscuous mode. Depending on the deivce flag, we
   *  determine, if multicast address must be enabled or if promiscuous mode
   *  is to be disabled etc.
@@ -2948,6 +3502,8 @@ static void s2io_set_multicast(struct net_device *dev)
                 /*  Disable all Multicast addresses */
                 writeq(RMAC_ADDR_DATA0_MEM_ADDR(dis_addr),
                        &bar0->rmac_addr_data0_mem);
+               writeq(RMAC_ADDR_DATA1_MEM_MASK(0x0),
+                      &bar0->rmac_addr_data1_mem);
                 val64 = RMAC_ADDR_CMD_MEM_WE |
                     RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD |
                     RMAC_ADDR_CMD_MEM_OFFSET(sp->all_multi_pos);
@@ -3010,7 +3566,7 @@ static void s2io_set_multicast(struct net_device *dev)
                         writeq(RMAC_ADDR_DATA0_MEM_ADDR(dis_addr),
                                &bar0->rmac_addr_data0_mem);
                         writeq(RMAC_ADDR_DATA1_MEM_MASK(0ULL),
-                               &bar0->rmac_addr_data1_mem);
+                               &bar0->rmac_addr_data1_mem);
                         val64 = RMAC_ADDR_CMD_MEM_WE |
                             RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD |
                             RMAC_ADDR_CMD_MEM_OFFSET
@@ -3039,8 +3595,7 @@ static void s2io_set_multicast(struct net_device *dev)
                         writeq(RMAC_ADDR_DATA0_MEM_ADDR(mac_addr),
                                &bar0->rmac_addr_data0_mem);
                         writeq(RMAC_ADDR_DATA1_MEM_MASK(0ULL),
-                               &bar0->rmac_addr_data1_mem);
-
+                               &bar0->rmac_addr_data1_mem);
                         val64 = RMAC_ADDR_CMD_MEM_WE |
                             RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD |
                             RMAC_ADDR_CMD_MEM_OFFSET
@@ -3059,12 +3614,12 @@ static void s2io_set_multicast(struct net_device *dev)
  }
  
  /**
- *  s2io_set_mac_addr - Programs the Xframe mac address 
+ *  s2io_set_mac_addr - Programs the Xframe mac address
   *  @dev : pointer to the device structure.
   *  @addr: a uchar pointer to the new mac address which is to be set.
- *  Description : This procedure will program the Xframe to receive 
+ *  Description : This procedure will program the Xframe to receive
   *  frames with new Mac Address
- *  Return value: SUCCESS on success and an appropriate (-)ve integer 
+ *  Return value: SUCCESS on success and an appropriate (-)ve integer
   *  as defined in errno.h file on failure.
   */
  
@@ -3075,10 +3630,10 @@ int s2io_set_mac_addr(struct net_device *dev, u8 * addr)
         register u64 val64, mac_addr = 0;
         int i;
  
-       /* 
+       /*
          * Set the new MAC address as the new unicast filter and reflect this
          * change on the device address registered with the OS. It will be
-        * at offset 0. 
+        * at offset 0.
          */
         for (i = 0; i < ETH_ALEN; i++) {
                 mac_addr <<= 8;
@@ -3102,12 +3657,12 @@ int s2io_set_mac_addr(struct net_device *dev, u8 * addr)
  }
  
  /**
- * s2io_ethtool_sset - Sets different link parameters. 
+ * s2io_ethtool_sset - Sets different link parameters.
   * @sp : private member of the device structure, which is a pointer to the  * s2io_nic structure.
   * @info: pointer to the structure with parameters given by ethtool to set
   * link information.
   * Description:
- * The function sets different link parameters provided by the user onto 
+ * The function sets different link parameters provided by the user onto
   * the NIC.
   * Return value:
   * 0 on success.
@@ -3129,7 +3684,7 @@ static int s2io_ethtool_sset(struct net_device *dev,
  }
  
  /**
- * s2io_ethtol_gset - Return link specific information. 
+ * s2io_ethtool_gset - Return link specific information.
   * @sp : private member of the device structure, pointer to the
   *      s2io_nic structure.
   * @info : pointer to the structure with parameters given by ethtool
@@ -3161,8 +3716,8 @@ static int s2io_ethtool_gset(struct net_device *dev, struct ethtool_cmd *info)
  }
  
  /**
- * s2io_ethtool_gdrvinfo - Returns driver specific information. 
- * @sp : private member of the device structure, which is a pointer to the 
+ * s2io_ethtool_gdrvinfo - Returns driver specific information.
+ * @sp : private member of the device structure, which is a pointer to the
   * s2io_nic structure.
   * @info : pointer to the structure with parameters given by ethtool to
   * return driver information.
@@ -3190,9 +3745,9 @@ static void s2io_ethtool_gdrvinfo(struct net_device *dev,
  
  /**
   *  s2io_ethtool_gregs - dumps the entire space of Xfame into the buffer.
- *  @sp: private member of the device structure, which is a pointer to the 
+ *  @sp: private member of the device structure, which is a pointer to the
   *  s2io_nic structure.
- *  @regs : pointer to the structure with parameters given by ethtool for 
+ *  @regs : pointer to the structure with parameters given by ethtool for
   *  dumping the registers.
   *  @reg_space: The input argumnet into which all the registers are dumped.
   *  Description:
@@ -3221,11 +3776,11 @@ static void s2io_ethtool_gregs(struct net_device *dev,
  
  /**
   *  s2io_phy_id  - timer function that alternates adapter LED.
- *  @data : address of the private member of the device structure, which 
+ *  @data : address of the private member of the device structure, which
   *  is a pointer to the s2io_nic structure, provided as an u32.
- * Description: This is actually the timer function that alternates the 
- * adapter LED bit of the adapter control bit to set/reset every time on 
- * invocation. The timer is set for 1/2 a second, hence tha NIC blinks 
+ * Description: This is actually the timer function that alternates the
+ * adapter LED bit of the adapter control bit to set/reset every time on
+ * invocation. The timer is set for 1/2 a second, hence the NIC blinks
   *  once every second.
  */
  static void s2io_phy_id(unsigned long data)
@@ -3236,7 +3791,8 @@ static void s2io_phy_id(unsigned long data)
         u16 subid;
  
         subid = sp->pdev->subsystem_device;
-       if ((subid & 0xFF) >= 0x07) {
+       if ((sp->device_type == XFRAME_II_DEVICE) ||
+                  ((subid & 0xFF) >= 0x07)) {
                 val64 = readq(&bar0->gpio_control);
                 val64 ^= GPIO_CTRL_GPIO_0;
                 writeq(val64, &bar0->gpio_control);
@@ -3253,12 +3809,12 @@ static void s2io_phy_id(unsigned long data)
   * s2io_ethtool_idnic - To physically identify the nic on the system.
   * @sp : private member of the device structure, which is a pointer to the
   * s2io_nic structure.
- * @id : pointer to the structure with identification parameters given by 
+ * @id : pointer to the structure with identification parameters given by
   * ethtool.
   * Description: Used to physically identify the NIC on the system.
- * The Link LED will blink for a time specified by the user for 
+ * The Link LED will blink for a time specified by the user for
   * identification.
- * NOTE: The Link has to be Up to be able to blink the LED. Hence 
+ * NOTE: The Link has to be Up to be able to blink the LED. Hence
   * identification is possible only if it's link is up.
   * Return value:
   * int , returns 0 on success
@@ -3273,7 +3829,8 @@ static int s2io_ethtool_idnic(struct net_device *dev, u32 data)
  
         subid = sp->pdev->subsystem_device;
         last_gpio_ctrl_val = readq(&bar0->gpio_control);
-       if ((subid & 0xFF) < 0x07) {
+       if ((sp->device_type == XFRAME_I_DEVICE) &&
+               ((subid & 0xFF) < 0x07)) {
                 val64 = readq(&bar0->adapter_control);
                 if (!(val64 & ADAPTER_CNTL_EN)) {
                         printk(KERN_ERR
@@ -3288,12 +3845,12 @@ static int s2io_ethtool_idnic(struct net_device *dev, u32 data)
         }
         mod_timer(&sp->id_timer, jiffies);
         if (data)
-               msleep(data * 1000);
+               msleep_interruptible(data * HZ);
         else
-               msleep(0xFFFFFFFF);
+               msleep_interruptible(MAX_FLICKER_TIME);
         del_timer_sync(&sp->id_timer);
  
-       if (CARDS_WITH_FAULTY_LINK_INDICATORS(subid)) {
+       if (CARDS_WITH_FAULTY_LINK_INDICATORS(sp->device_type, subid)) {
                 writeq(last_gpio_ctrl_val, &bar0->gpio_control);
                 last_gpio_ctrl_val = readq(&bar0->gpio_control);
         }
@@ -3303,7 +3860,8 @@ static int s2io_ethtool_idnic(struct net_device *dev, u32 data)
  
  /**
   * s2io_ethtool_getpause_data -Pause frame frame generation and reception.
- * @sp : private member of the device structure, which is a pointer to the  * s2io_nic structure.
+ * @sp : private member of the device structure, which is a pointer to the
+ *     s2io_nic structure.
   * @ep : pointer to the structure with pause parameters given by ethtool.
   * Description:
   * Returns the Pause frame generation and reception capability of the NIC.
@@ -3327,7 +3885,7 @@ static void s2io_ethtool_getpause_data(struct net_device *dev,
  
  /**
   * s2io_ethtool_setpause_data -  set/reset pause frame generation.
- * @sp : private member of the device structure, which is a pointer to the 
+ * @sp : private member of the device structure, which is a pointer to the
   *      s2io_nic structure.
   * @ep : pointer to the structure with pause parameters given by ethtool.
   * Description:
@@ -3338,7 +3896,7 @@ static void s2io_ethtool_getpause_data(struct net_device *dev,
   */
  
  static int s2io_ethtool_setpause_data(struct net_device *dev,
-                                     struct ethtool_pauseparam *ep)
+                              struct ethtool_pauseparam *ep)
  {
         u64 val64;
         nic_t *sp = dev->priv;
@@ -3359,13 +3917,13 @@ static int s2io_ethtool_setpause_data(struct net_device *dev,
  
  /**
   * read_eeprom - reads 4 bytes of data from user given offset.
- * @sp : private member of the device structure, which is a pointer to the 
+ * @sp : private member of the device structure, which is a pointer to the
   *      s2io_nic structure.
   * @off : offset at which the data must be written
   * @data : Its an output parameter where the data read at the given
- *     offset is stored.
+ *     offset is stored.
   * Description:
- * Will read 4 bytes of data from the user given offset and return the 
+ * Will read 4 bytes of data from the user given offset and return the
   * read data.
   * NOTE: Will allow to read only part of the EEPROM visible through the
   *   I2C bus.
@@ -3406,7 +3964,7 @@ static int read_eeprom(nic_t * sp, int off, u32 * data)
   *       s2io_nic structure.
   *  @off : offset at which the data must be written
   *  @data : The data that is to be written
- *  @cnt : Number of bytes of the data that are actually to be written into 
+ *  @cnt : Number of bytes of the data that are actually to be written into
   *  the Eeprom. (max of 3)
   * Description:
   *  Actually writes the relevant part of the data value into the Eeprom
@@ -3443,7 +4001,7 @@ static int write_eeprom(nic_t * sp, int off, u32 data, int cnt)
  /**
   *  s2io_ethtool_geeprom  - reads the value stored in the Eeprom.
   *  @sp : private member of the device structure, which is a pointer to the *       s2io_nic structure.
- *  @eeprom : pointer to the user level structure provided by ethtool, 
+ *  @eeprom : pointer to the user level structure provided by ethtool,
   *  containing all relevant information.
   *  @data_buf : user defined value to be written into Eeprom.
   *  Description: Reads the values stored in the Eeprom at given offset
@@ -3454,7 +4012,7 @@ static int write_eeprom(nic_t * sp, int off, u32 data, int cnt)
   */
  
  static int s2io_ethtool_geeprom(struct net_device *dev,
-                               struct ethtool_eeprom *eeprom, u8 * data_buf)
+                        struct ethtool_eeprom *eeprom, u8 * data_buf)
  {
         u32 data, i, valid;
         nic_t *sp = dev->priv;
@@ -3479,7 +4037,7 @@ static int s2io_ethtool_geeprom(struct net_device *dev,
   *  s2io_ethtool_seeprom - tries to write the user provided value in Eeprom
   *  @sp : private member of the device structure, which is a pointer to the
   *  s2io_nic structure.
- *  @eeprom : pointer to the user level structure provided by ethtool, 
+ *  @eeprom : pointer to the user level structure provided by ethtool,
   *  containing all relevant information.
   *  @data_buf ; user defined value to be written into Eeprom.
   *  Description:
@@ -3527,8 +4085,8 @@ static int s2io_ethtool_seeprom(struct net_device *dev,
  }
  
  /**
- * s2io_register_test - reads and writes into all clock domains. 
- * @sp : private member of the device structure, which is a pointer to the 
+ * s2io_register_test - reads and writes into all clock domains.
+ * @sp : private member of the device structure, which is a pointer to the
   * s2io_nic structure.
   * @data : variable that returns the result of each of the test conducted b
   * by the driver.
@@ -3545,8 +4103,8 @@ static int s2io_register_test(nic_t * sp, uint64_t * data)
         u64 val64 = 0;
         int fail = 0;
  
-       val64 = readq(&bar0->pcc_enable);
-       if (val64 != 0xff00000000000000ULL) {
+       val64 = readq(&bar0->pif_rd_swapper_fb);
+       if (val64 != 0x123456789abcdefULL) {
                 fail = 1;
                 DBG_PRINT(INFO_DBG, "Read Test level 1 fails\n");
         }
@@ -3590,13 +4148,13 @@ static int s2io_register_test(nic_t * sp, uint64_t * data)
  }
  
  /**
- * s2io_eeprom_test - to verify that EEprom in the xena can be programmed. 
+ * s2io_eeprom_test - to verify that EEprom in the xena can be programmed.
   * @sp : private member of the device structure, which is a pointer to the
   * s2io_nic structure.
   * @data:variable that returns the result of each of the test conducted by
   * the driver.
   * Description:
- * Verify that EEPROM in the xena can be programmed using I2C_CONTROL 
+ * Verify that EEPROM in the xena can be programmed using I2C_CONTROL
   * register.
   * Return value:
   * 0 on success.
@@ -3661,14 +4219,14 @@ static int s2io_eeprom_test(nic_t * sp, uint64_t * data)
  
  /**
   * s2io_bist_test - invokes the MemBist test of the card .
- * @sp : private member of the device structure, which is a pointer to the 
+ * @sp : private member of the device structure, which is a pointer to the
   * s2io_nic structure.
- * @data:variable that returns the result of each of the test conducted by 
+ * @data:variable that returns the result of each of the test conducted by
   * the driver.
   * Description:
   * This invokes the MemBist test of the card. We give around
   * 2 secs time for the Test to complete. If it's still not complete
- * within this peiod, we consider that the test failed. 
+ * within this period, we consider that the test failed.
   * Return value:
   * 0 on success and -1 on failure.
   */
@@ -3697,13 +4255,13 @@ static int s2io_bist_test(nic_t * sp, uint64_t * data)
  }
  
  /**
- * s2io-link_test - verifies the link state of the nic  
- * @sp ; private member of the device structure, which is a pointer to the 
+ * s2io_link_test - verifies the link state of the nic
+ * @sp : private member of the device structure, which is a pointer to the
   * s2io_nic structure.
   * @data: variable that returns the result of each of the test conducted by
   * the driver.
   * Description:
- * The function verifies the link state of the NIC and updates the input 
+ * The function verifies the link state of the NIC and updates the input
   * argument 'data' appropriately.
   * Return value:
   * 0 on success.
@@ -3722,13 +4280,13 @@ static int s2io_link_test(nic_t * sp, uint64_t * data)
  }
  
  /**
- * s2io_rldram_test - offline test for access to the RldRam chip on the NIC 
- * @sp - private member of the device structure, which is a pointer to the  
+ * s2io_rldram_test - offline test for access to the RldRam chip on the NIC
+ * @sp - private member of the device structure, which is a pointer to the
   * s2io_nic structure.
- * @data - variable that returns the result of each of the test 
+ * @data - variable that returns the result of each of the test
   * conducted by the driver.
   * Description:
- *  This is one of the offline test that tests the read and write 
+ *  This is one of the offline test that tests the read and write
   *  access to the RldRam chip on the NIC.
   * Return value:
   *  0 on success.
@@ -3833,7 +4391,7 @@ static int s2io_rldram_test(nic_t * sp, uint64_t * data)
   *  s2io_nic structure.
   *  @ethtest : pointer to a ethtool command specific structure that will be
   *  returned to the user.
- *  @data : variable that returns the result of each of the test 
+ *  @data : variable that returns the result of each of the test
   * conducted by the driver.
   * Description:
   *  This function conducts 6 tests ( 4 offline and 2 online) to determine
@@ -3851,23 +4409,18 @@ static void s2io_ethtool_test(struct net_device *dev,
  
         if (ethtest->flags == ETH_TEST_FL_OFFLINE) {
                 /* Offline Tests. */
-               if (orig_state) {
+               if (orig_state)
                         s2io_close(sp->dev);
-                       s2io_set_swapper(sp);
-               } else
-                       s2io_set_swapper(sp);
  
                 if (s2io_register_test(sp, &data[0]))
                         ethtest->flags |= ETH_TEST_FL_FAILED;
  
                 s2io_reset(sp);
-               s2io_set_swapper(sp);
  
                 if (s2io_rldram_test(sp, &data[3]))
                         ethtest->flags |= ETH_TEST_FL_FAILED;
  
                 s2io_reset(sp);
-               s2io_set_swapper(sp);
  
                 if (s2io_eeprom_test(sp, &data[1]))
                         ethtest->flags |= ETH_TEST_FL_FAILED;
@@ -3910,61 +4463,111 @@ static void s2io_get_ethtool_stats(struct net_device *dev,
         nic_t *sp = dev->priv;
         StatInfo_t *stat_info = sp->mac_control.stats_info;
  
-       tmp_stats[i++] = le32_to_cpu(stat_info->tmac_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->tmac_data_octets);
+       s2io_updt_stats(sp);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->tmac_frms_oflow) << 32  |
+               le32_to_cpu(stat_info->tmac_frms);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->tmac_data_octets_oflow) << 32 |
+               le32_to_cpu(stat_info->tmac_data_octets);
         tmp_stats[i++] = le64_to_cpu(stat_info->tmac_drop_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->tmac_mcst_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->tmac_bcst_frms);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->tmac_mcst_frms_oflow) << 32 |
+               le32_to_cpu(stat_info->tmac_mcst_frms);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->tmac_bcst_frms_oflow) << 32 |
+               le32_to_cpu(stat_info->tmac_bcst_frms);
         tmp_stats[i++] = le64_to_cpu(stat_info->tmac_pause_ctrl_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->tmac_any_err_frms);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->tmac_any_err_frms_oflow) << 32 |
+               le32_to_cpu(stat_info->tmac_any_err_frms);
         tmp_stats[i++] = le64_to_cpu(stat_info->tmac_vld_ip_octets);
-       tmp_stats[i++] = le32_to_cpu(stat_info->tmac_vld_ip);
-       tmp_stats[i++] = le32_to_cpu(stat_info->tmac_drop_ip);
-       tmp_stats[i++] = le32_to_cpu(stat_info->tmac_icmp);
-       tmp_stats[i++] = le32_to_cpu(stat_info->tmac_rst_tcp);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->tmac_vld_ip_oflow) << 32 |
+               le32_to_cpu(stat_info->tmac_vld_ip);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->tmac_drop_ip_oflow) << 32 |
+               le32_to_cpu(stat_info->tmac_drop_ip);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->tmac_icmp_oflow) << 32 |
+               le32_to_cpu(stat_info->tmac_icmp);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->tmac_rst_tcp_oflow) << 32 |
+               le32_to_cpu(stat_info->tmac_rst_tcp);
         tmp_stats[i++] = le64_to_cpu(stat_info->tmac_tcp);
-       tmp_stats[i++] = le32_to_cpu(stat_info->tmac_udp);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_vld_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_data_octets);
+       tmp_stats[i++] = (u64)le32_to_cpu(stat_info->tmac_udp_oflow) << 32 |
+               le32_to_cpu(stat_info->tmac_udp);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->rmac_vld_frms_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_vld_frms);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->rmac_data_octets_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_data_octets);
         tmp_stats[i++] = le64_to_cpu(stat_info->rmac_fcs_err_frms);
         tmp_stats[i++] = le64_to_cpu(stat_info->rmac_drop_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_vld_mcst_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_vld_bcst_frms);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->rmac_vld_mcst_frms_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_vld_mcst_frms);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->rmac_vld_bcst_frms_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_vld_bcst_frms);
         tmp_stats[i++] = le32_to_cpu(stat_info->rmac_in_rng_len_err_frms);
         tmp_stats[i++] = le64_to_cpu(stat_info->rmac_long_frms);
         tmp_stats[i++] = le64_to_cpu(stat_info->rmac_pause_ctrl_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_discarded_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_usized_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_osized_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_frag_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_jabber_frms);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_ip);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->rmac_discarded_frms_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_discarded_frms);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->rmac_usized_frms_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_usized_frms);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->rmac_osized_frms_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_osized_frms);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->rmac_frag_frms_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_frag_frms);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->rmac_jabber_frms_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_jabber_frms);
+       tmp_stats[i++] = (u64)le32_to_cpu(stat_info->rmac_ip_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_ip);
         tmp_stats[i++] = le64_to_cpu(stat_info->rmac_ip_octets);
         tmp_stats[i++] = le32_to_cpu(stat_info->rmac_hdr_err_ip);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_drop_ip);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_icmp);
+       tmp_stats[i++] = (u64)le32_to_cpu(stat_info->rmac_drop_ip_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_drop_ip);
+       tmp_stats[i++] = (u64)le32_to_cpu(stat_info->rmac_icmp_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_icmp);
         tmp_stats[i++] = le64_to_cpu(stat_info->rmac_tcp);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_udp);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_err_drp_udp);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_pause_cnt);
-       tmp_stats[i++] = le32_to_cpu(stat_info->rmac_accepted_ip);
+       tmp_stats[i++] = (u64)le32_to_cpu(stat_info->rmac_udp_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_udp);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->rmac_err_drp_udp_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_err_drp_udp);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->rmac_pause_cnt_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_pause_cnt);
+       tmp_stats[i++] =
+               (u64)le32_to_cpu(stat_info->rmac_accepted_ip_oflow) << 32 |
+               le32_to_cpu(stat_info->rmac_accepted_ip);
         tmp_stats[i++] = le32_to_cpu(stat_info->rmac_err_tcp);
+       tmp_stats[i++] = 0;
+       tmp_stats[i++] = stat_info->sw_stat.single_ecc_errs;
+       tmp_stats[i++] = stat_info->sw_stat.double_ecc_errs;
  }
  
-static int s2io_ethtool_get_regs_len(struct net_device *dev)
+int s2io_ethtool_get_regs_len(struct net_device *dev)
  {
         return (XENA_REG_SPACE);
  }
  
  
-static u32 s2io_ethtool_get_rx_csum(struct net_device * dev)
+u32 s2io_ethtool_get_rx_csum(struct net_device * dev)
  {
         nic_t *sp = dev->priv;
  
         return (sp->rx_csum);
  }
-
-static int s2io_ethtool_set_rx_csum(struct net_device *dev, u32 data)
+int s2io_ethtool_set_rx_csum(struct net_device *dev, u32 data)
  {
         nic_t *sp = dev->priv;
  
@@ -3975,19 +4578,17 @@ static int s2io_ethtool_set_rx_csum(struct net_device *dev, u32 data)
  
         return 0;
  }
-
-static int s2io_get_eeprom_len(struct net_device *dev)
+int s2io_get_eeprom_len(struct net_device *dev)
  {
         return (XENA_EEPROM_SPACE);
  }
  
-static int s2io_ethtool_self_test_count(struct net_device *dev)
+int s2io_ethtool_self_test_count(struct net_device *dev)
  {
         return (S2IO_TEST_LEN);
  }
-
-static void s2io_ethtool_get_strings(struct net_device *dev,
-                                    u32 stringset, u8 * data)
+void s2io_ethtool_get_strings(struct net_device *dev,
+                             u32 stringset, u8 * data)
  {
         switch (stringset) {
         case ETH_SS_TEST:
@@ -3998,13 +4599,12 @@ static void s2io_ethtool_get_strings(struct net_device *dev,
                        sizeof(ethtool_stats_keys));
         }
  }
-
  static int s2io_ethtool_get_stats_count(struct net_device *dev)
  {
         return (S2IO_STAT_LEN);
  }
  
-static int s2io_ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
+int s2io_ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
  {
         if (data)
                 dev->features |= NETIF_F_IP_CSUM;
@@ -4046,21 +4646,18 @@ static struct ethtool_ops netdev_ethtool_ops = {
  };
  
  /**
- *  s2io_ioctl - Entry point for the Ioctl 
+ *  s2io_ioctl - Entry point for the Ioctl
   *  @dev :  Device pointer.
   *  @ifr :  An IOCTL specefic structure, that can contain a pointer to
   *  a proprietary structure used to pass information to the driver.
   *  @cmd :  This is used to distinguish between the different commands that
   *  can be passed to the IOCTL functions.
   *  Description:
- *  This function has support for ethtool, adding multiple MAC addresses on 
- *  the NIC and some DBG commands for the util tool.
- *  Return value:
- *  Currently the IOCTL supports no operations, hence by default this
- *  function returns OP NOT SUPPORTED value.
+ *  Currently no special functionality is supported in IOCTL, hence this
+ *  function always returns -EOPNOTSUPP.
   */
  
-static int s2io_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+int s2io_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
  {
         return -EOPNOTSUPP;
  }
@@ -4076,17 +4673,9 @@ static int s2io_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
   *   file on failure.
   */
  
-static int s2io_change_mtu(struct net_device *dev, int new_mtu)
+int s2io_change_mtu(struct net_device *dev, int new_mtu)
  {
         nic_t *sp = dev->priv;
-       XENA_dev_config_t __iomem *bar0 = sp->bar0;
-       register u64 val64;
-
-       if (netif_running(dev)) {
-               DBG_PRINT(ERR_DBG, "%s: Must be stopped to ", dev->name);
-               DBG_PRINT(ERR_DBG, "change its MTU \n");
-               return -EBUSY;
-       }
  
         if ((new_mtu < MIN_MTU) || (new_mtu > S2IO_JUMBO_SIZE)) {
                 DBG_PRINT(ERR_DBG, "%s: MTU size is invalid.\n",
@@ -4094,11 +4683,22 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu)
                 return -EPERM;
         }
  
-       /* Set the new MTU into the PYLD register of the NIC */
-       val64 = new_mtu;
-       writeq(vBIT(val64, 2, 14), &bar0->rmac_max_pyld_len);
-
         dev->mtu = new_mtu;
+       if (netif_running(dev)) {
+               s2io_card_down(sp);
+               netif_stop_queue(dev);
+               if (s2io_card_up(sp)) {
+                       DBG_PRINT(ERR_DBG, "%s: Device bring up failed\n",
+                                 __FUNCTION__);
+               }
+               if (netif_queue_stopped(dev))
+                       netif_wake_queue(dev);
+       } else { /* Device is down */
+               XENA_dev_config_t __iomem *bar0 = sp->bar0;
+               u64 val64 = new_mtu;
+
+               writeq(vBIT(val64, 2, 14), &bar0->rmac_max_pyld_len);
+       }
  
         return 0;
  }
@@ -4108,9 +4708,9 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu)
   *  @dev_adr : address of the device structure in dma_addr_t format.
   *  Description:
   *  This is the tasklet or the bottom half of the ISR. This is
- *  an extension of the ISR which is scheduled by the scheduler to be run 
+ *  an extension of the ISR which is scheduled by the scheduler to be run
   *  when the load on the CPU is low. All low priority tasks of the ISR can
- *  be pushed into the tasklet. For now the tasklet is used only to 
+ *  be pushed into the tasklet. For now the tasklet is used only to
   *  replenish the Rx buffers in the Rx buffer descriptors.
   *  Return value:
   *  void.
@@ -4166,19 +4766,22 @@ static void s2io_set_link(unsigned long data)
         }
  
         subid = nic->pdev->subsystem_device;
-       /* 
-        * Allow a small delay for the NICs self initiated 
-        * cleanup to complete.
-        */
-       msleep(100);
+       if (s2io_link_fault_indication(nic) == MAC_RMAC_ERR_TIMER) {
+               /*
+                * Allow a small delay for the NICs self initiated
+                * cleanup to complete.
+                */
+               msleep(100);
+       }
  
         val64 = readq(&bar0->adapter_status);
-       if (verify_xena_quiescence(val64, nic->device_enabled_once)) {
+       if (verify_xena_quiescence(nic, val64, nic->device_enabled_once)) {
                 if (LINK_IS_UP(val64)) {
                         val64 = readq(&bar0->adapter_control);
                         val64 |= ADAPTER_CNTL_EN;
                         writeq(val64, &bar0->adapter_control);
-                       if (CARDS_WITH_FAULTY_LINK_INDICATORS(subid)) {
+                       if (CARDS_WITH_FAULTY_LINK_INDICATORS(nic->device_type,
+                                                            subid)) {
                                 val64 = readq(&bar0->gpio_control);
                                 val64 |= GPIO_CTRL_GPIO_0;
                                 writeq(val64, &bar0->gpio_control);
@@ -4187,20 +4790,24 @@ static void s2io_set_link(unsigned long data)
                                 val64 |= ADAPTER_LED_ON;
                                 writeq(val64, &bar0->adapter_control);
                         }
-                       val64 = readq(&bar0->adapter_status);
-                       if (!LINK_IS_UP(val64)) {
-                               DBG_PRINT(ERR_DBG, "%s:", dev->name);
-                               DBG_PRINT(ERR_DBG, " Link down");
-                               DBG_PRINT(ERR_DBG, "after ");
-                               DBG_PRINT(ERR_DBG, "enabling ");
-                               DBG_PRINT(ERR_DBG, "device \n");
+                       if (s2io_link_fault_indication(nic) ==
+                                               MAC_RMAC_ERR_TIMER) {
+                               val64 = readq(&bar0->adapter_status);
+                               if (!LINK_IS_UP(val64)) {
+                                       DBG_PRINT(ERR_DBG, "%s:", dev->name);
+                                       DBG_PRINT(ERR_DBG, " Link down");
+                                       DBG_PRINT(ERR_DBG, "after ");
+                                       DBG_PRINT(ERR_DBG, "enabling ");
+                                       DBG_PRINT(ERR_DBG, "device \n");
+                               }
                         }
                         if (nic->device_enabled_once == FALSE) {
                                 nic->device_enabled_once = TRUE;
                         }
                         s2io_link(nic, LINK_UP);
                 } else {
-                       if (CARDS_WITH_FAULTY_LINK_INDICATORS(subid)) {
+                       if (CARDS_WITH_FAULTY_LINK_INDICATORS(nic->device_type,
+                                                             subid)) {
                                 val64 = readq(&bar0->gpio_control);
                                 val64 &= ~GPIO_CTRL_GPIO_0;
                                 writeq(val64, &bar0->gpio_control);
@@ -4223,9 +4830,11 @@ static void s2io_card_down(nic_t * sp)
         unsigned long flags;
         register u64 val64 = 0;
  
+       del_timer_sync(&sp->alarm_timer);
         /* If s2io_set_link task is executing, wait till it completes. */
-       while (test_and_set_bit(0, &(sp->link_state)))
+       while (test_and_set_bit(0, &(sp->link_state))) {
                 msleep(50);
+       }
         atomic_set(&sp->card_state, CARD_DOWN);
  
         /* disable Tx and Rx traffic on the NIC */
@@ -4237,7 +4846,7 @@ static void s2io_card_down(nic_t * sp)
         /* Check if the device is Quiescent and then Reset the NIC */
         do {
                 val64 = readq(&bar0->adapter_status);
-               if (verify_xena_quiescence(val64, sp->device_enabled_once)) {
+               if (verify_xena_quiescence(sp, val64, sp->device_enabled_once)) {
                         break;
                 }
  
@@ -4251,14 +4860,27 @@ static void s2io_card_down(nic_t * sp)
                         break;
                 }
         } while (1);
-       spin_lock_irqsave(&sp->tx_lock, flags);
         s2io_reset(sp);
  
-       /* Free all unused Tx and Rx buffers */
+       /* Waiting till all Interrupt handlers are complete */
+       cnt = 0;
+       do {
+               msleep(10);
+               if (!atomic_read(&sp->isr_cnt))
+                       break;
+               cnt++;
+       } while(cnt < 5);
+
+       spin_lock_irqsave(&sp->tx_lock, flags);
+       /* Free all Tx buffers */
         free_tx_buffers(sp);
+       spin_unlock_irqrestore(&sp->tx_lock, flags);
+
+       /* Free all Rx buffers */
+       spin_lock_irqsave(&sp->rx_lock, flags);
         free_rx_buffers(sp);
+       spin_unlock_irqrestore(&sp->rx_lock, flags);
  
-       spin_unlock_irqrestore(&sp->tx_lock, flags);
         clear_bit(0, &(sp->link_state));
  }
  
@@ -4276,8 +4898,8 @@ static int s2io_card_up(nic_t * sp)
                 return -ENODEV;
         }
  
-       /* 
-        * Initializing the Rx buffers. For now we are considering only 1 
+       /*
+        * Initializing the Rx buffers. For now we are considering only 1
          * Rx ring and initializing buffers into 30 Rx blocks
          */
         mac_control = &sp->mac_control;
@@ -4311,16 +4933,18 @@ static int s2io_card_up(nic_t * sp)
                 return -ENODEV;
         }
  
+       S2IO_TIMER_CONF(sp->alarm_timer, s2io_alarm_handle, sp, (HZ/2));
+
         atomic_set(&sp->card_state, CARD_UP);
         return 0;
  }
  
-/** 
+/**
   * s2io_restart_nic - Resets the NIC.
   * @data : long pointer to the device private structure
   * Description:
   * This function is scheduled to be run by the s2io_tx_watchdog
- * function after 0.5 secs to reset the NIC. The idea is to reduce 
+ * function after 0.5 secs to reset the NIC. The idea is to reduce
   * the run time of the watch dog routine which is run holding a
   * spin lock.
   */
@@ -4338,10 +4962,11 @@ static void s2io_restart_nic(unsigned long data)
         netif_wake_queue(dev);
         DBG_PRINT(ERR_DBG, "%s: was reset by Tx watchdog timer\n",
                   dev->name);
+
  }
  
-/** 
- *  s2io_tx_watchdog - Watchdog for transmit side. 
+/**
+ *  s2io_tx_watchdog - Watchdog for transmit side.
   *  @dev : Pointer to net device structure
   *  Description:
   *  This function is triggered if the Tx Queue is stopped
@@ -4369,7 +4994,7 @@ static void s2io_tx_watchdog(struct net_device *dev)
   *   @len : length of the packet
   *   @cksum : FCS checksum of the frame.
   *   @ring_no : the ring from which this RxD was extracted.
- *   Description: 
+ *   Description:
   *   This function is called by the Tx interrupt serivce routine to perform
   *   some OS related operations on the SKB before passing it to the upper
   *   layers. It mainly checks if the checksum is OK, if so adds it to the
@@ -4379,35 +5004,68 @@ static void s2io_tx_watchdog(struct net_device *dev)
   *   Return value:
   *   SUCCESS on success and -1 on failure.
   */
-#ifndef CONFIG_2BUFF_MODE
-static int rx_osm_handler(nic_t * sp, u16 len, RxD_t * rxdp, int ring_no)
-#else
-static int rx_osm_handler(nic_t * sp, RxD_t * rxdp, int ring_no,
-                         buffAdd_t * ba)
-#endif
+static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
  {
+       nic_t *sp = ring_data->nic;
         struct net_device *dev = (struct net_device *) sp->dev;
-       struct sk_buff *skb =
-           (struct sk_buff *) ((unsigned long) rxdp->Host_Control);
+       struct sk_buff *skb = (struct sk_buff *)
+               ((unsigned long) rxdp->Host_Control);
+       int ring_no = ring_data->ring_no;
         u16 l3_csum, l4_csum;
  #ifdef CONFIG_2BUFF_MODE
-       int buf0_len, buf2_len;
+       int buf0_len = RXD_GET_BUFFER0_SIZE(rxdp->Control_2);
+       int buf2_len = RXD_GET_BUFFER2_SIZE(rxdp->Control_2);
+       int get_block = ring_data->rx_curr_get_info.block_index;
+       int get_off = ring_data->rx_curr_get_info.offset;
+       buffAdd_t *ba = &ring_data->ba[get_block][get_off];
         unsigned char *buff;
+#else
+       u16 len = (u16) ((RXD_GET_BUFFER0_SIZE(rxdp->Control_2)) >> 48);;
+#endif
+       skb->dev = dev;
+       if (rxdp->Control_1 & RXD_T_CODE) {
+               unsigned long long err = rxdp->Control_1 & RXD_T_CODE;
+               DBG_PRINT(ERR_DBG, "%s: Rx error Value: 0x%llx\n",
+                         dev->name, err);
+               dev_kfree_skb(skb);
+               sp->stats.rx_crc_errors++;
+               atomic_dec(&sp->rx_bufs_left[ring_no]);
+               rxdp->Host_Control = 0;
+               return 0;
+       }
+
+       /* Updating statistics */
+       rxdp->Host_Control = 0;
+       sp->rx_pkt_count++;
+       sp->stats.rx_packets++;
+#ifndef CONFIG_2BUFF_MODE
+       sp->stats.rx_bytes += len;
+#else
+       sp->stats.rx_bytes += buf0_len + buf2_len;
  #endif
  
-       l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1);
-       if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && (sp->rx_csum)) {
+#ifndef CONFIG_2BUFF_MODE
+       skb_put(skb, len);
+#else
+       buff = skb_push(skb, buf0_len);
+       memcpy(buff, ba->ba_0, buf0_len);
+       skb_put(skb, buf2_len);
+#endif
+
+       if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) &&
+           (sp->rx_csum)) {
+               l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1);
                 l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1);
                 if ((l3_csum == L3_CKSUM_OK) && (l4_csum == L4_CKSUM_OK)) {
-                       /* 
+                       /*
                          * NIC verifies if the Checksum of the received
                          * frame is Ok or not and accordingly returns
                          * a flag in the RxD.
                          */
                         skb->ip_summed = CHECKSUM_UNNECESSARY;
                 } else {
-                       /* 
-                        * Packet with erroneous checksum, let the 
+                       /*
+                        * Packet with erroneous checksum, let the
                          * upper layers deal with it.
                          */
                         skb->ip_summed = CHECKSUM_NONE;
@@ -4416,44 +5074,26 @@ static int rx_osm_handler(nic_t * sp, RxD_t * rxdp, int ring_no,
                 skb->ip_summed = CHECKSUM_NONE;
         }
  
-       if (rxdp->Control_1 & RXD_T_CODE) {
-               unsigned long long err = rxdp->Control_1 & RXD_T_CODE;
-               DBG_PRINT(ERR_DBG, "%s: Rx error Value: 0x%llx\n",
-                         dev->name, err);
-       }
-#ifdef CONFIG_2BUFF_MODE
-       buf0_len = RXD_GET_BUFFER0_SIZE(rxdp->Control_2);
-       buf2_len = RXD_GET_BUFFER2_SIZE(rxdp->Control_2);
-#endif
-
-       skb->dev = dev;
-#ifndef CONFIG_2BUFF_MODE
-       skb_put(skb, len);
-       skb->protocol = eth_type_trans(skb, dev);
-#else
-       buff = skb_push(skb, buf0_len);
-       memcpy(buff, ba->ba_0, buf0_len);
-       skb_put(skb, buf2_len);
         skb->protocol = eth_type_trans(skb, dev);
-#endif
-
  #ifdef CONFIG_S2IO_NAPI
-       netif_receive_skb(skb);
+       if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
+               /* Queueing the vlan frame to the upper layer */
+               vlan_hwaccel_receive_skb(skb, sp->vlgrp,
+                       RXD_GET_VLAN_TAG(rxdp->Control_2));
+       } else {
+               netif_receive_skb(skb);
+       }
  #else
-       netif_rx(skb);
+       if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
+               /* Queueing the vlan frame to the upper layer */
+               vlan_hwaccel_rx(skb, sp->vlgrp,
+                       RXD_GET_VLAN_TAG(rxdp->Control_2));
+       } else {
+               netif_rx(skb);
+       }
  #endif
-
         dev->last_rx = jiffies;
-       sp->rx_pkt_count++;
-       sp->stats.rx_packets++;
-#ifndef CONFIG_2BUFF_MODE
-       sp->stats.rx_bytes += len;
-#else
-       sp->stats.rx_bytes += buf0_len + buf2_len;
-#endif
-
         atomic_dec(&sp->rx_bufs_left[ring_no]);
-       rxdp->Host_Control = 0;
         return SUCCESS;
  }
  
@@ -4464,13 +5104,13 @@ static int rx_osm_handler(nic_t * sp, RxD_t * rxdp, int ring_no,
   *  @link : inidicates whether link is UP/DOWN.
   *  Description:
   *  This function stops/starts the Tx queue depending on whether the link
- *  status of the NIC is is down or up. This is called by the Alarm 
- *  interrupt handler whenever a link change interrupt comes up. 
+ *  status of the NIC is down or up. This is called by the Alarm
+ *  interrupt handler whenever a link change interrupt comes up.
   *  Return value:
   *  void.
   */
  
-static void s2io_link(nic_t * sp, int link)
+void s2io_link(nic_t * sp, int link)
  {
         struct net_device *dev = (struct net_device *) sp->dev;
  
@@ -4487,8 +5127,25 @@ static void s2io_link(nic_t * sp, int link)
  }
  
  /**
- *  s2io_init_pci -Initialization of PCI and PCI-X configuration registers . 
- *  @sp : private member of the device structure, which is a pointer to the 
+ *  get_xena_rev_id - to identify revision ID of xena.
+ *  @pdev : PCI Dev structure
+ *  Description:
+ *  Function to identify the Revision ID of xena.
+ *  Return value:
+ *  returns the revision ID of the device.
+ */
+
+int get_xena_rev_id(struct pci_dev *pdev)
+{
+       u8 id = 0;
+       int ret;
+       ret = pci_read_config_byte(pdev, PCI_REVISION_ID, (u8 *) & id);
+       return id;
+}
+
+/**
+ *  s2io_init_pci -Initialization of PCI and PCI-X configuration registers .
+ *  @sp : private member of the device structure, which is a pointer to the
   *  s2io_nic structure.
   *  Description:
   *  This function initializes a few of the PCI and PCI-X configuration registers
@@ -4499,15 +5156,15 @@ static void s2io_link(nic_t * sp, int link)
  
  static void s2io_init_pci(nic_t * sp)
  {
-       u16 pci_cmd = 0;
+       u16 pci_cmd = 0, pcix_cmd = 0;
  
         /* Enable Data Parity Error Recovery in PCI-X command register. */
         pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER,
-                            &(sp->pcix_cmd));
+                            &(pcix_cmd));
         pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER,
-                             (sp->pcix_cmd | 1));
+                             (pcix_cmd | 1));
         pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER,
-                            &(sp->pcix_cmd));
+                            &(pcix_cmd));
  
         /* Set the PErr Response bit in PCI command register. */
         pci_read_config_word(sp->pdev, PCI_COMMAND, &pci_cmd);
@@ -4515,53 +5172,43 @@ static void s2io_init_pci(nic_t * sp)
                               (pci_cmd | PCI_COMMAND_PARITY));
         pci_read_config_word(sp->pdev, PCI_COMMAND, &pci_cmd);
  
-       /* Set MMRB count to 1024 in PCI-X Command register. */
-       sp->pcix_cmd &= 0xFFF3;
-       pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, (sp->pcix_cmd | (0x1 << 2)));    /* MMRBC 1K */
-       pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER,
-                            &(sp->pcix_cmd));
-
-       /*  Setting Maximum outstanding splits based on system type. */
-       sp->pcix_cmd &= 0xFF8F;
-
-       sp->pcix_cmd |= XENA_MAX_OUTSTANDING_SPLITS(0x1);       /* 2 splits. */
-       pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER,
-                             sp->pcix_cmd);
-       pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER,
-                            &(sp->pcix_cmd));
         /* Forcibly disabling relaxed ordering capability of the card. */
-       sp->pcix_cmd &= 0xfffd;
+       pcix_cmd &= 0xfffd;
         pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER,
-                             sp->pcix_cmd);
+                             pcix_cmd);
         pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER,
-                            &(sp->pcix_cmd));
+                            &(pcix_cmd));
  }
  
  MODULE_AUTHOR("Raghavendra Koushik <raghavendra.koushik@neterion.com>");
  MODULE_LICENSE("GPL");
  module_param(tx_fifo_num, int, 0);
-module_param_array(tx_fifo_len, int, NULL, 0);
  module_param(rx_ring_num, int, 0);
-module_param_array(rx_ring_sz, int, NULL, 0);
-module_param(Stats_refresh_time, int, 0);
+module_param_array(tx_fifo_len, uint, NULL, 0);
+module_param_array(rx_ring_sz, uint, NULL, 0);
+module_param_array(rts_frm_len, uint, NULL, 0);
+module_param(use_continuous_tx_intrs, int, 1);
  module_param(rmac_pause_time, int, 0);
  module_param(mc_pause_threshold_q0q3, int, 0);
  module_param(mc_pause_threshold_q4q7, int, 0);
  module_param(shared_splits, int, 0);
  module_param(tmac_util_period, int, 0);
  module_param(rmac_util_period, int, 0);
+module_param(bimodal, bool, 0);
  #ifndef CONFIG_S2IO_NAPI
  module_param(indicate_max_pkts, int, 0);
  #endif
+module_param(rxsync_frequency, int, 0);
+
  /**
- *  s2io_init_nic - Initialization of the adapter . 
+ *  s2io_init_nic - Initialization of the adapter .
   *  @pdev : structure containing the PCI related information of the device.
   *  @pre: List of PCI devices supported by the driver listed in s2io_tbl.
   *  Description:
   *  The function initializes an adapter identified by the pci_dec structure.
- *  All OS related initialization including memory and device structure and 
- *  initlaization of the device private variable is done. Also the swapper 
- *  control register is initialized to enable read and write into the I/O 
+ *  All OS related initialization including memory and device structure and
+ *  initialization of the device private variable is done. Also the swapper
+ *  control register is initialized to enable read and write into the I/O
   *  registers of the device.
   *  Return value:
   *  returns 0 on success and negative on failure.
@@ -4572,7 +5219,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
  {
         nic_t *sp;
         struct net_device *dev;
-       char *dev_name = "S2IO 10GE NIC";
         int i, j, ret;
         int dma_flag = FALSE;
         u32 mac_up, mac_down;
@@ -4581,10 +5227,11 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
         u16 subid;
         mac_info_t *mac_control;
         struct config_param *config;
+       int mode;
  
-
-       DBG_PRINT(ERR_DBG, "Loading S2IO driver with %s\n",
-               s2io_driver_version);
+#ifdef CONFIG_S2IO_NAPI
+       DBG_PRINT(ERR_DBG, "NAPI support has been enabled\n");
+#endif
  
         if ((ret = pci_enable_device(pdev))) {
                 DBG_PRINT(ERR_DBG,
@@ -4595,7 +5242,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
         if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) {
                 DBG_PRINT(INIT_DBG, "s2io_init_nic: Using 64bit DMA\n");
                 dma_flag = TRUE;
-
                 if (pci_set_consistent_dma_mask
                     (pdev, DMA_64BIT_MASK)) {
                         DBG_PRINT(ERR_DBG,
@@ -4635,34 +5281,41 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
         memset(sp, 0, sizeof(nic_t));
         sp->dev = dev;
         sp->pdev = pdev;
-       sp->vendor_id = pdev->vendor;
-       sp->device_id = pdev->device;
         sp->high_dma_flag = dma_flag;
-       sp->irq = pdev->irq;
         sp->device_enabled_once = FALSE;
-       strcpy(sp->name, dev_name);
+
+       if ((pdev->device == PCI_DEVICE_ID_HERC_WIN) ||
+               (pdev->device == PCI_DEVICE_ID_HERC_UNI))
+               sp->device_type = XFRAME_II_DEVICE;
+       else
+               sp->device_type = XFRAME_I_DEVICE;
  
         /* Initialize some PCI/PCI-X fields of the NIC. */
         s2io_init_pci(sp);
  
-       /* 
+       /*
          * Setting the device configuration parameters.
-        * Most of these parameters can be specified by the user during 
-        * module insertion as they are module loadable parameters. If 
-        * these parameters are not not specified during load time, they 
+        * Most of these parameters can be specified by the user during
+        * module insertion as they are module loadable parameters. If
+        * these parameters are not specified during load time, they
          * are initialized with default values.
          */
         mac_control = &sp->mac_control;
         config = &sp->config;
  
         /* Tx side parameters. */
-       tx_fifo_len[0] = DEFAULT_FIFO_LEN;      /* Default value. */
+       if (tx_fifo_len[0] == 0)
+               tx_fifo_len[0] = DEFAULT_FIFO_LEN; /* Default value. */
         config->tx_fifo_num = tx_fifo_num;
         for (i = 0; i < MAX_TX_FIFOS; i++) {
                 config->tx_cfg[i].fifo_len = tx_fifo_len[i];
                 config->tx_cfg[i].fifo_priority = i;
         }
  
+       /* mapping the QoS priority to the configured fifos */
+       for (i = 0; i < MAX_TX_FIFOS; i++)
+               config->fifo_mapping[i] = fifo_map[config->tx_fifo_num][i];
+
         config->tx_intr_type = TXD_INT_TYPE_UTILZ;
         for (i = 0; i < config->tx_fifo_num; i++) {
                 config->tx_cfg[i].f_no_snoop =
@@ -4675,7 +5328,8 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
         config->max_txds = MAX_SKB_FRAGS;
  
         /* Rx side parameters. */
-       rx_ring_sz[0] = SMALL_BLK_CNT;  /* Default value. */
+       if (rx_ring_sz[0] == 0)
+               rx_ring_sz[0] = SMALL_BLK_CNT; /* Default value. */
         config->rx_ring_num = rx_ring_num;
         for (i = 0; i < MAX_RX_RINGS; i++) {
                 config->rx_cfg[i].num_rxd = rx_ring_sz[i] *
@@ -4699,10 +5353,13 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
         for (i = 0; i < config->rx_ring_num; i++)
                 atomic_set(&sp->rx_bufs_left[i], 0);
  
+       /* Initialize the number of ISRs currently running */
+       atomic_set(&sp->isr_cnt, 0);
+
         /*  initialize the shared memory used by the NIC and the host */
         if (init_shared_mem(sp)) {
                 DBG_PRINT(ERR_DBG, "%s: Memory allocation failed\n",
-                         dev->name);
+                         __FUNCTION__);
                 ret = -ENOMEM;
                 goto mem_alloc_failed;
         }
@@ -4743,13 +5400,17 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
         dev->do_ioctl = &s2io_ioctl;
         dev->change_mtu = &s2io_change_mtu;
         SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+       dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
+       dev->vlan_rx_register = s2io_vlan_rx_register;
+       dev->vlan_rx_kill_vid = (void *)s2io_vlan_rx_kill_vid;
+
         /*
          * will use eth_mac_addr() for  dev->set_mac_address
          * mac address will be set every time dev->open() is called
          */
-#ifdef CONFIG_S2IO_NAPI
+#if defined(CONFIG_S2IO_NAPI)
         dev->poll = s2io_poll;
-       dev->weight = 90;
+       dev->weight = 32;
  #endif
  
         dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
@@ -4776,22 +5437,28 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
                 goto set_swap_failed;
         }
  
-       /* Fix for all "FFs" MAC address problems observed on Alpha platforms */
-       fix_mac_address(sp);
-       s2io_reset(sp);
+       /* Verify if the Herc works on the slot it is placed into */
+       if (sp->device_type & XFRAME_II_DEVICE) {
+               mode = s2io_verify_pci_mode(sp);
+               if (mode < 0) {
+                       DBG_PRINT(ERR_DBG, "%s: ", __FUNCTION__);
+                       DBG_PRINT(ERR_DBG, " Unsupported PCI bus mode\n");
+                       ret = -EBADSLT;
+                       goto set_swap_failed;
+               }
+       }
  
-       /*
-        * Setting swapper control on the NIC, so the MAC address can be read.
-        */
-       if (s2io_set_swapper(sp)) {
-               DBG_PRINT(ERR_DBG,
-                         "%s: S2IO: swapper settings are wrong\n",
-                         dev->name);
-               ret = -EAGAIN;
-               goto set_swap_failed;
+       /* Not needed for Herc */
+       if (sp->device_type & XFRAME_I_DEVICE) {
+               /*
+                * Fix for all "FFs" MAC address problems observed on
+                * Alpha platforms
+                */
+               fix_mac_address(sp);
+               s2io_reset(sp);
         }
  
-       /*  
+       /*
          * MAC address initialization.
          * For now only one mac address will be read and used.
          */
@@ -4814,37 +5481,28 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
         sp->def_mac_addr[0].mac_addr[5] = (u8) (mac_down >> 16);
         sp->def_mac_addr[0].mac_addr[4] = (u8) (mac_down >> 24);
  
-       DBG_PRINT(INIT_DBG,
-                 "DEFAULT MAC ADDR:0x%02x-%02x-%02x-%02x-%02x-%02x\n",
-                 sp->def_mac_addr[0].mac_addr[0],
-                 sp->def_mac_addr[0].mac_addr[1],
-                 sp->def_mac_addr[0].mac_addr[2],
-                 sp->def_mac_addr[0].mac_addr[3],
-                 sp->def_mac_addr[0].mac_addr[4],
-                 sp->def_mac_addr[0].mac_addr[5]);
-
         /*  Set the factory defined MAC address initially   */
         dev->addr_len = ETH_ALEN;
         memcpy(dev->dev_addr, sp->def_mac_addr, ETH_ALEN);
  
         /*
-        * Initialize the tasklet status and link state flags 
-        * and the card statte parameter
+        * Initialize the tasklet status and link state flags
+        * and the card state parameter
          */
         atomic_set(&(sp->card_state), 0);
         sp->tasklet_status = 0;
         sp->link_state = 0;
  
-
         /* Initialize spinlocks */
         spin_lock_init(&sp->tx_lock);
  #ifndef CONFIG_S2IO_NAPI
         spin_lock_init(&sp->put_lock);
  #endif
+       spin_lock_init(&sp->rx_lock);
  
-       /* 
-        * SXE-002: Configure link and activity LED to init state 
-        * on driver load. 
+       /*
+        * SXE-002: Configure link and activity LED to init state
+        * on driver load.
          */
         subid = sp->pdev->subsystem_device;
         if ((subid & 0xFF) >= 0x07) {
@@ -4864,13 +5522,61 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
                 goto register_failed;
         }
  
-       /* 
-        * Make Link state as off at this point, when the Link change 
-        * interrupt comes the state will be automatically changed to 
+       if (sp->device_type & XFRAME_II_DEVICE) {
+               DBG_PRINT(ERR_DBG, "%s: Neterion Xframe II 10GbE adapter ",
+                         dev->name);
+               DBG_PRINT(ERR_DBG, "(rev %d), Driver %s\n",
+                               get_xena_rev_id(sp->pdev),
+                               s2io_driver_version);
+               DBG_PRINT(ERR_DBG, "MAC ADDR: %02x:%02x:%02x:%02x:%02x:%02x\n",
+                         sp->def_mac_addr[0].mac_addr[0],
+                         sp->def_mac_addr[0].mac_addr[1],
+                         sp->def_mac_addr[0].mac_addr[2],
+                         sp->def_mac_addr[0].mac_addr[3],
+                         sp->def_mac_addr[0].mac_addr[4],
+                         sp->def_mac_addr[0].mac_addr[5]);
+               mode = s2io_print_pci_mode(sp);
+               if (mode < 0) {
+                       DBG_PRINT(ERR_DBG, " Unsupported PCI bus mode ");
+                       ret = -EBADSLT;
+                       goto set_swap_failed;
+               }
+       } else {
+               DBG_PRINT(ERR_DBG, "%s: Neterion Xframe I 10GbE adapter ",
+                         dev->name);
+               DBG_PRINT(ERR_DBG, "(rev %d), Driver %s\n",
+                                       get_xena_rev_id(sp->pdev),
+                                       s2io_driver_version);
+               DBG_PRINT(ERR_DBG, "MAC ADDR: %02x:%02x:%02x:%02x:%02x:%02x\n",
+                         sp->def_mac_addr[0].mac_addr[0],
+                         sp->def_mac_addr[0].mac_addr[1],
+                         sp->def_mac_addr[0].mac_addr[2],
+                         sp->def_mac_addr[0].mac_addr[3],
+                         sp->def_mac_addr[0].mac_addr[4],
+                         sp->def_mac_addr[0].mac_addr[5]);
+       }
+
+       /* Initialize device name */
+       strcpy(sp->name, dev->name);
+       if (sp->device_type & XFRAME_II_DEVICE)
+               strcat(sp->name, ": Neterion Xframe II 10GbE adapter");
+       else
+               strcat(sp->name, ": Neterion Xframe I 10GbE adapter");
+
+       /* Initialize bimodal Interrupts */
+       sp->config.bimodal = bimodal;
+       if (!(sp->device_type & XFRAME_II_DEVICE) && bimodal) {
+               sp->config.bimodal = 0;
+               DBG_PRINT(ERR_DBG,"%s:Bimodal intr not supported by Xframe I\n",
+                       dev->name);
+       }
+
+       /*
+        * Make Link state as off at this point, when the Link change
+        * interrupt comes the state will be automatically changed to
          * the right state.
          */
         netif_carrier_off(dev);
-       sp->last_link_state = LINK_DOWN;
  
         return 0;
  
@@ -4891,11 +5597,11 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
  }
  
  /**
- * s2io_rem_nic - Free the PCI device 
+ * s2io_rem_nic - Free the PCI device
   * @pdev: structure containing the PCI related information of the device.
- * Description: This function is called by the Pci subsystem to release a 
+ * Description: This function is called by the Pci subsystem to release a
   * PCI device and free up all resource held up by the device. This could
- * be in response to a Hot plug event or when the driver is to be removed 
+ * be in response to a Hot plug event or when the driver is to be removed
   * from memory.
   */
  
@@ -4919,7 +5625,6 @@ static void __devexit s2io_rem_nic(struct pci_dev *pdev)
         pci_disable_device(pdev);
         pci_release_regions(pdev);
         pci_set_drvdata(pdev, NULL);
-
         free_netdev(dev);
  }
  
@@ -4935,11 +5640,11 @@ int __init s2io_starter(void)
  }
  
  /**
- * s2io_closer - Cleanup routine for the driver 
+ * s2io_closer - Cleanup routine for the driver
   * Description: This function is the cleanup routine for the driver. It unregist * ers the driver.
   */
  
-static void s2io_closer(void)
+void s2io_closer(void)
  {
         pci_unregister_driver(&s2io_driver);
         DBG_PRINT(INIT_DBG, "cleanup done\n");
diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h

index 1711c8c3dc99f02c50e627e08144f5d6d5ea685e..bc64d967f08094abc2a2145696d8b43be70aec85 100644 (file)
--- a/drivers/net/s2io.h
+++ b/drivers/net/s2io.h
@@ -31,6 +31,9 @@
  #define SUCCESS 0
  #define FAILURE -1
  
+/* Maximum time to flicker LED when asked to identify NIC using ethtool */
+#define MAX_FLICKER_TIME       60000 /* 60 Secs */
+
  /* Maximum outstanding splits to be configured into xena. */
  typedef enum xena_max_outstanding_splits {
         XENA_ONE_SPLIT_TRANSACTION = 0,
@@ -45,10 +48,10 @@ typedef enum xena_max_outstanding_splits {
  #define XENA_MAX_OUTSTANDING_SPLITS(n) (n << 4)
  
  /*  OS concerned variables and constants */
-#define WATCH_DOG_TIMEOUT      5*HZ
-#define EFILL                          0x1234
-#define ALIGN_SIZE                     127
-#define        PCIX_COMMAND_REGISTER   0x62
+#define WATCH_DOG_TIMEOUT              15*HZ
+#define EFILL                          0x1234
+#define ALIGN_SIZE                     127
+#define        PCIX_COMMAND_REGISTER           0x62
  
  /*
   * Debug related variables.
@@ -61,7 +64,7 @@ typedef enum xena_max_outstanding_splits {
  #define        INTR_DBG        4
  
  /* Global variable that defines the present debug level of the driver. */
-static int debug_level = ERR_DBG;      /* Default level. */
+int debug_level = ERR_DBG;     /* Default level. */
  
  /* DEBUG message print. */
  #define DBG_PRINT(dbg_level, args...)  if(!(debug_level<dbg_level)) printk(args)
@@ -71,6 +74,12 @@ static int debug_level = ERR_DBG;    /* Default level. */
  #define L4_CKSUM_OK 0xFFFF
  #define S2IO_JUMBO_SIZE 9600
  
+/* Driver statistics maintained by driver */
+typedef struct {
+       unsigned long long single_ecc_errs;
+       unsigned long long double_ecc_errs;
+} swStat_t;
+
  /* The statistics block of Xena */
  typedef struct stat_block {
  /* Tx MAC statistics counters. */
@@ -186,12 +195,90 @@ typedef struct stat_block {
         u32 rxd_rd_cnt;
         u32 rxf_wr_cnt;
         u32 txf_rd_cnt;
+
+/* Tx MAC statistics overflow counters. */
+       u32 tmac_data_octets_oflow;
+       u32 tmac_frms_oflow;
+       u32 tmac_bcst_frms_oflow;
+       u32 tmac_mcst_frms_oflow;
+       u32 tmac_ucst_frms_oflow;
+       u32 tmac_ttl_octets_oflow;
+       u32 tmac_any_err_frms_oflow;
+       u32 tmac_nucst_frms_oflow;
+       u64 tmac_vlan_frms;
+       u32 tmac_drop_ip_oflow;
+       u32 tmac_vld_ip_oflow;
+       u32 tmac_rst_tcp_oflow;
+       u32 tmac_icmp_oflow;
+       u32 tpa_unknown_protocol;
+       u32 tmac_udp_oflow;
+       u32 reserved_10;
+       u32 tpa_parse_failure;
+
+/* Rx MAC Statistics overflow counters. */
+       u32 rmac_data_octets_oflow;
+       u32 rmac_vld_frms_oflow;
+       u32 rmac_vld_bcst_frms_oflow;
+       u32 rmac_vld_mcst_frms_oflow;
+       u32 rmac_accepted_ucst_frms_oflow;
+       u32 rmac_ttl_octets_oflow;
+       u32 rmac_discarded_frms_oflow;
+       u32 rmac_accepted_nucst_frms_oflow;
+       u32 rmac_usized_frms_oflow;
+       u32 rmac_drop_events_oflow;
+       u32 rmac_frag_frms_oflow;
+       u32 rmac_osized_frms_oflow;
+       u32 rmac_ip_oflow;
+       u32 rmac_jabber_frms_oflow;
+       u32 rmac_icmp_oflow;
+       u32 rmac_drop_ip_oflow;
+       u32 rmac_err_drp_udp_oflow;
+       u32 rmac_udp_oflow;
+       u32 reserved_11;
+       u32 rmac_pause_cnt_oflow;
+       u64 rmac_ttl_1519_4095_frms;
+       u64 rmac_ttl_4096_8191_frms;
+       u64 rmac_ttl_8192_max_frms;
+       u64 rmac_ttl_gt_max_frms;
+       u64 rmac_osized_alt_frms;
+       u64 rmac_jabber_alt_frms;
+       u64 rmac_gt_max_alt_frms;
+       u64 rmac_vlan_frms;
+       u32 rmac_len_discard;
+       u32 rmac_fcs_discard;
+       u32 rmac_pf_discard;
+       u32 rmac_da_discard;
+       u32 rmac_red_discard;
+       u32 rmac_rts_discard;
+       u32 reserved_12;
+       u32 rmac_ingm_full_discard;
+       u32 reserved_13;
+       u32 rmac_accepted_ip_oflow;
+       u32 reserved_14;
+       u32 link_fault_cnt;
+       swStat_t sw_stat;
  } StatInfo_t;
  
-/* Structures representing different init time configuration
+/*
+ * Structures representing different init time configuration
   * parameters of the NIC.
   */
  
+#define MAX_TX_FIFOS 8
+#define MAX_RX_RINGS 8
+
+/* FIFO mappings for all possible number of fifos configured */
+int fifo_map[][MAX_TX_FIFOS] = {
+       {0, 0, 0, 0, 0, 0, 0, 0},
+       {0, 0, 0, 0, 1, 1, 1, 1},
+       {0, 0, 0, 1, 1, 1, 2, 2},
+       {0, 0, 1, 1, 2, 2, 3, 3},
+       {0, 0, 1, 1, 2, 2, 3, 4},
+       {0, 0, 1, 1, 2, 3, 4, 5},
+       {0, 0, 1, 2, 3, 4, 5, 6},
+       {0, 1, 2, 3, 4, 5, 6, 7},
+};
+
  /* Maintains Per FIFO related information. */
  typedef struct tx_fifo_config {
  #define        MAX_AVAILABLE_TXDS      8192
@@ -237,14 +324,14 @@ typedef struct rx_ring_config {
  #define NO_SNOOP_RXD_BUFFER         0x02
  } rx_ring_config_t;
  
-/* This structure provides contains values of the tunable parameters 
- * of the H/W 
+/* This structure contains the values of the tunable parameters
+ * of the H/W
   */
  struct config_param {
  /* Tx Side */
         u32 tx_fifo_num;        /*Number of Tx FIFOs */
-#define MAX_TX_FIFOS 8
  
+       u8 fifo_mapping[MAX_TX_FIFOS];
         tx_fifo_config_t tx_cfg[MAX_TX_FIFOS];  /*Per-Tx FIFO config */
         u32 max_txds;           /*Max no. of Tx buffer descriptor per TxDL */
         u64 tx_intr_type;
@@ -252,10 +339,10 @@ struct config_param {
  
  /* Rx Side */
         u32 rx_ring_num;        /*Number of receive rings */
-#define MAX_RX_RINGS 8
  #define MAX_RX_BLOCKS_PER_RING  150
  
         rx_ring_config_t rx_cfg[MAX_RX_RINGS];  /*Per-Rx Ring config */
+       u8 bimodal;             /*Flag for setting bimodal interrupts*/
  
  #define HEADER_ETHERNET_II_802_3_SIZE 14
  #define HEADER_802_2_SIZE              3
@@ -269,6 +356,7 @@ struct config_param {
  #define MAX_PYLD_JUMBO              9600
  #define MAX_MTU_JUMBO               (MAX_PYLD_JUMBO+18)
  #define MAX_MTU_JUMBO_VLAN          (MAX_PYLD_JUMBO+22)
+       u16 bus_speed;
  };
  
  /* Structure representing MAC Addrs */
@@ -277,7 +365,7 @@ typedef struct mac_addr {
  } macaddr_t;
  
  /* Structure that represent every FIFO element in the BAR1
- * Address location. 
+ * Address location.
   */
  typedef struct _TxFIFO_element {
         u64 TxDL_Pointer;
@@ -339,6 +427,7 @@ typedef struct _RxD_t {
  #define RXD_FRAME_PROTO         vBIT(0xFFFF,24,8)
  #define RXD_FRAME_PROTO_IPV4    BIT(27)
  #define RXD_FRAME_PROTO_IPV6    BIT(28)
+#define RXD_FRAME_IP_FRAG      BIT(29)
  #define RXD_FRAME_PROTO_TCP     BIT(30)
  #define RXD_FRAME_PROTO_UDP     BIT(31)
  #define TCP_OR_UDP_FRAME        (RXD_FRAME_PROTO_TCP | RXD_FRAME_PROTO_UDP)
@@ -346,11 +435,15 @@ typedef struct _RxD_t {
  #define RXD_GET_L4_CKSUM(val)   ((u16)(val) & 0xFFFF)
  
         u64 Control_2;
+#define        THE_RXD_MARK            0x3
+#define        SET_RXD_MARKER          vBIT(THE_RXD_MARK, 0, 2)
+#define        GET_RXD_MARKER(ctrl)    ((ctrl & SET_RXD_MARKER) >> 62)
+
  #ifndef CONFIG_2BUFF_MODE
-#define MASK_BUFFER0_SIZE       vBIT(0xFFFF,0,16)
-#define SET_BUFFER0_SIZE(val)   vBIT(val,0,16)
+#define MASK_BUFFER0_SIZE       vBIT(0x3FFF,2,14)
+#define SET_BUFFER0_SIZE(val)   vBIT(val,2,14)
  #else
-#define MASK_BUFFER0_SIZE       vBIT(0xFF,0,16)
+#define MASK_BUFFER0_SIZE       vBIT(0xFF,2,14)
  #define MASK_BUFFER1_SIZE       vBIT(0xFFFF,16,16)
  #define MASK_BUFFER2_SIZE       vBIT(0xFFFF,32,16)
  #define SET_BUFFER0_SIZE(val)   vBIT(val,8,8)
@@ -363,7 +456,7 @@ typedef struct _RxD_t {
  #define SET_NUM_TAG(val)       vBIT(val,16,32)
  
  #ifndef CONFIG_2BUFF_MODE
-#define RXD_GET_BUFFER0_SIZE(Control_2) (u64)((Control_2 & vBIT(0xFFFF,0,16)))
+#define RXD_GET_BUFFER0_SIZE(Control_2) (u64)((Control_2 & vBIT(0x3FFF,2,14)))
  #else
  #define RXD_GET_BUFFER0_SIZE(Control_2) (u8)((Control_2 & MASK_BUFFER0_SIZE) \
                                                         >> 48)
@@ -382,7 +475,7 @@ typedef struct _RxD_t {
  #endif
  } RxD_t;
  
-/* Structure that represents the Rx descriptor block which contains 
+/* Structure that represents the Rx descriptor block which contains
   * 128 Rx descriptors.
   */
  #ifndef CONFIG_2BUFF_MODE
@@ -392,11 +485,11 @@ typedef struct _RxD_block {
  
         u64 reserved_0;
  #define END_OF_BLOCK    0xFEFFFFFFFFFFFFFFULL
-       u64 reserved_1;         /* 0xFEFFFFFFFFFFFFFF to mark last 
+       u64 reserved_1;         /* 0xFEFFFFFFFFFFFFFF to mark last
                                  * Rxd in this blk */
         u64 reserved_2_pNext_RxD_block; /* Logical ptr to next */
         u64 pNext_RxD_Blk_physical;     /* Buff0_ptr.In a 32 bit arch
-                                        * the upper 32 bits should 
+                                        * the upper 32 bits should
                                          * be 0 */
  } RxD_block_t;
  #else
@@ -405,13 +498,13 @@ typedef struct _RxD_block {
         RxD_t rxd[MAX_RXDS_PER_BLOCK];
  
  #define END_OF_BLOCK    0xFEFFFFFFFFFFFFFFULL
-       u64 reserved_1;         /* 0xFEFFFFFFFFFFFFFF to mark last Rxd 
+       u64 reserved_1;         /* 0xFEFFFFFFFFFFFFFF to mark last Rxd
                                  * in this blk */
         u64 pNext_RxD_Blk_physical;     /* Phy ponter to next blk. */
  } RxD_block_t;
  #define SIZE_OF_BLOCK  4096
  
-/* Structure to hold virtual addresses of Buf0 and Buf1 in 
+/* Structure to hold virtual addresses of Buf0 and Buf1 in
   * 2buf mode. */
  typedef struct bufAdd {
         void *ba_0_org;
@@ -423,8 +516,8 @@ typedef struct bufAdd {
  
  /* Structure which stores all the MAC control parameters */
  
-/* This structure stores the offset of the RxD in the ring 
- * from which the Rx Interrupt processor can start picking 
+/* This structure stores the offset of the RxD in the ring
+ * from which the Rx Interrupt processor can start picking
   * up the RxDs for processing.
   */
  typedef struct _rx_curr_get_info_t {
@@ -436,7 +529,7 @@ typedef struct _rx_curr_get_info_t {
  typedef rx_curr_get_info_t rx_curr_put_info_t;
  
  /* This structure stores the offset of the TxDl in the FIFO
- * from which the Tx Interrupt processor can start picking 
+ * from which the Tx Interrupt processor can start picking
   * up the TxDLs for send complete interrupt processing.
   */
  typedef struct {
@@ -446,32 +539,96 @@ typedef struct {
  
  typedef tx_curr_get_info_t tx_curr_put_info_t;
  
-/* Infomation related to the Tx and Rx FIFOs and Rings of Xena
- * is maintained in this structure.
- */
-typedef struct mac_info {
-/* rx side stuff */
-       /* Put pointer info which indictes which RxD has to be replenished 
+/* Structure that holds the Phy and virt addresses of the Blocks */
+typedef struct rx_block_info {
+       RxD_t *block_virt_addr;
+       dma_addr_t block_dma_addr;
+} rx_block_info_t;
+
+/* pre declaration of the nic structure */
+typedef struct s2io_nic nic_t;
+
+/* Ring specific structure */
+typedef struct ring_info {
+       /* The ring number */
+       int ring_no;
+
+       /*
+        *  Place holders for the virtual and physical addresses of
+        *  all the Rx Blocks
+        */
+       rx_block_info_t rx_blocks[MAX_RX_BLOCKS_PER_RING];
+       int block_count;
+       int pkt_cnt;
+
+       /*
+        * Put pointer info which indicates which RxD has to be replenished
          * with a new buffer.
          */
-       rx_curr_put_info_t rx_curr_put_info[MAX_RX_RINGS];
+       rx_curr_put_info_t rx_curr_put_info;
  
-       /* Get pointer info which indictes which is the last RxD that was 
+       /*
+        * Get pointer info which indicates which is the last RxD that was
          * processed by the driver.
          */
-       rx_curr_get_info_t rx_curr_get_info[MAX_RX_RINGS];
+       rx_curr_get_info_t rx_curr_get_info;
  
-       u16 rmac_pause_time;
-       u16 mc_pause_threshold_q0q3;
-       u16 mc_pause_threshold_q4q7;
+#ifndef CONFIG_S2IO_NAPI
+       /* Index to the absolute position of the put pointer of Rx ring */
+       int put_pos;
+#endif
+
+#ifdef CONFIG_2BUFF_MODE
+       /* Buffer Address store. */
+       buffAdd_t **ba;
+#endif
+       nic_t *nic;
+} ring_info_t;
  
+/* Fifo specific structure */
+typedef struct fifo_info {
+       /* FIFO number */
+       int fifo_no;
+
+       /* Maximum TxDs per TxDL */
+       int max_txds;
+
+       /* Place holder of all the TX List's Phy and Virt addresses. */
+       list_info_hold_t *list_info;
+
+       /*
+        * Current offset within the tx FIFO where driver would write
+        * new Tx frame
+        */
+       tx_curr_put_info_t tx_curr_put_info;
+
+       /*
+        * Current offset within tx FIFO from where the driver would start freeing
+        * the buffers
+        */
+       tx_curr_get_info_t tx_curr_get_info;
+
+       nic_t *nic;
+}fifo_info_t;
+
+/* Information related to the Tx and Rx FIFOs and Rings of Xena
+ * is maintained in this structure.
+ */
+typedef struct mac_info {
  /* tx side stuff */
         /* logical pointer of start of each Tx FIFO */
         TxFIFO_element_t __iomem *tx_FIFO_start[MAX_TX_FIFOS];
  
-/* Current offset within tx_FIFO_start, where driver would write new Tx frame*/
-       tx_curr_put_info_t tx_curr_put_info[MAX_TX_FIFOS];
-       tx_curr_get_info_t tx_curr_get_info[MAX_TX_FIFOS];
+       /* Fifo specific structure */
+       fifo_info_t fifos[MAX_TX_FIFOS];
+
+/* rx side stuff */
+       /* Ring specific structure */
+       ring_info_t rings[MAX_RX_RINGS];
+
+       u16 rmac_pause_time;
+       u16 mc_pause_threshold_q0q3;
+       u16 mc_pause_threshold_q4q7;
  
         void *stats_mem;        /* orignal pointer to allocated mem */
         dma_addr_t stats_mem_phy;       /* Physical address of the stat block */
@@ -485,12 +642,6 @@ typedef struct {
         int usage_cnt;
  } usr_addr_t;
  
-/* Structure that holds the Phy and virt addresses of the Blocks */
-typedef struct rx_block_info {
-       RxD_t *block_virt_addr;
-       dma_addr_t block_dma_addr;
-} rx_block_info_t;
-
  /* Default Tunable parameters of the NIC. */
  #define DEFAULT_FIFO_LEN 4096
  #define SMALL_RXD_CNT  30 * (MAX_RXDS_PER_BLOCK+1)
@@ -499,7 +650,20 @@ typedef struct rx_block_info {
  #define LARGE_BLK_CNT  100
  
  /* Structure representing one instance of the NIC */
-typedef struct s2io_nic {
+struct s2io_nic {
+#ifdef CONFIG_S2IO_NAPI
+       /*
+        * Count of packets to be processed in a given iteration, it will be indicated
+        * by the quota field of the device structure when NAPI is enabled.
+        */
+       int pkts_to_process;
+#endif
+       struct net_device *dev;
+       mac_info_t mac_control;
+       struct config_param config;
+       struct pci_dev *pdev;
+       void __iomem *bar0;
+       void __iomem *bar1;
  #define MAX_MAC_SUPPORTED   16
  #define MAX_SUPPORTED_MULTICASTS MAX_MAC_SUPPORTED
  
@@ -507,33 +671,20 @@ typedef struct s2io_nic {
         macaddr_t pre_mac_addr[MAX_MAC_SUPPORTED];
  
         struct net_device_stats stats;
-       void __iomem *bar0;
-       void __iomem *bar1;
-       struct config_param config;
-       mac_info_t mac_control;
         int high_dma_flag;
         int device_close_flag;
         int device_enabled_once;
  
-       char name[32];
+       char name[50];
         struct tasklet_struct task;
         volatile unsigned long tasklet_status;
-       struct timer_list timer;
-       struct net_device *dev;
-       struct pci_dev *pdev;
  
-       u16 vendor_id;
-       u16 device_id;
-       u16 ccmd;
-       u32 cbar0_1;
-       u32 cbar0_2;
-       u32 cbar1_1;
-       u32 cbar1_2;
-       u32 cirq;
-       u8 cache_line;
-       u32 rom_expansion;
-       u16 pcix_cmd;
-       u32 irq;
+       /* Timer that handles I/O errors/exceptions */
+       struct timer_list alarm_timer;
+
+       /* Space to back up the PCI config space */
+       u32 config_space[256 / sizeof(u32)];
+
         atomic_t rx_bufs_left[MAX_RX_RINGS];
  
         spinlock_t tx_lock;
@@ -558,27 +709,11 @@ typedef struct s2io_nic {
         u16 tx_err_count;
         u16 rx_err_count;
  
-#ifndef CONFIG_S2IO_NAPI
-       /* Index to the absolute position of the put pointer of Rx ring. */
-       int put_pos[MAX_RX_RINGS];
-#endif
-
-       /*
-        *  Place holders for the virtual and physical addresses of 
-        *  all the Rx Blocks
-        */
-       rx_block_info_t rx_blocks[MAX_RX_RINGS][MAX_RX_BLOCKS_PER_RING];
-       int block_count[MAX_RX_RINGS];
-       int pkt_cnt[MAX_RX_RINGS];
-
-       /* Place holder of all the TX List's Phy and Virt addresses. */
-       list_info_hold_t *list_info[MAX_TX_FIFOS];
-
         /*  Id timer, used to blink NIC to physically identify NIC. */
         struct timer_list id_timer;
  
         /*  Restart timer, used to restart NIC if the device is stuck and
-        *  a schedule task that will set the correct Link state once the 
+        *  a schedule task that will set the correct Link state once the
          *  NIC's PHY has stabilized after a state change.
          */
  #ifdef INIT_TQUEUE
@@ -589,12 +724,12 @@ typedef struct s2io_nic {
         struct work_struct set_link_task;
  #endif
  
-       /* Flag that can be used to turn on or turn off the Rx checksum 
+       /* Flag that can be used to turn on or turn off the Rx checksum
          * offload feature.
          */
         int rx_csum;
  
-       /*  after blink, the adapter must be restored with original 
+       /*  after blink, the adapter must be restored with original
          *  values.
          */
         u64 adapt_ctrl_org;
@@ -604,16 +739,19 @@ typedef struct s2io_nic {
  #define        LINK_DOWN       1
  #define        LINK_UP         2
  
-#ifdef CONFIG_2BUFF_MODE
-       /* Buffer Address store. */
-       buffAdd_t **ba[MAX_RX_RINGS];
-#endif
         int task_flag;
  #define CARD_DOWN 1
  #define CARD_UP 2
         atomic_t card_state;
         volatile unsigned long link_state;
-} nic_t;
+       struct vlan_group *vlgrp;
+#define XFRAME_I_DEVICE                1
+#define XFRAME_II_DEVICE       2
+       u8 device_type;
+
+       spinlock_t      rx_lock;
+       atomic_t        isr_cnt;
+};
  
  #define RESET_ERROR 1;
  #define CMD_ERROR   2;
@@ -622,7 +760,8 @@ typedef struct s2io_nic {
  #ifndef readq
  static inline u64 readq(void __iomem *addr)
  {
-       u64 ret = readl(addr + 4);
+       u64 ret = 0;
+       ret = readl(addr + 4);
         ret <<= 32;
         ret |= readl(addr);
  
@@ -637,10 +776,10 @@ static inline void writeq(u64 val, void __iomem *addr)
         writel((u32) (val >> 32), (addr + 4));
  }
  
-/* In 32 bit modes, some registers have to be written in a 
+/* In 32 bit modes, some registers have to be written in a
   * particular order to expect correct hardware operation. The
- * macro SPECIAL_REG_WRITE is used to perform such ordered 
- * writes. Defines UF (Upper First) and LF (Lower First) will 
+ * macro SPECIAL_REG_WRITE is used to perform such ordered
+ * writes. Defines UF (Upper First) and LF (Lower First) will
   * be used to specify the required write order.
   */
  #define UF     1
@@ -716,6 +855,7 @@ static inline void SPECIAL_REG_WRITE(u64 val, void __iomem *addr, int order)
  #define        PCC_FB_ECC_ERR     vBIT(0xff, 16, 8)    /* Interrupt to indicate
                                                    PCC_FB_ECC Error. */
  
+#define RXD_GET_VLAN_TAG(Control_2) (u16)(Control_2 & MASK_VLAN_TAG)
  /*
   * Prototype declaration.
   */
@@ -725,36 +865,30 @@ static void __devexit s2io_rem_nic(struct pci_dev *pdev);
  static int init_shared_mem(struct s2io_nic *sp);
  static void free_shared_mem(struct s2io_nic *sp);
  static int init_nic(struct s2io_nic *nic);
-#ifndef CONFIG_S2IO_NAPI
-static void rx_intr_handler(struct s2io_nic *sp);
-#endif
-static void tx_intr_handler(struct s2io_nic *sp);
+static void rx_intr_handler(ring_info_t *ring_data);
+static void tx_intr_handler(fifo_info_t *fifo_data);
  static void alarm_intr_handler(struct s2io_nic *sp);
  
  static int s2io_starter(void);
-static void s2io_closer(void);
+void s2io_closer(void);
  static void s2io_tx_watchdog(struct net_device *dev);
  static void s2io_tasklet(unsigned long dev_addr);
  static void s2io_set_multicast(struct net_device *dev);
-#ifndef CONFIG_2BUFF_MODE
-static int rx_osm_handler(nic_t * sp, u16 len, RxD_t * rxdp, int ring_no);
-#else
-static int rx_osm_handler(nic_t * sp, RxD_t * rxdp, int ring_no,
-                         buffAdd_t * ba);
-#endif
-static void s2io_link(nic_t * sp, int link);
-static void s2io_reset(nic_t * sp);
-#ifdef CONFIG_S2IO_NAPI
+static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp);
+void s2io_link(nic_t * sp, int link);
+void s2io_reset(nic_t * sp);
+#if defined(CONFIG_S2IO_NAPI)
  static int s2io_poll(struct net_device *dev, int *budget);
  #endif
  static void s2io_init_pci(nic_t * sp);
-static int s2io_set_mac_addr(struct net_device *dev, u8 * addr);
+int s2io_set_mac_addr(struct net_device *dev, u8 * addr);
+static void s2io_alarm_handle(unsigned long data);
  static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs);
-static int verify_xena_quiescence(u64 val64, int flag);
+static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag);
  static struct ethtool_ops netdev_ethtool_ops;
  static void s2io_set_link(unsigned long data);
-static int s2io_set_swapper(nic_t * sp);
-static void s2io_card_down(nic_t * nic);
-static int s2io_card_up(nic_t * nic);
-
+int s2io_set_swapper(nic_t * sp);
+static void s2io_card_down(nic_t *nic);
+static int s2io_card_up(nic_t *nic);
+int get_xena_rev_id(struct pci_dev *pdev);
  #endif                         /* _S2IO_H */
diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c

index 3ad0b6751f6fb3c432ea883d8f753b7831a5c8e4..221354eea21f9a9257771e68074821ea7ea64c2f 100644 (file)
--- a/drivers/net/shaper.c
+++ b/drivers/net/shaper.c
@@ -156,52 +156,6 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev)
          
         SHAPERCB(skb)->shapelen= shaper_clocks(shaper,skb);
         
-#ifdef SHAPER_COMPLEX /* and broken.. */
-
-       while(ptr && ptr!=(struct sk_buff *)&shaper->sendq)
-       {
-               if(ptr->pri<skb->pri 
-                       && jiffies - SHAPERCB(ptr)->shapeclock < SHAPER_MAXSLIP)
-               {
-                       struct sk_buff *tmp=ptr->prev;
-
-                       /*
-                        *      It goes before us therefore we slip the length
-                        *      of the new frame.
-                        */
-
-                       SHAPERCB(ptr)->shapeclock+=SHAPERCB(skb)->shapelen;
-                       SHAPERCB(ptr)->shapelatency+=SHAPERCB(skb)->shapelen;
-
-                       /*
-                        *      The packet may have slipped so far back it
-                        *      fell off.
-                        */
-                       if(SHAPERCB(ptr)->shapelatency > SHAPER_LATENCY)
-                       {
-                               skb_unlink(ptr);
-                               dev_kfree_skb(ptr);
-                       }
-                       ptr=tmp;
-               }
-               else
-                       break;
-       }
-       if(ptr==NULL || ptr==(struct sk_buff *)&shaper->sendq)
-               skb_queue_head(&shaper->sendq,skb);
-       else
-       {
-               struct sk_buff *tmp;
-               /*
-                *      Set the packet clock out time according to the
-                *      frames ahead. Im sure a bit of thought could drop
-                *      this loop.
-                */
-               for(tmp=skb_peek(&shaper->sendq); tmp!=NULL && tmp!=ptr; tmp=tmp->next)
-                       SHAPERCB(skb)->shapeclock+=tmp->shapelen;
-               skb_append(ptr,skb);
-       }
-#else
         {
                 struct sk_buff *tmp;
                 /*
@@ -220,7 +174,7 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev)
                 } else
                         skb_queue_tail(&shaper->sendq, skb);
         }
-#endif         
+
         if(sh_debug)
                 printk("Frame queued.\n");
         if(skb_queue_len(&shaper->sendq)>SHAPER_QLEN)
@@ -302,7 +256,7 @@ static void shaper_kick(struct shaper *shaper)
                          *      Pull the frame and get interrupts back on.
                          */
                          
-                       skb_unlink(skb);
+                       skb_unlink(skb, &shaper->sendq);
                         if (shaper->recovery < 
                             SHAPERCB(skb)->shapeclock + SHAPERCB(skb)->shapelen)
                                 shaper->recovery = SHAPERCB(skb)->shapeclock + SHAPERCB(skb)->shapelen;
diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c

index 49bd8c7c3f7dba789f87799236d0b7d6d355ab21..6ee4771addf1e22c5ef1e293889926c83c422f9b 100644 (file)
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -5206,6 +5206,9 @@ static int skge_resume(struct pci_dev *pdev)
  
         return 0;
  }
+#else
+#define skge_suspend NULL
+#define skge_resume NULL
  #endif
  
  static struct pci_device_id skge_pci_tbl[] = {
diff --git a/drivers/net/sk98lin/skgeinit.c b/drivers/net/sk98lin/skgeinit.c

index df4483429a779770eda09827d919f48b42d0e65b..6cb49dd02251273467e542f2b61c82791bcf6325 100644 (file)
--- a/drivers/net/sk98lin/skgeinit.c
+++ b/drivers/net/sk98lin/skgeinit.c
@@ -2016,7 +2016,7 @@ SK_IOC    IoC)            /* IO context */
          * we set the PHY to coma mode and switch to D3 power state.
          */
         if (pAC->GIni.GIYukonLite &&
-               pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) {
+               pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) {
  
                 /* for all ports switch PHY to coma mode */
                 for (i = 0; i < pAC->GIni.GIMacsFound; i++) {
diff --git a/drivers/net/sk98lin/skxmac2.c b/drivers/net/sk98lin/skxmac2.c

index 94a09deecb3228ba4072872505e0af57490485b9..42d2d963150abb8b22ed99293aaeccbb93c58db6 100644 (file)
--- a/drivers/net/sk98lin/skxmac2.c
+++ b/drivers/net/sk98lin/skxmac2.c
@@ -1065,7 +1065,7 @@ int               Port)   /* Port Index (MAC_1 + n) */
         
         /* WA code for COMA mode */
         if (pAC->GIni.GIYukonLite &&
-               pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) {
+               pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) {
                 
                 SK_IN32(IoC, B2_GP_IO, &DWord);
  
@@ -1110,7 +1110,7 @@ int               Port)   /* Port Index (MAC_1 + n) */
  
         /* WA code for COMA mode */
         if (pAC->GIni.GIYukonLite &&
-               pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) {
+               pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) {
                 
                 SK_IN32(IoC, B2_GP_IO, &DWord);
  
@@ -2126,7 +2126,7 @@ SK_U8     Mode)           /* low power mode */
         int             Ret = 0;
  
         if (pAC->GIni.GIYukonLite &&
-           pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) {
+           pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) {
  
                 /* save current power mode */
                 LastMode = pAC->GIni.GP[Port].PPhyPowerState;
@@ -2253,7 +2253,7 @@ int               Port)           /* Port Index (e.g. MAC_1) */
         int             Ret = 0;
  
         if (pAC->GIni.GIYukonLite &&
-               pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) {
+               pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) {
  
                 /* save current power mode */
                 LastMode = pAC->GIni.GP[Port].PPhyPowerState;
diff --git a/drivers/net/skge.c b/drivers/net/skge.c

index 5cacc7ad9e79a9f8505b7dbb34a59604293b4601..d7c98515fdfdd55d91cdd3fb54b1ca252ce2ec09 100644 (file)
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -42,7 +42,7 @@
  #include "skge.h"
  
  #define DRV_NAME               "skge"
-#define DRV_VERSION            "0.7"
+#define DRV_VERSION            "0.9"
  #define PFX                    DRV_NAME " "
  
  #define DEFAULT_TX_RING_SIZE   128
@@ -55,7 +55,7 @@
  #define ETH_JUMBO_MTU          9000
  #define TX_WATCHDOG            (5 * HZ)
  #define NAPI_WEIGHT            64
-#define BLINK_HZ               (HZ/4)
+#define BLINK_MS               250
  
  MODULE_DESCRIPTION("SysKonnect Gigabit Ethernet driver");
  MODULE_AUTHOR("Stephen Hemminger <shemminger@osdl.org>");
@@ -75,13 +75,12 @@ static const struct pci_device_id skge_id_table[] = {
         { PCI_DEVICE(PCI_VENDOR_ID_3COM, PCI_DEVICE_ID_3COM_3C940B) },
         { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_GE) },
         { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_YU) },
-       { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9E00) }, /* SK-9Exx  */
         { PCI_DEVICE(PCI_VENDOR_ID_DLINK, PCI_DEVICE_ID_DLINK_DGE510T), },
         { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4320) },
         { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x5005) }, /* Belkin */
         { PCI_DEVICE(PCI_VENDOR_ID_CNET, PCI_DEVICE_ID_CNET_GIGACARD) },
-       { PCI_DEVICE(PCI_VENDOR_ID_LINKSYS, PCI_DEVICE_ID_LINKSYS_EG1032) },
         { PCI_DEVICE(PCI_VENDOR_ID_LINKSYS, PCI_DEVICE_ID_LINKSYS_EG1064) },
+       { PCI_VENDOR_ID_LINKSYS, 0x1032, PCI_ANY_ID, 0x0015, },
         { 0 }
  };
  MODULE_DEVICE_TABLE(pci, skge_id_table);
@@ -190,7 +189,7 @@ static u32 skge_supported_modes(const struct skge_hw *hw)
  {
         u32 supported;
  
-       if (iscopper(hw)) {
+       if (hw->copper) {
                 supported = SUPPORTED_10baseT_Half
                         | SUPPORTED_10baseT_Full
                         | SUPPORTED_100baseT_Half
@@ -223,7 +222,7 @@ static int skge_get_settings(struct net_device *dev,
         ecmd->transceiver = XCVR_INTERNAL;
         ecmd->supported = skge_supported_modes(hw);
  
-       if (iscopper(hw)) {
+       if (hw->copper) {
                 ecmd->port = PORT_TP;
                 ecmd->phy_address = hw->phy_addr;
         } else
@@ -249,7 +248,7 @@ static int skge_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
         } else {
                 u32 setting;
  
-               switch(ecmd->speed) {
+               switch (ecmd->speed) {
                 case SPEED_1000:
                         if (ecmd->duplex == DUPLEX_FULL)
                                 setting = SUPPORTED_1000baseT_Full;
@@ -620,84 +619,98 @@ static int skge_set_coalesce(struct net_device *dev,
         return 0;
  }
  
-static void skge_led_on(struct skge_hw *hw, int port)
+enum led_mode { LED_MODE_OFF, LED_MODE_ON, LED_MODE_TST };
+static void skge_led(struct skge_port *skge, enum led_mode mode)
  {
+       struct skge_hw *hw = skge->hw;
+       int port = skge->port;
+
+       spin_lock_bh(&hw->phy_lock);
         if (hw->chip_id == CHIP_ID_GENESIS) {
-               skge_write8(hw, SK_REG(port, LNK_LED_REG), LINKLED_ON);
-               skge_write8(hw, B0_LED, LED_STAT_ON);
+               switch (mode) {
+               case LED_MODE_OFF:
+                       xm_phy_write(hw, port, PHY_BCOM_P_EXT_CTRL, PHY_B_PEC_LED_OFF);
+                       skge_write8(hw, SK_REG(port, LNK_LED_REG), LINKLED_OFF);
+                       skge_write32(hw, SK_REG(port, RX_LED_VAL), 0);
+                       skge_write8(hw, SK_REG(port, RX_LED_CTRL), LED_T_OFF);
+                       break;
  
-               skge_write8(hw, SK_REG(port, RX_LED_TST), LED_T_ON);
-               skge_write32(hw, SK_REG(port, RX_LED_VAL), 100);
-               skge_write8(hw, SK_REG(port, RX_LED_CTRL), LED_START);
+               case LED_MODE_ON:
+                       skge_write8(hw, SK_REG(port, LNK_LED_REG), LINKLED_ON);
+                       skge_write8(hw, SK_REG(port, LNK_LED_REG), LINKLED_LINKSYNC_ON);
  
-               /* For Broadcom Phy only */
-               xm_phy_write(hw, port, PHY_BCOM_P_EXT_CTRL, PHY_B_PEC_LED_ON);
-       } else {
-               gm_phy_write(hw, port, PHY_MARV_LED_CTRL, 0);
-               gm_phy_write(hw, port, PHY_MARV_LED_OVER,
-                                 PHY_M_LED_MO_DUP(MO_LED_ON)  |
-                                 PHY_M_LED_MO_10(MO_LED_ON)   |
-                                 PHY_M_LED_MO_100(MO_LED_ON)  |
-                                 PHY_M_LED_MO_1000(MO_LED_ON) |
-                                 PHY_M_LED_MO_RX(MO_LED_ON));
-       }
-}
+                       skge_write8(hw, SK_REG(port, RX_LED_CTRL), LED_START);
+                       skge_write8(hw, SK_REG(port, TX_LED_CTRL), LED_START);
  
-static void skge_led_off(struct skge_hw *hw, int port)
-{
-       if (hw->chip_id == CHIP_ID_GENESIS) {
-               skge_write8(hw, SK_REG(port, LNK_LED_REG), LINKLED_OFF);
-               skge_write8(hw, B0_LED, LED_STAT_OFF);
+                       break;
  
-               skge_write32(hw, SK_REG(port, RX_LED_VAL), 0);
-               skge_write8(hw, SK_REG(port, RX_LED_CTRL), LED_T_OFF);
+               case LED_MODE_TST:
+                       skge_write8(hw, SK_REG(port, RX_LED_TST), LED_T_ON);
+                       skge_write32(hw, SK_REG(port, RX_LED_VAL), 100);
+                       skge_write8(hw, SK_REG(port, RX_LED_CTRL), LED_START);
  
-               /* Broadcom only */
-               xm_phy_write(hw, port, PHY_BCOM_P_EXT_CTRL, PHY_B_PEC_LED_OFF);
+                       xm_phy_write(hw, port, PHY_BCOM_P_EXT_CTRL, PHY_B_PEC_LED_ON);
+                       break;
+               }
         } else {
-               gm_phy_write(hw, port, PHY_MARV_LED_CTRL, 0);
-               gm_phy_write(hw, port, PHY_MARV_LED_OVER,
-                                 PHY_M_LED_MO_DUP(MO_LED_OFF)  |
-                                 PHY_M_LED_MO_10(MO_LED_OFF)   |
-                                 PHY_M_LED_MO_100(MO_LED_OFF)  |
-                                 PHY_M_LED_MO_1000(MO_LED_OFF) |
-                                 PHY_M_LED_MO_RX(MO_LED_OFF));
+               switch (mode) {
+               case LED_MODE_OFF:
+                       gm_phy_write(hw, port, PHY_MARV_LED_CTRL, 0);
+                       gm_phy_write(hw, port, PHY_MARV_LED_OVER,
+                                    PHY_M_LED_MO_DUP(MO_LED_OFF)  |
+                                    PHY_M_LED_MO_10(MO_LED_OFF)   |
+                                    PHY_M_LED_MO_100(MO_LED_OFF)  |
+                                    PHY_M_LED_MO_1000(MO_LED_OFF) |
+                                    PHY_M_LED_MO_RX(MO_LED_OFF));
+                       break;
+               case LED_MODE_ON:
+                       gm_phy_write(hw, port, PHY_MARV_LED_CTRL,
+                                    PHY_M_LED_PULS_DUR(PULS_170MS) |
+                                    PHY_M_LED_BLINK_RT(BLINK_84MS) |
+                                    PHY_M_LEDC_TX_CTRL |
+                                    PHY_M_LEDC_DP_CTRL);
+               
+                       gm_phy_write(hw, port, PHY_MARV_LED_OVER,
+                                    PHY_M_LED_MO_RX(MO_LED_OFF) |
+                                    (skge->speed == SPEED_100 ?
+                                     PHY_M_LED_MO_100(MO_LED_ON) : 0));
+                       break;
+               case LED_MODE_TST:
+                       gm_phy_write(hw, port, PHY_MARV_LED_CTRL, 0);
+                       gm_phy_write(hw, port, PHY_MARV_LED_OVER,
+                                    PHY_M_LED_MO_DUP(MO_LED_ON)  |
+                                    PHY_M_LED_MO_10(MO_LED_ON)   |
+                                    PHY_M_LED_MO_100(MO_LED_ON)  |
+                                    PHY_M_LED_MO_1000(MO_LED_ON) |
+                                    PHY_M_LED_MO_RX(MO_LED_ON));
+               }
         }
-}
-
-static void skge_blink_timer(unsigned long data)
-{
-       struct skge_port *skge = (struct skge_port *) data;
-       struct skge_hw *hw = skge->hw;
-       unsigned long flags;
-
-       spin_lock_irqsave(&hw->phy_lock, flags);
-       if (skge->blink_on)
-               skge_led_on(hw, skge->port);
-       else
-               skge_led_off(hw, skge->port);
-       spin_unlock_irqrestore(&hw->phy_lock, flags);
-
-       skge->blink_on = !skge->blink_on;
-       mod_timer(&skge->led_blink, jiffies + BLINK_HZ);
+       spin_unlock_bh(&hw->phy_lock);
  }
  
  /* blink LED's for finding board */
  static int skge_phys_id(struct net_device *dev, u32 data)
  {
         struct skge_port *skge = netdev_priv(dev);
+       unsigned long ms;
+       enum led_mode mode = LED_MODE_TST;
  
         if (!data || data > (u32)(MAX_SCHEDULE_TIMEOUT / HZ))
-               data = (u32)(MAX_SCHEDULE_TIMEOUT / HZ);
+               ms = jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT / HZ) * 1000;
+       else
+               ms = data * 1000;
  
-       /* start blinking */
-       skge->blink_on = 1;
-       mod_timer(&skge->led_blink, jiffies+1);
+       while (ms > 0) {
+               skge_led(skge, mode);
+               mode ^= LED_MODE_TST;
  
-       msleep_interruptible(data * 1000);
-       del_timer_sync(&skge->led_blink);
+               if (msleep_interruptible(BLINK_MS))
+                       break;
+               ms -= BLINK_MS;
+       }
  
-       skge_led_off(skge->hw, skge->port);
+       /* back to regular LED state */
+       skge_led(skge, netif_running(dev) ? LED_MODE_ON : LED_MODE_OFF);
  
         return 0;
  }
@@ -863,6 +876,9 @@ static int skge_rx_fill(struct skge_port *skge)
  
  static void skge_link_up(struct skge_port *skge)
  {
+       skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), 
+                   LED_BLK_OFF|LED_SYNC_OFF|LED_ON);
+
         netif_carrier_on(skge->netdev);
         if (skge->tx_avail > MAX_SKB_FRAGS + 1)
                 netif_wake_queue(skge->netdev);
@@ -881,6 +897,7 @@ static void skge_link_up(struct skge_port *skge)
  
  static void skge_link_down(struct skge_port *skge)
  {
+       skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), LED_OFF);
         netif_carrier_off(skge->netdev);
         netif_stop_queue(skge->netdev);
  
@@ -1028,7 +1045,7 @@ static void bcom_check_link(struct skge_hw *hw, int port)
                         }
  
                         /* Check Duplex mismatch */
-                       switch(aux & PHY_B_AS_AN_RES_MSK) {
+                       switch (aux & PHY_B_AS_AN_RES_MSK) {
                         case PHY_B_RES_1000FD:
                                 skge->duplex = DUPLEX_FULL;
                                 break;
@@ -1099,7 +1116,7 @@ static void bcom_phy_init(struct skge_port *skge, int jumbo)
         r |=  XM_MMU_NO_PRE;
         xm_write16(hw, port, XM_MMU_CMD,r);
  
-       switch(id1) {
+       switch (id1) {
         case PHY_BCOM_ID1_C0:
                 /*
                  * Workaround BCOM Errata for the C0 type.
@@ -1194,13 +1211,6 @@ static void genesis_mac_init(struct skge_hw *hw, int port)
         xm_write16(hw, port, XM_STAT_CMD,
                         XM_SC_CLR_RXC | XM_SC_CLR_TXC);
  
-       /* initialize Rx, Tx and Link LED */
-       skge_write8(hw, SK_REG(port, LNK_LED_REG), LINKLED_ON);
-       skge_write8(hw, SK_REG(port, LNK_LED_REG), LINKLED_LINKSYNC_ON);
-
-       skge_write8(hw, SK_REG(port, RX_LED_CTRL), LED_START);
-       skge_write8(hw, SK_REG(port, TX_LED_CTRL), LED_START);
-
         /* Unreset the XMAC. */
         skge_write16(hw, SK_REG(port, TX_MFF_CTRL1), MFF_CLR_MAC_RST);
  
@@ -1209,7 +1219,6 @@ static void genesis_mac_init(struct skge_hw *hw, int port)
          * namely for the 1000baseTX cards that use the XMAC's
          * GMII mode.
          */
-       spin_lock_bh(&hw->phy_lock);
         /* Take external Phy out of reset */
         r = skge_read32(hw, B2_GP_IO);
         if (port == 0)
@@ -1219,7 +1228,6 @@ static void genesis_mac_init(struct skge_hw *hw, int port)
  
         skge_write32(hw, B2_GP_IO, r);
         skge_read32(hw, B2_GP_IO);
-       spin_unlock_bh(&hw->phy_lock);
  
         /* Enable GMII interfac */
         xm_write16(hw, port, XM_HW_CFG, XM_HW_GMII_MD);
@@ -1569,7 +1577,6 @@ static void yukon_init(struct skge_hw *hw, int port)
  {
         struct skge_port *skge = netdev_priv(hw->dev[port]);
         u16 ctrl, ct1000, adv;
-       u16 ledctrl, ledover;
  
         pr_debug("yukon_init\n");
         if (skge->autoneg == AUTONEG_ENABLE) {
@@ -1596,7 +1603,7 @@ static void yukon_init(struct skge_hw *hw, int port)
         adv = PHY_AN_CSMA;
  
         if (skge->autoneg == AUTONEG_ENABLE) {
-               if (iscopper(hw)) {
+               if (hw->copper) {
                         if (skge->advertising & ADVERTISED_1000baseT_Full)
                                 ct1000 |= PHY_M_1000C_AFD;
                         if (skge->advertising & ADVERTISED_1000baseT_Half)
@@ -1641,32 +1648,11 @@ static void yukon_init(struct skge_hw *hw, int port)
         gm_phy_write(hw, port, PHY_MARV_AUNE_ADV, adv);
         gm_phy_write(hw, port, PHY_MARV_CTRL, ctrl);
  
-       /* Setup Phy LED's */
-       ledctrl = PHY_M_LED_PULS_DUR(PULS_170MS);
-       ledover = 0;
-
-       ledctrl |= PHY_M_LED_BLINK_RT(BLINK_84MS) | PHY_M_LEDC_TX_CTRL;
-
-       /* turn off the Rx LED (LED_RX) */
-       ledover |= PHY_M_LED_MO_RX(MO_LED_OFF);
-
-       /* disable blink mode (LED_DUPLEX) on collisions */
-       ctrl |= PHY_M_LEDC_DP_CTRL;
-       gm_phy_write(hw, port, PHY_MARV_LED_CTRL, ledctrl);
-
-       if (skge->autoneg == AUTONEG_DISABLE || skge->speed == SPEED_100) {
-               /* turn on 100 Mbps LED (LED_LINK100) */
-               ledover |= PHY_M_LED_MO_100(MO_LED_ON);
-       }
-
-       if (ledover)
-               gm_phy_write(hw, port, PHY_MARV_LED_OVER, ledover);
-
         /* Enable phy interrupt on autonegotiation complete (or link up) */
         if (skge->autoneg == AUTONEG_ENABLE)
-               gm_phy_write(hw, port, PHY_MARV_INT_MASK, PHY_M_IS_AN_COMPL);
+               gm_phy_write(hw, port, PHY_MARV_INT_MASK, PHY_M_IS_AN_MSK);
         else
-               gm_phy_write(hw, port, PHY_MARV_INT_MASK, PHY_M_DEF_MSK);
+               gm_phy_write(hw, port, PHY_MARV_INT_MASK, PHY_M_IS_DEF_MSK);
  }
  
  static void yukon_reset(struct skge_hw *hw, int port)
@@ -1691,7 +1677,7 @@ static void yukon_mac_init(struct skge_hw *hw, int port)
  
         /* WA code for COMA mode -- set PHY reset */
         if (hw->chip_id == CHIP_ID_YUKON_LITE &&
-           hw->chip_rev == CHIP_REV_YU_LITE_A3)
+           hw->chip_rev >= CHIP_REV_YU_LITE_A3)
                 skge_write32(hw, B2_GP_IO,
                              (skge_read32(hw, B2_GP_IO) | GP_DIR_9 | GP_IO_9));
  
@@ -1701,7 +1687,7 @@ static void yukon_mac_init(struct skge_hw *hw, int port)
  
         /* WA code for COMA mode -- clear PHY reset */
         if (hw->chip_id == CHIP_ID_YUKON_LITE &&
-           hw->chip_rev == CHIP_REV_YU_LITE_A3)
+           hw->chip_rev >= CHIP_REV_YU_LITE_A3)
                 skge_write32(hw, B2_GP_IO,
                              (skge_read32(hw, B2_GP_IO) | GP_DIR_9)
                              & ~GP_IO_9);
@@ -1709,7 +1695,7 @@ static void yukon_mac_init(struct skge_hw *hw, int port)
         /* Set hardware config mode */
         reg = GPC_INT_POL_HI | GPC_DIS_FC | GPC_DIS_SLEEP |
                 GPC_ENA_XC | GPC_ANEG_ADV_ALL_M | GPC_ENA_PAUSE;
-       reg |= iscopper(hw) ? GPC_HWCFG_GMII_COP : GPC_HWCFG_GMII_FIB;
+       reg |= hw->copper ? GPC_HWCFG_GMII_COP : GPC_HWCFG_GMII_FIB;
  
         /* Clear GMC reset */
         skge_write32(hw, SK_REG(port, GPHY_CTRL), reg | GPC_RST_SET);
@@ -1745,9 +1731,7 @@ static void yukon_mac_init(struct skge_hw *hw, int port)
         gma_write16(hw, port, GM_GP_CTRL, reg);
         skge_read16(hw, GMAC_IRQ_SRC);
  
-       spin_lock_bh(&hw->phy_lock);
         yukon_init(hw, port);
-       spin_unlock_bh(&hw->phy_lock);
  
         /* MIB clear */
         reg = gma_read16(hw, port, GM_PHY_ADDR);
@@ -1796,11 +1780,16 @@ static void yukon_mac_init(struct skge_hw *hw, int port)
         skge_write16(hw, SK_REG(port, RX_GMF_FL_MSK), RX_FF_FL_DEF_MSK);
         reg = GMF_OPER_ON | GMF_RX_F_FL_ON;
         if (hw->chip_id == CHIP_ID_YUKON_LITE &&
-           hw->chip_rev == CHIP_REV_YU_LITE_A3)
+           hw->chip_rev >= CHIP_REV_YU_LITE_A3)
                 reg &= ~GMF_RX_F_FL_ON;
         skge_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_CLR);
         skge_write16(hw, SK_REG(port, RX_GMF_CTRL_T), reg);
-       skge_write16(hw, SK_REG(port, RX_GMF_FL_THR), RX_GMF_FL_THR_DEF);
+       /*
+        * because Pause Packet Truncation in GMAC is not working
+        * we have to increase the Flush Threshold to 64 bytes
+        * in order to flush pause packets in Rx FIFO on Yukon-1
+        */
+       skge_write16(hw, SK_REG(port, RX_GMF_FL_THR), RX_GMF_FL_THR_DEF+1);
  
         /* Configure Tx MAC FIFO */
         skge_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_CLR);
@@ -1813,19 +1802,19 @@ static void yukon_stop(struct skge_port *skge)
         int port = skge->port;
  
         if (hw->chip_id == CHIP_ID_YUKON_LITE &&
-           hw->chip_rev == CHIP_REV_YU_LITE_A3) {
+           hw->chip_rev >= CHIP_REV_YU_LITE_A3) {
                 skge_write32(hw, B2_GP_IO,
                              skge_read32(hw, B2_GP_IO) | GP_DIR_9 | GP_IO_9);
         }
  
         gma_write16(hw, port, GM_GP_CTRL,
                          gma_read16(hw, port, GM_GP_CTRL)
-                        & ~(GM_GPCR_RX_ENA|GM_GPCR_RX_ENA));
+                        & ~(GM_GPCR_TX_ENA|GM_GPCR_RX_ENA));
         gma_read16(hw, port, GM_GP_CTRL);
  
         /* set GPHY Control reset */
-       gma_write32(hw, port, GPHY_CTRL, GPC_RST_SET);
-       gma_write32(hw, port, GMAC_CTRL, GMC_RST_SET);
+       skge_write32(hw, SK_REG(port, GPHY_CTRL), GPC_RST_SET);
+       skge_write32(hw, SK_REG(port, GMAC_CTRL), GMC_RST_SET);
  }
  
  static void yukon_get_stats(struct skge_port *skge, u64 *data)
@@ -1856,11 +1845,12 @@ static void yukon_mac_intr(struct skge_hw *hw, int port)
  
         if (status & GM_IS_RX_FF_OR) {
                 ++skge->net_stats.rx_fifo_errors;
-               gma_write8(hw, port, RX_GMF_CTRL_T, GMF_CLI_RX_FO);
+               skge_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_CLI_RX_FO);
         }
+
         if (status & GM_IS_TX_FF_UR) {
                 ++skge->net_stats.tx_fifo_errors;
-               gma_write8(hw, port, TX_GMF_CTRL_T, GMF_CLI_TX_FU);
+               skge_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_CLI_TX_FU);
         }
  
  }
@@ -1896,7 +1886,7 @@ static void yukon_link_up(struct skge_port *skge)
         reg |= GM_GPCR_RX_ENA | GM_GPCR_TX_ENA;
         gma_write16(hw, port, GM_GP_CTRL, reg);
  
-       gm_phy_write(hw, port, PHY_MARV_INT_MASK, PHY_M_DEF_MSK);
+       gm_phy_write(hw, port, PHY_MARV_INT_MASK, PHY_M_IS_DEF_MSK);
         skge_link_up(skge);
  }
  
@@ -1904,12 +1894,14 @@ static void yukon_link_down(struct skge_port *skge)
  {
         struct skge_hw *hw = skge->hw;
         int port = skge->port;
+       u16 ctrl;
  
         pr_debug("yukon_link_down\n");
         gm_phy_write(hw, port, PHY_MARV_INT_MASK, 0);
-       gm_phy_write(hw, port, GM_GP_CTRL,
-                         gm_phy_read(hw, port, GM_GP_CTRL)
-                         & ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA));
+
+       ctrl = gma_read16(hw, port, GM_GP_CTRL);
+       ctrl &= ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA);
+       gma_write16(hw, port, GM_GP_CTRL, ctrl);
  
         if (skge->flow_control == FLOW_MODE_REM_SEND) {
                 /* restore Asymmetric Pause bit */
@@ -2097,10 +2089,12 @@ static int skge_up(struct net_device *dev)
         skge_write32(hw, B0_IMSK, hw->intr_mask);
  
         /* Initialze MAC */
+       spin_lock_bh(&hw->phy_lock);
         if (hw->chip_id == CHIP_ID_GENESIS)
                 genesis_mac_init(hw, port);
         else
                 yukon_mac_init(hw, port);
+       spin_unlock_bh(&hw->phy_lock);
  
         /* Configure RAMbuffers */
         chunk = hw->ram_size / ((hw->ports + 1)*2);
@@ -2116,6 +2110,7 @@ static int skge_up(struct net_device *dev)
         /* Start receiver BMU */
         wmb();
         skge_write8(hw, Q_ADDR(rxqaddr[port], Q_CSR), CSR_START | CSR_IRQ_CL_F);
+       skge_led(skge, LED_MODE_ON);
  
         pr_debug("skge_up completed\n");
         return 0;
@@ -2140,8 +2135,6 @@ static int skge_down(struct net_device *dev)
  
         netif_stop_queue(dev);
  
-       del_timer_sync(&skge->led_blink);
-
         /* Stop transmitter */
         skge_write8(hw, Q_ADDR(txqaddr[port], Q_CSR), CSR_STOP);
         skge_write32(hw, RB_ADDR(txqaddr[port], RB_CTRL),
@@ -2175,15 +2168,12 @@ static int skge_down(struct net_device *dev)
         if (hw->chip_id == CHIP_ID_GENESIS) {
                 skge_write8(hw, SK_REG(port, TX_MFF_CTRL2), MFF_RST_SET);
                 skge_write8(hw, SK_REG(port, RX_MFF_CTRL2), MFF_RST_SET);
-               skge_write8(hw, SK_REG(port, TX_LED_CTRL), LED_STOP);
-               skge_write8(hw, SK_REG(port, RX_LED_CTRL), LED_STOP);
         } else {
                 skge_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET);
                 skge_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
         }
  
-       /* turn off led's */
-       skge_write16(hw, B0_LED, LED_STAT_OFF);
+       skge_led(skge, LED_MODE_OFF);
  
         skge_tx_clean(skge);
         skge_rx_clean(skge);
@@ -2633,11 +2623,17 @@ static inline void skge_tx_intr(struct net_device *dev)
         spin_unlock(&skge->tx_lock);
  }
  
+/* Parity errors seem to happen when Genesis is connected to a switch
+ * with no other ports present. Heartbeat error??
+ */
  static void skge_mac_parity(struct skge_hw *hw, int port)
  {
-       printk(KERN_ERR PFX "%s: mac data parity error\n",
-              hw->dev[port] ? hw->dev[port]->name
-              : (port == 0 ? "(port A)": "(port B"));
+       struct net_device *dev = hw->dev[port];
+
+       if (dev) {
+               struct skge_port *skge = netdev_priv(dev);
+               ++skge->net_stats.tx_heartbeat_errors;
+       }
  
         if (hw->chip_id == CHIP_ID_GENESIS)
                 skge_write16(hw, SK_REG(port, TX_MFF_CTRL1),
@@ -2683,18 +2679,6 @@ static void skge_error_irq(struct skge_hw *hw)
                 /* Timestamp (unused) overflow */
                 if (hwstatus & IS_IRQ_TIST_OV)
                         skge_write8(hw, GMAC_TI_ST_CTRL, GMT_ST_CLR_IRQ);
-
-               if (hwstatus & IS_IRQ_SENSOR) {
-                       /* no sensors on 32-bit Yukon */
-                       if (!(skge_read16(hw, B0_CTST) & CS_BUS_SLOT_SZ)) {
-                               printk(KERN_ERR PFX "ignoring bogus sensor interrups\n");
-                               skge_write32(hw, B0_HWE_IMSK,
-                                            IS_ERR_MSK & ~IS_IRQ_SENSOR);
-                       } else
-                               printk(KERN_WARNING PFX "sensor interrupt\n");
-               }
-
-
         }
  
         if (hwstatus & IS_RAM_RD_PAR) {
@@ -2725,9 +2709,10 @@ static void skge_error_irq(struct skge_hw *hw)
  
                 skge_pci_clear(hw);
  
+               /* if error still set then just ignore it */
                 hwstatus = skge_read32(hw, B0_HWE_ISRC);
                 if (hwstatus & IS_IRQ_STAT) {
-                       printk(KERN_WARNING PFX "IRQ status %x: still set ignoring hardware errors\n",
+                       pr_debug("IRQ status %x: still set ignoring hardware errors\n",
                                hwstatus);
                         hw->intr_mask &= ~IS_HW_ERR;
                 }
@@ -2889,7 +2874,7 @@ static const char *skge_board_name(const struct skge_hw *hw)
  static int skge_reset(struct skge_hw *hw)
  {
         u16 ctst;
-       u8 t8, mac_cfg;
+       u8 t8, mac_cfg, pmd_type, phy_type;
         int i;
  
         ctst = skge_read16(hw, B0_CTST);
@@ -2908,18 +2893,19 @@ static int skge_reset(struct skge_hw *hw)
                      ctst & (CS_CLK_RUN_HOT|CS_CLK_RUN_RST|CS_CLK_RUN_ENA));
  
         hw->chip_id = skge_read8(hw, B2_CHIP_ID);
-       hw->phy_type = skge_read8(hw, B2_E_1) & 0xf;
-       hw->pmd_type = skge_read8(hw, B2_PMD_TYP);
+       phy_type = skge_read8(hw, B2_E_1) & 0xf;
+       pmd_type = skge_read8(hw, B2_PMD_TYP);
+       hw->copper = (pmd_type == 'T' || pmd_type == '1');
  
         switch (hw->chip_id) {
         case CHIP_ID_GENESIS:
-               switch (hw->phy_type) {
+               switch (phy_type) {
                 case SK_PHY_BCOM:
                         hw->phy_addr = PHY_ADDR_BCOM;
                         break;
                 default:
                         printk(KERN_ERR PFX "%s: unsupported phy type 0x%x\n",
-                              pci_name(hw->pdev), hw->phy_type);
+                              pci_name(hw->pdev), phy_type);
                         return -EOPNOTSUPP;
                 }
                 break;
@@ -2927,13 +2913,10 @@ static int skge_reset(struct skge_hw *hw)
         case CHIP_ID_YUKON:
         case CHIP_ID_YUKON_LITE:
         case CHIP_ID_YUKON_LP:
-               if (hw->phy_type < SK_PHY_MARV_COPPER && hw->pmd_type != 'S')
-                       hw->phy_type = SK_PHY_MARV_COPPER;
+               if (phy_type < SK_PHY_MARV_COPPER && pmd_type != 'S')
+                       hw->copper = 1;
  
                 hw->phy_addr = PHY_ADDR_MARV;
-               if (!iscopper(hw))
-                       hw->phy_type = SK_PHY_MARV_FIBER;
-
                 break;
  
         default:
@@ -2961,12 +2944,20 @@ static int skge_reset(struct skge_hw *hw)
         else
                 hw->ram_size = t8 * 4096;
  
+       hw->intr_mask = IS_HW_ERR | IS_EXT_REG;
         if (hw->chip_id == CHIP_ID_GENESIS)
                 genesis_init(hw);
         else {
                 /* switch power to VCC (WA for VAUX problem) */
                 skge_write8(hw, B0_POWER_CTRL,
                             PC_VAUX_ENA | PC_VCC_ENA | PC_VAUX_OFF | PC_VCC_ON);
+               /* avoid boards with stuck Hardware error bits */
+               if ((skge_read32(hw, B0_ISRC) & IS_HW_ERR) &&
+                   (skge_read32(hw, B0_HWE_ISRC) & IS_IRQ_SENSOR)) {
+                       printk(KERN_WARNING PFX "stuck hardware sensor bit\n");
+                       hw->intr_mask &= ~IS_HW_ERR;
+               }
+
                 for (i = 0; i < hw->ports; i++) {
                         skge_write16(hw, SK_REG(i, GMAC_LINK_CTRL), GMLC_RST_SET);
                         skge_write16(hw, SK_REG(i, GMAC_LINK_CTRL), GMLC_RST_CLR);
@@ -3007,7 +2998,6 @@ static int skge_reset(struct skge_hw *hw)
         skge_write32(hw, B2_IRQM_INI, skge_usecs2clk(hw, 100));
         skge_write32(hw, B2_IRQM_CTRL, TIM_START);
  
-       hw->intr_mask = IS_HW_ERR | IS_EXT_REG;
         skge_write32(hw, B0_IMSK, hw->intr_mask);
  
         if (hw->chip_id != CHIP_ID_GENESIS)
@@ -3083,10 +3073,6 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port,
  
         spin_lock_init(&skge->tx_lock);
  
-       init_timer(&skge->led_blink);
-       skge->led_blink.function = skge_blink_timer;
-       skge->led_blink.data = (unsigned long) skge;
-
         if (hw->chip_id != CHIP_ID_GENESIS) {
                 dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
                 skge->rx_csum = 1;
diff --git a/drivers/net/skge.h b/drivers/net/skge.h

index fced3d2bc07276fc7b7aecc749630095b26e0c53..f1680beb8e68a903d22228755bc84526656d4b1e 100644 (file)
--- a/drivers/net/skge.h
+++ b/drivers/net/skge.h
@@ -214,8 +214,6 @@ enum {
  
  /*     B2_IRQM_HWE_MSK 32 bit  IRQ Moderation HW Error Mask */
  enum {
-       IS_ERR_MSK      = 0x00003fff,/*                 All Error bits */
-
         IS_IRQ_TIST_OV  = 1<<13, /* Time Stamp Timer Overflow (YUKON only) */
         IS_IRQ_SENSOR   = 1<<12, /* IRQ from Sensor (YUKON only) */
         IS_IRQ_MST_ERR  = 1<<11, /* IRQ master error detected */
@@ -230,6 +228,12 @@ enum {
         IS_M2_PAR_ERR   = 1<<2, /* MAC 2 Parity Error */
         IS_R1_PAR_ERR   = 1<<1, /* Queue R1 Parity Error */
         IS_R2_PAR_ERR   = 1<<0, /* Queue R2 Parity Error */
+
+       IS_ERR_MSK      = IS_IRQ_MST_ERR | IS_IRQ_STAT
+                       | IS_NO_STAT_M1 | IS_NO_STAT_M2
+                       | IS_RAM_RD_PAR | IS_RAM_WR_PAR
+                       | IS_M1_PAR_ERR | IS_M2_PAR_ERR
+                       | IS_R1_PAR_ERR | IS_R2_PAR_ERR,
  };
  
  /*     B2_TST_CTRL1     8 bit  Test Control Register 1 */
@@ -1449,10 +1453,12 @@ enum {
         PHY_M_IS_DTE_CHANGE     = 1<<2, /* DTE Power Det. Status Changed */
         PHY_M_IS_POL_CHANGE     = 1<<1, /* Polarity Changed */
         PHY_M_IS_JABBER         = 1<<0, /* Jabber */
-};
  
-#define PHY_M_DEF_MSK  ( PHY_M_IS_AN_ERROR | PHY_M_IS_LSP_CHANGE | \
-                         PHY_M_IS_LST_CHANGE | PHY_M_IS_FIFO_ERROR)
+       PHY_M_IS_DEF_MSK        = PHY_M_IS_AN_ERROR | PHY_M_IS_LSP_CHANGE |
+                                 PHY_M_IS_LST_CHANGE | PHY_M_IS_FIFO_ERROR,
+
+       PHY_M_IS_AN_MSK         = PHY_M_IS_AN_ERROR | PHY_M_IS_AN_COMPL,
+};
  
  /*****  PHY_MARV_EXT_CTRL      16 bit r/w      Ext. PHY Specific Ctrl *****/
  enum {
@@ -1509,7 +1515,7 @@ enum {
         PHY_M_LEDC_TX_C_MSB     = 1<<0, /* Tx Control (MSB, 88E1111 only) */
  };
  
-#define PHY_M_LED_PULS_DUR(x)  (       ((x)<<12) & PHY_M_LEDC_PULS_MSK)
+#define PHY_M_LED_PULS_DUR(x)  (((x)<<12) & PHY_M_LEDC_PULS_MSK)
  
  enum {
         PULS_NO_STR     = 0,/* no pulse stretching */
@@ -1522,7 +1528,7 @@ enum {
         PULS_1300MS     = 7,/* 1.3 s to 2.7 s */
  };
  
-#define PHY_M_LED_BLINK_RT(x)  (       ((x)<<8) & PHY_M_LEDC_BL_R_MSK)
+#define PHY_M_LED_BLINK_RT(x)  (((x)<<8) & PHY_M_LEDC_BL_R_MSK)
  
  enum {
         BLINK_42MS      = 0,/* 42 ms */
@@ -1602,9 +1608,9 @@ enum {
         PHY_M_FELP_LED0_MSK = 0xf, /* Bit  3.. 0: LED0 Mask (SPEED) */
  };
  
-#define PHY_M_FELP_LED2_CTRL(x)        (       ((x)<<8) & PHY_M_FELP_LED2_MSK)
-#define PHY_M_FELP_LED1_CTRL(x)        (       ((x)<<4) & PHY_M_FELP_LED1_MSK)
-#define PHY_M_FELP_LED0_CTRL(x)        (       ((x)<<0) & PHY_M_FELP_LED0_MSK)
+#define PHY_M_FELP_LED2_CTRL(x)        (((x)<<8) & PHY_M_FELP_LED2_MSK)
+#define PHY_M_FELP_LED1_CTRL(x)        (((x)<<4) & PHY_M_FELP_LED1_MSK)
+#define PHY_M_FELP_LED0_CTRL(x)        (((x)<<0) & PHY_M_FELP_LED0_MSK)
  
  enum {
         LED_PAR_CTRL_COLX       = 0x00,
@@ -1640,7 +1646,7 @@ enum {
         PHY_M_MAC_MD_COPPER     = 5,/* Copper only */
         PHY_M_MAC_MD_1000BX     = 7,/* 1000Base-X only */
  };
-#define PHY_M_MAC_MODE_SEL(x)  (       ((x)<<7) & PHY_M_MAC_MD_MSK)
+#define PHY_M_MAC_MODE_SEL(x)  (((x)<<7) & PHY_M_MAC_MD_MSK)
  
  /*****  PHY_MARV_PHY_CTRL (page 3)             16 bit r/w      LED Control Reg. *****/
  enum {
@@ -1650,10 +1656,10 @@ enum {
         PHY_M_LEDC_STA0_MSK     = 0xf, /* Bit  3.. 0: STAT0 LED Ctrl. Mask */
  };
  
-#define PHY_M_LEDC_LOS_CTRL(x) (       ((x)<<12) & PHY_M_LEDC_LOS_MSK)
-#define PHY_M_LEDC_INIT_CTRL(x)        (       ((x)<<8) & PHY_M_LEDC_INIT_MSK)
-#define PHY_M_LEDC_STA1_CTRL(x)        (       ((x)<<4) & PHY_M_LEDC_STA1_MSK)
-#define PHY_M_LEDC_STA0_CTRL(x)        (       ((x)<<0) & PHY_M_LEDC_STA0_MSK)
+#define PHY_M_LEDC_LOS_CTRL(x) (((x)<<12) & PHY_M_LEDC_LOS_MSK)
+#define PHY_M_LEDC_INIT_CTRL(x)        (((x)<<8) & PHY_M_LEDC_INIT_MSK)
+#define PHY_M_LEDC_STA1_CTRL(x)        (((x)<<4) & PHY_M_LEDC_STA1_MSK)
+#define PHY_M_LEDC_STA0_CTRL(x)        (((x)<<0) & PHY_M_LEDC_STA0_MSK)
  
  /* GMAC registers  */
  /* Port Registers */
@@ -2454,24 +2460,17 @@ struct skge_hw {
  
         u8                   chip_id;
         u8                   chip_rev;
-       u8                   phy_type;
-       u8                   pmd_type;
-       u16                  phy_addr;
+       u8                   copper;
         u8                   ports;
  
         u32                  ram_size;
         u32                  ram_offset;
+       u16                  phy_addr;
  
         struct tasklet_struct ext_tasklet;
         spinlock_t           phy_lock;
  };
  
-
-static inline int iscopper(const struct skge_hw *hw)
-{
-       return (hw->pmd_type == 'T');
-}
-
  enum {
         FLOW_MODE_NONE          = 0, /* No Flow-Control */
         FLOW_MODE_LOC_SEND      = 1, /* Local station sends PAUSE */
@@ -2505,8 +2504,6 @@ struct skge_port {
         dma_addr_t           dma;
         unsigned long        mem_size;
         unsigned int         rx_buf_size;
-
-       struct timer_list    led_blink;
  };
  
  
@@ -2606,17 +2603,6 @@ static inline void gma_write16(const struct skge_hw *hw, int port, int r, u16 v)
         skge_write16(hw, SK_GMAC_REG(port,r), v);
  }
  
-static inline void gma_write32(const struct skge_hw *hw, int port, int r, u32 v)
-{
-       skge_write16(hw, SK_GMAC_REG(port, r), (u16) v);
-       skge_write32(hw, SK_GMAC_REG(port, r+4), (u16)(v >> 16));
-}
-
-static inline void gma_write8(const struct skge_hw *hw, int port, int r, u8 v)
-{
-       skge_write8(hw, SK_GMAC_REG(port,r), v);
-}
-
  static inline void gma_set_addr(struct skge_hw *hw, int port, int reg,
                                     const u8 *addr)
  {
diff --git a/drivers/net/smc-ultra.c b/drivers/net/smc-ultra.c

index 6d9dae60a697050c1d7d1e8f4665420f0ac65074..ba8593ac3f8ab8d690eac367207298bb662099ca 100644 (file)
--- a/drivers/net/smc-ultra.c
+++ b/drivers/net/smc-ultra.c
@@ -68,6 +68,7 @@ static const char version[] =
  #include <linux/etherdevice.h>
  
  #include <asm/io.h>
+#include <asm/irq.h>
  #include <asm/system.h>
  
  #include "8390.h"
diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h

index 7089d86e857a6d0dbf62ad48742265fea34aa8f5..a9b06b8d8e3ff185ffbd9cb90ff00e1ee0528fc3 100644 (file)
--- a/drivers/net/smc91x.h
+++ b/drivers/net/smc91x.h
@@ -188,7 +188,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
  #define        SMC_IRQ_TRIGGER_TYPE (( \
                    machine_is_omap_h2() \
                 || machine_is_omap_h3() \
-               || (machine_is_omap_innovator() && !cpu_is_omap150()) \
+               || (machine_is_omap_innovator() && !cpu_is_omap1510()) \
         ) ? IRQT_FALLING : IRQT_RISING)
  
  
diff --git a/drivers/net/sonic.c b/drivers/net/sonic.c

index cdc9cc873e067452b75f95b0806863770c887b25..90b818a8de6e41ca6d7f0863525e87c057ce6cfb 100644 (file)
--- a/drivers/net/sonic.c
+++ b/drivers/net/sonic.c
@@ -1,6 +1,11 @@
  /*
   * sonic.c
   *
+ * (C) 2005 Finn Thain
+ *
+ * Converted to DMA API, added zero-copy buffer handling, and
+ * (from the mac68k project) introduced dhd's support for 16-bit cards.
+ *
   * (C) 1996,1998 by Thomas Bogendoerfer (tsbogend@alpha.franken.de)
   * 
   * This driver is based on work from Andreas Busse, but most of
@@ -9,12 +14,23 @@
   * (C) 1995 by Andreas Busse (andy@waldorf-gmbh.de)
   *
   *    Core code included by system sonic drivers
+ *
+ * And... partially rewritten again by David Huggins-Daines in order
+ * to cope with screwed up Macintosh NICs that may or may not use
+ * 16-bit DMA.
+ *
+ * (C) 1999 David Huggins-Daines <dhd@debian.org>
+ *
   */
  
  /*
   * Sources: Olivetti M700-10 Risc Personal Computer hardware handbook,
   * National Semiconductors data sheet for the DP83932B Sonic Ethernet
   * controller, and the files "8390.c" and "skeleton.c" in this directory.
+ *
+ * Additional sources: Nat Semi data sheet for the DP83932C and Nat Semi
+ * Application Note AN-746, the files "lance.c" and "ibmlana.c". See also
+ * the NetBSD file "sys/arch/mac68k/dev/if_sn.c".
   */
  
  
@@ -28,6 +44,9 @@
   */
  static int sonic_open(struct net_device *dev)
  {
+       struct sonic_local *lp = netdev_priv(dev);
+       int i;
+       
         if (sonic_debug > 2)
                 printk("sonic_open: initializing sonic driver.\n");
  
@@ -40,14 +59,59 @@ static int sonic_open(struct net_device *dev)
   * This means that during execution of the handler interrupt are disabled
   * covering another bug otherwise corrupting data.  This doesn't mean
   * this glue works ok under all situations.
+ *
+ * Note (dhd): this also appears to prevent lockups on the Macintrash
+ * when more than one Ethernet card is installed (knock on wood)
+ *
+ * Note (fthain): whether the above is still true is anyone's guess. Certainly
+ * the buffer handling algorithms will not tolerate re-entrance without some
+ * mutual exclusion added. Anyway, the memcpy has now been eliminated from the
+ * rx code to make this a faster "fast interrupt".
   */
-//    if (sonic_request_irq(dev->irq, &sonic_interrupt, 0, "sonic", dev)) {
-       if (sonic_request_irq(dev->irq, &sonic_interrupt, SA_INTERRUPT,
-                             "sonic", dev)) {
-               printk("\n%s: unable to get IRQ %d .\n", dev->name, dev->irq);
+       if (request_irq(dev->irq, &sonic_interrupt, SONIC_IRQ_FLAG, "sonic", dev)) {
+               printk(KERN_ERR "\n%s: unable to get IRQ %d .\n", dev->name, dev->irq);
                 return -EAGAIN;
         }
  
+       for (i = 0; i < SONIC_NUM_RRS; i++) {
+               struct sk_buff *skb = dev_alloc_skb(SONIC_RBSIZE + 2);
+               if (skb == NULL) {
+                       while(i > 0) { /* free any that were allocated successfully */
+                               i--;
+                               dev_kfree_skb(lp->rx_skb[i]);
+                               lp->rx_skb[i] = NULL;
+                       }
+                       printk(KERN_ERR "%s: couldn't allocate receive buffers\n",
+                              dev->name);
+                       return -ENOMEM;
+               }
+               skb->dev = dev;
+               /* align IP header unless DMA requires otherwise */
+               if (SONIC_BUS_SCALE(lp->dma_bitmode) == 2)
+                       skb_reserve(skb, 2);
+               lp->rx_skb[i] = skb;
+       }
+
+       for (i = 0; i < SONIC_NUM_RRS; i++) {
+               dma_addr_t laddr = dma_map_single(lp->device, skb_put(lp->rx_skb[i], SONIC_RBSIZE),
+                                                 SONIC_RBSIZE, DMA_FROM_DEVICE);
+               if (!laddr) {
+                       while(i > 0) { /* free any that were mapped successfully */
+                               i--;
+                               dma_unmap_single(lp->device, lp->rx_laddr[i], SONIC_RBSIZE, DMA_FROM_DEVICE);
+                               lp->rx_laddr[i] = (dma_addr_t)0;
+                       }
+                       for (i = 0; i < SONIC_NUM_RRS; i++) {
+                               dev_kfree_skb(lp->rx_skb[i]);
+                               lp->rx_skb[i] = NULL;
+                       }
+                       printk(KERN_ERR "%s: couldn't map rx DMA buffers\n",
+                              dev->name);
+                       return -ENOMEM;
+               }
+               lp->rx_laddr[i] = laddr;
+       }
+
         /*
          * Initialize the SONIC
          */
@@ -67,7 +131,8 @@ static int sonic_open(struct net_device *dev)
   */
  static int sonic_close(struct net_device *dev)
  {
-       unsigned int base_addr = dev->base_addr;
+       struct sonic_local *lp = netdev_priv(dev);
+       int i;
  
         if (sonic_debug > 2)
                 printk("sonic_close\n");
@@ -77,20 +142,56 @@ static int sonic_close(struct net_device *dev)
         /*
          * stop the SONIC, disable interrupts
          */
-       SONIC_WRITE(SONIC_ISR, 0x7fff);
         SONIC_WRITE(SONIC_IMR, 0);
+       SONIC_WRITE(SONIC_ISR, 0x7fff);
         SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
  
-       sonic_free_irq(dev->irq, dev);  /* release the IRQ */
+       /* unmap and free skbs that haven't been transmitted */
+       for (i = 0; i < SONIC_NUM_TDS; i++) {
+               if(lp->tx_laddr[i]) {
+                       dma_unmap_single(lp->device, lp->tx_laddr[i], lp->tx_len[i], DMA_TO_DEVICE);
+                       lp->tx_laddr[i] = (dma_addr_t)0;
+               }
+               if(lp->tx_skb[i]) {
+                       dev_kfree_skb(lp->tx_skb[i]);
+                       lp->tx_skb[i] = NULL;
+               }
+       }
+
+       /* unmap and free the receive buffers */
+       for (i = 0; i < SONIC_NUM_RRS; i++) {
+               if(lp->rx_laddr[i]) {
+                       dma_unmap_single(lp->device, lp->rx_laddr[i], SONIC_RBSIZE, DMA_FROM_DEVICE);
+                       lp->rx_laddr[i] = (dma_addr_t)0;
+               }
+               if(lp->rx_skb[i]) {
+                       dev_kfree_skb(lp->rx_skb[i]);
+                       lp->rx_skb[i] = NULL;
+               }
+       }
+
+       free_irq(dev->irq, dev);        /* release the IRQ */
  
         return 0;
  }
  
  static void sonic_tx_timeout(struct net_device *dev)
  {
-       struct sonic_local *lp = (struct sonic_local *) dev->priv;
-       printk("%s: transmit timed out.\n", dev->name);
-
+       struct sonic_local *lp = netdev_priv(dev);
+       int i;
+       /* Stop the interrupts for this */
+       SONIC_WRITE(SONIC_IMR, 0);
+       /* We could resend the original skbs. Easier to re-initialise. */
+       for (i = 0; i < SONIC_NUM_TDS; i++) {
+               if(lp->tx_laddr[i]) {
+                       dma_unmap_single(lp->device, lp->tx_laddr[i], lp->tx_len[i], DMA_TO_DEVICE);
+                       lp->tx_laddr[i] = (dma_addr_t)0;
+               }
+               if(lp->tx_skb[i]) {
+                       dev_kfree_skb(lp->tx_skb[i]);
+                       lp->tx_skb[i] = NULL;
+               }
+       }
         /* Try to restart the adaptor. */
         sonic_init(dev);
         lp->stats.tx_errors++;
@@ -100,60 +201,92 @@ static void sonic_tx_timeout(struct net_device *dev)
  
  /*
   * transmit packet
+ *
+ * Appends new TD during transmission thus avoiding any TX interrupts
+ * until we run out of TDs.
+ * This routine interacts closely with the ISR in that it may,
+ *   set tx_skb[i]
+ *   reset the status flags of the new TD
+ *   set and reset EOL flags
+ *   stop the tx queue
+ * The ISR interacts with this routine in various ways. It may,
+ *   reset tx_skb[i]
+ *   test the EOL and status flags of the TDs
+ *   wake the tx queue
+ * Concurrently with all of this, the SONIC is potentially writing to
+ * the status flags of the TDs.
+ * Until some mutual exclusion is added, this code will not work with SMP. However,
+ * MIPS Jazz machines and m68k Macs were all uni-processor machines.
   */
+
  static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
  {
-       struct sonic_local *lp = (struct sonic_local *) dev->priv;
-       unsigned int base_addr = dev->base_addr;
-       unsigned int laddr;
-       int entry, length;
-
-       netif_stop_queue(dev);
+       struct sonic_local *lp = netdev_priv(dev);
+       dma_addr_t laddr;
+       int length;
+       int entry = lp->next_tx;
  
         if (sonic_debug > 2)
                 printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev);
  
+       length = skb->len;
+       if (length < ETH_ZLEN) {
+               skb = skb_padto(skb, ETH_ZLEN);
+               if (skb == NULL)
+                       return 0;
+               length = ETH_ZLEN;
+       }
+
         /*
          * Map the packet data into the logical DMA address space
          */
-       if ((laddr = vdma_alloc(CPHYSADDR(skb->data), skb->len)) == ~0UL) {
-               printk("%s: no VDMA entry for transmit available.\n",
-                      dev->name);
+
+       laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE);
+       if (!laddr) {
+               printk(KERN_ERR "%s: failed to map tx DMA buffer.\n", dev->name);
                 dev_kfree_skb(skb);
-               netif_start_queue(dev);
                 return 1;
         }
-       entry = lp->cur_tx & SONIC_TDS_MASK;
+   
+       sonic_tda_put(dev, entry, SONIC_TD_STATUS, 0);       /* clear status */
+       sonic_tda_put(dev, entry, SONIC_TD_FRAG_COUNT, 1);   /* single fragment */
+       sonic_tda_put(dev, entry, SONIC_TD_PKTSIZE, length); /* length of packet */
+       sonic_tda_put(dev, entry, SONIC_TD_FRAG_PTR_L, laddr & 0xffff);
+       sonic_tda_put(dev, entry, SONIC_TD_FRAG_PTR_H, laddr >> 16);
+       sonic_tda_put(dev, entry, SONIC_TD_FRAG_SIZE, length);
+       sonic_tda_put(dev, entry, SONIC_TD_LINK,
+               sonic_tda_get(dev, entry, SONIC_TD_LINK) | SONIC_EOL);
+
+       /*
+        * Must set tx_skb[entry] only after clearing status, and
+        * before clearing EOL and before stopping queue
+        */
+       wmb();
+       lp->tx_len[entry] = length;
         lp->tx_laddr[entry] = laddr;
         lp->tx_skb[entry] = skb;
  
-       length = (skb->len < ETH_ZLEN) ? ETH_ZLEN : skb->len;
-       flush_cache_all();
+       wmb();
+       sonic_tda_put(dev, lp->eol_tx, SONIC_TD_LINK,
+                                 sonic_tda_get(dev, lp->eol_tx, SONIC_TD_LINK) & ~SONIC_EOL);
+       lp->eol_tx = entry;
  
-       /*
-        * Setup the transmit descriptor and issue the transmit command.
-        */
-       lp->tda[entry].tx_status = 0;   /* clear status */
-       lp->tda[entry].tx_frag_count = 1;       /* single fragment */
-       lp->tda[entry].tx_pktsize = length;     /* length of packet */
-       lp->tda[entry].tx_frag_ptr_l = laddr & 0xffff;
-       lp->tda[entry].tx_frag_ptr_h = laddr >> 16;
-       lp->tda[entry].tx_frag_size = length;
-       lp->cur_tx++;
-       lp->stats.tx_bytes += length;
+       lp->next_tx = (entry + 1) & SONIC_TDS_MASK;
+       if (lp->tx_skb[lp->next_tx] != NULL) {
+               /* The ring is full, the ISR has yet to process the next TD. */
+               if (sonic_debug > 3)
+                       printk("%s: stopping queue\n", dev->name);
+               netif_stop_queue(dev);
+               /* after this packet, wait for ISR to free up some TDAs */
+       } else netif_start_queue(dev);
  
         if (sonic_debug > 2)
-               printk("sonic_send_packet: issueing Tx command\n");
+               printk("sonic_send_packet: issuing Tx command\n");
  
         SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP);
  
         dev->trans_start = jiffies;
  
-       if (lp->cur_tx < lp->dirty_tx + SONIC_NUM_TDS)
-               netif_start_queue(dev);
-       else
-               lp->tx_full = 1;
-
         return 0;
  }
  
@@ -164,175 +297,199 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
  static irqreturn_t sonic_interrupt(int irq, void *dev_id, struct pt_regs *regs)
  {
         struct net_device *dev = (struct net_device *) dev_id;
-       unsigned int base_addr = dev->base_addr;
-       struct sonic_local *lp;
+       struct sonic_local *lp = netdev_priv(dev);
         int status;
  
         if (dev == NULL) {
-               printk("sonic_interrupt: irq %d for unknown device.\n", irq);
+               printk(KERN_ERR "sonic_interrupt: irq %d for unknown device.\n", irq);
                 return IRQ_NONE;
         }
  
-       lp = (struct sonic_local *) dev->priv;
-
-       status = SONIC_READ(SONIC_ISR);
-       SONIC_WRITE(SONIC_ISR, 0x7fff); /* clear all bits */
-
-       if (sonic_debug > 2)
-               printk("sonic_interrupt: ISR=%x\n", status);
-
-       if (status & SONIC_INT_PKTRX) {
-               sonic_rx(dev);  /* got packet(s) */
-       }
-
-       if (status & SONIC_INT_TXDN) {
-               int dirty_tx = lp->dirty_tx;
-
-               while (dirty_tx < lp->cur_tx) {
-                       int entry = dirty_tx & SONIC_TDS_MASK;
-                       int status = lp->tda[entry].tx_status;
+       if (!(status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT))
+               return IRQ_NONE;
  
-                       if (sonic_debug > 3)
-                               printk
-                                   ("sonic_interrupt: status %d, cur_tx %d, dirty_tx %d\n",
-                                    status, lp->cur_tx, lp->dirty_tx);
+       do {
+               if (status & SONIC_INT_PKTRX) {
+                       if (sonic_debug > 2)
+                               printk("%s: packet rx\n", dev->name);
+                       sonic_rx(dev);  /* got packet(s) */
+                       SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */
+               }
  
-                       if (status == 0) {
-                               /* It still hasn't been Txed, kick the sonic again */
-                               SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP);
-                               break;
-                       }
+               if (status & SONIC_INT_TXDN) {
+                       int entry = lp->cur_tx;
+                       int td_status;
+                       int freed_some = 0;
  
-                       /* put back EOL and free descriptor */
-                       lp->tda[entry].tx_frag_count = 0;
-                       lp->tda[entry].tx_status = 0;
-
-                       if (status & 0x0001)
-                               lp->stats.tx_packets++;
-                       else {
-                               lp->stats.tx_errors++;
-                               if (status & 0x0642)
-                                       lp->stats.tx_aborted_errors++;
-                               if (status & 0x0180)
-                                       lp->stats.tx_carrier_errors++;
-                               if (status & 0x0020)
-                                       lp->stats.tx_window_errors++;
-                               if (status & 0x0004)
-                                       lp->stats.tx_fifo_errors++;
-                       }
+                       /* At this point, cur_tx is the index of a TD that is one of:
+                        *   unallocated/freed                          (status set   & tx_skb[entry] clear)
+                        *   allocated and sent                         (status set   & tx_skb[entry] set  )
+                        *   allocated and not yet sent                 (status clear & tx_skb[entry] set  )
+                        *   still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear)
+                        */
  
-                       /* We must free the original skb */
-                       if (lp->tx_skb[entry]) {
+                       if (sonic_debug > 2)
+                               printk("%s: tx done\n", dev->name);
+
+                       while (lp->tx_skb[entry] != NULL) {
+                               if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0)
+                                       break;
+
+                               if (td_status & 0x0001) {
+                                       lp->stats.tx_packets++;
+                                       lp->stats.tx_bytes += sonic_tda_get(dev, entry, SONIC_TD_PKTSIZE);
+                               } else {
+                                       lp->stats.tx_errors++;
+                                       if (td_status & 0x0642)
+                                               lp->stats.tx_aborted_errors++;
+                                       if (td_status & 0x0180)
+                                               lp->stats.tx_carrier_errors++;
+                                       if (td_status & 0x0020)
+                                               lp->stats.tx_window_errors++;
+                                       if (td_status & 0x0004)
+                                               lp->stats.tx_fifo_errors++;
+                               }
+
+                               /* We must free the original skb */
                                 dev_kfree_skb_irq(lp->tx_skb[entry]);
-                               lp->tx_skb[entry] = 0;
+                               lp->tx_skb[entry] = NULL;
+                               /* and unmap DMA buffer */
+                               dma_unmap_single(lp->device, lp->tx_laddr[entry], lp->tx_len[entry], DMA_TO_DEVICE);
+                               lp->tx_laddr[entry] = (dma_addr_t)0;
+                               freed_some = 1;
+
+                               if (sonic_tda_get(dev, entry, SONIC_TD_LINK) & SONIC_EOL) {
+                                       entry = (entry + 1) & SONIC_TDS_MASK;
+                                       break;
+                               }
+                               entry = (entry + 1) & SONIC_TDS_MASK;
                         }
-                       /* and the VDMA address */
-                       vdma_free(lp->tx_laddr[entry]);
-                       dirty_tx++;
-               }
  
-               if (lp->tx_full
-                   && dirty_tx + SONIC_NUM_TDS > lp->cur_tx + 2) {
-                       /* The ring is no longer full, clear tbusy. */
-                       lp->tx_full = 0;
-                       netif_wake_queue(dev);
+                       if (freed_some || lp->tx_skb[entry] == NULL)
+                               netif_wake_queue(dev);  /* The ring is no longer full */
+                       lp->cur_tx = entry;
+                       SONIC_WRITE(SONIC_ISR, SONIC_INT_TXDN); /* clear the interrupt */
                 }
  
-               lp->dirty_tx = dirty_tx;
-       }
+               /*
+                * check error conditions
+                */
+               if (status & SONIC_INT_RFO) {
+                       if (sonic_debug > 1)
+                               printk("%s: rx fifo overrun\n", dev->name);
+                       lp->stats.rx_fifo_errors++;
+                       SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */
+               }
+               if (status & SONIC_INT_RDE) {
+                       if (sonic_debug > 1)
+                               printk("%s: rx descriptors exhausted\n", dev->name);
+                       lp->stats.rx_dropped++;
+                       SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */
+               }
+               if (status & SONIC_INT_RBAE) {
+                       if (sonic_debug > 1)
+                               printk("%s: rx buffer area exceeded\n", dev->name);
+                       lp->stats.rx_dropped++;
+                       SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */
+               }
  
-       /*
-        * check error conditions
-        */
-       if (status & SONIC_INT_RFO) {
-               printk("%s: receive fifo underrun\n", dev->name);
-               lp->stats.rx_fifo_errors++;
-       }
-       if (status & SONIC_INT_RDE) {
-               printk("%s: receive descriptors exhausted\n", dev->name);
-               lp->stats.rx_dropped++;
-       }
-       if (status & SONIC_INT_RBE) {
-               printk("%s: receive buffer exhausted\n", dev->name);
-               lp->stats.rx_dropped++;
-       }
-       if (status & SONIC_INT_RBAE) {
-               printk("%s: receive buffer area exhausted\n", dev->name);
-               lp->stats.rx_dropped++;
-       }
+               /* counter overruns; all counters are 16bit wide */
+               if (status & SONIC_INT_FAE) {
+                       lp->stats.rx_frame_errors += 65536;
+                       SONIC_WRITE(SONIC_ISR, SONIC_INT_FAE); /* clear the interrupt */
+               }
+               if (status & SONIC_INT_CRC) {
+                       lp->stats.rx_crc_errors += 65536;
+                       SONIC_WRITE(SONIC_ISR, SONIC_INT_CRC); /* clear the interrupt */
+               }
+               if (status & SONIC_INT_MP) {
+                       lp->stats.rx_missed_errors += 65536;
+                       SONIC_WRITE(SONIC_ISR, SONIC_INT_MP); /* clear the interrupt */
+               }
  
-       /* counter overruns; all counters are 16bit wide */
-       if (status & SONIC_INT_FAE)
-               lp->stats.rx_frame_errors += 65536;
-       if (status & SONIC_INT_CRC)
-               lp->stats.rx_crc_errors += 65536;
-       if (status & SONIC_INT_MP)
-               lp->stats.rx_missed_errors += 65536;
+               /* transmit error */
+               if (status & SONIC_INT_TXER) {
+                       if ((SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) && (sonic_debug > 2))
+                               printk(KERN_ERR "%s: tx fifo underrun\n", dev->name);
+                       SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */
+               }
  
-       /* transmit error */
-       if (status & SONIC_INT_TXER)
-               lp->stats.tx_errors++;
+               /* bus retry */
+               if (status & SONIC_INT_BR) {
+                       printk(KERN_ERR "%s: Bus retry occurred! Device interrupt disabled.\n",
+                               dev->name);
+                       /* ... to help debug DMA problems causing endless interrupts. */
+                       /* Bounce the eth interface to turn on the interrupt again. */
+                       SONIC_WRITE(SONIC_IMR, 0);
+                       SONIC_WRITE(SONIC_ISR, SONIC_INT_BR); /* clear the interrupt */
+               }
  
-       /*
-        * clear interrupt bits and return
-        */
-       SONIC_WRITE(SONIC_ISR, status);
+               /* load CAM done */
+               if (status & SONIC_INT_LCD)
+                       SONIC_WRITE(SONIC_ISR, SONIC_INT_LCD); /* clear the interrupt */
+       } while((status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT));
         return IRQ_HANDLED;
  }
  
  /*
- * We have a good packet(s), get it/them out of the buffers.
+ * We have a good packet(s), pass it/them up the network stack.
   */
  static void sonic_rx(struct net_device *dev)
  {
-       unsigned int base_addr = dev->base_addr;
-       struct sonic_local *lp = (struct sonic_local *) dev->priv;
-       sonic_rd_t *rd = &lp->rda[lp->cur_rx & SONIC_RDS_MASK];
+       struct sonic_local *lp = netdev_priv(dev);
         int status;
-
-       while (rd->in_use == 0) {
-               struct sk_buff *skb;
+       int entry = lp->cur_rx;
+
+       while (sonic_rda_get(dev, entry, SONIC_RD_IN_USE) == 0) {
+               struct sk_buff *used_skb;
+               struct sk_buff *new_skb;
+               dma_addr_t new_laddr;
+               u16 bufadr_l;
+               u16 bufadr_h;
                 int pkt_len;
-               unsigned char *pkt_ptr;
  
-               status = rd->rx_status;
-               if (sonic_debug > 3)
-                       printk("status %x, cur_rx %d, cur_rra %x\n",
-                              status, lp->cur_rx, lp->cur_rra);
+               status = sonic_rda_get(dev, entry, SONIC_RD_STATUS);
                 if (status & SONIC_RCR_PRX) {
-                       pkt_len = rd->rx_pktlen;
-                       pkt_ptr =
-                           (char *)
-                           sonic_chiptomem((rd->rx_pktptr_h << 16) +
-                                           rd->rx_pktptr_l);
-
-                       if (sonic_debug > 3)
-                               printk
-                                   ("pktptr %p (rba %p) h:%x l:%x, bsize h:%x l:%x\n",
-                                    pkt_ptr, lp->rba, rd->rx_pktptr_h,
-                                    rd->rx_pktptr_l,
-                                    SONIC_READ(SONIC_RBWC1),
-                                    SONIC_READ(SONIC_RBWC0));
-
                         /* Malloc up new buffer. */
-                       skb = dev_alloc_skb(pkt_len + 2);
-                       if (skb == NULL) {
-                               printk
-                                   ("%s: Memory squeeze, dropping packet.\n",
-                                    dev->name);
+                       new_skb = dev_alloc_skb(SONIC_RBSIZE + 2);
+                       if (new_skb == NULL) {
+                               printk(KERN_ERR "%s: Memory squeeze, dropping packet.\n", dev->name);
+                               lp->stats.rx_dropped++;
+                               break;
+                       }
+                       new_skb->dev = dev;
+                       /* provide 16 byte IP header alignment unless DMA requires otherwise */
+                       if(SONIC_BUS_SCALE(lp->dma_bitmode) == 2)
+                               skb_reserve(new_skb, 2); 
+
+                       new_laddr = dma_map_single(lp->device, skb_put(new_skb, SONIC_RBSIZE),
+                                              SONIC_RBSIZE, DMA_FROM_DEVICE);
+                       if (!new_laddr) {
+                               dev_kfree_skb(new_skb);
+                               printk(KERN_ERR "%s: Failed to map rx buffer, dropping packet.\n", dev->name);
                                 lp->stats.rx_dropped++;
                                 break;
                         }
-                       skb->dev = dev;
-                       skb_reserve(skb, 2);    /* 16 byte align */
-                       skb_put(skb, pkt_len);  /* Make room */
-                       eth_copy_and_sum(skb, pkt_ptr, pkt_len, 0);
-                       skb->protocol = eth_type_trans(skb, dev);
-                       netif_rx(skb);  /* pass the packet to upper layers */
+
+                       /* now we have a new skb to replace it, pass the used one up the stack */
+                       dma_unmap_single(lp->device, lp->rx_laddr[entry], SONIC_RBSIZE, DMA_FROM_DEVICE);
+                       used_skb = lp->rx_skb[entry];
+                       pkt_len = sonic_rda_get(dev, entry, SONIC_RD_PKTLEN);
+                       skb_trim(used_skb, pkt_len);
+                       used_skb->protocol = eth_type_trans(used_skb, dev);
+                       netif_rx(used_skb);
                         dev->last_rx = jiffies;
                         lp->stats.rx_packets++;
                         lp->stats.rx_bytes += pkt_len;
  
+                       /* and insert the new skb */
+                       lp->rx_laddr[entry] = new_laddr;
+                       lp->rx_skb[entry] = new_skb;
+
+                       bufadr_l = (unsigned long)new_laddr & 0xffff;
+                       bufadr_h = (unsigned long)new_laddr >> 16;
+                       sonic_rra_put(dev, entry, SONIC_RR_BUFADR_L, bufadr_l);
+                       sonic_rra_put(dev, entry, SONIC_RR_BUFADR_H, bufadr_h);
                 } else {
                         /* This should only happen, if we enable accepting broken packets. */
                         lp->stats.rx_errors++;
@@ -341,29 +498,35 @@ static void sonic_rx(struct net_device *dev)
                         if (status & SONIC_RCR_CRCR)
                                 lp->stats.rx_crc_errors++;
                 }
-
-               rd->in_use = 1;
-               rd = &lp->rda[(++lp->cur_rx) & SONIC_RDS_MASK];
-               /* now give back the buffer to the receive buffer area */
                 if (status & SONIC_RCR_LPKT) {
                         /*
-                        * this was the last packet out of the current receice buffer
+                        * this was the last packet out of the current receive buffer
                          * give the buffer back to the SONIC
                          */
-                       lp->cur_rra += sizeof(sonic_rr_t);
-                       if (lp->cur_rra >
-                           (lp->rra_laddr +
-                            (SONIC_NUM_RRS -
-                             1) * sizeof(sonic_rr_t))) lp->cur_rra =
-                                   lp->rra_laddr;
-                       SONIC_WRITE(SONIC_RWP, lp->cur_rra & 0xffff);
+                       lp->cur_rwp += SIZEOF_SONIC_RR * SONIC_BUS_SCALE(lp->dma_bitmode);
+                       if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff;
+                       SONIC_WRITE(SONIC_RWP, lp->cur_rwp);
+                       if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) {
+                               if (sonic_debug > 2)
+                                       printk("%s: rx buffer exhausted\n", dev->name);
+                               SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */
+                       }
                 } else
-                       printk
-                           ("%s: rx desc without RCR_LPKT. Shouldn't happen !?\n",
+                       printk(KERN_ERR "%s: rx desc without RCR_LPKT. Shouldn't happen !?\n",
                              dev->name);
+               /*
+                * give back the descriptor
+                */
+               sonic_rda_put(dev, entry, SONIC_RD_LINK,
+                       sonic_rda_get(dev, entry, SONIC_RD_LINK) | SONIC_EOL);
+               sonic_rda_put(dev, entry, SONIC_RD_IN_USE, 1);
+               sonic_rda_put(dev, lp->eol_rx, SONIC_RD_LINK,
+                       sonic_rda_get(dev, lp->eol_rx, SONIC_RD_LINK) & ~SONIC_EOL);
+               lp->eol_rx = entry;
+               lp->cur_rx = entry = (entry + 1) & SONIC_RDS_MASK;
         }
         /*
-        * If any worth-while packets have been received, dev_rint()
+        * If any worth-while packets have been received, netif_rx()
          * has done a mark_bh(NET_BH) for us and will work on them
          * when we get to the bottom-half routine.
          */
@@ -376,8 +539,7 @@ static void sonic_rx(struct net_device *dev)
   */
  static struct net_device_stats *sonic_get_stats(struct net_device *dev)
  {
-       struct sonic_local *lp = (struct sonic_local *) dev->priv;
-       unsigned int base_addr = dev->base_addr;
+       struct sonic_local *lp = netdev_priv(dev);
  
         /* read the tally counter from the SONIC and reset them */
         lp->stats.rx_crc_errors += SONIC_READ(SONIC_CRCT);
@@ -396,8 +558,7 @@ static struct net_device_stats *sonic_get_stats(struct net_device *dev)
   */
  static void sonic_multicast_list(struct net_device *dev)
  {
-       struct sonic_local *lp = (struct sonic_local *) dev->priv;
-       unsigned int base_addr = dev->base_addr;
+       struct sonic_local *lp = netdev_priv(dev);
         unsigned int rcr;
         struct dev_mc_list *dmi = dev->mc_list;
         unsigned char *addr;
@@ -413,20 +574,15 @@ static void sonic_multicast_list(struct net_device *dev)
                         rcr |= SONIC_RCR_AMC;
                 } else {
                         if (sonic_debug > 2)
-                               printk
-                                   ("sonic_multicast_list: mc_count %d\n",
-                                    dev->mc_count);
-                       lp->cda.cam_enable = 1; /* always enable our own address */
+                               printk("sonic_multicast_list: mc_count %d\n", dev->mc_count);
+                       sonic_set_cam_enable(dev, 1);  /* always enable our own address */
                         for (i = 1; i <= dev->mc_count; i++) {
                                 addr = dmi->dmi_addr;
                                 dmi = dmi->next;
-                               lp->cda.cam_desc[i].cam_cap0 =
-                                   addr[1] << 8 | addr[0];
-                               lp->cda.cam_desc[i].cam_cap1 =
-                                   addr[3] << 8 | addr[2];
-                               lp->cda.cam_desc[i].cam_cap2 =
-                                   addr[5] << 8 | addr[4];
-                               lp->cda.cam_enable |= (1 << i);
+                               sonic_cda_put(dev, i, SONIC_CD_CAP0, addr[1] << 8 | addr[0]);
+                               sonic_cda_put(dev, i, SONIC_CD_CAP1, addr[3] << 8 | addr[2]);
+                               sonic_cda_put(dev, i, SONIC_CD_CAP2, addr[5] << 8 | addr[4]);
+                               sonic_set_cam_enable(dev, sonic_get_cam_enable(dev) | (1 << i));
                         }
                         SONIC_WRITE(SONIC_CDC, 16);
                         /* issue Load CAM command */
@@ -447,19 +603,16 @@ static void sonic_multicast_list(struct net_device *dev)
   */
  static int sonic_init(struct net_device *dev)
  {
-       unsigned int base_addr = dev->base_addr;
         unsigned int cmd;
-       struct sonic_local *lp = (struct sonic_local *) dev->priv;
-       unsigned int rra_start;
-       unsigned int rra_end;
+       struct sonic_local *lp = netdev_priv(dev);
         int i;
  
         /*
          * put the Sonic into software-reset mode and
          * disable all interrupts
          */
-       SONIC_WRITE(SONIC_ISR, 0x7fff);
         SONIC_WRITE(SONIC_IMR, 0);
+       SONIC_WRITE(SONIC_ISR, 0x7fff);
         SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
  
         /*
@@ -475,34 +628,32 @@ static int sonic_init(struct net_device *dev)
         if (sonic_debug > 2)
                 printk("sonic_init: initialize receive resource area\n");
  
-       rra_start = lp->rra_laddr & 0xffff;
-       rra_end =
-           (rra_start + (SONIC_NUM_RRS * sizeof(sonic_rr_t))) & 0xffff;
-
         for (i = 0; i < SONIC_NUM_RRS; i++) {
-               lp->rra[i].rx_bufadr_l =
-                   (lp->rba_laddr + i * SONIC_RBSIZE) & 0xffff;
-               lp->rra[i].rx_bufadr_h =
-                   (lp->rba_laddr + i * SONIC_RBSIZE) >> 16;
-               lp->rra[i].rx_bufsize_l = SONIC_RBSIZE >> 1;
-               lp->rra[i].rx_bufsize_h = 0;
+               u16 bufadr_l = (unsigned long)lp->rx_laddr[i] & 0xffff;
+               u16 bufadr_h = (unsigned long)lp->rx_laddr[i] >> 16;
+               sonic_rra_put(dev, i, SONIC_RR_BUFADR_L, bufadr_l);
+               sonic_rra_put(dev, i, SONIC_RR_BUFADR_H, bufadr_h);
+               sonic_rra_put(dev, i, SONIC_RR_BUFSIZE_L, SONIC_RBSIZE >> 1);
+               sonic_rra_put(dev, i, SONIC_RR_BUFSIZE_H, 0);
         }
  
         /* initialize all RRA registers */
-       SONIC_WRITE(SONIC_RSA, rra_start);
-       SONIC_WRITE(SONIC_REA, rra_end);
-       SONIC_WRITE(SONIC_RRP, rra_start);
-       SONIC_WRITE(SONIC_RWP, rra_end);
+       lp->rra_end = (lp->rra_laddr + SONIC_NUM_RRS * SIZEOF_SONIC_RR *
+                                       SONIC_BUS_SCALE(lp->dma_bitmode)) & 0xffff;
+       lp->cur_rwp = (lp->rra_laddr + (SONIC_NUM_RRS - 1) * SIZEOF_SONIC_RR *
+                                       SONIC_BUS_SCALE(lp->dma_bitmode)) & 0xffff;
+  
+       SONIC_WRITE(SONIC_RSA, lp->rra_laddr & 0xffff);
+       SONIC_WRITE(SONIC_REA, lp->rra_end);
+       SONIC_WRITE(SONIC_RRP, lp->rra_laddr & 0xffff);
+       SONIC_WRITE(SONIC_RWP, lp->cur_rwp);
         SONIC_WRITE(SONIC_URRA, lp->rra_laddr >> 16);
-       SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE - 2) >> 1);
-
-       lp->cur_rra =
-           lp->rra_laddr + (SONIC_NUM_RRS - 1) * sizeof(sonic_rr_t);
+       SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1));
  
         /* load the resource pointers */
         if (sonic_debug > 3)
-               printk("sonic_init: issueing RRRA command\n");
-
+               printk("sonic_init: issuing RRRA command\n");
+  
         SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA);
         i = 0;
         while (i++ < 100) {
@@ -511,27 +662,30 @@ static int sonic_init(struct net_device *dev)
         }
  
         if (sonic_debug > 2)
-               printk("sonic_init: status=%x\n", SONIC_READ(SONIC_CMD));
-
+               printk("sonic_init: status=%x i=%d\n", SONIC_READ(SONIC_CMD), i);
+    
         /*
          * Initialize the receive descriptors so that they
          * become a circular linked list, ie. let the last
          * descriptor point to the first again.
          */
         if (sonic_debug > 2)
-               printk("sonic_init: initialize receive descriptors\n");
-       for (i = 0; i < SONIC_NUM_RDS; i++) {
-               lp->rda[i].rx_status = 0;
-               lp->rda[i].rx_pktlen = 0;
-               lp->rda[i].rx_pktptr_l = 0;
-               lp->rda[i].rx_pktptr_h = 0;
-               lp->rda[i].rx_seqno = 0;
-               lp->rda[i].in_use = 1;
-               lp->rda[i].link =
-                   lp->rda_laddr + (i + 1) * sizeof(sonic_rd_t);
+               printk("sonic_init: initialize receive descriptors\n");      
+       for (i=0; i<SONIC_NUM_RDS; i++) {
+               sonic_rda_put(dev, i, SONIC_RD_STATUS, 0);
+               sonic_rda_put(dev, i, SONIC_RD_PKTLEN, 0);
+               sonic_rda_put(dev, i, SONIC_RD_PKTPTR_L, 0);
+               sonic_rda_put(dev, i, SONIC_RD_PKTPTR_H, 0);
+               sonic_rda_put(dev, i, SONIC_RD_SEQNO, 0);
+               sonic_rda_put(dev, i, SONIC_RD_IN_USE, 1);
+               sonic_rda_put(dev, i, SONIC_RD_LINK,
+                       lp->rda_laddr +
+                       ((i+1) * SIZEOF_SONIC_RD * SONIC_BUS_SCALE(lp->dma_bitmode)));
         }
         /* fix last descriptor */
-       lp->rda[SONIC_NUM_RDS - 1].link = lp->rda_laddr;
+       sonic_rda_put(dev, SONIC_NUM_RDS - 1, SONIC_RD_LINK,
+               (lp->rda_laddr & 0xffff) | SONIC_EOL);
+       lp->eol_rx = SONIC_NUM_RDS - 1;
         lp->cur_rx = 0;
         SONIC_WRITE(SONIC_URDA, lp->rda_laddr >> 16);
         SONIC_WRITE(SONIC_CRDA, lp->rda_laddr & 0xffff);
@@ -542,34 +696,34 @@ static int sonic_init(struct net_device *dev)
         if (sonic_debug > 2)
                 printk("sonic_init: initialize transmit descriptors\n");
         for (i = 0; i < SONIC_NUM_TDS; i++) {
-               lp->tda[i].tx_status = 0;
-               lp->tda[i].tx_config = 0;
-               lp->tda[i].tx_pktsize = 0;
-               lp->tda[i].tx_frag_count = 0;
-               lp->tda[i].link =
-                   (lp->tda_laddr +
-                    (i + 1) * sizeof(sonic_td_t)) | SONIC_END_OF_LINKS;
+               sonic_tda_put(dev, i, SONIC_TD_STATUS, 0);
+               sonic_tda_put(dev, i, SONIC_TD_CONFIG, 0);
+               sonic_tda_put(dev, i, SONIC_TD_PKTSIZE, 0);
+               sonic_tda_put(dev, i, SONIC_TD_FRAG_COUNT, 0);
+               sonic_tda_put(dev, i, SONIC_TD_LINK,
+                       (lp->tda_laddr & 0xffff) +
+                       (i + 1) * SIZEOF_SONIC_TD * SONIC_BUS_SCALE(lp->dma_bitmode));
+               lp->tx_skb[i] = NULL;
         }
-       lp->tda[SONIC_NUM_TDS - 1].link =
-           (lp->tda_laddr & 0xffff) | SONIC_END_OF_LINKS;
+       /* fix last descriptor */
+       sonic_tda_put(dev, SONIC_NUM_TDS - 1, SONIC_TD_LINK,
+               (lp->tda_laddr & 0xffff));
  
         SONIC_WRITE(SONIC_UTDA, lp->tda_laddr >> 16);
         SONIC_WRITE(SONIC_CTDA, lp->tda_laddr & 0xffff);
-       lp->cur_tx = lp->dirty_tx = 0;
-
+       lp->cur_tx = lp->next_tx = 0;
+       lp->eol_tx = SONIC_NUM_TDS - 1;
+    
         /*
          * put our own address to CAM desc[0]
          */
-       lp->cda.cam_desc[0].cam_cap0 =
-           dev->dev_addr[1] << 8 | dev->dev_addr[0];
-       lp->cda.cam_desc[0].cam_cap1 =
-           dev->dev_addr[3] << 8 | dev->dev_addr[2];
-       lp->cda.cam_desc[0].cam_cap2 =
-           dev->dev_addr[5] << 8 | dev->dev_addr[4];
-       lp->cda.cam_enable = 1;
+       sonic_cda_put(dev, 0, SONIC_CD_CAP0, dev->dev_addr[1] << 8 | dev->dev_addr[0]);
+       sonic_cda_put(dev, 0, SONIC_CD_CAP1, dev->dev_addr[3] << 8 | dev->dev_addr[2]);
+       sonic_cda_put(dev, 0, SONIC_CD_CAP2, dev->dev_addr[5] << 8 | dev->dev_addr[4]);
+       sonic_set_cam_enable(dev, 1);
  
         for (i = 0; i < 16; i++)
-               lp->cda.cam_desc[i].cam_entry_pointer = i;
+               sonic_cda_put(dev, i, SONIC_CD_ENTRY_POINTER, i);
  
         /*
          * initialize CAM registers
@@ -588,8 +742,8 @@ static int sonic_init(struct net_device *dev)
                         break;
         }
         if (sonic_debug > 2) {
-               printk("sonic_init: CMD=%x, ISR=%x\n",
-                      SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR));
+               printk("sonic_init: CMD=%x, ISR=%x, i=%d\n",
+                      SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
         }
  
         /*
@@ -604,7 +758,7 @@ static int sonic_init(struct net_device *dev)
  
         cmd = SONIC_READ(SONIC_CMD);
         if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0)
-               printk("sonic_init: failed, status=%x\n", cmd);
+               printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd);
  
         if (sonic_debug > 2)
                 printk("sonic_init: new status=%x\n",
diff --git a/drivers/net/sonic.h b/drivers/net/sonic.h

index c4a6d58e4afbfd592caa466aaddbf699ea5b3a6f..cede969a8baae88fcfc0a8083a049999d84cb11d 100644 (file)
--- a/drivers/net/sonic.h
+++ b/drivers/net/sonic.h
@@ -1,5 +1,5 @@
  /*
- * Helpfile for sonic.c
+ * Header file for sonic.c
   *
   * (C) Waldorf Electronics, Germany
   * Written by Andreas Busse
@@ -9,10 +9,16 @@
   * and pad structure members must be exchanged. Also, the structures
   * need to be changed accordingly to the bus size. 
   *
- * 981229 MSch:        did just that for the 68k Mac port (32 bit, big endian),
- *             see CONFIG_MACSONIC branch below.
+ * 981229 MSch:        did just that for the 68k Mac port (32 bit, big endian)
   *
+ * 990611 David Huggins-Daines <dhd@debian.org>: This machine abstraction
+ * does not cope with 16-bit bus sizes very well.  Therefore I have
+ * rewritten it with ugly macros and evil inlines.
+ *
+ * 050625 Finn Thain: introduced more 32-bit cards and dhd's support
+ *        for 16-bit cards (from the mac68k project).
   */
+
  #ifndef SONIC_H
  #define SONIC_H
  
@@ -83,6 +89,7 @@
  /*
   * Error counters
   */
+
  #define SONIC_CRCT              0x2c
  #define SONIC_FAET              0x2d
  #define SONIC_MPT               0x2e
@@ -182,14 +189,14 @@
  
  #define SONIC_INT_BR           0x4000
  #define SONIC_INT_HBL          0x2000
-#define SONIC_INT_LCD           0x1000
-#define SONIC_INT_PINT          0x0800
-#define SONIC_INT_PKTRX         0x0400
-#define SONIC_INT_TXDN          0x0200
-#define SONIC_INT_TXER          0x0100
-#define SONIC_INT_TC            0x0080
-#define SONIC_INT_RDE           0x0040
-#define SONIC_INT_RBE           0x0020
+#define SONIC_INT_LCD          0x1000
+#define SONIC_INT_PINT         0x0800
+#define SONIC_INT_PKTRX                0x0400
+#define SONIC_INT_TXDN         0x0200
+#define SONIC_INT_TXER         0x0100
+#define SONIC_INT_TC           0x0080
+#define SONIC_INT_RDE          0x0040
+#define SONIC_INT_RBE          0x0020
  #define SONIC_INT_RBAE         0x0010
  #define SONIC_INT_CRC          0x0008
  #define SONIC_INT_FAE          0x0004
@@ -201,224 +208,61 @@
   * The interrupts we allow.
   */
  
-#define SONIC_IMR_DEFAULT      (SONIC_INT_BR | \
-                               SONIC_INT_LCD | \
-                                SONIC_INT_PINT | \
+#define SONIC_IMR_DEFAULT     ( SONIC_INT_BR | \
+                                SONIC_INT_LCD | \
+                                SONIC_INT_RFO | \
                                  SONIC_INT_PKTRX | \
                                  SONIC_INT_TXDN | \
                                  SONIC_INT_TXER | \
                                  SONIC_INT_RDE | \
-                                SONIC_INT_RBE | \
                                  SONIC_INT_RBAE | \
                                  SONIC_INT_CRC | \
                                  SONIC_INT_FAE | \
                                  SONIC_INT_MP)
  
  
-#define        SONIC_END_OF_LINKS      0x0001
-
-
-#ifdef CONFIG_MACSONIC
-/*
- * Big endian like structures on 680x0 Macs
- */
-
-typedef struct {
-       u32 rx_bufadr_l;        /* receive buffer ptr */
-       u32 rx_bufadr_h;
-
-       u32 rx_bufsize_l;       /* no. of words in the receive buffer */
-       u32 rx_bufsize_h;
-} sonic_rr_t;
-
-/*
- * Sonic receive descriptor. Receive descriptors are
- * kept in a linked list of these structures.
- */
-
-typedef struct {
-       SREGS_PAD(pad0);
-       u16 rx_status;          /* status after reception of a packet */
-        SREGS_PAD(pad1);
-       u16 rx_pktlen;          /* length of the packet incl. CRC */
-
-       /*
-        * Pointers to the location in the receive buffer area (RBA)
-        * where the packet resides. A packet is always received into
-        * a contiguous piece of memory.
-        */
-        SREGS_PAD(pad2);
-       u16 rx_pktptr_l;
-        SREGS_PAD(pad3);
-       u16 rx_pktptr_h;
-
-        SREGS_PAD(pad4);
-       u16 rx_seqno;           /* sequence no. */
-
-        SREGS_PAD(pad5);
-       u16 link;               /* link to next RDD (end if EOL bit set) */
-
-       /*
-        * Owner of this descriptor, 0= driver, 1=sonic
-        */
-
-        SREGS_PAD(pad6);
-       u16 in_use;
-
-       caddr_t rda_next;       /* pointer to next RD */
-} sonic_rd_t;
-
-
-/*
- * Describes a Transmit Descriptor
- */
-typedef struct {
-       SREGS_PAD(pad0);
-       u16 tx_status;          /* status after transmission of a packet */
-        SREGS_PAD(pad1);
-       u16 tx_config;          /* transmit configuration for this packet */
-        SREGS_PAD(pad2);
-       u16 tx_pktsize;         /* size of the packet to be transmitted */
-        SREGS_PAD(pad3);
-       u16 tx_frag_count;      /* no. of fragments */
-
-        SREGS_PAD(pad4);
-       u16 tx_frag_ptr_l;
-        SREGS_PAD(pad5);
-       u16 tx_frag_ptr_h;
-        SREGS_PAD(pad6);
-       u16 tx_frag_size;
-
-        SREGS_PAD(pad7);
-       u16 link;               /* ptr to next descriptor */
-} sonic_td_t;
-
-
-/*
- * Describes an entry in the CAM Descriptor Area.
- */
-
-typedef struct {
-       SREGS_PAD(pad0);
-       u16 cam_entry_pointer;
-        SREGS_PAD(pad1);
-       u16 cam_cap0;
-        SREGS_PAD(pad2);
-       u16 cam_cap1;
-        SREGS_PAD(pad3);
-       u16 cam_cap2;
-} sonic_cd_t;
-
+#define SONIC_EOL       0x0001
  #define CAM_DESCRIPTORS 16
  
-
-typedef struct {
-       sonic_cd_t cam_desc[CAM_DESCRIPTORS];
-        SREGS_PAD(pad);
-       u16 cam_enable;
-} sonic_cda_t;
-
-#else                          /* original declarations, little endian 32 bit */
-
-/*
- * structure definitions
- */
-
-typedef struct {
-       u32 rx_bufadr_l;        /* receive buffer ptr */
-       u32 rx_bufadr_h;
-
-       u32 rx_bufsize_l;       /* no. of words in the receive buffer */
-       u32 rx_bufsize_h;
-} sonic_rr_t;
-
-/*
- * Sonic receive descriptor. Receive descriptors are
- * kept in a linked list of these structures.
- */
-
-typedef struct {
-       u16 rx_status;          /* status after reception of a packet */
-        SREGS_PAD(pad0);
-       u16 rx_pktlen;          /* length of the packet incl. CRC */
-        SREGS_PAD(pad1);
-
-       /*
-        * Pointers to the location in the receive buffer area (RBA)
-        * where the packet resides. A packet is always received into
-        * a contiguous piece of memory.
-        */
-       u16 rx_pktptr_l;
-        SREGS_PAD(pad2);
-       u16 rx_pktptr_h;
-        SREGS_PAD(pad3);
-
-       u16 rx_seqno;           /* sequence no. */
-        SREGS_PAD(pad4);
-
-       u16 link;               /* link to next RDD (end if EOL bit set) */
-        SREGS_PAD(pad5);
-
-       /*
-        * Owner of this descriptor, 0= driver, 1=sonic
-        */
-
-       u16 in_use;
-        SREGS_PAD(pad6);
-
-       caddr_t rda_next;       /* pointer to next RD */
-} sonic_rd_t;
-
-
-/*
- * Describes a Transmit Descriptor
- */
-typedef struct {
-       u16 tx_status;          /* status after transmission of a packet */
-        SREGS_PAD(pad0);
-       u16 tx_config;          /* transmit configuration for this packet */
-        SREGS_PAD(pad1);
-       u16 tx_pktsize;         /* size of the packet to be transmitted */
-        SREGS_PAD(pad2);
-       u16 tx_frag_count;      /* no. of fragments */
-        SREGS_PAD(pad3);
-
-       u16 tx_frag_ptr_l;
-        SREGS_PAD(pad4);
-       u16 tx_frag_ptr_h;
-        SREGS_PAD(pad5);
-       u16 tx_frag_size;
-        SREGS_PAD(pad6);
-
-       u16 link;               /* ptr to next descriptor */
-        SREGS_PAD(pad7);
-} sonic_td_t;
-
-
-/*
- * Describes an entry in the CAM Descriptor Area.
- */
-
-typedef struct {
-       u16 cam_entry_pointer;
-        SREGS_PAD(pad0);
-       u16 cam_cap0;
-        SREGS_PAD(pad1);
-       u16 cam_cap1;
-        SREGS_PAD(pad2);
-       u16 cam_cap2;
-        SREGS_PAD(pad3);
-} sonic_cd_t;
-
-#define CAM_DESCRIPTORS 16
-
-
-typedef struct {
-       sonic_cd_t cam_desc[CAM_DESCRIPTORS];
-       u16 cam_enable;
-        SREGS_PAD(pad);
-} sonic_cda_t;
-#endif                         /* endianness */
+/* Offsets in the various DMA buffers accessed by the SONIC */
+
+#define SONIC_BITMODE16 0
+#define SONIC_BITMODE32 1
+#define SONIC_BUS_SCALE(bitmode) ((bitmode) ? 4 : 2)
+/* Note!  These are all measured in bus-size units, so use SONIC_BUS_SCALE */
+#define SIZEOF_SONIC_RR 4
+#define SONIC_RR_BUFADR_L  0
+#define SONIC_RR_BUFADR_H  1
+#define SONIC_RR_BUFSIZE_L 2
+#define SONIC_RR_BUFSIZE_H 3
+
+#define SIZEOF_SONIC_RD 7
+#define SONIC_RD_STATUS   0
+#define SONIC_RD_PKTLEN   1
+#define SONIC_RD_PKTPTR_L 2
+#define SONIC_RD_PKTPTR_H 3
+#define SONIC_RD_SEQNO    4
+#define SONIC_RD_LINK     5
+#define SONIC_RD_IN_USE   6
+
+#define SIZEOF_SONIC_TD 8
+#define SONIC_TD_STATUS       0
+#define SONIC_TD_CONFIG       1
+#define SONIC_TD_PKTSIZE      2
+#define SONIC_TD_FRAG_COUNT   3
+#define SONIC_TD_FRAG_PTR_L   4
+#define SONIC_TD_FRAG_PTR_H   5
+#define SONIC_TD_FRAG_SIZE    6
+#define SONIC_TD_LINK         7
+
+#define SIZEOF_SONIC_CD 4
+#define SONIC_CD_ENTRY_POINTER 0
+#define SONIC_CD_CAP0          1
+#define SONIC_CD_CAP1          2
+#define SONIC_CD_CAP2          3
+
+#define SIZEOF_SONIC_CDA ((CAM_DESCRIPTORS * SIZEOF_SONIC_CD) + 1)
+#define SONIC_CDA_CAM_ENABLE   (CAM_DESCRIPTORS * SIZEOF_SONIC_CD)
  
  /*
   * Some tunables for the buffer areas. Power of 2 is required
@@ -426,44 +270,60 @@ typedef struct {
   *
   * MSch: use more buffer space for the slow m68k Macs!
   */
-#ifdef CONFIG_MACSONIC
-#define SONIC_NUM_RRS    32    /* number of receive resources */
-#define SONIC_NUM_RDS    SONIC_NUM_RRS /* number of receive descriptors */
-#define SONIC_NUM_TDS    32    /* number of transmit descriptors */
-#else
-#define SONIC_NUM_RRS    16    /* number of receive resources */
-#define SONIC_NUM_RDS    SONIC_NUM_RRS /* number of receive descriptors */
-#define SONIC_NUM_TDS    16    /* number of transmit descriptors */
-#endif
-#define SONIC_RBSIZE   1520    /* size of one resource buffer */
+#define SONIC_NUM_RRS   16            /* number of receive resources */
+#define SONIC_NUM_RDS   SONIC_NUM_RRS /* number of receive descriptors */
+#define SONIC_NUM_TDS   16            /* number of transmit descriptors */
  
-#define SONIC_RDS_MASK   (SONIC_NUM_RDS-1)
-#define SONIC_TDS_MASK   (SONIC_NUM_TDS-1)
+#define SONIC_RDS_MASK  (SONIC_NUM_RDS-1)
+#define SONIC_TDS_MASK  (SONIC_NUM_TDS-1)
  
+#define SONIC_RBSIZE   1520          /* size of one resource buffer */
+
+/* Again, measured in bus size units! */
+#define SIZEOF_SONIC_DESC (SIZEOF_SONIC_CDA    \
+       + (SIZEOF_SONIC_TD * SONIC_NUM_TDS)     \
+       + (SIZEOF_SONIC_RD * SONIC_NUM_RDS)     \
+       + (SIZEOF_SONIC_RR * SONIC_NUM_RRS))
  
  /* Information that need to be kept for each board. */
  struct sonic_local {
-       sonic_cda_t cda;        /* virtual CPU address of CDA */
-       sonic_td_t tda[SONIC_NUM_TDS];  /* transmit descriptor area */
-       sonic_rr_t rra[SONIC_NUM_RRS];  /* receive resource area */
-       sonic_rd_t rda[SONIC_NUM_RDS];  /* receive descriptor area */
-       struct sk_buff *tx_skb[SONIC_NUM_TDS];  /* skbuffs for packets to transmit */
-       unsigned int tx_laddr[SONIC_NUM_TDS];   /* logical DMA address fro skbuffs */
-       unsigned char *rba;     /* start of receive buffer areas */
-       unsigned int cda_laddr; /* logical DMA address of CDA */
-       unsigned int tda_laddr; /* logical DMA address of TDA */
-       unsigned int rra_laddr; /* logical DMA address of RRA */
-       unsigned int rda_laddr; /* logical DMA address of RDA */
-       unsigned int rba_laddr; /* logical DMA address of RBA */
-       unsigned int cur_rra;   /* current indexes to resource areas */
+       /* Bus size.  0 == 16 bits, 1 == 32 bits. */
+       int dma_bitmode;
+       /* Register offset within the longword (independent of endianness,
+          and varies from one type of Macintosh SONIC to another
+          (Aarrgh)) */
+       int reg_offset;
+       void *descriptors;
+       /* Crud.  These areas have to be within the same 64K.  Therefore
+       we allocate a descriptors page, and point these to places within it. */
+       void *cda;  /* CAM descriptor area */
+       void *tda;  /* Transmit descriptor area */
+       void *rra;  /* Receive resource area */
+       void *rda;  /* Receive descriptor area */
+       struct sk_buff* volatile rx_skb[SONIC_NUM_RRS]; /* packets to be received */
+       struct sk_buff* volatile tx_skb[SONIC_NUM_TDS]; /* packets to be transmitted */
+       unsigned int tx_len[SONIC_NUM_TDS]; /* lengths of tx DMA mappings */
+       /* Logical DMA addresses on MIPS, bus addresses on m68k
+        * (so "laddr" is a bit misleading) */
+       dma_addr_t descriptors_laddr;
+       u32 cda_laddr;              /* logical DMA address of CDA */
+       u32 tda_laddr;              /* logical DMA address of TDA */
+       u32 rra_laddr;              /* logical DMA address of RRA */
+       u32 rda_laddr;              /* logical DMA address of RDA */
+       dma_addr_t rx_laddr[SONIC_NUM_RRS]; /* logical DMA addresses of rx skbuffs */
+       dma_addr_t tx_laddr[SONIC_NUM_TDS]; /* logical DMA addresses of tx skbuffs */
+       unsigned int rra_end;
+       unsigned int cur_rwp;
         unsigned int cur_rx;
-       unsigned int cur_tx;
-       unsigned int dirty_tx;  /* last unacked transmit packet */
-       char tx_full;
+       unsigned int cur_tx;           /* first unacked transmit packet */
+       unsigned int eol_rx;
+       unsigned int eol_tx;           /* last unacked transmit packet */
+       unsigned int next_tx;          /* next free TD */
+       struct device *device;         /* generic device */
         struct net_device_stats stats;
  };
  
-#define TX_TIMEOUT 6
+#define TX_TIMEOUT (3 * HZ)
  
  /* Index to functions, as function prototypes. */
  
@@ -477,6 +337,114 @@ static void sonic_multicast_list(struct net_device *dev);
  static int sonic_init(struct net_device *dev);
  static void sonic_tx_timeout(struct net_device *dev);
  
+/* Internal inlines for reading/writing DMA buffers.  Note that bus
+   size and endianness matter here, whereas they don't for registers,
+   as far as we can tell. */
+/* OpenBSD calls this "SWO".  I'd like to think that sonic_buf_put()
+   is a much better name. */
+static inline void sonic_buf_put(void* base, int bitmode,
+                                int offset, __u16 val)
+{
+       __u16 *halfword = base;
+#ifdef __BIG_ENDIAN
+       const int data_half = 1;        /* value is 2nd half of a 32-bit slot */
+#else
+       const int data_half = 0;        /* value is 1st half of a 32-bit slot */
+#endif
+       /* 32-bit mode: two halfwords per slot; 16-bit mode: packed halfwords */
+       halfword[bitmode ? (offset * 2) + data_half : offset] = val;
+}
+
+static inline __u16 sonic_buf_get(void* base, int bitmode,
+                                 int offset)
+{
+       volatile __u16 *halfword = base;
+#ifdef __BIG_ENDIAN
+       const int data_half = 1;        /* value is 2nd half of a 32-bit slot */
+#else
+       const int data_half = 0;        /* value is 1st half of a 32-bit slot */
+#endif
+       /* 32-bit mode: two halfwords per slot; 16-bit mode: packed halfwords */
+       return halfword[bitmode ? (offset * 2) + data_half : offset];
+}
+
+/* Inlines that you should actually use for reading/writing DMA buffers */
+/* Store val in 16-bit field `offset` of CAM descriptor `entry`. */
+static inline void sonic_cda_put(struct net_device* dev, int entry,
+                                int offset, __u16 val)
+{
+       struct sonic_local *lp = netdev_priv(dev);
+       sonic_buf_put(lp->cda, lp->dma_bitmode, (entry * SIZEOF_SONIC_CD) + offset, val);
+}
+
+/* Fetch 16-bit field `offset` of CAM descriptor `entry`. */
+static inline __u16 sonic_cda_get(struct net_device* dev, int entry,
+                                 int offset)
+{
+       struct sonic_local *lp = netdev_priv(dev);
+       return sonic_buf_get(lp->cda, lp->dma_bitmode, (entry * SIZEOF_SONIC_CD) + offset);
+}
+
+static inline void sonic_set_cam_enable(struct net_device* dev, __u16 val)
+{
+       struct sonic_local *lp = netdev_priv(dev);      /* mask is the CDA's last word */
+       sonic_buf_put(lp->cda, lp->dma_bitmode, SONIC_CDA_CAM_ENABLE, val);
+}
+
+static inline __u16 sonic_get_cam_enable(struct net_device* dev)
+{
+       struct sonic_local *lp = netdev_priv(dev);      /* mask is the CDA's last word */
+       return sonic_buf_get(lp->cda, lp->dma_bitmode, SONIC_CDA_CAM_ENABLE);
+}
+
+/* Store val in 16-bit field `offset` of transmit descriptor `entry`. */
+static inline void sonic_tda_put(struct net_device* dev, int entry,
+                                int offset, __u16 val)
+{
+       struct sonic_local *lp = netdev_priv(dev);
+       sonic_buf_put(lp->tda, lp->dma_bitmode, (entry * SIZEOF_SONIC_TD) + offset, val);
+}
+
+/* Fetch 16-bit field `offset` of transmit descriptor `entry`. */
+static inline __u16 sonic_tda_get(struct net_device* dev, int entry,
+                                 int offset)
+{
+       struct sonic_local *lp = netdev_priv(dev);
+       return sonic_buf_get(lp->tda, lp->dma_bitmode, (entry * SIZEOF_SONIC_TD) + offset);
+}
+
+/* Store val in 16-bit field `offset` of receive descriptor `entry`. */
+static inline void sonic_rda_put(struct net_device* dev, int entry,
+                                int offset, __u16 val)
+{
+       struct sonic_local *lp = netdev_priv(dev);
+       sonic_buf_put(lp->rda, lp->dma_bitmode, (entry * SIZEOF_SONIC_RD) + offset, val);
+}
+
+/* Fetch 16-bit field `offset` of receive descriptor `entry`. */
+static inline __u16 sonic_rda_get(struct net_device* dev, int entry,
+                                 int offset)
+{
+       struct sonic_local *lp = netdev_priv(dev);
+       return sonic_buf_get(lp->rda, lp->dma_bitmode, (entry * SIZEOF_SONIC_RD) + offset);
+}
+
+/* Store val in 16-bit field `offset` of receive resource `entry`. */
+static inline void sonic_rra_put(struct net_device* dev, int entry,
+                                int offset, __u16 val)
+{
+       struct sonic_local *lp = netdev_priv(dev);
+       sonic_buf_put(lp->rra, lp->dma_bitmode, (entry * SIZEOF_SONIC_RR) + offset, val);
+}
+
+/* Fetch 16-bit field `offset` of receive resource `entry`. */
+static inline __u16 sonic_rra_get(struct net_device* dev, int entry,
+                                 int offset)
+{
+       struct sonic_local *lp = netdev_priv(dev);
+       return sonic_buf_get(lp->rra, lp->dma_bitmode, (entry * SIZEOF_SONIC_RR) + offset);
+}
+
  static const char *version =
      "sonic.c:v0.92 20.9.98 tsbogend@alpha.franken.de\n";
  
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c

index 201a550f0bcc3193a5b4481a67c580ae73145b17..af8263a1580ea5b6fca000c5ea59b2cd3af7a4af 100644 (file)
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -66,8 +66,8 @@
  
  #define DRV_MODULE_NAME                "tg3"
  #define PFX DRV_MODULE_NAME    ": "
-#define DRV_MODULE_VERSION     "3.34"
-#define DRV_MODULE_RELDATE     "July 25, 2005"
+#define DRV_MODULE_VERSION     "3.37"
+#define DRV_MODULE_RELDATE     "August 25, 2005"
  
  #define TG3_DEF_MAC_MODE       0
  #define TG3_DEF_RX_MODE                0
@@ -340,41 +340,92 @@ static struct {
  
  static void tg3_write_indirect_reg32(struct tg3 *tp, u32 off, u32 val)
  {
-       if ((tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) != 0) {
-               spin_lock_bh(&tp->indirect_lock);
-               pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off);
-               pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val);
-               spin_unlock_bh(&tp->indirect_lock);
-       } else {
-               writel(val, tp->regs + off);
-               if ((tp->tg3_flags & TG3_FLAG_5701_REG_WRITE_BUG) != 0)
-                       readl(tp->regs + off);
+       unsigned long flags;
+
+       spin_lock_irqsave(&tp->indirect_lock, flags);
+       pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off);
+       pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val);
+       spin_unlock_irqrestore(&tp->indirect_lock, flags);
+}
+
+static void tg3_write_flush_reg32(struct tg3 *tp, u32 off, u32 val)
+{
+       writel(val, tp->regs + off);
+       readl(tp->regs + off);
+}
+
+static u32 tg3_read_indirect_reg32(struct tg3 *tp, u32 off)
+{
+       unsigned long flags;
+       u32 val;
+
+       spin_lock_irqsave(&tp->indirect_lock, flags);
+       pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off);
+       pci_read_config_dword(tp->pdev, TG3PCI_REG_DATA, &val);
+       spin_unlock_irqrestore(&tp->indirect_lock, flags);
+       return val;
+}
+
+static void tg3_write_indirect_mbox(struct tg3 *tp, u32 off, u32 val)
+{
+       unsigned long flags;
+
+       if (off == (MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW)) {
+               pci_write_config_dword(tp->pdev, TG3PCI_RCV_RET_RING_CON_IDX +
+                                      TG3_64BIT_REG_LOW, val);
+               return;
         }
+       if (off == (MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW)) {
+               pci_write_config_dword(tp->pdev, TG3PCI_STD_RING_PROD_IDX +
+                                      TG3_64BIT_REG_LOW, val);
+               return;
+       }
+
+       spin_lock_irqsave(&tp->indirect_lock, flags);
+       pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off + 0x5600);
+       pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val);
+       spin_unlock_irqrestore(&tp->indirect_lock, flags);
+
+       /* In indirect mode when disabling interrupts, we also need
+        * to clear the interrupt bit in the GRC local ctrl register.
+        */
+       if ((off == (MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW)) &&
+           (val == 0x1)) {
+               pci_write_config_dword(tp->pdev, TG3PCI_MISC_LOCAL_CTRL,
+                                      tp->grc_local_ctrl|GRC_LCLCTRL_CLEARINT);
+       }
+}
+
+static u32 tg3_read_indirect_mbox(struct tg3 *tp, u32 off)
+{
+       unsigned long flags;
+       u32 val;
+
+       spin_lock_irqsave(&tp->indirect_lock, flags);
+       pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off + 0x5600);
+       pci_read_config_dword(tp->pdev, TG3PCI_REG_DATA, &val);
+       spin_unlock_irqrestore(&tp->indirect_lock, flags);
+       return val;
  }
  
  static void _tw32_flush(struct tg3 *tp, u32 off, u32 val)
  {
-       if ((tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) != 0) {
-               spin_lock_bh(&tp->indirect_lock);
-               pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off);
-               pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val);
-               spin_unlock_bh(&tp->indirect_lock);
-       } else {
-               void __iomem *dest = tp->regs + off;
-               writel(val, dest);
-               readl(dest);    /* always flush PCI write */
-       }
+       tp->write32(tp, off, val);
+       if (!(tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) &&
+           !(tp->tg3_flags & TG3_FLAG_5701_REG_WRITE_BUG) &&
+           !(tp->tg3_flags2 & TG3_FLG2_ICH_WORKAROUND))
+               tp->read32(tp, off);    /* flush */
  }
  
-static inline void _tw32_rx_mbox(struct tg3 *tp, u32 off, u32 val)
+static inline void tw32_mailbox_flush(struct tg3 *tp, u32 off, u32 val)
  {
-       void __iomem *mbox = tp->regs + off;
-       writel(val, mbox);
-       if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
-               readl(mbox);
+       tp->write32_mbox(tp, off, val);
+       if (!(tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) &&
+           !(tp->tg3_flags2 & TG3_FLG2_ICH_WORKAROUND))
+               tp->read32_mbox(tp, off);
  }
  
-static inline void _tw32_tx_mbox(struct tg3 *tp, u32 off, u32 val)
+static void tg3_write32_tx_mbox(struct tg3 *tp, u32 off, u32 val)
  {
         void __iomem *mbox = tp->regs + off;
         writel(val, mbox);
@@ -384,46 +435,57 @@ static inline void _tw32_tx_mbox(struct tg3 *tp, u32 off, u32 val)
                 readl(mbox);
  }
  
-#define tw32_mailbox(reg, val)  writel(((val) & 0xffffffff), tp->regs + (reg))
-#define tw32_rx_mbox(reg, val)  _tw32_rx_mbox(tp, reg, val)
-#define tw32_tx_mbox(reg, val)  _tw32_tx_mbox(tp, reg, val)
+/* Plain MMIO register write: writel() to tp->regs + off with no readback.
+ * Installed by tg3_get_invariants() as the default write method.
+ */
+static void tg3_write32(struct tg3 *tp, u32 off, u32 val)
+{
+       writel(val, tp->regs + off);
+}
+
+/* Plain MMIO register read: readl() from tp->regs + off.  Installed by
+ * tg3_get_invariants() as the default read method.
+ */
+static u32 tg3_read32(struct tg3 *tp, u32 off)
+{
+       return readl(tp->regs + off);
+}
+
+/* Register accessor shorthands.  Each one expands to a call through a
+ * variable named "tp", which must be in scope at the point of use.
+ * Arguments are parenthesized in every macro for consistency.
+ */
+#define tw32_mailbox(reg, val) tp->write32_mbox(tp, (reg), (val))
+#define tw32_mailbox_f(reg, val)       tw32_mailbox_flush(tp, (reg), (val))
+#define tw32_rx_mbox(reg, val) tp->write32_rx_mbox(tp, (reg), (val))
+#define tw32_tx_mbox(reg, val) tp->write32_tx_mbox(tp, (reg), (val))
+#define tr32_mailbox(reg)      tp->read32_mbox(tp, (reg))
  
-#define tw32(reg,val)          tg3_write_indirect_reg32(tp,(reg),(val))
+#define tw32(reg,val)          tp->write32(tp, (reg), (val))
  #define tw32_f(reg,val)                _tw32_flush(tp,(reg),(val))
-#define tw16(reg,val)          writew(((val) & 0xffff), tp->regs + (reg))
-#define tw8(reg,val)           writeb(((val) & 0xff), tp->regs + (reg))
-#define tr32(reg)              readl(tp->regs + (reg))
-#define tr16(reg)              readw(tp->regs + (reg))
-#define tr8(reg)               readb(tp->regs + (reg))
+#define tr32(reg)              tp->read32(tp, (reg))
  
  static void tg3_write_mem(struct tg3 *tp, u32 off, u32 val)
  {
-       spin_lock_bh(&tp->indirect_lock);
+       unsigned long flags;
+
+       /* Store val at offset off through the memory window registers in
+        * PCI config space: TG3PCI_MEM_WIN_BASE_ADDR selects the offset
+        * and TG3PCI_MEM_WIN_DATA carries the data.  The whole sequence
+        * runs under indirect_lock with local interrupts disabled.
+        */
+       spin_lock_irqsave(&tp->indirect_lock, flags);
         pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off);
         pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val);
  
         /* Always leave this as zero. */
         pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0);
-       spin_unlock_bh(&tp->indirect_lock);
+       spin_unlock_irqrestore(&tp->indirect_lock, flags);
  }
  
  static void tg3_read_mem(struct tg3 *tp, u32 off, u32 *val)
  {
-       spin_lock_bh(&tp->indirect_lock);
+       unsigned long flags;
+
+       /* Fetch the word at offset off into *val through the memory
+        * window registers in PCI config space: TG3PCI_MEM_WIN_BASE_ADDR
+        * selects the offset and TG3PCI_MEM_WIN_DATA returns the data.
+        * The whole sequence runs under indirect_lock with local
+        * interrupts disabled.
+        */
+       spin_lock_irqsave(&tp->indirect_lock, flags);
         pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off);
         pci_read_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val);
  
         /* Always leave this as zero. */
         pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0);
-       spin_unlock_bh(&tp->indirect_lock);
+       spin_unlock_irqrestore(&tp->indirect_lock, flags);
  }
  
  static void tg3_disable_ints(struct tg3 *tp)
  {
+       /* Set MISC_HOST_CTRL_MASK_PCI_INT in TG3PCI_MISC_HOST_CTRL, then
+        * write 1 to interrupt mailbox 0 using the flushing mailbox write.
+        */
         tw32(TG3PCI_MISC_HOST_CTRL,
              (tp->misc_host_ctrl | MISC_HOST_CTRL_MASK_PCI_INT));
-       tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
-       tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+       tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
  }
  
  static inline void tg3_cond_int(struct tg3 *tp)
@@ -439,9 +501,8 @@ static void tg3_enable_ints(struct tg3 *tp)
  
         tw32(TG3PCI_MISC_HOST_CTRL,
              (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT));
-       tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
-                    (tp->last_tag << 24));
-       tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+       tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+                      (tp->last_tag << 24));
         tg3_cond_int(tp);
  }
  
@@ -472,8 +533,6 @@ static inline unsigned int tg3_has_work(struct tg3 *tp)
   */
  static void tg3_restart_ints(struct tg3 *tp)
  {
-       tw32(TG3PCI_MISC_HOST_CTRL,
-               (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT));
         tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
                      tp->last_tag << 24);
         mmiowb();
@@ -3278,9 +3337,8 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs)
                         /* No work, shared interrupt perhaps?  re-enable
                          * interrupts, and flush that PCI write
                          */
-                       tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+                       tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
                                 0x00000000);
-                       tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
                 }
         } else {        /* shared interrupt */
                 handled = 0;
@@ -3323,9 +3381,8 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id, struct pt_regs *r
                         /* no work, shared interrupt perhaps?  re-enable
                          * interrupts, and flush that PCI write
                          */
-                       tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
-                                    tp->last_tag << 24);
-                       tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+                       tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+                                      tp->last_tag << 24);
                 }
         } else {        /* shared interrupt */
                 handled = 0;
@@ -4216,7 +4273,7 @@ static void tg3_stop_fw(struct tg3 *);
  static int tg3_chip_reset(struct tg3 *tp)
  {
         u32 val;
-       u32 flags_save;
+       void (*write_op)(struct tg3 *, u32, u32);
         int i;
  
         if (!(tp->tg3_flags2 & TG3_FLG2_SUN_570X))
@@ -4228,8 +4285,9 @@ static int tg3_chip_reset(struct tg3 *tp)
          * fun things.  So, temporarily disable the 5701
          * hardware workaround, while we do the reset.
          */
-       flags_save = tp->tg3_flags;
-       tp->tg3_flags &= ~TG3_FLAG_5701_REG_WRITE_BUG;
+       write_op = tp->write32;
+       if (write_op == tg3_write_flush_reg32)
+               tp->write32 = tg3_write32;
  
         /* do the reset */
         val = GRC_MISC_CFG_CORECLK_RESET;
@@ -4248,8 +4306,8 @@ static int tg3_chip_reset(struct tg3 *tp)
                 val |= GRC_MISC_CFG_KEEP_GPHY_POWER;
         tw32(GRC_MISC_CFG, val);
  
-       /* restore 5701 hardware bug workaround flag */
-       tp->tg3_flags = flags_save;
+       /* restore 5701 hardware bug workaround write method */
+       tp->write32 = write_op;
  
         /* Unfortunately, we have to delay before the PCI read back.
          * Some 575X chips even will not respond to a PCI cfg access
@@ -4635,7 +4693,6 @@ static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_b
                                  int cpu_scratch_size, struct fw_info *info)
  {
         int err, i;
-       u32 orig_tg3_flags = tp->tg3_flags;
         void (*write_op)(struct tg3 *, u32, u32);
  
         if (cpu_base == TX_CPU_BASE &&
@@ -4651,11 +4708,6 @@ static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_b
         else
                 write_op = tg3_write_indirect_reg32;
  
-       /* Force use of PCI config space for indirect register
-        * write calls.
-        */
-       tp->tg3_flags |= TG3_FLAG_PCIX_TARGET_HWBUG;
-
         /* It is possible that bootcode is still loading at this point.
          * Get the nvram lock first before halting the cpu.
          */
@@ -4691,7 +4743,6 @@ static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_b
         err = 0;
  
  out:
-       tp->tg3_flags = orig_tg3_flags;
         return err;
  }
  
@@ -5808,8 +5859,7 @@ static int tg3_reset_hw(struct tg3 *tp)
         tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl);
         udelay(100);
  
-       tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0);
-       tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+       tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0);
         tp->last_tag = 0;
  
         if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
@@ -6198,7 +6248,8 @@ static int tg3_test_interrupt(struct tg3 *tp)
                HOSTCC_MODE_NOW);
  
         for (i = 0; i < 5; i++) {
-               int_mbox = tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+               int_mbox = tr32_mailbox(MAILBOX_INTERRUPT_0 +
+                                       TG3_64BIT_REG_LOW);
                 if (int_mbox != 0)
                         break;
                 msleep(10);
@@ -6598,10 +6649,10 @@ static int tg3_open(struct net_device *dev)
  
         /* Mailboxes */
         printk("DEBUG: SNDHOST_PROD[%08x%08x] SNDNIC_PROD[%08x%08x]\n",
-              tr32(MAILBOX_SNDHOST_PROD_IDX_0 + 0x0),
-              tr32(MAILBOX_SNDHOST_PROD_IDX_0 + 0x4),
-              tr32(MAILBOX_SNDNIC_PROD_IDX_0 + 0x0),
-              tr32(MAILBOX_SNDNIC_PROD_IDX_0 + 0x4));
+              tr32_mailbox(MAILBOX_SNDHOST_PROD_IDX_0 + 0x0),
+              tr32_mailbox(MAILBOX_SNDHOST_PROD_IDX_0 + 0x4),
+              tr32_mailbox(MAILBOX_SNDNIC_PROD_IDX_0 + 0x0),
+              tr32_mailbox(MAILBOX_SNDNIC_PROD_IDX_0 + 0x4));
  
         /* NIC side send descriptors. */
         for (i = 0; i < 6; i++) {
@@ -7865,8 +7916,6 @@ static int tg3_test_loopback(struct tg3 *tp)
  
         err = -EIO;
  
-       tg3_abort_hw(tp, 1);
-
         tg3_reset_hw(tp);
  
         mac_mode = (tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK) |
@@ -7903,7 +7952,7 @@ static int tg3_test_loopback(struct tg3 *tp)
         num_pkts++;
  
         tw32_tx_mbox(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW, send_idx);
-       tr32(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW);
+       tr32_mailbox(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW);
  
         udelay(10);
  
@@ -8970,6 +9019,8 @@ static int __devinit tg3_phy_probe(struct tg3 *tp)
                 tp->phy_id = hw_phy_id;
                 if (hw_phy_id_masked == PHY_ID_BCM8002)
                         tp->tg3_flags2 |= TG3_FLG2_PHY_SERDES;
+               else
+                       tp->tg3_flags2 &= ~TG3_FLG2_PHY_SERDES;
         } else {
                 if (tp->phy_id != PHY_ID_INVALID) {
                         /* Do nothing, phy ID already set up in
@@ -9153,14 +9204,6 @@ static int __devinit tg3_is_sun_570X(struct tg3 *tp)
  static int __devinit tg3_get_invariants(struct tg3 *tp)
  {
         static struct pci_device_id write_reorder_chipsets[] = {
-               { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
-                            PCI_DEVICE_ID_INTEL_82801AA_8) },
-               { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
-                            PCI_DEVICE_ID_INTEL_82801AB_8) },
-               { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
-                            PCI_DEVICE_ID_INTEL_82801BA_11) },
-               { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
-                            PCI_DEVICE_ID_INTEL_82801BA_6) },
                 { PCI_DEVICE(PCI_VENDOR_ID_AMD,
                              PCI_DEVICE_ID_AMD_FE_GATE_700C) },
                 { },
@@ -9177,7 +9220,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
                 tp->tg3_flags2 |= TG3_FLG2_SUN_570X;
  #endif
  
-       /* If we have an AMD 762 or Intel ICH/ICH0/ICH2 chipset, write
+       /* If we have an AMD 762 chipset, write
          * reordering to the mailbox registers done by the host
          * controller can cause major troubles.  We read back from
          * every mailbox register write to force the writes to be
@@ -9215,6 +9258,69 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
         if (tp->pci_chip_rev_id == CHIPREV_ID_5752_A0_HW)
                 tp->pci_chip_rev_id = CHIPREV_ID_5752_A0;
  
+       /* If we have 5702/03 A1 or A2 on certain ICH chipsets,
+        * we need to disable memory and use config. cycles
+        * only to access all registers. The 5702/03 chips
+        * can mistakenly decode the special cycles from the
+        * ICH chipsets as memory write cycles, causing corruption
+        * of register and memory space. Only certain ICH bridges
+        * will drive special cycles with non-zero data during the
+        * address phase which can fall within the 5703's address
+        * range. This is not an ICH bug as the PCI spec allows
+        * non-zero address during special cycles. However, only
+        * these ICH bridges are known to drive non-zero addresses
+        * during special cycles.
+        *
+        * Since special cycles do not cross PCI bridges, we only
+        * enable this workaround if the 5703 is on the secondary
+        * bus of these ICH bridges.
+        */
+       if ((tp->pci_chip_rev_id == CHIPREV_ID_5703_A1) ||
+           (tp->pci_chip_rev_id == CHIPREV_ID_5703_A2)) {
+               static struct tg3_dev_id {
+                       u32     vendor;
+                       u32     device;
+                       u32     rev;
+               } ich_chipsets[] = {
+                       { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_8,
+                         PCI_ANY_ID },
+                       { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_8,
+                         PCI_ANY_ID },
+                       { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_11,
+                         0xa },
+                       { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_6,
+                         PCI_ANY_ID },
+                       { },
+               };
+               struct tg3_dev_id *pci_id = &ich_chipsets[0];
+               struct pci_dev *bridge = NULL;
+
+               while (pci_id->vendor != 0) {
+                       bridge = pci_get_device(pci_id->vendor, pci_id->device,
+                                               bridge);
+                       if (!bridge) {
+                               pci_id++;
+                               continue;
+                       }
+                       if (pci_id->rev != PCI_ANY_ID) {
+                               u8 rev;
+
+                               pci_read_config_byte(bridge, PCI_REVISION_ID,
+                                                    &rev);
+                               if (rev > pci_id->rev)
+                                       continue;
+                       }
+                       if (bridge->subordinate &&
+                           (bridge->subordinate->number ==
+                            tp->pdev->bus->number)) {
+
+                               tp->tg3_flags2 |= TG3_FLG2_ICH_WORKAROUND;
+                               pci_dev_put(bridge);
+                               break;
+                       }
+               }
+       }
+
         /* Find msi capability. */
         if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5780)
                 tp->msi_cap = pci_find_capability(tp->pdev, PCI_CAP_ID_MSI);
@@ -9302,6 +9408,12 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
                 }
         }
  
+       /* 5700 BX chips need to have their TX producer index mailboxes
+        * written twice to work around a bug.
+        */
+       if (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5700_BX)
+               tp->tg3_flags |= TG3_FLAG_TXD_MBOX_HWBUG;
+
         /* Back to back register writes can cause problems on this chip,
          * the workaround is to read back all reg writes except those to
          * mailbox regs.  See tg3_write_indirect_reg32().
@@ -9325,6 +9437,43 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
                 pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, pci_state_reg);
         }
  
+       /* Default fast path register access methods */
+       tp->read32 = tg3_read32;
+       tp->write32 = tg3_write32;
+       tp->read32_mbox = tg3_read32;
+       tp->write32_mbox = tg3_write32;
+       tp->write32_tx_mbox = tg3_write32;
+       tp->write32_rx_mbox = tg3_write32;
+
+       /* Various workaround register access methods */
+       if (tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG)
+               tp->write32 = tg3_write_indirect_reg32;
+       else if (tp->tg3_flags & TG3_FLAG_5701_REG_WRITE_BUG)
+               tp->write32 = tg3_write_flush_reg32;
+
+       if ((tp->tg3_flags & TG3_FLAG_TXD_MBOX_HWBUG) ||
+           (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)) {
+               tp->write32_tx_mbox = tg3_write32_tx_mbox;
+               if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+                       tp->write32_rx_mbox = tg3_write_flush_reg32;
+       }
+
+       if (tp->tg3_flags2 & TG3_FLG2_ICH_WORKAROUND) {
+               tp->read32 = tg3_read_indirect_reg32;
+               tp->write32 = tg3_write_indirect_reg32;
+               tp->read32_mbox = tg3_read_indirect_mbox;
+               tp->write32_mbox = tg3_write_indirect_mbox;
+               tp->write32_tx_mbox = tg3_write_indirect_mbox;
+               tp->write32_rx_mbox = tg3_write_indirect_mbox;
+
+               iounmap(tp->regs);
+               tp->regs = 0;
+
+               pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd);
+               pci_cmd &= ~PCI_COMMAND_MEMORY;
+               pci_write_config_word(tp->pdev, PCI_COMMAND, pci_cmd);
+       }
+
         /* Get eeprom hw config before calling tg3_set_power_state().
          * In particular, the TG3_FLAG_EEPROM_WRITE_PROT flag must be
          * determined before calling tg3_set_power_state() so that
@@ -9539,14 +9688,6 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
         else
                 tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES;
  
-       /* 5700 BX chips need to have their TX producer index mailboxes
-        * written twice to workaround a bug.
-        */
-       if (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5700_BX)
-               tp->tg3_flags |= TG3_FLAG_TXD_MBOX_HWBUG;
-       else
-               tp->tg3_flags &= ~TG3_FLAG_TXD_MBOX_HWBUG;
-
         /* It seems all chips can get confused if TX buffers
          * straddle the 4GB address boundary in some cases.
          */
@@ -10421,6 +10562,12 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
  
         tg3_init_coal(tp);
  
+       /* Now that we have fully set up the chip, save away a snapshot
+        * of the PCI config space.  We need to restore this after
+        * GRC_MISC_CFG core clock resets and some resume events.
+        */
+       pci_save_state(tp->pdev);
+
         err = register_netdev(dev);
         if (err) {
                 printk(KERN_ERR PFX "Cannot register net device, "
@@ -10430,12 +10577,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
  
         pci_set_drvdata(pdev, dev);
  
-       /* Now that we have fully setup the chip, save away a snapshot
-        * of the PCI config space.  We need to restore this after
-        * GRC_MISC_CFG core clock resets and some resume events.
-        */
-       pci_save_state(tp->pdev);
-
         printk(KERN_INFO "%s: Tigon3 [partno(%s) rev %04x PHY(%s)] (PCI%s:%s:%s) %sBaseT Ethernet ",
                dev->name,
                tp->board_part_number,
@@ -10469,7 +10610,10 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
         return 0;
  
  err_out_iounmap:
-       iounmap(tp->regs);
+       if (tp->regs) {
+               iounmap(tp->regs);
+               tp->regs = 0;
+       }
  
  err_out_free_dev:
         free_netdev(dev);
@@ -10491,7 +10635,10 @@ static void __devexit tg3_remove_one(struct pci_dev *pdev)
                 struct tg3 *tp = netdev_priv(dev);
  
                 unregister_netdev(dev);
-               iounmap(tp->regs);
+               if (tp->regs) {
+                       iounmap(tp->regs);
+                       tp->regs = 0;
+               }
                 free_netdev(dev);
                 pci_release_regions(pdev);
                 pci_disable_device(pdev);
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h

index 5c4433c147fa46ed891af4b425ea41078a1ffeeb..c184b773e58543be34027d65810b7fdb2f4f1539 100644 (file)
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2049,6 +2049,11 @@ struct tg3 {
         spinlock_t                      lock;
         spinlock_t                      indirect_lock;
  
+       u32                             (*read32) (struct tg3 *, u32);
+       void                            (*write32) (struct tg3 *, u32, u32);
+       u32                             (*read32_mbox) (struct tg3 *, u32);
+       void                            (*write32_mbox) (struct tg3 *, u32,
+                                                        u32);
         void __iomem                    *regs;
         struct net_device               *dev;
         struct pci_dev                  *pdev;
@@ -2060,6 +2065,8 @@ struct tg3 {
         u32                             msg_enable;
  
         /* begin "tx thread" cacheline section */
+       void                            (*write32_tx_mbox) (struct tg3 *, u32,
+                                                           u32);
         u32                             tx_prod;
         u32                             tx_cons;
         u32                             tx_pending;
@@ -2071,6 +2078,8 @@ struct tg3 {
         dma_addr_t                      tx_desc_mapping;
  
         /* begin "rx thread" cacheline section */
+       void                            (*write32_rx_mbox) (struct tg3 *, u32,
+                                                           u32);
         u32                             rx_rcb_ptr;
         u32                             rx_std_ptr;
         u32                             rx_jumbo_ptr;
@@ -2165,6 +2174,7 @@ struct tg3 {
  #define TG3_FLG2_ANY_SERDES            (TG3_FLG2_PHY_SERDES |  \
                                         TG3_FLG2_MII_SERDES)
  #define TG3_FLG2_PARALLEL_DETECT       0x01000000
+#define TG3_FLG2_ICH_WORKAROUND                0x02000000
  
         u32                             split_mode_max_reqs;
  #define SPLIT_MODE_5704_MAX_REQ                3
diff --git a/drivers/net/tokenring/Kconfig b/drivers/net/tokenring/Kconfig

index 23d0fa4bbceb7bccebcdfc6acf0bbc266861bf22..e4cfc80b283b74a4ead414593340520f5be9554c 100644 (file)
--- a/drivers/net/tokenring/Kconfig
+++ b/drivers/net/tokenring/Kconfig
@@ -84,7 +84,7 @@ config 3C359
  
  config TMS380TR
         tristate "Generic TMS380 Token Ring ISA/PCI adapter support"
-       depends on TR && (PCI || ISA)
+       depends on TR && (PCI || ISA && ISA_DMA_API || MCA)
         select FW_LOADER
         ---help---
           This driver provides generic support for token ring adapters
@@ -158,7 +158,7 @@ config ABYSS
  
  config MADGEMC
         tristate "Madge Smart 16/4 Ringnode MicroChannel"
-       depends on TR && TMS380TR && MCA_LEGACY
+       depends on TR && TMS380TR && MCA
         help
           This tms380 module supports the Madge Smart 16/4 MC16 and MC32
           MicroChannel adapters.
diff --git a/drivers/net/tokenring/abyss.c b/drivers/net/tokenring/abyss.c

index 87103c400999dea48045e60fc8fb62311c39294c..9345e68c451eb4af5111f27ba40b5eb0bbd254d2 100644 (file)
--- a/drivers/net/tokenring/abyss.c
+++ b/drivers/net/tokenring/abyss.c
@@ -139,7 +139,7 @@ static int __devinit abyss_attach(struct pci_dev *pdev, const struct pci_device_
          */
         dev->base_addr += 0x10;
                 
-       ret = tmsdev_init(dev, PCI_MAX_ADDRESS, pdev);
+       ret = tmsdev_init(dev, &pdev->dev);
         if (ret) {
                 printk("%s: unable to get memory for dev->priv.\n", 
                        dev->name);
diff --git a/drivers/net/tokenring/madgemc.c b/drivers/net/tokenring/madgemc.c

index 659cbdbef7f380e4299f44a6cd157bca53cc6eb6..3a25d191ea4af322ae851edd38f5128afb40cd88 100644 (file)
--- a/drivers/net/tokenring/madgemc.c
+++ b/drivers/net/tokenring/madgemc.c
@@ -20,7 +20,7 @@
  static const char version[] = "madgemc.c: v0.91 23/01/2000 by Adam Fritzler\n";
  
  #include <linux/module.h>
-#include <linux/mca-legacy.h>
+#include <linux/mca.h>
  #include <linux/kernel.h>
  #include <linux/errno.h>
  #include <linux/pci.h>
@@ -38,9 +38,7 @@ static const char version[] = "madgemc.c: v0.91 23/01/2000 by Adam Fritzler\n";
  #define MADGEMC_IO_EXTENT 32
  #define MADGEMC_SIF_OFFSET 0x08
  
-struct madgemc_card {
-       struct net_device *dev;
-
+struct card_info {
         /*
          * These are read from the BIA ROM.
          */
@@ -57,16 +55,12 @@ struct madgemc_card {
         unsigned int arblevel:4;
         unsigned int ringspeed:2; /* 0 = 4mb, 1 = 16, 2 = Auto/none */
         unsigned int cabletype:1; /* 0 = RJ45, 1 = DB9 */
-
-       struct madgemc_card *next;
  };
-static struct madgemc_card *madgemc_card_list;
-
  
  static int madgemc_open(struct net_device *dev);
  static int madgemc_close(struct net_device *dev);
  static int madgemc_chipset_init(struct net_device *dev);
-static void madgemc_read_rom(struct madgemc_card *card);
+static void madgemc_read_rom(struct net_device *dev, struct card_info *card);
  static unsigned short madgemc_setnselout_pins(struct net_device *dev);
  static void madgemc_setcabletype(struct net_device *dev, int type);
  
@@ -151,261 +145,237 @@ static void madgemc_sifwritew(struct net_device *dev, unsigned short val, unsign
  
  
  
-static int __init madgemc_probe(void)
+static int __devinit madgemc_probe(struct device *device)
  {      
         static int versionprinted;
         struct net_device *dev;
         struct net_local *tp;
-       struct madgemc_card *card;
-       int i,slot = 0;
-       __u8 posreg[4];
-
-       if (!MCA_bus)
-               return -1;      
- 
-       while (slot != MCA_NOTFOUND) {
-               /*
-                * Currently we only support the MC16/32 (MCA ID 002d)
-                */
-               slot = mca_find_unused_adapter(0x002d, slot);
-               if (slot == MCA_NOTFOUND)
-                       break;
-
-               /*
-                * If we get here, we have an adapter.
-                */
-               if (versionprinted++ == 0)
-                       printk("%s", version);
-
-               dev = alloc_trdev(sizeof(struct net_local));
-               if (dev == NULL) {
-                       printk("madgemc: unable to allocate dev space\n");
-                       if (madgemc_card_list)
-                               return 0;
-                       return -1;
-               }
+       struct card_info *card;
+       struct mca_device *mdev = to_mca_device(device);
+       int ret = 0, i = 0;
+
+       if (versionprinted++ == 0)
+               printk("%s", version);
+
+       if(mca_device_claimed(mdev))
+               return -EBUSY;
+       mca_device_set_claim(mdev, 1);
+
+       dev = alloc_trdev(sizeof(struct net_local));
+       if (!dev) {
+               printk("madgemc: unable to allocate dev space\n");
+               mca_device_set_claim(mdev, 0);
+               ret = -ENOMEM;
+               goto getout;
+       }
  
-               SET_MODULE_OWNER(dev);
-               dev->dma = 0;
+       SET_MODULE_OWNER(dev);
+       dev->dma = 0;
  
-               /*
-                * Fetch MCA config registers
-                */
-               for(i=0;i<4;i++)
-                       posreg[i] = mca_read_stored_pos(slot, i+2);
-               
-               card = kmalloc(sizeof(struct madgemc_card), GFP_KERNEL);
-               if (card==NULL) {
-                       printk("madgemc: unable to allocate card struct\n");
-                       free_netdev(dev);
-                       if (madgemc_card_list)
-                               return 0;
-                       return -1;
-               }
-               card->dev = dev;
-
-               /*
-                * Parse configuration information.  This all comes
-                * directly from the publicly available @002d.ADF.
-                * Get it from Madge or your local ADF library.
-                */
-
-               /*
-                * Base address 
-                */
-               dev->base_addr = 0x0a20 + 
-                       ((posreg[2] & MC16_POS2_ADDR2)?0x0400:0) +
-                       ((posreg[0] & MC16_POS0_ADDR1)?0x1000:0) +
-                       ((posreg[3] & MC16_POS3_ADDR3)?0x2000:0);
-
-               /*
-                * Interrupt line
-                */
-               switch(posreg[0] >> 6) { /* upper two bits */
+       card = kmalloc(sizeof(struct card_info), GFP_KERNEL);
+       if (card==NULL) {
+               printk("madgemc: unable to allocate card struct\n");
+               ret = -ENOMEM;
+               goto getout1;
+       }
+
+       /*
+        * Parse configuration information.  This all comes
+        * directly from the publicly available @002d.ADF.
+        * Get it from Madge or your local ADF library.
+        */
+
+       /*
+        * Base address 
+        */
+       dev->base_addr = 0x0a20 + 
+               ((mdev->pos[2] & MC16_POS2_ADDR2)?0x0400:0) +
+               ((mdev->pos[0] & MC16_POS0_ADDR1)?0x1000:0) +
+               ((mdev->pos[3] & MC16_POS3_ADDR3)?0x2000:0);
+
+       /*
+        * Interrupt line
+        */
+       switch(mdev->pos[0] >> 6) { /* upper two bits */
                 case 0x1: dev->irq = 3; break;
                 case 0x2: dev->irq = 9; break; /* IRQ 2 = IRQ 9 */
                 case 0x3: dev->irq = 10; break;
                 default: dev->irq = 0; break;
-               }
+       }
  
-               if (dev->irq == 0) {
-                       printk("%s: invalid IRQ\n", dev->name);
-                       goto getout1;
-               }
+       if (dev->irq == 0) {
+               printk("%s: invalid IRQ\n", dev->name);
+               ret = -EBUSY;
+               goto getout2;
+       }
  
-               if (!request_region(dev->base_addr, MADGEMC_IO_EXTENT, 
-                                  "madgemc")) {
-                       printk(KERN_INFO "madgemc: unable to setup Smart MC in slot %d because of I/O base conflict at 0x%04lx\n", slot, dev->base_addr);
-                       dev->base_addr += MADGEMC_SIF_OFFSET;
-                       goto getout1;
-               }
+       if (!request_region(dev->base_addr, MADGEMC_IO_EXTENT, 
+                          "madgemc")) {
+               printk(KERN_INFO "madgemc: unable to setup Smart MC in slot %d because of I/O base conflict at 0x%04lx\n", mdev->slot, dev->base_addr);
                 dev->base_addr += MADGEMC_SIF_OFFSET;
+               ret = -EBUSY;
+               goto getout2;
+       }
+       dev->base_addr += MADGEMC_SIF_OFFSET;
+       
+       /*
+        * Arbitration Level
+        */
+       card->arblevel = ((mdev->pos[0] >> 1) & 0x7) + 8;
+
+       /*
+        * Burst mode and Fairness
+        */
+       card->burstmode = ((mdev->pos[2] >> 6) & 0x3);
+       card->fairness = ((mdev->pos[2] >> 4) & 0x1);
+
+       /*
+        * Ring Speed
+        */
+       if ((mdev->pos[1] >> 2)&0x1)
+               card->ringspeed = 2; /* not selected */
+       else if ((mdev->pos[2] >> 5) & 0x1)
+               card->ringspeed = 1; /* 16Mb */
+       else
+               card->ringspeed = 0; /* 4Mb */
+
+       /* 
+        * Cable type
+        */
+       if ((mdev->pos[1] >> 6)&0x1)
+               card->cabletype = 1; /* STP/DB9 */
+       else
+               card->cabletype = 0; /* UTP/RJ-45 */
+
+
+       /* 
+        * ROM Info. This requires us to actually twiddle
+        * bits on the card, so we must ensure above that 
+        * the base address is free of conflict (request_region above).
+        */
+       madgemc_read_rom(dev, card);
                 
-               /*
-                * Arbitration Level
-                */
-               card->arblevel = ((posreg[0] >> 1) & 0x7) + 8;
-
-               /*
-                * Burst mode and Fairness
-                */
-               card->burstmode = ((posreg[2] >> 6) & 0x3);
-               card->fairness = ((posreg[2] >> 4) & 0x1);
-
-               /*
-                * Ring Speed
-                */
-               if ((posreg[1] >> 2)&0x1)
-                       card->ringspeed = 2; /* not selected */
-               else if ((posreg[2] >> 5) & 0x1)
-                       card->ringspeed = 1; /* 16Mb */
-               else
-                       card->ringspeed = 0; /* 4Mb */
-
-               /* 
-                * Cable type
-                */
-               if ((posreg[1] >> 6)&0x1)
-                       card->cabletype = 1; /* STP/DB9 */
-               else
-                       card->cabletype = 0; /* UTP/RJ-45 */
-
-
-               /* 
-                * ROM Info. This requires us to actually twiddle
-                * bits on the card, so we must ensure above that 
-                * the base address is free of conflict (request_region above).
-                */
-               madgemc_read_rom(card);
-               
-               if (card->manid != 0x4d) { /* something went wrong */
-                       printk(KERN_INFO "%s: Madge MC ROM read failed (unknown manufacturer ID %02x)\n", dev->name, card->manid);
-                       goto getout;
-               }
+       if (card->manid != 0x4d) { /* something went wrong */
+               printk(KERN_INFO "%s: Madge MC ROM read failed (unknown manufacturer ID %02x)\n", dev->name, card->manid);
+               goto getout3;
+       }
                 
-               if ((card->cardtype != 0x08) && (card->cardtype != 0x0d)) {
-                       printk(KERN_INFO "%s: Madge MC ROM read failed (unknown card ID %02x)\n", dev->name, card->cardtype);
-                       goto getout;
-               }
+       if ((card->cardtype != 0x08) && (card->cardtype != 0x0d)) {
+               printk(KERN_INFO "%s: Madge MC ROM read failed (unknown card ID %02x)\n", dev->name, card->cardtype);
+               ret = -EIO;
+               goto getout3;
+       }
                
-               /* All cards except Rev 0 and 1 MC16's have 256kb of RAM */
-               if ((card->cardtype == 0x08) && (card->cardrev <= 0x01))
-                       card->ramsize = 128;
-               else
-                       card->ramsize = 256;
-
-               printk("%s: %s Rev %d at 0x%04lx IRQ %d\n", 
-                      dev->name, 
-                      (card->cardtype == 0x08)?MADGEMC16_CARDNAME:
-                      MADGEMC32_CARDNAME, card->cardrev, 
-                      dev->base_addr, dev->irq);
-
-               if (card->cardtype == 0x0d)
-                       printk("%s:     Warning: MC32 support is experimental and highly untested\n", dev->name);
-               
-               if (card->ringspeed==2) { /* Unknown */
-                       printk("%s:     Warning: Ring speed not set in POS -- Please run the reference disk and set it!\n", dev->name);
-                       card->ringspeed = 1; /* default to 16mb */
-               }
+       /* All cards except Rev 0 and 1 MC16's have 256kb of RAM */
+       if ((card->cardtype == 0x08) && (card->cardrev <= 0x01))
+               card->ramsize = 128;
+       else
+               card->ramsize = 256;
+
+       printk("%s: %s Rev %d at 0x%04lx IRQ %d\n", 
+              dev->name, 
+              (card->cardtype == 0x08)?MADGEMC16_CARDNAME:
+              MADGEMC32_CARDNAME, card->cardrev, 
+              dev->base_addr, dev->irq);
+
+       if (card->cardtype == 0x0d)
+               printk("%s:     Warning: MC32 support is experimental and highly untested\n", dev->name);
+       
+       if (card->ringspeed==2) { /* Unknown */
+               printk("%s:     Warning: Ring speed not set in POS -- Please run the reference disk and set it!\n", dev->name);
+               card->ringspeed = 1; /* default to 16mb */
+       }
                 
-               printk("%s:     RAM Size: %dKB\n", dev->name, card->ramsize);
+       printk("%s:     RAM Size: %dKB\n", dev->name, card->ramsize);
  
-               printk("%s:     Ring Speed: %dMb/sec on %s\n", dev->name, 
-                      (card->ringspeed)?16:4, 
-                      card->cabletype?"STP/DB9":"UTP/RJ-45");
-               printk("%s:     Arbitration Level: %d\n", dev->name, 
-                      card->arblevel);
+       printk("%s:     Ring Speed: %dMb/sec on %s\n", dev->name, 
+              (card->ringspeed)?16:4, 
+              card->cabletype?"STP/DB9":"UTP/RJ-45");
+       printk("%s:     Arbitration Level: %d\n", dev->name, 
+              card->arblevel);
  
-               printk("%s:     Burst Mode: ", dev->name);
-               switch(card->burstmode) {
+       printk("%s:     Burst Mode: ", dev->name);
+       switch(card->burstmode) {
                 case 0: printk("Cycle steal"); break;
                 case 1: printk("Limited burst"); break;
                 case 2: printk("Delayed release"); break;
                 case 3: printk("Immediate release"); break;
-               }
-               printk(" (%s)\n", (card->fairness)?"Unfair":"Fair");
-
-
-               /* 
-                * Enable SIF before we assign the interrupt handler,
-                * just in case we get spurious interrupts that need
-                * handling.
-                */ 
-               outb(0, dev->base_addr + MC_CONTROL_REG0); /* sanity */
-               madgemc_setsifsel(dev, 1);
-               if (request_irq(dev->irq, madgemc_interrupt, SA_SHIRQ,
-                              "madgemc", dev)) 
-                       goto getout;
-               
-               madgemc_chipset_init(dev); /* enables interrupts! */
-               madgemc_setcabletype(dev, card->cabletype);
+       }
+       printk(" (%s)\n", (card->fairness)?"Unfair":"Fair");
  
-               /* Setup MCA structures */
-               mca_set_adapter_name(slot, (card->cardtype == 0x08)?MADGEMC16_CARDNAME:MADGEMC32_CARDNAME);
-               mca_set_adapter_procfn(slot, madgemc_mcaproc, dev);
-               mca_mark_as_used(slot);
  
-               printk("%s:     Ring Station Address: ", dev->name);
-               printk("%2.2x", dev->dev_addr[0]);
-               for (i = 1; i < 6; i++)
-                       printk(":%2.2x", dev->dev_addr[i]);
-               printk("\n");
-
-               /* XXX is ISA_MAX_ADDRESS correct here? */
-               if (tmsdev_init(dev, ISA_MAX_ADDRESS, NULL)) {
-                       printk("%s: unable to get memory for dev->priv.\n", 
-                              dev->name);
-                       release_region(dev->base_addr-MADGEMC_SIF_OFFSET, 
-                              MADGEMC_IO_EXTENT); 
-                       
-                       kfree(card);
-                       tmsdev_term(dev);
-                       free_netdev(dev);
-                       if (madgemc_card_list)
-                               return 0;
-                       return -1;
-               }
-               tp = netdev_priv(dev);
-
-               /* 
-                * The MC16 is physically a 32bit card.  However, Madge
-                * insists on calling it 16bit, so I'll assume here that
-                * they know what they're talking about.  Cut off DMA
-                * at 16mb.
-                */
-               tp->setnselout = madgemc_setnselout_pins;
-               tp->sifwriteb = madgemc_sifwriteb;
-               tp->sifreadb = madgemc_sifreadb;
-               tp->sifwritew = madgemc_sifwritew;
-               tp->sifreadw = madgemc_sifreadw;
-               tp->DataRate = (card->ringspeed)?SPEED_16:SPEED_4;
-
-               memcpy(tp->ProductID, "Madge MCA 16/4    ", PROD_ID_SIZE + 1);
-
-               dev->open = madgemc_open;
-               dev->stop = madgemc_close;
-
-               if (register_netdev(dev) == 0) {
-                       /* Enlist in the card list */
-                       card->next = madgemc_card_list;
-                       madgemc_card_list = card;
-                       slot++;
-                       continue; /* successful, try to find another */
-               }
-               
-               free_irq(dev->irq, dev);
-       getout:
-               release_region(dev->base_addr-MADGEMC_SIF_OFFSET, 
-                              MADGEMC_IO_EXTENT); 
-       getout1:
-               kfree(card);
-               free_netdev(dev);
-               slot++;
+       /* 
+        * Enable SIF before we assign the interrupt handler,
+        * just in case we get spurious interrupts that need
+        * handling.
+        */ 
+       outb(0, dev->base_addr + MC_CONTROL_REG0); /* sanity */
+       madgemc_setsifsel(dev, 1);
+       if (request_irq(dev->irq, madgemc_interrupt, SA_SHIRQ,
+                      "madgemc", dev)) {
+               ret = -EBUSY;
+               goto getout3;
         }
  
-       if (madgemc_card_list)
+       madgemc_chipset_init(dev); /* enables interrupts! */
+       madgemc_setcabletype(dev, card->cabletype);
+
+       /* Setup MCA structures */
+       mca_device_set_name(mdev, (card->cardtype == 0x08)?MADGEMC16_CARDNAME:MADGEMC32_CARDNAME);
+       mca_set_adapter_procfn(mdev->slot, madgemc_mcaproc, dev);
+
+       printk("%s:     Ring Station Address: ", dev->name);
+       printk("%2.2x", dev->dev_addr[0]);
+       for (i = 1; i < 6; i++)
+               printk(":%2.2x", dev->dev_addr[i]);
+       printk("\n");
+
+       if (tmsdev_init(dev, device)) {
+               printk("%s: unable to get memory for dev->priv.\n", 
+                      dev->name);
+               ret = -ENOMEM;
+               goto getout4;
+       }
+       tp = netdev_priv(dev);
+
+       /* 
+        * The MC16 is physically a 32bit card.  However, Madge
+        * insists on calling it 16bit, so I'll assume here that
+        * they know what they're talking about.  Cut off DMA
+        * at 16mb.
+        */
+       tp->setnselout = madgemc_setnselout_pins;
+       tp->sifwriteb = madgemc_sifwriteb;
+       tp->sifreadb = madgemc_sifreadb;
+       tp->sifwritew = madgemc_sifwritew;
+       tp->sifreadw = madgemc_sifreadw;
+       tp->DataRate = (card->ringspeed)?SPEED_16:SPEED_4;
+
+       memcpy(tp->ProductID, "Madge MCA 16/4    ", PROD_ID_SIZE + 1);
+
+       dev->open = madgemc_open;
+       dev->stop = madgemc_close;
+
+       tp->tmspriv = card;
+       dev_set_drvdata(device, dev);
+
+       if (register_netdev(dev) == 0)
                 return 0;
-       return -1;
+
+       dev_set_drvdata(device, NULL);
+       ret = -ENOMEM;
+getout4:
+       free_irq(dev->irq, dev);
+getout3:
+       release_region(dev->base_addr-MADGEMC_SIF_OFFSET, 
+                      MADGEMC_IO_EXTENT); 
+getout2:
+       kfree(card);
+getout1:
+       free_netdev(dev);
+getout:
+       mca_device_set_claim(mdev, 0);
+       return ret;
  }
  
  /*
@@ -664,12 +634,12 @@ static void madgemc_chipset_close(struct net_device *dev)
   * is complete.
   *
   */
-static void madgemc_read_rom(struct madgemc_card *card)
+static void madgemc_read_rom(struct net_device *dev, struct card_info *card)
  {
         unsigned long ioaddr;
         unsigned char reg0, reg1, tmpreg0, i;
  
-       ioaddr = card->dev->base_addr;
+       ioaddr = dev->base_addr;
  
         reg0 = inb(ioaddr + MC_CONTROL_REG0);
         reg1 = inb(ioaddr + MC_CONTROL_REG1);
@@ -686,9 +656,9 @@ static void madgemc_read_rom(struct madgemc_card *card)
         outb(tmpreg0 | MC_CONTROL_REG0_PAGE, ioaddr + MC_CONTROL_REG0);
  
         /* Read BIA */
-       card->dev->addr_len = 6;
+       dev->addr_len = 6;
         for (i = 0; i < 6; i++)
-               card->dev->dev_addr[i] = inb(ioaddr + MC_ROM_BIA_START + i);
+               dev->dev_addr[i] = inb(ioaddr + MC_ROM_BIA_START + i);
         
         /* Restore original register values */
         outb(reg0, ioaddr + MC_CONTROL_REG0);
@@ -721,14 +691,10 @@ static int madgemc_close(struct net_device *dev)
  static int madgemc_mcaproc(char *buf, int slot, void *d) 
  {      
         struct net_device *dev = (struct net_device *)d;
-       struct madgemc_card *curcard = madgemc_card_list;
+       struct net_local *tp = dev->priv;
+       struct card_info *curcard = tp->tmspriv;
         int len = 0;
         
-       while (curcard) { /* search for card struct */
-               if (curcard->dev == dev)
-                       break;
-               curcard = curcard->next;
-       }
         len += sprintf(buf+len, "-------\n");
         if (curcard) {
                 struct net_local *tp = netdev_priv(dev);
@@ -763,25 +729,56 @@ static int madgemc_mcaproc(char *buf, int slot, void *d)
         return len;
  }
  
-static void __exit madgemc_exit(void)
+/* Unbind one adapter: take the netdev down first, then free IRQ, I/O ports and card data. */
+static int __devexit madgemc_remove(struct device *device)
  {
-       struct net_device *dev;
-       struct madgemc_card *this_card;
-       
-       while (madgemc_card_list) {
-               dev = madgemc_card_list->dev;
-               unregister_netdev(dev);
-               release_region(dev->base_addr-MADGEMC_SIF_OFFSET, MADGEMC_IO_EXTENT);
-               free_irq(dev->irq, dev);
-               tmsdev_term(dev);
-               free_netdev(dev);
-               this_card = madgemc_card_list;
-               madgemc_card_list = this_card->next;
-               kfree(this_card);
-       }
+       struct net_device *dev = dev_get_drvdata(device);
+       struct net_local *tp;
+       struct card_info *card;
+
+       if (!dev)
+               BUG(); /* probe stores the net_device as drvdata before registering it */
+       tp = dev->priv;
+       card = tp->tmspriv;
+       /* Quiesce first: nothing may still reach the card once its resources are released. */
+       unregister_netdev(dev);
+       free_irq(dev->irq, dev);
+       release_region(dev->base_addr-MADGEMC_SIF_OFFSET, MADGEMC_IO_EXTENT);
+       tp->tmspriv = NULL; /* madgemc_mcaproc() tests this pointer before using the card */
+       kfree(card);
+       tmsdev_term(dev);
+       free_netdev(dev);
+       /* NOTE(review): the hook installed with mca_set_adapter_procfn() in probe is not cleared here -- confirm. */
+       dev_set_drvdata(device, NULL);
+       return 0;
+}
+
+static short madgemc_adapter_ids[] = { /* not __initdata: madgemc_driver.id_table keeps pointing here */
+       0x002d,
+       0x0000 /* presumably the end-of-list marker -- confirm against the MCA bus code */
+};
+
+static struct mca_driver madgemc_driver = { /* registered in madgemc_init(), unregistered in madgemc_exit() */
+       .id_table = madgemc_adapter_ids, /* presumably the adapter IDs the bus matches on -- confirm */
+       .driver = {
+               .name = "madgemc",
+               .bus = &mca_bus_type,
+               .probe = madgemc_probe, /* per-device setup */
+               .remove = __devexit_p(madgemc_remove), /* per-device teardown, see madgemc_remove() */
+       },
+};
+
+static int __init madgemc_init (void)
+{
+       return mca_register_driver (&madgemc_driver); /* module entry: only registers the driver; its result is the init status */
+}
+
+static void __exit madgemc_exit (void)
+{
+       mca_unregister_driver (&madgemc_driver); /* module exit: undoes the registration done in madgemc_init() */
  }
  
-module_init(madgemc_probe);
+module_init(madgemc_init);
  module_exit(madgemc_exit);
  
  MODULE_LICENSE("GPL");
diff --git a/drivers/net/tokenring/proteon.c b/drivers/net/tokenring/proteon.c

index 40ad0fde28afc071fdf48562350d6c04a9fd1e35..eb1423ede75cae8032a0729fdb115e54fa9e7c86 100644 (file)
--- a/drivers/net/tokenring/proteon.c
+++ b/drivers/net/tokenring/proteon.c
@@ -62,8 +62,7 @@ static int dmalist[] __initdata = {
  };
  
  static char cardname[] = "Proteon 1392\0";
-
-struct net_device *proteon_probe(int unit);
+static u64 dma_mask = ISA_MAX_ADDRESS;
  static int proteon_open(struct net_device *dev);
  static void proteon_read_eeprom(struct net_device *dev);
  static unsigned short proteon_setnselout_pins(struct net_device *dev);
@@ -116,7 +115,7 @@ nodev:
         return -ENODEV;
  }
  
-static int __init setup_card(struct net_device *dev)
+static int __init setup_card(struct net_device *dev, struct device *pdev)
  {
         struct net_local *tp;
          static int versionprinted;
@@ -137,7 +136,7 @@ static int __init setup_card(struct net_device *dev)
                 }
         }
         if (err)
-               goto out4;
+               goto out5;
  
         /* At this point we have found a valid card. */
  
@@ -145,14 +144,15 @@ static int __init setup_card(struct net_device *dev)
                 printk(KERN_DEBUG "%s", version);
  
         err = -EIO;
-       if (tmsdev_init(dev, ISA_MAX_ADDRESS, NULL))
+       pdev->dma_mask = &dma_mask;
+       if (tmsdev_init(dev, pdev))
                 goto out4;
  
         dev->base_addr &= ~3; 
                 
         proteon_read_eeprom(dev);
  
-       printk(KERN_DEBUG "%s:    Ring Station Address: ", dev->name);
+       printk(KERN_DEBUG "proteon.c:    Ring Station Address: ");
         printk("%2.2x", dev->dev_addr[0]);
         for (j = 1; j < 6; j++)
                 printk(":%2.2x", dev->dev_addr[j]);
@@ -185,7 +185,7 @@ static int __init setup_card(struct net_device *dev)
                 
                  if(irqlist[j] == 0)
                  {
-                        printk(KERN_INFO "%s: AutoSelect no IRQ available\n", dev->name);
+                        printk(KERN_INFO "proteon.c: AutoSelect no IRQ available\n");
                         goto out3;
                 }
         }
@@ -196,15 +196,15 @@ static int __init setup_card(struct net_device *dev)
                                 break;
                 if (irqlist[j] == 0)
                 {
-                       printk(KERN_INFO "%s: Illegal IRQ %d specified\n",
-                               dev->name, dev->irq);
+                       printk(KERN_INFO "proteon.c: Illegal IRQ %d specified\n",
+                               dev->irq);
                         goto out3;
                 }
                 if (request_irq(dev->irq, tms380tr_interrupt, 0, 
                         cardname, dev))
                 {
-                        printk(KERN_INFO "%s: Selected IRQ %d not available\n", 
-                               dev->name, dev->irq);
+                        printk(KERN_INFO "proteon.c: Selected IRQ %d not available\n",
+                               dev->irq);
                         goto out3;
                 }
         }
@@ -220,7 +220,7 @@ static int __init setup_card(struct net_device *dev)
  
                 if(dmalist[j] == 0)
                 {
-                       printk(KERN_INFO "%s: AutoSelect no DMA available\n", dev->name);
+                       printk(KERN_INFO "proteon.c: AutoSelect no DMA available\n");
                         goto out2;
                 }
         }
@@ -231,25 +231,25 @@ static int __init setup_card(struct net_device *dev)
                                 break;
                 if (dmalist[j] == 0)
                 {
-                        printk(KERN_INFO "%s: Illegal DMA %d specified\n", 
-                               dev->name, dev->dma);
+                        printk(KERN_INFO "proteon.c: Illegal DMA %d specified\n",
+                               dev->dma);
                         goto out2;
                 }
                 if (request_dma(dev->dma, cardname))
                 {
-                        printk(KERN_INFO "%s: Selected DMA %d not available\n", 
-                               dev->name, dev->dma);
+                        printk(KERN_INFO "proteon.c: Selected DMA %d not available\n",
+                               dev->dma);
                         goto out2;
                 }
         }
  
-       printk(KERN_DEBUG "%s:    IO: %#4lx  IRQ: %d  DMA: %d\n",
-              dev->name, dev->base_addr, dev->irq, dev->dma);
-               
         err = register_netdev(dev);
         if (err)
                 goto out;
  
+       printk(KERN_DEBUG "%s:    IO: %#4lx  IRQ: %d  DMA: %d\n",
+              dev->name, dev->base_addr, dev->irq, dev->dma);
+
         return 0;
  out:
         free_dma(dev->dma);
@@ -258,34 +258,11 @@ out2:
  out3:
         tmsdev_term(dev);
  out4:
-       release_region(dev->base_addr, PROTEON_IO_EXTENT); 
+       release_region(dev->base_addr, PROTEON_IO_EXTENT);
+out5:
         return err;
  }
  
-struct net_device * __init proteon_probe(int unit)
-{
-       struct net_device *dev = alloc_trdev(sizeof(struct net_local));
-       int err = 0;
-
-       if (!dev)
-               return ERR_PTR(-ENOMEM);
-
-       if (unit >= 0) {
-               sprintf(dev->name, "tr%d", unit);
-               netdev_boot_setup_check(dev);
-       }
-
-       err = setup_card(dev);
-       if (err)
-               goto out;
-
-       return dev;
-
-out:
-       free_netdev(dev);
-       return ERR_PTR(err);
-}
-
  /*
   * Reads MAC address from adapter RAM, which should've read it from
   * the onboard ROM.  
@@ -352,8 +329,6 @@ static int proteon_open(struct net_device *dev)
         return tms380tr_open(dev);
  }
  
-#ifdef MODULE
-
  #define ISATR_MAX_ADAPTERS 3
  
  static int io[ISATR_MAX_ADAPTERS];
@@ -366,13 +341,23 @@ module_param_array(io, int, NULL, 0);
  module_param_array(irq, int, NULL, 0);
  module_param_array(dma, int, NULL, 0);
  
-static struct net_device *proteon_dev[ISATR_MAX_ADAPTERS];
+static struct platform_device *proteon_dev[ISATR_MAX_ADAPTERS];
+
+static struct device_driver proteon_driver = { /* no .probe/.remove: proteon_init()/proteon_cleanup() handle the cards directly */
+       .name           = "proteon", /* same string proteon_init() passes to platform_device_register_simple() */
+       .bus            = &platform_bus_type,
+};
  
-int init_module(void)
+static int __init proteon_init(void)
  {
         struct net_device *dev;
+       struct platform_device *pdev;
         int i, num = 0, err = 0;
  
+       err = driver_register(&proteon_driver);
+       if (err)
+               return err;
+
         for (i = 0; i < ISATR_MAX_ADAPTERS ; i++) {
                 dev = alloc_trdev(sizeof(struct net_local));
                 if (!dev)
@@ -381,11 +366,15 @@ int init_module(void)
                 dev->base_addr = io[i];
                 dev->irq = irq[i];
                 dev->dma = dma[i];
-               err = setup_card(dev);
+               pdev = platform_device_register_simple("proteon",
+                       i, NULL, 0);
+               err = setup_card(dev, &pdev->dev);
                 if (!err) {
-                       proteon_dev[i] = dev;
+                       proteon_dev[i] = pdev;
+                       dev_set_drvdata(&pdev->dev, dev);
                         ++num;
                 } else {
+                       platform_device_unregister(pdev);
                         free_netdev(dev);
                 }
         }
@@ -399,23 +388,28 @@ int init_module(void)
         return (0);
  }
  
-void cleanup_module(void)
+static void __exit proteon_cleanup(void) /* module exit: tear down every card recorded in proteon_dev[] */
  {
+       struct net_device *dev;
         int i;
  
         for (i = 0; i < ISATR_MAX_ADAPTERS ; i++) {
-               struct net_device *dev = proteon_dev[i];
+               struct platform_device *pdev = proteon_dev[i]; /* set by proteon_init() only when setup_card() succeeded */
                 
-               if (!dev) 
+               if (!pdev)
                         continue;
-               
+               dev = dev_get_drvdata(&pdev->dev); /* net_device stored there by proteon_init() */
                 unregister_netdev(dev);
                 release_region(dev->base_addr, PROTEON_IO_EXTENT);
                 free_irq(dev->irq, dev);
                 free_dma(dev->dma);
                 tmsdev_term(dev);
                 free_netdev(dev);
+               dev_set_drvdata(&pdev->dev, NULL); /* dev was just freed; drop the stale pointer */
+               platform_device_unregister(pdev);
         }
+       driver_unregister(&proteon_driver); /* registered at the top of proteon_init() */
  }
-#endif /* MODULE */
  
+module_init(proteon_init);
+module_exit(proteon_cleanup);
diff --git a/drivers/net/tokenring/skisa.c b/drivers/net/tokenring/skisa.c

index f26796e2d0e5f4cb0ee2a4a74872779d7d625fca..3c7c66204f7474d0005021afc7fe740698fc2d3a 100644 (file)
--- a/drivers/net/tokenring/skisa.c
+++ b/drivers/net/tokenring/skisa.c
@@ -68,8 +68,7 @@ static int dmalist[] __initdata = {
  };
  
  static char isa_cardname[] = "SK NET TR 4/16 ISA\0";
-
-struct net_device *sk_isa_probe(int unit);
+static u64 dma_mask = ISA_MAX_ADDRESS;
  static int sk_isa_open(struct net_device *dev);
  static void sk_isa_read_eeprom(struct net_device *dev);
  static unsigned short sk_isa_setnselout_pins(struct net_device *dev);
@@ -133,7 +132,7 @@ static int __init sk_isa_probe1(struct net_device *dev, int ioaddr)
         return 0;
  }
  
-static int __init setup_card(struct net_device *dev)
+static int __init setup_card(struct net_device *dev, struct device *pdev)
  {
         struct net_local *tp;
          static int versionprinted;
@@ -154,7 +153,7 @@ static int __init setup_card(struct net_device *dev)
                 }
         }
         if (err)
-               goto out4;
+               goto out5;
  
         /* At this point we have found a valid card. */
  
@@ -162,14 +161,15 @@ static int __init setup_card(struct net_device *dev)
                 printk(KERN_DEBUG "%s", version);
  
         err = -EIO;
-       if (tmsdev_init(dev, ISA_MAX_ADDRESS, NULL))
+       pdev->dma_mask = &dma_mask;
+       if (tmsdev_init(dev, pdev))
                 goto out4;
  
         dev->base_addr &= ~3; 
                 
         sk_isa_read_eeprom(dev);
  
-       printk(KERN_DEBUG "%s:    Ring Station Address: ", dev->name);
+       printk(KERN_DEBUG "skisa.c:    Ring Station Address: ");
         printk("%2.2x", dev->dev_addr[0]);
         for (j = 1; j < 6; j++)
                 printk(":%2.2x", dev->dev_addr[j]);
@@ -202,7 +202,7 @@ static int __init setup_card(struct net_device *dev)
                 
                  if(irqlist[j] == 0)
                  {
-                        printk(KERN_INFO "%s: AutoSelect no IRQ available\n", dev->name);
+                        printk(KERN_INFO "skisa.c: AutoSelect no IRQ available\n");
                         goto out3;
                 }
         }
@@ -213,15 +213,15 @@ static int __init setup_card(struct net_device *dev)
                                 break;
                 if (irqlist[j] == 0)
                 {
-                       printk(KERN_INFO "%s: Illegal IRQ %d specified\n",
-                               dev->name, dev->irq);
+                       printk(KERN_INFO "skisa.c: Illegal IRQ %d specified\n",
+                               dev->irq);
                         goto out3;
                 }
                 if (request_irq(dev->irq, tms380tr_interrupt, 0, 
                         isa_cardname, dev))
                 {
-                        printk(KERN_INFO "%s: Selected IRQ %d not available\n", 
-                               dev->name, dev->irq);
+                        printk(KERN_INFO "skisa.c: Selected IRQ %d not available\n",
+                               dev->irq);
                         goto out3;
                 }
         }
@@ -237,7 +237,7 @@ static int __init setup_card(struct net_device *dev)
  
                 if(dmalist[j] == 0)
                 {
-                       printk(KERN_INFO "%s: AutoSelect no DMA available\n", dev->name);
+                       printk(KERN_INFO "skisa.c: AutoSelect no DMA available\n");
                         goto out2;
                 }
         }
@@ -248,25 +248,25 @@ static int __init setup_card(struct net_device *dev)
                                 break;
                 if (dmalist[j] == 0)
                 {
-                        printk(KERN_INFO "%s: Illegal DMA %d specified\n", 
-                               dev->name, dev->dma);
+                        printk(KERN_INFO "skisa.c: Illegal DMA %d specified\n",
+                               dev->dma);
                         goto out2;
                 }
                 if (request_dma(dev->dma, isa_cardname))
                 {
-                        printk(KERN_INFO "%s: Selected DMA %d not available\n", 
-                               dev->name, dev->dma);
+                        printk(KERN_INFO "skisa.c: Selected DMA %d not available\n",
+                               dev->dma);
                         goto out2;
                 }
         }
  
-       printk(KERN_DEBUG "%s:    IO: %#4lx  IRQ: %d  DMA: %d\n",
-              dev->name, dev->base_addr, dev->irq, dev->dma);
-               
         err = register_netdev(dev);
         if (err)
                 goto out;
  
+       printk(KERN_DEBUG "%s:    IO: %#4lx  IRQ: %d  DMA: %d\n",
+              dev->name, dev->base_addr, dev->irq, dev->dma);
+
         return 0;
  out:
         free_dma(dev->dma);
@@ -275,33 +275,11 @@ out2:
  out3:
         tmsdev_term(dev);
  out4:
-       release_region(dev->base_addr, SK_ISA_IO_EXTENT); 
+       release_region(dev->base_addr, SK_ISA_IO_EXTENT);
+out5:
         return err;
  }
  
-struct net_device * __init sk_isa_probe(int unit)
-{
-       struct net_device *dev = alloc_trdev(sizeof(struct net_local));
-       int err = 0;
-
-       if (!dev)
-               return ERR_PTR(-ENOMEM);
-
-       if (unit >= 0) {
-               sprintf(dev->name, "tr%d", unit);
-               netdev_boot_setup_check(dev);
-       }
-
-       err = setup_card(dev);
-       if (err)
-               goto out;
-
-       return dev;
-out:
-       free_netdev(dev);
-       return ERR_PTR(err);
-}
-
  /*
   * Reads MAC address from adapter RAM, which should've read it from
   * the onboard ROM.  
@@ -361,8 +339,6 @@ static int sk_isa_open(struct net_device *dev)
         return tms380tr_open(dev);
  }
  
-#ifdef MODULE
-
  #define ISATR_MAX_ADAPTERS 3
  
  static int io[ISATR_MAX_ADAPTERS];
@@ -375,13 +351,23 @@ module_param_array(io, int, NULL, 0);
  module_param_array(irq, int, NULL, 0);
  module_param_array(dma, int, NULL, 0);
  
-static struct net_device *sk_isa_dev[ISATR_MAX_ADAPTERS];
+static struct platform_device *sk_isa_dev[ISATR_MAX_ADAPTERS];
  
-int init_module(void)
+static struct device_driver sk_isa_driver = { /* no .probe/.remove: sk_isa_init()/sk_isa_cleanup() handle the cards directly */
+       .name           = "skisa", /* same string sk_isa_init() passes to platform_device_register_simple() */
+       .bus            = &platform_bus_type,
+};
+
+static int __init sk_isa_init(void)
  {
         struct net_device *dev;
+       struct platform_device *pdev;
         int i, num = 0, err = 0;
  
+       err = driver_register(&sk_isa_driver);
+       if (err)
+               return err;
+
         for (i = 0; i < ISATR_MAX_ADAPTERS ; i++) {
                 dev = alloc_trdev(sizeof(struct net_local));
                 if (!dev)
@@ -390,12 +376,15 @@ int init_module(void)
                 dev->base_addr = io[i];
                 dev->irq = irq[i];
                 dev->dma = dma[i];
-               err = setup_card(dev);
-
+               pdev = platform_device_register_simple("skisa",
+                       i, NULL, 0);
+               err = setup_card(dev, &pdev->dev);
                 if (!err) {
-                       sk_isa_dev[i] = dev;
+                       sk_isa_dev[i] = pdev;
+                       dev_set_drvdata(&sk_isa_dev[i]->dev, dev);
                         ++num;
                 } else {
+                       platform_device_unregister(pdev);
                         free_netdev(dev);
                 }
         }
@@ -409,23 +398,28 @@ int init_module(void)
         return (0);
  }
  
-void cleanup_module(void)
+static void __exit sk_isa_cleanup(void) /* module exit: tear down every card recorded in sk_isa_dev[] */
  {
+       struct net_device *dev;
         int i;
  
         for (i = 0; i < ISATR_MAX_ADAPTERS ; i++) {
-               struct net_device *dev = sk_isa_dev[i];
+               struct platform_device *pdev = sk_isa_dev[i]; /* set by sk_isa_init() only when setup_card() succeeded */
  
-               if (!dev) 
+               if (!pdev)
                         continue;
-               
+               dev = dev_get_drvdata(&pdev->dev); /* net_device stored there by sk_isa_init() */
                 unregister_netdev(dev);
                 release_region(dev->base_addr, SK_ISA_IO_EXTENT);
                 free_irq(dev->irq, dev);
                 free_dma(dev->dma);
                 tmsdev_term(dev);
                 free_netdev(dev);
+               dev_set_drvdata(&pdev->dev, NULL); /* dev was just freed; drop the stale pointer */
+               platform_device_unregister(pdev);
         }
+       driver_unregister(&sk_isa_driver); /* registered at the top of sk_isa_init() */
  }
-#endif /* MODULE */
  
+module_init(sk_isa_init);
+module_exit(sk_isa_cleanup);
diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c

index 5e0b0ce98ed7fbe44383c604171bfeaa30b1d6d9..2e39bf1f74620f88cf5d03fce1a71ac0f1e107fb 100644 (file)
--- a/drivers/net/tokenring/tms380tr.c
+++ b/drivers/net/tokenring/tms380tr.c
@@ -62,6 +62,7 @@
   *                             normal operation.
   *     30-Dec-02       JF      Removed incorrect __init from 
   *                             tms380tr_init_card.
+ *     22-Jul-05       JF      Converted to dma-mapping.
   *                             
   *  To do:
   *    1. Multi/Broadcast packet handling (this may have fixed itself)
@@ -89,7 +90,7 @@ static const char version[] = "tms380tr.c: v1.10 30/12/2002 by Christoph Goos, A
  #include <linux/time.h>
  #include <linux/errno.h>
  #include <linux/init.h>
-#include <linux/pci.h>
+#include <linux/dma-mapping.h>
  #include <linux/delay.h>
  #include <linux/netdevice.h>
  #include <linux/etherdevice.h>
@@ -114,8 +115,6 @@ static const char version[] = "tms380tr.c: v1.10 30/12/2002 by Christoph Goos, A
  #endif
  static unsigned int tms380tr_debug = TMS380TR_DEBUG;
  
-static struct device tms_device;
-
  /* Index to functions, as function prototypes.
   * Alphabetical by function name.
   */
@@ -434,7 +433,7 @@ static void tms380tr_init_net_local(struct net_device *dev)
                         skb_put(tp->Rpl[i].Skb, tp->MaxPacketSize);
  
                         /* data unreachable for DMA ? then use local buffer */
-                       dmabuf = pci_map_single(tp->pdev, tp->Rpl[i].Skb->data, tp->MaxPacketSize, PCI_DMA_FROMDEVICE);
+                       dmabuf = dma_map_single(tp->pdev, tp->Rpl[i].Skb->data, tp->MaxPacketSize, DMA_FROM_DEVICE);
                         if(tp->dmalimit && (dmabuf + tp->MaxPacketSize > tp->dmalimit))
                         {
                                 tp->Rpl[i].SkbStat = SKB_DATA_COPY;
@@ -638,10 +637,10 @@ static int tms380tr_hardware_send_packet(struct sk_buff *skb, struct net_device
         /* Is buffer reachable for Busmaster-DMA? */
  
         length  = skb->len;
-       dmabuf = pci_map_single(tp->pdev, skb->data, length, PCI_DMA_TODEVICE);
+       dmabuf = dma_map_single(tp->pdev, skb->data, length, DMA_TO_DEVICE);
         if(tp->dmalimit && (dmabuf + length > tp->dmalimit)) {
                 /* Copy frame to local buffer */
-               pci_unmap_single(tp->pdev, dmabuf, length, PCI_DMA_TODEVICE);
+               dma_unmap_single(tp->pdev, dmabuf, length, DMA_TO_DEVICE);
                 dmabuf  = 0;
                 i       = tp->TplFree->TPLIndex;
                 buf     = tp->LocalTxBuffers[i];
@@ -1284,9 +1283,7 @@ static int tms380tr_reset_adapter(struct net_device *dev)
         unsigned short count, c, count2;
         const struct firmware *fw_entry = NULL;
  
-       strncpy(tms_device.bus_id,dev->name, BUS_ID_SIZE);
-
-       if (request_firmware(&fw_entry, "tms380tr.bin", &tms_device) != 0) {
+       if (request_firmware(&fw_entry, "tms380tr.bin", tp->pdev) != 0) {
                 printk(KERN_ALERT "%s: firmware %s is missing, cannot start.\n",
                         dev->name, "tms380tr.bin");
                 return (-1);
@@ -2021,7 +2018,7 @@ static void tms380tr_cancel_tx_queue(struct net_local* tp)
  
                 printk(KERN_INFO "Cancel tx (%08lXh).\n", (unsigned long)tpl);
                 if (tpl->DMABuff)
-                       pci_unmap_single(tp->pdev, tpl->DMABuff, tpl->Skb->len, PCI_DMA_TODEVICE);
+                       dma_unmap_single(tp->pdev, tpl->DMABuff, tpl->Skb->len, DMA_TO_DEVICE);
                 dev_kfree_skb_any(tpl->Skb);
         }
  
@@ -2090,7 +2087,7 @@ static void tms380tr_tx_status_irq(struct net_device *dev)
  
                 tp->MacStat.tx_packets++;
                 if (tpl->DMABuff)
-                       pci_unmap_single(tp->pdev, tpl->DMABuff, tpl->Skb->len, PCI_DMA_TODEVICE);
+                       dma_unmap_single(tp->pdev, tpl->DMABuff, tpl->Skb->len, DMA_TO_DEVICE);
                 dev_kfree_skb_irq(tpl->Skb);
                 tpl->BusyFlag = 0;      /* "free" TPL */
         }
@@ -2209,7 +2206,7 @@ static void tms380tr_rcv_status_irq(struct net_device *dev)
                                 tp->MacStat.rx_errors++;
                 }
                 if (rpl->DMABuff)
-                       pci_unmap_single(tp->pdev, rpl->DMABuff, tp->MaxPacketSize, PCI_DMA_TODEVICE);
+                       dma_unmap_single(tp->pdev, rpl->DMABuff, tp->MaxPacketSize, DMA_TO_DEVICE);
                 rpl->DMABuff = 0;
  
                 /* Allocate new skb for rpl */
@@ -2227,7 +2224,7 @@ static void tms380tr_rcv_status_irq(struct net_device *dev)
                         skb_put(rpl->Skb, tp->MaxPacketSize);
  
                         /* Data unreachable for DMA ? then use local buffer */
-                       dmabuf = pci_map_single(tp->pdev, rpl->Skb->data, tp->MaxPacketSize, PCI_DMA_FROMDEVICE);
+                       dmabuf = dma_map_single(tp->pdev, rpl->Skb->data, tp->MaxPacketSize, DMA_FROM_DEVICE);
                         if(tp->dmalimit && (dmabuf + tp->MaxPacketSize > tp->dmalimit))
                         {
                                 rpl->SkbStat = SKB_DATA_COPY;
@@ -2332,23 +2329,26 @@ void tmsdev_term(struct net_device *dev)
         struct net_local *tp;
  
         tp = netdev_priv(dev);
-       pci_unmap_single(tp->pdev, tp->dmabuffer, sizeof(struct net_local),
-               PCI_DMA_BIDIRECTIONAL);
+       dma_unmap_single(tp->pdev, tp->dmabuffer, sizeof(struct net_local),
+               DMA_BIDIRECTIONAL);
  }
  
-int tmsdev_init(struct net_device *dev, unsigned long dmalimit, 
-               struct pci_dev *pdev)
+int tmsdev_init(struct net_device *dev, struct device *pdev)
  {
         struct net_local *tms_local;
  
         memset(dev->priv, 0, sizeof(struct net_local));
         tms_local = netdev_priv(dev);
         init_waitqueue_head(&tms_local->wait_for_tok_int);
-       tms_local->dmalimit = dmalimit;
+       if (pdev->dma_mask)
+               tms_local->dmalimit = *pdev->dma_mask;
+       else
+               return -ENOMEM;
         tms_local->pdev = pdev;
-       tms_local->dmabuffer = pci_map_single(pdev, (void *)tms_local,
-           sizeof(struct net_local), PCI_DMA_BIDIRECTIONAL);
-       if (tms_local->dmabuffer + sizeof(struct net_local) > dmalimit)
+       tms_local->dmabuffer = dma_map_single(pdev, (void *)tms_local,
+           sizeof(struct net_local), DMA_BIDIRECTIONAL);
+       if (tms_local->dmabuffer + sizeof(struct net_local) > 
+                       tms_local->dmalimit)
         {
                 printk(KERN_INFO "%s: Memory not accessible for DMA\n",
                         dev->name);
@@ -2370,8 +2370,6 @@ int tmsdev_init(struct net_device *dev, unsigned long dmalimit,
         return 0;
  }
  
-#ifdef MODULE
-
  EXPORT_SYMBOL(tms380tr_open);
  EXPORT_SYMBOL(tms380tr_close);
  EXPORT_SYMBOL(tms380tr_interrupt);
@@ -2379,6 +2377,8 @@ EXPORT_SYMBOL(tmsdev_init);
  EXPORT_SYMBOL(tmsdev_term);
  EXPORT_SYMBOL(tms380tr_wait);
  
+#ifdef MODULE
+
  static struct module *TMS380_module = NULL;
  
  int init_module(void)
diff --git a/drivers/net/tokenring/tms380tr.h b/drivers/net/tokenring/tms380tr.h

index f2c5ba0f37a550a009483cdc6887fde76136cbc0..30452c67bb68517cfba3236458c35d5c1557cfb4 100644 (file)
--- a/drivers/net/tokenring/tms380tr.h
+++ b/drivers/net/tokenring/tms380tr.h
@@ -17,8 +17,7 @@
  int tms380tr_open(struct net_device *dev);
  int tms380tr_close(struct net_device *dev);
  irqreturn_t tms380tr_interrupt(int irq, void *dev_id, struct pt_regs *regs);
-int tmsdev_init(struct net_device *dev, unsigned long dmalimit,
-               struct pci_dev *pdev);
+int tmsdev_init(struct net_device *dev, struct device *pdev);
  void tmsdev_term(struct net_device *dev);
  void tms380tr_wait(unsigned long time);
  
@@ -719,7 +718,7 @@ struct s_TPL {      /* Transmit Parameter List (align on even word boundaries) */
         struct sk_buff *Skb;
         unsigned char TPLIndex;
         volatile unsigned char BusyFlag;/* Flag: TPL busy? */
-       dma_addr_t DMABuff;             /* DMA IO bus address from pci_map */
+       dma_addr_t DMABuff;             /* DMA IO bus address from dma_map */
  };
  
  /* ---------------------Receive Functions-------------------------------*
@@ -1060,7 +1059,7 @@ struct s_RPL {    /* Receive Parameter List */
         struct sk_buff *Skb;
         SKB_STAT SkbStat;
         int RPLIndex;
-       dma_addr_t DMABuff;             /* DMA IO bus address from pci_map */
+       dma_addr_t DMABuff;             /* DMA IO bus address from dma_map */
  };
  
  /* Information that need to be kept for each board. */
@@ -1091,7 +1090,7 @@ typedef struct net_local {
         RPL *RplTail;
         unsigned char LocalRxBuffers[RPL_NUM][DEFAULT_PACKET_SIZE];
  
-       struct pci_dev *pdev;
+       struct device *pdev;
         int DataRate;
         unsigned char ScbInUse;
         unsigned short CMDqueue;
diff --git a/drivers/net/tokenring/tmspci.c b/drivers/net/tokenring/tmspci.c

index 2e18c0a464828f60960b30c48b1d8873cb28b513..ab47c0547a3ba0ada36f835a7a6a88d5b2ba39a4 100644 (file)
--- a/drivers/net/tokenring/tmspci.c
+++ b/drivers/net/tokenring/tmspci.c
@@ -100,7 +100,7 @@ static int __devinit tms_pci_attach(struct pci_dev *pdev, const struct pci_devic
         unsigned int pci_irq_line;
         unsigned long pci_ioaddr;
         struct card_info *cardinfo = &card_info_table[ent->driver_data];
-               
+
         if (versionprinted++ == 0)
                 printk("%s", version);
  
@@ -143,7 +143,7 @@ static int __devinit tms_pci_attach(struct pci_dev *pdev, const struct pci_devic
                 printk(":%2.2x", dev->dev_addr[i]);
         printk("\n");
                 
-       ret = tmsdev_init(dev, PCI_MAX_ADDRESS, pdev);
+       ret = tmsdev_init(dev, &pdev->dev);
         if (ret) {
                 printk("%s: unable to get memory for dev->priv.\n", dev->name);
                 goto err_out_irq;
diff --git a/drivers/net/wan/cycx_drv.c b/drivers/net/wan/cycx_drv.c

index 6e74af62ca08c73cd53f27e4d73945a4ee1387ad..9e56fc346ba4f72b53e0031167803165bf0dde1d 100644 (file)
--- a/drivers/net/wan/cycx_drv.c
+++ b/drivers/net/wan/cycx_drv.c
@@ -56,7 +56,7 @@
  #include <linux/sched.h>       /* for jiffies, HZ, etc. */
  #include <linux/cycx_drv.h>    /* API definitions */
  #include <linux/cycx_cfm.h>    /* CYCX firmware module definitions */
-#include <linux/delay.h>       /* udelay */
+#include <linux/delay.h>       /* udelay, msleep_interruptible */
  #include <asm/io.h>            /* read[wl], write[wl], ioremap, iounmap */
  
  #define        MOD_VERSION     0
@@ -74,7 +74,6 @@ static int reset_cyc2x(void __iomem *addr);
  static int detect_cyc2x(void __iomem *addr);
  
  /* Miscellaneous functions */
-static void delay_cycx(int sec);
  static int get_option_index(long *optlist, long optval);
  static u16 checksum(u8 *buf, u32 len);
  
@@ -259,7 +258,7 @@ static int memory_exists(void __iomem *addr)
                         if (readw(addr + 0x10) == TEST_PATTERN)
                                 return 1;
  
-               delay_cycx(1);
+               msleep_interruptible(1 * 1000);
         }
  
         return 0;
@@ -316,7 +315,7 @@ static void cycx_reset_boot(void __iomem *addr, u8 *code, u32 len)
  
         /* 80186 was in hold, go */
         writeb(0, addr + START_CPU);
-       delay_cycx(1);
+       msleep_interruptible(1 * 1000);
  }
  
  /* Load data.bin file through boot (reset) interface. */
@@ -462,13 +461,13 @@ static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len)
                 cycx_reset_boot(hw->dpmbase, reset_image, img_hdr->reset_size);
                 /* reset is waiting for boot */
                 writew(GEN_POWER_ON, pt_cycld);
-               delay_cycx(1);
+               msleep_interruptible(1 * 1000);
  
                 for (j = 0 ; j < 3 ; j++)
                         if (!readw(pt_cycld))
                                 goto reset_loaded;
                         else
-                               delay_cycx(1);
+                               msleep_interruptible(1 * 1000);
         }
  
         printk(KERN_ERR "%s: reset not started.\n", modname);
@@ -495,7 +494,7 @@ reset_loaded:
  
         /* Arthur Ganzert's tip: wait a while after the firmware loading...
            seg abr 26 17:17:12 EST 1999 - acme */
-       delay_cycx(7);
+       msleep_interruptible(7 * 1000);
         printk(KERN_INFO "%s: firmware loaded!\n", modname);
  
         /* enable interrupts */
@@ -547,20 +546,13 @@ static int get_option_index(long *optlist, long optval)
  static int reset_cyc2x(void __iomem *addr)
  {
         writeb(0, addr + RST_ENABLE);
-       delay_cycx(2);
+       msleep_interruptible(2 * 1000);
         writeb(0, addr + RST_DISABLE);
-       delay_cycx(2);
+       msleep_interruptible(2 * 1000);
  
         return memory_exists(addr);
  }
  
-/* Delay */
-static void delay_cycx(int sec)
-{
-       set_current_state(TASK_INTERRUPTIBLE);
-       schedule_timeout(sec * HZ);
-}
-
  /* Calculate 16-bit CRC using CCITT polynomial. */
  static u16 checksum(u8 *buf, u32 len)
  {
diff --git a/drivers/net/wan/hdlc_generic.c b/drivers/net/wan/hdlc_generic.c

index a63f6a2cc4f7d75746a8ba3869c7a62d649fad91..cdd4c09c2d90355adee28e09d42f560ab42d040f 100644 (file)
--- a/drivers/net/wan/hdlc_generic.c
+++ b/drivers/net/wan/hdlc_generic.c
@@ -61,7 +61,7 @@ static struct net_device_stats *hdlc_get_stats(struct net_device *dev)
  
  
  static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev,
-                   struct packet_type *p)
+                   struct packet_type *p, struct net_device *orig_dev)
  {
         hdlc_device *hdlc = dev_to_hdlc(dev);
         if (hdlc->proto.netif_rx)
diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c

index 7f2e3653c5e5b5dcfad4da1c4642786b9486f593..6c302e9dbca2e2ee2aeabf570e0c37cca193c88b 100644 (file)
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -86,7 +86,7 @@ static __inline__ int dev_is_ethdev(struct net_device *dev)
  /*
   *     Receive a LAPB frame via an ethernet interface.
   */
-static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype)
+static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev)
  {
         int len, err;
         struct lapbethdev *lapbeth;
diff --git a/drivers/net/wan/sdla_fr.c b/drivers/net/wan/sdla_fr.c

index c5f5e62aab8b8c31814fea2fa900799db0c061c3..0497dbdb8631fe379c7f952dc2c50ee7c21bd814 100644 (file)
--- a/drivers/net/wan/sdla_fr.c
+++ b/drivers/net/wan/sdla_fr.c
@@ -445,7 +445,7 @@ void        s508_s514_unlock(sdla_t *card, unsigned long *smp_flags);
  void   s508_s514_lock(sdla_t *card, unsigned long *smp_flags);
  
  unsigned short calc_checksum (char *, int);
-static int setup_fr_header(struct sk_buff** skb,
+static int setup_fr_header(struct sk_buff *skb,
                            struct net_device* dev, char op_mode);
  
  
@@ -1372,7 +1372,7 @@ static int if_send(struct sk_buff* skb, struct net_device* dev)
         /* Move the if_header() code to here. By inserting frame
          * relay header in if_header() we would break the
          * tcpdump and other packet sniffers */
-       chan->fr_header_len = setup_fr_header(&skb,dev,chan->common.usedby);
+       chan->fr_header_len = setup_fr_header(skb,dev,chan->common.usedby);
         if (chan->fr_header_len < 0 ){
                 ++chan->ifstats.tx_dropped;
                 ++card->wandev.stats.tx_dropped;
@@ -1597,8 +1597,6 @@ static int setup_for_delayed_transmit(struct net_device* dev,
                 return 1;
         }
  
-       skb_unlink(skb);
-       
          chan->transmit_length = len;
         chan->delay_skb = skb;
          
@@ -4871,18 +4869,15 @@ static void unconfig_fr (sdla_t *card)
         }
  }
  
-static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev,
+static int setup_fr_header(struct sk_buff *skb, struct net_device* dev,
                            char op_mode)
  {
-       struct sk_buff *skb = *skb_orig;
         fr_channel_t *chan=dev->priv;
  
-       if (op_mode == WANPIPE){
-
+       if (op_mode == WANPIPE) {
                 chan->fr_header[0]=Q922_UI;
                 
                 switch (htons(skb->protocol)){
-                       
                 case ETH_P_IP:
                         chan->fr_header[1]=NLPID_IP;
                         break;
@@ -4894,16 +4889,14 @@ static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev,
         }
  
         /* If we are in bridging mode, we must apply
-        * an Ethernet header */
-       if (op_mode == BRIDGE || op_mode == BRIDGE_NODE){
-
-
+        * an Ethernet header
+        */
+       if (op_mode == BRIDGE || op_mode == BRIDGE_NODE) {
                 /* Encapsulate the packet as a bridged Ethernet frame. */
  #ifdef DEBUG
                 printk(KERN_INFO "%s: encapsulating skb for frame relay\n", 
                         dev->name);
  #endif
-               
                 chan->fr_header[0] = 0x03;
                 chan->fr_header[1] = 0x00;
                 chan->fr_header[2] = 0x80;
@@ -4916,7 +4909,6 @@ static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev,
                 /* Yuck. */
                 skb->protocol = ETH_P_802_3;
                 return 8;
-
         }
                 
         return 0;
diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c

index 84b65c60c799689bd772a5cd33939920dd4acebe..f58c794a963aed4fbe813d5ede9ff8cd913de807 100644 (file)
--- a/drivers/net/wan/syncppp.c
+++ b/drivers/net/wan/syncppp.c
@@ -1447,7 +1447,7 @@ static void sppp_print_bytes (u_char *p, u16 len)
   *     after interrupt servicing to process frames queued via netif_rx.
   */
  
-static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p)
+static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev)
  {
         if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
                 return NET_RX_DROP;
diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig

index 1d3231cc471acff346ef9dd6425d66c1b96c4276..ec3f75a030d2073248791d443d9f432027dd35bf 100644 (file)
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -270,7 +270,7 @@ config PCMCIA_HERMES
  
  config AIRO_CS
         tristate "Cisco/Aironet 34X/35X/4500/4800 PCMCIA cards"
-       depends on NET_RADIO && PCMCIA
+       depends on NET_RADIO && PCMCIA && (BROKEN || !M32R)
         ---help---
           This is the standard Linux driver to support Cisco/Aironet PCMCIA
           802.11 wireless cards.  This driver is the same as the Aironet
diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c

index aabcdc2be05ea925fee9d685bbc4d38bcd8c5e0f..9c2d07cde0101a311c298bedba42d318861a9d34 100644 (file)
--- a/drivers/net/wireless/orinoco.c
+++ b/drivers/net/wireless/orinoco.c
@@ -4322,36 +4322,36 @@ static const struct iw_priv_args orinoco_privtab[] = {
   */
  
  static const iw_handler        orinoco_handler[] = {
-       [SIOCSIWCOMMIT-SIOCIWFIRST] (iw_handler) orinoco_ioctl_commit,
-       [SIOCGIWNAME  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getname,
-       [SIOCSIWFREQ  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setfreq,
-       [SIOCGIWFREQ  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getfreq,
-       [SIOCSIWMODE  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setmode,
-       [SIOCGIWMODE  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getmode,
-       [SIOCSIWSENS  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setsens,
-       [SIOCGIWSENS  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getsens,
-       [SIOCGIWRANGE -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getiwrange,
-       [SIOCSIWSPY   -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setspy,
-       [SIOCGIWSPY   -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getspy,
-       [SIOCSIWAP    -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setwap,
-       [SIOCGIWAP    -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getwap,
-       [SIOCSIWSCAN  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setscan,
-       [SIOCGIWSCAN  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getscan,
-       [SIOCSIWESSID -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setessid,
-       [SIOCGIWESSID -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getessid,
-       [SIOCSIWNICKN -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setnick,
-       [SIOCGIWNICKN -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getnick,
-       [SIOCSIWRATE  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setrate,
-       [SIOCGIWRATE  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getrate,
-       [SIOCSIWRTS   -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setrts,
-       [SIOCGIWRTS   -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getrts,
-       [SIOCSIWFRAG  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setfrag,
-       [SIOCGIWFRAG  -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getfrag,
-       [SIOCGIWRETRY -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getretry,
-       [SIOCSIWENCODE-SIOCIWFIRST] (iw_handler) orinoco_ioctl_setiwencode,
-       [SIOCGIWENCODE-SIOCIWFIRST] (iw_handler) orinoco_ioctl_getiwencode,
-       [SIOCSIWPOWER -SIOCIWFIRST] (iw_handler) orinoco_ioctl_setpower,
-       [SIOCGIWPOWER -SIOCIWFIRST] (iw_handler) orinoco_ioctl_getpower,
+       [SIOCSIWCOMMIT-SIOCIWFIRST] = (iw_handler) orinoco_ioctl_commit,
+       [SIOCGIWNAME  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getname,
+       [SIOCSIWFREQ  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setfreq,
+       [SIOCGIWFREQ  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getfreq,
+       [SIOCSIWMODE  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setmode,
+       [SIOCGIWMODE  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getmode,
+       [SIOCSIWSENS  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setsens,
+       [SIOCGIWSENS  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getsens,
+       [SIOCGIWRANGE -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getiwrange,
+       [SIOCSIWSPY   -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setspy,
+       [SIOCGIWSPY   -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getspy,
+       [SIOCSIWAP    -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setwap,
+       [SIOCGIWAP    -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getwap,
+       [SIOCSIWSCAN  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setscan,
+       [SIOCGIWSCAN  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getscan,
+       [SIOCSIWESSID -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setessid,
+       [SIOCGIWESSID -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getessid,
+       [SIOCSIWNICKN -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setnick,
+       [SIOCGIWNICKN -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getnick,
+       [SIOCSIWRATE  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setrate,
+       [SIOCGIWRATE  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getrate,
+       [SIOCSIWRTS   -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setrts,
+       [SIOCGIWRTS   -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getrts,
+       [SIOCSIWFRAG  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setfrag,
+       [SIOCGIWFRAG  -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getfrag,
+       [SIOCGIWRETRY -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getretry,
+       [SIOCSIWENCODE-SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setiwencode,
+       [SIOCGIWENCODE-SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getiwencode,
+       [SIOCSIWPOWER -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_setpower,
+       [SIOCGIWPOWER -SIOCIWFIRST] = (iw_handler) orinoco_ioctl_getpower,
  };
  
  
@@ -4359,15 +4359,15 @@ static const iw_handler orinoco_handler[] = {
    Added typecasting since we no longer use iwreq_data -- Moustafa
   */
  static const iw_handler        orinoco_private_handler[] = {
-       [0] (iw_handler) orinoco_ioctl_reset,
-       [1] (iw_handler) orinoco_ioctl_reset,
-       [2] (iw_handler) orinoco_ioctl_setport3,
-       [3] (iw_handler) orinoco_ioctl_getport3,
-       [4] (iw_handler) orinoco_ioctl_setpreamble,
-       [5] (iw_handler) orinoco_ioctl_getpreamble,
-       [6] (iw_handler) orinoco_ioctl_setibssport,
-       [7] (iw_handler) orinoco_ioctl_getibssport,
-       [9] (iw_handler) orinoco_ioctl_getrid,
+       [0] = (iw_handler) orinoco_ioctl_reset,
+       [1] = (iw_handler) orinoco_ioctl_reset,
+       [2] = (iw_handler) orinoco_ioctl_setport3,
+       [3] = (iw_handler) orinoco_ioctl_getport3,
+       [4] = (iw_handler) orinoco_ioctl_setpreamble,
+       [5] = (iw_handler) orinoco_ioctl_getpreamble,
+       [6] = (iw_handler) orinoco_ioctl_setibssport,
+       [7] = (iw_handler) orinoco_ioctl_getibssport,
+       [9] = (iw_handler) orinoco_ioctl_getrid,
  };
  
  static const struct iw_handler_def orinoco_handler_def = {
diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig

index 16a2e6ae37f4b579bad45b50617d0ab62f524e4f..725a14119f2a4358706c068c6031ba8d7d64da72 100644 (file)
--- a/drivers/parport/Kconfig
+++ b/drivers/parport/Kconfig
@@ -34,7 +34,7 @@ config PARPORT
  
  config PARPORT_PC
         tristate "PC-style hardware"
-       depends on PARPORT && (!SPARC64 || PCI) && !SPARC32
+       depends on PARPORT && (!SPARC64 || PCI) && !SPARC32 && !M32R
         ---help---
           You should say Y here if you have a PC-style parallel port. All
           IBM PC compatible computers and some Alphas have PC-style
diff --git a/drivers/parport/parport_serial.c b/drivers/parport/parport_serial.c

index 00498e2f12059f1cce74a37f008236ae8e6fc98e..d3dad0aac7cb88aafeb71474d4a225b9af703cce 100644 (file)
--- a/drivers/parport/parport_serial.c
+++ b/drivers/parport/parport_serial.c
@@ -23,13 +23,8 @@
  #include <linux/pci.h>
  #include <linux/parport.h>
  #include <linux/parport_pc.h>
-#include <linux/serial.h>
-#include <linux/serialP.h>
-#include <linux/list.h>
  #include <linux/8250_pci.h>
  
-#include <asm/serial.h>
-
  enum parport_pc_pci_cards {
         titan_110l = 0,
         titan_210l,
@@ -168,182 +163,147 @@ static struct pci_device_id parport_serial_pci_tbl[] = {
  };
  MODULE_DEVICE_TABLE(pci,parport_serial_pci_tbl);
  
-struct pci_board_no_ids {
-       int flags;
-       int num_ports;
-       int base_baud;
-       int uart_offset;
-       int reg_shift;
-       int (*init_fn)(struct pci_dev *dev, struct pci_board_no_ids *board,
-                       int enable);
-       int first_uart_offset;
-};
-
-static int __devinit siig10x_init_fn(struct pci_dev *dev, struct pci_board_no_ids *board, int enable)
-{
-       return pci_siig10x_fn(dev, enable);
-}
-
-static int __devinit siig20x_init_fn(struct pci_dev *dev, struct pci_board_no_ids *board, int enable)
-{
-       return pci_siig20x_fn(dev, enable);
-}
-
-static int __devinit netmos_serial_init(struct pci_dev *dev, struct pci_board_no_ids *board, int enable)
-{
-       board->num_ports = dev->subsystem_device & 0xf;
-       return 0;
-}
-
-static struct pci_board_no_ids pci_boards[] __devinitdata = {
-       /*
-        * PCI Flags, Number of Ports, Base (Maximum) Baud Rate,
-        * Offset to get to next UART's registers,
-        * Register shift to use for memory-mapped I/O,
-        * Initialization function, first UART offset
-        */
-
-// Cards not tested are marked n/t
-// If you have one of these cards and it works for you, please tell me..
-
-/* titan_110l */       { SPCI_FL_BASE1 | SPCI_FL_BASE_TABLE, 1, 921600 },
-/* titan_210l */       { SPCI_FL_BASE1 | SPCI_FL_BASE_TABLE, 2, 921600 },
-/* netmos_9xx5_combo */        { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200, 0, 0, netmos_serial_init },
-/* netmos_9855 */      { SPCI_FL_BASE2 | SPCI_FL_BASE_TABLE, 1, 115200, 0, 0, netmos_serial_init },
-/* avlab_1s1p (n/t) */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 },
-/* avlab_1s1p_650 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 },
-/* avlab_1s1p_850 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 },
-/* avlab_1s2p (n/t) */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 },
-/* avlab_1s2p_650 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 },
-/* avlab_1s2p_850 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 },
-/* avlab_2s1p (n/t) */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 2, 115200 },
-/* avlab_2s1p_650 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 2, 115200 },
-/* avlab_2s1p_850 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 2, 115200 },
-/* siig_1s1p_10x */    { SPCI_FL_BASE2, 1, 460800, 0, 0, siig10x_init_fn },
-/* siig_2s1p_10x */    { SPCI_FL_BASE2, 1, 921600, 0, 0, siig10x_init_fn },
-/* siig_2p1s_20x */    { SPCI_FL_BASE0, 1, 921600, 0, 0, siig20x_init_fn },
-/* siig_1s1p_20x */    { SPCI_FL_BASE0, 1, 921600, 0, 0, siig20x_init_fn },
-/* siig_2s1p_20x */    { SPCI_FL_BASE0, 1, 921600, 0, 0, siig20x_init_fn },
+/*
+ * This table describes the serial "geometry" of these boards.  Any
+ * quirks for these can be found in drivers/serial/8250_pci.c
+ *
+ * Cards not tested are marked n/t
+ * If you have one of these cards and it works for you, please tell me..
+ */
+static struct pciserial_board pci_parport_serial_boards[] __devinitdata = {
+       [titan_110l] = {
+               .flags          = FL_BASE1 | FL_BASE_BARS,
+               .num_ports      = 1,
+               .base_baud      = 921600,
+               .uart_offset    = 8,
+       },
+       [titan_210l] = {
+               .flags          = FL_BASE1 | FL_BASE_BARS,
+               .num_ports      = 2,
+               .base_baud      = 921600,
+               .uart_offset    = 8,
+       },
+       [netmos_9xx5_combo] = {
+               .flags          = FL_BASE0 | FL_BASE_BARS,
+               .num_ports      = 1,
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+       },
+       [netmos_9855] = {
+               .flags          = FL_BASE2 | FL_BASE_BARS,
+               .num_ports      = 1,
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+       },
+       [avlab_1s1p] = { /* n/t */
+               .flags          = FL_BASE0 | FL_BASE_BARS,
+               .num_ports      = 1,
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+       },
+       [avlab_1s1p_650] = { /* nt */
+               .flags          = FL_BASE0 | FL_BASE_BARS,
+               .num_ports      = 1,
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+       },
+       [avlab_1s1p_850] = { /* nt */
+               .flags          = FL_BASE0 | FL_BASE_BARS,
+               .num_ports      = 1,
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+       },
+       [avlab_1s2p] = { /* n/t */
+               .flags          = FL_BASE0 | FL_BASE_BARS,
+               .num_ports      = 1,
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+       },
+       [avlab_1s2p_650] = { /* nt */
+               .flags          = FL_BASE0 | FL_BASE_BARS,
+               .num_ports      = 1,
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+       },
+       [avlab_1s2p_850] = { /* nt */
+               .flags          = FL_BASE0 | FL_BASE_BARS,
+               .num_ports      = 1,
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+       },
+       [avlab_2s1p] = { /* n/t */
+               .flags          = FL_BASE0 | FL_BASE_BARS,
+               .num_ports      = 2,
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+       },
+       [avlab_2s1p_650] = { /* nt */
+               .flags          = FL_BASE0 | FL_BASE_BARS,
+               .num_ports      = 2,
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+       },
+       [avlab_2s1p_850] = { /* nt */
+               .flags          = FL_BASE0 | FL_BASE_BARS,
+               .num_ports      = 2,
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+       },
+       [siig_1s1p_10x] = {
+               .flags          = FL_BASE2,
+               .num_ports      = 1,
+               .base_baud      = 460800,
+               .uart_offset    = 8,
+       },
+       [siig_2s1p_10x] = {
+               .flags          = FL_BASE2,
+               .num_ports      = 1,
+               .base_baud      = 921600,
+               .uart_offset    = 8,
+       },
+       [siig_2p1s_20x] = {
+               .flags          = FL_BASE0,
+               .num_ports      = 1,
+               .base_baud      = 921600,
+               .uart_offset    = 8,
+       },
+       [siig_1s1p_20x] = {
+               .flags          = FL_BASE0,
+               .num_ports      = 1,
+               .base_baud      = 921600,
+               .uart_offset    = 8,
+       },
+       [siig_2s1p_20x] = {
+               .flags          = FL_BASE0,
+               .num_ports      = 1,
+               .base_baud      = 921600,
+               .uart_offset    = 8,
+       },
  };
  
  struct parport_serial_private {
-       int num_ser;
-       int line[20];
-       struct pci_board_no_ids ser;
+       struct serial_private   *serial;
         int num_par;
         struct parport *port[PARPORT_MAX];
         struct parport_pc_pci par;
  };
  
-static int __devinit get_pci_port (struct pci_dev *dev,
-                                  struct pci_board_no_ids *board,
-                                  struct serial_struct *req,
-                                  int idx)
-{
-       unsigned long port;
-       int base_idx;
-       int max_port;
-       int offset;
-
-       base_idx = SPCI_FL_GET_BASE(board->flags);
-       if (board->flags & SPCI_FL_BASE_TABLE)
-               base_idx += idx;
-
-       if (board->flags & SPCI_FL_REGION_SZ_CAP) {
-               max_port = pci_resource_len(dev, base_idx) / 8;
-               if (idx >= max_port)
-                       return 1;
-       }
-                       
-       offset = board->first_uart_offset;
-
-       /* Timedia/SUNIX uses a mixture of BARs and offsets */
-       /* Ugh, this is ugly as all hell --- TYT */
-       if(dev->vendor == PCI_VENDOR_ID_TIMEDIA )  /* 0x1409 */
-               switch(idx) {
-                       case 0: base_idx=0;
-                               break;
-                       case 1: base_idx=0; offset=8;
-                               break;
-                       case 2: base_idx=1; 
-                               break;
-                       case 3: base_idx=1; offset=8;
-                               break;
-                       case 4: /* BAR 2*/
-                       case 5: /* BAR 3 */
-                       case 6: /* BAR 4*/
-                       case 7: base_idx=idx-2; /* BAR 5*/
-               }
-  
-       port =  pci_resource_start(dev, base_idx) + offset;
-
-       if ((board->flags & SPCI_FL_BASE_TABLE) == 0)
-               port += idx * (board->uart_offset ? board->uart_offset : 8);
-
-       if (pci_resource_flags (dev, base_idx) & IORESOURCE_IO) {
-               int high_bits_offset = ((sizeof(long)-sizeof(int))*8);
-               req->port = port;
-               if (high_bits_offset)
-                       req->port_high = port >> high_bits_offset;
-               else
-                       req->port_high = 0;
-               return 0;
-       }
-       req->io_type = SERIAL_IO_MEM;
-       req->iomem_base = ioremap(port, board->uart_offset);
-       req->iomem_reg_shift = board->reg_shift;
-       req->port = 0;
-       return req->iomem_base ? 0 : 1;
-}
-
  /* Register the serial port(s) of a PCI card. */
  static int __devinit serial_register (struct pci_dev *dev,
                                       const struct pci_device_id *id)
  {
-       struct pci_board_no_ids *board;
         struct parport_serial_private *priv = pci_get_drvdata (dev);
-       struct serial_struct serial_req;
-       int base_baud;
-       int k;
-       int success = 0;
-
-       priv->ser = pci_boards[id->driver_data];
-       board = &priv->ser;
-       if (board->init_fn && ((board->init_fn) (dev, board, 1) != 0))
-               return 1;
-
-       base_baud = board->base_baud;
-       if (!base_baud)
-               base_baud = BASE_BAUD;
-       memset (&serial_req, 0, sizeof (serial_req));
-
-       for (k = 0; k < board->num_ports; k++) {
-               int line;
+       struct pciserial_board *board;
+       struct serial_private *serial;
  
-               if (priv->num_ser == ARRAY_SIZE (priv->line)) {
-                       printk (KERN_WARNING
-                               "parport_serial: %s: only %u serial lines "
-                               "supported (%d reported)\n", pci_name (dev),
-                               ARRAY_SIZE (priv->line), board->num_ports);
-                       break;
-               }
+       board = &pci_parport_serial_boards[id->driver_data];
+       serial = pciserial_init_ports(dev, board);
  
-               serial_req.irq = dev->irq;
-               if (get_pci_port (dev, board, &serial_req, k))
-                       break;
-               serial_req.flags = ASYNC_SKIP_TEST | ASYNC_AUTOPROBE;
-               serial_req.baud_base = base_baud;
-               line = register_serial (&serial_req);
-               if (line < 0) {
-                       printk (KERN_DEBUG
-                               "parport_serial: register_serial failed\n");
-                       continue;
-               }
-               priv->line[priv->num_ser++] = line;
-               success = 1;
-       }
+       if (IS_ERR(serial))
+               return PTR_ERR(serial);
  
-       return success ? 0 : 1;
+       priv->serial = serial;
+       return 0;
  }
  
  /* Register the parallel port(s) of a PCI card. */
@@ -411,7 +371,7 @@ static int __devinit parport_serial_pci_probe (struct pci_dev *dev,
         priv = kmalloc (sizeof *priv, GFP_KERNEL);
         if (!priv)
                 return -ENOMEM;
-       priv->num_ser = priv->num_par = 0;
+       memset(priv, 0, sizeof(struct parport_serial_private));
         pci_set_drvdata (dev, priv);
  
         err = pci_enable_device (dev);
@@ -444,15 +404,12 @@ static void __devexit parport_serial_pci_remove (struct pci_dev *dev)
         struct parport_serial_private *priv = pci_get_drvdata (dev);
         int i;
  
+       pci_set_drvdata(dev, NULL);
+
         // Serial ports
-       for (i = 0; i < priv->num_ser; i++) {
-               unregister_serial (priv->line[i]);
+       if (priv->serial)
+               pciserial_remove_ports(priv->serial);
  
-               if (priv->ser.init_fn)
-                       (priv->ser.init_fn) (dev, &priv->ser, 0);
-       }
-       pci_set_drvdata (dev, NULL);
-       
         // Parallel ports
         for (i = 0; i < priv->num_par; i++)
                 parport_pc_unregister_port (priv->port[i]);
@@ -461,11 +418,47 @@ static void __devexit parport_serial_pci_remove (struct pci_dev *dev)
         return;
  }
  
+static int parport_serial_pci_suspend(struct pci_dev *dev, pm_message_t state)
+{
+       struct parport_serial_private *priv = pci_get_drvdata(dev);
+
+       if (priv->serial)
+               pciserial_suspend_ports(priv->serial);
+
+       /* FIXME: What about parport? */
+
+       pci_save_state(dev);
+       pci_set_power_state(dev, pci_choose_state(dev, state));
+       return 0;
+}
+
+static int parport_serial_pci_resume(struct pci_dev *dev)
+{
+       struct parport_serial_private *priv = pci_get_drvdata(dev);
+
+       pci_set_power_state(dev, PCI_D0);
+       pci_restore_state(dev);
+
+       /*
+        * The device may have been disabled.  Re-enable it.
+        */
+       pci_enable_device(dev);
+
+       if (priv->serial)
+               pciserial_resume_ports(priv->serial);
+
+       /* FIXME: What about parport? */
+
+       return 0;
+}
+
  static struct pci_driver parport_serial_pci_driver = {
         .name           = "parport_serial",
         .id_table       = parport_serial_pci_tbl,
         .probe          = parport_serial_pci_probe,
         .remove         = __devexit_p(parport_serial_pci_remove),
+       .suspend        = parport_serial_pci_suspend,
+       .resume         = parport_serial_pci_resume,
  };
  
  
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c

index fedae89d8f7d5678b7b7126a084b8a49a9939858..fb9a11243d2a1fbff876749a997617cfb4013670 100644 (file)
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -60,7 +60,9 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
                         continue;
  
                 /* Ok, try it out.. */
-               ret = allocate_resource(r, res, size, min, -1, align,
+               ret = allocate_resource(r, res, size,
+                                       r->start ? : min,
+                                       -1, align,
                                         alignf, alignf_data);
                 if (ret == 0)
                         break;
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h

index 46b294a12418a65eb58055c2535a9e2f23bd39d5..2b92b9e8c910a34ddf9e118e292c583ec84b2b37 100644 (file)
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  #ifndef _PCIEHP_H
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c

index df4915dbc321ca21622dd187b1a8ebc852b5bfae..cafc7eadcf8056e36bcbc7dd6e7d7a1da580df52 100644 (file)
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c

index 0dbcf04aa35e4683a94f5d0779027a44aa4fb9a1..0e0947601526e8df2c4069609feab765cd2ce3dc 100644 (file)
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c

index 1cda30bd6e47c90020fec4290f15aee624f95a4f..7a0e27f0e063d40eebb4c02a3bd67b2177fb59d4 100644 (file)
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>,<dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>,<kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c

index 723b12c0bb7c81bbbe6b4374166d69876ad7b31d..33b539b34f7efd8e6f19f72bbd85018d25b247d9 100644 (file)
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/pciehprm.h b/drivers/pci/hotplug/pciehprm.h

index 966775ffb0ffcde90b3d3add9dd6b71bc72af1e5..05f20fbc5f509d6a0f9171ae8f6bf30c64fbbc0e 100644 (file)
--- a/drivers/pci/hotplug/pciehprm.h
+++ b/drivers/pci/hotplug/pciehprm.h
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/pciehprm_acpi.c b/drivers/pci/hotplug/pciehprm_acpi.c

index 57f4e6d1b27cb39c5c40704789f39a6693a04925..305b47ec2f2cde3a8d796a0860b8793bce29f446 100644 (file)
--- a/drivers/pci/hotplug/pciehprm_acpi.c
+++ b/drivers/pci/hotplug/pciehprm_acpi.c
@@ -20,7 +20,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <dely.l.sy@intel.com>
+ * Send feedback to <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/pciehprm_nonacpi.c b/drivers/pci/hotplug/pciehprm_nonacpi.c

index 79a0aa6238ef1aa418994b90d5ed3b2045c0ad24..3622965f89611cda42aef572a22dc30c091140e7 100644 (file)
--- a/drivers/pci/hotplug/pciehprm_nonacpi.c
+++ b/drivers/pci/hotplug/pciehprm_nonacpi.c
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/pciehprm_nonacpi.h b/drivers/pci/hotplug/pciehprm_nonacpi.h

index 87c90e85ede9a16e0cbd874792fda590034b353d..b10603b0e95861163f049d2f72f9b5296510ec47 100644 (file)
--- a/drivers/pci/hotplug/pciehprm_nonacpi.h
+++ b/drivers/pci/hotplug/pciehprm_nonacpi.h
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h

index 67b6a3370ceba60697bd7da6c16be669e64f69e0..fe4d653da18849f9a7793b62630c45ec009e2139 100644 (file)
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>,<dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>,<kristen.c.accardi@intel.com>
   *
   */
  #ifndef _SHPCHP_H
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c

index a70a5c5705f2dee83f0da960c3a836269735e43d..6f7d8a29957abee6e5375aac85241b23524487f1 100644 (file)
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/shpchp_ctrl.c b/drivers/pci/hotplug/shpchp_ctrl.c

index 490a9553a0625175d7ba02783988f6915fbc57b7..783b5abb07172e70b2f376658fce051ba4dd57c0 100644 (file)
--- a/drivers/pci/hotplug/shpchp_ctrl.c
+++ b/drivers/pci/hotplug/shpchp_ctrl.c
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c

index 38c5d90666975ead9937347d2d3189de78bd1898..8d98410bf1c07a06a19e0c5a032acec8f27c30c7 100644 (file)
--- a/drivers/pci/hotplug/shpchp_hpc.c
+++ b/drivers/pci/hotplug/shpchp_hpc.c
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>,<dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>,<kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/shpchp_pci.c b/drivers/pci/hotplug/shpchp_pci.c

index 90113e9cd69b9a1cebbf436ff227ad404b807bd3..d867099114ecfa6a973bc74dd2de2b4bf79fd102 100644 (file)
--- a/drivers/pci/hotplug/shpchp_pci.c
+++ b/drivers/pci/hotplug/shpchp_pci.c
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/shpchprm.h b/drivers/pci/hotplug/shpchprm.h

index 88aeb978c91187111dc0430ffdb5ccd33416a5e9..057b192ce589532092798fe08ae09b5a850aa9bd 100644 (file)
--- a/drivers/pci/hotplug/shpchprm.h
+++ b/drivers/pci/hotplug/shpchprm.h
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/shpchprm_acpi.c b/drivers/pci/hotplug/shpchprm_acpi.c

index 7957cdc72cd0464869096ffe9b59298953fdc539..d37b31658edf68590fba02176a6667222c901ed8 100644 (file)
--- a/drivers/pci/hotplug/shpchprm_acpi.c
+++ b/drivers/pci/hotplug/shpchprm_acpi.c
@@ -20,7 +20,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <dely.l.sy@intel.com>
+ * Send feedback to <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/shpchprm_legacy.c b/drivers/pci/hotplug/shpchprm_legacy.c

index 37fa77a98289e038b54d19f6a7c8008431b46428..ba6c549c9b9d1e9cac52709bc9b532508ed63d7c 100644 (file)
--- a/drivers/pci/hotplug/shpchprm_legacy.c
+++ b/drivers/pci/hotplug/shpchprm_legacy.c
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>,<dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>,<kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/shpchprm_legacy.h b/drivers/pci/hotplug/shpchprm_legacy.h

index 29ccea5e57e567d41d28ea8ec2f5ee6a82d80621..21bda74ddfa53de25c6298ce75ae474282440e7e 100644 (file)
--- a/drivers/pci/hotplug/shpchprm_legacy.h
+++ b/drivers/pci/hotplug/shpchprm_legacy.h
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/shpchprm_nonacpi.c b/drivers/pci/hotplug/shpchprm_nonacpi.c

index 88f4d9f418864cd65c64824ceb6b7ed38144156d..5f75ef7f3df293b1c01d6f9a4fe185575af51852 100644 (file)
--- a/drivers/pci/hotplug/shpchprm_nonacpi.c
+++ b/drivers/pci/hotplug/shpchprm_nonacpi.c
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/hotplug/shpchprm_nonacpi.h b/drivers/pci/hotplug/shpchprm_nonacpi.h

index 6bc8668023c38a1180b5574ab2c6754c7c002c28..cddaaa5ee1b3d0f16dee981e3ab39609032b3813 100644 (file)
--- a/drivers/pci/hotplug/shpchprm_nonacpi.h
+++ b/drivers/pci/hotplug/shpchprm_nonacpi.h
@@ -23,7 +23,7 @@
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * Send feedback to <greg@kroah.com>, <dely.l.sy@intel.com>
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
   *
   */
  
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c

index b5ab9aa6ff7c8703f842901e02497bc05dd3100d..2b85aa39f9544dd4069f5f9ef2b2abaca4ee145b 100644 (file)
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -453,7 +453,7 @@ static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
         }
  }
  
-static void disable_msi_mode(struct pci_dev *dev, int pos, int type)
+void disable_msi_mode(struct pci_dev *dev, int pos, int type)
  {
         u16 control;
  
@@ -699,6 +699,9 @@ int pci_enable_msi(struct pci_dev* dev)
         if (!pci_msi_enable || !dev)
                 return status;
  
+       if (dev->no_msi)
+               return status;
+
         temp = dev->irq;
  
         if ((status = msi_init()) < 0)
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h

index d94d7af4f7a0ce850134688a4407dc95c686f44e..d00168b1f66286f8f897a0f19ea0c44e977e5361 100644 (file)
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -47,6 +47,12 @@ extern int pci_msi_quirk;
  #define pci_msi_quirk 0
  #endif
  
+#ifdef CONFIG_PCI_MSI
+void disable_msi_mode(struct pci_dev *dev, int pos, int type);
+#else
+static inline void disable_msi_mode(struct pci_dev *dev, int pos, int type) { }
+#endif
+
  extern int pcie_mch_quirk;
  extern struct device_attribute pci_dev_attrs[];
  extern struct class_device_attribute class_device_attr_cpuaffinity;
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c

index 8d0968bd527e3116c246348fbafd60aebf6d438f..bb36bb69803f44dfee260410aa2f2bf89c712c0c 100644 (file)
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -373,6 +373,25 @@ static void __devinit quirk_vt82c686_acpi(struct pci_dev *dev)
  }
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,    PCI_DEVICE_ID_VIA_82C686_4,     quirk_vt82c686_acpi );
  
+/*
+ * VIA VT8235 ISA Bridge: Two IO regions pointed to by words at
+ *     0x88 (128 bytes of power management registers)
+ *     0xd0 (16 bytes of SMB registers)
+ */
+static void __devinit quirk_vt8235_acpi(struct pci_dev *dev)
+{
+       u16 pm, smb;
+
+       pci_read_config_word(dev, 0x88, &pm);
+       pm &= PCI_BASE_ADDRESS_IO_MASK;
+       quirk_io_region(dev, pm, 128, PCI_BRIDGE_RESOURCES);
+
+       pci_read_config_word(dev, 0xd0, &smb);
+       smb &= PCI_BASE_ADDRESS_IO_MASK;
+       quirk_io_region(dev, smb, 16, PCI_BRIDGE_RESOURCES + 1);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,    PCI_DEVICE_ID_VIA_8235, quirk_vt8235_acpi);
+
  
  #ifdef CONFIG_X86_IO_APIC 
  
@@ -1272,6 +1291,27 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,     PCI_DEVICE_ID_INTEL_E7520_MCH,  quir
  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_E7320_MCH,  quirk_pcie_mch );
  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_E7525_MCH,  quirk_pcie_mch );
  
+
+/*
+ * It's possible for the MSI to get corrupted if shpc and acpi
+ * are used together on certain PXH-based systems.
+ */
+static void __devinit quirk_pcie_pxh(struct pci_dev *dev)
+{
+       disable_msi_mode(dev, pci_find_capability(dev, PCI_CAP_ID_MSI),
+                                       PCI_CAP_ID_MSI);
+       dev->no_msi = 1;
+
+       printk(KERN_WARNING "PCI: PXH quirk detected, "
+               "disabling MSI for SHPC device\n");
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_PXHD_0,     quirk_pcie_pxh);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_PXHD_1,     quirk_pcie_pxh);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_PXH_0,      quirk_pcie_pxh);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_PXH_1,      quirk_pcie_pxh);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_PXHV,       quirk_pcie_pxh);
+
+
  static void __devinit quirk_netmos(struct pci_dev *dev)
  {
         unsigned int num_parallel = (dev->subsystem_device & 0xf0) >> 4;
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c

index a2eebc6eaacca5a7fc6750b76c8fc180f13dbe71..6d864c502a1f71f7c1ce57ab14718bc81b11fe9c 100644 (file)
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -40,7 +40,7 @@
   * FIXME: IO should be max 256 bytes.  However, since we may
   * have a P2P bridge below a cardbus bridge, we need 4K.
   */
-#define CARDBUS_IO_SIZE                (4096)
+#define CARDBUS_IO_SIZE                (256)
  #define CARDBUS_MEM_SIZE       (32*1024*1024)
  
  static void __devinit
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c

index 1ca21d2ba11c68dc93ae9b8746c0e0755b85bfa3..5598b4714f77ac2efaf0f545e404b4c9163c4fcf 100644 (file)
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -33,6 +33,11 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
         u32 new, check, mask;
         int reg;
  
+       /* Ignore resources for unimplemented BARs and unused resource slots
+          for 64 bit BARs. */
+       if (!res->flags)
+               return;
+
         pcibios_resource_to_bus(dev, &region, res);
  
         pr_debug("  got res [%lx:%lx] bus [%lx:%lx] flags %lx for "
@@ -48,7 +53,9 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
         if (resno < 6) {
                 reg = PCI_BASE_ADDRESS_0 + 4 * resno;
         } else if (resno == PCI_ROM_RESOURCE) {
-               new |= res->flags & IORESOURCE_ROM_ENABLE;
+               if (!(res->flags & IORESOURCE_ROM_ENABLE))
+                       return;
+               new |= PCI_ROM_ADDRESS_ENABLE;
                 reg = dev->rom_base_reg;
         } else {
                 /* Hmm, non-standard resource. */
@@ -67,7 +74,7 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
  
         if ((new & (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
             (PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64)) {
-               new = 0; /* currently everyone zeros the high address */
+               new = region.start >> 16 >> 16;
                 pci_write_config_dword(dev, reg + 4, new);
                 pci_read_config_dword(dev, reg + 4, &check);
                 if (check != new) {
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c

index d63f22a5bf7e84bafea7132b0305efe8485042ef..43da2e92d50fe67fd633021de9abef67b130bc51 100644 (file)
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -589,8 +589,8 @@ static void pcmcia_delayed_add_pseudo_device(void *data)
  static inline void pcmcia_add_pseudo_device(struct pcmcia_socket *s)
  {
         if (!s->pcmcia_state.device_add_pending) {
-               schedule_work(&s->device_add);
                 s->pcmcia_state.device_add_pending = 1;
+               schedule_work(&s->device_add);
         }
         return;
  }
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c

index 6f9fdb276402935bec47e407d014dbf30f921dff..599b116d9747211514a6522625e6830756f0d492 100644 (file)
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -41,6 +41,7 @@ module_param(io_speed, int, 0444);
  
  
  #ifdef CONFIG_PCMCIA_PROBE
+#include <asm/irq.h>
  /* mask of IRQs already reserved by other cards, we should avoid using them */
  static u8 pcmcia_used_irq[NR_IRQS];
  #endif
diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c

index 6837491f021c0b813aee8681cc73eb33dd0a21cd..62fd705203fb0d48ecc55d16b8c68b6376fa852a 100644 (file)
--- a/drivers/pcmcia/yenta_socket.c
+++ b/drivers/pcmcia/yenta_socket.c
@@ -605,9 +605,8 @@ static int yenta_search_res(struct yenta_socket *socket, struct resource *res,
  
  static void yenta_allocate_res(struct yenta_socket *socket, int nr, unsigned type, int addr_start, int addr_end)
  {
-       struct pci_bus *bus;
         struct resource *root, *res;
-       u32 start, end;
+       struct pci_bus_region region;
         unsigned mask;
  
         res = socket->dev->resource + PCI_BRIDGE_RESOURCES + nr;
@@ -620,15 +619,13 @@ static void yenta_allocate_res(struct yenta_socket *socket, int nr, unsigned typ
         if (type & IORESOURCE_IO)
                 mask = ~3;
  
-       bus = socket->dev->subordinate;
-       res->name = bus->name;
+       res->name = socket->dev->subordinate->name;
         res->flags = type;
  
-       start = config_readl(socket, addr_start) & mask;
-       end = config_readl(socket, addr_end) | ~mask;
-       if (start && end > start && !override_bios) {
-               res->start = start;
-               res->end = end;
+       region.start = config_readl(socket, addr_start) & mask;
+       region.end = config_readl(socket, addr_end) | ~mask;
+       if (region.start && region.end > region.start && !override_bios) {
+               pcibios_bus_to_resource(socket->dev, res, &region);
                 root = pci_find_parent_resource(socket->dev, res);
                 if (root && (request_resource(root, res) == 0))
                         return;
@@ -642,6 +639,7 @@ static void yenta_allocate_res(struct yenta_socket *socket, int nr, unsigned typ
                     (yenta_search_res(socket, res, BRIDGE_IO_MIN))) {
                         config_writel(socket, addr_start, res->start);
                         config_writel(socket, addr_end, res->end);
+                       return;
                 }
         } else {
                 if (type & IORESOURCE_PREFETCH) {
@@ -650,6 +648,7 @@ static void yenta_allocate_res(struct yenta_socket *socket, int nr, unsigned typ
                             (yenta_search_res(socket, res, BRIDGE_MEM_MIN))) {
                                 config_writel(socket, addr_start, res->start);
                                 config_writel(socket, addr_end, res->end);
+                               return;
                         }
                         /* Approximating prefetchable by non-prefetchable */
                         res->flags = IORESOURCE_MEM;
@@ -659,6 +658,7 @@ static void yenta_allocate_res(struct yenta_socket *socket, int nr, unsigned typ
                     (yenta_search_res(socket, res, BRIDGE_MEM_MIN))) {
                         config_writel(socket, addr_start, res->start);
                         config_writel(socket, addr_end, res->end);
+                       return;
                 }
         }
  
diff --git a/drivers/pnp/card.c b/drivers/pnp/card.c

index add12f7c489a9666b53983c13077505465c33bd4..6e5229e92fbc7bc9fa6a443a9f78fff7f9867b32 100644 (file)
--- a/drivers/pnp/card.c
+++ b/drivers/pnp/card.c
@@ -312,6 +312,8 @@ found:
         if (drv->link.driver.probe) {
                 if (drv->link.driver.probe(&dev->dev)) {
                         dev->dev.driver = NULL;
+                       dev->card_link = NULL;
+                       up_write(&dev->dev.bus->subsys.rwsem);
                         return NULL;
                 }
         }
diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c

index d36258d6665f5a697fc970630af2c436532899fa..381f339e3200a9d98d2031d1d30495c18e713a62 100644 (file)
--- a/drivers/s390/cio/qdio.c
+++ b/drivers/s390/cio/qdio.c
@@ -112,7 +112,7 @@ qdio_min(int a,int b)
  
  /***************** SCRUBBER HELPER ROUTINES **********************/
  
-static inline volatile __u64 
+static inline __u64 
  qdio_get_micros(void)
  {
          return (get_clock() >> 10); /* time>>12 is microseconds */
@@ -230,7 +230,7 @@ qdio_siga_input(struct qdio_q *q)
  }
  
  /* locked by the locks in qdio_activate and qdio_cleanup */
-static __u32 * volatile 
+static __u32 volatile *
  qdio_get_indicator(void)
  {
         int i;
diff --git a/drivers/s390/crypto/z90crypt.h b/drivers/s390/crypto/z90crypt.h

index 82a1d97001d77c2d37b58ac97c92a1e6fb450f43..0a3bb5a10dd48662cad66997b52814fa3a74f278 100644 (file)
--- a/drivers/s390/crypto/z90crypt.h
+++ b/drivers/s390/crypto/z90crypt.h
@@ -35,15 +35,6 @@
  #define z90crypt_RELEASE 3     // 2 = PCIXCC, 3 = rewrite for coding standards
  #define z90crypt_VARIANT 2     // 2 = added PCIXCC MCL3 and CEX2C support
  
-/**
- * If we are not using the sparse checker, __user has no use.
- */
-#ifdef __CHECKER__
-# define __user                __attribute__((noderef, address_space(1)))
-#else
-# define __user
-#endif
-
  /**
   * struct ica_rsa_modexpo
   *
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c

index 8f4d2999af8ebafe3721222f9038a58ee0db137e..79c74f3a11f5f471d79fa6c3507e56d9fe553a33 100644 (file)
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -8120,20 +8120,22 @@ static struct notifier_block qeth_ip6_notifier = {
  #endif
  
  static int
-qeth_reboot_event(struct notifier_block *this, unsigned long event, void *ptr)
+__qeth_reboot_event_card(struct device *dev, void *data)
  {
-
-       struct device *entry;
         struct qeth_card *card;
  
-       down_read(&qeth_ccwgroup_driver.driver.bus->subsys.rwsem);
-              list_for_each_entry(entry, &qeth_ccwgroup_driver.driver.devices,
-                                  driver_list) {
-                      card = (struct qeth_card *) entry->driver_data;
-                      qeth_clear_ip_list(card, 0, 0);
-                      qeth_qdio_clear_card(card, 0);
-              }
-       up_read(&qeth_ccwgroup_driver.driver.bus->subsys.rwsem);
+       card = (struct qeth_card *) dev->driver_data;
+       qeth_clear_ip_list(card, 0, 0);
+       qeth_qdio_clear_card(card, 0);
+       return 0;
+}
+
+static int
+qeth_reboot_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+
+       driver_for_each_device(&qeth_ccwgroup_driver.driver, NULL, NULL,
+                              __qeth_reboot_event_card);
         return NOTIFY_DONE;
  }
  
diff --git a/drivers/s390/net/qeth_proc.c b/drivers/s390/net/qeth_proc.c

index 04719196fd2016b4d76440e4cb7c2e41b5b43153..f2ccfea8fdb89cabe2bf65a7885c3fbcde3b4798 100644 (file)
--- a/drivers/s390/net/qeth_proc.c
+++ b/drivers/s390/net/qeth_proc.c
@@ -27,23 +27,33 @@ const char *VERSION_QETH_PROC_C = "$Revision: 1.13 $";
  #define QETH_PROCFILE_NAME "qeth"
  static struct proc_dir_entry *qeth_procfile;
  
+static int
+qeth_procfile_seq_match(struct device *dev, void *data)
+{
+       return 1;
+}
+
  static void *
  qeth_procfile_seq_start(struct seq_file *s, loff_t *offset)
  {
-       struct list_head *next_card = NULL;
-       int i = 0;
+       struct device *dev;
+       loff_t nr;
  
         down_read(&qeth_ccwgroup_driver.driver.bus->subsys.rwsem);
  
-       if (*offset == 0)
+       nr = *offset;
+       if (nr == 0)
                 return SEQ_START_TOKEN;
  
-       /* get card at pos *offset */
-       list_for_each(next_card, &qeth_ccwgroup_driver.driver.devices)
-               if (++i == *offset)
-                       return next_card;
+       dev = driver_find_device(&qeth_ccwgroup_driver.driver, NULL,
+                                NULL, qeth_procfile_seq_match);
  
-       return NULL;
+       /* get card at pos *offset */
+       nr = *offset;
+       while (nr-- > 1 && dev)
+               dev = driver_find_device(&qeth_ccwgroup_driver.driver, dev,
+                                        NULL, qeth_procfile_seq_match);
+       return (void *) dev;
  }
  
  static void
@@ -55,23 +65,21 @@ qeth_procfile_seq_stop(struct seq_file *s, void* it)
  static void *
  qeth_procfile_seq_next(struct seq_file *s, void *it, loff_t *offset)
  {
-       struct list_head *next_card = NULL;
-       struct list_head *current_card;
+       struct device *prev, *next;
  
         if (it == SEQ_START_TOKEN) {
-               next_card = qeth_ccwgroup_driver.driver.devices.next;
-               if (next_card->next == next_card) /* list empty */
-                       return NULL;
-               (*offset)++;
-       } else {
-               current_card = (struct list_head *)it;
-               if (current_card->next == &qeth_ccwgroup_driver.driver.devices)
-                       return NULL; /* end of list reached */
-               next_card = current_card->next;
-               (*offset)++;
+               next = driver_find_device(&qeth_ccwgroup_driver.driver,
+                                         NULL, NULL, qeth_procfile_seq_match);
+               if (next)
+                       (*offset)++;
+               return (void *) next;
         }
-
-       return next_card;
+       prev = (struct device *) it;
+       next = driver_find_device(&qeth_ccwgroup_driver.driver,
+                                 prev, NULL, qeth_procfile_seq_match);
+       if (next)
+               (*offset)++;
+       return (void *) next;
  }
  
  static inline const char *
@@ -126,7 +134,7 @@ qeth_procfile_seq_show(struct seq_file *s, void *it)
                               "-------------- ---- ------ ---------- ---- "
                               "---- ----- -----\n");
         } else {
-               device = list_entry(it, struct device, driver_list);
+               device = (struct device *) it;
                 card = device->driver_data;
                 seq_printf(s, "%s/%s/%s x%02X   %-10s %-14s %-4i ",
                                 CARD_RDEV_ID(card),
@@ -180,17 +188,20 @@ static struct proc_dir_entry *qeth_perf_procfile;
  static void *
  qeth_perf_procfile_seq_start(struct seq_file *s, loff_t *offset)
  {
-       struct list_head *next_card = NULL;
-       int i = 0;
+       struct device *dev = NULL;
+       int nr;
  
         down_read(&qeth_ccwgroup_driver.driver.bus->subsys.rwsem);
         /* get card at pos *offset */
-       list_for_each(next_card, &qeth_ccwgroup_driver.driver.devices){
-               if (i == *offset)
-                       return next_card;
-               i++;
-       }
-       return NULL;
+       dev = driver_find_device(&qeth_ccwgroup_driver.driver, NULL, NULL,
+                                qeth_procfile_seq_match);
+
+       /* get card at pos *offset */
+       nr = *offset;
+       while (nr-- > 1 && dev)
+               dev = driver_find_device(&qeth_ccwgroup_driver.driver, dev,
+                                        NULL, qeth_procfile_seq_match);
+       return (void *) dev;
  }
  
  static void
@@ -202,12 +213,14 @@ qeth_perf_procfile_seq_stop(struct seq_file *s, void* it)
  static void *
  qeth_perf_procfile_seq_next(struct seq_file *s, void *it, loff_t *offset)
  {
-       struct list_head *current_card = (struct list_head *)it;
+       struct device *prev, *next;
  
-       if (current_card->next == &qeth_ccwgroup_driver.driver.devices)
-               return NULL; /* end of list reached */
-       (*offset)++;
-       return current_card->next;
+       prev = (struct device *) it;
+       next = driver_find_device(&qeth_ccwgroup_driver.driver, prev,
+                                 NULL, qeth_procfile_seq_match);
+       if (next)
+               (*offset)++;
+       return (void *) next;
  }
  
  static int
@@ -216,7 +229,7 @@ qeth_perf_procfile_seq_show(struct seq_file *s, void *it)
         struct device *device;
         struct qeth_card *card;
  
-       device = list_entry(it, struct device, driver_list);
+       device = (struct device *) it;
         card = device->driver_data;
         seq_printf(s, "For card with devnos %s/%s/%s (%s):\n",
                         CARD_RDEV_ID(card),
@@ -318,8 +331,8 @@ static struct proc_dir_entry *qeth_ipato_procfile;
  static void *
  qeth_ipato_procfile_seq_start(struct seq_file *s, loff_t *offset)
  {
-       struct list_head *next_card = NULL;
-       int i = 0;
+       struct device *dev;
+       loff_t nr;
  
         down_read(&qeth_ccwgroup_driver.driver.bus->subsys.rwsem);
         /* TODO: finish this */
@@ -328,13 +341,16 @@ qeth_ipato_procfile_seq_start(struct seq_file *s, loff_t *offset)
          * output driver settings then;
          * else output setting for respective card
          */
+
+       dev = driver_find_device(&qeth_ccwgroup_driver.driver, NULL, NULL,
+                                qeth_procfile_seq_match);
+
         /* get card at pos *offset */
-       list_for_each(next_card, &qeth_ccwgroup_driver.driver.devices){
-               if (i == *offset)
-                       return next_card;
-               i++;
-       }
-       return NULL;
+       nr = *offset;
+       while (nr-- > 1 && dev)
+               dev = driver_find_device(&qeth_ccwgroup_driver.driver, dev,
+                                        NULL, qeth_procfile_seq_match);
+       return (void *) dev;
  }
  
  static void
@@ -346,18 +362,14 @@ qeth_ipato_procfile_seq_stop(struct seq_file *s, void* it)
  static void *
  qeth_ipato_procfile_seq_next(struct seq_file *s, void *it, loff_t *offset)
  {
-       struct list_head *current_card = (struct list_head *)it;
+       struct device *prev, *next;
  
-       /* TODO: finish this */
-       /*
-        * maybe SEQ_SATRT_TOKEN can be returned for offset 0
-        * output driver settings then;
-        * else output setting for respective card
-        */
-       if (current_card->next == &qeth_ccwgroup_driver.driver.devices)
-               return NULL; /* end of list reached */
-       (*offset)++;
-       return current_card->next;
+       prev = (struct device *) it;
+       next = driver_find_device(&qeth_ccwgroup_driver.driver, prev,
+                                 NULL, qeth_procfile_seq_match);
+       if (next)
+               (*offset)++;
+       return (void *) next;
  }
  
  static int
@@ -372,7 +384,7 @@ qeth_ipato_procfile_seq_show(struct seq_file *s, void *it)
          * output driver settings then;
          * else output setting for respective card
          */
-       device = list_entry(it, struct device, driver_list);
+       device = (struct device *) it;
         card = device->driver_data;
  
         return 0;
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c

index e17b4d58a9f6d6a5b1d3009c141fdaf26123d6cd..bfe3ba73bc0f65310f58caa67e2b46412992d0c1 100644 (file)
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -1299,13 +1299,10 @@ struct zfcp_port *
  zfcp_port_enqueue(struct zfcp_adapter *adapter, wwn_t wwpn, u32 status,
                   u32 d_id)
  {
-       struct zfcp_port *port, *tmp_port;
+       struct zfcp_port *port;
         int check_wwpn;
-       scsi_id_t scsi_id;
-       int found;
  
         check_wwpn = !(status & ZFCP_STATUS_PORT_NO_WWPN);
-
         /*
          * check that there is no port with this WWPN already in list
          */
@@ -1368,7 +1365,7 @@ zfcp_port_enqueue(struct zfcp_adapter *adapter, wwn_t wwpn, u32 status,
         } else {
                 snprintf(port->sysfs_device.bus_id,
                          BUS_ID_SIZE, "0x%016llx", wwpn);
-       port->sysfs_device.parent = &adapter->ccw_device->dev;
+               port->sysfs_device.parent = &adapter->ccw_device->dev;
         }
         port->sysfs_device.release = zfcp_sysfs_port_release;
         dev_set_drvdata(&port->sysfs_device, port);
@@ -1388,24 +1385,8 @@ zfcp_port_enqueue(struct zfcp_adapter *adapter, wwn_t wwpn, u32 status,
  
         zfcp_port_get(port);
  
-       scsi_id = 1;
-       found = 0;
         write_lock_irq(&zfcp_data.config_lock);
-       list_for_each_entry(tmp_port, &adapter->port_list_head, list) {
-               if (atomic_test_mask(ZFCP_STATUS_PORT_NO_SCSI_ID,
-                                    &tmp_port->status))
-                       continue;
-               if (tmp_port->scsi_id != scsi_id) {
-                       found = 1;
-                       break;
-               }
-               scsi_id++;
-       }
-       port->scsi_id = scsi_id;
-       if (found)
-               list_add_tail(&port->list, &tmp_port->list);
-       else
-               list_add_tail(&port->list, &adapter->port_list_head);
+       list_add_tail(&port->list, &adapter->port_list_head);
         atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, &port->status);
         atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING, &port->status);
         if (d_id == ZFCP_DID_DIRECTORY_SERVICE)
@@ -1427,6 +1408,9 @@ zfcp_port_dequeue(struct zfcp_port *port)
         list_del(&port->list);
         port->adapter->ports--;
         write_unlock_irq(&zfcp_data.config_lock);
+       if (port->rport)
+               fc_remote_port_delete(port->rport);
+       port->rport = NULL;
         zfcp_adapter_put(port->adapter);
         zfcp_sysfs_port_remove_files(&port->sysfs_device,
                                      atomic_read(&port->status));
diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c

index 0fc46381fc22735d1a884fe24f913e200dfbb770..b30abab77da39841b8c0db3f37545fd3ae540b07 100644 (file)
--- a/drivers/s390/scsi/zfcp_ccw.c
+++ b/drivers/s390/scsi/zfcp_ccw.c
@@ -202,9 +202,19 @@ static int
  zfcp_ccw_set_offline(struct ccw_device *ccw_device)
  {
         struct zfcp_adapter *adapter;
+       struct zfcp_port *port;
+       struct fc_rport *rport;
  
         down(&zfcp_data.config_sema);
         adapter = dev_get_drvdata(&ccw_device->dev);
+       /* might be racy, but we cannot take config_lock due to the fact that
+          fc_remote_port_delete might sleep */
+       list_for_each_entry(port, &adapter->port_list_head, list)
+               if (port->rport) {
+                       rport = port->rport;
+                       port->rport = NULL;
+                       fc_remote_port_delete(rport);
+               }
         zfcp_erp_adapter_shutdown(adapter, 0);
         zfcp_erp_wait(adapter);
         zfcp_adapter_scsi_unregister(adapter);
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h

index 4103b5be768376376ae941d20e6b4feae7983f45..455e902533a9d9c8fb56a73ce932d813537d5421 100644 (file)
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -906,6 +906,7 @@ struct zfcp_adapter {
   */
  struct zfcp_port {
         struct device          sysfs_device;   /* sysfs device */
+       struct fc_rport        *rport;         /* rport of fc transport class */
         struct list_head       list;           /* list of remote ports */
         atomic_t               refcount;       /* reference count */
         wait_queue_head_t      remove_wq;      /* can be used to wait for
@@ -916,7 +917,6 @@ struct zfcp_port {
                                                   list */
         u32                    units;          /* # of logical units in list */
         atomic_t               status;         /* status of this remote port */
-       scsi_id_t              scsi_id;        /* own SCSI ID */
         wwn_t                  wwnn;           /* WWNN if known */
         wwn_t                  wwpn;           /* WWPN */
         fc_id_t                d_id;           /* D_ID */
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c

index 0cf31f7d1c0f9fd1c4ec80af137e1b5d8dc9ca61..cb4f612550baf459ed752ba385dbeca823574958 100644 (file)
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -3360,13 +3360,32 @@ zfcp_erp_action_cleanup(int action, struct zfcp_adapter *adapter,
                 if ((result == ZFCP_ERP_SUCCEEDED)
                     && (!atomic_test_mask(ZFCP_STATUS_UNIT_TEMPORARY,
                                           &unit->status))
-                   && (!unit->device))
-                       scsi_add_device(unit->port->adapter->scsi_host, 0,
-                                       unit->port->scsi_id, unit->scsi_lun);
+                   && !unit->device
+                   && port->rport)
+                       scsi_add_device(port->adapter->scsi_host, 0,
+                                       port->rport->scsi_target_id,
+                                       unit->scsi_lun);
                 zfcp_unit_put(unit);
                 break;
         case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
         case ZFCP_ERP_ACTION_REOPEN_PORT:
+               if ((result == ZFCP_ERP_SUCCEEDED)
+                   && !atomic_test_mask(ZFCP_STATUS_PORT_NO_WWPN,
+                                        &port->status)
+                   && !port->rport) {
+                       struct fc_rport_identifiers ids;
+                       ids.node_name = port->wwnn;
+                       ids.port_name = port->wwpn;
+                       ids.port_id = port->d_id;
+                       ids.roles = FC_RPORT_ROLE_FCP_TARGET;
+                       port->rport =
+                               fc_remote_port_add(adapter->scsi_host, 0, &ids);
+                       if (!port->rport)
+                               ZFCP_LOG_NORMAL("failed registration of rport"
+                                               "(adapter %s, wwpn=0x%016Lx)\n",
+                                               zfcp_get_busid_by_port(port),
+                                               port->wwpn);
+               }
                 zfcp_port_put(port);
                 break;
         case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h

index 42df7e57eeae48f1414e0948b3720e8b30e20d16..cd98a2de9f8fa4d997ae12d3058d186efe9e2b74 100644 (file)
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -143,6 +143,8 @@ extern int zfcp_scsi_command_async(struct zfcp_adapter *,struct zfcp_unit *,
                                    struct scsi_cmnd *, struct timer_list *);
  extern int zfcp_scsi_command_sync(struct zfcp_unit *, struct scsi_cmnd *,
                                   struct timer_list *);
+extern void zfcp_set_fc_host_attrs(struct zfcp_adapter *);
+extern void zfcp_set_fc_rport_attrs(struct zfcp_port *);
  extern struct scsi_transport_template *zfcp_transport_template;
  extern struct fc_function_template zfcp_transport_functions;
  
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c

index 0d9f20edc490f8c5702e46953f8c357796eaf46d..c007b6424e746cd63854ca8b69bfefbddcfc5802 100644 (file)
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -2062,6 +2062,7 @@ zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *fsf_req, int xchg_ok)
                 zfcp_erp_adapter_shutdown(adapter, 0);
                 return -EIO;
         }
+       zfcp_set_fc_host_attrs(adapter);
         return 0;
  }
  
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c

index b61d309352c38e5ae02536e271784c030d5fb4e2..31a76065cf2838104e1ace600eea2b39775e337f 100644 (file)
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -389,7 +389,7 @@ zfcp_unit_lookup(struct zfcp_adapter *adapter, int channel, scsi_id_t id,
         struct zfcp_unit *unit, *retval = NULL;
  
         list_for_each_entry(port, &adapter->port_list_head, list) {
-               if (id != port->scsi_id)
+               if (!port->rport || (id != port->rport->scsi_target_id))
                         continue;
                 list_for_each_entry(unit, &port->unit_list_head, list) {
                         if (lun == unit->scsi_lun) {
@@ -408,7 +408,7 @@ zfcp_port_lookup(struct zfcp_adapter *adapter, int channel, scsi_id_t id)
         struct zfcp_port *port;
  
         list_for_each_entry(port, &adapter->port_list_head, list) {
-               if (id == port->scsi_id)
+               if (port->rport && (id == port->rport->scsi_target_id))
                         return port;
         }
         return (struct zfcp_port *) NULL;
@@ -634,7 +634,6 @@ zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *scpnt)
  {
         int retval;
         struct zfcp_unit *unit = (struct zfcp_unit *) scpnt->device->hostdata;
-       struct Scsi_Host *scsi_host = scpnt->device->host;
  
         if (!unit) {
                 ZFCP_LOG_NORMAL("bug: Tried reset for nonexistent unit\n");
@@ -729,7 +728,6 @@ zfcp_scsi_eh_bus_reset_handler(struct scsi_cmnd *scpnt)
  {
         int retval = 0;
         struct zfcp_unit *unit;
-       struct Scsi_Host *scsi_host = scpnt->device->host;
  
         unit = (struct zfcp_unit *) scpnt->device->hostdata;
         ZFCP_LOG_NORMAL("bus reset because of problems with "
@@ -753,7 +751,6 @@ zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
  {
         int retval = 0;
         struct zfcp_unit *unit;
-       struct Scsi_Host *scsi_host = scpnt->device->host;
  
         unit = (struct zfcp_unit *) scpnt->device->hostdata;
         ZFCP_LOG_NORMAL("host reset because of problems with "
@@ -833,6 +830,7 @@ zfcp_adapter_scsi_unregister(struct zfcp_adapter *adapter)
         shost = adapter->scsi_host;
         if (!shost)
                 return;
+       fc_remove_host(shost);
         scsi_remove_host(shost);
         scsi_host_put(shost);
         adapter->scsi_host = NULL;
@@ -906,6 +904,18 @@ zfcp_get_node_name(struct scsi_target *starget)
         read_unlock_irqrestore(&zfcp_data.config_lock, flags);
  }
  
+void
+zfcp_set_fc_host_attrs(struct zfcp_adapter *adapter)
+{
+       struct Scsi_Host *shost = adapter->scsi_host;
+
+       fc_host_node_name(shost) = adapter->wwnn;
+       fc_host_port_name(shost) = adapter->wwpn;
+       strncpy(fc_host_serial_number(shost), adapter->serial_number,
+                min(FC_SERIAL_NUMBER_SIZE, 32));
+       fc_host_supported_classes(shost) = FC_COS_CLASS2 | FC_COS_CLASS3;
+}
+
  struct fc_function_template zfcp_transport_functions = {
         .get_starget_port_id = zfcp_get_port_id,
         .get_starget_port_name = zfcp_get_port_name,
@@ -913,6 +923,11 @@ struct fc_function_template zfcp_transport_functions = {
         .show_starget_port_id = 1,
         .show_starget_port_name = 1,
         .show_starget_node_name = 1,
+       .show_rport_supported_classes = 1,
+       .show_host_node_name = 1,
+       .show_host_port_name = 1,
+       .show_host_supported_classes = 1,
+       .show_host_serial_number = 1,
  };
  
  /**
diff --git a/drivers/s390/scsi/zfcp_sysfs_port.c b/drivers/s390/scsi/zfcp_sysfs_port.c

index 7a84c7d474d961e25985a9cfefa8417ad0e7d6f1..c55e82d91deb103133047d823ce3b122de097271 100644 (file)
--- a/drivers/s390/scsi/zfcp_sysfs_port.c
+++ b/drivers/s390/scsi/zfcp_sysfs_port.c
@@ -67,7 +67,6 @@ static DEVICE_ATTR(_name, S_IRUGO, zfcp_sysfs_port_##_name##_show, NULL);
  ZFCP_DEFINE_PORT_ATTR(status, "0x%08x\n", atomic_read(&port->status));
  ZFCP_DEFINE_PORT_ATTR(wwnn, "0x%016llx\n", port->wwnn);
  ZFCP_DEFINE_PORT_ATTR(d_id, "0x%06x\n", port->d_id);
-ZFCP_DEFINE_PORT_ATTR(scsi_id, "0x%x\n", port->scsi_id);
  ZFCP_DEFINE_PORT_ATTR(in_recovery, "%d\n", atomic_test_mask
                       (ZFCP_STATUS_COMMON_ERP_INUSE, &port->status));
  ZFCP_DEFINE_PORT_ATTR(access_denied, "%d\n", atomic_test_mask
@@ -263,7 +262,6 @@ static struct attribute_group zfcp_port_common_attr_group = {
  static struct attribute *zfcp_port_no_ns_attrs[] = {
         &dev_attr_unit_add.attr,
         &dev_attr_unit_remove.attr,
-       &dev_attr_scsi_id.attr,
         NULL
  };
  
diff --git a/drivers/sbus/char/bbc_envctrl.c b/drivers/sbus/char/bbc_envctrl.c

index b8a2c7353b0a5f3f8d0b7e755fbbcd23430f7336..d44205d52bf3ce7a69a0da0678c0fe0d377f10d9 100644 (file)
--- a/drivers/sbus/char/bbc_envctrl.c
+++ b/drivers/sbus/char/bbc_envctrl.c
@@ -7,6 +7,7 @@
  #define __KERNEL_SYSCALLS__
  
  #include <linux/kernel.h>
+#include <linux/kthread.h>
  #include <linux/sched.h>
  #include <linux/slab.h>
  #include <linux/delay.h>
@@ -459,10 +460,6 @@ static struct task_struct *kenvctrld_task;
  
  static int kenvctrld(void *__unused)
  {
-       daemonize("kenvctrld");
-       allow_signal(SIGKILL);
-       kenvctrld_task = current;
-
         printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n");
         last_warning_jiffies = jiffies - WARN_INTERVAL;
         for (;;) {
@@ -470,7 +467,7 @@ static int kenvctrld(void *__unused)
                 struct bbc_fan_control *fp;
  
                 msleep_interruptible(POLL_INTERVAL);
-               if (signal_pending(current))
+               if (kthread_should_stop())
                         break;
  
                 for (tp = all_bbc_temps; tp; tp = tp->next) {
@@ -577,7 +574,6 @@ int bbc_envctrl_init(void)
         int temp_index = 0;
         int fan_index = 0;
         int devidx = 0;
-       int err = 0;
  
         while ((echild = bbc_i2c_getdev(devidx++)) != NULL) {
                 if (!strcmp(echild->prom_name, "temperature"))
@@ -585,9 +581,13 @@ int bbc_envctrl_init(void)
                 if (!strcmp(echild->prom_name, "fan-control"))
                         attach_one_fan(echild, fan_index++);
         }
-       if (temp_index != 0 && fan_index != 0)
-               err = kernel_thread(kenvctrld, NULL, CLONE_FS | CLONE_FILES);
-       return err;
+       if (temp_index != 0 && fan_index != 0) {
+               kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld");
+               if (IS_ERR(kenvctrld_task))
+                       return PTR_ERR(kenvctrld_task);
+       }
+
+       return 0;
  }
  
  static void destroy_one_temp(struct bbc_cpu_temperature *tp)
@@ -607,26 +607,7 @@ void bbc_envctrl_cleanup(void)
         struct bbc_cpu_temperature *tp;
         struct bbc_fan_control *fp;
  
-       if (kenvctrld_task != NULL) {
-               force_sig(SIGKILL, kenvctrld_task);
-               for (;;) {
-                       struct task_struct *p;
-                       int found = 0;
-
-                       read_lock(&tasklist_lock);
-                       for_each_process(p) {
-                               if (p == kenvctrld_task) {
-                                       found = 1;
-                                       break;
-                               }
-                       }
-                       read_unlock(&tasklist_lock);
-                       if (!found)
-                               break;
-                       msleep(1000);
-               }
-               kenvctrld_task = NULL;
-       }
+       kthread_stop(kenvctrld_task);
  
         tp = all_bbc_temps;
         while (tp != NULL) {
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c

index 9a8c572554f5e6e9d630058656b21300e9bec779..d765cc1bf060bc30d17e1e3ba78d01429dedf457 100644 (file)
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -24,6 +24,7 @@
  #include <linux/config.h>
  #include <linux/module.h>
  #include <linux/sched.h>
+#include <linux/kthread.h>
  #include <linux/errno.h>
  #include <linux/delay.h>
  #include <linux/ioport.h>
@@ -1010,16 +1011,13 @@ static int kenvctrld(void *__unused)
  
         poll_interval = 5000; /* TODO env_mon_interval */
  
-       daemonize("kenvctrld");
-       allow_signal(SIGKILL);
-
-       kenvctrld_task = current;
-
         printk(KERN_INFO "envctrl: %s starting...\n", current->comm);
         for (;;) {
-               if(msleep_interruptible(poll_interval))
-                       break;
+               msleep_interruptible(poll_interval);
  
+               if (kthread_should_stop())
+                       break;
+               
                 for (whichcpu = 0; whichcpu < ENVCTRL_MAX_CPU; ++whichcpu) {
                         if (0 < envctrl_read_cpu_info(whichcpu, cputemp,
                                                       ENVCTRL_CPUTEMP_MON,
@@ -1041,7 +1039,6 @@ static int kenvctrld(void *__unused)
  
  static int __init envctrl_init(void)
  {
-#ifdef CONFIG_PCI
         struct linux_ebus *ebus = NULL;
         struct linux_ebus_device *edev = NULL;
         struct linux_ebus_child *edev_child = NULL;
@@ -1118,9 +1115,11 @@ done:
                         i2c_childlist[i].addr, (0 == i) ? ("\n") : (" "));
         }
  
-       err = kernel_thread(kenvctrld, NULL, CLONE_FS | CLONE_FILES);
-       if (err < 0)
+       kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld");
+       if (IS_ERR(kenvctrld_task)) {
+               err = PTR_ERR(kenvctrld_task);
                 goto out_deregister;
+       }
  
         return 0;
  
@@ -1133,37 +1132,13 @@ out_iounmap:
                         kfree(i2c_childlist[i].tables);
         }
         return err;
-#else
-       return -ENODEV;
-#endif
  }
  
  static void __exit envctrl_cleanup(void)
  {
         int i;
  
-       if (NULL != kenvctrld_task) {
-               force_sig(SIGKILL, kenvctrld_task);
-               for (;;) {
-                       struct task_struct *p;
-                       int found = 0;
-
-                       read_lock(&tasklist_lock);
-                       for_each_process(p) {
-                               if (p == kenvctrld_task) {
-                                       found = 1;
-                                       break;
-                               }
-                       }
-                       read_unlock(&tasklist_lock);
-
-                       if (!found)
-                               break;
-
-                       msleep(1000);
-               }
-               kenvctrld_task = NULL;
-       }
+       kthread_stop(kenvctrld_task);
  
         iounmap(i2c);
         misc_deregister(&envctrl_dev);
diff --git a/drivers/sbus/char/vfc.h b/drivers/sbus/char/vfc.h

index e56a43af0f62ce19d91646dff5d3a382be233fa0..a7782e7da42ee9ce843a9a3ff79ac1a545569a8c 100644 (file)
--- a/drivers/sbus/char/vfc.h
+++ b/drivers/sbus/char/vfc.h
@@ -129,8 +129,6 @@ struct vfc_dev {
         struct vfc_regs *phys_regs;
         unsigned int control_reg;
         struct semaphore device_lock_sem;
-       struct timer_list poll_timer;
-       wait_queue_head_t poll_wait;
         int instance;
         int busy;
         unsigned long which_io;
diff --git a/drivers/sbus/char/vfc_dev.c b/drivers/sbus/char/vfc_dev.c

index 86ce54130954979c9eb4abb7632712805e4c13c7..7a103698fa3c3a3ea1e9023cbb55b5d7f4b8e1ab 100644 (file)
--- a/drivers/sbus/char/vfc_dev.c
+++ b/drivers/sbus/char/vfc_dev.c
@@ -137,7 +137,6 @@ int init_vfc_devstruct(struct vfc_dev *dev, int instance)
         dev->instance=instance;
         init_MUTEX(&dev->device_lock_sem);
         dev->control_reg=0;
-       init_waitqueue_head(&dev->poll_wait);
         dev->busy=0;
         return 0;
  }
diff --git a/drivers/sbus/char/vfc_i2c.c b/drivers/sbus/char/vfc_i2c.c

index 1faf1e75f71fa34b80c3d3f2e601b9a86d54841e..739cad9b19a1f55a98d9c4be5a5d3a08cf43a026 100644 (file)
--- a/drivers/sbus/char/vfc_i2c.c
+++ b/drivers/sbus/char/vfc_i2c.c
@@ -79,25 +79,10 @@ int vfc_pcf8584_init(struct vfc_dev *dev)
         return 0;
  }
  
-void vfc_i2c_delay_wakeup(struct vfc_dev *dev) 
-{
-       /* Used to profile code and eliminate too many delays */
-       VFC_I2C_DEBUG_PRINTK(("vfc%d: Delaying\n", dev->instance));
-       wake_up(&dev->poll_wait);
-}
-
  void vfc_i2c_delay_no_busy(struct vfc_dev *dev, unsigned long usecs) 
  {
-       DEFINE_WAIT(wait);
-       init_timer(&dev->poll_timer);
-       dev->poll_timer.expires = jiffies + usecs_to_jiffies(usecs);
-       dev->poll_timer.data=(unsigned long)dev;
-       dev->poll_timer.function=(void *)(unsigned long)vfc_i2c_delay_wakeup;
-       add_timer(&dev->poll_timer);
-       prepare_to_wait(&dev->poll_wait, &wait, TASK_UNINTERRUPTIBLE);
-       schedule();
-       del_timer(&dev->poll_timer);
-       finish_wait(&dev->poll_wait, &wait);
+       set_current_state(TASK_UNINTERRUPTIBLE);
+       schedule_timeout(usecs_to_jiffies(usecs));
  }
  
  void inline vfc_i2c_delay(struct vfc_dev *dev) 
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig

index 96df148ed96967f06e544893ae415ba2bb18699d..12c208fb18c516de702cb575cf82c8348480b53c 100644 (file)
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -424,7 +424,7 @@ config SCSI_IN2000
  source "drivers/scsi/megaraid/Kconfig.megaraid"
  
  config SCSI_SATA
-       bool "Serial ATA (SATA) support"
+       tristate "Serial ATA (SATA) support"
         depends on SCSI
         help
           This driver family supports Serial ATA host controllers
@@ -1696,7 +1696,7 @@ config TT_DMA_EMUL
  
  config MAC_SCSI
         bool "Macintosh NCR5380 SCSI"
-       depends on MAC && SCSI
+       depends on MAC && SCSI=y
         help
           This is the NCR 5380 SCSI controller included on most of the 68030
           based Macintoshes.  If you have one of these say Y and read the
@@ -1717,7 +1717,7 @@ config SCSI_MAC_ESP
  
  config MVME147_SCSI
         bool "WD33C93 SCSI driver for MVME147"
-       depends on MVME147 && SCSI
+       depends on MVME147 && SCSI=y
         help
           Support for the on-board SCSI controller on the Motorola MVME147
           single-board computer.
@@ -1758,7 +1758,7 @@ config SUN3_SCSI
  
  config SUN3X_ESP
         bool "Sun3x ESP SCSI"
-       depends on SUN3X && SCSI
+       depends on SUN3X && SCSI=y
         help
           The ESP was an on-board SCSI controller used on Sun 3/80
           machines.  Say Y here to compile in support for it.
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h

index 3a11a536c0dab39e351f8db5e969f1e6c72220c8..4ab07861b457fd80c1af736cb17dd633119228e6 100644 (file)
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -15,11 +15,7 @@
  #define AAC_MAX_LUN            (8)
  
  #define AAC_MAX_HOSTPHYSMEMPAGES (0xfffff)
-/*
- *  max_sectors is an unsigned short, otherwise limit is 0x100000000 / 512
- * Linux has starvation problems if we permit larger than 4MB I/O ...
- */
-#define AAC_MAX_32BIT_SGBCOUNT ((unsigned short)8192)
+#define AAC_MAX_32BIT_SGBCOUNT ((unsigned short)512)
  
  /*
   * These macros convert from physical channels to virtual channels
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c

index c1a4f978fcbad1156f9badd33af44f122e2bf5ed..562da90480a1426346ceba19efe54108e4043826 100644 (file)
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -374,7 +374,8 @@ static int aac_slave_configure(struct scsi_device *sdev)
         else
                 scsi_adjust_queue_depth(sdev, 0, 1);
  
-       if (host->max_sectors < AAC_MAX_32BIT_SGBCOUNT)
+       if (!(((struct aac_dev *)host->hostdata)->adapter_info.options
+         & AAC_OPT_NEW_COMM))
                 blk_queue_max_segment_size(sdev->request_queue, 65536);
  
         return 0;
diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c

index c5623694d10f56a25ddc1c6f6073deaa3f250232..179c95c878acd73f2f24cec0e7504ba599c22c57 100644 (file)
--- a/drivers/scsi/ahci.c
+++ b/drivers/scsi/ahci.c
@@ -1,26 +1,34 @@
  /*
   *  ahci.c - AHCI SATA support
   *
- *  Copyright 2004 Red Hat, Inc.
+ *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
+ *                 Please ALWAYS copy linux-ide@vger.kernel.org
+ *                 on emails.
   *
- *  The contents of this file are subject to the Open
- *  Software License version 1.1 that can be found at
- *  http://www.opensource.org/licenses/osl-1.1.txt and is included herein
- *  by reference.
+ *  Copyright 2004-2005 Red Hat, Inc.
   *
- *  Alternatively, the contents of this file may be used under the terms
- *  of the GNU General Public License version 2 (the "GPL") as distributed
- *  in the kernel source COPYING file, in which case the provisions of
- *  the GPL are applicable instead of the above.  If you wish to allow
- *  the use of your version of this file only under the terms of the
- *  GPL and not to allow others to use your version of this file under
- *  the OSL, indicate your decision by deleting the provisions above and
- *  replace them with the notice and other provisions required by the GPL.
- *  If you do not delete the provisions above, a recipient may use your
- *  version of this file under either the OSL or the GPL.
   *
- * Version 1.0 of the AHCI specification:
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ * libata documentation is available via 'make {ps|pdf}docs',
+ * as Documentation/DocBook/libata.*
+ *
+ * AHCI hardware documentation:
   * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf
+ * http://www.intel.com/technology/serialata/pdf/rev1_1.pdf
   *
   */
  
@@ -269,6 +277,8 @@ static struct pci_device_id ahci_pci_tbl[] = {
           board_ahci }, /* ESB2 */
         { PCI_VENDOR_ID_INTEL, 0x2683, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
           board_ahci }, /* ESB2 */
+       { PCI_VENDOR_ID_INTEL, 0x27c6, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+         board_ahci }, /* ICH7-M DH */
         { }     /* terminate list */
  };
  
@@ -584,12 +594,16 @@ static void ahci_intr_error(struct ata_port *ap, u32 irq_stat)
  
  static void ahci_eng_timeout(struct ata_port *ap)
  {
-       void *mmio = ap->host_set->mmio_base;
+       struct ata_host_set *host_set = ap->host_set;
+       void *mmio = host_set->mmio_base;
         void *port_mmio = ahci_port_base(mmio, ap->port_no);
         struct ata_queued_cmd *qc;
+       unsigned long flags;
  
         DPRINTK("ENTER\n");
  
+       spin_lock_irqsave(&host_set->lock, flags);
+
         ahci_intr_error(ap, readl(port_mmio + PORT_IRQ_STAT));
  
         qc = ata_qc_from_tag(ap, ap->active_tag);
@@ -607,6 +621,7 @@ static void ahci_eng_timeout(struct ata_port *ap)
                 ata_qc_complete(qc, ATA_ERR);
         }
  
+       spin_unlock_irqrestore(&host_set->lock, flags);
  }
  
  static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
@@ -696,9 +711,6 @@ static int ahci_qc_issue(struct ata_queued_cmd *qc)
         struct ata_port *ap = qc->ap;
         void *port_mmio = (void *) ap->ioaddr.cmd_addr;
  
-       writel(1, port_mmio + PORT_SCR_ACT);
-       readl(port_mmio + PORT_SCR_ACT);        /* flush */
-
         writel(1, port_mmio + PORT_CMD_ISSUE);
         readl(port_mmio + PORT_CMD_ISSUE);      /* flush */
  
@@ -1105,6 +1117,7 @@ MODULE_AUTHOR("Jeff Garzik");
  MODULE_DESCRIPTION("AHCI SATA low-level driver");
  MODULE_LICENSE("GPL");
  MODULE_DEVICE_TABLE(pci, ahci_pci_tbl);
+MODULE_VERSION(DRV_VERSION);
  
  module_init(ahci_init);
  module_exit(ahci_exit);
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c

index 116d0f51ca2cae984bc50d09c0d13931f6cbcebf..687f19e9cf03a179d6671163509176c3350fdc1f 100644 (file)
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -1264,14 +1264,12 @@ ahc_platform_set_tags(struct ahc_softc *ahc, struct ahc_devinfo *devinfo,
         }
         switch ((dev->flags & (AHC_DEV_Q_BASIC|AHC_DEV_Q_TAGGED))) {
         case AHC_DEV_Q_BASIC:
-               scsi_adjust_queue_depth(sdev,
-                                       MSG_SIMPLE_TASK,
-                                       dev->openings + dev->active);
+               scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
+               scsi_activate_tcq(sdev, dev->openings + dev->active);
                 break;
         case AHC_DEV_Q_TAGGED:
-               scsi_adjust_queue_depth(sdev,
-                                       MSG_ORDERED_TASK,
-                                       dev->openings + dev->active);
+               scsi_set_tag_type(sdev, MSG_ORDERED_TAG);
+               scsi_activate_tcq(sdev, dev->openings + dev->active);
                 break;
         default:
                 /*
@@ -1280,9 +1278,7 @@ ahc_platform_set_tags(struct ahc_softc *ahc, struct ahc_devinfo *devinfo,
                  * serially on the controller/device.  This should
                  * remove some latency.
                  */
-               scsi_adjust_queue_depth(sdev,
-                                       /*NON-TAGGED*/0,
-                                       /*queue depth*/2);
+               scsi_deactivate_tcq(sdev, 2);
                 break;
         }
  }
@@ -1635,9 +1631,9 @@ ahc_send_async(struct ahc_softc *ahc, char channel,
                 spi_period(starget) = tinfo->curr.period;
                 spi_width(starget) = tinfo->curr.width;
                 spi_offset(starget) = tinfo->curr.offset;
-               spi_dt(starget) = tinfo->curr.ppr_options & MSG_EXT_PPR_DT_REQ;
-               spi_qas(starget) = tinfo->curr.ppr_options & MSG_EXT_PPR_QAS_REQ;
-               spi_iu(starget) = tinfo->curr.ppr_options & MSG_EXT_PPR_IU_REQ;
+               spi_dt(starget) = tinfo->curr.ppr_options & MSG_EXT_PPR_DT_REQ ? 1 : 0;
+               spi_qas(starget) = tinfo->curr.ppr_options & MSG_EXT_PPR_QAS_REQ ? 1 : 0;
+               spi_iu(starget) = tinfo->curr.ppr_options & MSG_EXT_PPR_IU_REQ ? 1 : 0;
                 spi_display_xfer_agreement(starget);
                 break;
         }
@@ -2429,12 +2425,14 @@ static void ahc_linux_set_dt(struct scsi_target *starget, int dt)
         unsigned int ppr_options = tinfo->goal.ppr_options
                 & ~MSG_EXT_PPR_DT_REQ;
         unsigned int period = tinfo->goal.period;
+       unsigned int width = tinfo->goal.width;
         unsigned long flags;
         struct ahc_syncrate *syncrate;
  
         if (dt) {
-               period = 9;     /* 12.5ns is the only period valid for DT */
                 ppr_options |= MSG_EXT_PPR_DT_REQ;
+               if (!width)
+                       ahc_linux_set_width(starget, 1);
         } else if (period == 9)
                 period = 10;    /* if resetting DT, period must be >= 25ns */
  
diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm.c b/drivers/scsi/aic7xxx/aicasm/aicasm.c

index c34639481904bf9e023992c295db5fe88b26909c..f936b691232f90d4d020998d8a3420508b36cc85 100644 (file)
--- a/drivers/scsi/aic7xxx/aicasm/aicasm.c
+++ b/drivers/scsi/aic7xxx/aicasm/aicasm.c
@@ -369,7 +369,7 @@ output_code()
  
                 fprintf(ofile, "%s\t0x%02x, 0x%02x, 0x%02x, 0x%02x",
                         cur_instr == STAILQ_FIRST(&seq_program) ? "" : ",\n",
-#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef __LITTLE_ENDIAN
                         cur_instr->format.bytes[0],
                         cur_instr->format.bytes[1],
                         cur_instr->format.bytes[2],
@@ -613,7 +613,7 @@ output_listing(char *ifilename)
                                 line++;
                 }
                 fprintf(listfile, "%03x %02x%02x%02x%02x", instrptr,
-#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef __LITTLE_ENDIAN
                         cur_instr->format.bytes[0],
                         cur_instr->format.bytes[1],
                         cur_instr->format.bytes[2],
diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm_insformat.h b/drivers/scsi/aic7xxx/aicasm/aicasm_insformat.h

index 3e80f07df49c5a4a3902a516272ea35842a53adf..e64f802bbaaa165884bafc117082eab38c23f701 100644 (file)
--- a/drivers/scsi/aic7xxx/aicasm/aicasm_insformat.h
+++ b/drivers/scsi/aic7xxx/aicasm/aicasm_insformat.h
@@ -42,8 +42,10 @@
   * $FreeBSD$
   */
  
+#include <asm/byteorder.h>
+
  struct ins_format1 {
-#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef __LITTLE_ENDIAN
         uint32_t        immediate       : 8,
                         source          : 9,
                         destination     : 9,
@@ -61,7 +63,7 @@ struct ins_format1 {
  };
  
  struct ins_format2 {
-#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef __LITTLE_ENDIAN
         uint32_t        shift_control   : 8,
                         source          : 9,
                         destination     : 9,
@@ -79,7 +81,7 @@ struct ins_format2 {
  };
  
  struct ins_format3 {
-#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef __LITTLE_ENDIAN
         uint32_t        immediate       : 8,
                         source          : 9,
                         address         : 10,
diff --git a/drivers/scsi/arm/Kconfig b/drivers/scsi/arm/Kconfig

index 54b32868aaf7aa6835cae3a183d115a6e9b76f8c..13f23043c8a370f507fc5e8599e5735a4c265c6f 100644 (file)
--- a/drivers/scsi/arm/Kconfig
+++ b/drivers/scsi/arm/Kconfig
@@ -3,7 +3,7 @@
  #
  config SCSI_ACORNSCSI_3
         tristate "Acorn SCSI card (aka30) support"
-       depends on ARCH_ACORN && SCSI
+       depends on ARCH_ACORN && SCSI && BROKEN
         help
           This enables support for the Acorn SCSI card (aka30). If you have an
           Acorn system with one of these, say Y. If unsure, say N.
diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c

index a2cfade2c1c655b70b0078cdcfbbd1dd050831df..fb28c1261848c5f0ee66fbbfa09b22dec095456a 100644 (file)
--- a/drivers/scsi/ata_piix.c
+++ b/drivers/scsi/ata_piix.c
@@ -1,24 +1,42 @@
  /*
-
-    ata_piix.c - Intel PATA/SATA controllers
-
-    Maintained by:  Jeff Garzik <jgarzik@pobox.com>
-                   Please ALWAYS copy linux-ide@vger.kernel.org
-                   on emails.
-
-
-       Copyright 2003-2004 Red Hat Inc
-       Copyright 2003-2004 Jeff Garzik
-
-
-       Copyright header from piix.c:
-
-    Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer
-    Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org>
-    Copyright (C) 2003 Red Hat Inc <alan@redhat.com>
-
-    May be copied or modified under the terms of the GNU General Public License
-
+ *    ata_piix.c - Intel PATA/SATA controllers
+ *
+ *    Maintained by:  Jeff Garzik <jgarzik@pobox.com>
+ *                 Please ALWAYS copy linux-ide@vger.kernel.org
+ *                 on emails.
+ *
+ *
+ *     Copyright 2003-2005 Red Hat Inc
+ *     Copyright 2003-2005 Jeff Garzik
+ *
+ *
+ *     Copyright header from piix.c:
+ *
+ *  Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer
+ *  Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org>
+ *  Copyright (C) 2003 Red Hat Inc <alan@redhat.com>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  Hardware documentation available at http://developer.intel.com/
+ *
   */
  
  #include <linux/kernel.h>
@@ -32,7 +50,7 @@
  #include <linux/libata.h>
  
  #define DRV_NAME       "ata_piix"
-#define DRV_VERSION    "1.03"
+#define DRV_VERSION    "1.04"
  
  enum {
         PIIX_IOCFG              = 0x54, /* IDE I/O configuration register */
@@ -629,13 +647,13 @@ static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
         port_info[1] = NULL;
  
         if (port_info[0]->host_flags & PIIX_FLAG_AHCI) {
-               u8 tmp;
-               pci_read_config_byte(pdev, PIIX_SCC, &tmp);
-               if (tmp == PIIX_AHCI_DEVICE) {
-                       int rc = piix_disable_ahci(pdev);
-                       if (rc)
-                           return rc;
-               }
+               u8 tmp;
+               pci_read_config_byte(pdev, PIIX_SCC, &tmp);
+               if (tmp == PIIX_AHCI_DEVICE) {
+                       int rc = piix_disable_ahci(pdev);
+                       if (rc)
+                               return rc;
+               }
         }
  
         if (port_info[0]->host_flags & PIIX_FLAG_COMBINED) {
diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c

index 929170dcd3cbda7bf739ca8d5dbe4b68379cd453..600ba120286468e63f0df8240e08613dee24fdf5 100644 (file)
--- a/drivers/scsi/dc395x.c
+++ b/drivers/scsi/dc395x.c
@@ -183,7 +183,7 @@
   * cross a page boundy.
   */
  #define SEGMENTX_LEN   (sizeof(struct SGentry)*DC395x_MAX_SG_LISTENTRY)
-#define VIRTX_LEN      (sizeof(void *) * DC395x_MAX_SG_LISTENTRY)
+
  
  struct SGentry {
         u32 address;            /* bus! address */
@@ -235,7 +235,6 @@ struct ScsiReqBlk {
         u8 sg_count;                    /* No of HW sg entries for this request */
         u8 sg_index;                    /* Index of HW sg entry for this request */
         u32 total_xfer_length;          /* Total number of bytes remaining to be transfered */
-       void **virt_map;
         unsigned char *virt_addr;       /* Virtual address of current transfer position */
  
         /*
@@ -1022,14 +1021,14 @@ static void build_srb(struct scsi_cmnd *cmd, struct DeviceCtlBlk *dcb,
                         reqlen, cmd->request_buffer, cmd->use_sg,
                         srb->sg_count);
  
+               srb->virt_addr = page_address(sl->page);
                 for (i = 0; i < srb->sg_count; i++) {
-                       u32 seglen = (u32)sg_dma_len(sl + i);
-                       sgp[i].address = (u32)sg_dma_address(sl + i);
+                       u32 busaddr = (u32)sg_dma_address(&sl[i]);
+                       u32 seglen = (u32)sl[i].length;
+                       sgp[i].address = busaddr;
                         sgp[i].length = seglen;
                         srb->total_xfer_length += seglen;
-                       srb->virt_map[i] = kmap(sl[i].page);
                 }
-               srb->virt_addr = srb->virt_map[0];
                 sgp += srb->sg_count - 1;
  
                 /*
@@ -1976,7 +1975,6 @@ static void sg_update_list(struct ScsiReqBlk *srb, u32 left)
         int segment = cmd->use_sg;
         u32 xferred = srb->total_xfer_length - left; /* bytes transfered */
         struct SGentry *psge = srb->segment_x + srb->sg_index;
-       void **virt = srb->virt_map;
  
         dprintkdbg(DBG_0,
                 "sg_update_list: Transfered %i of %i bytes, %i remain\n",
@@ -2016,16 +2014,16 @@ static void sg_update_list(struct ScsiReqBlk *srb, u32 left)
  
         /* We have to walk the scatterlist to find it */
         sg = (struct scatterlist *)cmd->request_buffer;
-       idx = 0;
         while (segment--) {
                 unsigned long mask =
                     ~((unsigned long)sg->length - 1) & PAGE_MASK;
                 if ((sg_dma_address(sg) & mask) == (psge->address & mask)) {
-                       srb->virt_addr = virt[idx] + (psge->address & ~PAGE_MASK);
+                       srb->virt_addr = (page_address(sg->page)
+                                          + psge->address -
+                                          (psge->address & PAGE_MASK));
                         return;
                 }
                 ++sg;
-               ++idx;
         }
  
         dprintkl(KERN_ERR, "sg_update_list: sg_to_virt failed\n");
@@ -2151,7 +2149,7 @@ static void data_out_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
                                 DC395x_read32(acb, TRM_S1040_DMA_CXCNT));
                 }
                 /*
-                * calculate all the residue data that not yet transfered
+                * calculate all the residue data that not yet transferred
                  * SCSI transfer counter + left in SCSI FIFO data
                  *
                  * .....TRM_S1040_SCSI_COUNTER (24bits)
@@ -3269,7 +3267,6 @@ static void pci_unmap_srb(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb)
         struct scsi_cmnd *cmd = srb->cmd;
         enum dma_data_direction dir = cmd->sc_data_direction;
         if (cmd->use_sg && dir != PCI_DMA_NONE) {
-               int i;
                 /* unmap DC395x SG list */
                 dprintkdbg(DBG_SG, "pci_unmap_srb: list=%08x(%05x)\n",
                         srb->sg_bus_addr, SEGMENTX_LEN);
@@ -3279,8 +3276,6 @@ static void pci_unmap_srb(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb)
                 dprintkdbg(DBG_SG, "pci_unmap_srb: segs=%i buffer=%p\n",
                         cmd->use_sg, cmd->request_buffer);
                 /* unmap the sg segments */
-               for (i = 0; i < srb->sg_count; i++)
-                       kunmap(virt_to_page(srb->virt_map[i]));
                 pci_unmap_sg(acb->dev,
                              (struct scatterlist *)cmd->request_buffer,
                              cmd->use_sg, dir);
@@ -3327,7 +3322,7 @@ static void srb_done(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb,
  
         if (cmd->use_sg) {
                 struct scatterlist* sg = (struct scatterlist *)cmd->request_buffer;
-               ptr = (struct ScsiInqData *)(srb->virt_map[0] + sg->offset);
+               ptr = (struct ScsiInqData *)(page_address(sg->page) + sg->offset);
         } else {
                 ptr = (struct ScsiInqData *)(cmd->request_buffer);
         }
@@ -4262,9 +4257,8 @@ static void adapter_sg_tables_free(struct AdapterCtlBlk *acb)
         const unsigned srbs_per_page = PAGE_SIZE/SEGMENTX_LEN;
  
         for (i = 0; i < DC395x_MAX_SRB_CNT; i += srbs_per_page)
-               kfree(acb->srb_array[i].segment_x);
-
-       vfree(acb->srb_array[0].virt_map);
+               if (acb->srb_array[i].segment_x)
+                       kfree(acb->srb_array[i].segment_x);
  }
  
  
@@ -4280,12 +4274,9 @@ static int __devinit adapter_sg_tables_alloc(struct AdapterCtlBlk *acb)
         int srb_idx = 0;
         unsigned i = 0;
         struct SGentry *ptr;
-       void **virt_array;
  
-       for (i = 0; i < DC395x_MAX_SRB_CNT; i++) {
+       for (i = 0; i < DC395x_MAX_SRB_CNT; i++)
                 acb->srb_array[i].segment_x = NULL;
-               acb->srb_array[i].virt_map = NULL;
-       }
  
         dprintkdbg(DBG_1, "Allocate %i pages for SG tables\n", pages);
         while (pages--) {
@@ -4306,19 +4297,6 @@ static int __devinit adapter_sg_tables_alloc(struct AdapterCtlBlk *acb)
                     ptr + (i * DC395x_MAX_SG_LISTENTRY);
         else
                 dprintkl(KERN_DEBUG, "No space for tmsrb SG table reserved?!\n");
-
-       virt_array = vmalloc((DC395x_MAX_SRB_CNT + 1) * DC395x_MAX_SG_LISTENTRY * sizeof(void*));
-
-       if (!virt_array) {
-               adapter_sg_tables_free(acb);
-               return 1;
-       }
-
-       for (i = 0; i < DC395x_MAX_SRB_CNT + 1; i++) {
-               acb->srb_array[i].virt_map = virt_array;
-               virt_array += DC395x_MAX_SG_LISTENTRY;
-       }
-
         return 0;
  }
  
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c

index e2370529c63293595e894603c3bdd7aab2dce1b1..7235f94f1191ef06ce816e0539527c0cd11f256d 100644 (file)
--- a/drivers/scsi/dpt_i2o.c
+++ b/drivers/scsi/dpt_i2o.c
@@ -907,9 +907,13 @@ static int adpt_install_hba(struct scsi_host_template* sht, struct pci_dev* pDev
                 raptorFlag = TRUE;
         }
  
-
+       if (pci_request_regions(pDev, "dpt_i2o")) {
+               PERROR("dpti: adpt_config_hba: pci request region failed\n");
+               return -EINVAL;
+       }
         base_addr_virt = ioremap(base_addr0_phys,hba_map0_area_size);
         if (!base_addr_virt) {
+               pci_release_regions(pDev);
                 PERROR("dpti: adpt_config_hba: io remap failed\n");
                 return -EINVAL;
         }
@@ -919,6 +923,7 @@ static int adpt_install_hba(struct scsi_host_template* sht, struct pci_dev* pDev
                 if (!msg_addr_virt) {
                         PERROR("dpti: adpt_config_hba: io remap failed on BAR1\n");
                         iounmap(base_addr_virt);
+                       pci_release_regions(pDev);
                         return -EINVAL;
                 }
         } else {
@@ -932,6 +937,7 @@ static int adpt_install_hba(struct scsi_host_template* sht, struct pci_dev* pDev
                         iounmap(msg_addr_virt);
                 }
                 iounmap(base_addr_virt);
+               pci_release_regions(pDev);
                 return -ENOMEM;
         }
         memset(pHba, 0, sizeof(adpt_hba));
@@ -1027,6 +1033,7 @@ static void adpt_i2o_delete_hba(adpt_hba* pHba)
         up(&adpt_configuration_lock);
  
         iounmap(pHba->base_addr_virt);
+       pci_release_regions(pHba->pDev);
         if(pHba->msg_addr_virt != pHba->base_addr_virt){
                 iounmap(pHba->msg_addr_virt);
         }
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c

index fe09d145542ae44f2dae5249d933f00f368f19b4..2cb3c8340ca8599fba18d38c24778781637703a5 100644 (file)
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -1442,7 +1442,7 @@ static int ibmvscsi_remove(struct vio_dev *vdev)
   */
  static struct vio_device_id ibmvscsi_device_table[] __devinitdata = {
         {"vscsi", "IBM,v-scsi"},
-       {0,}
+       { "", "" }
  };
  
  MODULE_DEVICE_TABLE(vio, ibmvscsi_device_table);
diff --git a/drivers/scsi/ibmvscsi/rpa_vscsi.c b/drivers/scsi/ibmvscsi/rpa_vscsi.c

index 035f615817d74362bbbed9a767b9d92044ac4ebd..8bf5652f106090ab75ec922a747e11bf0d1b2ad0 100644 (file)
--- a/drivers/scsi/ibmvscsi/rpa_vscsi.c
+++ b/drivers/scsi/ibmvscsi/rpa_vscsi.c
@@ -28,6 +28,7 @@
   */
  
  #include <asm/vio.h>
+#include <asm/prom.h>
  #include <asm/iommu.h>
  #include <asm/hvcall.h>
  #include <linux/dma-mapping.h>
diff --git a/drivers/scsi/ibmvscsi/srp.h b/drivers/scsi/ibmvscsi/srp.h

index 2ae5154fd89cfffc0d94ec9932c539ca821f73a4..7d8e4c4accb9571260d1921fe05fa31cdc1afd87 100644 (file)
--- a/drivers/scsi/ibmvscsi/srp.h
+++ b/drivers/scsi/ibmvscsi/srp.h
@@ -35,7 +35,7 @@
  enum srp_types {
         SRP_LOGIN_REQ_TYPE = 0x00,
         SRP_LOGIN_RSP_TYPE = 0xC0,
-       SRP_LOGIN_REJ_TYPE = 0x80,
+       SRP_LOGIN_REJ_TYPE = 0xC2,
         SRP_I_LOGOUT_TYPE = 0x03,
         SRP_T_LOGOUT_TYPE = 0x80,
         SRP_TSK_MGMT_TYPE = 0x01,
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c

index 6dfcb4fbccdda1ed614b2833ea755ed81df7b9ea..4cdd891781b18f0b819fb758301a2625d76ea6ef 100644 (file)
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -133,10 +133,12 @@
  /* 6.10.00  - Remove 1G Addressing Limitations                               */
  /* 6.11.xx  - Get VersionInfo buffer off the stack !              DDTS 60401 */
  /* 6.11.xx  - Make Logical Drive Info structure safe for DMA      DDTS 60639 */
-/* 7.10.xx  - Add highmem_io flag in SCSI Templete for 2.4 kernels           */
+/* 7.10.18  - Add highmem_io flag in SCSI Template for 2.4 kernels           */
  /*          - Fix path/name for scsi_hosts.h include for 2.6 kernels         */
  /*          - Fix sort order of 7k                                           */
  /*          - Remove 3 unused "inline" functions                             */
+/* 7.12.xx  - Use STATIC functions wherever possible                         */
+/*          - Clean up deprecated MODULE_PARM calls                          */
  /*****************************************************************************/
  
  /*
@@ -207,8 +209,8 @@ module_param(ips, charp, 0);
  /*
   * DRIVER_VER
   */
-#define IPS_VERSION_HIGH        "7.10"
-#define IPS_VERSION_LOW         ".18 "
+#define IPS_VERSION_HIGH        "7.12"
+#define IPS_VERSION_LOW         ".02 "
  
  #if !defined(__i386__) && !defined(__ia64__) && !defined(__x86_64__)
  #warning "This driver has only been tested on the x86/ia64/x86_64 platforms"
diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h

index 480e06f4d6ae2b833f8cf108e2159a11a58ecbfe..505e967013dee182d9ca7cdbfae3df9c56c81114 100644 (file)
--- a/drivers/scsi/ips.h
+++ b/drivers/scsi/ips.h
@@ -87,15 +87,14 @@
        #define scsi_set_pci_device(sh,dev) (0)
     #endif
  
-   #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
-   
-      #ifndef irqreturn_t
-         typedef void irqreturn_t;
-      #endif 
-      
+   #ifndef IRQ_NONE
+      typedef void irqreturn_t;
        #define IRQ_NONE
        #define IRQ_HANDLED
        #define IRQ_RETVAL(x)
+   #endif
+   
+   #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
        #define IPS_REGISTER_HOSTS(SHT)      scsi_register_module(MODULE_SCSI_HA,SHT)
        #define IPS_UNREGISTER_HOSTS(SHT)    scsi_unregister_module(MODULE_SCSI_HA,SHT)
        #define IPS_ADD_HOST(shost,device)
@@ -123,6 +122,10 @@
     #ifndef min
        #define min(x,y) ((x) < (y) ? x : y)
     #endif
+   
+   #ifndef __iomem       /* For clean compiles in earlier kernels without __iomem annotations */
+      #define __iomem
+   #endif
  
     #define pci_dma_hi32(a)         ((a >> 16) >> 16)
     #define pci_dma_lo32(a)         (a & 0xffffffff)
@@ -1206,13 +1209,13 @@ typedef struct {
  
  #define IPS_VER_MAJOR 7
  #define IPS_VER_MAJOR_STRING "7"
-#define IPS_VER_MINOR 10
-#define IPS_VER_MINOR_STRING "10"
-#define IPS_VER_BUILD 18
-#define IPS_VER_BUILD_STRING "18"
-#define IPS_VER_STRING "7.10.18"
+#define IPS_VER_MINOR 12
+#define IPS_VER_MINOR_STRING "12"
+#define IPS_VER_BUILD 02
+#define IPS_VER_BUILD_STRING "02"
+#define IPS_VER_STRING "7.12.02"
  #define IPS_RELEASE_ID 0x00020000
-#define IPS_BUILD_IDENT 731
+#define IPS_BUILD_IDENT 761
  #define IPS_LEGALCOPYRIGHT_STRING "(C) Copyright IBM Corp. 1994, 2002. All Rights Reserved."
  #define IPS_ADAPTECCOPYRIGHT_STRING "(c) Copyright Adaptec, Inc. 2002 to 2004. All Rights Reserved."
  #define IPS_DELLCOPYRIGHT_STRING "(c) Copyright Dell 2004. All Rights Reserved."
@@ -1223,12 +1226,12 @@ typedef struct {
  #define IPS_VER_SERVERAID2 "2.88.13"
  #define IPS_VER_NAVAJO "2.88.13"
  #define IPS_VER_SERVERAID3 "6.10.24"
-#define IPS_VER_SERVERAID4H "7.10.11"
-#define IPS_VER_SERVERAID4MLx "7.10.18"
-#define IPS_VER_SARASOTA "7.10.18"
-#define IPS_VER_MARCO "7.10.18"
-#define IPS_VER_SEBRING "7.10.18"
-#define IPS_VER_KEYWEST "7.10.18"
+#define IPS_VER_SERVERAID4H "7.12.02"
+#define IPS_VER_SERVERAID4MLx "7.12.02"
+#define IPS_VER_SARASOTA "7.12.02"
+#define IPS_VER_MARCO "7.12.02"
+#define IPS_VER_SEBRING "7.12.02"
+#define IPS_VER_KEYWEST "7.12.02"
  
  /* Compatability IDs for various adapters */
  #define IPS_COMPAT_UNKNOWN ""
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c

index 73b1f72b7e430989b261f1dd0c825d9ec02ad18e..dee4b12b034261f4bbdf28e9b956dcb8d0d23ead 100644 (file)
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1,25 +1,35 @@
  /*
-   libata-core.c - helper library for ATA
-
-   Copyright 2003-2004 Red Hat, Inc.  All rights reserved.
-   Copyright 2003-2004 Jeff Garzik
-
-   The contents of this file are subject to the Open
-   Software License version 1.1 that can be found at
-   http://www.opensource.org/licenses/osl-1.1.txt and is included herein
-   by reference.
-
-   Alternatively, the contents of this file may be used under the terms
-   of the GNU General Public License version 2 (the "GPL") as distributed
-   in the kernel source COPYING file, in which case the provisions of
-   the GPL are applicable instead of the above.  If you wish to allow
-   the use of your version of this file only under the terms of the
-   GPL and not to allow others to use your version of this file under
-   the OSL, indicate your decision by deleting the provisions above and
-   replace them with the notice and other provisions required by the GPL.
-   If you do not delete the provisions above, a recipient may use your
-   version of this file under either the OSL or the GPL.
-
+ *  libata-core.c - helper library for ATA
+ *
+ *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
+ *                 Please ALWAYS copy linux-ide@vger.kernel.org
+ *                 on emails.
+ *
+ *  Copyright 2003-2004 Red Hat, Inc.  All rights reserved.
+ *  Copyright 2003-2004 Jeff Garzik
+ *
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  Hardware documentation available from http://www.t13.org/ and
+ *  http://www.sata-io.org/
+ *
   */
  
  #include <linux/config.h>
@@ -1304,12 +1314,12 @@ static inline u8 ata_dev_knobble(struct ata_port *ap)
  /**
   *     ata_dev_config - Run device specific handlers and check for
   *                      SATA->PATA bridges
- *     @ap: Bus 
+ *     @ap: Bus
   *     @i:  Device
   *
   *     LOCKING:
   */
- 
+
  void ata_dev_config(struct ata_port *ap, unsigned int i)
  {
         /* limit bridge transfers to udma5, 200 sectors */
@@ -2268,19 +2278,6 @@ void ata_qc_prep(struct ata_queued_cmd *qc)
   *     spin_lock_irqsave(host_set lock)
   */
  
-
-
-/**
- *     ata_sg_init_one - Prepare a one-entry scatter-gather list.
- *     @qc:  Queued command
- *     @buf:  transfer buffer
- *     @buflen:  length of buf
- *
- *     Builds a single-entry scatter-gather list to initiate a
- *     transfer utilizing the specified buffer.
- *
- *     LOCKING:
- */
  void ata_sg_init_one(struct ata_queued_cmd *qc, void *buf, unsigned int buflen)
  {
         struct scatterlist *sg;
@@ -2312,18 +2309,6 @@ void ata_sg_init_one(struct ata_queued_cmd *qc, void *buf, unsigned int buflen)
   *     spin_lock_irqsave(host_set lock)
   */
  
-
-/**
- *     ata_sg_init - Assign a scatter gather list to a queued command
- *     @qc:  Queued command
- *     @sg:  Scatter-gather list
- *     @n_elem:  length of sg list
- *
- *     Attaches a scatter-gather list to a queued command.
- *
- *     LOCKING:
- */
-
  void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
                  unsigned int n_elem)
  {
@@ -2401,6 +2386,27 @@ static int ata_sg_setup(struct ata_queued_cmd *qc)
         return 0;
  }
  
+/**
+ *     ata_poll_qc_complete - turn irq back on and finish qc
+ *     @qc: Command to complete
+ *     @drv_stat: ATA status register content
+ *
+ *     LOCKING:
+ *     None.  (grabs host lock)
+ */
+
+void ata_poll_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
+{
+       struct ata_port *ap = qc->ap;
+       unsigned long flags;    /* irq state saved while the host lock is held */
+
+       spin_lock_irqsave(&ap->host_set->lock, flags);
+       ap->flags &= ~ATA_FLAG_NOINTR;  /* irq handlers skip ports that have NOINTR set */
+       ata_irq_on(ap);
+       ata_qc_complete(qc, drv_stat);  /* completion runs with the host lock held */
+       spin_unlock_irqrestore(&ap->host_set->lock, flags);
+}
+
  /**
   *     ata_pio_poll -
   *     @ap:
@@ -2463,11 +2469,10 @@ static void ata_pio_complete (struct ata_port *ap)
         u8 drv_stat;
  
         /*
-        * This is purely hueristic.  This is a fast path.
-        * Sometimes when we enter, BSY will be cleared in
-        * a chk-status or two.  If not, the drive is probably seeking
-        * or something.  Snooze for a couple msecs, then
-        * chk-status again.  If still busy, fall back to
+        * This is purely heuristic.  This is a fast path.  Sometimes when
+        * we enter, BSY will be cleared in a chk-status or two.  If not,
+        * the drive is probably seeking or something.  Snooze for a couple
+        * msecs, then chk-status again.  If still busy, fall back to
          * PIO_ST_POLL state.
          */
         drv_stat = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 10);
@@ -2492,9 +2497,7 @@ static void ata_pio_complete (struct ata_port *ap)
  
         ap->pio_task_state = PIO_ST_IDLE;
  
-       ata_irq_on(ap);
-
-       ata_qc_complete(qc, drv_stat);
+       ata_poll_qc_complete(qc, drv_stat);
  }
  
  
@@ -2519,6 +2522,20 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
  #endif /* __BIG_ENDIAN */
  }
  
+/**
+ *     ata_mmio_data_xfer - Transfer data by MMIO
+ *     @ap: port to read/write
+ *     @buf: data buffer
+ *     @buflen: buffer length
+ *     @write_data: non-zero to write to the device, zero to read
+ *
+ *     Transfer data from/to the device data register by MMIO.
+ *
+ *     LOCKING:
+ *     Inherited from caller.
+ *
+ */
+
  static void ata_mmio_data_xfer(struct ata_port *ap, unsigned char *buf,
                                unsigned int buflen, int write_data)
  {
@@ -2527,6 +2544,7 @@ static void ata_mmio_data_xfer(struct ata_port *ap, unsigned char *buf,
         u16 *buf16 = (u16 *) buf;
         void __iomem *mmio = (void __iomem *)ap->ioaddr.data_addr;
  
+       /* Transfer multiple of 2 bytes */
         if (write_data) {
                 for (i = 0; i < words; i++)
                         writew(le16_to_cpu(buf16[i]), mmio);
@@ -2534,19 +2552,76 @@ static void ata_mmio_data_xfer(struct ata_port *ap, unsigned char *buf,
                 for (i = 0; i < words; i++)
                         buf16[i] = cpu_to_le16(readw(mmio));
         }
+
+       /* Transfer trailing 1 byte, if any. */
+       if (unlikely(buflen & 0x01)) {
+               u16 align_buf[1] = { 0 };
+               unsigned char *trailing_buf = buf + buflen - 1;
+
+               if (write_data) {
+                       memcpy(align_buf, trailing_buf, 1);
+                       writew(le16_to_cpu(align_buf[0]), mmio);
+               } else {
+                       align_buf[0] = cpu_to_le16(readw(mmio));
+                       memcpy(trailing_buf, align_buf, 1);
+               }
+       }
  }
  
+/**
+ *     ata_pio_data_xfer - Transfer data by PIO
+ *     @ap: port to read/write
+ *     @buf: data buffer
+ *     @buflen: buffer length
+ *     @write_data: non-zero to write to the device, zero to read
+ *
+ *     Transfer data from/to the device data register by PIO.
+ *
+ *     LOCKING:
+ *     Inherited from caller.
+ *
+ */
+
  static void ata_pio_data_xfer(struct ata_port *ap, unsigned char *buf,
                               unsigned int buflen, int write_data)
  {
-       unsigned int dwords = buflen >> 1;
+       unsigned int words = buflen >> 1;
  
+       /* Transfer multiple of 2 bytes */
         if (write_data)
-               outsw(ap->ioaddr.data_addr, buf, dwords);
+               outsw(ap->ioaddr.data_addr, buf, words);
         else
-               insw(ap->ioaddr.data_addr, buf, dwords);
+               insw(ap->ioaddr.data_addr, buf, words);
+
+       /* Transfer trailing 1 byte, if any. */
+       if (unlikely(buflen & 0x01)) {
+               u16 align_buf[1] = { 0 };
+               unsigned char *trailing_buf = buf + buflen - 1;
+
+               if (write_data) {
+                       memcpy(align_buf, trailing_buf, 1);
+                       outw(le16_to_cpu(align_buf[0]), ap->ioaddr.data_addr);
+               } else {
+                       align_buf[0] = cpu_to_le16(inw(ap->ioaddr.data_addr));
+                       memcpy(trailing_buf, align_buf, 1);
+               }
+       }
  }
  
+/**
+ *     ata_data_xfer - Transfer data from/to the data register.
+ *     @ap: port to read/write
+ *     @buf: data buffer
+ *     @buflen: buffer length
+ *     @do_write: read/write
+ *
+ *     Transfer data from/to the device data register.
+ *
+ *     LOCKING:
+ *     Inherited from caller.
+ *
+ */
+
  static void ata_data_xfer(struct ata_port *ap, unsigned char *buf,
                           unsigned int buflen, int do_write)
  {
@@ -2556,6 +2631,16 @@ static void ata_data_xfer(struct ata_port *ap, unsigned char *buf,
                 ata_pio_data_xfer(ap, buf, buflen, do_write);
  }
  
+/**
+ *     ata_pio_sector - Transfer ATA_SECT_SIZE (512 bytes) of data.
+ *     @qc: Command in progress
+ *
+ *     Transfer ATA_SECT_SIZE of data from/to the ATA device.
+ *
+ *     LOCKING:
+ *     Inherited from caller.
+ */
+
  static void ata_pio_sector(struct ata_queued_cmd *qc)
  {
         int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
@@ -2594,6 +2679,18 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
         kunmap(page);
  }
  
+/**
+ *     __atapi_pio_bytes - Transfer data from/to the ATAPI device.
+ *     @qc: Command in progress
+ *     @bytes: number of bytes
+ *
+ *     Transfer data from/to the ATAPI device.
+ *
+ *     LOCKING:
+ *     Inherited from caller.
+ *
+ */
+
  static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
  {
         int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
@@ -2603,10 +2700,33 @@ static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
         unsigned char *buf;
         unsigned int offset, count;
  
-       if (qc->curbytes == qc->nbytes - bytes)
+       if (qc->curbytes + bytes >= qc->nbytes)
                 ap->pio_task_state = PIO_ST_LAST;
  
  next_sg:
+       if (unlikely(qc->cursg >= qc->n_elem)) {
+               /* 
+                * The end of qc->sg is reached and the device expects
+                * more data to transfer. In order not to overrun qc->sg
+                * and fulfill length specified in the byte count register,
+                *    - for read case, discard trailing data from the device
+                *    - for write case, pad the transfer with zero data
+                */
+               u16 pad_buf[1] = { 0 };
+               unsigned int words = bytes >> 1;
+               unsigned int i;
+
+               if (words) /* warning if bytes > 1 */
+                       printk(KERN_WARNING "ata%u: %u bytes trailing data\n", 
+                              ap->id, bytes);
+
+               for (i = 0; i < words; i++)
+                       ata_data_xfer(ap, (unsigned char*)pad_buf, 2, do_write);
+
+               ap->pio_task_state = PIO_ST_LAST;
+               return;
+       }
+
         sg = &qc->sg[qc->cursg];
  
         page = sg->page;
@@ -2640,11 +2760,21 @@ next_sg:
  
         kunmap(page);
  
-       if (bytes) {
+       if (bytes)
                 goto next_sg;
-       }
  }
  
+/**
+ *     atapi_pio_bytes - Transfer data from/to the ATAPI device.
+ *     @qc: Command in progress
+ *
+ *     Transfer data from/to the ATAPI device.
+ *
+ *     LOCKING:
+ *     Inherited from caller.
+ *
+ */
+
  static void atapi_pio_bytes(struct ata_queued_cmd *qc)
  {
         struct ata_port *ap = qc->ap;
@@ -2717,9 +2847,7 @@ static void ata_pio_block(struct ata_port *ap)
                 if ((status & ATA_DRQ) == 0) {
                         ap->pio_task_state = PIO_ST_IDLE;
  
-                       ata_irq_on(ap);
-
-                       ata_qc_complete(qc, status);
+                       ata_poll_qc_complete(qc, status);
                         return;
                 }
  
@@ -2749,9 +2877,7 @@ static void ata_pio_error(struct ata_port *ap)
  
         ap->pio_task_state = PIO_ST_IDLE;
  
-       ata_irq_on(ap);
-
-       ata_qc_complete(qc, drv_stat | ATA_ERR);
+       ata_poll_qc_complete(qc, drv_stat | ATA_ERR);
  }
  
  static void ata_pio_task(void *_data)
@@ -2857,8 +2983,10 @@ static void atapi_request_sense(struct ata_port *ap, struct ata_device *dev,
  static void ata_qc_timeout(struct ata_queued_cmd *qc)
  {
         struct ata_port *ap = qc->ap;
+       struct ata_host_set *host_set = ap->host_set;
         struct ata_device *dev = qc->dev;
         u8 host_stat = 0, drv_stat;
+       unsigned long flags;
  
         DPRINTK("ENTER\n");
  
@@ -2869,7 +2997,9 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc)
                 if (!(cmd->eh_eflags & SCSI_EH_CANCEL_CMD)) {
  
                         /* finish completing original command */
+                       spin_lock_irqsave(&host_set->lock, flags);
                         __ata_qc_complete(qc);
+                       spin_unlock_irqrestore(&host_set->lock, flags);
  
                         atapi_request_sense(ap, dev, cmd);
  
@@ -2880,6 +3010,8 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc)
                 }
         }
  
+       spin_lock_irqsave(&host_set->lock, flags);
+
         /* hack alert!  We cannot use the supplied completion
          * function from inside the ->eh_strategy_handler() thread.
          * libata is the only user of ->eh_strategy_handler() in
@@ -2895,7 +3027,7 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc)
                 host_stat = ap->ops->bmdma_status(ap);
  
                 /* before we do anything else, clear DMA-Start bit */
-               ap->ops->bmdma_stop(ap);
+               ap->ops->bmdma_stop(qc);
  
                 /* fall through */
  
@@ -2913,6 +3045,9 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc)
                 ata_qc_complete(qc, drv_stat);
                 break;
         }
+
+       spin_unlock_irqrestore(&host_set->lock, flags);
+
  out:
         DPRINTK("EXIT\n");
  }
@@ -3086,9 +3221,14 @@ void ata_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
         if (likely(qc->flags & ATA_QCFLAG_DMAMAP))
                 ata_sg_clean(qc);
  
+       /* atapi: mark qc as inactive to prevent the interrupt handler
+        * from completing the command twice later, before the error handler
+        * is called. (when rc != 0 and atapi request sense is needed)
+        */
+       qc->flags &= ~ATA_QCFLAG_ACTIVE;
+
         /* call completion callback */
         rc = qc->complete_fn(qc, drv_stat);
-       qc->flags &= ~ATA_QCFLAG_ACTIVE;
  
         /* if callback indicates not to complete command (non-zero),
          * return immediately
@@ -3218,11 +3358,13 @@ int ata_qc_issue_prot(struct ata_queued_cmd *qc)
                 break;
  
         case ATA_PROT_ATAPI_NODATA:
+               ap->flags |= ATA_FLAG_NOINTR;
                 ata_tf_to_host_nolock(ap, &qc->tf);
                 queue_work(ata_wq, &ap->packet_task);
                 break;
  
         case ATA_PROT_ATAPI_DMA:
+               ap->flags |= ATA_FLAG_NOINTR;
                 ap->ops->tf_load(ap, &qc->tf);   /* load tf registers */
                 ap->ops->bmdma_setup(qc);           /* set up bmdma */
                 queue_work(ata_wq, &ap->packet_task);
@@ -3267,7 +3409,7 @@ static void ata_bmdma_setup_mmio (struct ata_queued_cmd *qc)
  }
  
  /**
- *     ata_bmdma_start - Start a PCI IDE BMDMA transaction
+ *     ata_bmdma_start_mmio - Start a PCI IDE BMDMA transaction
   *     @qc: Info associated with this ATA transaction.
   *
   *     LOCKING:
@@ -3438,7 +3580,7 @@ u8 ata_bmdma_status(struct ata_port *ap)
  
  /**
   *     ata_bmdma_stop - Stop PCI IDE BMDMA transfer
- *     @ap: Port associated with this ATA transaction.
+ *     @qc: Command we are ending DMA for
   *
   *     Clears the ATA_DMA_START flag in the dma control register
   *
@@ -3448,8 +3590,9 @@ u8 ata_bmdma_status(struct ata_port *ap)
   *     spin_lock_irqsave(host_set lock)
   */
  
-void ata_bmdma_stop(struct ata_port *ap)
+void ata_bmdma_stop(struct ata_queued_cmd *qc)
  {
+       struct ata_port *ap = qc->ap;
         if (ap->flags & ATA_FLAG_MMIO) {
                 void __iomem *mmio = (void __iomem *) ap->ioaddr.bmdma_addr;
  
@@ -3501,7 +3644,7 @@ inline unsigned int ata_host_intr (struct ata_port *ap,
                         goto idle_irq;
  
                 /* before we do anything else, clear DMA-Start bit */
-               ap->ops->bmdma_stop(ap);
+               ap->ops->bmdma_stop(qc);
  
                 /* fall through */
  
@@ -3576,7 +3719,8 @@ irqreturn_t ata_interrupt (int irq, void *dev_instance, struct pt_regs *regs)
                 struct ata_port *ap;
  
                 ap = host_set->ports[i];
-               if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) {
+               if (ap &&
+                   !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) {
                         struct ata_queued_cmd *qc;
  
                         qc = ata_qc_from_tag(ap, ap->active_tag);
@@ -3628,19 +3772,27 @@ static void atapi_packet_task(void *_data)
         /* send SCSI cdb */
         DPRINTK("send cdb\n");
         assert(ap->cdb_len >= 12);
-       ata_data_xfer(ap, qc->cdb, ap->cdb_len, 1);
  
-       /* if we are DMA'ing, irq handler takes over from here */
-       if (qc->tf.protocol == ATA_PROT_ATAPI_DMA)
-               ap->ops->bmdma_start(qc);           /* initiate bmdma */
+       if (qc->tf.protocol == ATA_PROT_ATAPI_DMA ||
+           qc->tf.protocol == ATA_PROT_ATAPI_NODATA) {
+               unsigned long flags;
  
-       /* non-data commands are also handled via irq */
-       else if (qc->tf.protocol == ATA_PROT_ATAPI_NODATA) {
-               /* do nothing */
-       }
+               /* Once we're done issuing command and kicking bmdma,
+                * irq handler takes over.  To not lose irq, we need
+                * to clear NOINTR flag before sending cdb, but
+                * interrupt handler shouldn't be invoked before we're
+                * finished.  Hence, the following locking.
+                */
+               spin_lock_irqsave(&ap->host_set->lock, flags);
+               ap->flags &= ~ATA_FLAG_NOINTR;
+               ata_data_xfer(ap, qc->cdb, ap->cdb_len, 1);
+               if (qc->tf.protocol == ATA_PROT_ATAPI_DMA)
+                       ap->ops->bmdma_start(qc);       /* initiate bmdma */
+               spin_unlock_irqrestore(&ap->host_set->lock, flags);
+       } else {
+               ata_data_xfer(ap, qc->cdb, ap->cdb_len, 1);
  
-       /* PIO commands are handled by polling */
-       else {
+               /* PIO commands are handled by polling */
                 ap->pio_task_state = PIO_ST;
                 queue_work(ata_wq, &ap->pio_task);
         }
@@ -3648,7 +3800,7 @@ static void atapi_packet_task(void *_data)
         return;
  
  err_out:
-       ata_qc_complete(qc, ATA_ERR);
+       ata_poll_qc_complete(qc, ATA_ERR);
  }
  
  
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c

index 794fb559efb080aa9986df125cd3730ff48ee5ae..346eb36b1e31e0c5665269077e65ef4f11149644 100644 (file)
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -1,25 +1,36 @@
  /*
-   libata-scsi.c - helper library for ATA
-
-   Copyright 2003-2004 Red Hat, Inc.  All rights reserved.
-   Copyright 2003-2004 Jeff Garzik
-
-   The contents of this file are subject to the Open
-   Software License version 1.1 that can be found at
-   http://www.opensource.org/licenses/osl-1.1.txt and is included herein
-   by reference.
-
-   Alternatively, the contents of this file may be used under the terms
-   of the GNU General Public License version 2 (the "GPL") as distributed
-   in the kernel source COPYING file, in which case the provisions of
-   the GPL are applicable instead of the above.  If you wish to allow
-   the use of your version of this file only under the terms of the
-   GPL and not to allow others to use your version of this file under
-   the OSL, indicate your decision by deleting the provisions above and
-   replace them with the notice and other provisions required by the GPL.
-   If you do not delete the provisions above, a recipient may use your
-   version of this file under either the OSL or the GPL.
-
+ *  libata-scsi.c - helper library for ATA
+ *
+ *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
+ *                 Please ALWAYS copy linux-ide@vger.kernel.org
+ *                 on emails.
+ *
+ *  Copyright 2003-2004 Red Hat, Inc.  All rights reserved.
+ *  Copyright 2003-2004 Jeff Garzik
+ *
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  Hardware documentation available from
+ *  - http://www.t10.org/
+ *  - http://www.t13.org/
+ *
   */
  
  #include <linux/kernel.h>
@@ -385,11 +396,66 @@ int ata_scsi_error(struct Scsi_Host *host)
          * appropriate place
          */
         host->host_failed--;
+       INIT_LIST_HEAD(&host->eh_cmd_q);
  
         DPRINTK("EXIT\n");
         return 0;
  }
  
+/**
+ *     ata_scsi_start_stop_xlat - Translate SCSI START STOP UNIT command
+ *     @qc: Storage for translated ATA taskfile
+ *     @scsicmd: SCSI command to translate
+ *
+ *     Sets up an ATA taskfile to issue STANDBY (to stop) or READ VERIFY
+ *     (to start). Perhaps these commands should be preceded by
+ *     CHECK POWER MODE to see what power mode the device is already in.
+ *     [See SAT revision 5 at www.t10.org]
+ *
+ *     LOCKING:
+ *     spin_lock_irqsave(host_set lock)
+ *
+ *     RETURNS:
+ *     Zero on success, non-zero on error.
+ */
+
+static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc,
+                                            u8 *scsicmd)
+{
+       struct ata_taskfile *tf = &qc->tf;
+
+       tf->flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
+       tf->protocol = ATA_PROT_NODATA;
+       if (scsicmd[1] & 0x1) {
+               ;       /* ignore IMMED bit, violates sat-r05 */
+       }
+       if (scsicmd[4] & 0x2)
+               return 1;       /* LOEJ bit set not supported */
+       if (((scsicmd[4] >> 4) & 0xf) != 0)
+               return 1;       /* power conditions not supported */
+       if (scsicmd[4] & 0x1) { /* bit 0 set: start the unit via READ VERIFY */
+               tf->nsect = 1;  /* 1 sector, lba=0 */
+               tf->lbah = 0x0;
+               tf->lbam = 0x0;
+               tf->lbal = 0x0;
+               tf->device |= ATA_LBA;
+               tf->command = ATA_CMD_VERIFY;   /* READ VERIFY */
+       } else {        /* bit 0 clear: stop the unit via STANDBY */
+               tf->nsect = 0;  /* time period value (0 implies now) */
+               tf->command = ATA_CMD_STANDBY;
+               /* Consider: ATA STANDBY IMMEDIATE command */
+       }
+       /*
+        * Standby and Idle condition timers could be implemented but that
+        * would require libata to implement the Power condition mode page
+        * and allow the user to change it. Changing mode pages requires
+        * MODE SELECT to be implemented.
+        */
+
+       return 0;       /* taskfile translated successfully */
+}
+
+
  /**
   *     ata_scsi_flush_xlat - Translate SCSI SYNCHRONIZE CACHE command
   *     @qc: Storage for translated ATA taskfile
@@ -575,11 +641,19 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, u8 *scsicmd)
                 tf->lbah = scsicmd[3];
  
                 VPRINTK("ten-byte command\n");
+               if (qc->nsect == 0) /* we don't support length==0 cmds */
+                       return 1;
                 return 0;
         }
  
         if (scsicmd[0] == READ_6 || scsicmd[0] == WRITE_6) {
                 qc->nsect = tf->nsect = scsicmd[4];
+               if (!qc->nsect) {
+                       qc->nsect = 256;
+                       if (lba48)
+                               tf->hob_nsect = 1;
+               }
+
                 tf->lbal = scsicmd[3];
                 tf->lbam = scsicmd[2];
                 tf->lbah = scsicmd[1] & 0x1f; /* mask out reserved bits */
@@ -619,6 +693,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, u8 *scsicmd)
                 tf->lbah = scsicmd[7];
  
                 VPRINTK("sixteen-byte command\n");
+               if (qc->nsect == 0) /* we don't support length==0 cmds */
+                       return 1;
                 return 0;
         }
  
@@ -1434,6 +1510,8 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
         case VERIFY:
         case VERIFY_16:
                 return ata_scsi_verify_xlat;
+       case START_STOP:
+               return ata_scsi_start_stop_xlat;
         }
  
         return NULL;
diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h

index d90430bbb0de1032bc40e6835550962ac4dbdd5f..809c634afbcda8792bd1ec9f07c0e4593251ad93 100644 (file)
--- a/drivers/scsi/libata.h
+++ b/drivers/scsi/libata.h
@@ -1,32 +1,35 @@
  /*
-   libata.h - helper library for ATA
-
-   Copyright 2003-2004 Red Hat, Inc.  All rights reserved.
-   Copyright 2003-2004 Jeff Garzik
-
-   The contents of this file are subject to the Open
-   Software License version 1.1 that can be found at
-   http://www.opensource.org/licenses/osl-1.1.txt and is included herein
-   by reference.
-
-   Alternatively, the contents of this file may be used under the terms
-   of the GNU General Public License version 2 (the "GPL") as distributed
-   in the kernel source COPYING file, in which case the provisions of
-   the GPL are applicable instead of the above.  If you wish to allow
-   the use of your version of this file only under the terms of the
-   GPL and not to allow others to use your version of this file under
-   the OSL, indicate your decision by deleting the provisions above and
-   replace them with the notice and other provisions required by the GPL.
-   If you do not delete the provisions above, a recipient may use your
-   version of this file under either the OSL or the GPL.
-
+ *  libata.h - helper library for ATA
+ *
+ *  Copyright 2003-2004 Red Hat, Inc.  All rights reserved.
+ *  Copyright 2003-2004 Jeff Garzik
+ *
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
   */
  
  #ifndef __LIBATA_H__
  #define __LIBATA_H__
  
  #define DRV_NAME       "libata"
-#define DRV_VERSION    "1.11"  /* must be exactly four chars */
+#define DRV_VERSION    "1.12"  /* must be exactly four chars */
  
  struct ata_scsi_args {
         u16                     *id;
@@ -72,7 +75,7 @@ extern unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf,
  extern void ata_scsi_badcmd(struct scsi_cmnd *cmd,
                             void (*done)(struct scsi_cmnd *),
                             u8 asc, u8 ascq);
-extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args, 
+extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args,
                          unsigned int (*actor) (struct ata_scsi_args *args,
                                             u8 *rbuf, unsigned int buflen));
  
diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c

index b0403ccd8a25d97aec9e220d175844dd45448076..03d9bc6e69dfafddda8dc2f39b10953ad00fc6af 100644 (file)
--- a/drivers/scsi/sata_nv.c
+++ b/drivers/scsi/sata_nv.c
@@ -4,21 +4,37 @@
   *  Copyright 2004 NVIDIA Corp.  All rights reserved.
   *  Copyright 2004 Andrew Chew
   *
- *  The contents of this file are subject to the Open
- *  Software License version 1.1 that can be found at
- *  http://www.opensource.org/licenses/osl-1.1.txt and is included herein
- *  by reference.
   *
- *  Alternatively, the contents of this file may be used under the terms
- *  of the GNU General Public License version 2 (the "GPL") as distributed
- *  in the kernel source COPYING file, in which case the provisions of
- *  the GPL are applicable instead of the above.  If you wish to allow
- *  the use of your version of this file only under the terms of the
- *  GPL and not to allow others to use your version of this file under
- *  the OSL, indicate your decision by deleting the provisions above and
- *  replace them with the notice and other provisions required by the GPL.
- *  If you do not delete the provisions above, a recipient may use your
- *  version of this file under either the OSL or the GPL.
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  No hardware documentation available outside of NVIDIA.
+ *  This driver programs the NVIDIA SATA controller in a similar
+ *  fashion as with other PCI IDE BMDMA controllers, with a few
+ *  NV-specific details such as register offsets, SATA phy location,
+ *  hotplug info, etc.
+ *
+ *
+ *  0.08
+ *     - Added support for MCP51 and MCP55.
+ *
+ *  0.07
+ *     - Added support for RAID class code.
   *
   *  0.06
   *     - Added generic SATA support by using a pci_device_id that filters on
@@ -48,7 +64,7 @@
  #include <linux/libata.h>
  
  #define DRV_NAME                       "sata_nv"
-#define DRV_VERSION                    "0.6"
+#define DRV_VERSION                    "0.8"
  
  #define NV_PORTS                       2
  #define NV_PIO_MASK                    0x1f
@@ -116,7 +132,9 @@ enum nv_host_type
         GENERIC,
         NFORCE2,
         NFORCE3,
-       CK804
+       CK804,
+       MCP51,
+       MCP55
  };
  
  static struct pci_device_id nv_pci_tbl[] = {
@@ -134,9 +152,18 @@ static struct pci_device_id nv_pci_tbl[] = {
                 PCI_ANY_ID, PCI_ANY_ID, 0, 0, CK804 },
         { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2,
                 PCI_ANY_ID, PCI_ANY_ID, 0, 0, CK804 },
+       { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 },
+       { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 },
+       { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 },
         { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
                 PCI_ANY_ID, PCI_ANY_ID,
                 PCI_CLASS_STORAGE_IDE<<8, 0xffff00, GENERIC },
+       { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+               PCI_ANY_ID, PCI_ANY_ID,
+               PCI_CLASS_STORAGE_RAID<<8, 0xffff00, GENERIC },
         { 0, } /* terminate list */
  };
  
@@ -274,7 +301,8 @@ static irqreturn_t nv_interrupt (int irq, void *dev_instance,
                 struct ata_port *ap;
  
                 ap = host_set->ports[i];
-               if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) {
+               if (ap &&
+                   !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) {
                         struct ata_queued_cmd *qc;
  
                         qc = ata_qc_from_tag(ap, ap->active_tag);
diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c

index 5c1d4411457a4a09ab2b6ad814b9d53fb9ea0b07..7c4f6ecc1cc9bb218f4e91a50a414f7bc36d2690 100644 (file)
--- a/drivers/scsi/sata_promise.c
+++ b/drivers/scsi/sata_promise.c
@@ -7,21 +7,26 @@
   *
   *  Copyright 2003-2004 Red Hat, Inc.
   *
- *  The contents of this file are subject to the Open
- *  Software License version 1.1 that can be found at
- *  http://www.opensource.org/licenses/osl-1.1.txt and is included herein
- *  by reference.
   *
- *  Alternatively, the contents of this file may be used under the terms
- *  of the GNU General Public License version 2 (the "GPL") as distributed
- *  in the kernel source COPYING file, in which case the provisions of
- *  the GPL are applicable instead of the above.  If you wish to allow
- *  the use of your version of this file only under the terms of the
- *  GPL and not to allow others to use your version of this file under
- *  the OSL, indicate your decision by deleting the provisions above and
- *  replace them with the notice and other provisions required by the GPL.
- *  If you do not delete the provisions above, a recipient may use your
- *  version of this file under either the OSL or the GPL.
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  Hardware information only available under NDA.
   *
   */
  
@@ -40,7 +45,7 @@
  #include "sata_promise.h"
  
  #define DRV_NAME       "sata_promise"
-#define DRV_VERSION    "1.01"
+#define DRV_VERSION    "1.02"
  
  
  enum {
@@ -79,7 +84,8 @@ static irqreturn_t pdc_interrupt (int irq, void *dev_instance, struct pt_regs *r
  static void pdc_eng_timeout(struct ata_port *ap);
  static int pdc_port_start(struct ata_port *ap);
  static void pdc_port_stop(struct ata_port *ap);
-static void pdc_phy_reset(struct ata_port *ap);
+static void pdc_pata_phy_reset(struct ata_port *ap);
+static void pdc_sata_phy_reset(struct ata_port *ap);
  static void pdc_qc_prep(struct ata_queued_cmd *qc);
  static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf);
  static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf);
@@ -106,19 +112,22 @@ static Scsi_Host_Template pdc_ata_sht = {
         .ordered_flush          = 1,
  };
  
-static struct ata_port_operations pdc_ata_ops = {
+static struct ata_port_operations pdc_sata_ops = {
         .port_disable           = ata_port_disable,
         .tf_load                = pdc_tf_load_mmio,
         .tf_read                = ata_tf_read,
         .check_status           = ata_check_status,
         .exec_command           = pdc_exec_command_mmio,
         .dev_select             = ata_std_dev_select,
-       .phy_reset              = pdc_phy_reset,
+
+       .phy_reset              = pdc_sata_phy_reset,
+
         .qc_prep                = pdc_qc_prep,
         .qc_issue               = pdc_qc_issue_prot,
         .eng_timeout            = pdc_eng_timeout,
         .irq_handler            = pdc_interrupt,
         .irq_clear              = pdc_irq_clear,
+
         .scr_read               = pdc_sata_scr_read,
         .scr_write              = pdc_sata_scr_write,
         .port_start             = pdc_port_start,
@@ -126,6 +135,27 @@ static struct ata_port_operations pdc_ata_ops = {
         .host_stop              = ata_host_stop,
  };
  
+static struct ata_port_operations pdc_pata_ops = {
+       .port_disable           = ata_port_disable,
+       .tf_load                = pdc_tf_load_mmio,
+       .tf_read                = ata_tf_read,
+       .check_status           = ata_check_status,
+       .exec_command           = pdc_exec_command_mmio,
+       .dev_select             = ata_std_dev_select,
+
+       .phy_reset              = pdc_pata_phy_reset,
+
+       .qc_prep                = pdc_qc_prep,
+       .qc_issue               = pdc_qc_issue_prot,
+       .eng_timeout            = pdc_eng_timeout,
+       .irq_handler            = pdc_interrupt,
+       .irq_clear              = pdc_irq_clear,
+
+       .port_start             = pdc_port_start,
+       .port_stop              = pdc_port_stop,
+       .host_stop              = ata_host_stop,
+};
+
  static struct ata_port_info pdc_port_info[] = {
         /* board_2037x */
         {
@@ -135,7 +165,7 @@ static struct ata_port_info pdc_port_info[] = {
                 .pio_mask       = 0x1f, /* pio0-4 */
                 .mwdma_mask     = 0x07, /* mwdma0-2 */
                 .udma_mask      = 0x7f, /* udma0-6 ; FIXME */
-               .port_ops       = &pdc_ata_ops,
+               .port_ops       = &pdc_sata_ops,
         },
  
         /* board_20319 */
@@ -146,7 +176,7 @@ static struct ata_port_info pdc_port_info[] = {
                 .pio_mask       = 0x1f, /* pio0-4 */
                 .mwdma_mask     = 0x07, /* mwdma0-2 */
                 .udma_mask      = 0x7f, /* udma0-6 ; FIXME */
-               .port_ops       = &pdc_ata_ops,
+               .port_ops       = &pdc_sata_ops,
         },
  
         /* board_20619 */
@@ -157,7 +187,7 @@ static struct ata_port_info pdc_port_info[] = {
                 .pio_mask       = 0x1f, /* pio0-4 */
                 .mwdma_mask     = 0x07, /* mwdma0-2 */
                 .udma_mask      = 0x7f, /* udma0-6 ; FIXME */
-               .port_ops       = &pdc_ata_ops,
+               .port_ops       = &pdc_pata_ops,
         },
  };
  
@@ -181,6 +211,10 @@ static struct pci_device_id pdc_ata_pci_tbl[] = {
           board_20319 },
         { PCI_VENDOR_ID_PROMISE, 0x3319, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
           board_20319 },
+       { PCI_VENDOR_ID_PROMISE, 0x3519, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+         board_20319 },
+       { PCI_VENDOR_ID_PROMISE, 0x3d17, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+         board_20319 },
         { PCI_VENDOR_ID_PROMISE, 0x3d18, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
           board_20319 },
  
@@ -268,12 +302,23 @@ static void pdc_reset_port(struct ata_port *ap)
         readl(mmio);    /* flush */
  }
  
-static void pdc_phy_reset(struct ata_port *ap)
+static void pdc_sata_phy_reset(struct ata_port *ap)
  {
         pdc_reset_port(ap);
         sata_phy_reset(ap);
  }
  
+static void pdc_pata_phy_reset(struct ata_port *ap)
+{
+       /* FIXME: add cable detect.  Don't assume 40-pin cable */
+       ap->cbl = ATA_CBL_PATA40;
+       ap->udma_mask &= ATA_UDMA_MASK_40C;
+
+       pdc_reset_port(ap);
+       ata_port_probe(ap);
+       ata_bus_reset(ap);
+}
+
  static u32 pdc_sata_scr_read (struct ata_port *ap, unsigned int sc_reg)
  {
         if (sc_reg > SCR_CONTROL)
@@ -321,11 +366,15 @@ static void pdc_qc_prep(struct ata_queued_cmd *qc)
  
  static void pdc_eng_timeout(struct ata_port *ap)
  {
+       struct ata_host_set *host_set = ap->host_set;
         u8 drv_stat;
         struct ata_queued_cmd *qc;
+       unsigned long flags;
  
         DPRINTK("ENTER\n");
  
+       spin_lock_irqsave(&host_set->lock, flags);
+
         qc = ata_qc_from_tag(ap, ap->active_tag);
         if (!qc) {
                 printk(KERN_ERR "ata%u: BUG: timeout without command\n",
@@ -359,6 +408,7 @@ static void pdc_eng_timeout(struct ata_port *ap)
         }
  
  out:
+       spin_unlock_irqrestore(&host_set->lock, flags);
         DPRINTK("EXIT\n");
  }
  
@@ -441,7 +491,8 @@ static irqreturn_t pdc_interrupt (int irq, void *dev_instance, struct pt_regs *r
                 VPRINTK("port %u\n", i);
                 ap = host_set->ports[i];
                 tmp = mask & (1 << (i + 1));
-               if (tmp && ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) {
+               if (tmp && ap &&
+                   !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) {
                         struct ata_queued_cmd *qc;
  
                         qc = ata_qc_from_tag(ap, ap->active_tag);
diff --git a/drivers/scsi/sata_promise.h b/drivers/scsi/sata_promise.h

index 6e7e96b9ee137d621370c40d9b139eab84f4b355..6ee5e190262de8d55f9689450e8fb08aea67eb77 100644 (file)
--- a/drivers/scsi/sata_promise.h
+++ b/drivers/scsi/sata_promise.h
@@ -3,21 +3,24 @@
   *
   *  Copyright 2003-2004 Red Hat, Inc.
   *
- *  The contents of this file are subject to the Open
- *  Software License version 1.1 that can be found at
- *  http://www.opensource.org/licenses/osl-1.1.txt and is included herein
- *  by reference.
   *
- *  Alternatively, the contents of this file may be used under the terms
- *  of the GNU General Public License version 2 (the "GPL") as distributed
- *  in the kernel source COPYING file, in which case the provisions of
- *  the GPL are applicable instead of the above.  If you wish to allow
- *  the use of your version of this file only under the terms of the
- *  GPL and not to allow others to use your version of this file under
- *  the OSL, indicate your decision by deleting the provisions above and
- *  replace them with the notice and other provisions required by the GPL.
- *  If you do not delete the provisions above, a recipient may use your
- *  version of this file under either the OSL or the GPL.
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
   *
   */
  
diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c

index 1383e8a28d728b34e936c0b9c7e04ffbf9a5818c..9c99ab433bd3650750bc4df43996bd3b4384d81a 100644 (file)
--- a/drivers/scsi/sata_qstor.c
+++ b/drivers/scsi/sata_qstor.c
@@ -6,21 +6,24 @@
   *  Copyright 2005 Pacific Digital Corporation.
   *  (OSL/GPL code release authorized by Jalil Fadavi).
   *
- *  The contents of this file are subject to the Open
- *  Software License version 1.1 that can be found at
- *  http://www.opensource.org/licenses/osl-1.1.txt and is included herein
- *  by reference.
   *
- *  Alternatively, the contents of this file may be used under the terms
- *  of the GNU General Public License version 2 (the "GPL") as distributed
- *  in the kernel source COPYING file, in which case the provisions of
- *  the GPL are applicable instead of the above.  If you wish to allow
- *  the use of your version of this file only under the terms of the
- *  GPL and not to allow others to use your version of this file under
- *  the OSL, indicate your decision by deleting the provisions above and
- *  replace them with the notice and other provisions required by the GPL.
- *  If you do not delete the provisions above, a recipient may use your
- *  version of this file under either the OSL or the GPL.
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
   *
   */
  
@@ -117,7 +120,7 @@ static void qs_phy_reset(struct ata_port *ap);
  static void qs_qc_prep(struct ata_queued_cmd *qc);
  static int qs_qc_issue(struct ata_queued_cmd *qc);
  static int qs_check_atapi_dma(struct ata_queued_cmd *qc);
-static void qs_bmdma_stop(struct ata_port *ap);
+static void qs_bmdma_stop(struct ata_queued_cmd *qc);
  static u8 qs_bmdma_status(struct ata_port *ap);
  static void qs_irq_clear(struct ata_port *ap);
  static void qs_eng_timeout(struct ata_port *ap);
@@ -198,7 +201,7 @@ static int qs_check_atapi_dma(struct ata_queued_cmd *qc)
         return 1;       /* ATAPI DMA not supported */
  }
  
-static void qs_bmdma_stop(struct ata_port *ap)
+static void qs_bmdma_stop(struct ata_queued_cmd *qc)
  {
         /* nothing */
  }
@@ -386,7 +389,8 @@ static inline unsigned int qs_intr_pkt(struct ata_host_set *host_set)
                         DPRINTK("SFF=%08x%08x: sCHAN=%u sHST=%d sDST=%02x\n",
                                         sff1, sff0, port_no, sHST, sDST);
                         handled = 1;
-                       if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) {
+                       if (ap && !(ap->flags &
+                                   (ATA_FLAG_PORT_DISABLED|ATA_FLAG_NOINTR))) {
                                 struct ata_queued_cmd *qc;
                                 struct qs_port_priv *pp = ap->private_data;
                                 if (!pp || pp->state != qs_state_pkt)
@@ -417,7 +421,8 @@ static inline unsigned int qs_intr_mmio(struct ata_host_set *host_set)
         for (port_no = 0; port_no < host_set->n_ports; ++port_no) {
                 struct ata_port *ap;
                 ap = host_set->ports[port_no];
-               if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) {
+               if (ap &&
+                   !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) {
                         struct ata_queued_cmd *qc;
                         struct qs_port_priv *pp = ap->private_data;
                         if (!pp || pp->state != qs_state_mmio)
@@ -431,7 +436,7 @@ static inline unsigned int qs_intr_mmio(struct ata_host_set *host_set)
                                         continue;
                                 DPRINTK("ata%u: protocol %d (dev_stat 0x%X)\n",
                                         ap->id, qc->tf.protocol, status);
-               
+
                                 /* complete taskfile transaction */
                                 pp->state = qs_state_idle;
                                 ata_qc_complete(qc, status);
diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c

index 49ed557a4b661e4cfa9569b0d2af72f4375573d8..71d49548f0a36223cfb1aaa060cd77be52e9a9f5 100644 (file)
--- a/drivers/scsi/sata_sil.c
+++ b/drivers/scsi/sata_sil.c
@@ -5,24 +5,32 @@
   *                 Please ALWAYS copy linux-ide@vger.kernel.org
   *                 on emails.
   *
- *  Copyright 2003 Red Hat, Inc.
+ *  Copyright 2003-2005 Red Hat, Inc.
   *  Copyright 2003 Benjamin Herrenschmidt
   *
- *  The contents of this file are subject to the Open
- *  Software License version 1.1 that can be found at
- *  http://www.opensource.org/licenses/osl-1.1.txt and is included herein
- *  by reference.
   *
- *  Alternatively, the contents of this file may be used under the terms
- *  of the GNU General Public License version 2 (the "GPL") as distributed
- *  in the kernel source COPYING file, in which case the provisions of
- *  the GPL are applicable instead of the above.  If you wish to allow
- *  the use of your version of this file only under the terms of the
- *  GPL and not to allow others to use your version of this file under
- *  the OSL, indicate your decision by deleting the provisions above and
- *  replace them with the notice and other provisions required by the GPL.
- *  If you do not delete the provisions above, a recipient may use your
- *  version of this file under either the OSL or the GPL.
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  Documentation for SiI 3112:
+ *  http://gkernel.sourceforge.net/specs/sii/3112A_SiI-DS-0095-B2.pdf.bz2
+ *
+ *  Other errata and documentation available under NDA.
   *
   */
  
@@ -41,8 +49,11 @@
  #define DRV_VERSION    "0.9"
  
  enum {
+       SIL_FLAG_MOD15WRITE     = (1 << 30),
+
         sil_3112                = 0,
-       sil_3114                = 1,
+       sil_3112_m15w           = 1,
+       sil_3114                = 2,
  
         SIL_FIFO_R0             = 0x40,
         SIL_FIFO_W0             = 0x41,
@@ -76,13 +87,13 @@ static void sil_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val);
  static void sil_post_set_mode (struct ata_port *ap);
  
  static struct pci_device_id sil_pci_tbl[] = {
-       { 0x1095, 0x3112, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 },
-       { 0x1095, 0x0240, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 },
+       { 0x1095, 0x3112, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112_m15w },
+       { 0x1095, 0x0240, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112_m15w },
         { 0x1095, 0x3512, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 },
         { 0x1095, 0x3114, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3114 },
-       { 0x1002, 0x436e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 },
-       { 0x1002, 0x4379, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 },
-       { 0x1002, 0x437a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 },
+       { 0x1002, 0x436e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112_m15w },
+       { 0x1002, 0x4379, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112_m15w },
+       { 0x1002, 0x437a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112_m15w },
         { }     /* terminate list */
  };
  
@@ -174,6 +185,16 @@ static struct ata_port_info sil_port_info[] = {
                 .mwdma_mask     = 0x07,                 /* mwdma0-2 */
                 .udma_mask      = 0x3f,                 /* udma0-5 */
                 .port_ops       = &sil_ops,
+       }, /* sil_3112_15w - keep it sync'd w/ sil_3112 */
+       {
+               .sht            = &sil_sht,
+               .host_flags     = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
+                                 ATA_FLAG_SRST | ATA_FLAG_MMIO |
+                                 SIL_FLAG_MOD15WRITE,
+               .pio_mask       = 0x1f,                 /* pio0-4 */
+               .mwdma_mask     = 0x07,                 /* mwdma0-2 */
+               .udma_mask      = 0x3f,                 /* udma0-5 */
+               .port_ops       = &sil_ops,
         }, /* sil_3114 */
         {
                 .sht            = &sil_sht,
@@ -323,15 +344,15 @@ static void sil_dev_config(struct ata_port *ap, struct ata_device *dev)
         while ((len > 0) && (s[len - 1] == ' '))
                 len--;
  
-       for (n = 0; sil_blacklist[n].product; n++) 
+       for (n = 0; sil_blacklist[n].product; n++)
                 if (!memcmp(sil_blacklist[n].product, s,
                             strlen(sil_blacklist[n].product))) {
                         quirks = sil_blacklist[n].quirk;
                         break;
                 }
-       
+
         /* limit requests to 15 sectors */
-       if (quirks & SIL_QUIRK_MOD15WRITE) {
+       if ((ap->flags & SIL_FLAG_MOD15WRITE) && (quirks & SIL_QUIRK_MOD15WRITE)) {
                 printk(KERN_INFO "ata%u(%u): applying Seagate errata fix\n",
                        ap->id, dev->devno);
                 ap->host->max_sectors = 15;
diff --git a/drivers/scsi/sata_sis.c b/drivers/scsi/sata_sis.c

index e418b89c6b9d8cf67c57a1b03431b91a19b585f2..43af445b3ad2eeafe74f37dbad64f18300f8be12 100644 (file)
--- a/drivers/scsi/sata_sis.c
+++ b/drivers/scsi/sata_sis.c
@@ -7,21 +7,26 @@
   *
   *  Copyright 2004 Uwe Koziolek
   *
- *  The contents of this file are subject to the Open
- *  Software License version 1.1 that can be found at
- *  http://www.opensource.org/licenses/osl-1.1.txt and is included herein
- *  by reference.
   *
- *  Alternatively, the contents of this file may be used under the terms
- *  of the GNU General Public License version 2 (the "GPL") as distributed
- *  in the kernel source COPYING file, in which case the provisions of
- *  the GPL are applicable instead of the above.  If you wish to allow
- *  the use of your version of this file only under the terms of the
- *  GPL and not to allow others to use your version of this file under
- *  the OSL, indicate your decision by deleting the provisions above and
- *  replace them with the notice and other provisions required by the GPL.
- *  If you do not delete the provisions above, a recipient may use your
- *  version of this file under either the OSL or the GPL.
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  Hardware documentation available under NDA.
   *
   */
  
@@ -234,7 +239,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
         pci_read_config_dword(pdev, SIS_GENCTL, &genctl);
         if ((genctl & GENCTL_IOMAPPED_SCR) == 0)
                 probe_ent->host_flags |= SIS_FLAG_CFGSCR;
-       
+
         /* if hardware thinks SCRs are in IO space, but there are
          * no IO resources assigned, change to PCI cfg space.
          */
diff --git a/drivers/scsi/sata_svw.c b/drivers/scsi/sata_svw.c

index 858e07185dbdf2589042924708baa297a16a3b8d..19d3bb3b0fb659f27f484b75952be3adfa0fed1f 100644 (file)
--- a/drivers/scsi/sata_svw.c
+++ b/drivers/scsi/sata_svw.c
@@ -13,21 +13,26 @@
   *  This driver probably works with non-Apple versions of the
   *  Broadcom chipset...
   *
- *  The contents of this file are subject to the Open
- *  Software License version 1.1 that can be found at
- *  http://www.opensource.org/licenses/osl-1.1.txt and is included herein
- *  by reference.
   *
- *  Alternatively, the contents of this file may be used under the terms
- *  of the GNU General Public License version 2 (the "GPL") as distributed
- *  in the kernel source COPYING file, in which case the provisions of
- *  the GPL are applicable instead of the above.  If you wish to allow
- *  the use of your version of this file only under the terms of the
- *  GPL and not to allow others to use your version of this file under
- *  the OSL, indicate your decision by deleting the provisions above and
- *  replace them with the notice and other provisions required by the GPL.
- *  If you do not delete the provisions above, a recipient may use your
- *  version of this file under either the OSL or the GPL.
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  Hardware documentation available under NDA.
   *
   */
  
@@ -195,18 +200,18 @@ static void k2_bmdma_start_mmio (struct ata_queued_cmd *qc)
         /* start host DMA transaction */
         dmactl = readb(mmio + ATA_DMA_CMD);
         writeb(dmactl | ATA_DMA_START, mmio + ATA_DMA_CMD);
-       /* There is a race condition in certain SATA controllers that can 
-          be seen when the r/w command is given to the controller before the 
+       /* There is a race condition in certain SATA controllers that can
+          be seen when the r/w command is given to the controller before the
            host DMA is started. On a Read command, the controller would initiate
            the command to the drive even before it sees the DMA start. When there
-          are very fast drives connected to the controller, or when the data request 
+          are very fast drives connected to the controller, or when the data request
            hits in the drive cache, there is the possibility that the drive returns a part
            or all of the requested data to the controller before the DMA start is issued.
            In this case, the controller would become confused as to what to do with the data.
            In the worst case when all the data is returned back to the controller, the
            controller could hang. In other cases it could return partial data returning
            in data corruption. This problem has been seen in PPC systems and can also appear
-          on an system with very fast disks, where the SATA controller is sitting behind a 
+          on an system with very fast disks, where the SATA controller is sitting behind a
            number of bridges, and hence there is significant latency between the r/w command
            and the start command. */
         /* issue r/w command if the access is to ATA*/
@@ -214,7 +219,7 @@ static void k2_bmdma_start_mmio (struct ata_queued_cmd *qc)
                 ap->ops->exec_command(ap, &qc->tf);
  }
  
-                                                                             
+
  static u8 k2_stat_check_status(struct ata_port *ap)
  {
                 return readl((void *) ap->ioaddr.status_addr);
diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c

index 140cea05de3f23cdfa8d204af0d49b9f52f32913..c72fcc46f0fa82bf6c3a4c5aaf45fb282d5db3b6 100644 (file)
--- a/drivers/scsi/sata_sx4.c
+++ b/drivers/scsi/sata_sx4.c
@@ -7,21 +7,26 @@
   *
   *  Copyright 2003-2004 Red Hat, Inc.
   *
- *  The contents of this file are subject to the Open
- *  Software License version 1.1 that can be found at
- *  http://www.opensource.org/licenses/osl-1.1.txt and is included herein
- *  by reference.
   *
- *  Alternatively, the contents of this file may be used under the terms
- *  of the GNU General Public License version 2 (the "GPL") as distributed
- *  in the kernel source COPYING file, in which case the provisions of
- *  the GPL are applicable instead of the above.  If you wish to allow
- *  the use of your version of this file only under the terms of the
- *  GPL and not to allow others to use your version of this file under
- *  the OSL, indicate your decision by deleting the provisions above and
- *  replace them with the notice and other provisions required by the GPL.
- *  If you do not delete the provisions above, a recipient may use your
- *  version of this file under either the OSL or the GPL.
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  Hardware documentation available under NDA.
   *
   */
  
@@ -94,7 +99,7 @@ enum {
         PDC_DIMM1_CONTROL_OFFSET      = 0x84,
         PDC_SDRAM_CONTROL_OFFSET      = 0x88,
         PDC_I2C_WRITE                 = 0x00000000,
-       PDC_I2C_READ                  = 0x00000040,     
+       PDC_I2C_READ                  = 0x00000040,
         PDC_I2C_START                 = 0x00000080,
         PDC_I2C_MASK_INT              = 0x00000020,
         PDC_I2C_COMPLETE              = 0x00010000,
@@ -105,16 +110,16 @@ enum {
         PDC_DIMM_SPD_COLUMN_NUM       = 4,
         PDC_DIMM_SPD_MODULE_ROW       = 5,
         PDC_DIMM_SPD_TYPE             = 11,
-       PDC_DIMM_SPD_FRESH_RATE       = 12,         
-       PDC_DIMM_SPD_BANK_NUM         = 17,     
+       PDC_DIMM_SPD_FRESH_RATE       = 12,
+       PDC_DIMM_SPD_BANK_NUM         = 17,
         PDC_DIMM_SPD_CAS_LATENCY      = 18,
-       PDC_DIMM_SPD_ATTRIBUTE        = 21,    
+       PDC_DIMM_SPD_ATTRIBUTE        = 21,
         PDC_DIMM_SPD_ROW_PRE_CHARGE   = 27,
-       PDC_DIMM_SPD_ROW_ACTIVE_DELAY = 28,      
+       PDC_DIMM_SPD_ROW_ACTIVE_DELAY = 28,
         PDC_DIMM_SPD_RAS_CAS_DELAY    = 29,
         PDC_DIMM_SPD_ACTIVE_PRECHARGE = 30,
         PDC_DIMM_SPD_SYSTEM_FREQ      = 126,
-       PDC_CTL_STATUS                = 0x08,   
+       PDC_CTL_STATUS                = 0x08,
         PDC_DIMM_WINDOW_CTLR          = 0x0C,
         PDC_TIME_CONTROL              = 0x3C,
         PDC_TIME_PERIOD               = 0x40,
@@ -157,15 +162,15 @@ static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf);
  static void pdc20621_host_stop(struct ata_host_set *host_set);
  static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe);
  static int pdc20621_detect_dimm(struct ata_probe_ent *pe);
-static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, 
+static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe,
                                       u32 device, u32 subaddr, u32 *pdata);
  static int pdc20621_prog_dimm0(struct ata_probe_ent *pe);
  static unsigned int pdc20621_prog_dimm_global(struct ata_probe_ent *pe);
  #ifdef ATA_VERBOSE_DEBUG
-static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, 
+static void pdc20621_get_from_dimm(struct ata_probe_ent *pe,
                                    void *psource, u32 offset, u32 size);
  #endif
-static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, 
+static void pdc20621_put_to_dimm(struct ata_probe_ent *pe,
                                  void *psource, u32 offset, u32 size);
  static void pdc20621_irq_clear(struct ata_port *ap);
  static int pdc20621_qc_issue_prot(struct ata_queued_cmd *qc);
@@ -468,7 +473,7 @@ static void pdc20621_dma_prep(struct ata_queued_cmd *qc)
         for (i = 0; i < last; i++) {
                 buf[idx++] = cpu_to_le32(sg_dma_address(&sg[i]));
                 buf[idx++] = cpu_to_le32(sg_dma_len(&sg[i]));
-               total_len += sg[i].length;
+               total_len += sg_dma_len(&sg[i]);
         }
         buf[idx - 1] |= cpu_to_le32(ATA_PRD_EOT);
         sgt_len = idx * 4;
@@ -825,7 +830,8 @@ static irqreturn_t pdc20621_interrupt (int irq, void *dev_instance, struct pt_re
                         ap = host_set->ports[port_no];
                 tmp = mask & (1 << i);
                 VPRINTK("seq %u, port_no %u, ap %p, tmp %x\n", i, port_no, ap, tmp);
-               if (tmp && ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) {
+               if (tmp && ap &&
+                   !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) {
                         struct ata_queued_cmd *qc;
  
                         qc = ata_qc_from_tag(ap, ap->active_tag);
@@ -847,10 +853,14 @@ static irqreturn_t pdc20621_interrupt (int irq, void *dev_instance, struct pt_re
  static void pdc_eng_timeout(struct ata_port *ap)
  {
         u8 drv_stat;
+       struct ata_host_set *host_set = ap->host_set;
         struct ata_queued_cmd *qc;
+       unsigned long flags;
  
         DPRINTK("ENTER\n");
  
+       spin_lock_irqsave(&host_set->lock, flags);
+
         qc = ata_qc_from_tag(ap, ap->active_tag);
         if (!qc) {
                 printk(KERN_ERR "ata%u: BUG: timeout without command\n",
@@ -884,6 +894,7 @@ static void pdc_eng_timeout(struct ata_port *ap)
         }
  
  out:
+       spin_unlock_irqrestore(&host_set->lock, flags);
         DPRINTK("EXIT\n");
  }
  
@@ -922,7 +933,7 @@ static void pdc_sata_setup_port(struct ata_ioports *port, unsigned long base)
  
  
  #ifdef ATA_VERBOSE_DEBUG
-static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, 
+static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource,
                                    u32 offset, u32 size)
  {
         u32 window_size;
@@ -936,9 +947,9 @@ static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource,
         /* hard-code chip #0 */
         mmio += PDC_CHIP0_OFS;
  
-       page_mask = 0x00;       
-       window_size = 0x2000 * 4; /* 32K byte uchar size */  
-       idx = (u16) (offset / window_size); 
+       page_mask = 0x00;
+       window_size = 0x2000 * 4; /* 32K byte uchar size */
+       idx = (u16) (offset / window_size);
  
         writel(0x01, mmio + PDC_GENERAL_CTLR);
         readl(mmio + PDC_GENERAL_CTLR);
@@ -947,19 +958,19 @@ static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource,
  
         offset -= (idx * window_size);
         idx++;
-       dist = ((long) (window_size - (offset + size))) >= 0 ? size : 
+       dist = ((long) (window_size - (offset + size))) >= 0 ? size :
                 (long) (window_size - offset);
-       memcpy_fromio((char *) psource, (char *) (dimm_mmio + offset / 4), 
+       memcpy_fromio((char *) psource, (char *) (dimm_mmio + offset / 4),
                       dist);
  
-       psource += dist;    
+       psource += dist;
         size -= dist;
         for (; (long) size >= (long) window_size ;) {
                 writel(0x01, mmio + PDC_GENERAL_CTLR);
                 readl(mmio + PDC_GENERAL_CTLR);
                 writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR);
                 readl(mmio + PDC_DIMM_WINDOW_CTLR);
-               memcpy_fromio((char *) psource, (char *) (dimm_mmio), 
+               memcpy_fromio((char *) psource, (char *) (dimm_mmio),
                               window_size / 4);
                 psource += window_size;
                 size -= window_size;
@@ -971,14 +982,14 @@ static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource,
                 readl(mmio + PDC_GENERAL_CTLR);
                 writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR);
                 readl(mmio + PDC_DIMM_WINDOW_CTLR);
-               memcpy_fromio((char *) psource, (char *) (dimm_mmio), 
+               memcpy_fromio((char *) psource, (char *) (dimm_mmio),
                               size / 4);
         }
  }
  #endif
  
  
-static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, 
+static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource,
                                  u32 offset, u32 size)
  {
         u32 window_size;
@@ -989,16 +1000,16 @@ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource,
         struct pdc_host_priv *hpriv = pe->private_data;
         void *dimm_mmio = hpriv->dimm_mmio;
  
-       /* hard-code chip #0 */   
+       /* hard-code chip #0 */
         mmio += PDC_CHIP0_OFS;
  
-       page_mask = 0x00;       
-       window_size = 0x2000 * 4;       /* 32K byte uchar size */  
+       page_mask = 0x00;
+       window_size = 0x2000 * 4;       /* 32K byte uchar size */
         idx = (u16) (offset / window_size);
  
         writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR);
         readl(mmio + PDC_DIMM_WINDOW_CTLR);
-       offset -= (idx * window_size); 
+       offset -= (idx * window_size);
         idx++;
         dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size :
                 (long) (window_size - offset);
@@ -1006,12 +1017,12 @@ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource,
         writel(0x01, mmio + PDC_GENERAL_CTLR);
         readl(mmio + PDC_GENERAL_CTLR);
  
-       psource += dist;    
+       psource += dist;
         size -= dist;
         for (; (long) size >= (long) window_size ;) {
                 writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR);
                 readl(mmio + PDC_DIMM_WINDOW_CTLR);
-               memcpy_toio((char *) (dimm_mmio), (char *) psource, 
+               memcpy_toio((char *) (dimm_mmio), (char *) psource,
                             window_size / 4);
                 writel(0x01, mmio + PDC_GENERAL_CTLR);
                 readl(mmio + PDC_GENERAL_CTLR);
@@ -1019,7 +1030,7 @@ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource,
                 size -= window_size;
                 idx ++;
         }
-    
+
         if (size) {
                 writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR);
                 readl(mmio + PDC_DIMM_WINDOW_CTLR);
@@ -1030,12 +1041,12 @@ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource,
  }
  
  
-static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device, 
+static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device,
                                       u32 subaddr, u32 *pdata)
  {
         void *mmio = pe->mmio_base;
         u32 i2creg  = 0;
-       u32 status;     
+       u32 status;
         u32 count =0;
  
         /* hard-code chip #0 */
@@ -1049,7 +1060,7 @@ static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device,
         readl(mmio + PDC_I2C_ADDR_DATA_OFFSET);
  
         /* Write Control to perform read operation, mask int */
-       writel(PDC_I2C_READ | PDC_I2C_START | PDC_I2C_MASK_INT, 
+       writel(PDC_I2C_READ | PDC_I2C_START | PDC_I2C_MASK_INT,
                mmio + PDC_I2C_CONTROL_OFFSET);
  
         for (count = 0; count <= 1000; count ++) {
@@ -1062,26 +1073,26 @@ static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device,
         }
  
         *pdata = (status >> 8) & 0x000000ff;
-       return 1;           
+       return 1;
  }
  
  
  static int pdc20621_detect_dimm(struct ata_probe_ent *pe)
  {
         u32 data=0 ;
-       if (pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, 
+       if (pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS,
                              PDC_DIMM_SPD_SYSTEM_FREQ, &data)) {
                 if (data == 100)
                         return 100;
         } else
                 return 0;
-       
+
         if (pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, 9, &data)) {
-               if(data <= 0x75) 
+               if(data <= 0x75)
                         return 133;
         } else
                 return 0;
-       
+
         return 0;
  }
  
@@ -1091,15 +1102,15 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe)
         u32 spd0[50];
         u32 data = 0;
         int size, i;
-       u8 bdimmsize; 
+       u8 bdimmsize;
         void *mmio = pe->mmio_base;
         static const struct {
                 unsigned int reg;
                 unsigned int ofs;
         } pdc_i2c_read_data [] = {
-               { PDC_DIMM_SPD_TYPE, 11 },              
+               { PDC_DIMM_SPD_TYPE, 11 },
                 { PDC_DIMM_SPD_FRESH_RATE, 12 },
-               { PDC_DIMM_SPD_COLUMN_NUM, 4 }, 
+               { PDC_DIMM_SPD_COLUMN_NUM, 4 },
                 { PDC_DIMM_SPD_ATTRIBUTE, 21 },
                 { PDC_DIMM_SPD_ROW_NUM, 3 },
                 { PDC_DIMM_SPD_BANK_NUM, 17 },
@@ -1108,7 +1119,7 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe)
                 { PDC_DIMM_SPD_ROW_ACTIVE_DELAY, 28 },
                 { PDC_DIMM_SPD_RAS_CAS_DELAY, 29 },
                 { PDC_DIMM_SPD_ACTIVE_PRECHARGE, 30 },
-               { PDC_DIMM_SPD_CAS_LATENCY, 18 },       
+               { PDC_DIMM_SPD_CAS_LATENCY, 18 },
         };
  
         /* hard-code chip #0 */
@@ -1116,17 +1127,17 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe)
  
         for(i=0; i<ARRAY_SIZE(pdc_i2c_read_data); i++)
                 pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS,
-                                 pdc_i2c_read_data[i].reg, 
+                                 pdc_i2c_read_data[i].reg,
                                   &spd0[pdc_i2c_read_data[i].ofs]);
-  
+
         data |= (spd0[4] - 8) | ((spd0[21] != 0) << 3) | ((spd0[3]-11) << 4);
-       data |= ((spd0[17] / 4) << 6) | ((spd0[5] / 2) << 7) | 
+       data |= ((spd0[17] / 4) << 6) | ((spd0[5] / 2) << 7) |
                 ((((spd0[27] + 9) / 10) - 1) << 8) ;
-       data |= (((((spd0[29] > spd0[28]) 
-                   ? spd0[29] : spd0[28]) + 9) / 10) - 1) << 10; 
+       data |= (((((spd0[29] > spd0[28])
+                   ? spd0[29] : spd0[28]) + 9) / 10) - 1) << 10;
         data |= ((spd0[30] - spd0[29] + 9) / 10 - 2) << 12;
-   
-       if (spd0[18] & 0x08) 
+
+       if (spd0[18] & 0x08)
                 data |= ((0x03) << 14);
         else if (spd0[18] & 0x04)
                 data |= ((0x02) << 14);
@@ -1135,7 +1146,7 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe)
         else
                 data |= (0 << 14);
  
-       /* 
+       /*
            Calculate the size of bDIMMSize (power of 2) and
            merge the DIMM size by program start/end address.
         */
@@ -1145,9 +1156,9 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe)
         data |= (((size / 16) - 1) << 16);
         data |= (0 << 23);
         data |= 8;
-       writel(data, mmio + PDC_DIMM0_CONTROL_OFFSET); 
+       writel(data, mmio + PDC_DIMM0_CONTROL_OFFSET);
         readl(mmio + PDC_DIMM0_CONTROL_OFFSET);
-       return size;                          
+       return size;
  }
  
  
@@ -1167,12 +1178,12 @@ static unsigned int pdc20621_prog_dimm_global(struct ata_probe_ent *pe)
           Refresh Enable (bit 17)
         */
  
-       data = 0x022259F1;   
+       data = 0x022259F1;
         writel(data, mmio + PDC_SDRAM_CONTROL_OFFSET);
         readl(mmio + PDC_SDRAM_CONTROL_OFFSET);
  
         /* Turn on for ECC */
-       pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, 
+       pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS,
                           PDC_DIMM_SPD_TYPE, &spd0);
         if (spd0 == 0x02) {
                 data |= (0x01 << 16);
@@ -1186,22 +1197,22 @@ static unsigned int pdc20621_prog_dimm_global(struct ata_probe_ent *pe)
         data |= (1<<19);
         writel(data, mmio + PDC_SDRAM_CONTROL_OFFSET);
  
-       error = 1;                     
+       error = 1;
         for (i = 1; i <= 10; i++) {   /* polling ~5 secs */
                 data = readl(mmio + PDC_SDRAM_CONTROL_OFFSET);
                 if (!(data & (1<<19))) {
                         error = 0;
-                       break;     
+                       break;
                 }
                 msleep(i*100);
         }
         return error;
  }
-       
+
  
  static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe)
  {
-       int speed, size, length; 
+       int speed, size, length;
         u32 addr,spd0,pci_status;
         u32 tmp=0;
         u32 time_period=0;
@@ -1228,7 +1239,7 @@ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe)
         /* Wait 3 seconds */
         msleep(3000);
  
-       /* 
+       /*
            When timer is enabled, counter is decreased every internal
            clock cycle.
         */
@@ -1236,24 +1247,24 @@ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe)
         tcount = readl(mmio + PDC_TIME_COUNTER);
         VPRINTK("Time Counter Register (0x44): 0x%x\n", tcount);
  
-       /* 
+       /*
            If SX4 is on PCI-X bus, after 3 seconds, the timer counter
            register should be >= (0xffffffff - 3x10^8).
         */
         if(tcount >= PCI_X_TCOUNT) {
                 ticks = (time_period - tcount);
                 VPRINTK("Num counters 0x%x (%d)\n", ticks, ticks);
-       
+
                 clock = (ticks / 300000);
                 VPRINTK("10 * Internal clk = 0x%x (%d)\n", clock, clock);
-               
+
                 clock = (clock * 33);
                 VPRINTK("10 * Internal clk * 33 = 0x%x (%d)\n", clock, clock);
  
                 /* PLL F Param (bit 22:16) */
                 fparam = (1400000 / clock) - 2;
                 VPRINTK("PLL F Param: 0x%x (%d)\n", fparam, fparam);
-               
+
                 /* OD param = 0x2 (bit 31:30), R param = 0x5 (bit 29:25) */
                 pci_status = (0x8a001824 | (fparam << 16));
         } else
@@ -1264,21 +1275,21 @@ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe)
         writel(pci_status, mmio + PDC_CTL_STATUS);
         readl(mmio + PDC_CTL_STATUS);
  
-       /* 
+       /*
            Read SPD of DIMM by I2C interface,
            and program the DIMM Module Controller.
         */
         if (!(speed = pdc20621_detect_dimm(pe))) {
-               printk(KERN_ERR "Detect Local DIMM Fail\n");  
+               printk(KERN_ERR "Detect Local DIMM Fail\n");
                 return 1;       /* DIMM error */
         }
         VPRINTK("Local DIMM Speed = %d\n", speed);
  
-       /* Programming DIMM0 Module Control Register (index_CID0:80h) */ 
+       /* Programming DIMM0 Module Control Register (index_CID0:80h) */
         size = pdc20621_prog_dimm0(pe);
         VPRINTK("Local DIMM Size = %dMB\n",size);
  
-       /* Programming DIMM Module Global Control Register (index_CID0:88h) */ 
+       /* Programming DIMM Module Global Control Register (index_CID0:88h) */
         if (pdc20621_prog_dimm_global(pe)) {
                 printk(KERN_ERR "Programming DIMM Module Global Control Register Fail\n");
                 return 1;
@@ -1297,30 +1308,30 @@ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe)
  
                 pdc20621_put_to_dimm(pe, (void *) test_parttern1, 0x10040, 40);
                 pdc20621_get_from_dimm(pe, (void *) test_parttern2, 0x40, 40);
-               printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], 
+               printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0],
                        test_parttern2[1], &(test_parttern2[2]));
-               pdc20621_get_from_dimm(pe, (void *) test_parttern2, 0x10040, 
+               pdc20621_get_from_dimm(pe, (void *) test_parttern2, 0x10040,
                                        40);
-               printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], 
+               printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0],
                        test_parttern2[1], &(test_parttern2[2]));
  
                 pdc20621_put_to_dimm(pe, (void *) test_parttern1, 0x40, 40);
                 pdc20621_get_from_dimm(pe, (void *) test_parttern2, 0x40, 40);
-               printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], 
+               printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0],
                        test_parttern2[1], &(test_parttern2[2]));
         }
  #endif
  
         /* ECC initiliazation. */
  
-       pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, 
+       pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS,
                           PDC_DIMM_SPD_TYPE, &spd0);
         if (spd0 == 0x02) {
                 VPRINTK("Start ECC initialization\n");
                 addr = 0;
                 length = size * 1024 * 1024;
                 while (addr < length) {
-                       pdc20621_put_to_dimm(pe, (void *) &tmp, addr, 
+                       pdc20621_put_to_dimm(pe, (void *) &tmp, addr,
                                              sizeof(u32));
                         addr += sizeof(u32);
                 }
diff --git a/drivers/scsi/sata_uli.c b/drivers/scsi/sata_uli.c

index a71fb54eebd301e07240310964a323b240db670d..1566886815fb50465522c27e056064b680c003e7 100644 (file)
--- a/drivers/scsi/sata_uli.c
+++ b/drivers/scsi/sata_uli.c
@@ -1,21 +1,26 @@
  /*
   *  sata_uli.c - ULi Electronics SATA
   *
- *  The contents of this file are subject to the Open
- *  Software License version 1.1 that can be found at
- *  http://www.opensource.org/licenses/osl-1.1.txt and is included herein
- *  by reference.
   *
- *  Alternatively, the contents of this file may be used under the terms
- *  of the GNU General Public License version 2 (the "GPL") as distributed
- *  in the kernel source COPYING file, in which case the provisions of
- *  the GPL are applicable instead of the above.  If you wish to allow
- *  the use of your version of this file only under the terms of the
- *  GPL and not to allow others to use your version of this file under
- *  the OSL, indicate your decision by deleting the provisions above and
- *  replace them with the notice and other provisions required by the GPL.
- *  If you do not delete the provisions above, a recipient may use your
- *  version of this file under either the OSL or the GPL.
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  Hardware documentation available under NDA.
   *
   */
  
@@ -214,7 +219,7 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
                 rc = -ENOMEM;
                 goto err_out_regions;
         }
-       
+
         switch (board_idx) {
         case uli_5287:
                 probe_ent->port[0].scr_addr = ULI5287_BASE;
diff --git a/drivers/scsi/sata_via.c b/drivers/scsi/sata_via.c

index f43183c19a12184ebd6902eadf154bc836a7e249..128b996b07b70167e3c928a62c035249cbc2e34f 100644 (file)
--- a/drivers/scsi/sata_via.c
+++ b/drivers/scsi/sata_via.c
@@ -1,34 +1,38 @@
  /*
-   sata_via.c - VIA Serial ATA controllers
-
-   Maintained by:  Jeff Garzik <jgarzik@pobox.com>
-                  Please ALWAYS copy linux-ide@vger.kernel.org
+ *  sata_via.c - VIA Serial ATA controllers
+ *
+ *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
+ *                Please ALWAYS copy linux-ide@vger.kernel.org
                    on emails.
-
-   Copyright 2003-2004 Red Hat, Inc.  All rights reserved.
-   Copyright 2003-2004 Jeff Garzik
-
-   The contents of this file are subject to the Open
-   Software License version 1.1 that can be found at
-   http://www.opensource.org/licenses/osl-1.1.txt and is included herein
-   by reference.
-
-   Alternatively, the contents of this file may be used under the terms
-   of the GNU General Public License version 2 (the "GPL") as distributed
-   in the kernel source COPYING file, in which case the provisions of
-   the GPL are applicable instead of the above.  If you wish to allow
-   the use of your version of this file only under the terms of the
-   GPL and not to allow others to use your version of this file under
-   the OSL, indicate your decision by deleting the provisions above and
-   replace them with the notice and other provisions required by the GPL.
-   If you do not delete the provisions above, a recipient may use your
-   version of this file under either the OSL or the GPL.
-
-   ----------------------------------------------------------------------
-
-   To-do list:
-   * VT6421 PATA support
-
+ *
+ *  Copyright 2003-2004 Red Hat, Inc.  All rights reserved.
+ *  Copyright 2003-2004 Jeff Garzik
+ *
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  Hardware documentation available under NDA.
+ *
+ *
+ *  To-do list:
+ *  - VT6421 PATA support
+ *
   */
  
  #include <linux/kernel.h>
@@ -347,7 +351,7 @@ static int svia_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
                 probe_ent = vt6420_init_probe_ent(pdev);
         else
                 probe_ent = vt6421_init_probe_ent(pdev);
-       
+
         if (!probe_ent) {
                 printk(KERN_ERR DRV_NAME "(%s): out of memory\n",
                        pci_name(pdev));
diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c

index c5e09dc6f3de67b3250250d7f064ff873d679bb6..3985f344da4d84061dddeff72800f91502bce41c 100644 (file)
--- a/drivers/scsi/sata_vsc.c
+++ b/drivers/scsi/sata_vsc.c
@@ -9,9 +9,29 @@
   *
   *  Bits from Jeff Garzik, Copyright RedHat, Inc.
   *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file "COPYING" in the main directory of this archive
- *  for more details.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  Vitesse hardware documentation presumably available under NDA.
+ *  Intel 31244 (same hardware interface) documentation presumably
+ *  available from http://developer.intel.com/
+ *
   */
  
  #include <linux/kernel.h>
@@ -173,7 +193,8 @@ static irqreturn_t vsc_sata_interrupt (int irq, void *dev_instance,
                         struct ata_port *ap;
  
                         ap = host_set->ports[i];
-                       if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) {
+                       if (ap && !(ap->flags &
+                                   (ATA_FLAG_PORT_DISABLED|ATA_FLAG_NOINTR))) {
                                 struct ata_queued_cmd *qc;
  
                                 qc = ata_qc_from_tag(ap, ap->active_tag);
@@ -342,7 +363,7 @@ static int __devinit vsc_sata_init_one (struct pci_dev *pdev, const struct pci_d
  
         pci_set_master(pdev);
  
-       /* 
+       /*
          * Config offset 0x98 is "Extended Control and Status Register 0"
          * Default value is (1 << 28).  All bits except bit 28 are reserved in
          * DPA mode.  If bit 28 is set, LED 0 reflects all ports' activity.
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c

index 2d3c4ac475f233d5cd723331dc117883fb28021d..48edd67982a5f85e8b6f5f2b842240fb5fbfc0d2 100644 (file)
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -336,9 +336,23 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
         unsigned long flags;
         const int size = sizeof(struct scsi_target)
                 + shost->transportt->target_size;
-       struct scsi_target *starget = kmalloc(size, GFP_ATOMIC);
+       struct scsi_target *starget;
         struct scsi_target *found_target;
  
+       /*
+        * Obtain the real parent from the transport. The transport
+        * is allowed to fail (no error) if there is nothing at that
+        * target id.
+        */
+       if (shost->transportt->target_parent) {
+               spin_lock_irqsave(shost->host_lock, flags);
+               parent = shost->transportt->target_parent(shost, channel, id);
+               spin_unlock_irqrestore(shost->host_lock, flags);
+               if (!parent)
+                       return NULL;
+       }
+
+       starget = kmalloc(size, GFP_KERNEL);
         if (!starget) {
                 printk(KERN_ERR "%s: allocation failure\n", __FUNCTION__);
                 return NULL;
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c

index 35d1c1e8e345dd67efcb2afafdcee616b0c23f3d..e6412fce423ce8fa5b2ebfd432f79c46f29c8d64 100644 (file)
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -1022,6 +1022,23 @@ static int fc_rport_match(struct attribute_container *cont,
         return &i->rport_attr_cont.ac == cont;
  }
  
+
+/*
+ * Must be called with shost->host_lock held
+ */
+static struct device *fc_target_parent(struct Scsi_Host *shost,
+                                       int channel, uint id)
+{
+       struct fc_rport *rport;
+
+       list_for_each_entry(rport, &fc_host_rports(shost), peers)
+               if ((rport->channel == channel) &&
+                   (rport->scsi_target_id == id))
+                       return &rport->dev;
+
+       return NULL;
+}
+
  struct scsi_transport_template *
  fc_attach_transport(struct fc_function_template *ft)
  {
@@ -1057,6 +1074,8 @@ fc_attach_transport(struct fc_function_template *ft)
  
         /* Transport uses the shost workq for scsi scanning */
         i->t.create_work_queue = 1;
+
+       i->t.target_parent = fc_target_parent;
         
         /*
          * Setup SCSI Target Attributes.
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c

index 51292f269ce52c0fb034d6e77485b532adcb217b..e822ca0e97cf441ca7ea407266dec3a8aee132f9 100644 (file)
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -2971,23 +2971,22 @@ static void * dev_seq_start(struct seq_file *s, loff_t *pos)
  {
         struct sg_proc_deviter * it = kmalloc(sizeof(*it), GFP_KERNEL);
  
+       s->private = it;
         if (! it)
                 return NULL;
+
         if (NULL == sg_dev_arr)
-               goto err1;
+               return NULL;
         it->index = *pos;
         it->max = sg_last_dev();
         if (it->index >= it->max)
-               goto err1;
+               return NULL;
         return it;
-err1:
-       kfree(it);
-       return NULL;
  }
  
  static void * dev_seq_next(struct seq_file *s, void *v, loff_t *pos)
  {
-       struct sg_proc_deviter * it = (struct sg_proc_deviter *) v;
+       struct sg_proc_deviter * it = s->private;
  
         *pos = ++it->index;
         return (it->index < it->max) ? it : NULL;
@@ -2995,7 +2994,7 @@ static void * dev_seq_next(struct seq_file *s, void *v, loff_t *pos)
  
  static void dev_seq_stop(struct seq_file *s, void *v)
  {
-       kfree (v);
+       kfree(s->private);
  }
  
  static int sg_proc_open_dev(struct inode *inode, struct file *file)
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c

index 0291a8fb654d51993c2313cf2778624eea20d53c..0a7839db57529c700e71dd1699eb45f273d44ccb 100644 (file)
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -4149,12 +4149,10 @@ static int __init init_st(void)
                         do_create_driverfs_files();
                         return 0;
                 }
-               if (st_sysfs_class)
-                       class_destroy(st_sysfs_class);
                 unregister_chrdev_region(MKDEV(SCSI_TAPE_MAJOR, 0),
-
                                          ST_MAX_TAPE_ENTRIES);
         }
+       class_destroy(st_sysfs_class);
  
         printk(KERN_ERR "Unable to get major %d for SCSI tapes\n", SCSI_TAPE_MAJOR);
         return 1;
@@ -4162,13 +4160,11 @@ static int __init init_st(void)
  
  static void __exit exit_st(void)
  {
-       if (st_sysfs_class)
-               class_destroy(st_sysfs_class);
-       st_sysfs_class = NULL;
         do_remove_driverfs_files();
         scsi_unregister_driver(&st_template.gendrv);
         unregister_chrdev_region(MKDEV(SCSI_TAPE_MAJOR, 0),
                                  ST_MAX_TAPE_ENTRIES);
+       class_destroy(st_sysfs_class);
         kfree(scsi_tapes);
         printk(KERN_INFO "st: Unloaded.\n");
  }
diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c

index 07f05e9d0955eb12ae58705a09ffa4d20b05531d..0e21f583690ebdeffa57f316bd8fba2c67da8e71 100644 (file)
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -33,36 +33,6 @@
  
  #undef SERIAL_DEBUG_PCI
  
-/*
- * Definitions for PCI support.
- */
-#define FL_BASE_MASK           0x0007
-#define FL_BASE0               0x0000
-#define FL_BASE1               0x0001
-#define FL_BASE2               0x0002
-#define FL_BASE3               0x0003
-#define FL_BASE4               0x0004
-#define FL_GET_BASE(x)         (x & FL_BASE_MASK)
-
-/* Use successive BARs (PCI base address registers),
-   else use offset into some specified BAR */
-#define FL_BASE_BARS           0x0008
-
-/* do not assign an irq */
-#define FL_NOIRQ               0x0080
-
-/* Use the Base address register size to cap number of ports */
-#define FL_REGION_SZ_CAP       0x0100
-
-struct pci_board {
-       unsigned int flags;
-       unsigned int num_ports;
-       unsigned int base_baud;
-       unsigned int uart_offset;
-       unsigned int reg_shift;
-       unsigned int first_offset;
-};
-
  /*
   * init function returns:
   *  > 0 - number of ports
@@ -75,14 +45,15 @@ struct pci_serial_quirk {
         u32     subvendor;
         u32     subdevice;
         int     (*init)(struct pci_dev *dev);
-       int     (*setup)(struct pci_dev *dev, struct pci_board *board,
-                        struct uart_port *port, int idx);
+       int     (*setup)(struct serial_private *, struct pciserial_board *,
+                        struct uart_port *, int);
         void    (*exit)(struct pci_dev *dev);
  };
  
  #define PCI_NUM_BAR_RESOURCES  6
  
  struct serial_private {
+       struct pci_dev          *dev;
         unsigned int            nr;
         void __iomem            *remapped_bar[PCI_NUM_BAR_RESOURCES];
         struct pci_serial_quirk *quirk;
@@ -101,17 +72,18 @@ static void moan_device(const char *str, struct pci_dev *dev)
  }
  
  static int
-setup_port(struct pci_dev *dev, struct uart_port *port,
+setup_port(struct serial_private *priv, struct uart_port *port,
            int bar, int offset, int regshift)
  {
-       struct serial_private *priv = pci_get_drvdata(dev);
+       struct pci_dev *dev = priv->dev;
         unsigned long base, len;
  
         if (bar >= PCI_NUM_BAR_RESOURCES)
                 return -EINVAL;
  
+       base = pci_resource_start(dev, bar);
+
         if (pci_resource_flags(dev, bar) & IORESOURCE_MEM) {
-               base = pci_resource_start(dev, bar);
                 len =  pci_resource_len(dev, bar);
  
                 if (!priv->remapped_bar[bar])
@@ -120,13 +92,16 @@ setup_port(struct pci_dev *dev, struct uart_port *port,
                         return -ENOMEM;
  
                 port->iotype = UPIO_MEM;
+               port->iobase = 0;
                 port->mapbase = base + offset;
                 port->membase = priv->remapped_bar[bar] + offset;
                 port->regshift = regshift;
         } else {
-               base = pci_resource_start(dev, bar) + offset;
                 port->iotype = UPIO_PORT;
-               port->iobase = base;
+               port->iobase = base + offset;
+               port->mapbase = 0;
+               port->membase = NULL;
+               port->regshift = 0;
         }
         return 0;
  }
@@ -136,7 +111,7 @@ setup_port(struct pci_dev *dev, struct uart_port *port,
   * Not that ugly ;) -- HW
   */
  static int
-afavlab_setup(struct pci_dev *dev, struct pci_board *board,
+afavlab_setup(struct serial_private *priv, struct pciserial_board *board,
               struct uart_port *port, int idx)
  {
         unsigned int bar, offset = board->first_offset;
@@ -149,7 +124,7 @@ afavlab_setup(struct pci_dev *dev, struct pci_board *board,
                 offset += (idx - 4) * board->uart_offset;
         }
  
-       return setup_port(dev, port, bar, offset, board->reg_shift);
+       return setup_port(priv, port, bar, offset, board->reg_shift);
  }
  
  /*
@@ -189,13 +164,13 @@ static int __devinit pci_hp_diva_init(struct pci_dev *dev)
   * some serial ports are supposed to be hidden on certain models.
   */
  static int
-pci_hp_diva_setup(struct pci_dev *dev, struct pci_board *board,
+pci_hp_diva_setup(struct serial_private *priv, struct pciserial_board *board,
               struct uart_port *port, int idx)
  {
         unsigned int offset = board->first_offset;
         unsigned int bar = FL_GET_BASE(board->flags);
  
-       switch (dev->subsystem_device) {
+       switch (priv->dev->subsystem_device) {
         case PCI_DEVICE_ID_HP_DIVA_MAESTRO:
                 if (idx == 3)
                         idx++;
@@ -212,7 +187,7 @@ pci_hp_diva_setup(struct pci_dev *dev, struct pci_board *board,
  
         offset += idx * board->uart_offset;
  
-       return setup_port(dev, port, bar, offset, board->reg_shift);
+       return setup_port(priv, port, bar, offset, board->reg_shift);
  }
  
  /*
@@ -307,7 +282,7 @@ static void __devexit pci_plx9050_exit(struct pci_dev *dev)
  
  /* SBS Technologies Inc. PMC-OCTPRO and P-OCTAL cards */
  static int
-sbs_setup(struct pci_dev *dev, struct pci_board *board,
+sbs_setup(struct serial_private *priv, struct pciserial_board *board,
                 struct uart_port *port, int idx)
  {
         unsigned int bar, offset = board->first_offset;
@@ -323,7 +298,7 @@ sbs_setup(struct pci_dev *dev, struct pci_board *board,
         } else /* we have only 8 ports on PMC-OCTALPRO */
                 return 1;
  
-       return setup_port(dev, port, bar, offset, board->reg_shift);
+       return setup_port(priv, port, bar, offset, board->reg_shift);
  }
  
  /*
@@ -389,6 +364,9 @@ static void __devexit sbs_exit(struct pci_dev *dev)
   *     - 10x cards have control registers in IO and/or memory space;
   *     - 20x cards have control registers in standard PCI configuration space.
   *
+ * Note: all 10x cards have PCI device ids 0x10..
+ *       all 20x cards have PCI device ids 0x20..
+ *
   * There are also Quartet Serial cards which use Oxford Semiconductor
   * 16954 quad UART PCI chip clocked by 18.432 MHz quartz.
   *
@@ -445,24 +423,18 @@ static int pci_siig20x_init(struct pci_dev *dev)
         return 0;
  }
  
-int pci_siig10x_fn(struct pci_dev *dev, int enable)
+/*
+ * Common init hook for all SIIG cards: dispatch to the 10x or 20x init
+ * routine based on the high byte of the PCI device id (0x10.. or 0x20..).
+ * Any other id is reported through moan_device() and rejected with
+ * -ENODEV.
+ */
+static int pci_siig_init(struct pci_dev *dev)
  {
-       int ret = 0;
-       if (enable)
-               ret = pci_siig10x_init(dev);
-       return ret;
-}
+       unsigned int type = dev->device & 0xff00;
  
-int pci_siig20x_fn(struct pci_dev *dev, int enable)
-{
-       int ret = 0;
-       if (enable)
-               ret = pci_siig20x_init(dev);
-       return ret;
-}
+       if (type == 0x1000)
+               return pci_siig10x_init(dev);
+       else if (type == 0x2000)
+               return pci_siig20x_init(dev);
  
-EXPORT_SYMBOL(pci_siig10x_fn);
-EXPORT_SYMBOL(pci_siig20x_fn);
+       moan_device("Unknown SIIG card", dev);
+       return -ENODEV;
+}
  
  /*
   * Timedia has an explosion of boards, and to avoid the PCI table from
@@ -523,7 +495,7 @@ static int __devinit pci_timedia_init(struct pci_dev *dev)
   * Ugh, this is ugly as all hell --- TYT
   */
  static int
-pci_timedia_setup(struct pci_dev *dev, struct pci_board *board,
+pci_timedia_setup(struct serial_private *priv, struct pciserial_board *board,
                   struct uart_port *port, int idx)
  {
         unsigned int bar = 0, offset = board->first_offset;
@@ -549,14 +521,15 @@ pci_timedia_setup(struct pci_dev *dev, struct pci_board *board,
                 bar = idx - 2;
         }
  
-       return setup_port(dev, port, bar, offset, board->reg_shift);
+       return setup_port(priv, port, bar, offset, board->reg_shift);
  }
  
  /*
   * Some Titan cards are also a little weird
   */
  static int
-titan_400l_800l_setup(struct pci_dev *dev, struct pci_board *board,
+titan_400l_800l_setup(struct serial_private *priv,
+                     struct pciserial_board *board,
                       struct uart_port *port, int idx)
  {
         unsigned int bar, offset = board->first_offset;
@@ -573,7 +546,7 @@ titan_400l_800l_setup(struct pci_dev *dev, struct pci_board *board,
                 offset = (idx - 2) * board->uart_offset;
         }
  
-       return setup_port(dev, port, bar, offset, board->reg_shift);
+       return setup_port(priv, port, bar, offset, board->reg_shift);
  }
  
  static int __devinit pci_xircom_init(struct pci_dev *dev)
@@ -593,7 +566,7 @@ static int __devinit pci_netmos_init(struct pci_dev *dev)
  }
  
  static int
-pci_default_setup(struct pci_dev *dev, struct pci_board *board,
+pci_default_setup(struct serial_private *priv, struct pciserial_board *board,
                   struct uart_port *port, int idx)
  {
         unsigned int bar, offset = board->first_offset, maxnr;
@@ -604,13 +577,13 @@ pci_default_setup(struct pci_dev *dev, struct pci_board *board,
         else
                 offset += idx * board->uart_offset;
  
-       maxnr = (pci_resource_len(dev, bar) - board->first_offset) /
+       maxnr = (pci_resource_len(priv->dev, bar) - board->first_offset) /
                 (8 << board->reg_shift);
  
         if (board->flags & FL_REGION_SZ_CAP && idx >= maxnr)
                 return 1;
                         
-       return setup_port(dev, port, bar, offset, board->reg_shift);
+       return setup_port(priv, port, bar, offset, board->reg_shift);
  }
  
  /* This should be in linux/pci_ids.h */
@@ -754,152 +727,15 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
                 .setup          = sbs_setup,
                 .exit           = __devexit_p(sbs_exit),
         },
-
         /*
          * SIIG cards.
-        *  It is not clear whether these could be collapsed.
          */
         {
                 .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_1S_10x_550,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig10x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_1S_10x_650,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig10x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_1S_10x_850,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig10x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_2S_10x_550,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig10x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_2S_10x_650,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig10x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_2S_10x_850,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig10x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_4S_10x_550,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig10x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_4S_10x_650,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig10x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_4S_10x_850,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig10x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_1S_20x_550,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig20x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_1S_20x_650,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig20x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_1S_20x_850,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig20x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_2S_20x_550,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig20x_init,
-               .setup          = pci_default_setup,
-       },
-       {       .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_2S_20x_650,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig20x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_2S_20x_850,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig20x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_4S_20x_550,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig20x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_4S_20x_650,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig20x_init,
-               .setup          = pci_default_setup,
-       },
-       {
-               .vendor         = PCI_VENDOR_ID_SIIG,
-               .device         = PCI_DEVICE_ID_SIIG_4S_20x_850,
+               .device         = PCI_ANY_ID,
                 .subvendor      = PCI_ANY_ID,
                 .subdevice      = PCI_ANY_ID,
-               .init           = pci_siig20x_init,
+               .init           = pci_siig_init,
                 .setup          = pci_default_setup,
         },
         /*
@@ -990,7 +826,7 @@ static struct pci_serial_quirk *find_quirk(struct pci_dev *dev)
  }
  
  static _INLINE_ int
-get_pci_irq(struct pci_dev *dev, struct pci_board *board, int idx)
+get_pci_irq(struct pci_dev *dev, struct pciserial_board *board)
  {
         if (board->flags & FL_NOIRQ)
                 return 0;
@@ -1115,7 +951,7 @@ enum pci_board_num_t {
   * see first lines of serial_in() and serial_out() in 8250.c
  */
  
-static struct pci_board pci_boards[] __devinitdata = {
+static struct pciserial_board pci_boards[] __devinitdata = {
         [pbn_default] = {
                 .flags          = FL_BASE0,
                 .num_ports      = 1,
@@ -1575,7 +1411,7 @@ static struct pci_board pci_boards[] __devinitdata = {
   * serial specs.  Returns 0 on success, 1 on failure.
   */
  static int __devinit
-serial_pci_guess_board(struct pci_dev *dev, struct pci_board *board)
+serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board)
  {
         int num_iomem, num_port, first_port = -1, i;
         
@@ -1640,7 +1476,8 @@ serial_pci_guess_board(struct pci_dev *dev, struct pci_board *board)
  }
  
  static inline int
-serial_pci_matches(struct pci_board *board, struct pci_board *guessed)
+serial_pci_matches(struct pciserial_board *board,
+                  struct pciserial_board *guessed)
  {
         return
             board->num_ports == guessed->num_ports &&
@@ -1650,58 +1487,14 @@ serial_pci_matches(struct pci_board *board, struct pci_board *guessed)
             board->first_offset == guessed->first_offset;
  }
  
-/*
- * Probe one serial board.  Unfortunately, there is no rhyme nor reason
- * to the arrangement of serial ports on a PCI card.
- */
-static int __devinit
-pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent)
+struct serial_private *
+pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board)
  {
+       struct uart_port serial_port;
         struct serial_private *priv;
-       struct pci_board *board, tmp;
         struct pci_serial_quirk *quirk;
         int rc, nr_ports, i;
  
-       if (ent->driver_data >= ARRAY_SIZE(pci_boards)) {
-               printk(KERN_ERR "pci_init_one: invalid driver_data: %ld\n",
-                       ent->driver_data);
-               return -EINVAL;
-       }
-
-       board = &pci_boards[ent->driver_data];
-
-       rc = pci_enable_device(dev);
-       if (rc)
-               return rc;
-
-       if (ent->driver_data == pbn_default) {
-               /*
-                * Use a copy of the pci_board entry for this;
-                * avoid changing entries in the table.
-                */
-               memcpy(&tmp, board, sizeof(struct pci_board));
-               board = &tmp;
-
-               /*
-                * We matched one of our class entries.  Try to
-                * determine the parameters of this board.
-                */
-               rc = serial_pci_guess_board(dev, board);
-               if (rc)
-                       goto disable;
-       } else {
-               /*
-                * We matched an explicit entry.  If we are able to
-                * detect this boards settings with our heuristic,
-                * then we no longer need this entry.
-                */
-               memcpy(&tmp, &pci_boards[pbn_default], sizeof(struct pci_board));
-               rc = serial_pci_guess_board(dev, &tmp);
-               if (rc == 0 && serial_pci_matches(board, &tmp))
-                       moan_device("Redundant entry in serial pci_table.",
-                                   dev);
-       }
-
         nr_ports = board->num_ports;
  
         /*
@@ -1718,8 +1511,10 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent)
          */
         if (quirk->init) {
                 rc = quirk->init(dev);
-               if (rc < 0)
-                       goto disable;
+               if (rc < 0) {
+                       priv = ERR_PTR(rc);
+                       goto err_out;
+               }
                 if (rc)
                         nr_ports = rc;
         }
@@ -1728,27 +1523,26 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent)
                        sizeof(unsigned int) * nr_ports,
                        GFP_KERNEL);
         if (!priv) {
-               rc = -ENOMEM;
-               goto deinit;
+               priv = ERR_PTR(-ENOMEM);
+               goto err_deinit;
         }
  
         memset(priv, 0, sizeof(struct serial_private) +
                         sizeof(unsigned int) * nr_ports);
  
+       priv->dev = dev;
         priv->quirk = quirk;
-       pci_set_drvdata(dev, priv);
+
+       memset(&serial_port, 0, sizeof(struct uart_port));
+       serial_port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ;
+       serial_port.uartclk = board->base_baud * 16;
+       serial_port.irq = get_pci_irq(dev, board);
+       serial_port.dev = &dev->dev;
  
         for (i = 0; i < nr_ports; i++) {
-               struct uart_port serial_port;
-               memset(&serial_port, 0, sizeof(struct uart_port));
-
-               serial_port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF |
-                                   UPF_SHARE_IRQ;
-               serial_port.uartclk = board->base_baud * 16;
-               serial_port.irq = get_pci_irq(dev, board, i);
-               serial_port.dev = &dev->dev;
-               if (quirk->setup(dev, board, &serial_port, i))
+               if (quirk->setup(priv, board, &serial_port, i))
                         break;
+
  #ifdef SERIAL_DEBUG_PCI
                 printk("Setup PCI port: port %x, irq %d, type %d\n",
                        serial_port.iobase, serial_port.irq, serial_port.iotype);
@@ -1763,24 +1557,21 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent)
  
         priv->nr = i;
  
-       return 0;
+       return priv;
  
- deinit:
+ err_deinit:
         if (quirk->exit)
                 quirk->exit(dev);
- disable:
-       pci_disable_device(dev);
-       return rc;
+ err_out:
+       return priv;
  }
+EXPORT_SYMBOL_GPL(pciserial_init_ports);
  
-static void __devexit pciserial_remove_one(struct pci_dev *dev)
+void pciserial_remove_ports(struct serial_private *priv)
  {
-       struct serial_private *priv = pci_get_drvdata(dev);
         struct pci_serial_quirk *quirk;
         int i;
  
-       pci_set_drvdata(dev, NULL);
-
         for (i = 0; i < priv->nr; i++)
                 serial8250_unregister_port(priv->line[i]);
  
@@ -1793,25 +1584,123 @@ static void __devexit pciserial_remove_one(struct pci_dev *dev)
         /*
          * Find the exit quirks.
          */
-       quirk = find_quirk(dev);
+       quirk = find_quirk(priv->dev);
         if (quirk->exit)
-               quirk->exit(dev);
+               quirk->exit(priv->dev);
+
+       kfree(priv);
+}
+EXPORT_SYMBOL_GPL(pciserial_remove_ports);
+
+/*
+ * Suspend the 8250 ports belonging to @priv.  Slots whose stored line
+ * number is negative are skipped.
+ */
+void pciserial_suspend_ports(struct serial_private *priv)
+{
+       int idx;
+
+       for (idx = 0; idx < priv->nr; idx++) {
+               if (priv->line[idx] < 0)
+                       continue;
+               serial8250_suspend_port(priv->line[idx]);
+       }
+}
+EXPORT_SYMBOL_GPL(pciserial_suspend_ports);
+
+/*
+ * Resume the 8250 ports belonging to @priv: re-run the quirk's init
+ * hook, if it has one, then resume every slot whose stored line number
+ * is not negative.
+ */
+void pciserial_resume_ports(struct serial_private *priv)
+{
+       struct pci_serial_quirk *quirk = priv->quirk;
+       int idx;
+
+       /* Make sure the board is correctly configured again first. */
+       if (quirk->init)
+               quirk->init(priv->dev);
+
+       for (idx = 0; idx < priv->nr; idx++) {
+               if (priv->line[idx] < 0)
+                       continue;
+               serial8250_resume_port(priv->line[idx]);
+       }
+}
+EXPORT_SYMBOL_GPL(pciserial_resume_ports);
+
+/*
+ * Probe one serial board.  Unfortunately, there is no rhyme nor reason
+ * to the arrangement of serial ports on a PCI card.
+ *
+ * ent->driver_data indexes pci_boards[]; an out-of-range index is
+ * rejected with -EINVAL before the device is touched.  After the device
+ * is enabled, the port setup itself is delegated to
+ * pciserial_init_ports().  On success the returned private data is
+ * stored as the PCI driver data and 0 is returned; on any failure after
+ * pci_enable_device() the device is disabled again and the error code
+ * is returned.
+ */
+static int __devinit
+pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent)
+{
+       struct serial_private *priv;
+       struct pciserial_board *board, tmp;
+       int rc;
+
+       if (ent->driver_data >= ARRAY_SIZE(pci_boards)) {
+               printk(KERN_ERR "pci_init_one: invalid driver_data: %ld\n",
+                       ent->driver_data);
+               return -EINVAL;
+       }
+
+       board = &pci_boards[ent->driver_data];
+
+       rc = pci_enable_device(dev);
+       if (rc)
+               return rc;
+
+       if (ent->driver_data == pbn_default) {
+               /*
+                * Use a copy of the pciserial_board entry for this;
+                * avoid changing entries in the table.
+                */
+               memcpy(&tmp, board, sizeof(struct pciserial_board));
+               board = &tmp;
+
+               /*
+                * We matched one of our class entries.  Try to
+                * determine the parameters of this board.
+                */
+               rc = serial_pci_guess_board(dev, board);
+               if (rc)
+                       goto disable;
+       } else {
+               /*
+                * We matched an explicit entry.  If we are able to
+                * detect this board's settings with our heuristic,
+                * then we no longer need this entry.
+                */
+               memcpy(&tmp, &pci_boards[pbn_default],
+                      sizeof(struct pciserial_board));
+               rc = serial_pci_guess_board(dev, &tmp);
+               if (rc == 0 && serial_pci_matches(board, &tmp))
+                       moan_device("Redundant entry in serial pci_table.",
+                                   dev);
+       }
  
+       priv = pciserial_init_ports(dev, board);
+       if (!IS_ERR(priv)) {
+               pci_set_drvdata(dev, priv);
+               return 0;
+       }
+
+       /* pciserial_init_ports() reports failure as an ERR_PTR value. */
+       rc = PTR_ERR(priv);
+
+ disable:
         pci_disable_device(dev);
+       return rc;
+}
  
-       kfree(priv);
+/*
+ * Remove one serial board: clear the PCI driver data, hand the private
+ * data saved there to pciserial_remove_ports(), then disable the PCI
+ * device.
+ */
+static void __devexit pciserial_remove_one(struct pci_dev *dev)
+{
+       struct serial_private *priv = pci_get_drvdata(dev);
+
+       pci_set_drvdata(dev, NULL);
+
+       pciserial_remove_ports(priv);
+
+       pci_disable_device(dev);
  }
  
  static int pciserial_suspend_one(struct pci_dev *dev, pm_message_t state)
  {
         struct serial_private *priv = pci_get_drvdata(dev);
  
-       if (priv) {
-               int i;
+       if (priv)
+               pciserial_suspend_ports(priv);
  
-               for (i = 0; i < priv->nr; i++)
-                       serial8250_suspend_port(priv->line[i]);
-       }
         pci_save_state(dev);
         pci_set_power_state(dev, pci_choose_state(dev, state));
         return 0;
@@ -1825,21 +1714,12 @@ static int pciserial_resume_one(struct pci_dev *dev)
         pci_restore_state(dev);
  
         if (priv) {
-               int i;
-
                 /*
                  * The device may have been disabled.  Re-enable it.
                  */
                 pci_enable_device(dev);
  
-               /*
-                * Ensure that the board is correctly configured.
-                */
-               if (priv->quirk->init)
-                       priv->quirk->init(dev);
-
-               for (i = 0; i < priv->nr; i++)
-                       serial8250_resume_port(priv->line[i]);
+               pciserial_resume_ports(priv);
         }
         return 0;
  }
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig

index 97034d3937fd11a1e6d80be2558b7b3f51ef9743..d5797618a3b918cc41e89187780d8d641fe49207 100644 (file)
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -211,7 +211,7 @@ comment "Non-8250 serial port support"
  
  config SERIAL_AMBA_PL010
         tristate "ARM AMBA PL010 serial port support"
-       depends on ARM_AMBA
+       depends on ARM_AMBA && (BROKEN || !ARCH_VERSATILE)
         select SERIAL_CORE
         help
           This selects the ARM(R) AMBA(R) PrimeCell PL010 UART.  If you have
@@ -819,7 +819,7 @@ config SERIAL_M32R_SIO_CONSOLE
  
  config SERIAL_M32R_PLDSIO
         bool "M32R SIO I/F on a PLD"
-       depends on SERIAL_M32R_SIO=y
+       depends on SERIAL_M32R_SIO=y && (PLAT_OPSPUT || PALT_USRV || PLAT_M32700UT)
         default n
         help
           Say Y here if you want to use the M32R serial controller
diff --git a/drivers/serial/cpm_uart/cpm_uart.h b/drivers/serial/cpm_uart/cpm_uart.h

index 5f6187baad86454f61a50053b824f97b62177e28..73c8a088c160fcda57b98bbc6574cb8f673dadb5 100644 (file)
--- a/drivers/serial/cpm_uart/cpm_uart.h
+++ b/drivers/serial/cpm_uart/cpm_uart.h
@@ -40,13 +40,15 @@
  #define TX_NUM_FIFO    4
  #define TX_BUF_SIZE    32
  
+#define SCC_WAIT_CLOSING 100
+
  struct uart_cpm_port {
         struct uart_port        port;
-       u16                     rx_nrfifos;     
+       u16                     rx_nrfifos;
         u16                     rx_fifosize;
-       u16                     tx_nrfifos;     
+       u16                     tx_nrfifos;
         u16                     tx_fifosize;
-       smc_t                   *smcp;  
+       smc_t                   *smcp;
         smc_uart_t              *smcup;
         scc_t                   *sccp;
         scc_uart_t              *sccup;
@@ -67,6 +69,8 @@ struct uart_cpm_port {
         int                      bits;
         /* Keep track of 'odd' SMC2 wirings */
         int                     is_portb;
+       /* wait on close if needed */
+       int                     wait_closing;
  };
  
  extern int cpm_uart_port_map[UART_NR];
diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c

index 29db677d4284dd5455a720acfe3a9d077bab4d40..d639ac92a117c73eec982310534d7b2b4d908df8 100644 (file)
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -9,9 +9,10 @@
   *
   *  Maintainer: Kumar Gala (kumar.gala@freescale.com) (CPM2)
   *              Pantelis Antoniou (panto@intracom.gr) (CPM1)
- * 
+ *
   *  Copyright (C) 2004 Freescale Semiconductor, Inc.
   *            (C) 2004 Intracom, S.A.
+ *            (C) 2005 MontaVista Software, Inc. by Vitaly Bordug <vbordug@ru.mvista.com>
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
@@ -70,8 +71,22 @@ static void cpm_uart_initbd(struct uart_cpm_port *pinfo);
  
  /**************************************************************/
  
+/*
+ * Convert a CPU pointer to an unsigned long address: values at or above
+ * CPM_ADDR are passed through unchanged, anything lower is translated
+ * with virt_to_bus().
+ */
+static inline unsigned long cpu2cpm_addr(void *addr)
+{
+       unsigned long raw = (unsigned long)addr;
+
+       return (raw >= CPM_ADDR) ? raw : virt_to_bus(addr);
+}
+
+/*
+ * Convert an unsigned long address to a CPU pointer: values at or above
+ * CPM_ADDR are simply cast, anything lower is translated with
+ * bus_to_virt().
+ */
+static inline void *cpm2cpu_addr(unsigned long addr)
+{
+       void *virt;
+
+       if (addr < CPM_ADDR)
+               virt = bus_to_virt(addr);
+       else
+               virt = (void *)addr;
+
+       return virt;
+}
+
  /*
- * Check, if transmit buffers are processed            
+ * Check, if transmit buffers are processed
  */
  static unsigned int cpm_uart_tx_empty(struct uart_port *port)
  {
@@ -143,15 +158,18 @@ static void cpm_uart_start_tx(struct uart_port *port, unsigned int tty_start)
         }
  
         if (cpm_uart_tx_pump(port) != 0) {
-               if (IS_SMC(pinfo))
+               if (IS_SMC(pinfo)) {
                         smcp->smc_smcm |= SMCM_TX;
-               else
+                       smcp->smc_smcmr |= SMCMR_TEN;
+               } else {
                         sccp->scc_sccm |= UART_SCCM_TX;
+                       pinfo->sccp->scc_gsmrl |= SCC_GSMRL_ENT;
+               }
         }
  }
  
  /*
- * Stop receiver 
+ * Stop receiver
   */
  static void cpm_uart_stop_rx(struct uart_port *port)
  {
@@ -176,7 +194,7 @@ static void cpm_uart_enable_ms(struct uart_port *port)
  }
  
  /*
- * Generate a break. 
+ * Generate a break.
   */
  static void cpm_uart_break_ctl(struct uart_port *port, int break_state)
  {
@@ -231,7 +249,7 @@ static void cpm_uart_int_rx(struct uart_port *port, struct pt_regs *regs)
                 /* get number of characters, and check spce in flip-buffer */
                 i = bdp->cbd_datlen;
  
-               /* If we have not enough room in tty flip buffer, then we try 
+               /* If we have not enough room in tty flip buffer, then we try
                  * later, which will be the next rx-interrupt or a timeout
                  */
                 if ((tty->flip.count + i) >= TTY_FLIPBUF_SIZE) {
@@ -243,7 +261,7 @@ static void cpm_uart_int_rx(struct uart_port *port, struct pt_regs *regs)
                 }
  
                 /* get pointer */
-               cp = (unsigned char *)bus_to_virt(bdp->cbd_bufaddr);
+               cp = cpm2cpu_addr(bdp->cbd_bufaddr);
  
                 /* loop through the buffer */
                 while (i-- > 0) {
@@ -265,13 +283,14 @@ static void cpm_uart_int_rx(struct uart_port *port, struct pt_regs *regs)
                 }               /* End while (i--) */
  
                 /* This BD is ready to be used again. Clear status. get next */
-               bdp->cbd_sc &= ~(BD_SC_BR | BD_SC_FR | BD_SC_PR | BD_SC_OV);
+               bdp->cbd_sc &= ~(BD_SC_BR | BD_SC_FR | BD_SC_PR | BD_SC_OV | BD_SC_ID);
                 bdp->cbd_sc |= BD_SC_EMPTY;
  
                 if (bdp->cbd_sc & BD_SC_WRAP)
                         bdp = pinfo->rx_bd_base;
                 else
                         bdp++;
+
         } /* End for (;;) */
  
         /* Write back buffer pointer */
@@ -336,22 +355,22 @@ static irqreturn_t cpm_uart_int(int irq, void *data, struct pt_regs *regs)
  
         if (IS_SMC(pinfo)) {
                 events = smcp->smc_smce;
+               smcp->smc_smce = events;
                 if (events & SMCM_BRKE)
                         uart_handle_break(port);
                 if (events & SMCM_RX)
                         cpm_uart_int_rx(port, regs);
                 if (events & SMCM_TX)
                         cpm_uart_int_tx(port, regs);
-               smcp->smc_smce = events;
         } else {
                 events = sccp->scc_scce;
+               sccp->scc_scce = events;
                 if (events & UART_SCCM_BRKE)
                         uart_handle_break(port);
                 if (events & UART_SCCM_RX)
                         cpm_uart_int_rx(port, regs);
                 if (events & UART_SCCM_TX)
                         cpm_uart_int_tx(port, regs);
-               sccp->scc_scce = events;
         }
         return (events) ? IRQ_HANDLED : IRQ_NONE;
  }
@@ -360,6 +379,7 @@ static int cpm_uart_startup(struct uart_port *port)
  {
         int retval;
         struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port;
+       int line = pinfo - cpm_uart_ports;
  
         pr_debug("CPM uart[%d]:startup\n", port->line);
  
@@ -376,9 +396,19 @@ static int cpm_uart_startup(struct uart_port *port)
                 pinfo->sccp->scc_sccm |= UART_SCCM_RX;
         }
  
+       if (!(pinfo->flags & FLAG_CONSOLE))
+               cpm_line_cr_cmd(line,CPM_CR_INIT_TRX);
         return 0;
  }
  
+inline void cpm_uart_wait_until_send(struct uart_cpm_port *pinfo)
+{
+       unsigned long target_jiffies = jiffies + pinfo->wait_closing;
+
+       while (!time_after(jiffies, target_jiffies))
+               schedule();
+}
+
  /*
   * Shutdown the uart
   */
@@ -394,6 +424,12 @@ static void cpm_uart_shutdown(struct uart_port *port)
  
         /* If the port is not the console, disable Rx and Tx. */
         if (!(pinfo->flags & FLAG_CONSOLE)) {
+               /* Wait for all the BDs marked sent */
+               while(!cpm_uart_tx_empty(port))
+                       schedule_timeout(2);
+               if(pinfo->wait_closing)
+                       cpm_uart_wait_until_send(pinfo);
+
                 /* Stop uarts */
                 if (IS_SMC(pinfo)) {
                         volatile smc_t *smcp = pinfo->smcp;
@@ -502,7 +538,7 @@ static void cpm_uart_set_termios(struct uart_port *port,
          */
         if ((termios->c_cflag & CREAD) == 0)
                 port->read_status_mask &= ~BD_SC_EMPTY;
-       
+
         spin_lock_irqsave(&port->lock, flags);
  
         /* Start bit has not been added (so don't, because we would just
@@ -569,7 +605,8 @@ static int cpm_uart_tx_pump(struct uart_port *port)
                 /* Pick next descriptor and fill from buffer */
                 bdp = pinfo->tx_cur;
  
-               p = bus_to_virt(bdp->cbd_bufaddr);
+               p = cpm2cpu_addr(bdp->cbd_bufaddr);
+
                 *p++ = xmit->buf[xmit->tail];
                 bdp->cbd_datlen = 1;
                 bdp->cbd_sc |= BD_SC_READY;
@@ -595,7 +632,7 @@ static int cpm_uart_tx_pump(struct uart_port *port)
  
         while (!(bdp->cbd_sc & BD_SC_READY) && (xmit->tail != xmit->head)) {
                 count = 0;
-               p = bus_to_virt(bdp->cbd_bufaddr);
+               p = cpm2cpu_addr(bdp->cbd_bufaddr);
                 while (count < pinfo->tx_fifosize) {
                         *p++ = xmit->buf[xmit->tail];
                         xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
@@ -606,6 +643,7 @@ static int cpm_uart_tx_pump(struct uart_port *port)
                 }
                 bdp->cbd_datlen = count;
                 bdp->cbd_sc |= BD_SC_READY;
+               __asm__("eieio");
                 /* Get next BD. */
                 if (bdp->cbd_sc & BD_SC_WRAP)
                         bdp = pinfo->tx_bd_base;
@@ -643,12 +681,12 @@ static void cpm_uart_initbd(struct uart_cpm_port *pinfo)
         mem_addr = pinfo->mem_addr;
         bdp = pinfo->rx_cur = pinfo->rx_bd_base;
         for (i = 0; i < (pinfo->rx_nrfifos - 1); i++, bdp++) {
-               bdp->cbd_bufaddr = virt_to_bus(mem_addr);
+               bdp->cbd_bufaddr = cpu2cpm_addr(mem_addr);
                 bdp->cbd_sc = BD_SC_EMPTY | BD_SC_INTRPT;
                 mem_addr += pinfo->rx_fifosize;
         }
-       
-       bdp->cbd_bufaddr = virt_to_bus(mem_addr);
+
+       bdp->cbd_bufaddr = cpu2cpm_addr(mem_addr);
         bdp->cbd_sc = BD_SC_WRAP | BD_SC_EMPTY | BD_SC_INTRPT;
  
         /* Set the physical address of the host memory
@@ -658,12 +696,12 @@ static void cpm_uart_initbd(struct uart_cpm_port *pinfo)
         mem_addr = pinfo->mem_addr + L1_CACHE_ALIGN(pinfo->rx_nrfifos * pinfo->rx_fifosize);
         bdp = pinfo->tx_cur = pinfo->tx_bd_base;
         for (i = 0; i < (pinfo->tx_nrfifos - 1); i++, bdp++) {
-               bdp->cbd_bufaddr = virt_to_bus(mem_addr);
+               bdp->cbd_bufaddr = cpu2cpm_addr(mem_addr);
                 bdp->cbd_sc = BD_SC_INTRPT;
                 mem_addr += pinfo->tx_fifosize;
         }
-       
-       bdp->cbd_bufaddr = virt_to_bus(mem_addr);
+
+       bdp->cbd_bufaddr = cpu2cpm_addr(mem_addr);
         bdp->cbd_sc = BD_SC_WRAP | BD_SC_INTRPT;
  }
  
@@ -763,6 +801,8 @@ static void cpm_uart_init_smc(struct uart_cpm_port *pinfo)
         /* Using idle charater time requires some additional tuning.  */
         up->smc_mrblr = pinfo->rx_fifosize;
         up->smc_maxidl = pinfo->rx_fifosize;
+       up->smc_brklen = 0;
+       up->smc_brkec = 0;
         up->smc_brkcr = 1;
  
         cpm_line_cr_cmd(line, CPM_CR_INIT_TRX);
@@ -796,7 +836,7 @@ static int cpm_uart_request_port(struct uart_port *port)
         /*
          * Setup any port IO, connect any baud rate generators,
          * etc.  This is expected to be handled by board
-        * dependant code 
+        * dependant code
          */
         if (pinfo->set_lineif)
                 pinfo->set_lineif(pinfo);
@@ -815,6 +855,10 @@ static int cpm_uart_request_port(struct uart_port *port)
                 return ret;
  
         cpm_uart_initbd(pinfo);
+       if (IS_SMC(pinfo))
+               cpm_uart_init_smc(pinfo);
+       else
+               cpm_uart_init_scc(pinfo);
  
         return 0;
  }
@@ -869,7 +913,7 @@ struct uart_cpm_port cpm_uart_ports[UART_NR] = {
                 .flags = FLAG_SMC,
                 .tx_nrfifos = TX_NUM_FIFO,
                 .tx_fifosize = TX_BUF_SIZE,
-               .rx_nrfifos = RX_NUM_FIFO, 
+               .rx_nrfifos = RX_NUM_FIFO,
                 .rx_fifosize = RX_BUF_SIZE,
                 .set_lineif = smc1_lineif,
         },
@@ -883,7 +927,7 @@ struct uart_cpm_port cpm_uart_ports[UART_NR] = {
                 .flags = FLAG_SMC,
                 .tx_nrfifos = TX_NUM_FIFO,
                 .tx_fifosize = TX_BUF_SIZE,
-               .rx_nrfifos = RX_NUM_FIFO, 
+               .rx_nrfifos = RX_NUM_FIFO,
                 .rx_fifosize = RX_BUF_SIZE,
                 .set_lineif = smc2_lineif,
  #ifdef CONFIG_SERIAL_CPM_ALT_SMC2
@@ -899,9 +943,10 @@ struct uart_cpm_port cpm_uart_ports[UART_NR] = {
                 },
                 .tx_nrfifos = TX_NUM_FIFO,
                 .tx_fifosize = TX_BUF_SIZE,
-               .rx_nrfifos = RX_NUM_FIFO, 
+               .rx_nrfifos = RX_NUM_FIFO,
                 .rx_fifosize = RX_BUF_SIZE,
                 .set_lineif = scc1_lineif,
+               .wait_closing = SCC_WAIT_CLOSING,
         },
         [UART_SCC2] = {
                 .port = {
@@ -912,9 +957,10 @@ struct uart_cpm_port cpm_uart_ports[UART_NR] = {
                 },
                 .tx_nrfifos = TX_NUM_FIFO,
                 .tx_fifosize = TX_BUF_SIZE,
-               .rx_nrfifos = RX_NUM_FIFO, 
+               .rx_nrfifos = RX_NUM_FIFO,
                 .rx_fifosize = RX_BUF_SIZE,
                 .set_lineif = scc2_lineif,
+               .wait_closing = SCC_WAIT_CLOSING,
         },
         [UART_SCC3] = {
                 .port = {
@@ -925,9 +971,10 @@ struct uart_cpm_port cpm_uart_ports[UART_NR] = {
                 },
                 .tx_nrfifos = TX_NUM_FIFO,
                 .tx_fifosize = TX_BUF_SIZE,
-               .rx_nrfifos = RX_NUM_FIFO, 
+               .rx_nrfifos = RX_NUM_FIFO,
                 .rx_fifosize = RX_BUF_SIZE,
                 .set_lineif = scc3_lineif,
+               .wait_closing = SCC_WAIT_CLOSING,
         },
         [UART_SCC4] = {
                 .port = {
@@ -938,9 +985,10 @@ struct uart_cpm_port cpm_uart_ports[UART_NR] = {
                 },
                 .tx_nrfifos = TX_NUM_FIFO,
                 .tx_fifosize = TX_BUF_SIZE,
-               .rx_nrfifos = RX_NUM_FIFO, 
+               .rx_nrfifos = RX_NUM_FIFO,
                 .rx_fifosize = RX_BUF_SIZE,
                 .set_lineif = scc4_lineif,
+               .wait_closing = SCC_WAIT_CLOSING,
         },
  };
  
@@ -983,11 +1031,8 @@ static void cpm_uart_console_write(struct console *co, const char *s,
                  * If the buffer address is in the CPM DPRAM, don't
                  * convert it.
                  */
-               if ((uint) (bdp->cbd_bufaddr) > (uint) CPM_ADDR)
-                       cp = (unsigned char *) (bdp->cbd_bufaddr);
-               else
-                       cp = bus_to_virt(bdp->cbd_bufaddr);
-               
+               cp = cpm2cpu_addr(bdp->cbd_bufaddr);
+
                 *cp = *s;
  
                 bdp->cbd_datlen = 1;
@@ -1003,10 +1048,7 @@ static void cpm_uart_console_write(struct console *co, const char *s,
                         while ((bdp->cbd_sc & BD_SC_READY) != 0)
                                 ;
  
-                       if ((uint) (bdp->cbd_bufaddr) > (uint) CPM_ADDR)
-                               cp = (unsigned char *) (bdp->cbd_bufaddr);
-                       else
-                               cp = bus_to_virt(bdp->cbd_bufaddr);
+                       cp = cpm2cpu_addr(bdp->cbd_bufaddr);
  
                         *cp = 13;
                         bdp->cbd_datlen = 1;
@@ -1045,7 +1087,7 @@ static int __init cpm_uart_console_setup(struct console *co, char *options)
         port =
             (struct uart_port *)&cpm_uart_ports[cpm_uart_port_map[co->index]];
         pinfo = (struct uart_cpm_port *)port;
-       
+
         pinfo->flags |= FLAG_CONSOLE;
  
         if (options) {
@@ -1062,7 +1104,7 @@ static int __init cpm_uart_console_setup(struct console *co, char *options)
         /*
          * Setup any port IO, connect any baud rate generators,
          * etc.  This is expected to be handled by board
-        * dependant code 
+        * dependant code
          */
         if (pinfo->set_lineif)
                 pinfo->set_lineif(pinfo);
@@ -1092,14 +1134,14 @@ static int __init cpm_uart_console_setup(struct console *co, char *options)
         return 0;
  }
  
-extern struct uart_driver cpm_reg;
+static struct uart_driver cpm_reg;
  static struct console cpm_scc_uart_console = {
-       .name           "ttyCPM",
-       .write          cpm_uart_console_write,
-       .device         uart_console_device,
-       .setup          cpm_uart_console_setup,
-       .flags          CON_PRINTBUFFER,
-       .index          -1,
+       .name           = "ttyCPM",
+       .write          = cpm_uart_console_write,
+       .device         = uart_console_device,
+       .setup          = cpm_uart_console_setup,
+       .flags          = CON_PRINTBUFFER,
+       .index          = -1,
         .data           = &cpm_reg,
  };
  
diff --git a/drivers/serial/cpm_uart/cpm_uart_cpm1.c b/drivers/serial/cpm_uart/cpm_uart_cpm1.c

index 8efbd6d1d6a402b2144e7c9d42b80a535d38c1e4..4b0786e7eb7ff6ba2ca3b85b4887badef8d72938 100644 (file)
--- a/drivers/serial/cpm_uart/cpm_uart_cpm1.c
+++ b/drivers/serial/cpm_uart/cpm_uart_cpm1.c
@@ -5,7 +5,7 @@
   *
   *  Maintainer: Kumar Gala (kumar.gala@freescale.com) (CPM2)
   *              Pantelis Antoniou (panto@intracom.gr) (CPM1)
- * 
+ *
   *  Copyright (C) 2004 Freescale Semiconductor, Inc.
   *            (C) 2004 Intracom, S.A.
   *
@@ -82,6 +82,17 @@ void cpm_line_cr_cmd(int line, int cmd)
  void smc1_lineif(struct uart_cpm_port *pinfo)
  {
         volatile cpm8xx_t *cp = cpmp;
+
+       (void)cp;       /* fix warning */
+#if defined (CONFIG_MPC885ADS)
+       /* Enable SMC1 transceivers */
+       {
+               cp->cp_pepar |= 0x000000c0;
+               cp->cp_pedir &= ~0x000000c0;
+               cp->cp_peso &= ~0x00000040;
+               cp->cp_peso |= 0x00000080;
+       }
+#elif defined (CONFIG_MPC86XADS)
         unsigned int iobits = 0x000000c0;
  
         if (!pinfo->is_portb) {
@@ -93,41 +104,33 @@ void smc1_lineif(struct uart_cpm_port *pinfo)
                 ((immap_t *)IMAP_ADDR)->im_ioport.iop_padir &= ~iobits;
                 ((immap_t *)IMAP_ADDR)->im_ioport.iop_paodr &= ~iobits;
         }
-
-#ifdef CONFIG_MPC885ADS
-       /* Enable SMC1 transceivers */
-       {
-               volatile uint __iomem *bcsr1 = ioremap(BCSR1, 4);
-               uint tmp;
-
-               tmp = in_be32(bcsr1);
-               tmp &= ~BCSR1_RS232EN_1;
-               out_be32(bcsr1, tmp);
-               iounmap(bcsr1);
-       }
  #endif
-
         pinfo->brg = 1;
  }
  
  void smc2_lineif(struct uart_cpm_port *pinfo)
  {
-#ifdef CONFIG_MPC885ADS
         volatile cpm8xx_t *cp = cpmp;
-       volatile uint __iomem *bcsr1;
-       uint tmp;
  
+       (void)cp;       /* fix warning */
+#if defined (CONFIG_MPC885ADS)
         cp->cp_pepar |= 0x00000c00;
         cp->cp_pedir &= ~0x00000c00;
         cp->cp_peso &= ~0x00000400;
         cp->cp_peso |= 0x00000800;
+#elif defined (CONFIG_MPC86XADS)
+       unsigned int iobits = 0x00000c00;
+
+       if (!pinfo->is_portb) {
+               cp->cp_pbpar |= iobits;
+               cp->cp_pbdir &= ~iobits;
+               cp->cp_pbodr &= ~iobits;
+       } else {
+               ((immap_t *)IMAP_ADDR)->im_ioport.iop_papar |= iobits;
+               ((immap_t *)IMAP_ADDR)->im_ioport.iop_padir &= ~iobits;
+               ((immap_t *)IMAP_ADDR)->im_ioport.iop_paodr &= ~iobits;
+       }
  
-       /* Enable SMC2 transceivers */
-       bcsr1 = ioremap(BCSR1, 4);
-       tmp = in_be32(bcsr1);
-       tmp &= ~BCSR1_RS232EN_2;
-       out_be32(bcsr1, tmp);
-       iounmap(bcsr1);
  #endif
  
         pinfo->brg = 2;
@@ -158,7 +161,7 @@ void scc4_lineif(struct uart_cpm_port *pinfo)
  }
  
  /*
- * Allocate DP-Ram and memory buffers. We need to allocate a transmit and 
+ * Allocate DP-Ram and memory buffers. We need to allocate a transmit and
   * receive buffer descriptors from dual port ram, and a character
   * buffer area from host mem. If we are allocating for the console we need
   * to do it from bootmem
@@ -185,6 +188,8 @@ int cpm_uart_allocbuf(struct uart_cpm_port *pinfo, unsigned int is_con)
         memsz = L1_CACHE_ALIGN(pinfo->rx_nrfifos * pinfo->rx_fifosize) +
             L1_CACHE_ALIGN(pinfo->tx_nrfifos * pinfo->tx_fifosize);
         if (is_con) {
+               /* was hostalloc but changed cause it blows away the */
+               /* large tlb mapping when pinning the kernel area    */
                 mem_addr = (u8 *) cpm_dpram_addr(cpm_dpalloc(memsz, 8));
                 dma_addr = 0;
         } else
diff --git a/drivers/serial/m32r_sio.c b/drivers/serial/m32r_sio.c

index 0301feacbde49f0b57ca842767c8df4086ea1357..9b50560b9d1625d36658f3015424e7099b438ea5 100644 (file)
--- a/drivers/serial/m32r_sio.c
+++ b/drivers/serial/m32r_sio.c
@@ -1123,7 +1123,7 @@ static int __init m32r_sio_console_setup(struct console *co, char *options)
         return uart_set_options(port, co, baud, parity, bits, flow);
  }
  
-extern struct uart_driver m32r_sio_reg;
+static struct uart_driver m32r_sio_reg;
  static struct console m32r_sio_console = {
         .name           = "ttyS",
         .write          = m32r_sio_console_write,
diff --git a/drivers/serial/sn_console.c b/drivers/serial/sn_console.c

index 840815fde49b7e92f099e1e979413691b9a6975c..12d1f14e78ce3a036da34e508fc513b28ce2af1f 100644 (file)
--- a/drivers/serial/sn_console.c
+++ b/drivers/serial/sn_console.c
@@ -1093,6 +1093,7 @@ int __init sn_serial_console_early_setup(void)
                 return -1;
  
         sal_console_port.sc_ops = &poll_ops;
+       spin_lock_init(&sal_console_port.sc_port.lock);
         early_sn_setup();       /* Find SAL entry points */
         register_console(&sal_console_early);
  
diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c

index 50cb01831075916c5f8e828f5e15f1ddd0df91df..b01efb6b36f6767e7e492af441421a7d27ce631e 100644 (file)
--- a/drivers/usb/host/ehci-dbg.c
+++ b/drivers/usb/host/ehci-dbg.c
@@ -527,7 +527,7 @@ show_periodic (struct class_device *class_dev, char *buf)
                                                 p.qh->period,
                                                 le32_to_cpup (&p.qh->hw_info2)
                                                         /* uframe masks */
-                                                       & 0xffff,
+                                                       & (QH_CMASK | QH_SMASK),
                                                 p.qh);
                                 size -= temp;
                                 next += temp;
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c

index 4f97a4ad1ed35b14a3ec51f1f8407aa88849f1ff..20df01a79b2e89e61192c69be9efc141bd8b9ab3 100644 (file)
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -222,7 +222,7 @@ __acquires(ehci->lock)
                 struct ehci_qh  *qh = (struct ehci_qh *) urb->hcpriv;
  
                 /* S-mask in a QH means it's an interrupt urb */
-               if ((qh->hw_info2 & __constant_cpu_to_le32 (0x00ff)) != 0) {
+               if ((qh->hw_info2 & __constant_cpu_to_le32 (QH_SMASK)) != 0) {
  
                         /* ... update hc-wide periodic stats (for usbfs) */
                         ehci_to_hcd(ehci)->self.bandwidth_int_reqs--;
@@ -428,7 +428,8 @@ halt:
                         /* should be rare for periodic transfers,
                          * except maybe high bandwidth ...
                          */
-                       if (qh->period) {
+                       if ((__constant_cpu_to_le32 (QH_SMASK)
+                                       & qh->hw_info2) != 0) {
                                 intr_deschedule (ehci, qh);
                                 (void) qh_schedule (ehci, qh);
                         } else
diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c

index 9af4f64532a94c4b6dbab29834f7aa29ab273ae9..b56f25864ed60084028e174e035c552bedc6a331 100644 (file)
--- a/drivers/usb/host/ehci-sched.c
+++ b/drivers/usb/host/ehci-sched.c
@@ -301,7 +301,7 @@ static int qh_link_periodic (struct ehci_hcd *ehci, struct ehci_qh *qh)
  
         dev_dbg (&qh->dev->dev,
                 "link qh%d-%04x/%p start %d [%d/%d us]\n",
-               period, le32_to_cpup (&qh->hw_info2) & 0xffff,
+               period, le32_to_cpup (&qh->hw_info2) & (QH_CMASK | QH_SMASK),
                 qh, qh->start, qh->usecs, qh->c_usecs);
  
         /* high bandwidth, or otherwise every microframe */
@@ -385,7 +385,8 @@ static void qh_unlink_periodic (struct ehci_hcd *ehci, struct ehci_qh *qh)
  
         dev_dbg (&qh->dev->dev,
                 "unlink qh%d-%04x/%p start %d [%d/%d us]\n",
-               qh->period, le32_to_cpup (&qh->hw_info2) & 0xffff,
+               qh->period,
+               le32_to_cpup (&qh->hw_info2) & (QH_CMASK | QH_SMASK),
                 qh, qh->start, qh->usecs, qh->c_usecs);
  
         /* qh->qh_next still "live" to HC */
@@ -411,7 +412,7 @@ static void intr_deschedule (struct ehci_hcd *ehci, struct ehci_qh *qh)
          * active high speed queues may need bigger delays...
          */
         if (list_empty (&qh->qtd_list)
-                       || (__constant_cpu_to_le32 (0x0ff << 8)
+                       || (__constant_cpu_to_le32 (QH_CMASK)
                                         & qh->hw_info2) != 0)
                 wait = 2;
         else
@@ -533,7 +534,7 @@ static int qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh)
  
         /* reuse the previous schedule slots, if we can */
         if (frame < qh->period) {
-               uframe = ffs (le32_to_cpup (&qh->hw_info2) & 0x00ff);
+               uframe = ffs (le32_to_cpup (&qh->hw_info2) & QH_SMASK);
                 status = check_intr_schedule (ehci, frame, --uframe,
                                 qh, &c_mask);
         } else {
@@ -569,10 +570,10 @@ static int qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh)
                 qh->start = frame;
  
                 /* reset S-frame and (maybe) C-frame masks */
-               qh->hw_info2 &= __constant_cpu_to_le32 (~0xffff);
+               qh->hw_info2 &= __constant_cpu_to_le32(~(QH_CMASK | QH_SMASK));
                 qh->hw_info2 |= qh->period
                         ? cpu_to_le32 (1 << uframe)
-                       : __constant_cpu_to_le32 (0xff);
+                       : __constant_cpu_to_le32 (QH_SMASK);
                 qh->hw_info2 |= c_mask;
         } else
                 ehci_dbg (ehci, "reused qh %p schedule\n", qh);
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h

index 4df498231752812afd20b67c4f995a4ee50fbddd..a7542157534c13c159a2220e02b8d1c44cd16632 100644 (file)
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -385,6 +385,11 @@ struct ehci_qh {
         __le32                  hw_info1;        /* see EHCI 3.6.2 */
  #define        QH_HEAD         0x00008000
         __le32                  hw_info2;        /* see EHCI 3.6.2 */
+#define        QH_SMASK        0x000000ff
+#define        QH_CMASK        0x0000ff00
+#define        QH_HUBADDR      0x007f0000
+#define        QH_HUBPORT      0x3f800000
+#define        QH_MULT         0xc0000000
         __le32                  hw_current;      /* qtd list - see EHCI 3.6.4 */
         
         /* qtd overlay (hardware parts of a struct ehci_qtd) */
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c

index 50b1970fe6b67468924d730f921e530b29827ad7..76cb496c5836ca3f222cbe688bf67fe2d6a2f1f7 100644 (file)
--- a/drivers/usb/host/isp116x-hcd.c
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -229,9 +229,11 @@ static void preproc_atl_queue(struct isp116x *isp116x)
         struct isp116x_ep *ep;
         struct urb *urb;
         struct ptd *ptd;
-       u16 toggle = 0, dir = PTD_DIR_SETUP, len;
+       u16 len;
  
         for (ep = isp116x->atl_active; ep; ep = ep->active) {
+               u16 toggle = 0, dir = PTD_DIR_SETUP;
+
                 BUG_ON(list_empty(&ep->hep->urb_list));
                 urb = container_of(ep->hep->urb_list.next,
                                    struct urb, urb_list);
diff --git a/drivers/usb/input/wacom.c b/drivers/usb/input/wacom.c

index 02412e31a46b18b085ef3dab1d09e01a97140b4f..3b266af3048a11d5b0510fcc381ffd1530216d04 100644 (file)
--- a/drivers/usb/input/wacom.c
+++ b/drivers/usb/input/wacom.c
@@ -342,9 +342,6 @@ static void wacom_graphire_irq(struct urb *urb, struct pt_regs *regs)
                 goto exit;
         }
  
-       x = le16_to_cpu(*(__le16 *) &data[2]);
-       y = le16_to_cpu(*(__le16 *) &data[4]);
-
         input_regs(dev, regs);
  
         if (data[1] & 0x10) { /* in prox */
@@ -373,15 +370,17 @@ static void wacom_graphire_irq(struct urb *urb, struct pt_regs *regs)
                 }
         }
  
-       if (data[1] & 0x80) {
+       if (data[1] & 0x90) {
+               x = le16_to_cpu(*(__le16 *) &data[2]);
+               y = le16_to_cpu(*(__le16 *) &data[4]);
                 input_report_abs(dev, ABS_X, x);
                 input_report_abs(dev, ABS_Y, y);
-       }
-       if (wacom->tool[0] != BTN_TOOL_MOUSE) {
-               input_report_abs(dev, ABS_PRESSURE, le16_to_cpu(*(__le16 *) &data[6]));
-               input_report_key(dev, BTN_TOUCH, data[1] & 0x01);
-               input_report_key(dev, BTN_STYLUS, data[1] & 0x02);
-               input_report_key(dev, BTN_STYLUS2, data[1] & 0x04);
+               if (wacom->tool[0] != BTN_TOOL_MOUSE) {
+                       input_report_abs(dev, ABS_PRESSURE, le16_to_cpu(*(__le16 *) &data[6]));
+                       input_report_key(dev, BTN_TOUCH, data[1] & 0x01);
+                       input_report_key(dev, BTN_STYLUS, data[1] & 0x02);
+                       input_report_key(dev, BTN_STYLUS2, data[1] & 0x04);
+               }
         }
  
         input_report_key(dev, wacom->tool[0], data[1] & 0x10);
@@ -568,7 +567,7 @@ static void wacom_intuos_irq(struct urb *urb, struct pt_regs *regs)
  
         /* Cintiq doesn't send data when RDY bit isn't set */
         if ((wacom->features->type == CINTIQ) && !(data[1] & 0x40))
-               return;
+               goto exit;
  
         if (wacom->features->type >= INTUOS3) {
                 input_report_abs(dev, ABS_X, (data[2] << 9) | (data[3] << 1) | ((data[9] >> 1) & 1));
diff --git a/drivers/usb/mon/Kconfig b/drivers/usb/mon/Kconfig

index 777642e26b9aa3e8b1f8560b0a4f536f716af3a4..deb9ddffa402ed5e84b9309243650a1bfaf56350 100644 (file)
--- a/drivers/usb/mon/Kconfig
+++ b/drivers/usb/mon/Kconfig
@@ -9,9 +9,8 @@ config USB_MON
         help
           If you say Y here, a component which captures the USB traffic
           between peripheral-specific drivers and HC drivers will be built.
-         The USB_MON is similar in spirit and may be compatible with Dave
-         Harding's USBMon.
+         For more information, see <file:Documentation/usb/usbmon.txt>.
  
-         This is somewhat experimental at this time, but it should be safe,
-         as long as you aren't using modular USB and try to remove this
-         module.
+         This is somewhat experimental at this time, but it should be safe.
+
+         If unsure, say Y.
diff --git a/drivers/usb/mon/Makefile b/drivers/usb/mon/Makefile

index f18d10ce91f951a5697daa50a8e521433b75b58a..b0015b8a1d1f96e372e3d0037b45807112aab3d0 100644 (file)
--- a/drivers/usb/mon/Makefile
+++ b/drivers/usb/mon/Makefile
@@ -4,4 +4,5 @@
  
  usbmon-objs    := mon_main.o mon_stat.o mon_text.o
  
+# This does not use CONFIG_USB_MON because we want this to use a tristate.
  obj-$(CONFIG_USB)      += usbmon.o
diff --git a/drivers/usb/mon/mon_main.c b/drivers/usb/mon/mon_main.c

index aa9d00808e4e3565ae9bd7efd079b2824c79d0aa..508a21028db420c41a3d188155d3cfb0410146e3 100644 (file)
--- a/drivers/usb/mon/mon_main.c
+++ b/drivers/usb/mon/mon_main.c
@@ -2,6 +2,8 @@
   * The USB Monitor, inspired by Dave Harding's USBMon.
   *
   * mon_main.c: Main file, module initiation and exit, registrations, etc.
+ *
+ * Copyright (C) 2005 Pete Zaitcev (zaitcev@redhat.com)
   */
  
  #include <linux/kernel.h>
@@ -311,7 +313,7 @@ static int __init mon_init(void)
  
         mondir = debugfs_create_dir("usbmon", NULL);
         if (IS_ERR(mondir)) {
-               printk(KERN_NOTICE TAG ": debugs is not available\n");
+               printk(KERN_NOTICE TAG ": debugfs is not available\n");
                 return -ENODEV;
         }
         if (mondir == NULL) {
diff --git a/drivers/usb/mon/usb_mon.h b/drivers/usb/mon/usb_mon.h

index ed35c18a5c44bbae2fcd5cf977ae98aeccbf8966..9b06784d2c481055368bdd92f171d68801886d3f 100644 (file)
--- a/drivers/usb/mon/usb_mon.h
+++ b/drivers/usb/mon/usb_mon.h
@@ -1,5 +1,7 @@
  /*
   * The USB Monitor, inspired by Dave Harding's USBMon.
+ *
+ * Copyright (C) 2005 Pete Zaitcev (zaitcev@redhat.com)
   */
  
  #ifndef __USB_MON_H
diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c

index 576f3b852fce39ccfff151704c1447b883e1703f..a2f67245f6da37adac5a5bb93e68aea202c3aafb 100644 (file)
--- a/drivers/usb/net/usbnet.c
+++ b/drivers/usb/net/usbnet.c
@@ -1922,7 +1922,7 @@ static int genelink_rx_fixup (struct usbnet *dev, struct sk_buff *skb)
  
                         // copy the packet data to the new skb
                         memcpy(skb_put(gl_skb, size), packet->packet_data, size);
-                       skb_return (dev, skb);
+                       skb_return (dev, gl_skb);
                 }
  
                 // advance to the next packet
@@ -2903,19 +2903,18 @@ static struct net_device_stats *usbnet_get_stats (struct net_device *net)
   * completion callbacks.  2.5 should have fixed those bugs...
   */
  
-static void defer_bh (struct usbnet *dev, struct sk_buff *skb)
+static void defer_bh(struct usbnet *dev, struct sk_buff *skb, struct sk_buff_head *list)
  {
-       struct sk_buff_head     *list = skb->list;
         unsigned long           flags;
  
-       spin_lock_irqsave (&list->lock, flags);
-       __skb_unlink (skb, list);
-       spin_unlock (&list->lock);
-       spin_lock (&dev->done.lock);
-       __skb_queue_tail (&dev->done, skb);
+       spin_lock_irqsave(&list->lock, flags);
+       __skb_unlink(skb, list);
+       spin_unlock(&list->lock);
+       spin_lock(&dev->done.lock);
+       __skb_queue_tail(&dev->done, skb);
         if (dev->done.qlen == 1)
-               tasklet_schedule (&dev->bh);
-       spin_unlock_irqrestore (&dev->done.lock, flags);
+               tasklet_schedule(&dev->bh);
+       spin_unlock_irqrestore(&dev->done.lock, flags);
  }
  
  /* some work can't be done in tasklets, so we use keventd
@@ -3120,7 +3119,7 @@ block:
                 break;
         }
  
-       defer_bh (dev, skb);
+       defer_bh(dev, skb, &dev->rxq);
  
         if (urb) {
                 if (netif_running (dev->net)
@@ -3490,7 +3489,7 @@ static void tx_complete (struct urb *urb, struct pt_regs *regs)
  
         urb->dev = NULL;
         entry->state = tx_done;
-       defer_bh (dev, skb);
+       defer_bh(dev, skb, &dev->txq);
  }
  
  /*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/net/zd1201.c b/drivers/usb/net/zd1201.c

index 29cd801eb958461592a5359cc7c06f0fd181138c..e32a80b39182b5dbbc06d15b2f96abf1897f9e7f 100644 (file)
--- a/drivers/usb/net/zd1201.c
+++ b/drivers/usb/net/zd1201.c
@@ -346,8 +346,7 @@ static void zd1201_usbrx(struct urb *urb, struct pt_regs *regs)
                         if (datalen<14)
                                 goto resubmit;
                         if ((seq & IEEE802_11_SCTL_FRAG) == 0) {
-                               frag = kmalloc(sizeof(struct zd1201_frag*),
-                                   GFP_ATOMIC);
+                               frag = kmalloc(sizeof(*frag), GFP_ATOMIC);
                                 if (!frag)
                                         goto resubmit;
                                 skb = dev_alloc_skb(IEEE802_11_DATA_LEN +14+2);
diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig

index cbff98337aa6b37069e62bd03d4fc67c7e5eb15a..5fe182d6e4ab44bc09dc605b5980da37a915e67f 100644 (file)
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -6,7 +6,7 @@ menu "Console display driver support"
  
  config VGA_CONSOLE
         bool "VGA text console" if EMBEDDED || !X86
-       depends on !ARCH_ACORN && !ARCH_EBSA110 && !4xx && !8xx && !SPARC32 && !SPARC64 && !M68K && !PARISC
+       depends on !ARCH_ACORN && !ARCH_EBSA110 && !4xx && !8xx && !SPARC32 && !SPARC64 && !M68K && !PARISC && !ARCH_VERSATILE
         default y
         help
           Saying Y here will allow you to use Linux in text mode through a
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c

index d2e19f6dd72c21e57a2eeea7c97fd3ced88d52b8..4ff853fbe0bea8352bf6df8346af90d613b06031 100644 (file)
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -628,7 +628,7 @@ fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var)
  int
  fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
  {
-       int err;
+       int err, flags = info->flags;
  
         if (var->activate & FB_ACTIVATE_INV_MODE) {
                 struct fb_videomode mode1, mode2;
@@ -682,7 +682,7 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
                             !list_empty(&info->modelist))
                                 err = fb_add_videomode(&mode, &info->modelist);
  
-                       if (!err && info->flags & FBINFO_MISC_USEREVENT) {
+                       if (!err && (flags & FBINFO_MISC_USEREVENT)) {
                                 struct fb_event event;
  
                                 info->flags &= ~FBINFO_MISC_USEREVENT;
diff --git a/drivers/video/fbsysfs.c b/drivers/video/fbsysfs.c

index ed1d4d1ac4f709bed6172bb827cb892c369311d2..1147b899f007d71eaebde3ff7d58d32ff27f935a 100644 (file)
--- a/drivers/video/fbsysfs.c
+++ b/drivers/video/fbsysfs.c
@@ -414,6 +414,13 @@ static ssize_t show_pan(struct class_device *class_device, char *buf)
                         fb_info->var.xoffset);
  }
  
+static ssize_t show_name(struct class_device *class_device, char *buf)
+{
+       struct fb_info *fb_info = (struct fb_info *)class_get_devdata(class_device);
+
+       return snprintf(buf, PAGE_SIZE, "%s\n", fb_info->fix.id);
+}
+
  static struct class_device_attribute class_device_attrs[] = {
         __ATTR(bits_per_pixel, S_IRUGO|S_IWUSR, show_bpp, store_bpp),
         __ATTR(blank, S_IRUGO|S_IWUSR, show_blank, store_blank),
@@ -424,6 +431,7 @@ static struct class_device_attribute class_device_attrs[] = {
         __ATTR(modes, S_IRUGO|S_IWUSR, show_modes, store_modes),
         __ATTR(pan, S_IRUGO|S_IWUSR, show_pan, store_pan),
         __ATTR(virtual_size, S_IRUGO|S_IWUSR, show_virtual, store_virtual),
+       __ATTR(name, S_IRUGO, show_name, NULL),
  };
  
  int fb_init_class_device(struct fb_info *fb_info)
diff --git a/drivers/video/intelfb/intelfbdrv.c b/drivers/video/intelfb/intelfbdrv.c

index 298bc9cd99e7f59772a8d551307ec3b9d77032d4..a112a1786855feb07b581fb184517e5622ff6762 100644 (file)
--- a/drivers/video/intelfb/intelfbdrv.c
+++ b/drivers/video/intelfb/intelfbdrv.c
@@ -583,23 +583,6 @@ intelfb_pci_register(struct pci_dev *pdev, const struct pci_device_id *ent)
                 return -ENODEV;
         }
  
-       /* Map the fb and MMIO regions */
-       dinfo->aperture.virtual = (u8 __iomem *)ioremap_nocache
-               (dinfo->aperture.physical, dinfo->aperture.size);
-       if (!dinfo->aperture.virtual) {
-               ERR_MSG("Cannot remap FB region.\n");
-               cleanup(dinfo);
-               return -ENODEV;
-       }
-       dinfo->mmio_base =
-               (u8 __iomem *)ioremap_nocache(dinfo->mmio_base_phys,
-                                              INTEL_REG_SIZE);
-       if (!dinfo->mmio_base) {
-               ERR_MSG("Cannot remap MMIO region.\n");
-               cleanup(dinfo);
-               return -ENODEV;
-       }
-
         /* Get the chipset info. */
         dinfo->pci_chipset = pdev->device;
  
@@ -630,9 +613,15 @@ intelfb_pci_register(struct pci_dev *pdev, const struct pci_device_id *ent)
                 dinfo->accel = 0;
         }
  
+       if (MB(voffset) < stolen_size)
+               offset = (stolen_size >> 12);
+       else
+               offset = ROUND_UP_TO_PAGE(MB(voffset))/GTT_PAGE_SIZE;
+
         /* Framebuffer parameters - Use all the stolen memory if >= vram */
-       if (ROUND_UP_TO_PAGE(stolen_size) >= MB(vram)) {
+       if (ROUND_UP_TO_PAGE(stolen_size) >= ((offset << 12) +  MB(vram))) {
                 dinfo->fb.size = ROUND_UP_TO_PAGE(stolen_size);
+               dinfo->fb.offset = 0;
                 dinfo->fbmem_gart = 0;
         } else {
                 dinfo->fb.size =  MB(vram);
@@ -663,11 +652,6 @@ intelfb_pci_register(struct pci_dev *pdev, const struct pci_device_id *ent)
                 return -ENODEV;
         }
  
-       if (MB(voffset) < stolen_size)
-               offset = (stolen_size >> 12);
-       else
-               offset = ROUND_UP_TO_PAGE(MB(voffset))/GTT_PAGE_SIZE;
-
         /* set the mem offsets - set them after the already used pages */
         if (dinfo->accel) {
                 dinfo->ring.offset = offset + gtt_info.current_memory;
@@ -682,6 +666,26 @@ intelfb_pci_register(struct pci_dev *pdev, const struct pci_device_id *ent)
                         + (dinfo->cursor.size >> 12);
         }
  
+       /* Map the fb and MMIO regions */
+       /* ioremap only up to the end of used aperture */
+       dinfo->aperture.virtual = (u8 __iomem *)ioremap_nocache
+               (dinfo->aperture.physical, (dinfo->fb.offset << 12)
+                + dinfo->fb.size);
+       if (!dinfo->aperture.virtual) {
+               ERR_MSG("Cannot remap FB region.\n");
+               cleanup(dinfo);
+               return -ENODEV;
+       }
+
+       dinfo->mmio_base =
+               (u8 __iomem *)ioremap_nocache(dinfo->mmio_base_phys,
+                                              INTEL_REG_SIZE);
+       if (!dinfo->mmio_base) {
+               ERR_MSG("Cannot remap MMIO region.\n");
+               cleanup(dinfo);
+               return -ENODEV;
+       }
+
         /* Allocate memories (which aren't stolen) */
         if (dinfo->accel) {
                 if (!(dinfo->gtt_ring_mem =
diff --git a/drivers/video/modedb.c b/drivers/video/modedb.c

index fbf659b6dab01e530acdc4d8e9d544bad909e5fa..3edc9f49344b6df9038988e08377b19f4839a149 100644 (file)
--- a/drivers/video/modedb.c
+++ b/drivers/video/modedb.c
@@ -246,6 +246,11 @@ static const struct fb_videomode modedb[] = {
         /* 480x300 @ 72 Hz, 48.0 kHz hsync */
         NULL, 72, 480, 300, 33386, 40, 24, 11, 19, 80, 3,
         0, FB_VMODE_DOUBLE
+    }, {
+       /* 1920x1200 @ 60 Hz, 74.5 Khz hsync */
+       NULL, 60, 1920, 1200, 5177, 128, 336, 1, 38, 208, 3,
+       FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+       FB_VMODE_NONINTERLACED
      },
  };
  
diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c

index b2e6b240786917d9c1cae810a76729d3ff2c0686..52b16850a54efe399ce7ba520a1624b6c1459045 100644 (file)
--- a/drivers/video/nvidia/nvidia.c
+++ b/drivers/video/nvidia/nvidia.c
@@ -1324,6 +1324,13 @@ static int __devinit nvidia_set_fbinfo(struct fb_info *info)
  
                 fb_videomode_to_var(&nvidiafb_default_var, &modedb);
                 nvidiafb_default_var.bits_per_pixel = 8;
+       } else if (par->fpWidth && par->fpHeight) {
+               char buf[16];
+
+               memset(buf, 0, 16);
+               snprintf(buf, 15, "%dx%d", par->fpWidth, par->fpHeight);
+               fb_find_mode(&nvidiafb_default_var, info, buf, specs->modedb,
+                            specs->modedb_len, &modedb, 8);
         }
  
         if (mode_option)
diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c

index 16e37a535d850f83e065d82b9b66069f87cdbb5d..30112816420c71e2d304a538c82bd2129a0ecdf3 100644 (file)
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -717,6 +717,9 @@ static void pxafb_enable_controller(struct pxafb_info *fbi)
         DPRINTK("reg_lccr2 0x%08x\n", (unsigned int) fbi->reg_lccr2);
         DPRINTK("reg_lccr3 0x%08x\n", (unsigned int) fbi->reg_lccr3);
  
+       /* enable LCD controller clock */
+       pxa_set_cken(CKEN16_LCD, 1);
+
         /* Sequence from 11.7.10 */
         LCCR3 = fbi->reg_lccr3;
         LCCR2 = fbi->reg_lccr2;
@@ -750,6 +753,9 @@ static void pxafb_disable_controller(struct pxafb_info *fbi)
  
         schedule_timeout(20 * HZ / 1000);
         remove_wait_queue(&fbi->ctrlr_wait, &wait);
+
+       /* disable LCD controller clock */
+       pxa_set_cken(CKEN16_LCD, 0);
  }
  
  /*
@@ -1299,8 +1305,6 @@ int __init pxafb_probe(struct device *dev)
                 ret = -ENOMEM;
                 goto failed;
         }
-       /* enable LCD controller clock */
-       pxa_set_cken(CKEN16_LCD, 1);
  
         ret = request_irq(IRQ_LCD, pxafb_handle_irq, SA_INTERRUPT, "LCD", fbi);
         if (ret) {
diff --git a/drivers/video/radeonfb.c b/drivers/video/radeonfb.c

index c46387024b1d55717f4a8deaf5cd71dd71179a70..a78b9bd8f89752c856d05e06dc4e74c25faecdad 100644 (file)
--- a/drivers/video/radeonfb.c
+++ b/drivers/video/radeonfb.c
@@ -80,7 +80,7 @@
  #include <video/radeon.h>
  #include <linux/radeonfb.h>
  
-#define DEBUG  1
+#define DEBUG  0
  
  #if DEBUG
  #define RTRACE         printk
diff --git a/drivers/video/sa1100fb.c b/drivers/video/sa1100fb.c

index 2d29db7ef800f195f8e583f456629da81d727c77..beeec7b514251c58007120978dc4f791da5b1a5f 100644 (file)
--- a/drivers/video/sa1100fb.c
+++ b/drivers/video/sa1100fb.c
@@ -598,7 +598,7 @@ sa1100fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
   *    requests for the LCD controller.  If we hit this, it means we're
   *    doing nothing but LCD DMA.
   */
-static unsigned int sa1100fb_display_dma_period(struct fb_var_screeninfo *var)
+static inline unsigned int sa1100fb_display_dma_period(struct fb_var_screeninfo *var)
  {
         /*
          * Period = pixclock * bits_per_byte * bytes_per_transfer
diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c

index da8004e5d03de657c2918d4cd486583ecfa1a88c..698ca9232e73649c7669887685edca5e89b16698 100644 (file)
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -454,13 +454,16 @@ static struct accel_switch accel_image = {
  static void tridentfb_fillrect(struct fb_info * info, const struct fb_fillrect *fr)
  {
         int bpp = info->var.bits_per_pixel;
-       int col;
+       int col = 0;
         
         switch (bpp) {
                 default:
-               case 8: col = fr->color;
+               case 8: col |= fr->color;
+                       col |= col << 8;
+                       col |= col << 16;
                         break;
                 case 16: col = ((u32 *)(info->pseudo_palette))[fr->color];
+                       
                          break;
                 case 32: col = ((u32 *)(info->pseudo_palette))[fr->color];
                          break;
@@ -882,8 +885,9 @@ static int tridentfb_set_par(struct fb_info *info)
  
         write3X4(GraphEngReg, 0x80);    //enable GE for text acceleration
  
-//     if (info->var.accel_flags & FB_ACCELF_TEXT)
-//FIXME                acc->init_accel(info->var.xres,bpp);
+#ifdef CONFIG_FB_TRIDENT_ACCEL 
+       acc->init_accel(info->var.xres,bpp);
+#endif
         
         switch (bpp) {
                 case 8:  tmp = 0x00; break;
@@ -900,7 +904,7 @@ static int tridentfb_set_par(struct fb_info *info)
         write3X4(DRAMControl, tmp);     //both IO,linear enable
  
         write3X4(InterfaceSel, read3X4(InterfaceSel) | 0x40);
-       write3X4(Performance,0x20);
+       write3X4(Performance,0x92);
         write3X4(PCIReg,0x07);          //MMIO & PCI read and write burst enable
  
         /* convert from picoseconds to MHz */
@@ -981,12 +985,14 @@ static int tridentfb_setcolreg(unsigned regno, unsigned red, unsigned green,
                 t_outb(green>>10,0x3C9);
                 t_outb(blue>>10,0x3C9);
  
-       } else
-       if (bpp == 16)                  /* RGB 565 */
-                       ((u32*)info->pseudo_palette)[regno] = (red & 0xF800) |
-                       ((green & 0xFC00) >> 5) | ((blue & 0xF800) >> 11);
-       else
-       if (bpp == 32)          /* ARGB 8888 */
+       } else if (bpp == 16) { /* RGB 565 */
+               u32 col;
+
+               col = (red & 0xF800) | ((green & 0xFC00) >> 5) |
+                       ((blue & 0xF800) >> 11);
+               col |= col << 16;       
+               ((u32 *)(info->pseudo_palette))[regno] = col;
+       } else if (bpp == 32)           /* ARGB 8888 */
                 ((u32*)info->pseudo_palette)[regno] =
                         ((transp & 0xFF00) <<16)        |
                         ((red & 0xFF00) << 8)           |
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c

index 8a9c4282250225c7a431ea62b3c5e87c98777f30..0bbf029b1ef1658f99b537fa690a10186275b486 100644 (file)
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -593,7 +593,7 @@ void w1_search(struct w1_master *dev, w1_slave_found_callback cb)
                  * Return 0 - device(s) present, 1 - no devices present.
                  */
                 if (w1_reset_bus(dev)) {
-                       dev_info(&dev->dev, "No devices present on the wire.\n");
+                       dev_dbg(&dev->dev, "No devices present on the wire.\n");
                         break;
                 }
  
diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c

index b5a5e04b6d37fb172256595c3e905a22f3378d6c..498ad505fa5f94360309063f7a72ec715ef75dd2 100644 (file)
--- a/drivers/w1/w1_int.c
+++ b/drivers/w1/w1_int.c
@@ -86,9 +86,9 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl,
  
         dev->driver = driver;
  
-       dev->groups = 23;
+       dev->groups = 1;
         dev->seq = 1;
-       dev->nls = netlink_kernel_create(NETLINK_W1, NULL);
+       dev->nls = netlink_kernel_create(NETLINK_W1, 1, NULL, THIS_MODULE);
         if (!dev->nls) {
                 printk(KERN_ERR "Failed to create new netlink socket(%u) for w1 master %s.\n",
                         NETLINK_NFLOG, dev->dev.bus_id);
@@ -225,3 +225,5 @@ void w1_remove_master_device(struct w1_bus_master *bm)
  
  EXPORT_SYMBOL(w1_add_master_device);
  EXPORT_SYMBOL(w1_remove_master_device);
+
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_W1);
diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c

index 2a82fb055c70bfb2ef4a25dff024da1c681bbd13..e7b774423dd65c56fce07dc13ca0017e713fb6f3 100644 (file)
--- a/drivers/w1/w1_netlink.c
+++ b/drivers/w1/w1_netlink.c
@@ -51,7 +51,7 @@ void w1_netlink_send(struct w1_master *dev, struct w1_netlink_msg *msg)
  
         memcpy(data, msg, sizeof(struct w1_netlink_msg));
  
-       NETLINK_CB(skb).dst_groups = dev->groups;
+       NETLINK_CB(skb).dst_group = dev->groups;
         netlink_broadcast(dev->nls, skb, 0, dev->groups, GFP_ATOMIC);
  
  nlmsg_failure:
diff --git a/fs/Kconfig b/fs/Kconfig

index 5d0c4be43dba1f6e929ff50fabcefcff3adaf1db..e54be7058359e0beee537f4f5d21d7544148c687 100644 (file)
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -363,12 +363,15 @@ config INOTIFY
         bool "Inotify file change notification support"
         default y
         ---help---
-         Say Y here to enable inotify support and the /dev/inotify character
-         device.  Inotify is a file change notification system and a
+         Say Y here to enable inotify support and the associated system
+         calls.  Inotify is a file change notification system and a
           replacement for dnotify.  Inotify fixes numerous shortcomings in
           dnotify and introduces several new features.  It allows monitoring
-         of both files and directories via a single open fd.  Multiple file
-         events are supported.
+         of both files and directories via a single open fd.  Other features
+         include multiple file events, one-shot support, and unmount
+         notification.
+
+         For more information, see Documentation/filesystems/inotify.txt
  
           If unsure, say Y.
  
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h

index 63f5df9afb71f96c5e649fbb0f2c6cf3275513b6..fd528433de43d1faeff397551081ac4ea48320b5 100644 (file)
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -97,7 +97,7 @@ extern int adfs_dir_update(struct super_block *sb, struct object_info *obj);
  extern struct inode_operations adfs_file_inode_operations;
  extern struct file_operations adfs_file_operations;
  
-extern inline __u32 signed_asl(__u32 val, signed int shift)
+static inline __u32 signed_asl(__u32 val, signed int shift)
  {
         if (shift >= 0)
                 val <<= shift;
@@ -112,7 +112,7 @@ extern inline __u32 signed_asl(__u32 val, signed int shift)
   *
   * The root directory ID should always be looked up in the map [3.4]
   */
-extern inline int
+static inline int
  __adfs_block_map(struct super_block *sb, unsigned int object_id,
                  unsigned int block)
  {
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c

index bfc28abe1cb1c13b6a11497ff0cb0ca1ae3705e3..31ee06590de549d833be410c742da32240187d22 100644 (file)
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -30,7 +30,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
                                        struct dentry *dentry,
                                        struct nameidata *nd);
  static int afs_mntpt_open(struct inode *inode, struct file *file);
-static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd);
+static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd);
  
  struct file_operations afs_mntpt_file_operations = {
         .open           = afs_mntpt_open,
@@ -233,7 +233,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
  /*
   * follow a link from a mountpoint directory, thus causing it to be mounted
   */
-static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct vfsmount *newmnt;
         struct dentry *old_dentry;
@@ -249,7 +249,7 @@ static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
         newmnt = afs_mntpt_do_automount(dentry);
         if (IS_ERR(newmnt)) {
                 path_release(nd);
-               return PTR_ERR(newmnt);
+               return (void *)newmnt;
         }
  
         old_dentry = nd->dentry;
@@ -267,7 +267,7 @@ static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
         }
  
         kleave(" = %d", err);
-       return err;
+       return ERR_PTR(err);
  } /* end afs_mntpt_follow_link() */
  
  /*****************************************************************************/
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c

index f028396f138395a3b5f2958bbf858fc1c6d27e21..52e8772b066e33fece33d73ae5ddf30568c5b4ca 100644 (file)
--- a/fs/autofs/symlink.c
+++ b/fs/autofs/symlink.c
@@ -12,11 +12,12 @@
  
  #include "autofs_i.h"
  
-static int autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
+/* Nothing to release.. */
+static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data;
         nd_set_link(nd, s);
-       return 0;
+       return NULL;
  }
  
  struct inode_operations autofs_symlink_inode_operations = {
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c

index c265a66edf0f76555c793eb3ba463a7116b20d80..2ea2c98fd84bdc6ee03e3fda05a1d66425204caf 100644 (file)
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -12,11 +12,11 @@
  
  #include "autofs_i.h"
  
-static int autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct autofs_info *ino = autofs4_dentry_ino(dentry);
         nd_set_link(nd, (char *)ino->u.symlink);
-       return 0;
+       return NULL;
  }
  
  struct inode_operations autofs4_symlink_inode_operations = {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c

index de5bb280a82895939c6cd9cce29005b5030689ab..e0a6025f1d06a72a6423984ab838739ed0bf2bf6 100644 (file)
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -41,8 +41,8 @@ static struct inode *befs_alloc_inode(struct super_block *sb);
  static void befs_destroy_inode(struct inode *inode);
  static int befs_init_inodecache(void);
  static void befs_destroy_inodecache(void);
-static int befs_follow_link(struct dentry *, struct nameidata *);
-static void befs_put_link(struct dentry *, struct nameidata *);
+static void *befs_follow_link(struct dentry *, struct nameidata *);
+static void befs_put_link(struct dentry *, struct nameidata *, void *);
  static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
                         char **out, int *out_len);
  static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -461,7 +461,7 @@ befs_destroy_inodecache(void)
   * The data stream become link name. Unless the LONG_SYMLINK
   * flag is set.
   */
-static int
+static void *
  befs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
@@ -487,10 +487,10 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
         }
  
         nd_set_link(nd, link);
-       return 0;
+       return NULL;
  }
  
-static void befs_put_link(struct dentry *dentry, struct nameidata *nd)
+static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
  {
         befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
         if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
diff --git a/fs/bio.c b/fs/bio.c

index 249dd6bb66c843ee9e487f124dbe37afab095581..1f2d4649b188015076353340055a4ebdd1e0a6b1 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -248,17 +248,13 @@ inline void __bio_clone(struct bio *bio, struct bio *bio_src)
  {
         request_queue_t *q = bdev_get_queue(bio_src->bi_bdev);
  
-       memcpy(bio->bi_io_vec, bio_src->bi_io_vec, bio_src->bi_max_vecs * sizeof(struct bio_vec));
+       memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
+               bio_src->bi_max_vecs * sizeof(struct bio_vec));
  
         bio->bi_sector = bio_src->bi_sector;
         bio->bi_bdev = bio_src->bi_bdev;
         bio->bi_flags |= 1 << BIO_CLONED;
         bio->bi_rw = bio_src->bi_rw;
-
-       /*
-        * notes -- maybe just leave bi_idx alone. assume identical mapping
-        * for the clone
-        */
         bio->bi_vcnt = bio_src->bi_vcnt;
         bio->bi_size = bio_src->bi_size;
         bio->bi_idx = bio_src->bi_idx;
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES

index dab4774ee7bbb6c7d9a92655a4a600d80c0da896..3196d4c4eed36fde53bc857563b2dff2d403abb9 100644 (file)
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,9 @@
+Version 1.35
+------------
+Add writepage performance improvements.  Fix path name conversions
+for long filenames on mounts which were done with "mapchars" mount option
+specified.
+
  Version 1.34
  ------------
  Fix error mapping of the TOO_MANY_LINKS (hardlinks) case.
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h

index 78af5850c558127bde1a5c929e72f90cf7470403..1fd21f66f2435198e33eca7a2919ef42b08ef6ce 100644 (file)
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -83,8 +83,8 @@ extern int cifs_dir_notify(struct file *, unsigned long arg);
  extern struct dentry_operations cifs_dentry_ops;
  
  /* Functions related to symlinks */
-extern int cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
-extern void cifs_put_link(struct dentry *direntry, struct nameidata *nd);
+extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
+extern void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *);
  extern int cifs_readlink(struct dentry *direntry, char __user *buffer, 
                          int buflen);
  extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c

index 3c628bf667a5f010ff17c14d2ffc04f499d80c7d..0db0b313d7150f49795c0cc2a765d1abe298f0b0 100644 (file)
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2602,6 +2602,9 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
         if(name_len < PATH_MAX) {
                 memcpy(pSMB->ResumeFileName, psrch_inf->presume_name, name_len);
                 byte_count += name_len;
+               /* 14 byte parm len above enough for 2 byte null terminator */
+               pSMB->ResumeFileName[name_len] = 0;
+               pSMB->ResumeFileName[name_len+1] = 0;
         } else {
                 rc = -EINVAL;
                 goto FNext2_err_exit;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c

index 30ab70ce554716df92739f0b1643ce79053497e7..3497125189dfde1a8b19506216954495fd061c7b 100644 (file)
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -643,7 +643,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                          netfid, length,
                          pfLock->fl_start, numUnlock, numLock, lockType,
                          wait_flag);
-       if (rc == 0 && (pfLock->fl_flags & FL_POSIX))
+       if (pfLock->fl_flags & FL_POSIX)
                 posix_lock_file_wait(file, pfLock);
         FreeXid(xid);
         return rc;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c

index bde0fabfece0aeeaecd9ab4b1c2cf733009975b1..ab925ef4f863c9696b9ed0a1832e9d3d6a0c0d07 100644 (file)
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -92,7 +92,7 @@ cifs_hl_exit:
         return rc;
  }
  
-int
+void *
  cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
  {
         struct inode *inode = direntry->d_inode;
@@ -148,7 +148,7 @@ out:
  out_no_free:
         FreeXid(xid);
         nd_set_link(nd, target_path);
-       return 0;
+       return NULL;    /* No cookie */
  }
  
  int
@@ -330,7 +330,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
         return rc;
  }
  
-void cifs_put_link(struct dentry *direntry, struct nameidata *nd)
+void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie)
  {
         char *p = nd_get_link(nd);
         if (!IS_ERR(p))
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c

index 072b4ee8c53e1a28c51002d8a331c497a0b8c05c..20ae4153f791673d4137bf885db2b8b92cda8852 100644 (file)
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -611,6 +611,7 @@ cifsConvertToUCS(__le16 * target, const char *source, int maxlen,
                 src_char = source[i];
                 switch (src_char) {
                         case 0:
+                               target[j] = 0;
                                 goto ctoUCS_out;
                         case ':':
                                 target[j] = cpu_to_le16(UNI_COLON);
diff --git a/fs/dcache.c b/fs/dcache.c

index 3aa8a7e980d80877cff19138ce37b4a01fd2c09a..a15a2e1f55208882ec828339943894928a651bc0 100644 (file)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -19,6 +19,7 @@
  #include <linux/string.h>
  #include <linux/mm.h>
  #include <linux/fs.h>
+#include <linux/fsnotify.h>
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/smp_lock.h>
@@ -101,6 +102,7 @@ static inline void dentry_iput(struct dentry * dentry)
                 list_del_init(&dentry->d_alias);
                 spin_unlock(&dentry->d_lock);
                 spin_unlock(&dcache_lock);
+               fsnotify_inoderemove(inode);
                 if (dentry->d_op && dentry->d_op->d_iput)
                         dentry->d_op->d_iput(dentry, inode);
                 else
@@ -1165,13 +1167,16 @@ out:
   
  void d_delete(struct dentry * dentry)
  {
+       int isdir = 0;
         /*
          * Are we the only user?
          */
         spin_lock(&dcache_lock);
         spin_lock(&dentry->d_lock);
+       isdir = S_ISDIR(dentry->d_inode->i_mode);
         if (atomic_read(&dentry->d_count) == 1) {
                 dentry_iput(dentry);
+               fsnotify_nameremove(dentry, isdir);
                 return;
         }
  
@@ -1180,6 +1185,8 @@ void d_delete(struct dentry * dentry)
  
         spin_unlock(&dentry->d_lock);
         spin_unlock(&dcache_lock);
+
+       fsnotify_nameremove(dentry, isdir);
  }
  
  static void __d_rehash(struct dentry * entry, struct hlist_head *list)
diff --git a/fs/devfs/base.c b/fs/devfs/base.c

index 1ecfe1f184d4fd85ca38d4f02cd2c20189572902..8b679b67e5e0f78e18dbd25015f7990cfa6f1f5a 100644 (file)
--- a/fs/devfs/base.c
+++ b/fs/devfs/base.c
@@ -2491,11 +2491,11 @@ static int devfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
         return 0;
  }                              /*  End Function devfs_mknod  */
  
-static int devfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *devfs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct devfs_entry *p = get_devfs_entry_from_vfs_inode(dentry->d_inode);
         nd_set_link(nd, p ? p->u.symlink.linkname : ERR_PTR(-ENODEV));
-       return 0;
+       return NULL;
  }                              /*  End Function devfs_follow_link  */
  
  static struct inode_operations devfs_iops = {
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c

index 9f7bac01d557631e5bcebde72220305dbb3dafcb..1e67d87cfa913b58c648a586235816350481fed5 100644 (file)
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -21,11 +21,11 @@
  #include "xattr.h"
  #include <linux/namei.h>
  
-static int ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct ext2_inode_info *ei = EXT2_I(dentry->d_inode);
         nd_set_link(nd, (char *)ei->i_data);
-       return 0;
+       return NULL;
  }
  
  struct inode_operations ext2_symlink_inode_operations = {
diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c

index 8c3e72818fb0893e0ddee74c33f46b7e09da57c4..4f79122cde670558c39a6886623c7522286b9854 100644 (file)
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -23,11 +23,11 @@
  #include <linux/namei.h>
  #include "xattr.h"
  
-static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
         nd_set_link(nd, (char*)ei->i_data);
-       return 0;
+       return NULL;
  }
  
  struct inode_operations ext3_symlink_inode_operations = {
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c

index ac677ab262b2b47a9581a44d730942b11b9276a3..d0401dc68d41306cef3781de18e9ac18ef7ad306 100644 (file)
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -38,7 +38,7 @@
  #include "vxfs_inode.h"
  
  
-static int     vxfs_immed_follow_link(struct dentry *, struct nameidata *);
+static void *  vxfs_immed_follow_link(struct dentry *, struct nameidata *);
  
  static int     vxfs_immed_readpage(struct file *, struct page *);
  
@@ -72,12 +72,12 @@ struct address_space_operations vxfs_immed_aops = {
   * Returns:
   *   Zero on success, else a negative error code.
   */
-static int
+static void *
  vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np)
  {
         struct vxfs_inode_info          *vip = VXFS_INO(dp->d_inode);
         nd_set_link(np, vip->vii_immed.vi_immed);
-       return 0;
+       return NULL;
  }
  
  /**
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c

index 6ad1211f84edb0fcd4264faea1ecedb01fead7b9..a096c5a5666442530eeb208baa4069122511c11d 100644 (file)
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -480,6 +480,8 @@ void hfs_bnode_put(struct hfs_bnode *node)
                         return;
                 }
                 for (i = 0; i < tree->pages_per_bnode; i++) {
+                       if (!node->page[i])
+                               continue;
                         mark_page_accessed(node->page[i]);
  #if REF_PAGES
                         put_page(node->page[i]);
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c

index cbc8510ad22212a58f7dfab2ce1a34f36d599064..5ea6b3d45eaa608f9ab38887de18ccb8477dacc2 100644 (file)
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -482,7 +482,8 @@ void hfs_file_truncate(struct inode *inode)
                 page_cache_release(page);
                 mark_inode_dirty(inode);
                 return;
-       }
+       } else if (inode->i_size == HFS_I(inode)->phys_size)
+               return;
         size = inode->i_size + HFS_SB(sb)->alloc_blksz - 1;
         blk_cnt = size / HFS_SB(sb)->alloc_blksz;
         alloc_cnt = HFS_I(inode)->alloc_blocks;
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c

index 267872e84d714dcf23bc4419fcbb1a5c7b8efeac..8868d3b766fd46e1c263dfdde6d1688cc1f5cd09 100644 (file)
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -643,6 +643,8 @@ void hfs_bnode_put(struct hfs_bnode *node)
                         return;
                 }
                 for (i = 0; i < tree->pages_per_bnode; i++) {
+                       if (!node->page[i])
+                               continue;
                         mark_page_accessed(node->page[i]);
  #if REF_PAGES
                         put_page(node->page[i]);
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c

index 376498cc64fddb5b54df6d8f249df43570d38c5e..e7235ca79a95285a1ddb0cdb79e2cadaa9895705 100644 (file)
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -461,7 +461,9 @@ void hfsplus_file_truncate(struct inode *inode)
                 page_cache_release(page);
                 mark_inode_dirty(inode);
                 return;
-       }
+       } else if (inode->i_size == HFSPLUS_I(inode).phys_size)
+               return;
+
         blk_cnt = (inode->i_size + HFSPLUS_SB(sb).alloc_blksz - 1) >> HFSPLUS_SB(sb).alloc_blksz_shift;
         alloc_cnt = HFSPLUS_I(inode).alloc_blocks;
         if (blk_cnt == alloc_cnt)
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c

index ff150fedb98150882dabf0b1b0af844bfe016cc7..52930915bad8c8d66eefdb4bfd4863b645ac5a21 100644 (file)
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -38,7 +38,7 @@ struct hppfs_inode_info {
  
  static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode)
  {
-       return(list_entry(inode, struct hppfs_inode_info, vfs_inode));
+       return container_of(inode, struct hppfs_inode_info, vfs_inode);
  }
  
  #define HPPFS_SUPER_MAGIC 0xb00000ee
@@ -662,42 +662,36 @@ static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen)
  {
         struct file *proc_file;
         struct dentry *proc_dentry;
-       int (*readlink)(struct dentry *, char *, int);
-       int err, n;
+       int ret;
  
         proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
         proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY);
-       err = PTR_ERR(proc_dentry);
-       if(IS_ERR(proc_dentry))
-               return(err);
+       if (IS_ERR(proc_file))
+               return PTR_ERR(proc_file);
  
-       readlink = proc_dentry->d_inode->i_op->readlink;
-       n = (*readlink)(proc_dentry, buffer, buflen);
+       ret = proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, buflen);
  
         fput(proc_file);
  
-       return(n);
+       return ret;
  }
  
-static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct file *proc_file;
         struct dentry *proc_dentry;
-       int (*follow_link)(struct dentry *, struct nameidata *);
-       int err, n;
+       void *ret;
  
         proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
         proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY);
-       err = PTR_ERR(proc_dentry);
-       if(IS_ERR(proc_dentry))
-               return(err);
+       if (IS_ERR(proc_file))
+               return proc_file;
  
-       follow_link = proc_dentry->d_inode->i_op->follow_link;
-       n = (*follow_link)(proc_dentry, nd);
+       ret = proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
  
         fput(proc_file);
  
-       return(n);
+       return ret;
  }
  
  static struct inode_operations hppfs_dir_iops = {
diff --git a/fs/inotify.c b/fs/inotify.c

index a8a714e481405f37c3886c1761dd078a9e1fef37..2e4e2a57708cf95dcdf348235376ef901f4dc295 100644 (file)
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -90,6 +90,7 @@ struct inotify_device {
         unsigned int            queue_size;     /* size of the queue (bytes) */
         unsigned int            event_count;    /* number of pending events */
         unsigned int            max_events;     /* maximum number of events */
+       u32                     last_wd;        /* the last wd allocated */
  };
  
  /*
@@ -352,7 +353,7 @@ static int inotify_dev_get_wd(struct inotify_device *dev,
         do {
                 if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL)))
                         return -ENOSPC;
-               ret = idr_get_new(&dev->idr, watch, &watch->wd);
+               ret = idr_get_new_above(&dev->idr, watch, dev->last_wd+1, &watch->wd);
         } while (ret == -EAGAIN);
  
         return ret;
@@ -401,6 +402,7 @@ static struct inotify_watch *create_watch(struct inotify_device *dev,
                 return ERR_PTR(ret);
         }
  
+       dev->last_wd = watch->wd;
         watch->mask = mask;
         atomic_set(&watch->count, 0);
         INIT_LIST_HEAD(&watch->d_list);
@@ -899,6 +901,7 @@ asmlinkage long sys_inotify_init(void)
         dev->queue_size = 0;
         dev->max_events = inotify_max_queued_events;
         dev->user = user;
+       dev->last_wd = 0;
         atomic_set(&dev->count, 0);
  
         get_inotify_dev(dev);
diff --git a/fs/ioprio.c b/fs/ioprio.c

index 97e1f088ba00b3b63f7f9ea594b756781fcf6596..d1c1f2b2c9da9796affc6be18b8793489b9f2873 100644 (file)
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -62,6 +62,8 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
  
                         break;
                 case IOPRIO_CLASS_IDLE:
+                       if (!capable(CAP_SYS_ADMIN))
+                               return -EPERM;
                         break;
                 default:
                         return -EINVAL;
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c

index 34a44e451689afd59aa30449b56f938bd0109f60..4917315db732e881cecbd50e89573333205d9959 100644 (file)
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -129,8 +129,14 @@ static int zisofs_readpage(struct file *file, struct page *page)
         cend = le32_to_cpu(*(__le32 *)(bh->b_data + (blockendptr & bufmask)));
         brelse(bh);
  
+       if (cstart > cend)
+               goto eio;
+               
         csize = cend-cstart;
  
+       if (csize > deflateBound(1UL << zisofs_block_shift))
+               goto eio;
+
         /* Now page[] contains an array of pages, any of which can be NULL,
            and the locks on which we hold.  We should now read the data and
            release the pages.  If the pages are NULL the decompressed data
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c

index 65ab6b001dcac9d24d9d40aa8bc8527980a0587b..82ef484f5e12337ba1cc190e30275e0ebe7a9e16 100644 (file)
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -18,7 +18,7 @@
  #include <linux/namei.h>
  #include "nodelist.h"
  
-static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd);
+static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd);
  
  struct inode_operations jffs2_symlink_inode_operations =
  {      
@@ -27,9 +27,10 @@ struct inode_operations jffs2_symlink_inode_operations =
         .setattr =      jffs2_setattr
  };
  
-static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode);
+       char *p = (char *)f->dents;
         
         /*
          * We don't acquire the f->sem mutex here since the only data we
@@ -45,19 +46,20 @@ static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
          * nd_set_link() call.
          */
         
-       if (!f->dents) {
+       if (!p) {
                 printk(KERN_ERR "jffs2_follow_link(): can't find symlink taerget\n");
-               return -EIO;
+               p = ERR_PTR(-EIO);
+       } else {
+               D1(printk(KERN_DEBUG "jffs2_follow_link(): target path is '%s'\n", (char *) f->dents));
         }
-       D1(printk(KERN_DEBUG "jffs2_follow_link(): target path is '%s'\n", (char *) f->dents));
  
-       nd_set_link(nd, (char *)f->dents);
+       nd_set_link(nd, p);
         
         /*
          * We unlock the f->sem mutex but VFS will use the f->dents string. This is safe
          * since the only way that may cause f->dents to be changed is iput() operation.
          * But VFS will not use f->dents after iput() has been called.
          */
-       return 0;
+       return NULL;
  }
  
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c

index 2137138c59b0a4c08839121f0b89c5a55699a9b0..767c7ecb429ed6f044e1954873c73c1d719eaab8 100644 (file)
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -128,6 +128,10 @@ void jfs_delete_inode(struct inode *inode)
  {
         jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
  
+       if (is_bad_inode(inode) ||
+           (JFS_IP(inode)->fileset != cpu_to_le32(FILESYSTEM_I)))
+                       return;
+
         if (test_cflag(COMMIT_Freewmap, inode))
                 jfs_free_zero_link(inode);
  
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c

index 22815e88e7cc0caa85ccdf4220d2777b1929b363..d27bac6acaa346118b39bb02517987130b5cdcfb 100644 (file)
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -191,7 +191,7 @@ static int lbmIOWait(struct lbuf * bp, int flag);
  static bio_end_io_t lbmIODone;
  static void lbmStartIO(struct lbuf * bp);
  static void lmGCwrite(struct jfs_log * log, int cant_block);
-static int lmLogSync(struct jfs_log * log, int nosyncwait);
+static int lmLogSync(struct jfs_log * log, int hard_sync);
  
  
  
@@ -915,19 +915,17 @@ static void lmPostGC(struct lbuf * bp)
   *     if new sync address is available
   *     (normally the case if sync() is executed by back-ground
   *     process).
- *     if not, explicitly run jfs_blogsync() to initiate
- *     getting of new sync address.
   *     calculate new value of i_nextsync which determines when
   *     this code is called again.
   *
   * PARAMETERS: log     - log structure
- *             nosyncwait - 1 if called asynchronously
+ *             hard_sync - 1 to force all metadata to be written
   *
   * RETURN:     0
   *                     
   * serialization: LOG_LOCK() held on entry/exit
   */
-static int lmLogSync(struct jfs_log * log, int nosyncwait)
+static int lmLogSync(struct jfs_log * log, int hard_sync)
  {
         int logsize;
         int written;            /* written since last syncpt */
@@ -941,11 +939,18 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
         unsigned long flags;
  
         /* push dirty metapages out to disk */
-       list_for_each_entry(sbi, &log->sb_list, log_list) {
-               filemap_flush(sbi->ipbmap->i_mapping);
-               filemap_flush(sbi->ipimap->i_mapping);
-               filemap_flush(sbi->direct_inode->i_mapping);
-       }
+       if (hard_sync)
+               list_for_each_entry(sbi, &log->sb_list, log_list) {
+                       filemap_fdatawrite(sbi->ipbmap->i_mapping);
+                       filemap_fdatawrite(sbi->ipimap->i_mapping);
+                       filemap_fdatawrite(sbi->direct_inode->i_mapping);
+               }
+       else
+               list_for_each_entry(sbi, &log->sb_list, log_list) {
+                       filemap_flush(sbi->ipbmap->i_mapping);
+                       filemap_flush(sbi->ipimap->i_mapping);
+                       filemap_flush(sbi->direct_inode->i_mapping);
+               }
  
         /*
          *      forward syncpt
@@ -1021,10 +1026,6 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
                 /* next syncpt trigger = written + more */
                 log->nextsync = written + more;
  
-       /* return if lmLogSync() from outside of transaction, e.g., sync() */
-       if (nosyncwait)
-               return lsn;
-
         /* if number of bytes written from last sync point is more
          * than 1/4 of the log size, stop new transactions from
          * starting until all current transactions are completed
@@ -1049,11 +1050,12 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
   *
   * FUNCTION:   write log SYNCPT record for specified log
   *
- * PARAMETERS: log     - log structure
+ * PARAMETERS: log       - log structure
+ *             hard_sync - set to 1 to force metadata to be written
   */
-void jfs_syncpt(struct jfs_log *log)
+void jfs_syncpt(struct jfs_log *log, int hard_sync)
  {      LOG_LOCK(log);
-       lmLogSync(log, 1);
+       lmLogSync(log, hard_sync);
         LOG_UNLOCK(log);
  }
  
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h

index 747114cd38b878c3e57f90070fa0207e36207dd5..e4978b5b65ee05000e877209b733dfb1c1355a82 100644 (file)
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -510,6 +510,6 @@ extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
  extern int lmGroupCommit(struct jfs_log *, struct tblock *);
  extern int jfsIOWait(void *);
  extern void jfs_flush_journal(struct jfs_log * log, int wait);
-extern void jfs_syncpt(struct jfs_log *log);
+extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
  
  #endif                         /* _H_JFS_LOGMGR */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c

index 121c981ff45363bef9af40c859132ba3e03007f0..c7a92f9deb2b93c269d2b945418030f37f53ba51 100644 (file)
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -552,6 +552,11 @@ void txEnd(tid_t tid)
                  * synchronize with logsync barrier
                  */
                 if (test_bit(log_SYNCBARRIER, &log->flag)) {
+                       TXN_UNLOCK();
+
+                       /* write dirty metadata & forward log syncpt */
+                       jfs_syncpt(log, 1);
+
                         jfs_info("log barrier off: 0x%x", log->lsn);
  
                         /* enable new transactions start */
@@ -560,11 +565,6 @@ void txEnd(tid_t tid)
                         /* wakeup all waitors for logsync barrier */
                         TXN_WAKEUP(&log->syncwait);
  
-                       TXN_UNLOCK();
-
-                       /* forward log syncpt */
-                       jfs_syncpt(log);
-
                         goto wakeup;
                 }
         }
@@ -657,7 +657,9 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
                                 /* only anonymous txn.
                                  * Remove from anon_list
                                  */
+                               TXN_LOCK();
                                 list_del_init(&jfs_ip->anon_inode_list);
+                               TXN_UNLOCK();
                         }
                         jfs_ip->atlhead = tlck->next;
                 } else {
diff --git a/fs/jfs/super.c b/fs/jfs/super.c

index ee32211288cefadc7b86c7d33129f6d279950ee3..9ff89720f93bab8bfeaee75ac656e04f6a0de3ee 100644 (file)
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -114,6 +114,8 @@ static void jfs_destroy_inode(struct inode *inode)
  {
         struct jfs_inode_info *ji = JFS_IP(inode);
  
+       BUG_ON(!list_empty(&ji->anon_inode_list));
+
         spin_lock_irq(&ji->ag_lock);
         if (ji->active_ag != -1) {
                 struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
@@ -531,7 +533,7 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
         /* log == NULL indicates read-only mount */
         if (log) {
                 jfs_flush_journal(log, wait);
-               jfs_syncpt(log);
+               jfs_syncpt(log, 0);
         }
  
         return 0;
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c

index 287d8d6c3cfd6fd8fd147b1c67ffa4c946ab2e69..16477b3835e1d62235ff939bcab6f32b6be454bb 100644 (file)
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -22,11 +22,11 @@
  #include "jfs_inode.h"
  #include "jfs_xattr.h"
  
-static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         char *s = JFS_IP(dentry->d_inode)->i_inline;
         nd_set_link(nd, s);
-       return 0;
+       return NULL;
  }
  
  struct inode_operations jfs_symlink_inode_operations = {
diff --git a/fs/namei.c b/fs/namei.c

index 02a824cd3c5c01b7af923ad81e96b940c2cd1a08..6ec1f0fefc5b017aa321738f7d3581370a91fad3 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -501,6 +501,7 @@ struct path {
  static inline int __do_follow_link(struct path *path, struct nameidata *nd)
  {
         int error;
+       void *cookie;
         struct dentry *dentry = path->dentry;
  
         touch_atime(path->mnt, dentry);
@@ -508,13 +509,15 @@ static inline int __do_follow_link(struct path *path, struct nameidata *nd)
  
         if (path->mnt == nd->mnt)
                 mntget(path->mnt);
-       error = dentry->d_inode->i_op->follow_link(dentry, nd);
-       if (!error) {
+       cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
+       error = PTR_ERR(cookie);
+       if (!IS_ERR(cookie)) {
                 char *s = nd_get_link(nd);
+               error = 0;
                 if (s)
                         error = __vfs_follow_link(nd, s);
                 if (dentry->d_inode->i_op->put_link)
-                       dentry->d_inode->i_op->put_link(dentry, nd);
+                       dentry->d_inode->i_op->put_link(dentry, nd, cookie);
         }
         dput(dentry);
         mntput(path->mnt);
@@ -1801,7 +1804,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
         }
         up(&dentry->d_inode->i_sem);
         if (!error) {
-               fsnotify_rmdir(dentry, dentry->d_inode, dir);
                 d_delete(dentry);
         }
         dput(dentry);
@@ -1874,7 +1876,6 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
  
         /* We don't d_delete() NFS sillyrenamed files--they still exist. */
         if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
-               fsnotify_unlink(dentry, dir);
                 d_delete(dentry);
         }
  
@@ -2218,7 +2219,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
         if (!error) {
                 const char *new_name = old_dentry->d_name.name;
-               fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir);
+               fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
+                             new_dentry->d_inode, old_dentry->d_inode);
         }
         fsnotify_oldname_free(old_name);
  
@@ -2345,15 +2347,17 @@ out:
  int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
  {
         struct nameidata nd;
-       int res;
+       void *cookie;
+
         nd.depth = 0;
-       res = dentry->d_inode->i_op->follow_link(dentry, &nd);
-       if (!res) {
-               res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
+       cookie = dentry->d_inode->i_op->follow_link(dentry, &nd);
+       if (!IS_ERR(cookie)) {
+               int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
                 if (dentry->d_inode->i_op->put_link)
-                       dentry->d_inode->i_op->put_link(dentry, &nd);
+                       dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
+               cookie = ERR_PTR(res);
         }
-       return res;
+       return PTR_ERR(cookie);
  }
  
  int vfs_follow_link(struct nameidata *nd, const char *link)
@@ -2396,23 +2400,20 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
         return res;
  }
  
-int page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
+void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
  {
-       struct page *page;
+       struct page *page = NULL;
         nd_set_link(nd, page_getlink(dentry, &page));
-       return 0;
+       return page;
  }
  
-void page_put_link(struct dentry *dentry, struct nameidata *nd)
+void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
  {
-       if (!IS_ERR(nd_get_link(nd))) {
-               struct page *page;
-               page = find_get_page(dentry->d_inode->i_mapping, 0);
-               if (!page)
-                       BUG();
+       struct page *page = cookie;
+
+       if (page) {
                 kunmap(page);
                 page_cache_release(page);
-               page_cache_release(page);
         }
  }
  
diff --git a/fs/namespace.c b/fs/namespace.c

index 587eb0d707ee8d7fd4119aee67c5e4791f4d9d9e..79bd8a46e1e7298859a85f9ed9efd20e1dfdb19c 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -160,7 +160,7 @@ clone_mnt(struct vfsmount *old, struct dentry *root)
                 mnt->mnt_root = dget(root);
                 mnt->mnt_mountpoint = mnt->mnt_root;
                 mnt->mnt_parent = mnt;
-               mnt->mnt_namespace = old->mnt_namespace;
+               mnt->mnt_namespace = current->namespace;
  
                 /* stick the duplicate mount on the same expiry list
                  * as the original if that was on one */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index b38a57e78a63d8fd91866b2649e58c410d2a50ec..2df639f143e8065cdd647044a4283e28fe89d092 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -182,14 +182,16 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
                 /* We requested READDIRPLUS, but the server doesn't grok it */
                 if (error == -ENOTSUPP && desc->plus) {
                         NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
-                       NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
+                       clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
                         desc->plus = 0;
                         goto again;
                 }
                 goto error;
         }
         SetPageUptodate(page);
-       NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+       spin_lock(&inode->i_lock);
+       NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+       spin_unlock(&inode->i_lock);
         /* Ensure consistent page alignment of the data.
          * Note: assumes we have exclusive access to this mapping either
          *       through inode->i_sem or some other mechanism.
@@ -462,7 +464,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
                                                 page,
                                                 NFS_SERVER(inode)->dtsize,
                                                 desc->plus);
-       NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+       spin_lock(&inode->i_lock);
+       NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+       spin_unlock(&inode->i_lock);
         desc->page = page;
         desc->ptr = kmap(page);         /* matching kunmap in nfs_do_filldir */
         if (desc->error >= 0) {
@@ -545,7 +549,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                         break;
                 }
                 if (res == -ETOOSMALL && desc->plus) {
-                       NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
+                       clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
                         nfs_zap_caches(inode);
                         desc->plus = 0;
                         desc->entry->eof = 0;
@@ -608,7 +612,7 @@ static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
  {
         if (IS_ROOT(dentry))
                 return 1;
-       if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0
+       if ((NFS_I(dir)->cache_validity & NFS_INO_INVALID_ATTR) != 0
                         || nfs_attribute_timeout(dir))
                 return 0;
         return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata);
@@ -935,6 +939,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
         error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
         if (error < 0) {
                 res = ERR_PTR(error);
+               unlock_kernel();
                 goto out;
         }
  
@@ -1575,11 +1580,12 @@ out:
  
  int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
  {
-       struct nfs_access_entry *cache = &NFS_I(inode)->cache_access;
+       struct nfs_inode *nfsi = NFS_I(inode);
+       struct nfs_access_entry *cache = &nfsi->cache_access;
  
         if (cache->cred != cred
                         || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
-                       || (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS))
+                       || (nfsi->cache_validity & NFS_INO_INVALID_ACCESS))
                 return -ENOENT;
         memcpy(res, cache, sizeof(*res));
         return 0;
@@ -1587,14 +1593,18 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs
  
  void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
  {
-       struct nfs_access_entry *cache = &NFS_I(inode)->cache_access;
+       struct nfs_inode *nfsi = NFS_I(inode);
+       struct nfs_access_entry *cache = &nfsi->cache_access;
  
         if (cache->cred != set->cred) {
                 if (cache->cred)
                         put_rpccred(cache->cred);
                 cache->cred = get_rpccred(set->cred);
         }
-       NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS;
+       /* FIXME: replace current access_cache BKL reliance with inode->i_lock */
+       spin_lock(&inode->i_lock);
+       nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+       spin_unlock(&inode->i_lock);
         cache->jiffies = set->jiffies;
         cache->mask = set->mask;
  }
diff --git a/fs/nfs/file.c b/fs/nfs/file.c

index 5621ba9885f48aeec57595b4003764d683b9cd04..f6b9eda925c526d18c2a1606c1e12568380a557f 100644 (file)
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -134,9 +134,10 @@ nfs_file_release(struct inode *inode, struct file *filp)
   */
  static int nfs_revalidate_file(struct inode *inode, struct file *filp)
  {
+       struct nfs_inode *nfsi = NFS_I(inode);
         int retval = 0;
  
-       if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode))
+       if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode))
                 retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
         nfs_revalidate_mapping(inode, filp->f_mapping);
         return 0;
@@ -164,7 +165,7 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
                 goto force_reval;
         if (nfsi->npages != 0)
                 return 0;
-       if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
+       if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
                 return 0;
  force_reval:
         return __nfs_revalidate_inode(server, inode);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c

index 4845911f1c63f3e3436e5591be1b8c08b006a181..541b418327c8b18b8f30d294beca9c10d3ca732e 100644 (file)
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -615,14 +615,18 @@ nfs_zap_caches(struct inode *inode)
         struct nfs_inode *nfsi = NFS_I(inode);
         int mode = inode->i_mode;
  
+       spin_lock(&inode->i_lock);
+
         NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
         NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
  
         memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
         if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
-               nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
+               nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
         else
-               nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
+               nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
+
+       spin_unlock(&inode->i_lock);
  }
  
  static void nfs_zap_acl_cache(struct inode *inode)
@@ -632,7 +636,9 @@ static void nfs_zap_acl_cache(struct inode *inode)
         clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache;
         if (clear_acl_cache != NULL)
                 clear_acl_cache(inode);
-       NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL;
+       spin_lock(&inode->i_lock);
+       NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL;
+       spin_unlock(&inode->i_lock);
  }
  
  /*
@@ -739,7 +745,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                         inode->i_fop = &nfs_dir_operations;
                         if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
                             && fattr->size <= NFS_LIMIT_READDIRPLUS)
-                               NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS;
+                               set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
                 } else if (S_ISLNK(inode->i_mode))
                         inode->i_op = &nfs_symlink_inode_operations;
                 else
@@ -814,55 +820,84 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
                 nfs_wb_all(inode);
         }
         error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
-       if (error == 0) {
+       if (error == 0)
                 nfs_refresh_inode(inode, &fattr);
+       nfs_end_data_update(inode);
+       unlock_kernel();
+       return error;
+}
+
+/**
+ * nfs_setattr_update_inode - Update inode metadata after a setattr call.
+ * @inode: pointer to struct inode
+ * @attr: pointer to struct iattr
+ *
+ * Note: we do this in the *proc.c in order to ensure that
+ *       it works for things like exclusive creates too.
+ */
+void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
+{
+       if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
                 if ((attr->ia_valid & ATTR_MODE) != 0) {
-                       int mode;
-                       mode = inode->i_mode & ~S_IALLUGO;
-                       mode |= attr->ia_mode & S_IALLUGO;
+                       int mode = attr->ia_mode & S_IALLUGO;
+                       mode |= inode->i_mode & ~S_IALLUGO;
                         inode->i_mode = mode;
                 }
                 if ((attr->ia_valid & ATTR_UID) != 0)
                         inode->i_uid = attr->ia_uid;
                 if ((attr->ia_valid & ATTR_GID) != 0)
                         inode->i_gid = attr->ia_gid;
-               if ((attr->ia_valid & ATTR_SIZE) != 0) {
-                       inode->i_size = attr->ia_size;
-                       vmtruncate(inode, attr->ia_size);
-               }
+               spin_lock(&inode->i_lock);
+               NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+               spin_unlock(&inode->i_lock);
         }
-       if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
-               NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-       nfs_end_data_update(inode);
-       unlock_kernel();
-       return error;
+       if ((attr->ia_valid & ATTR_SIZE) != 0) {
+               inode->i_size = attr->ia_size;
+               vmtruncate(inode, attr->ia_size);
+       }
+}
+
+static int nfs_wait_schedule(void *word)
+{
+       if (signal_pending(current))
+               return -ERESTARTSYS;
+       schedule();
+       return 0;
  }
  
  /*
   * Wait for the inode to get unlocked.
- * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING).
   */
-static int
-nfs_wait_on_inode(struct inode *inode, int flag)
+static int nfs_wait_on_inode(struct inode *inode)
  {
         struct rpc_clnt *clnt = NFS_CLIENT(inode);
         struct nfs_inode *nfsi = NFS_I(inode);
-
+       sigset_t oldmask;
         int error;
-       if (!(NFS_FLAGS(inode) & flag))
-               return 0;
+
         atomic_inc(&inode->i_count);
-       error = nfs_wait_event(clnt, nfsi->nfs_i_wait,
-                               !(NFS_FLAGS(inode) & flag));
+       rpc_clnt_sigmask(clnt, &oldmask);
+       error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING,
+                                       nfs_wait_schedule, TASK_INTERRUPTIBLE);
+       rpc_clnt_sigunmask(clnt, &oldmask);
         iput(inode);
+
         return error;
  }
  
+static void nfs_wake_up_inode(struct inode *inode)
+{
+       struct nfs_inode *nfsi = NFS_I(inode);
+
+       clear_bit(NFS_INO_REVALIDATING, &nfsi->flags);
+       smp_mb__after_clear_bit();
+       wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING);
+}
+
  int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  {
         struct inode *inode = dentry->d_inode;
-       struct nfs_inode *nfsi = NFS_I(inode);
-       int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME;
+       int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
         int err;
  
         if (__IS_FLG(inode, MS_NOATIME))
@@ -1008,7 +1043,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
         struct nfs_fattr fattr;
         struct nfs_inode *nfsi = NFS_I(inode);
         unsigned long verifier;
-       unsigned int flags;
+       unsigned long cache_validity;
  
         dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
                 inode->i_sb->s_id, (long long)NFS_FILEID(inode));
@@ -1019,18 +1054,19 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
         if (NFS_STALE(inode))
                 goto out_nowait;
  
-       while (NFS_REVALIDATING(inode)) {
-               status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
-               if (status < 0)
-                       goto out_nowait;
-               if (NFS_ATTRTIMEO(inode) == 0)
-                       continue;
-               if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME))
-                       continue;
-               status = NFS_STALE(inode) ? -ESTALE : 0;
-               goto out_nowait;
+       status = nfs_wait_on_inode(inode);
+       if (status < 0)
+               goto out;
+       if (NFS_STALE(inode)) {
+               status = -ESTALE;
+               /* Do we trust the cached ESTALE? */
+               if (NFS_ATTRTIMEO(inode) != 0) {
+                       if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) {
+                               /* no */
+                       } else
+                               goto out;
+               }
         }
-       NFS_FLAGS(inode) |= NFS_INO_REVALIDATING;
  
         /* Protect against RPC races by saving the change attribute */
         verifier = nfs_save_change_attribute(inode);
@@ -1042,7 +1078,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
                 if (status == -ESTALE) {
                         nfs_zap_caches(inode);
                         if (!S_ISDIR(inode->i_mode))
-                               NFS_FLAGS(inode) |= NFS_INO_STALE;
+                               set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
                 }
                 goto out;
         }
@@ -1054,25 +1090,30 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
                          (long long)NFS_FILEID(inode), status);
                 goto out;
         }
-       flags = nfsi->flags;
-       nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE;
+       spin_lock(&inode->i_lock);
+       cache_validity = nfsi->cache_validity;
+       nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
+
         /*
          * We may need to keep the attributes marked as invalid if
          * we raced with nfs_end_attr_update().
          */
         if (verifier == nfsi->cache_change_attribute)
-               nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
-       /* Do the page cache invalidation */
+               nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
+       spin_unlock(&inode->i_lock);
+
         nfs_revalidate_mapping(inode, inode->i_mapping);
-       if (flags & NFS_INO_INVALID_ACL)
+
+       if (cache_validity & NFS_INO_INVALID_ACL)
                 nfs_zap_acl_cache(inode);
+
         dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
                 inode->i_sb->s_id,
                 (long long)NFS_FILEID(inode));
  
-out:
-       NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
-       wake_up(&nfsi->nfs_i_wait);
+ out:
+       nfs_wake_up_inode(inode);
+
   out_nowait:
         unlock_kernel();
         return status;
@@ -1096,7 +1137,7 @@ int nfs_attribute_timeout(struct inode *inode)
   */
  int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
  {
-       if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
+       if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
                         && !nfs_attribute_timeout(inode))
                 return NFS_STALE(inode) ? -ESTALE : 0;
         return __nfs_revalidate_inode(server, inode);
@@ -1111,19 +1152,23 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
  {
         struct nfs_inode *nfsi = NFS_I(inode);
  
-       if (nfsi->flags & NFS_INO_INVALID_DATA) {
+       if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
                 if (S_ISREG(inode->i_mode)) {
                         if (filemap_fdatawrite(mapping) == 0)
                                 filemap_fdatawait(mapping);
                         nfs_wb_all(inode);
                 }
                 invalidate_inode_pages2(mapping);
-               nfsi->flags &= ~NFS_INO_INVALID_DATA;
+
+               spin_lock(&inode->i_lock);
+               nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
                 if (S_ISDIR(inode->i_mode)) {
                         memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
                         /* This ensures we revalidate child dentries */
                         nfsi->cache_change_attribute++;
                 }
+               spin_unlock(&inode->i_lock);
+
                 dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
                                 inode->i_sb->s_id,
                                 (long long)NFS_FILEID(inode));
@@ -1153,10 +1198,12 @@ void nfs_end_data_update(struct inode *inode)
  
         if (!nfs_have_delegation(inode, FMODE_READ)) {
                 /* Mark the attribute cache for revalidation */
-               nfsi->flags |= NFS_INO_INVALID_ATTR;
+               spin_lock(&inode->i_lock);
+               nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                 /* Directories and symlinks: invalidate page cache too */
                 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-                       nfsi->flags |= NFS_INO_INVALID_DATA;
+                       nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+               spin_unlock(&inode->i_lock);
         }
         nfsi->cache_change_attribute ++;
         atomic_dec(&nfsi->data_updates);
@@ -1181,6 +1228,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
         if (nfs_have_delegation(inode, FMODE_READ))
                 return 0;
  
+       spin_lock(&inode->i_lock);
+
         /* Are we in the process of updating data on the server? */
         data_unstable = nfs_caches_unstable(inode);
  
@@ -1189,19 +1238,23 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
                                 && nfsi->change_attr == fattr->pre_change_attr)
                         nfsi->change_attr = fattr->change_attr;
                 if (nfsi->change_attr != fattr->change_attr) {
-                       nfsi->flags |= NFS_INO_INVALID_ATTR;
+                       nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                         if (!data_unstable)
-                               nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+                               nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
                 }
         }
  
-       if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+       if ((fattr->valid & NFS_ATTR_FATTR) == 0) {
+               spin_unlock(&inode->i_lock);
                 return 0;
+       }
  
         /* Has the inode gone and changed behind our back? */
         if (nfsi->fileid != fattr->fileid
-                       || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+                       || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+               spin_unlock(&inode->i_lock);
                 return -EIO;
+       }
  
         cur_size = i_size_read(inode);
         new_isize = nfs_size_to_loff_t(fattr->size);
@@ -1216,30 +1269,31 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
  
         /* Verify a few of the more important attributes */
         if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
-               nfsi->flags |= NFS_INO_INVALID_ATTR;
+               nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                 if (!data_unstable)
-                       nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+                       nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
         }
         if (cur_size != new_isize) {
-               nfsi->flags |= NFS_INO_INVALID_ATTR;
+               nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                 if (nfsi->npages == 0)
-                       nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+                       nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
         }
  
         /* Have any file permissions changed? */
         if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
                         || inode->i_uid != fattr->uid
                         || inode->i_gid != fattr->gid)
-               nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
+               nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
  
         /* Has the link count changed? */
         if (inode->i_nlink != fattr->nlink)
-               nfsi->flags |= NFS_INO_INVALID_ATTR;
+               nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
  
         if (!timespec_equal(&inode->i_atime, &fattr->atime))
-               nfsi->flags |= NFS_INO_INVALID_ATIME;
+               nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
  
         nfsi->read_cache_jiffies = fattr->timestamp;
+       spin_unlock(&inode->i_lock);
         return 0;
  }
  
@@ -1278,11 +1332,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
                 goto out_err;
         }
  
+       spin_lock(&inode->i_lock);
+
         /*
          * Make sure the inode's type hasn't changed.
          */
-       if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+       if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+               spin_unlock(&inode->i_lock);
                 goto out_changed;
+       }
  
         /*
          * Update the read time so we don't revalidate too often.
@@ -1373,8 +1431,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
                                 || S_ISLNK(inode->i_mode)))
                 invalid &= ~NFS_INO_INVALID_DATA;
         if (!nfs_have_delegation(inode, FMODE_READ))
-               nfsi->flags |= invalid;
+               nfsi->cache_validity |= invalid;
  
+       spin_unlock(&inode->i_lock);
         return 0;
   out_changed:
         /*
@@ -1391,7 +1450,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
          */
         nfs_invalidate_inode(inode);
   out_err:
-       NFS_FLAGS(inode) |= NFS_INO_STALE;
+       set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
         return -ESTALE;
  }
  
@@ -1950,7 +2009,8 @@ static struct inode *nfs_alloc_inode(struct super_block *sb)
         nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL);
         if (!nfsi)
                 return NULL;
-       nfsi->flags = 0;
+       nfsi->flags = 0UL;
+       nfsi->cache_validity = 0UL;
  #ifdef CONFIG_NFS_V3_ACL
         nfsi->acl_access = ERR_PTR(-EAGAIN);
         nfsi->acl_default = ERR_PTR(-EAGAIN);
@@ -1982,7 +2042,6 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
                 nfsi->ndirty = 0;
                 nfsi->ncommit = 0;
                 nfsi->npages = 0;
-               init_waitqueue_head(&nfsi->nfs_i_wait);
                 nfs4_init_once(nfsi);
         }
  }
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c

index 1b7a3ef2f8131f4f9c02534f55d5a42de4633f67..6a5bbc0ae941aa690e1f73137cbce4af38902092 100644 (file)
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -308,7 +308,9 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
         nfs_begin_data_update(inode);
         status = rpc_call(server->client_acl, ACLPROC3_SETACL,
                           &args, &fattr, 0);
-       NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
+       spin_lock(&inode->i_lock);
+       NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
+       spin_unlock(&inode->i_lock);
         nfs_end_data_update(inode);
         dprintk("NFS reply setacl: %d\n", status);
  
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c

index 7851569b31c63f03a5095f503d0b1b18183273d4..2681485cf2d00f9f9fdd647a8bf1cd9861299104 100644 (file)
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -120,6 +120,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
         dprintk("NFS call  setattr\n");
         fattr->valid = 0;
         status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+       if (status == 0)
+               nfs_setattr_update_inode(inode, sattr);
         dprintk("NFS reply setattr: %d\n", status);
         return status;
  }
@@ -370,6 +372,8 @@ again:
                  * not sure this buys us anything (and I'd have
                  * to revamp the NFSv3 XDR code) */
                 status = nfs3_proc_setattr(dentry, &fattr, sattr);
+               if (status == 0)
+                       nfs_setattr_update_inode(dentry->d_inode, sattr);
                 nfs_refresh_inode(dentry->d_inode, &fattr);
                 dprintk("NFS reply setattr (post-create): %d\n", status);
         }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

index 1b76f80aedb921c737586fbca6ceeac3f0f522f2..0c5a308e49638171291d22aa3630fc431ac74528 100644 (file)
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -753,6 +753,7 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
                  .rpc_argp       = &arg,
                  .rpc_resp       = &res,
          };
+       int status;
  
          fattr->valid = 0;
  
@@ -762,7 +763,8 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
         } else
                 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
  
-       return rpc_call_sync(server->client, &msg, 0);
+       status = rpc_call_sync(server->client, &msg, 0);
+       return status;
  }
  
  static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
@@ -1145,6 +1147,8 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
  
         status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
                         NFS_FH(inode), sattr, state);
+       if (status == 0)
+               nfs_setattr_update_inode(inode, sattr);
         if (state != NULL)
                 nfs4_close_state(state, FMODE_WRITE);
         put_rpccred(cred);
@@ -1449,8 +1453,10 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                 struct nfs_fattr fattr;
                 status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
                                      NFS_FH(state->inode), sattr, state);
-               if (status == 0)
+               if (status == 0) {
+                       nfs_setattr_update_inode(state->inode, sattr);
                         goto out;
+               }
         } else if (flags != 0)
                 goto out;
         nfs4_close_state(state, flags);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c

index cedf636bcf3c9f68b53b6c979d2116f5f4a4511d..be23c3fb9260051b8069ad4afbf2257bed1ca502 100644 (file)
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -114,6 +114,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
         dprintk("NFS call  setattr\n");
         fattr->valid = 0;
         status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+       if (status == 0)
+               nfs_setattr_update_inode(inode, sattr);
         dprintk("NFS reply setattr: %d\n", status);
         return status;
  }
diff --git a/fs/nfs/read.c b/fs/nfs/read.c

index 6f866b8aa2d56908553972bb18ed6b700ebebfa2..6ceb1d471f2064952d8b4727a28711e821f5aba9 100644 (file)
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -140,7 +140,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
                 if (rdata->res.eof != 0 || result == 0)
                         break;
         } while (count);
-       NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+       spin_lock(&inode->i_lock);
+       NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+       spin_unlock(&inode->i_lock);
  
         if (count)
                 memclear_highpage_flush(page, rdata->args.pgbase, count);
@@ -473,7 +475,9 @@ void nfs_readpage_result(struct rpc_task *task)
                 }
                 task->tk_status = -EIO;
         }
-       NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME;
+       spin_lock(&data->inode->i_lock);
+       NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+       spin_unlock(&data->inode->i_lock);
         data->complete(data, status);
  }
  
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c

index 35f1065991441c3f3d02f6749692da739b8492b3..18dc95b0b64638695a9dfa9b6db1983875d9a757 100644 (file)
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -27,26 +27,14 @@
  
  /* Symlink caching in the page cache is even more simplistic
   * and straight-forward than readdir caching.
- *
- * At the beginning of the page we store pointer to struct page in question,
- * simplifying nfs_put_link() (if inode got invalidated we can't find the page
- * to be freed via pagecache lookup).
- * The NUL-terminated string follows immediately thereafter.
   */
  
-struct nfs_symlink {
-       struct page *page;
-       char body[0];
-};
-
  static int nfs_symlink_filler(struct inode *inode, struct page *page)
  {
-       const unsigned int pgbase = offsetof(struct nfs_symlink, body);
-       const unsigned int pglen = PAGE_SIZE - pgbase;
         int error;
  
         lock_kernel();
-       error = NFS_PROTO(inode)->readlink(inode, page, pgbase, pglen);
+       error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
         unlock_kernel();
         if (error < 0)
                 goto error;
@@ -60,11 +48,10 @@ error:
         return -EIO;
  }
  
-static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct inode *inode = dentry->d_inode;
         struct page *page;
-       struct nfs_symlink *p;
         void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode));
         if (err)
                 goto read_failed;
@@ -78,28 +65,20 @@ static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
                 err = ERR_PTR(-EIO);
                 goto getlink_read_error;
         }
-       p = kmap(page);
-       p->page = page;
-       nd_set_link(nd, p->body);
-       return 0;
+       nd_set_link(nd, kmap(page));
+       return page;
  
  getlink_read_error:
         page_cache_release(page);
  read_failed:
         nd_set_link(nd, err);
-       return 0;
+       return NULL;
  }
  
-static void nfs_put_link(struct dentry *dentry, struct nameidata *nd)
+static void nfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
  {
-       char *s = nd_get_link(nd);
-       if (!IS_ERR(s)) {
-               struct nfs_symlink *p;
-               struct page *page;
-
-               p = container_of(s, struct nfs_symlink, body[0]);
-               page = p->page;
-
+       if (cookie) {
+               struct page *page = cookie;
                 kunmap(page);
                 page_cache_release(page);
         }
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c

index 18c58c32e326281bfa7226edbaaec135b45c2b70..251e5a1bb1c4cbe0308c042073b0d83cfe71d73d 100644 (file)
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -239,6 +239,7 @@ nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
         if (xdr_decode_word(buf, base, &entries) ||
             entries > NFS_ACL_MAX_ENTRIES)
                 return -EINVAL;
+       nfsacl_desc.desc.array_maxlen = entries;
         err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc);
         if (err)
                 return err;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c

index 07b9a065e9daa75ebb35fcff48e5f441376cc6f8..1697539a7171777815cf6e54bd7738e3e3fdacde 100644 (file)
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -287,6 +287,7 @@ out:
         svc_exit_thread(rqstp);
  
         /* Release module */
+       unlock_kernel();
         module_put_and_exit(0);
  }
  
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog

index 9709fac6531dd7e044aee7973870e86137b51fb9..9eecc9939dfe59c5c8a22db8ff596e01eca86bec 100644 (file)
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -174,6 +174,9 @@ ToDo/Notes:
           fact that the vfs and ntfs inodes are one struct in memory to find
           the ntfs inode in memory if present.  Also, the ntfs inode has its
           own locking so it does not matter if the vfs inode is locked.
+       - Fix bug in mft record writing where we forgot to set the device in
+         the buffers when mapping them after the VM had discarded them.
+         Thanks to Martin MOKREJŠ for the bug report.
  
  2.1.22 - Many bug and race fixes and error handling improvements.
  
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c

index 3f43bfe6184ea2b344b426313972347b0cf6fb0e..78adad7a988d981a22f61b739cc082071ddc8513 100644 (file)
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -924,6 +924,7 @@ static int ntfs_write_mst_block(struct page *page,
                         LCN lcn;
                         unsigned int vcn_ofs;
  
+                       bh->b_bdev = vol->sb->s_bdev;
                         /* Obtain the vcn and offset of the current block. */
                         vcn = (VCN)block << bh_size_bits;
                         vcn_ofs = vcn & vol->cluster_size_mask;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c

index ac9ff39aa8343eb843bb4f0ee2bb7730f969c76e..317f7c679fd3bb6edc89eee24b18e2243d3cf03e 100644 (file)
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -533,6 +533,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
                         LCN lcn;
                         unsigned int vcn_ofs;
  
+                       bh->b_bdev = vol->sb->s_bdev;
                         /* Obtain the vcn and offset of the current block. */
                         vcn = ((VCN)mft_no << vol->mft_record_size_bits) +
                                         (block_start - m_start);
@@ -725,6 +726,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
                         LCN lcn;
                         unsigned int vcn_ofs;
  
+                       bh->b_bdev = vol->sb->s_bdev;
                         /* Obtain the vcn and offset of the current block. */
                         vcn = ((VCN)ni->mft_no << vol->mft_record_size_bits) +
                                         (block_start - m_start);
diff --git a/fs/proc/base.c b/fs/proc/base.c

index ace151fa487865fd160546d27c07e31384fa17c9..491f2d9f89acd93fabb0d0067ee76f329bc14c2b 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -890,7 +890,7 @@ static struct file_operations proc_seccomp_operations = {
  };
  #endif /* CONFIG_SECCOMP */
  
-static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct inode *inode = dentry->d_inode;
         int error = -EACCES;
@@ -907,7 +907,7 @@ static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
         error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
         nd->last_type = LAST_BIND;
  out:
-       return error;
+       return ERR_PTR(error);
  }
  
  static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
@@ -1692,11 +1692,11 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
         return vfs_readlink(dentry,buffer,buflen,tmp);
  }
  
-static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         char tmp[30];
         sprintf(tmp, "%d", current->tgid);
-       return vfs_follow_link(nd,tmp);
+       return ERR_PTR(vfs_follow_link(nd,tmp));
  }      
  
  static struct inode_operations proc_self_inode_operations = {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c

index 6c6315d04028711151627a09a29c186fec7d856a..abe8920313fb32b84ad65b78861325727ae5ce45 100644 (file)
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -329,10 +329,10 @@ static void release_inode_number(unsigned int inum)
         spin_unlock(&proc_inum_lock);
  }
  
-static int proc_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         nd_set_link(nd, PDE(dentry->d_inode)->data);
-       return 0;
+       return NULL;
  }
  
  static struct inode_operations proc_link_inode_operations = {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c

index d9f614a57731bd08bad340ab08b0662dc99cfd58..ff291c973a567f4d9226c3840c98ea002c4ed823 100644 (file)
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1985,7 +1985,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
          * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
          * code really needs to be reworked, but this will take care of it
          * for now. -jeffm */
-       if (REISERFS_I(dir)->i_acl_default) {
+       if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) {
                 reiserfs_write_unlock_xattrs(dir->i_sb);
                 iput(inode);
                 reiserfs_write_lock_xattrs(dir->i_sb);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c

index a20bbc1642dcdac2f4cfa608db569eb46e2ea66d..3549067c42d941546f4fa12ad266612a9f9f4413 100644 (file)
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -593,6 +593,9 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
          */
         inode->i_uid = current->fsuid;
         inode->i_mode = mode;
+       /* Make inode invalid - just in case we are going to drop it before
+        * the initialization happens */
+       INODE_PKEY(inode)->k_objectid = 0;
  
         if (dir->i_mode & S_ISGID) {
                 inode->i_gid = dir->i_gid;
diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c

index 93f3cd22a2e93f1885e62b6f3ab4c8cdeceb6204..6815b1b12b68853351a9af047031de63cfae7782 100644 (file)
--- a/fs/smbfs/sock.c
+++ b/fs/smbfs/sock.c
@@ -15,12 +15,12 @@
  #include <linux/file.h>
  #include <linux/in.h>
  #include <linux/net.h>
-#include <linux/tcp.h>
  #include <linux/mm.h>
  #include <linux/netdevice.h>
  #include <linux/smp_lock.h>
  #include <linux/workqueue.h>
  #include <net/scm.h>
+#include <net/tcp_states.h>
  #include <net/ip.h>
  
  #include <linux/smb_fs.h>
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c

index 8b069e06433d567e70c3c55b4ec2c2df7f25c337..0c64bc3a0127e9c15c6f76921ea688f7eee73b6f 100644 (file)
--- a/fs/smbfs/symlink.c
+++ b/fs/smbfs/symlink.c
@@ -34,7 +34,7 @@ int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname)
         return smb_proc_symlink(server_from_dentry(dentry), dentry, oldname);
  }
  
-static int smb_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *smb_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         char *link = __getname();
         DEBUG1("followlink of %s/%s\n", DENTRY_PATH(dentry));
@@ -52,10 +52,10 @@ static int smb_follow_link(struct dentry *dentry, struct nameidata *nd)
                 }
         }
         nd_set_link(nd, link);
-       return 0;
+       return NULL;
  }
  
-static void smb_put_link(struct dentry *dentry, struct nameidata *nd)
+static void smb_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
  {
         char *s = nd_get_link(nd);
         if (!IS_ERR(s))
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c

index d727dc960634a28de3df914ccb26cdc63fac98a9..970a33f03299b12731401a55dbdbe2a1c547c3c0 100644 (file)
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -228,6 +228,10 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name)
         struct sysfs_dirent * sd;
         struct sysfs_dirent * parent_sd = dir->d_fsdata;
  
+       if (dir->d_inode == NULL)
+               /* no inode means this hasn't been made visible yet */
+               return;
+
         down(&dir->d_inode->i_sem);
         list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
                 if (!sd->s_element)
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c

index fae57c83a722f4e782a2651e7dc0b74a66dfee76..de402fa915f2778e010d20bfeca2c6ebc05f6ddd 100644 (file)
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -151,17 +151,17 @@ static int sysfs_getlink(struct dentry *dentry, char * path)
  
  }
  
-static int sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         int error = -ENOMEM;
         unsigned long page = get_zeroed_page(GFP_KERNEL);
         if (page)
                 error = sysfs_getlink(dentry, (char *) page); 
         nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
-       return 0;
+       return NULL;
  }
  
-static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd)
+static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
  {
         char *page = nd_get_link(nd);
         if (!IS_ERR(page))
diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c

index ed637db2dcb10293769768cb0b75ca3d6b619815..b85ce61d635ce2a41865f3f324d5fdd9095244ca 100644 (file)
--- a/fs/sysv/symlink.c
+++ b/fs/sysv/symlink.c
@@ -8,10 +8,10 @@
  #include "sysv.h"
  #include <linux/namei.h>
  
-static int sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         nd_set_link(nd, (char *)SYSV_I(dentry->d_inode)->i_data);
-       return 0;
+       return NULL;
  }
  
  struct inode_operations sysv_fast_symlink_inode_operations = {
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c

index a0e49149098f2d19bb8ffec6373136758b2ce60d..337512ed57814ac1949c440ef89c2e1389ad2cfc 100644 (file)
--- a/fs/ufs/symlink.c
+++ b/fs/ufs/symlink.c
@@ -29,11 +29,11 @@
  #include <linux/namei.h>
  #include <linux/ufs_fs.h>
  
-static int ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct ufs_inode_info *p = UFS_I(dentry->d_inode);
         nd_set_link(nd, (char*)p->i_u1.i_symlink);
-       return 0;
+       return NULL;
  }
  
  struct inode_operations ufs_fast_symlink_inode_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c

index 407e99359391ce68beffd4bc176f147231859f0c..f252605514eb1dce23393e4f904d6f5789c3d535 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -374,7 +374,7 @@ linvfs_rename(
   * we need to be very careful about how much stack we use.
   * uio is kmalloced for this reason...
   */
-STATIC int
+STATIC void *
  linvfs_follow_link(
         struct dentry           *dentry,
         struct nameidata        *nd)
@@ -391,14 +391,14 @@ linvfs_follow_link(
         link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);
         if (!link) {
                 nd_set_link(nd, ERR_PTR(-ENOMEM));
-               return 0;
+               return NULL;
         }
  
         uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL);
         if (!uio) {
                 kfree(link);
                 nd_set_link(nd, ERR_PTR(-ENOMEM));
-               return 0;
+               return NULL;
         }
  
         vp = LINVFS_GET_VP(dentry->d_inode);
@@ -422,10 +422,10 @@ linvfs_follow_link(
         kfree(uio);
  
         nd_set_link(nd, link);
-       return 0;
+       return NULL;
  }
  
-static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd)
+static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
  {
         char *s = nd_get_link(nd);
         if (!IS_ERR(s))
diff --git a/include/asm-alpha/pci.h b/include/asm-alpha/pci.h

index 28957697e59c5e426a3f969575ec374da251f29c..f681e675b823c80448e3e4f6cf618bb4585d61ff 100644 (file)
--- a/include/asm-alpha/pci.h
+++ b/include/asm-alpha/pci.h
@@ -251,6 +251,9 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
  extern void pcibios_resource_to_bus(struct pci_dev *, struct pci_bus_region *,
                                     struct resource *);
  
+extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                                   struct pci_bus_region *region);
+
  #define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index
  
  static inline int pci_proc_domain(struct pci_bus *bus)
diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h

index d00259d3dc7849c69b1d4e9f0c828505142395ab..b5193229132a12e5f73bfe510830aef1ab95235b 100644 (file)
--- a/include/asm-alpha/socket.h
+++ b/include/asm-alpha/socket.h
@@ -25,6 +25,8 @@
  #define SO_ERROR       0x1007
  #define SO_SNDBUF      0x1001
  #define SO_RCVBUF      0x1002
+#define SO_SNDBUFFORCE 0x100a
+#define SO_RCVBUFFORCE 0x100b
  #define        SO_RCVLOWAT     0x1010
  #define        SO_SNDLOWAT     0x1011
  #define        SO_RCVTIMEO     0x1012
diff --git a/include/asm-alpha/system.h b/include/asm-alpha/system.h

index c08ce970ff8c95a203bddcdfb5f82a7e1bf0c892..bdb4d66418f188767d89de67d1f4478ebbe41247 100644 (file)
--- a/include/asm-alpha/system.h
+++ b/include/asm-alpha/system.h
@@ -443,22 +443,19 @@ __xchg_u64(volatile long *m, unsigned long val)
     if something tries to do an invalid xchg().  */
  extern void __xchg_called_with_bad_pointer(void);
  
-static inline unsigned long
-__xchg(volatile void *ptr, unsigned long x, int size)
-{
-       switch (size) {
-               case 1:
-                       return __xchg_u8(ptr, x);
-               case 2:
-                       return __xchg_u16(ptr, x);
-               case 4:
-                       return __xchg_u32(ptr, x);
-               case 8:
-                       return __xchg_u64(ptr, x);
-       }
-       __xchg_called_with_bad_pointer();
-       return x;
-}
+#define __xchg(ptr, x, size) \
+({ \
+       unsigned long __xchg__res; \
+       volatile void *__xchg__ptr = (ptr); \
+       switch (size) { \
+               case 1: __xchg__res = __xchg_u8(__xchg__ptr, x); break; \
+               case 2: __xchg__res = __xchg_u16(__xchg__ptr, x); break; \
+               case 4: __xchg__res = __xchg_u32(__xchg__ptr, x); break; \
+               case 8: __xchg__res = __xchg_u64(__xchg__ptr, x); break; \
+               default: __xchg_called_with_bad_pointer(); __xchg__res = x; \
+       } \
+       __xchg__res; \
+})
  
  #define xchg(ptr,x)                                                         \
    ({                                                                        \
diff --git a/include/asm-arm/arch-ixp4xx/timex.h b/include/asm-arm/arch-ixp4xx/timex.h

index 38c9d77d37276c6167ae6aafe0b28425fe087d7c..3745e35cc030b31f77b1ebbc07ef5d70e18e0e90 100644 (file)
--- a/include/asm-arm/arch-ixp4xx/timex.h
+++ b/include/asm-arm/arch-ixp4xx/timex.h
@@ -7,7 +7,9 @@
  
  /*
   * We use IXP425 General purpose timer for our timer needs, it runs at 
- * 66.66... MHz
+ * 66.66... MHz. We do a convoluted calculation of CLOCK_TICK_RATE b/c the
+ * timer register ignores the bottom 2 bits of the LATCH value.
   */
-#define CLOCK_TICK_RATE (66666666)
+#define FREQ 66666666
+#define CLOCK_TICK_RATE (((FREQ / HZ & ~IXP4XX_OST_RELOAD_MASK) + 1) * HZ)
  
diff --git a/include/asm-arm/arch-s3c2410/usb-control.h b/include/asm-arm/arch-s3c2410/usb-control.h

index 1cc85a096b23851eab314e1cd388fe9441ea7cfd..bd43b566db3ece998be04a885d80daaa1f694e36 100644 (file)
--- a/include/asm-arm/arch-s3c2410/usb-control.h
+++ b/include/asm-arm/arch-s3c2410/usb-control.h
@@ -12,6 +12,7 @@
   * Changelog:
   *  11-Sep-2004 BJD  Created file
   *  21-Sep-2004 BJD  Updated port info
+ *  09-Aug-2005 BJD  Renamed s3c2410_report_oc to s3c2410_usb_report_oc
  */
  
  #ifndef __ASM_ARCH_USBCONTROL_H
@@ -35,7 +36,7 @@ struct s3c2410_hcd_info {
         void            (*report_oc)(struct s3c2410_hcd_info *, int ports);
  };
  
-static void inline s3c2410_report_oc(struct s3c2410_hcd_info *info, int ports)
+static void inline s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
  {
         if (info->report_oc != NULL) {
                 (info->report_oc)(info, ports);
diff --git a/include/asm-arm/arch-sa1100/mcp.h b/include/asm-arm/arch-sa1100/mcp.h

new file mode 100644 (file)

index 0000000..f58a227
--- /dev/null
+++ b/include/asm-arm/arch-sa1100/mcp.h
@@ -0,0 +1,21 @@
+/*
+ *  linux/include/asm-arm/arch-sa1100/mcp.h
+ *
+ *  Copyright (C) 2005 Russell King.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_ARM_ARCH_MCP_H
+#define __ASM_ARM_ARCH_MCP_H
+
+#include <linux/types.h>
+
+struct mcp_plat_data {
+       u32 mccr0;
+       u32 mccr1;
+       unsigned int sclk_rate;
+};
+
+#endif
diff --git a/include/asm-arm/bug.h b/include/asm-arm/bug.h

index 24d11672eb601979d2ca64e3adf60dc923dfe22a..7fb02138f585985dd810407a82a8643bb3874d9f 100644 (file)
--- a/include/asm-arm/bug.h
+++ b/include/asm-arm/bug.h
@@ -5,7 +5,7 @@
  
  #ifdef CONFIG_BUG
  #ifdef CONFIG_DEBUG_BUGVERBOSE
-extern volatile void __bug(const char *file, int line, void *data);
+extern void __bug(const char *file, int line, void *data) __attribute__((noreturn));
  
  /* give file/line information */
  #define BUG()          __bug(__FILE__, __LINE__, NULL)
diff --git a/include/asm-arm/cpu-multi32.h b/include/asm-arm/cpu-multi32.h

index ff48022e472011a0ade2725ebc6e1cdba5028368..4679f63688e99c1471b2699a8f99ae163306edf3 100644 (file)
--- a/include/asm-arm/cpu-multi32.h
+++ b/include/asm-arm/cpu-multi32.h
@@ -31,7 +31,7 @@ extern struct processor {
         /*
          * Special stuff for a reset
          */
-       volatile void (*reset)(unsigned long addr);
+       void (*reset)(unsigned long addr) __attribute__((noreturn));
         /*
          * Idle the processor
          */
diff --git a/include/asm-arm/cpu-single.h b/include/asm-arm/cpu-single.h

index b5ec5d54665df03fe8f4fd6f3b17db687a6ae160..6723e67244fa1ef0f3b38d0fba23820438860855 100644 (file)
--- a/include/asm-arm/cpu-single.h
+++ b/include/asm-arm/cpu-single.h
@@ -41,4 +41,4 @@ extern int cpu_do_idle(void);
  extern void cpu_dcache_clean_area(void *, int);
  extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
  extern void cpu_set_pte(pte_t *ptep, pte_t pte);
-extern volatile void cpu_reset(unsigned long addr);
+extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
diff --git a/include/asm-arm/hardware/gic.h b/include/asm-arm/hardware/gic.h

new file mode 100644 (file)

index 0000000..3fa5eb7
--- /dev/null
+++ b/include/asm-arm/hardware/gic.h
@@ -0,0 +1,41 @@
+/*
+ *  linux/include/asm-arm/hardware/gic.h
+ *
+ *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_ARM_HARDWARE_GIC_H
+#define __ASM_ARM_HARDWARE_GIC_H
+
+#include <linux/compiler.h>
+
+#define GIC_CPU_CTRL                   0x00
+#define GIC_CPU_PRIMASK                        0x04
+#define GIC_CPU_BINPOINT               0x08
+#define GIC_CPU_INTACK                 0x0c
+#define GIC_CPU_EOI                    0x10
+#define GIC_CPU_RUNNINGPRI             0x14
+#define GIC_CPU_HIGHPRI                        0x18
+
+#define GIC_DIST_CTRL                  0x000
+#define GIC_DIST_CTR                   0x004
+#define GIC_DIST_ENABLE_SET            0x100
+#define GIC_DIST_ENABLE_CLEAR          0x180
+#define GIC_DIST_PENDING_SET           0x200
+#define GIC_DIST_PENDING_CLEAR         0x280
+#define GIC_DIST_ACTIVE_BIT            0x300
+#define GIC_DIST_PRI                   0x400
+#define GIC_DIST_TARGET                        0x800
+#define GIC_DIST_CONFIG                        0xc00
+#define GIC_DIST_SOFTINT               0xf00
+
+#ifndef __ASSEMBLY__
+void gic_dist_init(void __iomem *base);
+void gic_cpu_init(void __iomem *base);
+void gic_raise_softirq(cpumask_t cpumask, unsigned int irq);
+#endif
+
+#endif
diff --git a/include/asm-arm/pci.h b/include/asm-arm/pci.h

index b28f1c95dd625a7b277cb279a585332cb5a1fe3c..38ea5899a580a9500c7e9a3d49fdffd25b119c8e 100644 (file)
--- a/include/asm-arm/pci.h
+++ b/include/asm-arm/pci.h
@@ -60,6 +60,10 @@ extern void
  pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
                          struct resource *res);
  
+extern void
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                       struct pci_bus_region *region);
+
  static inline void pcibios_add_platform_entries(struct pci_dev *dev)
  {
  }
diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h

index a9892eb42a235f447df175be3a91e69dad8b6f4b..478c49b56e18d0fa3658eeb7c5a7d25c86f53de6 100644 (file)
--- a/include/asm-arm/pgtable.h
+++ b/include/asm-arm/pgtable.h
@@ -188,12 +188,18 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
  /*
   *   - extended small page/tiny page
   */
+#define PTE_EXT_XN             (1 << 0)        /* v6 */
  #define PTE_EXT_AP_MASK                (3 << 4)
+#define PTE_EXT_AP0            (1 << 4)
+#define PTE_EXT_AP1            (2 << 4)
  #define PTE_EXT_AP_UNO_SRO     (0 << 4)
-#define PTE_EXT_AP_UNO_SRW     (1 << 4)
-#define PTE_EXT_AP_URO_SRW     (2 << 4)
-#define PTE_EXT_AP_URW_SRW     (3 << 4)
+#define PTE_EXT_AP_UNO_SRW     (PTE_EXT_AP0)
+#define PTE_EXT_AP_URO_SRW     (PTE_EXT_AP1)
+#define PTE_EXT_AP_URW_SRW     (PTE_EXT_AP1|PTE_EXT_AP0)
  #define PTE_EXT_TEX(x)         ((x) << 6)      /* v5 */
+#define PTE_EXT_APX            (1 << 9)        /* v6 */
+#define PTE_EXT_SHARED         (1 << 10)       /* v6 */
+#define PTE_EXT_NG             (1 << 11)       /* v6 */
  
  /*
   *   - small page
@@ -224,6 +230,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
  #define L_PTE_WRITE            (1 << 5)
  #define L_PTE_EXEC             (1 << 6)
  #define L_PTE_DIRTY            (1 << 7)
+#define L_PTE_SHARED           (1 << 10)       /* shared between CPUs (v6) */
+#define L_PTE_ASID             (1 << 11)       /* non-global (use ASID, v6) */
  
  #ifndef __ASSEMBLY__
  
diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h

index 46d20585d95143c0bf87fe8ea299fe785cf0922d..3c51da6438c95b6734e38243ebdbb21af5a6972a 100644 (file)
--- a/include/asm-arm/socket.h
+++ b/include/asm-arm/socket.h
@@ -14,6 +14,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h

index ace27480886e226cdebc6c00fa55bb355da94dee..abb36e54c966b4cbed6c1b8f7fef60a60ab5e81b 100644 (file)
--- a/include/asm-arm/unistd.h
+++ b/include/asm-arm/unistd.h
@@ -350,6 +350,11 @@
  #endif
  
  #define __NR_vserver                   (__NR_SYSCALL_BASE+313)
+#define __NR_ioprio_set                        (__NR_SYSCALL_BASE+314)
+#define __NR_ioprio_get                        (__NR_SYSCALL_BASE+315)
+#define __NR_inotify_init              (__NR_SYSCALL_BASE+316)
+#define __NR_inotify_add_watch         (__NR_SYSCALL_BASE+317)
+#define __NR_inotify_rm_watch          (__NR_SYSCALL_BASE+318)
  
  /*
   * The following SWIs are ARM private.
diff --git a/include/asm-arm26/socket.h b/include/asm-arm26/socket.h

index 46d20585d95143c0bf87fe8ea299fe785cf0922d..3c51da6438c95b6734e38243ebdbb21af5a6972a 100644 (file)
--- a/include/asm-arm26/socket.h
+++ b/include/asm-arm26/socket.h
@@ -14,6 +14,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-cris/socket.h b/include/asm-cris/socket.h

index f159b4f165f74f165671455f8be0cfba48ebc944..8b1da3e58c5580468f60961c498592b71c7f56c7 100644 (file)
--- a/include/asm-cris/socket.h
+++ b/include/asm-cris/socket.h
@@ -16,6 +16,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-frv/socket.h b/include/asm-frv/socket.h

index c3be17c7de4bbf4cd60b9b2452009b98409ff124..7177f8b9817cc3586bf6c4bd4b21662ed32d2d72 100644 (file)
--- a/include/asm-frv/socket.h
+++ b/include/asm-frv/socket.h
@@ -14,6 +14,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-generic/pci.h b/include/asm-generic/pci.h

index 9d4cc47bde390ed4dbde9866b4839f2dbdfa3c79..ee1d8b5d8168fa99ad3b60be15a9a8b5aa17cbc2 100644 (file)
--- a/include/asm-generic/pci.h
+++ b/include/asm-generic/pci.h
@@ -22,6 +22,14 @@ pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
         region->end = res->end;
  }
  
+static inline void
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                       struct pci_bus_region *region)
+{
+       res->start = region->start;
+       res->end = region->end;
+}
+
  #define pcibios_scan_all_fns(a, b)     0
  
  #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
diff --git a/include/asm-h8300/socket.h b/include/asm-h8300/socket.h

index af33b8525dcf4596ce889a9e5e25519dca44c36a..d98cf85bafc1d667d9d2ebdac62752a7da267214 100644 (file)
--- a/include/asm-h8300/socket.h
+++ b/include/asm-h8300/socket.h
@@ -14,6 +14,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-i386/checksum.h b/include/asm-i386/checksum.h

index f949e44c2a35d290655258c83cf3f0f5731e7e20..67d3630c4e8953c74773ce17aaf1bf2c197c1930 100644 (file)
--- a/include/asm-i386/checksum.h
+++ b/include/asm-i386/checksum.h
@@ -83,7 +83,7 @@ static inline unsigned short ip_fast_csum(unsigned char * iph,
             "adcl $0, %0        ;\n"
             "notl %0            ;\n"
  "2:                            ;\n"
-       /* Since the input registers which are loaded with iph and ipl
+       /* Since the input registers which are loaded with iph and ihl
            are modified, we must also specify them as outputs, or gcc
            will assume they contain their original values. */
         : "=r" (sum), "=r" (iph), "=r" (ihl)
diff --git a/include/asm-i386/mach-visws/do_timer.h b/include/asm-i386/mach-visws/do_timer.h

index 33acd50fd9a8a99745d3d582b9100eb23f22c86a..92d638fc8b11ab3354a9876d6125baecfbe4770c 100644 (file)
--- a/include/asm-i386/mach-visws/do_timer.h
+++ b/include/asm-i386/mach-visws/do_timer.h
@@ -1,6 +1,7 @@
  /* defines for inline arch setup functions */
  
  #include <asm/fixmap.h>
+#include <asm/i8259.h>
  #include "cobalt.h"
  
  static inline void do_timer_interrupt_hook(struct pt_regs *regs)
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h

index 5d06e6bd6ba0bff037197c5ff047d66f47faa9fd..d0d8b0160090427328fc384baed9ab77872ecb88 100644 (file)
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -29,7 +29,7 @@ struct desc_struct {
  };
  
  #define desc_empty(desc) \
-               (!((desc)->a + (desc)->b))
+               (!((desc)->a | (desc)->b))
  
  #define desc_equal(desc1, desc2) \
                 (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h

index 07f6b38ad140d5ace68f4d44bfeb61cf351b554f..802ae76195b72a4a09e3f9c052e96968389be694 100644 (file)
--- a/include/asm-i386/socket.h
+++ b/include/asm-i386/socket.h
@@ -14,6 +14,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-ia64/io.h b/include/asm-ia64/io.h

index 491e9d1fc538d0e31cbc2ecbbf15016cd6d7d194..54e7637a326c4e1ffb6aab9835abab5f384e3be0 100644 (file)
--- a/include/asm-ia64/io.h
+++ b/include/asm-ia64/io.h
@@ -120,14 +120,6 @@ static inline void ___ia64_mmiowb(void)
         ia64_mfa();
  }
  
-static inline const unsigned long
-__ia64_get_io_port_base (void)
-{
-       extern unsigned long ia64_iobase;
-
-       return ia64_iobase;
-}
-
  static inline void*
  __ia64_mk_io_addr (unsigned long port)
  {
diff --git a/include/asm-ia64/iosapic.h b/include/asm-ia64/iosapic.h

index 1093f35b3b906a32f4c9fa0a01df8ab1384a25fd..a429fe225b07b4fb85de63f623640c6b2355f23e 100644 (file)
--- a/include/asm-ia64/iosapic.h
+++ b/include/asm-ia64/iosapic.h
@@ -75,6 +75,8 @@ extern int __devinit iosapic_init (unsigned long address,
                                     unsigned int gsi_base);
  #ifdef CONFIG_HOTPLUG
  extern int iosapic_remove (unsigned int gsi_base);
+#else
+#define iosapic_remove(gsi_base)                               (-EINVAL)
  #endif /* CONFIG_HOTPLUG */
  extern int gsi_to_vector (unsigned int gsi);
  extern int gsi_to_irq (unsigned int gsi);
@@ -102,9 +104,7 @@ extern void __devinit map_iosapic_to_node (unsigned int, int);
  #else
  #define iosapic_system_init(pcat_compat)                       do { } while (0)
  #define iosapic_init(address,gsi_base)                         (-EINVAL)
-#ifdef CONFIG_HOTPLUG
  #define iosapic_remove(gsi_base)                               (-ENODEV)
-#endif /* CONFIG_HOTPLUG */
  #define iosapic_register_intr(gsi,polarity,trigger)            (gsi)
  #define iosapic_unregister_intr(irq)                           do { } while (0)
  #define iosapic_override_isa_irq(isa_irq,gsi,polarity,trigger) do { } while (0)
diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h

index 21a9f10d6baafd033e96a83d7c9e1e0568b823ea..a255006fb7b51f0db085e82f547f25938ee7c84c 100644 (file)
--- a/include/asm-ia64/socket.h
+++ b/include/asm-ia64/socket.h
@@ -23,6 +23,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-m32r/checksum.h b/include/asm-m32r/checksum.h

index 99f37dbf2558f033df58c5fd45ac9cf0fab92833..877ebf46e9ff540299283645f986f91c8e6f7454 100644 (file)
--- a/include/asm-m32r/checksum.h
+++ b/include/asm-m32r/checksum.h
@@ -105,7 +105,7 @@ static inline unsigned short ip_fast_csum(unsigned char * iph,
                 "       addx    %0, %3 \n"
                 "       .fillinsn\n"
                 "2: \n"
-       /* Since the input registers which are loaded with iph and ipl
+       /* Since the input registers which are loaded with iph and ihl
            are modified, we must also specify them as outputs, or gcc
            will assume they contain their original values. */
         : "=&r" (sum), "=r" (iph), "=r" (ihl), "=&r" (tmpreg0), "=&r" (tmpreg1)
diff --git a/include/asm-m32r/smp.h b/include/asm-m32r/smp.h

index b9a20cdad65f4a7c4208e6407b1bc2fdd7218f0d..7885b7df84a2b5209d26bfd639f28082999f9728 100644 (file)
--- a/include/asm-m32r/smp.h
+++ b/include/asm-m32r/smp.h
@@ -61,9 +61,7 @@ extern physid_mask_t phys_cpu_present_map;
   * Some lowlevel functions might want to know about
   * the real CPU ID <-> CPU # mapping.
   */
-extern volatile int physid_2_cpu[NR_CPUS];
  extern volatile int cpu_2_physid[NR_CPUS];
-#define physid_to_cpu(physid)  physid_2_cpu[physid]
  #define cpu_to_physid(cpu_id)  cpu_2_physid[cpu_id]
  
  #define raw_smp_processor_id() (current_thread_info()->cpu)
diff --git a/include/asm-m32r/socket.h b/include/asm-m32r/socket.h

index 159519d99042176a1ec577b64243d7ebb5674a6d..8b6680f223c0b4a5f18781f36129cc1d541b68f8 100644 (file)
--- a/include/asm-m32r/socket.h
+++ b/include/asm-m32r/socket.h
@@ -14,6 +14,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h

index 99a51670921089fb3a30b9a95ff8e005febe8d92..206313e2a817798a3e6eb8d77f52230d19fa59b7 100644 (file)
--- a/include/asm-m68k/page.h
+++ b/include/asm-m68k/page.h
@@ -138,13 +138,13 @@ extern unsigned long m68k_memoffset;
  #define __pa(vaddr)            ((unsigned long)(vaddr)+m68k_memoffset)
  #define __va(paddr)            ((void *)((unsigned long)(paddr)-m68k_memoffset))
  #else
-#define __pa(vaddr)            virt_to_phys((void *)vaddr)
-#define __va(paddr)            phys_to_virt((unsigned long)paddr)
+#define __pa(vaddr)            virt_to_phys((void *)(vaddr))
+#define __va(paddr)            phys_to_virt((unsigned long)(paddr))
  #endif
  
  #else  /* !CONFIG_SUN3 */
  /* This #define is a horrible hack to suppress lots of warnings. --m */
-#define __pa(x) ___pa((unsigned long)x)
+#define __pa(x) ___pa((unsigned long)(x))
  static inline unsigned long ___pa(unsigned long x)
  {
       if(x == 0)
diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h

index 8d0b9fc2d07e4ea82a64504591201ad83ccd8ea8..f578ca4b776a3bfcb45b1e60781ebeb74eac785c 100644 (file)
--- a/include/asm-m68k/socket.h
+++ b/include/asm-m68k/socket.h
@@ -14,6 +14,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h

index 020b4db70ee57f04844e6300a1e3e88c31b0f4cd..d478a86294ee8344fa8dd6e236589461ec8c4a4b 100644 (file)
--- a/include/asm-mips/socket.h
+++ b/include/asm-mips/socket.h
@@ -37,6 +37,8 @@ To add: #define SO_REUSEPORT 0x0200   /* Allow local address and port reuse.  */
  #define SO_ERROR       0x1007  /* get error status and clear */
  #define SO_SNDBUF      0x1001  /* Send buffer size. */
  #define SO_RCVBUF      0x1002  /* Receive buffer. */
+#define SO_SNDBUFFORCE 0x100a
+#define SO_RCVBUFFORCE 0x100b
  #define SO_SNDLOWAT    0x1003  /* send low-water mark */
  #define SO_RCVLOWAT    0x1004  /* receive low-water mark */
  #define SO_SNDTIMEO    0x1005  /* send timeout */
diff --git a/include/asm-parisc/pci.h b/include/asm-parisc/pci.h

index ee741c150176a9f9fab30d503f5fcfd521012e92..98d79a3d54fa9e5056fdae209a8dd9bba7c75668 100644 (file)
--- a/include/asm-parisc/pci.h
+++ b/include/asm-parisc/pci.h
@@ -253,6 +253,10 @@ extern void
  pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
                          struct resource *res);
  
+extern void
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                       struct pci_bus_region *region);
+
  static inline void pcibios_add_platform_entries(struct pci_dev *dev)
  {
  }
diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h

index 4a77996c1862c53c773d334f8c27646ad4f9cbdc..1bf54dc53c101337200deeaafa7adcf27daf97ff 100644 (file)
--- a/include/asm-parisc/socket.h
+++ b/include/asm-parisc/socket.h
@@ -16,6 +16,8 @@
  /* To add :#define SO_REUSEPORT 0x0200 */
  #define SO_SNDBUF      0x1001
  #define SO_RCVBUF      0x1002
+#define SO_SNDBUFFORCE 0x100a
+#define SO_RCVBUFFORCE 0x100b
  #define SO_SNDLOWAT    0x1003
  #define SO_RCVLOWAT    0x1004
  #define SO_SNDTIMEO    0x1005
diff --git a/include/asm-ppc64/8253pit.h b/include/asm-powerpc/8253pit.h

similarity index 74%

rename from include/asm-ppc64/8253pit.h

rename to include/asm-powerpc/8253pit.h

index 285f78488ccb76f988a020e1730fbbefc251cefb..862708a749b0fb1383c1d83551fc09ae26e5a2cd 100644 (file)
--- a/include/asm-ppc64/8253pit.h
+++ b/include/asm-powerpc/8253pit.h
@@ -5,6 +5,6 @@
  #ifndef _8253PIT_H
  #define _8253PIT_H
  
-#define PIT_TICK_RATE  1193182UL
+#define PIT_TICK_RATE  1193182UL
  
  #endif
diff --git a/include/asm-ppc/agp.h b/include/asm-powerpc/agp.h

similarity index 100%

rename from include/asm-ppc/agp.h

rename to include/asm-powerpc/agp.h
diff --git a/include/asm-powerpc/cputime.h b/include/asm-powerpc/cputime.h

new file mode 100644 (file)

index 0000000..6d68ad7
--- /dev/null
+++ b/include/asm-powerpc/cputime.h
@@ -0,0 +1 @@
+#include <asm-generic/cputime.h>
diff --git a/include/asm-ppc/div64.h b/include/asm-powerpc/div64.h

similarity index 100%

rename from include/asm-ppc/div64.h

rename to include/asm-powerpc/div64.h
diff --git a/include/asm-powerpc/emergency-restart.h b/include/asm-powerpc/emergency-restart.h

new file mode 100644 (file)

index 0000000..3711bd9
--- /dev/null
+++ b/include/asm-powerpc/emergency-restart.h
@@ -0,0 +1 @@
+#include <asm-generic/emergency-restart.h>
diff --git a/include/asm-ppc/errno.h b/include/asm-powerpc/errno.h

similarity index 100%

rename from include/asm-ppc/errno.h

rename to include/asm-powerpc/errno.h
diff --git a/include/asm-ppc/ioctl.h b/include/asm-powerpc/ioctl.h

similarity index 100%

rename from include/asm-ppc/ioctl.h

rename to include/asm-powerpc/ioctl.h
diff --git a/include/asm-ppc/ioctls.h b/include/asm-powerpc/ioctls.h

similarity index 100%

rename from include/asm-ppc/ioctls.h

rename to include/asm-powerpc/ioctls.h
diff --git a/include/asm-ppc/ipc.h b/include/asm-powerpc/ipc.h

similarity index 100%

rename from include/asm-ppc/ipc.h

rename to include/asm-powerpc/ipc.h
diff --git a/include/asm-ppc/linkage.h b/include/asm-powerpc/linkage.h

similarity index 100%

rename from include/asm-ppc/linkage.h

rename to include/asm-powerpc/linkage.h
diff --git a/include/asm-ppc64/local.h b/include/asm-powerpc/local.h

similarity index 100%

rename from include/asm-ppc64/local.h

rename to include/asm-powerpc/local.h
diff --git a/include/asm-ppc/namei.h b/include/asm-powerpc/namei.h

similarity index 100%

rename from include/asm-ppc/namei.h

rename to include/asm-powerpc/namei.h
diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h

new file mode 100644 (file)

index 0000000..06a959d
--- /dev/null
+++ b/include/asm-powerpc/percpu.h
@@ -0,0 +1 @@
+#include <asm-generic/percpu.h>
diff --git a/include/asm-ppc/poll.h b/include/asm-powerpc/poll.h

similarity index 100%

rename from include/asm-ppc/poll.h

rename to include/asm-powerpc/poll.h
diff --git a/include/asm-powerpc/resource.h b/include/asm-powerpc/resource.h

new file mode 100644 (file)

index 0000000..04bc4db
--- /dev/null
+++ b/include/asm-powerpc/resource.h
@@ -0,0 +1 @@
+#include <asm-generic/resource.h>
diff --git a/include/asm-ppc/shmparam.h b/include/asm-powerpc/shmparam.h

similarity index 100%

rename from include/asm-ppc/shmparam.h

rename to include/asm-powerpc/shmparam.h
diff --git a/include/asm-ppc/string.h b/include/asm-powerpc/string.h

similarity index 100%

rename from include/asm-ppc/string.h

rename to include/asm-powerpc/string.h
diff --git a/include/asm-ppc/unaligned.h b/include/asm-powerpc/unaligned.h

similarity index 100%

rename from include/asm-ppc/unaligned.h

rename to include/asm-powerpc/unaligned.h
diff --git a/include/asm-ppc/xor.h b/include/asm-powerpc/xor.h

similarity index 100%

rename from include/asm-ppc/xor.h

rename to include/asm-powerpc/xor.h
diff --git a/include/asm-ppc/8253pit.h b/include/asm-ppc/8253pit.h

deleted file mode 100644 (file)

index 285f784..0000000
--- a/include/asm-ppc/8253pit.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/*
- * 8253/8254 Programmable Interval Timer
- */
-
-#ifndef _8253PIT_H
-#define _8253PIT_H
-
-#define PIT_TICK_RATE  1193182UL
-
-#endif
diff --git a/include/asm-ppc/cputime.h b/include/asm-ppc/cputime.h

deleted file mode 100644 (file)

index 8e9faf5..0000000
--- a/include/asm-ppc/cputime.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __PPC_CPUTIME_H
-#define __PPC_CPUTIME_H
-
-#include <asm-generic/cputime.h>
-
-#endif /* __PPC_CPUTIME_H */
diff --git a/include/asm-ppc/emergency-restart.h b/include/asm-ppc/emergency-restart.h

deleted file mode 100644 (file)

index 108d8c4..0000000
--- a/include/asm-ppc/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/include/asm-ppc/hdreg.h b/include/asm-ppc/hdreg.h

deleted file mode 100644 (file)

index 7f7fd1a..0000000
--- a/include/asm-ppc/hdreg.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/hdreg.h>
diff --git a/include/asm-ppc/ibm44x.h b/include/asm-ppc/ibm44x.h

index 87f051138b9db550911a2ad933662b2cada44591..e5374be86aefb90a39e608ef09ed002eab7decfe 100644 (file)
--- a/include/asm-ppc/ibm44x.h
+++ b/include/asm-ppc/ibm44x.h
@@ -35,8 +35,10 @@
  #define PPC44x_LOW_SLOT                63
  
  /* LS 32-bits of UART0 physical address location for early serial text debug */
-#ifdef CONFIG_440SP
+#if defined(CONFIG_440SP)
  #define UART0_PHYS_IO_BASE     0xf0000200
+#elif defined(CONFIG_440EP)
+#define UART0_PHYS_IO_BASE     0xe0000000
  #else
  #define UART0_PHYS_IO_BASE     0x40000200
  #endif
@@ -49,11 +51,16 @@
  /*
   * Standard 4GB "page" definitions
   */
-#ifdef CONFIG_440SP
+#if defined(CONFIG_440SP)
  #define        PPC44x_IO_PAGE          0x0000000100000000ULL
  #define        PPC44x_PCICFG_PAGE      0x0000000900000000ULL
  #define        PPC44x_PCIIO_PAGE       PPC44x_PCICFG_PAGE
  #define        PPC44x_PCIMEM_PAGE      0x0000000a00000000ULL
+#elif defined(CONFIG_440EP)
+#define PPC44x_IO_PAGE         0x0000000000000000ULL
+#define PPC44x_PCICFG_PAGE     0x0000000000000000ULL
+#define PPC44x_PCIIO_PAGE      PPC44x_PCICFG_PAGE
+#define PPC44x_PCIMEM_PAGE     0x0000000000000000ULL
  #else
  #define        PPC44x_IO_PAGE          0x0000000100000000ULL
  #define        PPC44x_PCICFG_PAGE      0x0000000200000000ULL
@@ -64,7 +71,7 @@
  /*
   * 36-bit trap ranges
   */
-#ifdef CONFIG_440SP
+#if defined(CONFIG_440SP)
  #define PPC44x_IO_LO           0xf0000000UL
  #define PPC44x_IO_HI           0xf0000fffUL
  #define PPC44x_PCI0CFG_LO      0x0ec00000UL
@@ -75,6 +82,13 @@
  #define PPC44x_PCI2CFG_HI      0x2ec00007UL
  #define PPC44x_PCIMEM_LO       0x80000000UL
  #define PPC44x_PCIMEM_HI       0xdfffffffUL
+#elif defined(CONFIG_440EP)
+#define PPC44x_IO_LO           0xef500000UL
+#define PPC44x_IO_HI           0xefffffffUL
+#define PPC44x_PCI0CFG_LO      0xeec00000UL
+#define PPC44x_PCI0CFG_HI      0xeecfffffUL
+#define PPC44x_PCIMEM_LO       0xa0000000UL
+#define PPC44x_PCIMEM_HI       0xdfffffffUL
  #else
  #define PPC44x_IO_LO           0x40000000UL
  #define PPC44x_IO_HI           0x40000fffUL
@@ -152,6 +166,12 @@
  #define DCRN_SDR_UART0         0x0120
  #define DCRN_SDR_UART1         0x0121
  
+#ifdef CONFIG_440EP
+#define DCRN_SDR_UART2         0x0122
+#define DCRN_SDR_UART3         0x0123
+#define DCRN_SDR_CUST0         0x4000
+#endif
+
  /* SDR read/write helper macros */
  #define SDR_READ(offset) ({\
         mtdcr(DCRN_SDR_CONFIG_ADDR, offset); \
@@ -169,6 +189,14 @@
  #define DCRNCAP_DMA_SG         1       /* have DMA scatter/gather capability */
  #define DCRN_MAL_BASE          0x180
  
+#ifdef CONFIG_440EP
+#define DCRN_DMA2P40_BASE      0x300
+#define DCRN_DMA2P41_BASE      0x308
+#define DCRN_DMA2P42_BASE      0x310
+#define DCRN_DMA2P43_BASE      0x318
+#define DCRN_DMA2P4SR_BASE     0x320
+#endif
+
  /* UIC */
  #define DCRN_UIC0_BASE 0xc0
  #define DCRN_UIC1_BASE 0xd0
@@ -395,11 +423,7 @@
  #define MQ0_CONFIG_SIZE_2G             0x0000c000
  
  /* Internal SRAM Controller 440GX/440SP */
-#ifdef CONFIG_440SP
-#define DCRN_SRAM0_BASE                0x100
-#else /* 440GX */
  #define DCRN_SRAM0_BASE                0x000
-#endif
  
  #define DCRN_SRAM0_SB0CR       (DCRN_SRAM0_BASE + 0x020)
  #define DCRN_SRAM0_SB1CR       (DCRN_SRAM0_BASE + 0x021)
diff --git a/include/asm-ppc/ibm4xx.h b/include/asm-ppc/ibm4xx.h

index 35260afa33a9bde50a18c3b9c560a203a8e05237..e807be96e9814761b9033d55da6332320c287e31 100644 (file)
--- a/include/asm-ppc/ibm4xx.h
+++ b/include/asm-ppc/ibm4xx.h
@@ -97,6 +97,10 @@ void ppc4xx_init(unsigned long r3, unsigned long r4, unsigned long r5,
  
  #elif CONFIG_44x
  
+#if defined(CONFIG_BAMBOO)
+#include <platforms/4xx/bamboo.h>
+#endif
+
  #if defined(CONFIG_EBONY)
  #include <platforms/4xx/ebony.h>
  #endif
diff --git a/include/asm-ppc/ibm_ocp.h b/include/asm-ppc/ibm_ocp.h

index 8c61d93043affa876f5eff0ec500cd8cc0a0b7f7..3f7b5669e6d52c852727e4c7791dd78d89a16abf 100644 (file)
--- a/include/asm-ppc/ibm_ocp.h
+++ b/include/asm-ppc/ibm_ocp.h
@@ -71,6 +71,8 @@ struct ocp_func_emac_data {
  
  /* Sysfs support */
  #define OCP_SYSFS_EMAC_DATA()                                          \
+OCP_SYSFS_ADDTL(struct ocp_func_emac_data, "%d\n", emac, rgmii_idx)    \
+OCP_SYSFS_ADDTL(struct ocp_func_emac_data, "%d\n", emac, rgmii_mux)    \
  OCP_SYSFS_ADDTL(struct ocp_func_emac_data, "%d\n", emac, zmii_idx)     \
  OCP_SYSFS_ADDTL(struct ocp_func_emac_data, "%d\n", emac, zmii_mux)     \
  OCP_SYSFS_ADDTL(struct ocp_func_emac_data, "%d\n", emac, mal_idx)      \
@@ -78,9 +80,14 @@ OCP_SYSFS_ADDTL(struct ocp_func_emac_data, "%d\n", emac, mal_rx_chan)        \
  OCP_SYSFS_ADDTL(struct ocp_func_emac_data, "%d\n", emac, mal_tx_chan)  \
  OCP_SYSFS_ADDTL(struct ocp_func_emac_data, "%d\n", emac, wol_irq)      \
  OCP_SYSFS_ADDTL(struct ocp_func_emac_data, "%d\n", emac, mdio_idx)     \
+OCP_SYSFS_ADDTL(struct ocp_func_emac_data, "%d\n", emac, tah_idx)      \
+OCP_SYSFS_ADDTL(struct ocp_func_emac_data, "%d\n", emac, phy_mode)     \
+OCP_SYSFS_ADDTL(struct ocp_func_emac_data, "0x%08x\n", emac, phy_map)  \
                                                                         \
  void ocp_show_emac_data(struct device *dev)                            \
  {                                                                      \
+       device_create_file(dev, &dev_attr_emac_rgmii_idx);              \
+       device_create_file(dev, &dev_attr_emac_rgmii_mux);              \
         device_create_file(dev, &dev_attr_emac_zmii_idx);               \
         device_create_file(dev, &dev_attr_emac_zmii_mux);               \
         device_create_file(dev, &dev_attr_emac_mal_idx);                \
@@ -88,6 +95,9 @@ void ocp_show_emac_data(struct device *dev)                           \
         device_create_file(dev, &dev_attr_emac_mal_tx_chan);            \
         device_create_file(dev, &dev_attr_emac_wol_irq);                \
         device_create_file(dev, &dev_attr_emac_mdio_idx);               \
+       device_create_file(dev, &dev_attr_emac_tah_idx);                \
+       device_create_file(dev, &dev_attr_emac_phy_mode);               \
+       device_create_file(dev, &dev_attr_emac_phy_map);                \
  }
  
  #ifdef CONFIG_40x
@@ -157,7 +167,7 @@ OCP_SYSFS_ADDTL(struct ocp_func_iic_data, "%d\n", iic, fast_mode)   \
                                                                         \
  void ocp_show_iic_data(struct device *dev)                             \
  {                                                                      \
-       device_create_file(dev, &dev_attr_iic_fast_mode);                       \
+       device_create_file(dev, &dev_attr_iic_fast_mode);               \
  }
  #endif /* __IBM_OCP_H__ */
  #endif /* __KERNEL__ */
diff --git a/include/asm-ppc/local.h b/include/asm-ppc/local.h

deleted file mode 100644 (file)

index b08e3ec..0000000
--- a/include/asm-ppc/local.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __PPC_LOCAL_H
-#define __PPC_LOCAL_H
-
-#include <asm-generic/local.h>
-
-#endif /* __PPC_LOCAL_H */
diff --git a/include/asm-ppc/pci.h b/include/asm-ppc/pci.h

index a13d55870e62202b9045e80ac9e8b9ab7c2bb25a..a811e440c97809272e0f9ab4a4f439249eeb66bf 100644 (file)
--- a/include/asm-ppc/pci.h
+++ b/include/asm-ppc/pci.h
@@ -105,6 +105,10 @@ extern void
  pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
                         struct resource *res);
  
+extern void
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                       struct pci_bus_region *region);
+
  extern void pcibios_add_platform_entries(struct pci_dev *dev);
  
  struct file;
diff --git a/include/asm-ppc/percpu.h b/include/asm-ppc/percpu.h

deleted file mode 100644 (file)

index d66667c..0000000
--- a/include/asm-ppc/percpu.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ARCH_PPC_PERCPU__
-#define __ARCH_PPC_PERCPU__
-
-#include <asm-generic/percpu.h>
-
-#endif /* __ARCH_PPC_PERCPU__ */
diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h

index 4d4b20c9de78fbf31097ea3cb5e098b352cf73f7..92f30b28b252e1ab38c78d3727d99e2317dcefd5 100644 (file)
--- a/include/asm-ppc/pgtable.h
+++ b/include/asm-ppc/pgtable.h
@@ -202,18 +202,64 @@ extern unsigned long ioremap_bot, ioremap_base;
   *
   * Note that these bits preclude future use of a page size
   * less than 4KB.
+ *
+ *
+ * The PPC 440 core has the following TLB attribute fields:
+ *
+ *   TLB1:
+ *   0  1  2  3  4  ... 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ *   RPN.................................  -  -  -  -  -  - ERPN.......
+ *
+ *   TLB2:
+ *   0  1  2  3  4  ... 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ *   -  -  -  -  -    - U0 U1 U2 U3 W  I  M  G  E   - UX UW UR SX SW SR
+ *
+ * There are some constraints and options to consider when deciding how to
+ * map software bits into a TLB entry.
+ *
+ *   - PRESENT *must* be in the bottom three bits because swap cache
+ *     entries use the top 29 bits for TLB2.
+ *
+ *   - FILE *must* be in the bottom three bits because swap cache
+ *     entries use the top 29 bits for TLB2.
+ *
+ *   - CACHE COHERENT bit (M) has no effect on PPC440 core, because it
+ *     doesn't support SMP. So we can use this as software bit, like
+ *     DIRTY.
+ *
+ * With the PPC 44x Linux implementation, the twelve LSBs (bits 0-11) of the
+ * PTE are used for memory protection related functions (see PTE structure in
+ * include/asm-ppc/mmu.h).  The _PAGE_XXX definitions in this file map to the
+ * above bits.  Note that the bit values are CPU specific, not architecture
+ * specific.
+ *
+ * The kernel PTE entry holds an arch-dependent swp_entry structure under
+ * certain situations. In other words, in such situations some portion of
+ * the PTE bits are used as a swp_entry. In the PPC implementation, bits
+ * 3-24 (counting from the LSB) are shared with swp_entry, while bits 0-2
+ * still hold protection values. That means the three protection bits are
+ * reserved for both PTE and SWAP entries and occupy the three least
+ * significant bits.
+ *
+ * There are three protection bits available for SWAP entry:
+ *     _PAGE_PRESENT
+ *     _PAGE_FILE
+ *     _PAGE_HASHPTE (if HW has)
+ *
+ * So those three bits have to be within bits 0-2 (the three LSBs) of the PTE.
+ *
   */
+
  #define _PAGE_PRESENT  0x00000001              /* S: PTE valid */
  #define        _PAGE_RW        0x00000002              /* S: Write permission */
-#define        _PAGE_DIRTY     0x00000004              /* S: Page dirty */
+#define _PAGE_FILE     0x00000004              /* S: nonlinear file mapping */
  #define _PAGE_ACCESSED 0x00000008              /* S: Page referenced */
  #define _PAGE_HWWRITE  0x00000010              /* H: Dirty & RW */
  #define _PAGE_HWEXEC   0x00000020              /* H: Execute permission */
  #define        _PAGE_USER      0x00000040              /* S: User page */
  #define        _PAGE_ENDIAN    0x00000080              /* H: E bit */
  #define        _PAGE_GUARDED   0x00000100              /* H: G bit */
-#define        _PAGE_COHERENT  0x00000200              /* H: M bit */
-#define _PAGE_FILE     0x00000400              /* S: nonlinear file mapping */
+#define        _PAGE_DIRTY     0x00000200              /* S: Page dirty */
  #define        _PAGE_NO_CACHE  0x00000400              /* H: I bit */
  #define        _PAGE_WRITETHRU 0x00000800              /* H: W bit */
  
diff --git a/include/asm-ppc/ppc4xx_dma.h b/include/asm-ppc/ppc4xx_dma.h

index 8636cdbf6f8f36428b49346d86e8604a2481d314..a415001165fabd74f1d53f3c1c5dcbc07674ef7c 100644 (file)
--- a/include/asm-ppc/ppc4xx_dma.h
+++ b/include/asm-ppc/ppc4xx_dma.h
@@ -285,7 +285,7 @@ typedef uint32_t sgl_handle_t;
  
  #define GET_DMA_POLARITY(chan) (DMAReq_ActiveLow(chan) | DMAAck_ActiveLow(chan) | EOT_ActiveLow(chan))
  
-#elif defined(CONFIG_STBXXX_DMA)               /* stb03xxx */
+#elif defined(CONFIG_STB03xxx)         /* stb03xxx */
  
  #define DMA_PPC4xx_SIZE        4096
  
diff --git a/include/asm-ppc/ppc_asm.h b/include/asm-ppc/ppc_asm.h

index f76221def484d3f8bffa45b0f9a71f9ee88a9cd4..bb53e2def363df54400d8cedfbfdb2d56addf23c 100644 (file)
--- a/include/asm-ppc/ppc_asm.h
+++ b/include/asm-ppc/ppc_asm.h
@@ -186,6 +186,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
  #define PPC405_ERR77_SYNC
  #endif
  
+#ifdef CONFIG_IBM440EP_ERR42
+#define PPC440EP_ERR42 isync
+#else
+#define PPC440EP_ERR42
+#endif
+
  /* The boring bits... */
  
  /* Condition Register Bit Fields */
diff --git a/include/asm-ppc/resource.h b/include/asm-ppc/resource.h

deleted file mode 100644 (file)

index 86a1ea2..0000000
--- a/include/asm-ppc/resource.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _PPC_RESOURCE_H
-#define _PPC_RESOURCE_H
-
-#include <asm-generic/resource.h>
-
-#endif
diff --git a/include/asm-ppc/socket.h b/include/asm-ppc/socket.h

index 4134376b0f66c68bc55b6e9291b87c2f812384ec..296e1a3469d0b07f0a40b01b5acee0b696a1daf3 100644 (file)
--- a/include/asm-ppc/socket.h
+++ b/include/asm-ppc/socket.h
@@ -20,6 +20,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-ppc/time.h b/include/asm-ppc/time.h

index ce09b47fa819c64333e1fd3a576d93714cbffe0c..321fb75b5f222f7a20ca3841fedf5b040ba823cf 100644 (file)
--- a/include/asm-ppc/time.h
+++ b/include/asm-ppc/time.h
@@ -58,7 +58,7 @@ static __inline__ void set_dec(unsigned int val)
  /* Accessor functions for the timebase (RTC on 601) registers. */
  /* If one day CONFIG_POWER is added just define __USE_RTC as 1 */
  #ifdef CONFIG_6xx
-extern __inline__ int const __USE_RTC(void) {
+extern __inline__ int __attribute_pure__ __USE_RTC(void) {
         return (mfspr(SPRN_PVR)>>16) == 1;
  }
  #else
diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h

index 6d4e8e787058e8253a66ee5265270abcd5b92740..84c24d4cdb71a1e5234a3abec11308468b96528f 100644 (file)
--- a/include/asm-ppc64/abs_addr.h
+++ b/include/asm-ppc64/abs_addr.h
@@ -16,93 +16,51 @@
  #include <asm/page.h>
  #include <asm/prom.h>
  #include <asm/lmb.h>
+#include <asm/firmware.h>
  
-typedef u32 msChunks_entry;
-struct msChunks {
+struct mschunks_map {
          unsigned long num_chunks;
          unsigned long chunk_size;
          unsigned long chunk_shift;
          unsigned long chunk_mask;
-        msChunks_entry *abs;
+        u32 *mapping;
  };
  
-extern struct msChunks msChunks;
+extern struct mschunks_map mschunks_map;
  
-extern unsigned long msChunks_alloc(unsigned long, unsigned long, unsigned long);
-extern unsigned long reloc_offset(void);
+/* Chunks are 256 KB */
+#define MSCHUNKS_CHUNK_SHIFT   (18)
+#define MSCHUNKS_CHUNK_SIZE    (1UL << MSCHUNKS_CHUNK_SHIFT)
+#define MSCHUNKS_OFFSET_MASK   (MSCHUNKS_CHUNK_SIZE - 1)
  
-#ifdef CONFIG_MSCHUNKS
-
-static inline unsigned long
-chunk_to_addr(unsigned long chunk)
+static inline unsigned long chunk_to_addr(unsigned long chunk)
  {
-       unsigned long offset = reloc_offset();
-       struct msChunks *_msChunks = PTRRELOC(&msChunks);
-
-       return chunk << _msChunks->chunk_shift;
+       return chunk << MSCHUNKS_CHUNK_SHIFT;
  }
  
-static inline unsigned long
-addr_to_chunk(unsigned long addr)
+static inline unsigned long addr_to_chunk(unsigned long addr)
  {
-       unsigned long offset = reloc_offset();
-       struct msChunks *_msChunks = PTRRELOC(&msChunks);
-
-       return addr >> _msChunks->chunk_shift;
+       return addr >> MSCHUNKS_CHUNK_SHIFT;
  }
  
-static inline unsigned long
-chunk_offset(unsigned long addr)
+static inline unsigned long phys_to_abs(unsigned long pa)
  {
-       unsigned long offset = reloc_offset();
-       struct msChunks *_msChunks = PTRRELOC(&msChunks);
+       unsigned long chunk;
  
-       return addr & _msChunks->chunk_mask;
-}
+       /* This is a no-op on non-iSeries */
+       if (!firmware_has_feature(FW_FEATURE_ISERIES))
+               return pa;
  
-static inline unsigned long
-abs_chunk(unsigned long pchunk)
-{
-       unsigned long offset = reloc_offset();
-       struct msChunks *_msChunks = PTRRELOC(&msChunks);
-       if ( pchunk >= _msChunks->num_chunks ) {
-               return pchunk;
-       }
-       return PTRRELOC(_msChunks->abs)[pchunk];
-}
+       chunk = addr_to_chunk(pa);
  
-/* A macro so it can take pointers or unsigned long. */
-#define phys_to_abs(pa)                                                     \
-       ({ unsigned long _pa = (unsigned long)(pa);                          \
-          chunk_to_addr(abs_chunk(addr_to_chunk(_pa))) + chunk_offset(_pa); \
-       })
+       if (chunk < mschunks_map.num_chunks)
+               chunk = mschunks_map.mapping[chunk];
  
-static inline unsigned long
-physRpn_to_absRpn(unsigned long rpn)
-{
-       unsigned long pa = rpn << PAGE_SHIFT;
-       unsigned long aa = phys_to_abs(pa);
-       return (aa >> PAGE_SHIFT);
+       return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK);
  }
  
-/* A macro so it can take pointers or unsigned long. */
-#define abs_to_phys(aa) lmb_abs_to_phys((unsigned long)(aa))
-
-#else  /* !CONFIG_MSCHUNKS */
-
-#define chunk_to_addr(chunk) ((unsigned long)(chunk))
-#define addr_to_chunk(addr) (addr)
-#define chunk_offset(addr) (0)
-#define abs_chunk(pchunk) (pchunk)
-
-#define phys_to_abs(pa) (pa)
-#define physRpn_to_absRpn(rpn) (rpn)
-#define abs_to_phys(aa) (aa)
-
-#endif /* !CONFIG_MSCHUNKS */
-
  /* Convenience macros */
  #define virt_to_abs(va) phys_to_abs(__pa(va))
-#define abs_to_virt(aa) __va(abs_to_phys(aa))
+#define abs_to_virt(aa) __va(aa)
  
  #endif /* _ABS_ADDR_H */
diff --git a/include/asm-ppc64/agp.h b/include/asm-ppc64/agp.h

deleted file mode 100644 (file)

index ca9e423..0000000
--- a/include/asm-ppc64/agp.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef AGP_H
-#define AGP_H 1
-
-#include <asm/io.h>
-
-/* nothing much needed here */
-
-#define map_page_into_agp(page)
-#define unmap_page_from_agp(page)
-#define flush_agp_mappings()
-#define flush_agp_cache() mb()
-
-/* Convert a physical address to an address suitable for the GART. */
-#define phys_to_gart(x) (x)
-#define gart_to_phys(x) (x)
-
-/* GATT allocation. Returns/accepts GATT kernel virtual address. */
-#define alloc_gatt_pages(order)                \
-       ((char *)__get_free_pages(GFP_KERNEL, (order)))
-#define free_gatt_pages(table, order)  \
-       free_pages((unsigned long)(table), (order))
-
-#endif
diff --git a/include/asm-ppc64/bug.h b/include/asm-ppc64/bug.h

index 169868fa307defa30c7782cea4b96b2ab278ac54..1601782788610748de11a46dd6cfa288f3dc7a5e 100644 (file)
--- a/include/asm-ppc64/bug.h
+++ b/include/asm-ppc64/bug.h
@@ -43,8 +43,8 @@ struct bug_entry *find_bug(unsigned long bugaddr);
                 ".section __bug_table,\"a\"\n\t"                \
                 "       .llong 1b,%1,%2,%3\n"                   \
                 ".previous"                                     \
-               : : "r" (x), "i" (__LINE__), "i" (__FILE__),    \
-                   "i" (__FUNCTION__));                        \
+               : : "r" ((long long)(x)), "i" (__LINE__),       \
+                   "i" (__FILE__), "i" (__FUNCTION__));        \
  } while (0)
  
  #define WARN_ON(x) do {                                                \
@@ -53,7 +53,8 @@ struct bug_entry *find_bug(unsigned long bugaddr);
                 ".section __bug_table,\"a\"\n\t"                \
                 "       .llong 1b,%1,%2,%3\n"                   \
                 ".previous"                                     \
-               : : "r" (x), "i" (__LINE__ + BUG_WARNING_TRAP), \
+               : : "r" ((long long)(x)),                       \
+                   "i" (__LINE__ + BUG_WARNING_TRAP),          \
                     "i" (__FILE__), "i" (__FUNCTION__));        \
  } while (0)
  
diff --git a/include/asm-ppc64/cputable.h b/include/asm-ppc64/cputable.h

index d67fa9e2607908477b8517df6a5ffe8bb9934337..ae6cf3830108c0d59e22a637378e9ed100ddbdb2 100644 (file)
--- a/include/asm-ppc64/cputable.h
+++ b/include/asm-ppc64/cputable.h
@@ -56,11 +56,6 @@ struct cpu_spec {
          * BHT, SPD, etc... from head.S before branching to identify_machine
          */
         cpu_setup_t     cpu_setup;
-
-       /* This is used to identify firmware features which are available
-        * to the kernel.
-        */
-       unsigned long   firmware_features;
  };
  
  extern struct cpu_spec         cpu_specs[];
@@ -71,39 +66,6 @@ static inline unsigned long cpu_has_feature(unsigned long feature)
         return cur_cpu_spec->cpu_features & feature;
  }
  
-
-/* firmware feature bitmask values */
-#define FIRMWARE_MAX_FEATURES 63
-
-#define FW_FEATURE_PFT         (1UL<<0)
-#define FW_FEATURE_TCE         (1UL<<1)        
-#define FW_FEATURE_SPRG0       (1UL<<2)        
-#define FW_FEATURE_DABR                (1UL<<3)        
-#define FW_FEATURE_COPY                (1UL<<4)        
-#define FW_FEATURE_ASR         (1UL<<5)        
-#define FW_FEATURE_DEBUG       (1UL<<6)        
-#define FW_FEATURE_TERM                (1UL<<7)        
-#define FW_FEATURE_PERF                (1UL<<8)        
-#define FW_FEATURE_DUMP                (1UL<<9)        
-#define FW_FEATURE_INTERRUPT   (1UL<<10)       
-#define FW_FEATURE_MIGRATE     (1UL<<11)       
-#define FW_FEATURE_PERFMON     (1UL<<12)       
-#define FW_FEATURE_CRQ         (1UL<<13)       
-#define FW_FEATURE_VIO         (1UL<<14)       
-#define FW_FEATURE_RDMA        (1UL<<15)       
-#define FW_FEATURE_LLAN        (1UL<<16)       
-#define FW_FEATURE_BULK        (1UL<<17)       
-#define FW_FEATURE_XDABR       (1UL<<18)       
-#define FW_FEATURE_MULTITCE    (1UL<<19)       
-#define FW_FEATURE_SPLPAR      (1UL<<20)       
-
-typedef struct {
-    unsigned long val;
-    char * name;
-} firmware_feature_t;
-
-extern firmware_feature_t firmware_features_table[];
-
  #endif /* __ASSEMBLY__ */
  
  /* CPU kernel features */
@@ -140,10 +102,8 @@ extern firmware_feature_t firmware_features_table[];
  #define CPU_FTR_MMCRA_SIHV             ASM_CONST(0x0000080000000000)
  #define CPU_FTR_CTRL                   ASM_CONST(0x0000100000000000)
  
-/* Platform firmware features */
-#define FW_FTR_                                ASM_CONST(0x0000000000000001)
-
  #ifndef __ASSEMBLY__
+
  #define COMMON_USER_PPC64      (PPC_FEATURE_32 | PPC_FEATURE_64 | \
                                  PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU)
  
@@ -156,10 +116,9 @@ extern firmware_feature_t firmware_features_table[];
  #define CPU_FTR_PPCAS_ARCH_V2  (CPU_FTR_PPCAS_ARCH_V2_BASE)
  #else
  #define CPU_FTR_PPCAS_ARCH_V2  (CPU_FTR_PPCAS_ARCH_V2_BASE | CPU_FTR_16M_PAGE)
-#endif
+#endif /* CONFIG_PPC_ISERIES */
  
-#define COMMON_PPC64_FW        (0)
-#endif
+#endif /* __ASSEMBLY__ */
  
  #ifdef __ASSEMBLY__
  
diff --git a/include/asm-ppc64/cputime.h b/include/asm-ppc64/cputime.h

deleted file mode 100644 (file)

index 8e9faf5..0000000
--- a/include/asm-ppc64/cputime.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __PPC_CPUTIME_H
-#define __PPC_CPUTIME_H
-
-#include <asm-generic/cputime.h>
-
-#endif /* __PPC_CPUTIME_H */
diff --git a/include/asm-ppc64/div64.h b/include/asm-ppc64/div64.h

deleted file mode 100644 (file)

index 6cd978c..0000000
--- a/include/asm-ppc64/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/include/asm-ppc64/emergency-restart.h b/include/asm-ppc64/emergency-restart.h

deleted file mode 100644 (file)

index 108d8c4..0000000
--- a/include/asm-ppc64/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/include/asm-ppc64/errno.h b/include/asm-ppc64/errno.h

deleted file mode 100644 (file)

index 69bc3b0..0000000
--- a/include/asm-ppc64/errno.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef _PPC64_ERRNO_H
-#define _PPC64_ERRNO_H
-
-/* 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm-generic/errno.h>
-
-#undef EDEADLOCK
-#define        EDEADLOCK       58      /* File locking deadlock error */
-
-#define _LAST_ERRNO    516
-
-#endif
diff --git a/include/asm-ppc64/firmware.h b/include/asm-ppc64/firmware.h

new file mode 100644 (file)

index 0000000..22bb85c
--- /dev/null
+++ b/include/asm-ppc64/firmware.h
@@ -0,0 +1,101 @@
+/*
+ *  include/asm-ppc64/firmware.h
+ *
+ *  Extracted from include/asm-ppc64/cputable.h
+ *
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Modifications for ppc64:
+ *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+#ifndef __ASM_PPC_FIRMWARE_H
+#define __ASM_PPC_FIRMWARE_H
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+
+/* firmware feature bitmask values */
+#define FIRMWARE_MAX_FEATURES 63
+
+#define FW_FEATURE_PFT         (1UL<<0)
+#define FW_FEATURE_TCE         (1UL<<1)
+#define FW_FEATURE_SPRG0       (1UL<<2)
+#define FW_FEATURE_DABR                (1UL<<3)
+#define FW_FEATURE_COPY                (1UL<<4)
+#define FW_FEATURE_ASR         (1UL<<5)
+#define FW_FEATURE_DEBUG       (1UL<<6)
+#define FW_FEATURE_TERM                (1UL<<7)
+#define FW_FEATURE_PERF                (1UL<<8)
+#define FW_FEATURE_DUMP                (1UL<<9)
+#define FW_FEATURE_INTERRUPT   (1UL<<10)
+#define FW_FEATURE_MIGRATE     (1UL<<11)
+#define FW_FEATURE_PERFMON     (1UL<<12)
+#define FW_FEATURE_CRQ         (1UL<<13)
+#define FW_FEATURE_VIO         (1UL<<14)
+#define FW_FEATURE_RDMA                (1UL<<15)
+#define FW_FEATURE_LLAN                (1UL<<16)
+#define FW_FEATURE_BULK                (1UL<<17)
+#define FW_FEATURE_XDABR       (1UL<<18)
+#define FW_FEATURE_MULTITCE    (1UL<<19)
+#define FW_FEATURE_SPLPAR      (1UL<<20)
+#define FW_FEATURE_ISERIES     (1UL<<21)
+
+enum {
+       FW_FEATURE_PSERIES_POSSIBLE = FW_FEATURE_PFT | FW_FEATURE_TCE |
+               FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY |
+               FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM |
+               FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT |
+               FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ |
+               FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
+               FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE |
+               FW_FEATURE_SPLPAR,
+       FW_FEATURE_PSERIES_ALWAYS = 0,
+       FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES,
+       FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES,
+       FW_FEATURE_POSSIBLE =
+#ifdef CONFIG_PPC_PSERIES
+               FW_FEATURE_PSERIES_POSSIBLE |
+#endif
+#ifdef CONFIG_PPC_ISERIES
+               FW_FEATURE_ISERIES_POSSIBLE |
+#endif
+               0,
+       FW_FEATURE_ALWAYS =
+#ifdef CONFIG_PPC_PSERIES
+               FW_FEATURE_PSERIES_ALWAYS &
+#endif
+#ifdef CONFIG_PPC_ISERIES
+               FW_FEATURE_ISERIES_ALWAYS &
+#endif
+               FW_FEATURE_POSSIBLE,
+};
+
+/* This is used to identify firmware features which are available
+ * to the kernel.
+ */
+extern unsigned long   ppc64_firmware_features;
+
+static inline unsigned long firmware_has_feature(unsigned long feature)
+{
+       return (FW_FEATURE_ALWAYS & feature) ||
+               (FW_FEATURE_POSSIBLE & ppc64_firmware_features & feature);
+}
+
+#ifdef CONFIG_PPC_PSERIES
+typedef struct {
+    unsigned long val;
+    char * name;
+} firmware_feature_t;
+
+extern firmware_feature_t firmware_features_table[];
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* __ASM_PPC_FIRMWARE_H */
diff --git a/include/asm-ppc64/hdreg.h b/include/asm-ppc64/hdreg.h

deleted file mode 100644 (file)

index 7f7fd1a..0000000
--- a/include/asm-ppc64/hdreg.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/hdreg.h>
diff --git a/include/asm-ppc64/iSeries/LparMap.h b/include/asm-ppc64/iSeries/LparMap.h

index 5c32e38c1c0168aab1fa9fc202f3e9f6973709d8..a6840b186d03b87c1967ddb9cf945a3a8be53fb1 100644 (file)
--- a/include/asm-ppc64/iSeries/LparMap.h
+++ b/include/asm-ppc64/iSeries/LparMap.h
@@ -19,6 +19,8 @@
  #ifndef _LPARMAP_H
  #define _LPARMAP_H
  
+#ifndef __ASSEMBLY__
+
  #include <asm/types.h>
  
  /*
@@ -71,6 +73,11 @@ struct LparMap {
         } xRanges[HvRangesToMap];
  };
  
-extern struct LparMap          xLparMap;
+extern const struct LparMap    xLparMap;
+
+#endif /* __ASSEMBLY__ */
+
+/* the fixed address where the LparMap exists */
+#define LPARMAP_PHYS           0x7000
  
  #endif /* _LPARMAP_H */
diff --git a/include/asm-ppc64/imalloc.h b/include/asm-ppc64/imalloc.h

index e46ff68a6e4184923547153af0853d91bef2264e..42adf7033a8106219f74c8678431a1d9037736cd 100644 (file)
--- a/include/asm-ppc64/imalloc.h
+++ b/include/asm-ppc64/imalloc.h
@@ -6,7 +6,7 @@
   */
  #define PHBS_IO_BASE     VMALLOC_END
  #define IMALLOC_BASE      (PHBS_IO_BASE + 0x80000000ul)        /* Reserve 2 gigs for PHBs */
-#define IMALLOC_END       (VMALLOC_START + EADDR_MASK)
+#define IMALLOC_END       (VMALLOC_START + PGTABLE_RANGE)
  
  
  /* imalloc region types */
diff --git a/include/asm-ppc64/ioctl.h b/include/asm-ppc64/ioctl.h

deleted file mode 100644 (file)

index 42b8c5d..0000000
--- a/include/asm-ppc64/ioctl.h
+++ /dev/null
@@ -1,74 +0,0 @@
-#ifndef _PPC64_IOCTL_H
-#define _PPC64_IOCTL_H
-
-
-/*
- * This was copied from the alpha as it's a bit cleaner there.
- *                         -- Cort
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define _IOC_NRBITS    8
-#define _IOC_TYPEBITS  8
-#define _IOC_SIZEBITS  13
-#define _IOC_DIRBITS   3
-
-#define _IOC_NRMASK    ((1 << _IOC_NRBITS)-1)
-#define _IOC_TYPEMASK  ((1 << _IOC_TYPEBITS)-1)
-#define _IOC_SIZEMASK  ((1 << _IOC_SIZEBITS)-1)
-#define _IOC_DIRMASK   ((1 << _IOC_DIRBITS)-1)
-
-#define _IOC_NRSHIFT   0
-#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
-#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
-#define _IOC_DIRSHIFT  (_IOC_SIZESHIFT+_IOC_SIZEBITS)
-
-/*
- * Direction bits _IOC_NONE could be 0, but OSF/1 gives it a bit.
- * And this turns out useful to catch old ioctl numbers in header
- * files for us.
- */
-#define _IOC_NONE      1U
-#define _IOC_READ      2U
-#define _IOC_WRITE     4U
-
-#define _IOC(dir,type,nr,size) \
-       (((dir)  << _IOC_DIRSHIFT) | \
-        ((type) << _IOC_TYPESHIFT) | \
-        ((nr)   << _IOC_NRSHIFT) | \
-        ((size) << _IOC_SIZESHIFT))
-
-/* provoke compile error for invalid uses of size argument */
-extern unsigned int __invalid_size_argument_for_IOC;
-#define _IOC_TYPECHECK(t) \
-       ((sizeof(t) == sizeof(t[1]) && \
-         sizeof(t) < (1 << _IOC_SIZEBITS)) ? \
-         sizeof(t) : __invalid_size_argument_for_IOC)
-
-/* used to create numbers */
-#define _IO(type,nr)           _IOC(_IOC_NONE,(type),(nr),0)
-#define _IOR(type,nr,size)     _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size)))
-#define _IOW(type,nr,size)     _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size)))
-#define _IOWR(type,nr,size)    _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size)))
-#define _IOR_BAD(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size))
-#define _IOW_BAD(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
-#define _IOWR_BAD(type,nr,size)        _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
-
-/* used to decode them.. */
-#define _IOC_DIR(nr)           (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
-#define _IOC_TYPE(nr)          (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
-#define _IOC_NR(nr)            (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
-#define _IOC_SIZE(nr)          (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
-
-/* various drivers, such as the pcmcia stuff, need these... */
-#define IOC_IN         (_IOC_WRITE << _IOC_DIRSHIFT)
-#define IOC_OUT                (_IOC_READ << _IOC_DIRSHIFT)
-#define IOC_INOUT      ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
-#define IOCSIZE_MASK   (_IOC_SIZEMASK << _IOC_SIZESHIFT)
-#define IOCSIZE_SHIFT  (_IOC_SIZESHIFT)
-
-#endif /* _PPC64_IOCTL_H */
diff --git a/include/asm-ppc64/ioctls.h b/include/asm-ppc64/ioctls.h

deleted file mode 100644 (file)

index 48796bf..0000000
--- a/include/asm-ppc64/ioctls.h
+++ /dev/null
@@ -1,114 +0,0 @@
-#ifndef _ASM_PPC64_IOCTLS_H
-#define _ASM_PPC64_IOCTLS_H
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/ioctl.h>
-
-#define FIOCLEX                _IO('f', 1)
-#define FIONCLEX       _IO('f', 2)
-#define FIOASYNC       _IOW('f', 125, int)
-#define FIONBIO                _IOW('f', 126, int)
-#define FIONREAD       _IOR('f', 127, int)
-#define TIOCINQ                FIONREAD
-#define FIOQSIZE        _IOR('f', 128, loff_t)
-
-#define TIOCGETP       _IOR('t', 8, struct sgttyb)
-#define TIOCSETP       _IOW('t', 9, struct sgttyb)
-#define TIOCSETN       _IOW('t', 10, struct sgttyb)    /* TIOCSETP wo flush */
-
-#define TIOCSETC       _IOW('t', 17, struct tchars)
-#define TIOCGETC       _IOR('t', 18, struct tchars)
-#define TCGETS         _IOR('t', 19, struct termios)
-#define TCSETS         _IOW('t', 20, struct termios)
-#define TCSETSW                _IOW('t', 21, struct termios)
-#define TCSETSF                _IOW('t', 22, struct termios)
-
-#define TCGETA         _IOR('t', 23, struct termio)
-#define TCSETA         _IOW('t', 24, struct termio)
-#define TCSETAW                _IOW('t', 25, struct termio)
-#define TCSETAF                _IOW('t', 28, struct termio)
-
-#define TCSBRK         _IO('t', 29)
-#define TCXONC         _IO('t', 30)
-#define TCFLSH         _IO('t', 31)
-
-#define TIOCSWINSZ     _IOW('t', 103, struct winsize)
-#define TIOCGWINSZ     _IOR('t', 104, struct winsize)
-#define        TIOCSTART       _IO('t', 110)           /* start output, like ^Q */
-#define        TIOCSTOP        _IO('t', 111)           /* stop output, like ^S */
-#define TIOCOUTQ        _IOR('t', 115, int)     /* output queue size */
-
-#define TIOCGLTC       _IOR('t', 116, struct ltchars)
-#define TIOCSLTC       _IOW('t', 117, struct ltchars)
-#define TIOCSPGRP      _IOW('t', 118, int)
-#define TIOCGPGRP      _IOR('t', 119, int)
-
-#define TIOCEXCL       0x540C
-#define TIOCNXCL       0x540D
-#define TIOCSCTTY      0x540E
-
-#define TIOCSTI                0x5412
-#define TIOCMGET       0x5415
-#define TIOCMBIS       0x5416
-#define TIOCMBIC       0x5417
-#define TIOCMSET       0x5418
-# define TIOCM_LE      0x001
-# define TIOCM_DTR     0x002
-# define TIOCM_RTS     0x004
-# define TIOCM_ST      0x008
-# define TIOCM_SR      0x010
-# define TIOCM_CTS     0x020
-# define TIOCM_CAR     0x040
-# define TIOCM_RNG     0x080
-# define TIOCM_DSR     0x100
-# define TIOCM_CD      TIOCM_CAR
-# define TIOCM_RI      TIOCM_RNG
-
-#define TIOCGSOFTCAR   0x5419
-#define TIOCSSOFTCAR   0x541A
-#define TIOCLINUX      0x541C
-#define TIOCCONS       0x541D
-#define TIOCGSERIAL    0x541E
-#define TIOCSSERIAL    0x541F
-#define TIOCPKT                0x5420
-# define TIOCPKT_DATA           0
-# define TIOCPKT_FLUSHREAD      1
-# define TIOCPKT_FLUSHWRITE     2
-# define TIOCPKT_STOP           4
-# define TIOCPKT_START          8
-# define TIOCPKT_NOSTOP                16
-# define TIOCPKT_DOSTOP                32
-
-
-#define TIOCNOTTY      0x5422
-#define TIOCSETD       0x5423
-#define TIOCGETD       0x5424
-#define TCSBRKP                0x5425  /* Needed for POSIX tcsendbreak() */
-#define TIOCSBRK       0x5427  /* BSD compatibility */
-#define TIOCCBRK       0x5428  /* BSD compatibility */
-#define TIOCGSID       0x5429  /* Return the session ID of FD */
-#define TIOCGPTN       _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
-#define TIOCSPTLCK     _IOW('T',0x31, int)  /* Lock/unlock Pty */
-
-#define TIOCSERCONFIG  0x5453
-#define TIOCSERGWILD   0x5454
-#define TIOCSERSWILD   0x5455
-#define TIOCGLCKTRMIOS 0x5456
-#define TIOCSLCKTRMIOS 0x5457
-#define TIOCSERGSTRUCT 0x5458 /* For debugging only */
-#define TIOCSERGETLSR   0x5459 /* Get line status register */
-  /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-# define TIOCSER_TEMT    0x01  /* Transmitter physically empty */
-#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
-#define TIOCSERSETMULTI 0x545B /* Set multiport config */
-
-#define TIOCMIWAIT     0x545C  /* wait for a change on serial input line(s) */
-#define TIOCGICOUNT    0x545D  /* read serial port inline interrupt counts */
-
-#endif /* _ASM_PPC64_IOCTLS_H */
diff --git a/include/asm-ppc64/iommu.h b/include/asm-ppc64/iommu.h

index 729de5cc21d984206b33bf8e142cc9cb8a27668b..72dcf8116b04390446c7e1e4c2110f46e5df5962 100644 (file)
--- a/include/asm-ppc64/iommu.h
+++ b/include/asm-ppc64/iommu.h
@@ -104,9 +104,6 @@ extern void iommu_devnode_init_pSeries(struct device_node *dn);
  
  #ifdef CONFIG_PPC_ISERIES
  
-/* Initializes tables for bio buses */
-extern void __init iommu_vio_init(void);
-
  struct iSeries_Device_Node;
  /* Creates table for an individual device node */
  extern void iommu_devnode_init_iSeries(struct iSeries_Device_Node *dn);
diff --git a/include/asm-ppc64/ipc.h b/include/asm-ppc64/ipc.h

deleted file mode 100644 (file)

index a46e3d9..0000000
--- a/include/asm-ppc64/ipc.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipc.h>
diff --git a/include/asm-ppc64/linkage.h b/include/asm-ppc64/linkage.h

deleted file mode 100644 (file)

index 291c2d0..0000000
--- a/include/asm-ppc64/linkage.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_LINKAGE_H
-#define __ASM_LINKAGE_H
-
-/* Nothing to see here... */
-
-#endif
diff --git a/include/asm-ppc64/lmb.h b/include/asm-ppc64/lmb.h

index a6cbca21ac1d7082147fa8800d3851d7c5bbe2b1..cb368bf0f264306decc6b7e00618a6bfb5f52174 100644 (file)
--- a/include/asm-ppc64/lmb.h
+++ b/include/asm-ppc64/lmb.h
@@ -22,7 +22,6 @@
  
  struct lmb_property {
         unsigned long base;
-       unsigned long physbase;
         unsigned long size;
  };
  
diff --git a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h

index f0c1d2d926722aa2b4da7422aa5c033b8c0d3c51..ff2c9287d3b6bda6ea9cc5a01d35a5ee02f63e5c 100644 (file)
--- a/include/asm-ppc64/machdep.h
+++ b/include/asm-ppc64/machdep.h
@@ -84,7 +84,7 @@ struct machdep_calls {
  
         void            (*init_IRQ)(void);
         int             (*get_irq)(struct pt_regs *);
-       void            (*cpu_irq_down)(void);
+       void            (*cpu_irq_down)(int secondary);
  
         /* PCI stuff */
         void            (*pcibios_fixup)(void);
@@ -140,6 +140,9 @@ struct machdep_calls {
  
         /* Idle loop for this platform, leave empty for default idle loop */
         int             (*idle_loop)(void);
+
+       /* Function to enable pmcs for this platform, called once per cpu. */
+       void            (*enable_pmcs)(void);
  };
  
  extern int default_idle(void);
diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h

index 70348a85131396773e8b1a115325246211f53e7d..ad36bb28de2983c92750a8072195b34cc5db4c5f 100644 (file)
--- a/include/asm-ppc64/mmu.h
+++ b/include/asm-ppc64/mmu.h
@@ -28,9 +28,12 @@
  #define STE_VSID_SHIFT 12
  
  /* Location of cpu0's segment table */
-#define STAB0_PAGE     0x9
+#define STAB0_PAGE     0x6
  #define STAB0_PHYS_ADDR        (STAB0_PAGE<<PAGE_SHIFT)
-#define STAB0_VIRT_ADDR        (KERNELBASE+STAB0_PHYS_ADDR)
+
+#ifndef __ASSEMBLY__
+extern char initial_stab[];
+#endif /* ! __ASSEMBLY__ */
  
  /*
   * SLB
@@ -259,8 +262,10 @@ extern void stabs_alloc(void);
  #define VSID_BITS      36
  #define VSID_MODULUS   ((1UL<<VSID_BITS)-1)
  
-#define CONTEXT_BITS   20
-#define USER_ESID_BITS 15
+#define CONTEXT_BITS   19
+#define USER_ESID_BITS 16
+
+#define USER_VSID_RANGE        (1UL << (USER_ESID_BITS + SID_SHIFT))
  
  /*
   * This macro generates asm code to compute the VSID scramble
@@ -302,8 +307,7 @@ typedef unsigned long mm_context_id_t;
  typedef struct {
         mm_context_id_t id;
  #ifdef CONFIG_HUGETLB_PAGE
-       pgd_t *huge_pgdir;
-       u16 htlb_segs; /* bitmask */
+       u16 low_htlb_areas, high_htlb_areas;
  #endif
  } mm_context_t;
  
diff --git a/include/asm-ppc64/naca.h b/include/asm-ppc64/naca.h

index bfb7caa32eaf74ecb599a21ce0435ca23d93e326..d2afe64475975768bd513d732c7b0c3da8813bd4 100644 (file)
--- a/include/asm-ppc64/naca.h
+++ b/include/asm-ppc64/naca.h
@@ -12,8 +12,6 @@
  
  #include <asm/types.h>
  
-#ifndef __ASSEMBLY__
-
  struct naca_struct {
         /* Kernel only data - undefined for user space */
         void *xItVpdAreas;              /* VPD Data                  0x00 */
@@ -23,9 +21,4 @@ struct naca_struct {
  
  extern struct naca_struct naca;
  
-#endif /* __ASSEMBLY__ */
-
-#define NACA_PAGE      0x4
-#define NACA_PHYS_ADDR (NACA_PAGE<<PAGE_SHIFT)
-
  #endif /* _NACA_H */
diff --git a/include/asm-ppc64/namei.h b/include/asm-ppc64/namei.h

deleted file mode 100644 (file)

index a1412a2..0000000
--- a/include/asm-ppc64/namei.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* 
- * linux/include/asm-ppc/namei.h
- * Adapted from linux/include/asm-alpha/namei.h
- *
- * Included from linux/fs/namei.c
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef __PPC64_NAMEI_H
-#define __PPC64_NAMEI_H
-
-/* This dummy routine maybe changed to something useful
- * for /usr/gnemul/ emulation stuff.
- * Look at asm-sparc/namei.h for details.
- */
-
-#define __emul_prefix() NULL
-
-#endif /* __PPC64_NAMEI_H */
diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h

index a5893a305a09bdb27a2b12b15980507b25a14d3e..a79a08df62bd8c89740c5a3a4db6a6ad29d54519 100644 (file)
--- a/include/asm-ppc64/page.h
+++ b/include/asm-ppc64/page.h
@@ -37,39 +37,45 @@
  
  #define HUGETLB_PAGE_ORDER     (HPAGE_SHIFT - PAGE_SHIFT)
  
-/* For 64-bit processes the hugepage range is 1T-1.5T */
-#define TASK_HPAGE_BASE ASM_CONST(0x0000010000000000)
-#define TASK_HPAGE_END         ASM_CONST(0x0000018000000000)
+#define HTLB_AREA_SHIFT                40
+#define HTLB_AREA_SIZE         (1UL << HTLB_AREA_SHIFT)
+#define GET_HTLB_AREA(x)       ((x) >> HTLB_AREA_SHIFT)
  
  #define LOW_ESID_MASK(addr, len)       (((1U << (GET_ESID(addr+len-1)+1)) \
                                         - (1U << GET_ESID(addr))) & 0xffff)
+#define HTLB_AREA_MASK(addr, len)      (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \
+                                       - (1U << GET_HTLB_AREA(addr))) & 0xffff)
  
  #define ARCH_HAS_HUGEPAGE_ONLY_RANGE
  #define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
+#define ARCH_HAS_SETCLEAR_HUGE_PTE
  
  #define touches_hugepage_low_range(mm, addr, len) \
-       (LOW_ESID_MASK((addr), (len)) & mm->context.htlb_segs)
-#define touches_hugepage_high_range(addr, len) \
-       (((addr) > (TASK_HPAGE_BASE-(len))) && ((addr) < TASK_HPAGE_END))
+       (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas)
+#define touches_hugepage_high_range(mm, addr, len) \
+       (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas)
  
  #define __within_hugepage_low_range(addr, len, segmask) \
         ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask))
  #define within_hugepage_low_range(addr, len) \
         __within_hugepage_low_range((addr), (len), \
-                                   current->mm->context.htlb_segs)
-#define within_hugepage_high_range(addr, len) (((addr) >= TASK_HPAGE_BASE) \
-         && ((addr)+(len) <= TASK_HPAGE_END) && ((addr)+(len) >= (addr)))
+                                   current->mm->context.low_htlb_areas)
+#define __within_hugepage_high_range(addr, len, zonemask) \
+       ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask))
+#define within_hugepage_high_range(addr, len) \
+       __within_hugepage_high_range((addr), (len), \
+                                   current->mm->context.high_htlb_areas)
  
  #define is_hugepage_only_range(mm, addr, len) \
-       (touches_hugepage_high_range((addr), (len)) || \
+       (touches_hugepage_high_range((mm), (addr), (len)) || \
           touches_hugepage_low_range((mm), (addr), (len)))
  #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
  
  #define in_hugepage_area(context, addr) \
         (cpu_has_feature(CPU_FTR_16M_PAGE) && \
-        ( (((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \
+        ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \
            ( ((addr) < 0x100000000L) && \
-            ((1 << GET_ESID(addr)) & (context).htlb_segs) ) ) )
+            ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) )
  
  #else /* !CONFIG_HUGETLB_PAGE */
  
@@ -125,36 +131,42 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag
   * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b.
   */
  typedef struct { unsigned long pte; } pte_t;
-typedef struct { unsigned int  pmd; } pmd_t;
-typedef struct { unsigned int  pgd; } pgd_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
+typedef struct { unsigned long pgd; } pgd_t;
  typedef struct { unsigned long pgprot; } pgprot_t;
  
  #define pte_val(x)     ((x).pte)
  #define pmd_val(x)     ((x).pmd)
+#define pud_val(x)     ((x).pud)
  #define pgd_val(x)     ((x).pgd)
  #define pgprot_val(x)  ((x).pgprot)
  
-#define __pte(x)       ((pte_t) { (x) } )
-#define __pmd(x)       ((pmd_t) { (x) } )
-#define __pgd(x)       ((pgd_t) { (x) } )
-#define __pgprot(x)    ((pgprot_t) { (x) } )
+#define __pte(x)       ((pte_t) { (x) })
+#define __pmd(x)       ((pmd_t) { (x) })
+#define __pud(x)       ((pud_t) { (x) })
+#define __pgd(x)       ((pgd_t) { (x) })
+#define __pgprot(x)    ((pgprot_t) { (x) })
  
  #else
  /*
   * .. while these make it easier on the compiler
   */
  typedef unsigned long pte_t;
-typedef unsigned int  pmd_t;
-typedef unsigned int  pgd_t;
+typedef unsigned long pmd_t;
+typedef unsigned long pud_t;
+typedef unsigned long pgd_t;
  typedef unsigned long pgprot_t;
  
  #define pte_val(x)     (x)
  #define pmd_val(x)     (x)
+#define pud_val(x)     (x)
  #define pgd_val(x)     (x)
  #define pgprot_val(x)  (x)
  
  #define __pte(x)       (x)
  #define __pmd(x)       (x)
+#define __pud(x)       (x)
  #define __pgd(x)       (x)
  #define __pgprot(x)    (x)
  
@@ -208,9 +220,6 @@ extern u64 ppc64_pft_size;          /* Log 2 of page table size */
  #define USER_REGION_ID     (0UL)
  #define REGION_ID(ea)     (((unsigned long)(ea)) >> REGION_SHIFT)
  
-#define __bpn_to_ba(x) ((((unsigned long)(x)) << PAGE_SHIFT) + KERNELBASE)
-#define __ba_to_bpn(x) ((((unsigned long)(x)) & ~REGION_MASK) >> PAGE_SHIFT)
-
  #define __va(x) ((void *)((unsigned long)(x) + KERNELBASE))
  
  #ifdef CONFIG_DISCONTIGMEM
diff --git a/include/asm-ppc64/param.h b/include/asm-ppc64/param.h

index 1fad38dcf7074b08212e11dd64fefd9eab1a792f..76c212d475b35a0d37c8fd12e4e2ae1e1d4ba181 100644 (file)
--- a/include/asm-ppc64/param.h
+++ b/include/asm-ppc64/param.h
@@ -1,6 +1,8 @@
  #ifndef _ASM_PPC64_PARAM_H
  #define _ASM_PPC64_PARAM_H
  
+#include <linux/config.h>
+
  /*
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public License
@@ -9,7 +11,7 @@
   */
  
  #ifdef __KERNEL__
-# define HZ            1000            /* Internal kernel timer frequency */
+# define HZ            CONFIG_HZ       /* Internal kernel timer frequency */
  # define USER_HZ       100             /* .. some user interfaces are in "ticks" */
  # define CLOCKS_PER_SEC        (USER_HZ)       /* like times() */
  #endif
diff --git a/include/asm-ppc64/pci.h b/include/asm-ppc64/pci.h

index faa772223075f576ab26af6ace89ee37241e386b..4d057452f59bbc92fb998aa54d21d624024da8e2 100644 (file)
--- a/include/asm-ppc64/pci.h
+++ b/include/asm-ppc64/pci.h
@@ -134,6 +134,10 @@ extern void
  pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
                         struct resource *res);
  
+extern void
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                       struct pci_bus_region *region);
+
  extern int
  unmap_bus_range(struct pci_bus *bus);
  
diff --git a/include/asm-ppc64/percpu.h b/include/asm-ppc64/percpu.h

deleted file mode 100644 (file)

index 60a659a..0000000
--- a/include/asm-ppc64/percpu.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ARCH_PPC64_PERCPU__
-#define __ARCH_PPC64_PERCPU__
-
-#include <asm-generic/percpu.h>
-
-#endif /* __ARCH_PPC64_PERCPU__ */
diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h

index 4fc4b739b380e383f8c9936bc727ce4df4249f5d..26bc49c1108dfcd09cc30c6feb455e00e833e15f 100644 (file)
--- a/include/asm-ppc64/pgalloc.h
+++ b/include/asm-ppc64/pgalloc.h
@@ -6,7 +6,12 @@
  #include <linux/cpumask.h>
  #include <linux/percpu.h>
  
-extern kmem_cache_t *zero_cache;
+extern kmem_cache_t *pgtable_cache[];
+
+#define PTE_CACHE_NUM  0
+#define PMD_CACHE_NUM  1
+#define PUD_CACHE_NUM  1
+#define PGD_CACHE_NUM  0
  
  /*
   * This program is free software; you can redistribute it and/or
@@ -15,30 +20,40 @@ extern kmem_cache_t *zero_cache;
   * 2 of the License, or (at your option) any later version.
   */
  
-static inline pgd_t *
-pgd_alloc(struct mm_struct *mm)
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
  {
-       return kmem_cache_alloc(zero_cache, GFP_KERNEL);
+       return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
  }
  
-static inline void
-pgd_free(pgd_t *pgd)
+static inline void pgd_free(pgd_t *pgd)
  {
-       kmem_cache_free(zero_cache, pgd);
+       kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
+}
+
+#define pgd_populate(MM, PGD, PUD)     pgd_set(PGD, PUD)
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+       return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
+                               GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(pud_t *pud)
+{
+       kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
  }
  
  #define pud_populate(MM, PUD, PMD)     pud_set(PUD, PMD)
  
-static inline pmd_t *
-pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
  {
-       return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
+       return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
+                               GFP_KERNEL|__GFP_REPEAT);
  }
  
-static inline void
-pmd_free(pmd_t *pmd)
+static inline void pmd_free(pmd_t *pmd)
  {
-       kmem_cache_free(zero_cache, pmd);
+       kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
  }
  
  #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte)
@@ -47,44 +62,60 @@ pmd_free(pmd_t *pmd)
  
  static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
  {
-       return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
+       return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
+                               GFP_KERNEL|__GFP_REPEAT);
  }
  
  static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
  {
-       pte_t *pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
-       if (pte)
-               return virt_to_page(pte);
-       return NULL;
+       pte_t *pte = pte_alloc_one_kernel(mm, address);
+       /* Preserve the old contract: NULL on allocation failure. */
+       return pte ? virt_to_page(pte) : NULL;
  }
                 
  static inline void pte_free_kernel(pte_t *pte)
  {
-       kmem_cache_free(zero_cache, pte);
+       kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte);
  }
  
  static inline void pte_free(struct page *ptepage)
  {
-       kmem_cache_free(zero_cache, page_address(ptepage));
+       pte_free_kernel(page_address(ptepage));
  }
  
-struct pte_freelist_batch
+#define PGF_CACHENUM_MASK      0xf
+
+typedef struct pgtable_free {
+       unsigned long val;
+} pgtable_free_t;
+
+static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum,
+                                               unsigned long mask)
  {
-       struct rcu_head rcu;
-       unsigned int    index;
-       struct page *   pages[0];
-};
+       BUG_ON(cachenum > PGF_CACHENUM_MASK);
  
-#define PTE_FREELIST_SIZE      ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \
-                                 sizeof(struct page *))
+       return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum};
+}
  
-extern void pte_free_now(struct page *ptepage);
-extern void pte_free_submit(struct pte_freelist_batch *batch);
+static inline void pgtable_free(pgtable_free_t pgf)
+{
+       void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
+       int cachenum = pgf.val & PGF_CACHENUM_MASK;
  
-DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+       kmem_cache_free(pgtable_cache[cachenum], p);
+}
  
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage);
-#define __pmd_free_tlb(tlb, pmd)       __pte_free_tlb(tlb, virt_to_page(pmd))
+void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
+
+#define __pte_free_tlb(tlb, ptepage)   \
+       pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
+               PTE_CACHE_NUM, PTE_TABLE_SIZE-1))
+#define __pmd_free_tlb(tlb, pmd)       \
+       pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
+               PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
+#define __pud_free_tlb(tlb, pud)       \
+       pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
+               PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
  
  #define check_pgt_cache()      do { } while (0)
  
diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h

index 46cf61c2ff69a3b920231122402b1405ca0d4fca..c83679c9d2b0f49d07ade70a6255c5af32d38074 100644 (file)
--- a/include/asm-ppc64/pgtable.h
+++ b/include/asm-ppc64/pgtable.h
@@ -15,19 +15,24 @@
  #include <asm/tlbflush.h>
  #endif /* __ASSEMBLY__ */
  
-#include <asm-generic/pgtable-nopud.h>
-
  /*
   * Entries per page directory level.  The PTE level must use a 64b record
   * for each page table entry.  The PMD and PGD level use a 32b record for 
   * each entry by assuming that each entry is page aligned.
   */
  #define PTE_INDEX_SIZE  9
-#define PMD_INDEX_SIZE  10
-#define PGD_INDEX_SIZE  10
+#define PMD_INDEX_SIZE  7
+#define PUD_INDEX_SIZE  7
+#define PGD_INDEX_SIZE  9
+
+#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
+#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
  
  #define PTRS_PER_PTE   (1 << PTE_INDEX_SIZE)
  #define PTRS_PER_PMD   (1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD   (1 << PUD_INDEX_SIZE)
  #define PTRS_PER_PGD   (1 << PGD_INDEX_SIZE)
  
  /* PMD_SHIFT determines what a second-level page table entry can map */
@@ -35,8 +40,13 @@
  #define PMD_SIZE       (1UL << PMD_SHIFT)
  #define PMD_MASK       (~(PMD_SIZE-1))
  
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT    (PMD_SHIFT + PMD_INDEX_SIZE)
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT      (PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE       (1UL << PUD_SHIFT)
+#define PUD_MASK       (~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT    (PUD_SHIFT + PUD_INDEX_SIZE)
  #define PGDIR_SIZE     (1UL << PGDIR_SHIFT)
  #define PGDIR_MASK     (~(PGDIR_SIZE-1))
  
@@ -45,15 +55,23 @@
  /*
   * Size of EA range mapped by our pagetables.
   */
-#define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
-                    PGD_INDEX_SIZE + PAGE_SHIFT)
-#define EADDR_MASK ((1UL << EADDR_SIZE) - 1)
+#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
+                           PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
+#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE)
+
+#if TASK_SIZE_USER64 > PGTABLE_RANGE
+#error TASK_SIZE_USER64 exceeds pagetable range
+#endif
+
+#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT))
+#error TASK_SIZE_USER64 exceeds user VSID range
+#endif
  
  /*
   * Define the address range of the vmalloc VM area.
   */
  #define VMALLOC_START (0xD000000000000000ul)
-#define VMALLOC_SIZE  (0x10000000000UL)
+#define VMALLOC_SIZE  (0x80000000000UL)
  #define VMALLOC_END   (VMALLOC_START + VMALLOC_SIZE)
  
  /*
@@ -154,8 +172,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
  #ifndef __ASSEMBLY__
  int hash_huge_page(struct mm_struct *mm, unsigned long access,
                    unsigned long ea, unsigned long vsid, int local);
-
-void hugetlb_mm_free_pgd(struct mm_struct *mm);
  #endif /* __ASSEMBLY__ */
  
  #define HAVE_ARCH_UNMAPPED_AREA
@@ -163,7 +179,6 @@ void hugetlb_mm_free_pgd(struct mm_struct *mm);
  #else
  
  #define hash_huge_page(mm,a,ea,vsid,local)     -1
-#define hugetlb_mm_free_pgd(mm)                        do {} while (0)
  
  #endif
  
@@ -197,39 +212,45 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
  #define pte_pfn(x)             ((unsigned long)((pte_val(x) >> PTE_SHIFT)))
  #define pte_page(x)            pfn_to_page(pte_pfn(x))
  
-#define pmd_set(pmdp, ptep)    \
-       (pmd_val(*(pmdp)) = __ba_to_bpn(ptep))
+#define pmd_set(pmdp, ptep)    ({BUG_ON((u64)(ptep) < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);})
  #define pmd_none(pmd)          (!pmd_val(pmd))
  #define        pmd_bad(pmd)            (pmd_val(pmd) == 0)
  #define        pmd_present(pmd)        (pmd_val(pmd) != 0)
  #define        pmd_clear(pmdp)         (pmd_val(*(pmdp)) = 0)
-#define pmd_page_kernel(pmd)   (__bpn_to_ba(pmd_val(pmd)))
+#define pmd_page_kernel(pmd)   (pmd_val(pmd))
  #define pmd_page(pmd)          virt_to_page(pmd_page_kernel(pmd))
  
-#define pud_set(pudp, pmdp)    (pud_val(*(pudp)) = (__ba_to_bpn(pmdp)))
+#define pud_set(pudp, pmdp)    (pud_val(*(pudp)) = (unsigned long)(pmdp))
  #define pud_none(pud)          (!pud_val(pud))
-#define pud_bad(pud)           ((pud_val(pud)) == 0UL)
-#define pud_present(pud)       (pud_val(pud) != 0UL)
-#define pud_clear(pudp)                (pud_val(*(pudp)) = 0UL)
-#define pud_page(pud)          (__bpn_to_ba(pud_val(pud)))
+#define pud_bad(pud)           ((pud_val(pud)) == 0)
+#define pud_present(pud)       (pud_val(pud) != 0)
+#define pud_clear(pudp)                (pud_val(*(pudp)) = 0)
+#define pud_page(pud)          (pud_val(pud))
+
+#define pgd_set(pgdp, pudp)    ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
+#define pgd_none(pgd)          (!pgd_val(pgd))
+#define pgd_bad(pgd)           (pgd_val(pgd) == 0)
+#define pgd_present(pgd)       (pgd_val(pgd) != 0)
+#define pgd_clear(pgdp)                (pgd_val(*(pgdp)) = 0)
+#define pgd_page(pgd)          (pgd_val(pgd))
  
  /* 
   * Find an entry in a page-table-directory.  We combine the address region 
   * (the high order N bits) and the pgd portion of the address.
   */
  /* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */
-#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x7ff)
+#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x1ff)
  
  #define pgd_offset(mm, address)         ((mm)->pgd + pgd_index(address))
  
-/* Find an entry in the second-level page table.. */
+#define pud_offset(pgdp, addr) \
+  (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
+
  #define pmd_offset(pudp,addr) \
-  ((pmd_t *) pud_page(*(pudp)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+  (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
  
-/* Find an entry in the third-level page table.. */
  #define pte_offset_kernel(dir,addr) \
-  ((pte_t *) pmd_page_kernel(*(dir)) \
- + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
+  (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
  
  #define pte_offset_map(dir,addr)       pte_offset_kernel((dir), (addr))
  #define pte_offset_map_nested(dir,addr)        pte_offset_kernel((dir), (addr))
@@ -458,23 +479,20 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
  #define pte_same(A,B)  (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
  
  #define pmd_ERROR(e) \
-       printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e))
+       printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+       printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
  #define pgd_ERROR(e) \
-       printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e))
+       printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
  
  extern pgd_t swapper_pg_dir[];
  
  extern void paging_init(void);
  
-/*
- * Because the huge pgtables are only 2 level, they can take
- * at most around 4M, much less than one hugepage which the
- * process is presumably entitled to use.  So we don't bother
- * freeing up the pagetables on unmap, and wait until
- * destroy_context() to clean up the lot.
- */
+#ifdef CONFIG_HUGETLB_PAGE
  #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
-                                               do { } while (0)
+       free_pgd_range(tlb, addr, end, floor, ceiling)
+#endif
  
  /*
   * This gets called at the end of handling a page fault, when
diff --git a/include/asm-ppc64/pmc.h b/include/asm-ppc64/pmc.h

index c924748c0beaa36e9ed4502f31a6f8772766e529..d1d297dbccfec062fde2ae8011ebc32fc4617d33 100644 (file)
--- a/include/asm-ppc64/pmc.h
+++ b/include/asm-ppc64/pmc.h
@@ -26,4 +26,6 @@ typedef void (*perf_irq_t)(struct pt_regs *);
  int reserve_pmc_hardware(perf_irq_t new_perf_irq);
  void release_pmc_hardware(void);
  
+void power4_enable_pmcs(void);
+
  #endif /* _PPC64_PMC_H */
diff --git a/include/asm-ppc64/poll.h b/include/asm-ppc64/poll.h

deleted file mode 100644 (file)

index 370fa3b..0000000
--- a/include/asm-ppc64/poll.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef __PPC64_POLL_H
-#define __PPC64_POLL_H
-
-/*
- * Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define POLLIN         0x0001
-#define POLLPRI                0x0002
-#define POLLOUT                0x0004
-#define POLLERR                0x0008
-#define POLLHUP                0x0010
-#define POLLNVAL       0x0020
-#define POLLRDNORM     0x0040
-#define POLLRDBAND     0x0080
-#define POLLWRNORM     0x0100
-#define POLLWRBAND     0x0200
-#define POLLMSG                0x0400
-#define POLLREMOVE     0x1000
-
-struct pollfd {
-       int fd;
-       short events;
-       short revents;
-};
-
-#endif /* __PPC64_POLL_H */
diff --git a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h

index 352306cfb579909177da4fec25492fd8b53adb2b..7bd4796f1236c5efe6f7c881ab0aa18a141a1fd8 100644 (file)
--- a/include/asm-ppc64/processor.h
+++ b/include/asm-ppc64/processor.h
@@ -268,6 +268,7 @@
  #define PV_970FX       0x003C
  #define        PV_630          0x0040
  #define        PV_630p         0x0041
+#define        PV_970MP        0x0044
  #define        PV_BE           0x0070
  
  /* Platforms supported by PPC64 */
@@ -382,8 +383,8 @@ extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
  extern struct task_struct *last_task_used_math;
  extern struct task_struct *last_task_used_altivec;
  
-/* 64-bit user address space is 41-bits (2TBs user VM) */
-#define TASK_SIZE_USER64 (0x0000020000000000UL)
+/* 64-bit user address space is 44-bits (16TB user VM) */
+#define TASK_SIZE_USER64 (0x0000100000000000UL)
  
  /* 
   * 32-bit user address space is 4GB - 1 page 
diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h

index 04b1a84f7ca390c5f6c3ad6f5ea9bb22f44f2d40..dc5330b3950963266887091034cb82b53b7e1135 100644 (file)
--- a/include/asm-ppc64/prom.h
+++ b/include/asm-ppc64/prom.h
@@ -22,13 +22,15 @@
  #define RELOC(x)        (*PTRRELOC(&(x)))
  
  /* Definitions used by the flattened device tree */
-#define OF_DT_HEADER           0xd00dfeed      /* 4: version, 4: total size */
-#define OF_DT_BEGIN_NODE       0x1             /* Start node: full name */
+#define OF_DT_HEADER           0xd00dfeed      /* marker */
+#define OF_DT_BEGIN_NODE       0x1             /* Start of node, full name */
  #define OF_DT_END_NODE         0x2             /* End node */
-#define OF_DT_PROP             0x3             /* Property: name off, size, content */
+#define OF_DT_PROP             0x3             /* Property: name off, size,
+                                                * content */
+#define OF_DT_NOP              0x4             /* nop */
  #define OF_DT_END              0x9
  
-#define OF_DT_VERSION          1
+#define OF_DT_VERSION          0x10
  
  /*
   * This is what gets passed to the kernel by prom_init or kexec
@@ -54,7 +56,9 @@ struct boot_param_header
         u32     version;                /* format version */
         u32     last_comp_version;      /* last compatible version */
         /* version 2 fields below */
-       u32     boot_cpuid_phys;        /* Which physical CPU id we're booting on */
+       u32     boot_cpuid_phys;        /* Physical CPU id we're booting on */
+       /* version 3 fields below */
+       u32     dt_strings_size;        /* size of the DT strings block */
  };
  
  
diff --git a/include/asm-ppc64/resource.h b/include/asm-ppc64/resource.h

deleted file mode 100644 (file)

index add031b..0000000
--- a/include/asm-ppc64/resource.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _PPC64_RESOURCE_H
-#define _PPC64_RESOURCE_H
-
-#include <asm-generic/resource.h>
-
-#endif /* _PPC64_RESOURCE_H */
diff --git a/include/asm-ppc64/shmparam.h b/include/asm-ppc64/shmparam.h

deleted file mode 100644 (file)

index b2825ce..0000000
--- a/include/asm-ppc64/shmparam.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _PPC64_SHMPARAM_H
-#define _PPC64_SHMPARAM_H
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define        SHMLBA PAGE_SIZE                 /* attach addr a multiple of this */
-
-#endif /* _PPC64_SHMPARAM_H */
diff --git a/include/asm-ppc64/socket.h b/include/asm-ppc64/socket.h

index 59e00dfc8b8ec48d4e566bd4d20bfd0347740251..9e1af8eb2d965be984631ee2654060d0a7a997fb 100644 (file)
--- a/include/asm-ppc64/socket.h
+++ b/include/asm-ppc64/socket.h
@@ -21,6 +21,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-ppc64/string.h b/include/asm-ppc64/string.h

deleted file mode 100644 (file)

index eeca68e..0000000
--- a/include/asm-ppc64/string.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef _PPC64_STRING_H_
-#define _PPC64_STRING_H_
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define __HAVE_ARCH_STRCPY
-#define __HAVE_ARCH_STRNCPY
-#define __HAVE_ARCH_STRLEN
-#define __HAVE_ARCH_STRCMP
-#define __HAVE_ARCH_STRCAT
-#define __HAVE_ARCH_MEMSET
-#define __HAVE_ARCH_MEMCPY
-#define __HAVE_ARCH_MEMMOVE
-#define __HAVE_ARCH_MEMCMP
-#define __HAVE_ARCH_MEMCHR
-
-extern int strcasecmp(const char *, const char *);
-extern int strncasecmp(const char *, const char *, int);
-extern char * strcpy(char *,const char *);
-extern char * strncpy(char *,const char *, __kernel_size_t);
-extern __kernel_size_t strlen(const char *);
-extern int strcmp(const char *,const char *);
-extern char * strcat(char *, const char *);
-extern void * memset(void *,int,__kernel_size_t);
-extern void * memcpy(void *,const void *,__kernel_size_t);
-extern void * memmove(void *,const void *,__kernel_size_t);
-extern int memcmp(const void *,const void *,__kernel_size_t);
-extern void * memchr(const void *,int,__kernel_size_t);
-
-#endif /* _PPC64_STRING_H_ */
diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h

index 98d120ca8a91dc0be5043d2eedc7f8be082bd826..b9e1835351e98d7874f1c9f06cc7a4fccc3f4904 100644 (file)
--- a/include/asm-ppc64/system.h
+++ b/include/asm-ppc64/system.h
@@ -88,7 +88,7 @@ DEBUGGER_BOILERPLATE(debugger_dabr_match)
  DEBUGGER_BOILERPLATE(debugger_fault_handler)
  
  #ifdef CONFIG_XMON
-extern void xmon_init(void);
+extern void xmon_init(int enable);
  #endif
  
  #else
@@ -302,5 +302,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
  
  #define arch_align_stack(x) (x)
  
+extern unsigned long reloc_offset(void);
+
  #endif /* __KERNEL__ */
  #endif
diff --git a/include/asm-ppc64/topology.h b/include/asm-ppc64/topology.h

index fcdcfd26a26bcbc41b6ed18cfd4e3c98bade84f4..1e9b1907323069f2acc3e446f32a43fcd909eb4a 100644 (file)
--- a/include/asm-ppc64/topology.h
+++ b/include/asm-ppc64/topology.h
@@ -33,6 +33,7 @@ static inline int node_to_first_cpu(int node)
         return first_cpu(tmp);
  }
  
+#define pcibus_to_node(node)    (-1)
  #define pcibus_to_cpumask(bus) (cpu_online_map)
  
  #define nr_cpus_node(node)     (nr_cpus_in_node[node])
@@ -59,8 +60,10 @@ static inline int node_to_first_cpu(int node)
         .nr_balance_failed      = 0,                    \
  }
  
-#endif /* CONFIG_NUMA */
+#else
  
  #include <asm-generic/topology.h>
  
+#endif /* CONFIG_NUMA */
+
  #endif /* _ASM_PPC64_TOPOLOGY_H */
diff --git a/include/asm-ppc64/unaligned.h b/include/asm-ppc64/unaligned.h

deleted file mode 100644 (file)

index 636e93c..0000000
--- a/include/asm-ppc64/unaligned.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef __PPC64_UNALIGNED_H
-#define __PPC64_UNALIGNED_H
-
-/*
- * The PowerPC can do unaligned accesses itself in big endian mode. 
- *
- * The strange macros are there to make sure these can't
- * be misused in a way that makes them not work on other
- * architectures where unaligned accesses aren't as simple.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define get_unaligned(ptr) (*(ptr))
-
-#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) ))
-
-#endif /* __PPC64_UNALIGNED_H */
diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h

index 20cd98ee63378c20944c37bceb917ab429d767b6..03f1b95f433bbc7e2e1eda27f00538f7c4ee826a 100644 (file)
--- a/include/asm-ppc64/vio.h
+++ b/include/asm-ppc64/vio.h
@@ -19,13 +19,15 @@
  #include <linux/errno.h>
  #include <linux/device.h>
  #include <linux/dma-mapping.h>
+#include <linux/mod_devicetable.h>
+
  #include <asm/hvcall.h>
-#include <asm/prom.h>
  #include <asm/scatterlist.h>
-/* 
+
+/*
   * Architecture-specific constants for drivers to
   * extract attributes of the device using vio_get_attribute()
-*/
+ */
  #define VETH_MAC_ADDR "local-mac-address"
  #define VETH_MCAST_FILTER_SIZE "ibm,mac-address-filters"
  
@@ -37,64 +39,65 @@
  #define VIO_IRQ_DISABLE                0UL
  #define VIO_IRQ_ENABLE         1UL
  
-struct vio_dev;
-struct vio_driver;
-struct vio_device_id;
  struct iommu_table;
  
-int vio_register_driver(struct vio_driver *drv);
-void vio_unregister_driver(struct vio_driver *drv);
-
-#ifdef CONFIG_PPC_PSERIES
-struct vio_dev * __devinit vio_register_device_node(
-               struct device_node *node_vdev);
-#endif
-void __devinit vio_unregister_device(struct vio_dev *dev);
-struct vio_dev *vio_find_node(struct device_node *vnode);
-
-const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length);
-int vio_get_irq(struct vio_dev *dev);
-int vio_enable_interrupts(struct vio_dev *dev);
-int vio_disable_interrupts(struct vio_dev *dev);
-
-extern struct dma_mapping_ops vio_dma_ops;
-
-extern struct bus_type vio_bus_type;
-
-struct vio_device_id {
+/*
+ * The vio_dev structure is used to describe virtual I/O devices.
+ */
+struct vio_dev {
+       struct iommu_table *iommu_table;     /* vio_map_* uses this */
+       char *name;
         char *type;
-       char *compat;
+       uint32_t unit_address;
+       unsigned int irq;
+       struct device dev;
  };
  
  struct vio_driver {
         struct list_head node;
         char *name;
-       const struct vio_device_id *id_table;   /* NULL if wants all devices */
-       int  (*probe)  (struct vio_dev *dev, const struct vio_device_id *id);   /* New device inserted */
-       int (*remove) (struct vio_dev *dev);    /* Device removed (NULL if not a hot-plug capable driver) */
+       const struct vio_device_id *id_table;
+       int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
+       int (*remove)(struct vio_dev *dev);
         unsigned long driver_data;
-
         struct device_driver driver;
  };
  
+struct vio_bus_ops {
+       int (*match)(const struct vio_device_id *id, const struct vio_dev *dev);
+       void (*unregister_device)(struct vio_dev *);
+       void (*release_device)(struct device *);
+};
+
+extern struct dma_mapping_ops vio_dma_ops;
+extern struct bus_type vio_bus_type;
+extern struct vio_dev vio_bus_device;
+
+extern int vio_register_driver(struct vio_driver *drv);
+extern void vio_unregister_driver(struct vio_driver *drv);
+
+extern struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev);
+extern void __devinit vio_unregister_device(struct vio_dev *dev);
+
+extern int vio_bus_init(struct vio_bus_ops *);
+
+#ifdef CONFIG_PPC_PSERIES
+struct device_node;
+
+extern struct vio_dev * __devinit vio_register_device_node(
+               struct device_node *node_vdev);
+extern struct vio_dev *vio_find_node(struct device_node *vnode);
+extern const void *vio_get_attribute(struct vio_dev *vdev, void *which,
+               int *length);
+extern int vio_enable_interrupts(struct vio_dev *dev);
+extern int vio_disable_interrupts(struct vio_dev *dev);
+#endif
+
  static inline struct vio_driver *to_vio_driver(struct device_driver *drv)
  {
         return container_of(drv, struct vio_driver, driver);
  }
  
-/*
- * The vio_dev structure is used to describe virtual I/O devices.
- */
-struct vio_dev {
-       struct iommu_table *iommu_table;     /* vio_map_* uses this */
-       char *name;
-       char *type;
-       uint32_t unit_address;  
-       unsigned int irq;
-
-       struct device dev;
-};
-
  static inline struct vio_dev *to_vio_dev(struct device *dev)
  {
         return container_of(dev, struct vio_dev, dev);
diff --git a/include/asm-ppc64/xics.h b/include/asm-ppc64/xics.h

index 0c45e14e26ca66d219035da90519703b7dbdac09..1092af55d7071a46f49fe11bbd501d2015496221 100644 (file)
--- a/include/asm-ppc64/xics.h
+++ b/include/asm-ppc64/xics.h
@@ -17,7 +17,7 @@
  void xics_init_IRQ(void);
  int xics_get_irq(struct pt_regs *);
  void xics_setup_cpu(void);
-void xics_teardown_cpu(void);
+void xics_teardown_cpu(int secondary);
  void xics_cause_IPI(int cpu);
  void xics_request_IPIs(void);
  void xics_migrate_irqs_away(void);
diff --git a/include/asm-ppc64/xor.h b/include/asm-ppc64/xor.h

deleted file mode 100644 (file)

index c82eb12..0000000
--- a/include/asm-ppc64/xor.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/xor.h>
diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h

index 0e96eeca4e6b748233d9ca842e254e466bef0075..15a5298c8744bf1401be8b0cb736b406b410d55d 100644 (file)
--- a/include/asm-s390/socket.h
+++ b/include/asm-s390/socket.h
@@ -22,6 +22,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-s390/uaccess.h b/include/asm-s390/uaccess.h

index a7f43a251f81b5e4feb14e3c84428f4a99d1bd82..3e3bfe6a8fa84188f86f1acef2d17e84814771c3 100644 (file)
--- a/include/asm-s390/uaccess.h
+++ b/include/asm-s390/uaccess.h
@@ -149,11 +149,11 @@ struct exception_table_entry
  })
  #endif
  
-#ifndef __CHECKER__
  #define __put_user(x, ptr) \
  ({                                                             \
         __typeof__(*(ptr)) __x = (x);                           \
         int __pu_err;                                           \
+        __chk_user_ptr(ptr);                                    \
         switch (sizeof (*(ptr))) {                              \
         case 1:                                                 \
         case 2:                                                 \
@@ -167,14 +167,6 @@ struct exception_table_entry
          }                                                      \
         __pu_err;                                               \
  })
-#else
-#define __put_user(x, ptr)                     \
-({                                             \
-       void __user *p;                         \
-       p = (ptr);                              \
-       0;                                      \
-})
-#endif
  
  #define put_user(x, ptr)                                       \
  ({                                                             \
@@ -213,11 +205,11 @@ extern int __put_user_bad(void) __attribute__((noreturn));
  })
  #endif
  
-#ifndef __CHECKER__
  #define __get_user(x, ptr)                                     \
  ({                                                             \
         __typeof__(*(ptr)) __x;                                 \
         int __gu_err;                                           \
+        __chk_user_ptr(ptr);                                    \
         switch (sizeof(*(ptr))) {                               \
         case 1:                                                 \
         case 2:                                                 \
@@ -232,15 +224,6 @@ extern int __put_user_bad(void) __attribute__((noreturn));
         (x) = __x;                                              \
         __gu_err;                                               \
  })
-#else
-#define __get_user(x, ptr)                     \
-({                                             \
-       void __user *p;                         \
-       p = (ptr);                              \
-       0;                                      \
-})
-#endif
-
  
  #define get_user(x, ptr)                                       \
  ({                                                             \
diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h

index 363db45f8d074314ed9603bc8c26466aeb270134..221e965da9242996e92e755ff13c968905f7cb24 100644 (file)
--- a/include/asm-s390/unistd.h
+++ b/include/asm-s390/unistd.h
@@ -274,8 +274,13 @@
  #define __NR_request_key       279
  #define __NR_keyctl            280
  #define __NR_waitid            281
+#define __NR_ioprio_set                282
+#define __NR_ioprio_get                283
+#define __NR_inotify_init      284
+#define __NR_inotify_add_watch 285
+#define __NR_inotify_rm_watch  286
  
-#define NR_syscalls 282
+#define NR_syscalls 287
  
  /* 
   * There are some system calls that are not present on 64 bit, some
diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h

index dde696c3b4c7660d59f5e6a3809a003968f5c07b..553904ff9336c2e26304810e0f0db20e0f4a4220 100644 (file)
--- a/include/asm-sh/socket.h
+++ b/include/asm-sh/socket.h
@@ -14,6 +14,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_RCVBUFFORCE 32
+#define SO_SNDBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-sh/unistd.h b/include/asm-sh/unistd.h

index 245447081f0d42693bfb52b47d216fc5ef26b61f..ea89e8f223eae1264e6a8fb29e7fb92e6eec6a97 100644 (file)
--- a/include/asm-sh/unistd.h
+++ b/include/asm-sh/unistd.h
@@ -295,8 +295,14 @@
  #define __NR_add_key           285
  #define __NR_request_key       286
  #define __NR_keyctl            287
+#define __NR_ioprio_set                288
+#define __NR_ioprio_get                289
+#define __NR_inotify_init      290
+#define __NR_inotify_add_watch 291
+#define __NR_inotify_rm_watch  292
  
-#define NR_syscalls 288
+
+#define NR_syscalls 293
  
  /* user-visible error numbers are in the range -1 - -124: see <asm-sh/errno.h> */
  
@@ -406,7 +412,7 @@ register long __sc6 __asm__ ("r6") = (long) arg3; \
  register long __sc7 __asm__ ("r7") = (long) arg4; \
  register long __sc0 __asm__ ("r0") = (long) arg5; \
  register long __sc1 __asm__ ("r1") = (long) arg6; \
-__asm__ __volatile__ ("trapa   #0x15" \
+__asm__ __volatile__ ("trapa   #0x16" \
         : "=z" (__sc0) \
         : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6), "r" (__sc7),  \
           "r" (__sc3), "r" (__sc1) \
diff --git a/include/asm-sh64/unistd.h b/include/asm-sh64/unistd.h

index 95f0b130405c420a23d52618fa8dc854cb4e245b..2a1cfa404ea4b1b432c41eef1395882e4aabf465 100644 (file)
--- a/include/asm-sh64/unistd.h
+++ b/include/asm-sh64/unistd.h
@@ -338,8 +338,13 @@
  #define __NR_add_key           313
  #define __NR_request_key       314
  #define __NR_keyctl            315
+#define __NR_ioprio_set                316
+#define __NR_ioprio_get                317
+#define __NR_inotify_init      318
+#define __NR_inotify_add_watch 319
+#define __NR_inotify_rm_watch  320
  
-#define NR_syscalls 316
+#define NR_syscalls 321
  
  /* user-visible error numbers are in the range -1 - -125: see <asm-sh64/errno.h> */
  
diff --git a/include/asm-sparc/processor.h b/include/asm-sparc/processor.h

index 32c9699367cf527c27348055d0f841879afc350a..5a7a1a8d29ac9ccbf09ea9fc5df220c4744432cb 100644 (file)
--- a/include/asm-sparc/processor.h
+++ b/include/asm-sparc/processor.h
@@ -19,7 +19,6 @@
  #include <asm/ptrace.h>
  #include <asm/head.h>
  #include <asm/signal.h>
-#include <asm/segment.h>
  #include <asm/btfixup.h>
  #include <asm/page.h>
  
diff --git a/include/asm-sparc/segment.h b/include/asm-sparc/segment.h

deleted file mode 100644 (file)

index a1b7ffc..0000000
--- a/include/asm-sparc/segment.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __SPARC_SEGMENT_H
-#define __SPARC_SEGMENT_H
-
-/* Only here because we have some old header files that expect it.. */
-
-#endif
diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h

index c1154e3ecfdf53b3cbccffa88c032194e10316bc..09575b608adbb584b644adc3183eadacafe738bd 100644 (file)
--- a/include/asm-sparc/socket.h
+++ b/include/asm-sparc/socket.h
@@ -29,6 +29,8 @@
  
  #define SO_SNDBUF      0x1001
  #define SO_RCVBUF      0x1002
+#define SO_SNDBUFFORCE 0x100a
+#define SO_RCVBUFFORCE 0x100b
  #define SO_ERROR       0x1007
  #define SO_TYPE                0x1008
  
diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h

index 898562ebe94c6d8e7abfbc75d8d9f22e0ffd8dc1..3557781a4bfd7082bb8bbe8dd1e3a5c8d2bc1f1d 100644 (file)
--- a/include/asm-sparc/system.h
+++ b/include/asm-sparc/system.h
@@ -9,7 +9,6 @@
  #include <linux/threads.h>     /* NR_CPUS */
  #include <linux/thread_info.h>
  
-#include <asm/segment.h>
  #include <asm/page.h>
  #include <asm/psr.h>
  #include <asm/ptrace.h>
diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h

index d80f3379669b4625e1be10b21c7498d9cbfb6080..e175afcf2cdeb852d712a40f51b87678dbb26521 100644 (file)
--- a/include/asm-sparc64/atomic.h
+++ b/include/asm-sparc64/atomic.h
@@ -72,10 +72,10 @@ extern int atomic64_sub_ret(int, atomic64_t *);
  
  /* Atomic operations are already serializing */
  #ifdef CONFIG_SMP
-#define smp_mb__before_atomic_dec()    membar("#StoreLoad | #LoadLoad")
-#define smp_mb__after_atomic_dec()     membar("#StoreLoad | #StoreStore")
-#define smp_mb__before_atomic_inc()    membar("#StoreLoad | #LoadLoad")
-#define smp_mb__after_atomic_inc()     membar("#StoreLoad | #StoreStore")
+#define smp_mb__before_atomic_dec()    membar_storeload_loadload()
+#define smp_mb__after_atomic_dec()     membar_storeload_storestore()
+#define smp_mb__before_atomic_inc()    membar_storeload_loadload()
+#define smp_mb__after_atomic_inc()     membar_storeload_storestore()
  #else
  #define smp_mb__before_atomic_dec()    barrier()
  #define smp_mb__after_atomic_dec()     barrier()
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h

index 9c5e71970287fbaebb37297315be05dcce55bcb3..6388b8376c50227bc8609d87a40e52451f74a435 100644 (file)
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -72,8 +72,8 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
  }
  
  #ifdef CONFIG_SMP
-#define smp_mb__before_clear_bit()     membar("#StoreLoad | #LoadLoad")
-#define smp_mb__after_clear_bit()      membar("#StoreLoad | #StoreStore")
+#define smp_mb__before_clear_bit()     membar_storeload_loadload()
+#define smp_mb__after_clear_bit()      membar_storeload_storestore()
  #else
  #define smp_mb__before_clear_bit()     barrier()
  #define smp_mb__after_clear_bit()      barrier()
diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h

index d0bee2413560147a62b4f7f1af7ea7f781e0b847..3169f3e2237efb91769521f0b66a9b898d7df060 100644 (file)
--- a/include/asm-sparc64/processor.h
+++ b/include/asm-sparc64/processor.h
@@ -18,7 +18,6 @@
  #include <asm/a.out.h>
  #include <asm/pstate.h>
  #include <asm/ptrace.h>
-#include <asm/segment.h>
  #include <asm/page.h>
  
  /* The sparc has no problems with write protection */
diff --git a/include/asm-sparc64/segment.h b/include/asm-sparc64/segment.h

deleted file mode 100644 (file)

index b03e709..0000000
--- a/include/asm-sparc64/segment.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __SPARC64_SEGMENT_H
-#define __SPARC64_SEGMENT_H
-
-/* Only here because we have some old header files that expect it.. */
-
-#endif
diff --git a/include/asm-sparc64/sfafsr.h b/include/asm-sparc64/sfafsr.h

new file mode 100644 (file)

index 0000000..2f792c2
--- /dev/null
+++ b/include/asm-sparc64/sfafsr.h
@@ -0,0 +1,82 @@
+#ifndef _SPARC64_SFAFSR_H
+#define _SPARC64_SFAFSR_H
+
+#include <asm/const.h>
+
+/* Spitfire Asynchronous Fault Status register, ASI=0x4C VA<63:0>=0x0 */
+
+#define SFAFSR_ME              (_AC(1,UL) << SFAFSR_ME_SHIFT)
+#define SFAFSR_ME_SHIFT                32
+#define SFAFSR_PRIV            (_AC(1,UL) << SFAFSR_PRIV_SHIFT)
+#define SFAFSR_PRIV_SHIFT      31
+#define SFAFSR_ISAP            (_AC(1,UL) << SFAFSR_ISAP_SHIFT)
+#define SFAFSR_ISAP_SHIFT      30
+#define SFAFSR_ETP             (_AC(1,UL) << SFAFSR_ETP_SHIFT)
+#define SFAFSR_ETP_SHIFT       29
+#define SFAFSR_IVUE            (_AC(1,UL) << SFAFSR_IVUE_SHIFT)
+#define SFAFSR_IVUE_SHIFT      28
+#define SFAFSR_TO              (_AC(1,UL) << SFAFSR_TO_SHIFT)
+#define SFAFSR_TO_SHIFT                27
+#define SFAFSR_BERR            (_AC(1,UL) << SFAFSR_BERR_SHIFT)
+#define SFAFSR_BERR_SHIFT      26
+#define SFAFSR_LDP             (_AC(1,UL) << SFAFSR_LDP_SHIFT)
+#define SFAFSR_LDP_SHIFT       25
+#define SFAFSR_CP              (_AC(1,UL) << SFAFSR_CP_SHIFT)
+#define SFAFSR_CP_SHIFT                24
+#define SFAFSR_WP              (_AC(1,UL) << SFAFSR_WP_SHIFT)
+#define SFAFSR_WP_SHIFT                23
+#define SFAFSR_EDP             (_AC(1,UL) << SFAFSR_EDP_SHIFT)
+#define SFAFSR_EDP_SHIFT       22
+#define SFAFSR_UE              (_AC(1,UL) << SFAFSR_UE_SHIFT)
+#define SFAFSR_UE_SHIFT                21
+#define SFAFSR_CE              (_AC(1,UL) << SFAFSR_CE_SHIFT)
+#define SFAFSR_CE_SHIFT                20
+#define SFAFSR_ETS             (_AC(0xf,UL) << SFAFSR_ETS_SHIFT)
+#define SFAFSR_ETS_SHIFT       16
+#define SFAFSR_PSYND           (_AC(0xffff,UL) << SFAFSR_PSYND_SHIFT)
+#define SFAFSR_PSYND_SHIFT     0
+
+/* UDB Error Register, ASI=0x7f VA<63:0>=0x0(High),0x18(Low) for read
+ *                     ASI=0x77 VA<63:0>=0x0(High),0x18(Low) for write
+ */
+
+#define UDBE_UE                        (_AC(1,UL) << 9)
+#define UDBE_CE                        (_AC(1,UL) << 8)
+#define UDBE_E_SYNDR           (_AC(0xff,UL) << 0)
+
+/* The trap handlers for asynchronous errors encode the AFSR and
+ * other pieces of information into a 64-bit argument for C code
+ * encoded as follows:
+ *
+ * -----------------------------------------------
+ * |  UDB_H  |  UDB_L  | TL>1  |  TT  |   AFSR   |
+ * -----------------------------------------------
+ *  63     54 53     44    42   41  33 32       0
+ *
+ * The AFAR is passed in unchanged.
+ */
+#define SFSTAT_UDBH_MASK       (_AC(0x3ff,UL) << SFSTAT_UDBH_SHIFT)
+#define SFSTAT_UDBH_SHIFT      54
+#define SFSTAT_UDBL_MASK       (_AC(0x3ff,UL) << SFSTAT_UDBL_SHIFT)
+#define SFSTAT_UDBL_SHIFT      44
+#define SFSTAT_TL_GT_ONE       (_AC(1,UL) << SFSTAT_TL_GT_ONE_SHIFT)
+#define SFSTAT_TL_GT_ONE_SHIFT 42
+#define SFSTAT_TRAP_TYPE       (_AC(0x1FF,UL) << SFSTAT_TRAP_TYPE_SHIFT)
+#define SFSTAT_TRAP_TYPE_SHIFT 33
+#define SFSTAT_AFSR_MASK       (_AC(0x1ffffffff,UL) << SFSTAT_AFSR_SHIFT)
+#define SFSTAT_AFSR_SHIFT      0
+
+/* ESTATE Error Enable Register, ASI=0x4b VA<63:0>=0x0 */
+#define ESTATE_ERR_CE          0x1 /* Correctable errors                    */
+#define ESTATE_ERR_NCE         0x2 /* TO, BERR, LDP, ETP, EDP, WP, UE, IVUE */
+#define ESTATE_ERR_ISAP                0x4 /* System address parity error           */
+#define ESTATE_ERR_ALL         (ESTATE_ERR_CE | \
+                                ESTATE_ERR_NCE | \
+                                ESTATE_ERR_ISAP)
+
+/* The various trap types that report using the above state. */
+#define TRAP_TYPE_IAE          0x09 /* Instruction Access Error             */
+#define TRAP_TYPE_DAE          0x32 /* Data Access Error                    */
+#define TRAP_TYPE_CEE          0x63 /* Correctable ECC Error                */
+
+#endif /* _SPARC64_SFAFSR_H */
diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h

index 865547a23908120b7e8c8eda37a08b78c1d047a9..59987dad3359912059a37039512dafc807c5d2df 100644 (file)
--- a/include/asm-sparc64/socket.h
+++ b/include/asm-sparc64/socket.h
@@ -29,6 +29,8 @@
  
  #define SO_SNDBUF      0x1001
  #define SO_RCVBUF      0x1002
+#define SO_SNDBUFFORCE 0x100a
+#define SO_RCVBUFFORCE 0x100b
  #define SO_ERROR       0x1007
  #define SO_TYPE                0x1008
  
diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h

index 9cb93a5c2b4feac61ab2245c9ca17cb74997ded0..a02c4370eb42e0d1c8f6043a25f900e0ea03ff2c 100644 (file)
--- a/include/asm-sparc64/spinlock.h
+++ b/include/asm-sparc64/spinlock.h
@@ -43,7 +43,7 @@ typedef struct {
  #define spin_is_locked(lp)  ((lp)->lock != 0)
  
  #define spin_unlock_wait(lp)   \
-do {   membar("#LoadLoad");    \
+do {   rmb();                  \
  } while((lp)->lock)
  
  static inline void _raw_spin_lock(spinlock_t *lock)
@@ -129,15 +129,18 @@ typedef struct {
  #define spin_is_locked(__lock) ((__lock)->lock != 0)
  #define spin_unlock_wait(__lock)       \
  do { \
-       membar("#LoadLoad"); \
+       rmb(); \
  } while((__lock)->lock)
  
-extern void _do_spin_lock (spinlock_t *lock, char *str);
-extern void _do_spin_unlock (spinlock_t *lock);
-extern int _do_spin_trylock (spinlock_t *lock);
+extern void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller);
+extern void _do_spin_unlock(spinlock_t *lock);
+extern int _do_spin_trylock(spinlock_t *lock, unsigned long caller);
  
-#define _raw_spin_trylock(lp)  _do_spin_trylock(lp)
-#define _raw_spin_lock(lock)   _do_spin_lock(lock, "spin_lock")
+#define _raw_spin_trylock(lp)  \
+       _do_spin_trylock(lp, (unsigned long) __builtin_return_address(0))
+#define _raw_spin_lock(lock)   \
+       _do_spin_lock(lock, "spin_lock", \
+                     (unsigned long) __builtin_return_address(0))
  #define _raw_spin_unlock(lock) _do_spin_unlock(lock)
  #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
  
@@ -279,37 +282,41 @@ typedef struct {
  #define RW_LOCK_UNLOCKED       (rwlock_t) { 0, 0, 0xff, { } }
  #define rwlock_init(lp) do { *(lp) = RW_LOCK_UNLOCKED; } while(0)
  
-extern void _do_read_lock(rwlock_t *rw, char *str);
-extern void _do_read_unlock(rwlock_t *rw, char *str);
-extern void _do_write_lock(rwlock_t *rw, char *str);
-extern void _do_write_unlock(rwlock_t *rw);
-extern int _do_write_trylock(rwlock_t *rw, char *str);
+extern void _do_read_lock(rwlock_t *rw, char *str, unsigned long caller);
+extern void _do_read_unlock(rwlock_t *rw, char *str, unsigned long caller);
+extern void _do_write_lock(rwlock_t *rw, char *str, unsigned long caller);
+extern void _do_write_unlock(rwlock_t *rw, unsigned long caller);
+extern int _do_write_trylock(rwlock_t *rw, char *str, unsigned long caller);
  
  #define _raw_read_lock(lock) \
  do {   unsigned long flags; \
         local_irq_save(flags); \
-       _do_read_lock(lock, "read_lock"); \
+       _do_read_lock(lock, "read_lock", \
+                     (unsigned long) __builtin_return_address(0)); \
         local_irq_restore(flags); \
  } while(0)
  
  #define _raw_read_unlock(lock) \
  do {   unsigned long flags; \
         local_irq_save(flags); \
-       _do_read_unlock(lock, "read_unlock"); \
+       _do_read_unlock(lock, "read_unlock", \
+                     (unsigned long) __builtin_return_address(0)); \
         local_irq_restore(flags); \
  } while(0)
  
  #define _raw_write_lock(lock) \
  do {   unsigned long flags; \
         local_irq_save(flags); \
-       _do_write_lock(lock, "write_lock"); \
+       _do_write_lock(lock, "write_lock", \
+                     (unsigned long) __builtin_return_address(0)); \
         local_irq_restore(flags); \
  } while(0)
  
  #define _raw_write_unlock(lock) \
  do {   unsigned long flags; \
         local_irq_save(flags); \
-       _do_write_unlock(lock); \
+       _do_write_unlock(lock, \
+                     (unsigned long) __builtin_return_address(0)); \
         local_irq_restore(flags); \
  } while(0)
  
@@ -317,7 +324,8 @@ do {        unsigned long flags; \
  ({     unsigned long flags; \
         int val; \
         local_irq_save(flags); \
-       val = _do_write_trylock(lock, "write_trylock"); \
+       val = _do_write_trylock(lock, "write_trylock", \
+                               (unsigned long) __builtin_return_address(0)); \
         local_irq_restore(flags); \
         val; \
  })
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h

index ee4bdfc6b88f3a7b047da2361bb117bfbe312581..5e94c05dc2fccf08d6d6fd4440adbb017fd41edf 100644 (file)
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -28,6 +28,14 @@ enum sparc_cpu {
  #define ARCH_SUN4C_SUN4 0
  #define ARCH_SUN4 0
  
+extern void mb(void);
+extern void rmb(void);
+extern void wmb(void);
+extern void membar_storeload(void);
+extern void membar_storeload_storestore(void);
+extern void membar_storeload_loadload(void);
+extern void membar_storestore_loadstore(void);
+
  #endif
  
  #define setipl(__new_ipl) \
@@ -78,16 +86,11 @@ enum sparc_cpu {
  
  #define nop()          __asm__ __volatile__ ("nop")
  
-#define membar(type)   __asm__ __volatile__ ("membar " type : : : "memory")
-#define mb()           \
-       membar("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
-#define rmb()          membar("#LoadLoad")
-#define wmb()          membar("#StoreStore")
  #define read_barrier_depends()         do { } while(0)
  #define set_mb(__var, __value) \
-       do { __var = __value; membar("#StoreLoad | #StoreStore"); } while(0)
+       do { __var = __value; membar_storeload_storestore(); } while(0)
  #define set_wmb(__var, __value) \
-       do { __var = __value; membar("#StoreStore"); } while(0)
+       do { __var = __value; wmb(); } while(0)
  
  #ifdef CONFIG_SMP
  #define smp_mb()       mb()
diff --git a/include/asm-sparc64/thread_info.h b/include/asm-sparc64/thread_info.h

index 352d9943661af3738a05c0b29748a8937692f44f..c94d8b3991bdde0cf93be99e258f8798298db7e3 100644 (file)
--- a/include/asm-sparc64/thread_info.h
+++ b/include/asm-sparc64/thread_info.h
@@ -68,6 +68,9 @@ struct thread_info {
  
         struct restart_block    restart_block;
  
+       struct pt_regs          *kern_una_regs;
+       unsigned int            kern_una_insn;
+
         unsigned long           fpregs[0] __attribute__ ((aligned(64)));
  };
  
@@ -103,6 +106,8 @@ struct thread_info {
  #define TI_PCR         0x00000490
  #define TI_CEE_STUFF   0x00000498
  #define TI_RESTART_BLOCK 0x000004a0
+#define TI_KUNA_REGS   0x000004c8
+#define TI_KUNA_INSN   0x000004d0
  #define TI_FPREGS      0x00000500
  
  /* We embed this in the uppermost byte of thread_info->flags */
diff --git a/include/asm-um/page.h b/include/asm-um/page.h

index 5afee8a8cdf39ea0ec3135feab5b45c269daccdd..f58aedadeb4e3d88e222dcfeccfc7745a3cba5d4 100644 (file)
--- a/include/asm-um/page.h
+++ b/include/asm-um/page.h
@@ -104,8 +104,8 @@ extern void *to_virt(unsigned long phys);
   * casting is the right thing, but 32-bit UML can't have 64-bit virtual
   * addresses
   */
-#define __pa(virt) to_phys((void *) (unsigned long) virt)
-#define __va(phys) to_virt((unsigned long) phys)
+#define __pa(virt) to_phys((void *) (unsigned long) (virt))
+#define __va(phys) to_virt((unsigned long) (phys))
  
  #define page_to_pfn(page) ((page) - mem_map)
  #define pfn_to_page(pfn) (mem_map + (pfn))
diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h

index 213b852af53e83c77f151137d5ab483fca27f8c8..0240d366a0a4db32672c6e20c734efe21e32aa17 100644 (file)
--- a/include/asm-v850/socket.h
+++ b/include/asm-v850/socket.h
@@ -14,6 +14,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-x86_64/checksum.h b/include/asm-x86_64/checksum.h

index d01356f01448188f4f8322c371b0899c2d47ae16..989469e8e0b7d756ce2d3a0e17d76c83d2763d3a 100644 (file)
--- a/include/asm-x86_64/checksum.h
+++ b/include/asm-x86_64/checksum.h
@@ -64,7 +64,7 @@ static inline unsigned short ip_fast_csum(unsigned char *iph, unsigned int ihl)
                 "  adcl $0, %0\n"
                 "  notl %0\n"
                 "2:"
-       /* Since the input registers which are loaded with iph and ipl
+       /* Since the input registers which are loaded with iph and ihl
            are modified, we must also specify them as outputs, or gcc
            will assume they contain their original values. */
         : "=r" (sum), "=r" (iph), "=r" (ihl)
diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h

index 8e94edf0b9844096c99bf808b34270bcceefe0d1..e682edc24a68072c97da7611be582f92b9000425 100644 (file)
--- a/include/asm-x86_64/e820.h
+++ b/include/asm-x86_64/e820.h
@@ -51,6 +51,8 @@ extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
  
  extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
  extern void e820_setup_gap(void);
+extern unsigned long e820_hole_size(unsigned long start_pfn,
+                                   unsigned long end_pfn);
  
  extern void __init parse_memopt(char *p, char **end);
  
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h

index 106f666517bb47eedd3983d8bf72dcdc4fd8d63e..85549e656eeb26aad35a50c973f8ea8b9a441bd2 100644 (file)
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -32,7 +32,7 @@
  #define ID_MASK                0x00200000
  
  #define desc_empty(desc) \
-               (!((desc)->a + (desc)->b))
+               (!((desc)->a | (desc)->b))
  
  #define desc_equal(desc1, desc2) \
                 (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h

index d9a252ea8210d2677809339fd725fd948b3deeb8..f2cdbeae5d5ba17e453f24aa172f6076f9441f9b 100644 (file)
--- a/include/asm-x86_64/socket.h
+++ b/include/asm-x86_64/socket.h
@@ -14,6 +14,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/asm-xtensa/socket.h b/include/asm-xtensa/socket.h

index daccd05a14cdfdb7cee689951f3b22e3b7926c59..00f83f3a6d72194ad0b0f3059897cecde2d61e0f 100644 (file)
--- a/include/asm-xtensa/socket.h
+++ b/include/asm-xtensa/socket.h
@@ -24,6 +24,8 @@
  #define SO_BROADCAST   6
  #define SO_SNDBUF      7
  #define SO_RCVBUF      8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
  #define SO_KEEPALIVE   9
  #define SO_OOBINLINE   10
  #define SO_NO_CHECK    11
diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h

index 5f3ab21b339b7abb32e6f06917c8b6e0eac58d06..3209dd46ea7d98ce0f846d07aad0222e39764606 100644 (file)
--- a/include/linux/8250_pci.h
+++ b/include/linux/8250_pci.h
@@ -1,2 +1,37 @@
-int pci_siig10x_fn(struct pci_dev *dev, int enable);
-int pci_siig20x_fn(struct pci_dev *dev, int enable);
+/*
+ * Definitions for PCI support.
+ */
+#define FL_BASE_MASK           0x0007
+#define FL_BASE0               0x0000
+#define FL_BASE1               0x0001
+#define FL_BASE2               0x0002
+#define FL_BASE3               0x0003
+#define FL_BASE4               0x0004
+#define FL_GET_BASE(x)         ((x) & FL_BASE_MASK)    /* extract the FL_BASEn field */
+
+/* Use successive BARs (PCI base address registers),
+   else use offset into some specified BAR */
+#define FL_BASE_BARS           0x0008
+
+/* do not assign an irq */
+#define FL_NOIRQ               0x0080
+
+/* Use the Base address register size to cap number of ports */
+#define FL_REGION_SZ_CAP       0x0100
+
+struct pciserial_board {
+       unsigned int flags;
+       unsigned int num_ports;
+       unsigned int base_baud;
+       unsigned int uart_offset;
+       unsigned int reg_shift;
+       unsigned int first_offset;
+};
+
+struct serial_private;
+
+struct serial_private *
+pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board);
+void pciserial_remove_ports(struct serial_private *priv);
+void pciserial_suspend_ports(struct serial_private *priv);
+void pciserial_resume_ports(struct serial_private *priv);
diff --git a/include/linux/ata.h b/include/linux/ata.h

index ca5fcadf998194cd663ff0414bc7435896749e84..a5b74efab0679ae2a1f17dd6329596bf89c531ea 100644 (file)
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -1,24 +1,29 @@
  
  /*
-   Copyright 2003-2004 Red Hat, Inc.  All rights reserved.
-   Copyright 2003-2004 Jeff Garzik
-
-   The contents of this file are subject to the Open
-   Software License version 1.1 that can be found at
-   http://www.opensource.org/licenses/osl-1.1.txt and is included herein
-   by reference.
-
-   Alternatively, the contents of this file may be used under the terms
-   of the GNU General Public License version 2 (the "GPL") as distributed
-   in the kernel source COPYING file, in which case the provisions of
-   the GPL are applicable instead of the above.  If you wish to allow
-   the use of your version of this file only under the terms of the
-   GPL and not to allow others to use your version of this file under
-   the OSL, indicate your decision by deleting the provisions above and
-   replace them with the notice and other provisions required by the GPL.
-   If you do not delete the provisions above, a recipient may use your
-   version of this file under either the OSL or the GPL.
-
+ *  Copyright 2003-2004 Red Hat, Inc.  All rights reserved.
+ *  Copyright 2003-2004 Jeff Garzik
+ *
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
+ *  Hardware documentation available from http://www.t13.org/
+ *
   */
  
  #ifndef __LINUX_ATA_H__
@@ -108,6 +113,8 @@ enum {
  
         /* ATA device commands */
         ATA_CMD_CHK_POWER       = 0xE5, /* check power mode */
+       ATA_CMD_STANDBY         = 0xE2, /* place in standby power mode */
+       ATA_CMD_IDLE            = 0xE3, /* place in idle power mode */
         ATA_CMD_EDD             = 0x90, /* execute device diagnostic */
         ATA_CMD_FLUSH           = 0xE7,
         ATA_CMD_FLUSH_EXT       = 0xEA,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index 0881b5cdee3d443c4aca06bd1c4d3c6652c9a5bc..19bd8e7e11bfcac95586ce28722730f7d4596118 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -301,6 +301,7 @@ struct blk_queue_tag {
         struct list_head busy_list;     /* fifo list of busy tags */
         int busy;                       /* current depth */
         int max_depth;                  /* what we will send to device */
+       int real_max_depth;             /* what the array can hold */
         atomic_t refcnt;                /* map can be shared */
  };
  
diff --git a/include/linux/dccp.h b/include/linux/dccp.h

new file mode 100644 (file)

index 0000000..007c290
--- /dev/null
+++ b/include/linux/dccp.h
@@ -0,0 +1,456 @@
+#ifndef _LINUX_DCCP_H
+#define _LINUX_DCCP_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+/* Structure describing an Internet (DCCP) socket address. */
+struct sockaddr_dccp {
+       __u16   sdccp_family;   /* Address family   */
+       __u16   sdccp_port;     /* Port number      */
+       __u32   sdccp_addr;     /* Internet address */
+       __u32   sdccp_service;  /* Service          */
+       /* Pad to size of `struct sockaddr': 16 bytes. */
+       __u32   sdccp_pad;
+};
+
+/**
+ * struct dccp_hdr - generic part of DCCP packet header
+ *
+ * @dccph_sport - Relevant port on the endpoint that sent this packet
+ * @dccph_dport - Relevant port on the other endpoint
+ * @dccph_doff - Data Offset from the start of the DCCP header, in 32-bit words
+ * @dccph_ccval - Used by the HC-Sender CCID
+ * @dccph_cscov - Parts of the packet that are covered by the Checksum field
+ * @dccph_checksum - Internet checksum, depends on dccph_cscov
+ * @dccph_x - 0 = 24 bit sequence number, 1 = 48
+ * @dccph_type - packet type, see DCCP_PKT_ prefixed macros
+ * @dccph_seq - sequence number high or low order 24 bits, depends on dccph_x
+ */
+struct dccp_hdr {
+       __u16   dccph_sport,
+               dccph_dport;
+       __u8    dccph_doff;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u8    dccph_cscov:4,
+               dccph_ccval:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       __u8    dccph_ccval:4,
+               dccph_cscov:4;
+#else
+#error  "Adjust your <asm/byteorder.h> defines"
+#endif
+       __u16   dccph_checksum;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u32   dccph_x:1,
+               dccph_type:4,
+               dccph_reserved:3,
+               dccph_seq:24;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       __u32   dccph_reserved:3,
+               dccph_type:4,
+               dccph_x:1,
+               dccph_seq:24;
+#else
+#error  "Adjust your <asm/byteorder.h> defines"
+#endif
+};
+
+/**
+ * struct dccp_hdr_ext - the low bits of a 48 bit seq packet
+ *
+ * @dccph_seq_low - low 24 bits of a 48 bit seq packet
+ */
+struct dccp_hdr_ext {
+       __u32   dccph_seq_low;
+};
+
+/**
+ * struct dccp_hdr_request - Connection initiation request header
+ *
+ * @dccph_req_service - Service to which the client app wants to connect
+ * @dccph_req_options - list of options (must be a multiple of 32 bits)
+ */
+struct dccp_hdr_request {
+       __u32   dccph_req_service;
+};
+/**
+ * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets
+ *
+ * @dccph_ack_nr_high - 48 bit ack number high order bits, contains GSR
+ * @dccph_ack_nr_low - 48 bit ack number low order bits, contains GSR
+ */
+struct dccp_hdr_ack_bits {
+       __u32   dccph_reserved1:8,
+               dccph_ack_nr_high:24;
+       __u32   dccph_ack_nr_low;
+};
+/**
+ * struct dccp_hdr_response - Connection initiation response header
+ *
+ * @dccph_resp_ack - 48 bit ack number (high and low order bits), contains GSR;
+ *                   see struct dccp_hdr_ack_bits
+ * @dccph_resp_service - Echoes the Service Code on a received DCCP-Request
+ * @dccph_resp_options - list of options (must be a multiple of 32 bits)
+ */
+struct dccp_hdr_response {
+       struct dccp_hdr_ack_bits        dccph_resp_ack;
+       __u32                           dccph_resp_service;
+};
+
+/**
+ * struct dccp_hdr_reset - Unconditionally shut down a connection
+ *
+ * @dccph_reset_ack - acknowledgment bits (see struct dccp_hdr_ack_bits)
+ * @dccph_reset_code, @dccph_reset_data - reset code plus 3 bytes of data
+ */
+struct dccp_hdr_reset {
+       struct dccp_hdr_ack_bits        dccph_reset_ack;
+       __u8                            dccph_reset_code,
+                                       dccph_reset_data[3];
+};
+
+enum dccp_pkt_type {
+       DCCP_PKT_REQUEST = 0,
+       DCCP_PKT_RESPONSE,
+       DCCP_PKT_DATA,
+       DCCP_PKT_ACK,
+       DCCP_PKT_DATAACK,
+       DCCP_PKT_CLOSEREQ,
+       DCCP_PKT_CLOSE,
+       DCCP_PKT_RESET,
+       DCCP_PKT_SYNC,
+       DCCP_PKT_SYNCACK,
+       DCCP_PKT_INVALID,
+};
+
+#define DCCP_NR_PKT_TYPES DCCP_PKT_INVALID
+
+static inline unsigned int dccp_packet_hdr_len(const __u8 type)
+{
+       if (type == DCCP_PKT_DATA)
+               return 0;
+       if (type == DCCP_PKT_DATAACK    ||
+           type == DCCP_PKT_ACK        ||
+           type == DCCP_PKT_SYNC       ||
+           type == DCCP_PKT_SYNCACK    ||
+           type == DCCP_PKT_CLOSE      ||
+           type == DCCP_PKT_CLOSEREQ)
+               return sizeof(struct dccp_hdr_ack_bits);
+       if (type == DCCP_PKT_REQUEST)
+               return sizeof(struct dccp_hdr_request);
+       if (type == DCCP_PKT_RESPONSE)
+               return sizeof(struct dccp_hdr_response);
+       return sizeof(struct dccp_hdr_reset);
+}
+enum dccp_reset_codes {
+       DCCP_RESET_CODE_UNSPECIFIED = 0,
+       DCCP_RESET_CODE_CLOSED,
+       DCCP_RESET_CODE_ABORTED,
+       DCCP_RESET_CODE_NO_CONNECTION,
+       DCCP_RESET_CODE_PACKET_ERROR,
+       DCCP_RESET_CODE_OPTION_ERROR,
+       DCCP_RESET_CODE_MANDATORY_ERROR,
+       DCCP_RESET_CODE_CONNECTION_REFUSED,
+       DCCP_RESET_CODE_BAD_SERVICE_CODE,
+       DCCP_RESET_CODE_TOO_BUSY,
+       DCCP_RESET_CODE_BAD_INIT_COOKIE,
+       DCCP_RESET_CODE_AGGRESSION_PENALTY,
+};
+
+/* DCCP options */
+enum {
+       DCCPO_PADDING = 0,
+       DCCPO_MANDATORY = 1,
+       DCCPO_MIN_RESERVED = 3,
+       DCCPO_MAX_RESERVED = 31,
+       DCCPO_NDP_COUNT = 37,
+       DCCPO_ACK_VECTOR_0 = 38,
+       DCCPO_ACK_VECTOR_1 = 39,
+       DCCPO_TIMESTAMP = 41,
+       DCCPO_TIMESTAMP_ECHO = 42,
+       DCCPO_ELAPSED_TIME = 43,
+       DCCPO_MAX = 45,
+       DCCPO_MIN_CCID_SPECIFIC = 128,
+       DCCPO_MAX_CCID_SPECIFIC = 255,
+};
+
+/* DCCP features */
+enum {
+       DCCPF_RESERVED = 0,
+       DCCPF_SEQUENCE_WINDOW = 3,
+       DCCPF_SEND_ACK_VECTOR = 6,
+       DCCPF_SEND_NDP_COUNT = 7,
+       /* 10-127 reserved */
+       DCCPF_MIN_CCID_SPECIFIC = 128,
+       DCCPF_MAX_CCID_SPECIFIC = 255,
+};
+
+/* DCCP socket options */
+#define DCCP_SOCKOPT_PACKET_SIZE       1
+
+#ifdef __KERNEL__
+
+#include <linux/in.h>
+#include <linux/list.h>
+#include <linux/uio.h>
+#include <linux/workqueue.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_timewait_sock.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/tcp.h>
+
+enum dccp_state {
+       DCCP_OPEN       = TCP_ESTABLISHED,
+       DCCP_REQUESTING = TCP_SYN_SENT,
+       DCCP_PARTOPEN   = TCP_FIN_WAIT1, /* FIXME:
+                                          This mapping is horrible, but TCP has
+                                          no matching state for DCCP_PARTOPEN,
+                                          as TCP_SYN_RECV is already used by
+                                          DCCP_RESPOND. Why not stop using TCP
+                                          mapping of states? We no longer use
+                                          sk_stream_sendmsg, so there doesn't
+                                          seem to be any reason for us to
+                                          do the TCP mapping here */
+       DCCP_LISTEN     = TCP_LISTEN,
+       DCCP_RESPOND    = TCP_SYN_RECV,
+       DCCP_CLOSING    = TCP_CLOSING,
+       DCCP_TIME_WAIT  = TCP_TIME_WAIT,
+       DCCP_CLOSED     = TCP_CLOSE,
+       DCCP_MAX_STATES = TCP_MAX_STATES,
+};
+
+#define DCCP_STATE_MASK 0xf
+#define DCCP_ACTION_FIN (1<<7)
+
+enum {
+       DCCPF_OPEN       = TCPF_ESTABLISHED,
+       DCCPF_REQUESTING = TCPF_SYN_SENT,
+       DCCPF_PARTOPEN   = TCPF_FIN_WAIT1,
+       DCCPF_LISTEN     = TCPF_LISTEN,
+       DCCPF_RESPOND    = TCPF_SYN_RECV,
+       DCCPF_CLOSING    = TCPF_CLOSING,
+       DCCPF_TIME_WAIT  = TCPF_TIME_WAIT,
+       DCCPF_CLOSED     = TCPF_CLOSE,
+};
+
+static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
+{
+       return (struct dccp_hdr *)skb->h.raw;
+}
+
+static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb)
+{
+       return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr));
+}
+
+static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh)
+{
+       return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0);
+}
+
+static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb)
+{
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+       return __dccp_basic_hdr_len(dh);
+}
+
+static inline __u64 dccp_hdr_seq(const struct sk_buff *skb)
+{
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u64 seq_nr = ntohl(dh->dccph_seq << 8);
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       __u64 seq_nr = ntohl(dh->dccph_seq);
+#else
+#error  "Adjust your <asm/byteorder.h> defines"
+#endif
+
+       if (dh->dccph_x != 0)
+               seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low);
+
+       return seq_nr;
+}
+
+static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb)
+{
+       return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb));
+}
+
+static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb)
+{
+       return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb));
+}
+
+static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
+{
+       const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb);
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low);
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low);
+#else
+#error  "Adjust your <asm/byteorder.h> defines"
+#endif
+}
+
+static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb)
+{
+       return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb));
+}
+
+static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb)
+{
+       return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb));
+}
+
+static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh)
+{
+       return __dccp_basic_hdr_len(dh) +
+              dccp_packet_hdr_len(dh->dccph_type);
+}
+
+static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
+{
+       return __dccp_hdr_len(dccp_hdr(skb));
+}
+
+
+/* initial values for each feature */
+#define DCCPF_INITIAL_SEQUENCE_WINDOW          100
+/* FIXME: for now we're using CCID 3 (TFRC) */
+#define DCCPF_INITIAL_CCID                     3
+#define DCCPF_INITIAL_SEND_ACK_VECTOR          0
+/* FIXME: for now we default to 1 but it should really be 0 */
+#define DCCPF_INITIAL_SEND_NDP_COUNT           1
+
+#define DCCP_NDP_LIMIT 0xFFFFFF
+
+/**
+  * struct dccp_options - option values for a DCCP connection
+  *    @dccpo_sequence_window - Sequence Window Feature (section 7.5.2)
+  *    @dccpo_ccid - Congestion Control Id (CCID) (section 10)
+  *    @dccpo_send_ack_vector - Send Ack Vector Feature (section 11.5)
+  *    @dccpo_send_ndp_count - Send NDP Count Feature (7.7.2)
+  */
+struct dccp_options {
+       __u64   dccpo_sequence_window;
+       __u8    dccpo_ccid;
+       __u8    dccpo_send_ack_vector;
+       __u8    dccpo_send_ndp_count;
+};
+
+extern void __dccp_options_init(struct dccp_options *dccpo);
+extern void dccp_options_init(struct dccp_options *dccpo);
+extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb);
+
+struct dccp_request_sock {
+       struct inet_request_sock dreq_inet_rsk;
+       __u64                    dreq_iss;
+       __u64                    dreq_isr;
+       __u32                    dreq_service;
+};
+
+static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req)
+{
+       return (struct dccp_request_sock *)req;
+}
+
+extern struct inet_timewait_death_row dccp_death_row;
+
+/* Read about the ECN nonce to see why it is 253 */
+#define DCCP_MAX_ACK_VECTOR_LEN 253
+
+struct dccp_options_received {
+       u32     dccpor_ndp:24,
+               dccpor_ack_vector_len:8;
+       u32     dccpor_ack_vector_idx:10;
+       /* 22 bits hole, try to pack */
+       u32     dccpor_timestamp;
+       u32     dccpor_timestamp_echo;
+       u32     dccpor_elapsed_time;
+};
+
+struct ccid;
+
+enum dccp_role {
+       DCCP_ROLE_UNDEFINED,
+       DCCP_ROLE_LISTEN,
+       DCCP_ROLE_CLIENT,
+       DCCP_ROLE_SERVER,
+};
+
+/**
+ * struct dccp_sock - DCCP socket state
+ *
+ * @dccps_swl - sequence number window low
+ * @dccps_swh - sequence number window high
+ * @dccps_awl - acknowledgement number window low
+ * @dccps_awh - acknowledgement number window high
+ * @dccps_iss - initial sequence number sent
+ * @dccps_isr - initial sequence number received
+ * @dccps_osr - first OPEN sequence number received
+ * @dccps_gss - greatest sequence number sent
+ * @dccps_gsr - greatest valid sequence number received
+ * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss
+ * @dccps_timestamp_time - time of latest TIMESTAMP option
+ * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
+ * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options)
+ * @dccps_pmtu_cookie - Last pmtu seen by socket
+ * @dccps_packet_size - Set thru setsockopt
+ * @dccps_role - Role of this sock, one of %dccp_role
+ * @dccps_ndp_count - number of Non Data Packets since last data packet
+ * @dccps_hc_rx_ackpkts - receiver half connection acked packets
+ */
+struct dccp_sock {
+       /* inet_connection_sock has to be the first member of dccp_sock */
+       struct inet_connection_sock     dccps_inet_connection;
+       __u64                           dccps_swl;
+       __u64                           dccps_swh;
+       __u64                           dccps_awl;
+       __u64                           dccps_awh;
+       __u64                           dccps_iss;
+       __u64                           dccps_isr;
+       __u64                           dccps_osr;
+       __u64                           dccps_gss;
+       __u64                           dccps_gsr;
+       __u64                           dccps_gar;
+       unsigned long                   dccps_service;
+       struct timeval                  dccps_timestamp_time;
+       __u32                           dccps_timestamp_echo;
+       __u32                           dccps_packet_size;
+       unsigned long                   dccps_ndp_count;
+       __u16                           dccps_ext_header_len;
+       __u32                           dccps_pmtu_cookie;
+       __u32                           dccps_mss_cache;
+       struct dccp_options             dccps_options;
+       struct dccp_ackpkts             *dccps_hc_rx_ackpkts;
+       void                            *dccps_hc_rx_ccid_private;
+       void                            *dccps_hc_tx_ccid_private;
+       struct ccid                     *dccps_hc_rx_ccid;
+       struct ccid                     *dccps_hc_tx_ccid;
+       struct dccp_options_received    dccps_options_received;
+       enum dccp_role                  dccps_role:2;
+};
+ 
+static inline struct dccp_sock *dccp_sk(const struct sock *sk)
+{
+       return (struct dccp_sock *)sk;
+}
+
+static inline const char *dccp_role(const struct sock *sk)
+{
+       switch (dccp_sk(sk)->dccps_role) {
+       case DCCP_ROLE_UNDEFINED: return "undefined";
+       case DCCP_ROLE_LISTEN:    return "listen";
+       case DCCP_ROLE_SERVER:    return "server";
+       case DCCP_ROLE_CLIENT:    return "client";
+       }
+       return NULL;
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_DCCP_H */
diff --git a/include/linux/dcookies.h b/include/linux/dcookies.h

index c28050136164f239f15486abcd41a7aa132f3d90..1d68428c925db1425ab43db4bd93b06ceca636b7 100644 (file)
--- a/include/linux/dcookies.h
+++ b/include/linux/dcookies.h
@@ -48,12 +48,12 @@ int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
  
  #else
  
-struct dcookie_user * dcookie_register(void)
+static inline struct dcookie_user * dcookie_register(void)
  {
         return NULL;
  }
  
-void dcookie_unregister(struct dcookie_user * user)
+static inline void dcookie_unregister(struct dcookie_user * user)
  {
         return;
  }
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h

index a0ab26aab450db653ab44d45296bc2809b42b13a..ed1440ea4c91ebec8d76e295e0b074ffe629c113 100644 (file)
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -250,6 +250,12 @@ struct ethtool_stats {
         u64     data[0];
  };
  
+struct ethtool_perm_addr {
+       u32     cmd;            /* ETHTOOL_GPERMADDR */
+       u32     size;
+       u8      data[0];
+};
+
  struct net_device;
  
  /* Some generic methods drivers may use in their ethtool_ops */
@@ -261,6 +267,8 @@ u32 ethtool_op_get_sg(struct net_device *dev);
  int ethtool_op_set_sg(struct net_device *dev, u32 data);
  u32 ethtool_op_get_tso(struct net_device *dev);
  int ethtool_op_set_tso(struct net_device *dev, u32 data);
+int ethtool_op_get_perm_addr(struct net_device *dev, 
+                            struct ethtool_perm_addr *addr, u8 *data);
  
  /**
   * &ethtool_ops - Alter and report network device settings
@@ -294,7 +302,8 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data);
   * get_strings: Return a set of strings that describe the requested objects 
   * phys_id: Identify the device
   * get_stats: Return statistics about the device
- *
+ * get_perm_addr: Gets the permanent hardware address
+ * 
   * Description:
   *
   * get_settings:
@@ -352,6 +361,7 @@ struct ethtool_ops {
         int     (*phys_id)(struct net_device *, u32);
         int     (*get_stats_count)(struct net_device *);
         void    (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *);
+       int     (*get_perm_addr)(struct net_device *, struct ethtool_perm_addr *, u8 *);
         int     (*begin)(struct net_device *);
         void    (*complete)(struct net_device *);
  };
@@ -389,6 +399,7 @@ struct ethtool_ops {
  #define ETHTOOL_GSTATS         0x0000001d /* get NIC-specific statistics */
  #define ETHTOOL_GTSO           0x0000001e /* Get TSO enable (ethtool_value) */
  #define ETHTOOL_STSO           0x0000001f /* Set TSO enable (ethtool_value) */
+#define ETHTOOL_GPERMADDR      0x00000020 /* Get permanent hardware address */
  
  /* compatibility with older code */
  #define SPARC_ETH_GSET         ETHTOOL_GSET
@@ -408,6 +419,8 @@ struct ethtool_ops {
  #define SUPPORTED_FIBRE                        (1 << 10)
  #define SUPPORTED_BNC                  (1 << 11)
  #define SUPPORTED_10000baseT_Full      (1 << 12)
+#define SUPPORTED_Pause                        (1 << 13)
+#define SUPPORTED_Asym_Pause           (1 << 14)
  
  /* Indicates what features are advertised by the interface. */
  #define ADVERTISED_10baseT_Half                (1 << 0)
@@ -423,6 +436,8 @@ struct ethtool_ops {
  #define ADVERTISED_FIBRE               (1 << 10)
  #define ADVERTISED_BNC                 (1 << 11)
  #define ADVERTISED_10000baseT_Full     (1 << 12)
+#define ADVERTISED_Pause               (1 << 13)
+#define ADVERTISED_Asym_Pause          (1 << 14)
  
  /* The following are all involved in forcing a particular link
   * mode for the device for setting things.  When getting the
diff --git a/include/linux/fs.h b/include/linux/fs.h

index f9adf75fd9b4badc81b1e5d61df8ba211615f579..67e6732d4fdc736ec39f4c57324e4d64064e063e 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -993,8 +993,8 @@ struct inode_operations {
         int (*rename) (struct inode *, struct dentry *,
                         struct inode *, struct dentry *);
         int (*readlink) (struct dentry *, char __user *,int);
-       int (*follow_link) (struct dentry *, struct nameidata *);
-       void (*put_link) (struct dentry *, struct nameidata *);
+       void * (*follow_link) (struct dentry *, struct nameidata *);
+       void (*put_link) (struct dentry *, struct nameidata *, void *);
         void (*truncate) (struct inode *);
         int (*permission) (struct inode *, int, struct nameidata *);
         int (*setattr) (struct dentry *, struct iattr *);
@@ -1602,8 +1602,8 @@ extern struct file_operations generic_ro_fops;
  extern int vfs_readlink(struct dentry *, char __user *, int, const char *);
  extern int vfs_follow_link(struct nameidata *, const char *);
  extern int page_readlink(struct dentry *, char __user *, int);
-extern int page_follow_link_light(struct dentry *, struct nameidata *);
-extern void page_put_link(struct dentry *, struct nameidata *);
+extern void *page_follow_link_light(struct dentry *, struct nameidata *);
+extern void page_put_link(struct dentry *, struct nameidata *, void *);
  extern int page_symlink(struct inode *inode, const char *symname, int len);
  extern struct inode_operations page_symlink_inode_operations;
  extern int generic_readlink(struct dentry *, char __user *, int);
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h

index d07a92c94776a90866ab1327f1c6dfc80bbfbf7f..03b8e7932b830a3f1bfffcc531ec4ee9b1662b1e 100644 (file)
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -21,7 +21,7 @@
   */
  static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
                                  const char *old_name, const char *new_name,
-                                int isdir)
+                                int isdir, struct inode *target, struct inode *source)
  {
         u32 cookie = inotify_get_cookie();
  
@@ -36,31 +36,34 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
                 isdir = IN_ISDIR;
         inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name);
         inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name);
+
+       if (target) {
+               inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL);
+               inotify_inode_is_dead(target);
+       }
+
+       if (source) {
+               inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL);
+       }
  }
  
  /*
- * fsnotify_unlink - file was unlinked
+ * fsnotify_nameremove - a filename was removed from a directory
   */
-static inline void fsnotify_unlink(struct dentry *dentry, struct inode *dir)
+static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
  {
-       struct inode *inode = dentry->d_inode;
-
-       inode_dir_notify(dir, DN_DELETE);
-       inotify_inode_queue_event(dir, IN_DELETE, 0, dentry->d_name.name);
-       inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL);
-
-       inotify_inode_is_dead(inode);
+       if (isdir)
+               isdir = IN_ISDIR;
+       dnotify_parent(dentry, DN_DELETE);
+       inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name);
  }
  
  /*
- * fsnotify_rmdir - directory was removed
+ * fsnotify_inoderemove - an inode is going away
   */
-static inline void fsnotify_rmdir(struct dentry *dentry, struct inode *inode,
-                                 struct inode *dir)
+static inline void fsnotify_inoderemove(struct inode *inode)
  {
-       inode_dir_notify(dir, DN_DELETE);
-       inotify_inode_queue_event(dir,IN_DELETE|IN_ISDIR,0,dentry->d_name.name);
-       inotify_inode_queue_event(inode, IN_DELETE_SELF | IN_ISDIR, 0, NULL);
+       inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL);
         inotify_inode_is_dead(inode);
  }
  
diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h

index 9debe6bbe5f0265cf4ab5dd81248d2b9d4d4f04b..bab303dafd6e1e4005d3d021ae7239788a3c2c63 100644 (file)
--- a/include/linux/hippidevice.h
+++ b/include/linux/hippidevice.h
@@ -26,8 +26,12 @@
  #include <linux/if_hippi.h>
  
  #ifdef __KERNEL__
-extern unsigned short hippi_type_trans(struct sk_buff *skb,
-                                      struct net_device *dev);
+
+struct hippi_cb {
+       __u32   ifield;
+};
+
+extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev);
  
  extern struct net_device *alloc_hippi_dev(int sizeof_priv);
  #endif
diff --git a/include/linux/ide.h b/include/linux/ide.h

index 92129078d4f3946d7e0af5495df920cb88cb147a..a6dbb51ecd7b780bd4659ae569fed4f2368ac2d9 100644 (file)
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1501,4 +1501,10 @@ extern struct bus_type ide_bus_type;
  #define ide_id_has_flush_cache_ext(id) \
         (((id)->cfs_enable_2 & 0x2400) == 0x2400)
  
+static inline int hwif_to_node(ide_hwif_t *hwif)
+{
+       struct pci_dev *dev = hwif->pci_dev;
+       return dev ? pcibus_to_node(dev->bus) : -1;
+}
+
  #endif /* _IDE_H */
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h

index b5b58e9c054c3901f2b5721ea3440f97e4f2c3db..fc2d4c8225aa80d0429572594d26c87a8a5f9425 100644 (file)
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -110,6 +110,8 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
  {
         return (struct ethhdr *)skb->mac.raw;
  }
+
+extern struct ctl_table ether_table[];
  #endif
  
  #endif /* _LINUX_IF_ETHER_H */
diff --git a/include/linux/if_fc.h b/include/linux/if_fc.h

index 33330b458b9568d5a4303322d8011856c2ab6a66..376a34ea47231282bc883e05fcb2dcd487b52bd3 100644 (file)
--- a/include/linux/if_fc.h
+++ b/include/linux/if_fc.h
@@ -44,7 +44,7 @@ struct fcllc {
         __u8  ssap;                     /* source SAP */
         __u8  llc;                      /* LLC control field */
         __u8  protid[3];                /* protocol id */
-       __u16 ethertype;                /* ether type field */
+       __be16 ethertype;               /* ether type field */
  };
  
  #endif /* _LINUX_IF_FC_H */
diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h

index a912818e63618f0df8a54200869d5d3289cc25bd..1288a161bc0baf1b64583d668a24069e0f03a44b 100644 (file)
--- a/include/linux/if_fddi.h
+++ b/include/linux/if_fddi.h
@@ -85,7 +85,7 @@ struct fddi_snap_hdr
         __u8    ssap;                                   /* always 0xAA */
         __u8    ctrl;                                   /* always 0x03 */
         __u8    oui[FDDI_K_OUI_LEN];    /* organizational universal id */
-       __u16   ethertype;                              /* packet type ID field */
+       __be16  ethertype;                              /* packet type ID field */
         } __attribute__ ((packed));
  
  /* Define FDDI LLC frame header */
diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h

index 3c94b1736570157b0e81d87c6ec86640305e1633..511999c7eedaa96149d2e44f06e0059e94c93141 100644 (file)
--- a/include/linux/if_frad.h
+++ b/include/linux/if_frad.h
@@ -191,10 +191,12 @@ struct frad_local
     int               buffer;           /* current buffer for S508 firmware */
  };
  
-extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *));
-
  #endif /* __KERNEL__ */
  
  #endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */
  
+#ifdef __KERNEL__
+extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *));
+#endif
+
  #endif
diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h

index c8ca72c46f76e76ebdadae0df70747782c1359fc..94d31ca7d71a0936c19e066f3c899184b64eaa66 100644 (file)
--- a/include/linux/if_hippi.h
+++ b/include/linux/if_hippi.h
@@ -102,9 +102,9 @@ struct hippi_fp_hdr
  #error "Please fix <asm/byteorder.h>"
  #endif
  #else
-       __u32           fixed;
+       __be32          fixed;
  #endif
-       __u32           d2_size;
+       __be32          d2_size;
  } __attribute__ ((packed));
  
  struct hippi_le_hdr
@@ -144,7 +144,7 @@ struct hippi_snap_hdr
         __u8    ssap;                   /* always 0xAA */
         __u8    ctrl;                   /* always 0x03 */
         __u8    oui[HIPPI_OUI_LEN];     /* organizational universal id (zero)*/
-       __u16   ethertype;              /* packet type ID field */
+       __be16  ethertype;              /* packet type ID field */
  } __attribute__ ((packed));
  
  struct hippi_hdr
diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h

index 3fba9e2f5427d633a9260854112647e9294b07a3..5502f597cf0e8808b582c6bb8d7cc52412bd2d11 100644 (file)
--- a/include/linux/if_tr.h
+++ b/include/linux/if_tr.h
@@ -43,12 +43,16 @@ struct trh_hdr {
  };
  
  #ifdef __KERNEL__
+#include <linux/config.h>
  #include <linux/skbuff.h>
  
  static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb)
  {
         return (struct trh_hdr *)skb->mac.raw;
  }
+#ifdef CONFIG_SYSCTL
+extern struct ctl_table tr_table[];
+#endif
  #endif
  
  /* This is an Token-Ring LLC structure */
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h

index 62a9d89dfbe2fcbb78edf0175446e6f2de8b2c32..17d0c0d40b0e376b80e0fa95768dc022961ee671 100644 (file)
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -155,7 +155,6 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb,
  {
         struct net_device_stats *stats;
  
-       skb->real_dev = skb->dev;
         skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK];
         if (skb->dev == NULL) {
                 dev_kfree_skb_any(skb);
diff --git a/include/linux/igmp.h b/include/linux/igmp.h

index 0c31ef0b5badc3d9dffb39a7414459d2101ae5dd..28f4f3b36950593a5eec787ce7193cb207bf9be8 100644 (file)
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -129,6 +129,9 @@ struct igmpv3_query {
  #include <linux/skbuff.h>
  #include <linux/in.h>
  
+extern int sysctl_igmp_max_memberships;
+extern int sysctl_igmp_max_msf;
+
  struct ip_sf_socklist
  {
         unsigned int            sl_max;
diff --git a/include/linux/in.h b/include/linux/in.h

index fb88c66d748dcad89a08a2f18d9176adb67a2539..ba355384016afa440a492b25c3fd6149f0603a3b 100644 (file)
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -32,6 +32,7 @@ enum {
    IPPROTO_PUP = 12,            /* PUP protocol                         */
    IPPROTO_UDP = 17,            /* User Datagram Protocol               */
    IPPROTO_IDP = 22,            /* XNS IDP protocol                     */
+  IPPROTO_DCCP = 33,           /* Datagram Congestion Control Protocol */
    IPPROTO_RSVP = 46,           /* RSVP protocol                        */
    IPPROTO_GRE = 47,            /* Cisco GRE tunnels (rfc 1701,1702)    */
  
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h

new file mode 100644 (file)

index 0000000..a4606e5
--- /dev/null
+++ b/include/linux/inet_diag.h
@@ -0,0 +1,138 @@
+#ifndef _INET_DIAG_H_
+#define _INET_DIAG_H_ 1
+
+/* Just some random number */
+#define TCPDIAG_GETSOCK 18
+#define DCCPDIAG_GETSOCK 19
+
+#define INET_DIAG_GETSOCK_MAX 24
+
+/* Socket identity */
+struct inet_diag_sockid {
+       __u16   idiag_sport;
+       __u16   idiag_dport;
+       __u32   idiag_src[4];
+       __u32   idiag_dst[4];
+       __u32   idiag_if;
+       __u32   idiag_cookie[2];
+#define INET_DIAG_NOCOOKIE (~0U)
+};
+
+/* Request structure */
+
+struct inet_diag_req {
+       __u8    idiag_family;           /* Family of addresses. */
+       __u8    idiag_src_len;
+       __u8    idiag_dst_len;
+       __u8    idiag_ext;              /* Query extended information */
+
+       struct inet_diag_sockid id;
+
+       __u32   idiag_states;           /* States to dump */
+       __u32   idiag_dbs;              /* Tables to dump (NI) */
+};
+
+enum {
+       INET_DIAG_REQ_NONE,
+       INET_DIAG_REQ_BYTECODE,
+};
+
+#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE
+
+/* Bytecode is a sequence of 4-byte commands followed by variable arguments.
+ * All the commands identified by "code" are conditional jumps forward:
+ * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be the
+ * length of the command and its arguments.
+ */
+ 
+struct inet_diag_bc_op {
+       unsigned char   code;
+       unsigned char   yes;
+       unsigned short  no;
+};
+
+enum {
+       INET_DIAG_BC_NOP,
+       INET_DIAG_BC_JMP,
+       INET_DIAG_BC_S_GE,
+       INET_DIAG_BC_S_LE,
+       INET_DIAG_BC_D_GE,
+       INET_DIAG_BC_D_LE,
+       INET_DIAG_BC_AUTO,
+       INET_DIAG_BC_S_COND,
+       INET_DIAG_BC_D_COND,
+};
+
+struct inet_diag_hostcond {
+       __u8    family;
+       __u8    prefix_len;
+       int     port;
+       __u32   addr[0];
+};
+
+/* Base info structure. It contains socket identity (addrs/ports/cookie)
+ * and, alas, the information shown by netstat. */
+struct inet_diag_msg {
+       __u8    idiag_family;
+       __u8    idiag_state;
+       __u8    idiag_timer;
+       __u8    idiag_retrans;
+
+       struct inet_diag_sockid id;
+
+       __u32   idiag_expires;
+       __u32   idiag_rqueue;
+       __u32   idiag_wqueue;
+       __u32   idiag_uid;
+       __u32   idiag_inode;
+};
+
+/* Extensions */
+
+enum {
+       INET_DIAG_NONE,
+       INET_DIAG_MEMINFO,
+       INET_DIAG_INFO,
+       INET_DIAG_VEGASINFO,
+       INET_DIAG_CONG,
+};
+
+#define INET_DIAG_MAX INET_DIAG_CONG
+
+
+/* INET_DIAG_MEMINFO */
+
+struct inet_diag_meminfo {
+       __u32   idiag_rmem;
+       __u32   idiag_wmem;
+       __u32   idiag_fmem;
+       __u32   idiag_tmem;
+};
+
+/* INET_DIAG_VEGASINFO */
+
+struct tcpvegas_info {
+       __u32   tcpv_enabled;
+       __u32   tcpv_rttcnt;
+       __u32   tcpv_rtt;
+       __u32   tcpv_minrtt;
+};
+
+#ifdef __KERNEL__
+struct sock;
+struct inet_hashinfo;
+
+struct inet_diag_handler {
+       struct inet_hashinfo    *idiag_hashinfo;
+       void                    (*idiag_get_info)(struct sock *sk,
+                                                 struct inet_diag_msg *r,
+                                                 void *info);
+       __u16                   idiag_info_size;
+       __u16                   idiag_type;
+};
+
+extern int  inet_diag_register(const struct inet_diag_handler *handler);
+extern void inet_diag_unregister(const struct inet_diag_handler *handler);
+#endif /* __KERNEL__ */
+
+#endif /* _INET_DIAG_H_ */
diff --git a/include/linux/inotify.h b/include/linux/inotify.h

index a40c2bf0408e0b7014388e26d0f2ae83592a51ee..93bb3afe646bc315d1ce17849acdad149e9b8684 100644 (file)
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -35,6 +35,7 @@ struct inotify_event {
  #define IN_CREATE              0x00000100      /* Subfile was created */
  #define IN_DELETE              0x00000200      /* Subfile was deleted */
  #define IN_DELETE_SELF         0x00000400      /* Self was deleted */
+#define IN_MOVE_SELF           0x00000800      /* Self was moved */
  
  /* the following are legal events.  they are sent as needed to any watch */
  #define IN_UNMOUNT             0x00002000      /* Backing fs was unmounted */
@@ -56,7 +57,8 @@ struct inotify_event {
   */
  #define IN_ALL_EVENTS  (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
                          IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \
-                        IN_MOVED_TO | IN_DELETE | IN_CREATE | IN_DELETE_SELF)
+                        IN_MOVED_TO | IN_DELETE | IN_CREATE | IN_DELETE_SELF | \
+                        IN_MOVE_SELF)
  
  #ifdef __KERNEL__
  
diff --git a/include/linux/ip.h b/include/linux/ip.h

index 31e7cedd9f844b3e6d28dffe271da3b724e0f644..33e8a19a1a0fbaec33122f4f931c3d577fc975c9 100644 (file)
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -196,6 +196,8 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
  #endif
  #endif
  
+extern int inet_sk_rebuild_header(struct sock *sk);
+
  struct iphdr {
  #if defined(__LITTLE_ENDIAN_BITFIELD)
         __u8    ihl:4,
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h

index 6fcd6a0ade24860807917b746abc5179c6338f6a..3c7dbc6a0a707510ae0171c20dc5e9918ca673e0 100644 (file)
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -193,6 +193,11 @@ struct inet6_skb_parm {
  
  #define IP6CB(skb)     ((struct inet6_skb_parm*)((skb)->cb))
  
+static inline int inet6_iif(const struct sk_buff *skb)
+{
+       return IP6CB(skb)->iif;
+}
+
  struct tcp6_request_sock {
         struct tcp_request_sock req;
         struct in6_addr         loc_addr;
@@ -308,6 +313,36 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
  
  #define __ipv6_only_sock(sk)   (inet6_sk(sk)->ipv6only)
  #define ipv6_only_sock(sk)     ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk))
+
+#include <linux/tcp.h>
+
+struct tcp6_timewait_sock {
+       struct tcp_timewait_sock tw_v6_sk;
+       struct in6_addr          tw_v6_daddr;
+       struct in6_addr          tw_v6_rcv_saddr;
+};
+
+static inline struct tcp6_timewait_sock *tcp6_twsk(const struct sock *sk)
+{
+       return (struct tcp6_timewait_sock *)sk;
+}
+
+static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk)
+{
+       return likely(sk->sk_state != TCP_TIME_WAIT) ?
+               &inet6_sk(sk)->rcv_saddr : &tcp6_twsk(sk)->tw_v6_rcv_saddr;
+}
+
+static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
+{
+       return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;
+}
+
+static inline int inet_v6_ipv6only(const struct sock *sk)
+{
+       return likely(sk->sk_state != TCP_TIME_WAIT) ?
+               ipv6_only_sock(sk) : inet_twsk(sk)->tw_ipv6only;
+}
  #else
  #define __ipv6_only_sock(sk)   0
  #define ipv6_only_sock(sk)     0
@@ -322,8 +357,19 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
         return NULL;
  }
  
-#endif
+#define __tcp_v6_rcv_saddr(__sk)       NULL
+#define tcp_v6_rcv_saddr(__sk)         NULL
+#define tcp_twsk_ipv6only(__sk)                0
+#define inet_v6_ipv6only(__sk)         0
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
  
-#endif
+#define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif)       \
+       (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))   && \
+        ((__sk)->sk_family             == AF_INET6)            && \
+        ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))     && \
+        ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
+        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
  
-#endif
+#endif /* __KERNEL__ */
+
+#endif /* _IPV6_H */
diff --git a/include/linux/libata.h b/include/linux/libata.h

index 6cd9ba63563b4db61b0977a88db84894d313735c..fc05a989928898c91b4800647aa21d228cd5e5a7 100644 (file)
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1,23 +1,26 @@
  /*
-   Copyright 2003-2004 Red Hat, Inc.  All rights reserved.
-   Copyright 2003-2004 Jeff Garzik
-
-   The contents of this file are subject to the Open
-   Software License version 1.1 that can be found at
-   http://www.opensource.org/licenses/osl-1.1.txt and is included herein
-   by reference.
-
-   Alternatively, the contents of this file may be used under the terms
-   of the GNU General Public License version 2 (the "GPL") as distributed
-   in the kernel source COPYING file, in which case the provisions of
-   the GPL are applicable instead of the above.  If you wish to allow
-   the use of your version of this file only under the terms of the
-   GPL and not to allow others to use your version of this file under
-   the OSL, indicate your decision by deleting the provisions above and
-   replace them with the notice and other provisions required by the GPL.
-   If you do not delete the provisions above, a recipient may use your
-   version of this file under either the OSL or the GPL.
-
+ *  Copyright 2003-2005 Red Hat, Inc.  All rights reserved.
+ *  Copyright 2003-2005 Jeff Garzik
+ *
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *  libata documentation is available via 'make {ps|pdf}docs',
+ *  as Documentation/DocBook/libata.*
+ *
   */
  
  #ifndef __LINUX_LIBATA_H__
@@ -113,6 +116,8 @@ enum {
         ATA_FLAG_MMIO           = (1 << 6), /* use MMIO, not PIO */
         ATA_FLAG_SATA_RESET     = (1 << 7), /* use COMRESET */
         ATA_FLAG_PIO_DMA        = (1 << 8), /* PIO cmds via DMA */
+       ATA_FLAG_NOINTR         = (1 << 9), /* FIXME: Remove this once
+                                            * proper HSM is in place. */
  
         ATA_QCFLAG_ACTIVE       = (1 << 1), /* cmd not yet ack'd to scsi lyer */
         ATA_QCFLAG_SG           = (1 << 3), /* have s/g table? */
@@ -363,7 +368,7 @@ struct ata_port_operations {
  
         void (*host_stop) (struct ata_host_set *host_set);
  
-       void (*bmdma_stop) (struct ata_port *ap);
+       void (*bmdma_stop) (struct ata_queued_cmd *qc);
         u8   (*bmdma_status) (struct ata_port *ap);
  };
  
@@ -424,7 +429,7 @@ extern void ata_dev_id_string(u16 *id, unsigned char *s,
  extern void ata_dev_config(struct ata_port *ap, unsigned int i);
  extern void ata_bmdma_setup (struct ata_queued_cmd *qc);
  extern void ata_bmdma_start (struct ata_queued_cmd *qc);
-extern void ata_bmdma_stop(struct ata_port *ap);
+extern void ata_bmdma_stop(struct ata_queued_cmd *qc);
  extern u8   ata_bmdma_status(struct ata_port *ap);
  extern void ata_bmdma_irq_clear(struct ata_port *ap);
  extern void ata_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat);
@@ -644,7 +649,7 @@ static inline void scr_write(struct ata_port *ap, unsigned int reg, u32 val)
         ap->ops->scr_write(ap, reg, val);
  }
  
-static inline void scr_write_flush(struct ata_port *ap, unsigned int reg, 
+static inline void scr_write_flush(struct ata_port *ap, unsigned int reg,
                                    u32 val)
  {
         ap->ops->scr_write(ap, reg, val);
diff --git a/include/linux/list.h b/include/linux/list.h

index aab2db21b013e438c672dbfe4fcceb065bf578d3..e6ec596822740ea70a2e6b4b865b704ba8bef4f7 100644 (file)
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -418,6 +418,20 @@ static inline void list_splice_init(struct list_head *list,
              &pos->member != (head);                                    \
              pos = n, n = list_entry(n->member.next, typeof(*n), member))
  
+/**
+ * list_for_each_entry_safe_continue - iterate over list of given type,
+ *                     continuing after existing point, safe against removal of list entry
+ * @pos:       the type * to use as a loop counter.
+ * @n:         another type * to use as temporary storage
+ * @head:      the head for your list.
+ * @member:    the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe_continue(pos, n, head, member)                \
+       for (pos = list_entry(pos->member.next, typeof(*pos), member),          \
+               n = list_entry(pos->member.next, typeof(*pos), member);         \
+            &pos->member != (head);                                            \
+            pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
  /**
   * list_for_each_rcu   -       iterate over an rcu-protected list
   * @pos:       the &struct list_head to use as a loop counter.
@@ -620,6 +634,57 @@ static inline void hlist_add_after(struct hlist_node *n,
                 next->next->pprev  = &next->next;
  }
  
+/**
+ * hlist_add_before_rcu - adds the specified element to the specified hlist
+ * before the specified node while permitting racing traversals.
+ * @n: the new element to add to the hash list.
+ * @next: the existing element to add the new element before.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_add_head_rcu()
+ * or hlist_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_for_each_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs.
+ */
+static inline void hlist_add_before_rcu(struct hlist_node *n,
+                                       struct hlist_node *next)
+{
+       n->pprev = next->pprev;
+       n->next = next;
+       smp_wmb();
+       next->pprev = &n->next;
+       *(n->pprev) = n;
+}
+
+/**
+ * hlist_add_after_rcu - adds the specified element to the specified hlist
+ * after the specified node while permitting racing traversals.
+ * @prev: the existing element to add the new element after.
+ * @n: the new element to add to the hash list.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_add_head_rcu()
+ * or hlist_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_for_each_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs.
+ */
+static inline void hlist_add_after_rcu(struct hlist_node *prev,
+                                      struct hlist_node *n)
+{
+       n->next = prev->next;
+       n->pprev = &prev->next;
+       smp_wmb();
+       prev->next = n;
+       if (n->next)
+               n->next->pprev = &n->next;
+}
+
  #define hlist_entry(ptr, type, member) container_of(ptr,type,member)
  
  #define hlist_for_each(pos, head) \
diff --git a/include/linux/mii.h b/include/linux/mii.h

index 374b615ea9ea95ec130aeb10dfffc31f0f29a6f1..9b8d0476988ad3bb1aa9bfeeae74b9d8b1a6f57f 100644 (file)
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -22,6 +22,7 @@
  #define MII_EXPANSION       0x06        /* Expansion register          */
  #define MII_CTRL1000        0x09        /* 1000BASE-T control          */
  #define MII_STAT1000        0x0a        /* 1000BASE-T status           */
+#define MII_ESTATUS        0x0f        /* Extended Status */
  #define MII_DCOUNTER        0x12        /* Disconnect counter          */
  #define MII_FCSCOUNTER      0x13        /* False carrier counter       */
  #define MII_NWAYTEST        0x14        /* N-way auto-neg test reg     */
@@ -54,7 +55,10 @@
  #define BMSR_ANEGCAPABLE        0x0008  /* Able to do auto-negotiation */
  #define BMSR_RFAULT             0x0010  /* Remote fault detected       */
  #define BMSR_ANEGCOMPLETE       0x0020  /* Auto-negotiation complete   */
-#define BMSR_RESV               0x07c0  /* Unused...                   */
+#define BMSR_RESV               0x00c0  /* Unused...                   */
+#define BMSR_ESTATEN           0x0100  /* Extended Status in R15 */
+#define BMSR_100HALF2          0x0200  /* Can do 100BASE-T2 HDX */
+#define BMSR_100FULL2          0x0400  /* Can do 100BASE-T2 FDX */
  #define BMSR_10HALF             0x0800  /* Can do 10mbps, half-duplex  */
  #define BMSR_10FULL             0x1000  /* Can do 10mbps, full-duplex  */
  #define BMSR_100HALF            0x2000  /* Can do 100mbps, half-duplex */
@@ -114,6 +118,9 @@
  #define EXPANSION_MFAULTS       0x0010  /* Multiple faults detected    */
  #define EXPANSION_RESV          0xffe0  /* Unused...                   */
  
+#define ESTATUS_1000_TFULL     0x2000  /* Can do 1000BT Full */
+#define ESTATUS_1000_THALF     0x1000  /* Can do 1000BT Half */
+
  /* N-way test register. */
  #define NWAYTEST_RESV1          0x00ff  /* Unused...                   */
  #define NWAYTEST_LOOPBACK       0x0100  /* Enable loopback for N-way   */
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 6eb7f48317f8f14a5493e234429697a0c9898d7d..82d7024f0765f19648a4f4ad35693f9d320aefe0 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -625,10 +625,16 @@ static inline int page_mapped(struct page *page)
   * Used to decide whether a process gets delivered SIGBUS or
   * just gets major/minor fault counters bumped up.
   */
-#define VM_FAULT_OOM   (-1)
-#define VM_FAULT_SIGBUS        0
-#define VM_FAULT_MINOR 1
-#define VM_FAULT_MAJOR 2
+#define VM_FAULT_OOM   0x00
+#define VM_FAULT_SIGBUS        0x01
+#define VM_FAULT_MINOR 0x02
+#define VM_FAULT_MAJOR 0x03
+
+/* 
+ * Special case for get_user_pages.
+ * Must be in a distinct bit from the above VM_FAULT_ flags.
+ */
+#define VM_FAULT_WRITE 0x10
  
  #define offset_in_page(p)      ((unsigned long)(p) & ~PAGE_MASK)
  
@@ -704,7 +710,13 @@ extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsign
  extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
  extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
  extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
-extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
+extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
+
+static inline int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
+{
+       return __handle_mm_fault(mm, vma, address, write_access) & (~VM_FAULT_WRITE);
+}
+
  extern int make_pages_present(unsigned long addr, unsigned long end);
  extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
  void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h

index f90f674eb3b0ee48b6701941da6457a898530f5b..9a0893f3249e8b2869d24c6e52e7c1799c8ff7bb 100644 (file)
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -63,11 +63,12 @@ struct device;
  
  struct mmc_host {
         struct device           *dev;
+       struct class_device     class_dev;
+       int                     index;
         struct mmc_host_ops     *ops;
         unsigned int            f_min;
         unsigned int            f_max;
         u32                     ocr_avail;
-       char                    host_name[8];
  
         /* host specific block data */
         unsigned int            max_seg_size;   /* see blk_queue_max_segment_size */
@@ -97,6 +98,7 @@ extern void mmc_free_host(struct mmc_host *);
  
  #define mmc_priv(x)    ((void *)((x) + 1))
  #define mmc_dev(x)     ((x)->dev)
+#define mmc_hostname(x)        ((x)->class_dev.class_id)
  
  extern int mmc_suspend_host(struct mmc_host *, pm_message_t);
  extern int mmc_resume_host(struct mmc_host *);
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h

index dce53ac1625d8cea5c78b1940aedc6441dc52d63..47da39ba3f0377f378556150daed3d5530a49849 100644 (file)
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -1,6 +1,6 @@
  /*
   * Device tables which are exported to userspace via
- * scripts/table2alias.c.  You must keep that file in sync with this
+ * scripts/mod/file2alias.c.  You must keep that file in sync with this
   * header.
   */
  
@@ -33,7 +33,8 @@ struct ieee1394_device_id {
         __u32 model_id;
         __u32 specifier_id;
         __u32 version;
-       kernel_ulong_t driver_data;
+       kernel_ulong_t driver_data
+               __attribute__((aligned(sizeof(kernel_ulong_t))));
  };
  
  
@@ -182,9 +183,18 @@ struct of_device_id
         char    name[32];
         char    type[32];
         char    compatible[128];
+#ifdef __KERNEL__
         void    *data;
+#else
+       kernel_ulong_t data;
+#endif
  };
  
+/* VIO */
+struct vio_device_id {
+       char type[32];
+       char compat[32];
+};
  
  /* PCMCIA */
  
@@ -208,7 +218,8 @@ struct pcmcia_device_id {
  #ifdef __KERNEL__
         const char *    prod_id[4];
  #else
-       kernel_ulong_t  prod_id[4];
+       kernel_ulong_t  prod_id[4]
+               __attribute__((aligned(sizeof(kernel_ulong_t))));
  #endif
  
         /* not matched against */
diff --git a/include/linux/net.h b/include/linux/net.h

index 20cb226b22685fcd55428104c84b64ba14c4443c..4e981585a89a349f9444ffdbfe9588cbf66d1671 100644 (file)
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -84,6 +84,7 @@ enum sock_type {
         SOCK_RAW        = 3,
         SOCK_RDM        = 4,
         SOCK_SEQPACKET  = 5,
+       SOCK_DCCP       = 6,
         SOCK_PACKET     = 10,
  };
  
@@ -282,5 +283,15 @@ static struct proto_ops name##_ops = {                     \
  #define MODULE_ALIAS_NETPROTO(proto) \
         MODULE_ALIAS("net-pf-" __stringify(proto))
  
+#define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \
+       MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto))
+
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+extern ctl_table net_table[];
+extern int net_msg_cost;
+extern int net_msg_burst;
+#endif
+
  #endif /* __KERNEL__ */
  #endif /* _LINUX_NET_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h

index 3a0ed7f9e8015bb3165fb1e55eaf44493d9521e2..7c717907896d1d5de04ac4028f70a0ff83b8bb13 100644 (file)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -244,6 +244,7 @@ struct netdev_boot_setup {
  };
  #define NETDEV_BOOT_SETUP_MAX 8
  
+extern int __init netdev_boot_setup(char *str);
  
  /*
   *     The DEVICE structure.
@@ -336,6 +337,7 @@ struct net_device
         /* Interface address info. */
         unsigned char           broadcast[MAX_ADDR_LEN];        /* hw bcast add */
         unsigned char           dev_addr[MAX_ADDR_LEN]; /* hw address   */
+       unsigned char           perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
         unsigned char           addr_len;       /* hardware address length      */
         unsigned short          dev_id;         /* for shared network cards */
  
@@ -497,10 +499,12 @@ static inline void *netdev_priv(struct net_device *dev)
  #define SET_NETDEV_DEV(net, pdev)      ((net)->class_dev.dev = (pdev))
  
  struct packet_type {
-       __be16                  type;   /* This is really htons(ether_type).    */
-       struct net_device               *dev;   /* NULL is wildcarded here              */
-       int                     (*func) (struct sk_buff *, struct net_device *,
-                                        struct packet_type *);
+       __be16                  type;   /* This is really htons(ether_type). */
+       struct net_device       *dev;   /* NULL is wildcarded here           */
+       int                     (*func) (struct sk_buff *,
+                                        struct net_device *,
+                                        struct packet_type *,
+                                        struct net_device *);
         void                    *af_packet_priv;
         struct list_head        list;
  };
@@ -671,6 +675,7 @@ extern void         dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
  extern void            dev_init(void);
  
  extern int             netdev_nit;
+extern int             netdev_budget;
  
  /* Called by rtnetlink.c:rtnl_unlock() */
  extern void netdev_run_todo(void);
@@ -697,19 +702,9 @@ static inline int netif_carrier_ok(const struct net_device *dev)
  
  extern void __netdev_watchdog_up(struct net_device *dev);
  
-static inline void netif_carrier_on(struct net_device *dev)
-{
-       if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
-               linkwatch_fire_event(dev);
-       if (netif_running(dev))
-               __netdev_watchdog_up(dev);
-}
+extern void netif_carrier_on(struct net_device *dev);
  
-static inline void netif_carrier_off(struct net_device *dev)
-{
-       if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
-               linkwatch_fire_event(dev);
-}
+extern void netif_carrier_off(struct net_device *dev);
  
  /* Hot-plugging. */
  static inline int netif_device_present(struct net_device *dev)
@@ -916,6 +911,14 @@ extern int skb_checksum_help(struct sk_buff *skb, int inward);
  extern void            net_enable_timestamp(void);
  extern void            net_disable_timestamp(void);
  
+#ifdef CONFIG_PROC_FS
+extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
+extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+extern void dev_seq_stop(struct seq_file *seq, void *v);
+#endif
+
+extern void linkwatch_run_queue(void);
+
  #endif /* __KERNEL__ */
  
  #endif /* _LINUX_DEV_H */
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h

index 2e2045482cb134d6bf8f9631e2c32a391052d6a1..be365e70ee998a25865e4504fd853aa9b2bde2a3 100644 (file)
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -21,10 +21,23 @@
  #define NF_STOP 5
  #define NF_MAX_VERDICT NF_STOP
  
+/* we overload the higher bits for encoding auxiliary data such as the queue
+ * number. Not nice, but better than additional function arguments. */
+#define NF_VERDICT_MASK 0x0000ffff
+#define NF_VERDICT_BITS 16
+
+#define NF_VERDICT_QMASK 0xffff0000
+#define NF_VERDICT_QBITS 16
+
+#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_QBITS) & NF_VERDICT_QMASK) | NF_QUEUE)
+
+/* only for userspace compatibility */
+#ifndef __KERNEL__
  /* Generic cache responses from hook functions.
     <= 0x2000 is used for protocol-flags. */
  #define NFC_UNKNOWN 0x4000
  #define NFC_ALTERED 0x8000
+#endif
  
  #ifdef __KERNEL__
  #include <linux/config.h>
@@ -101,15 +114,51 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
  
  extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
  
-typedef void nf_logfn(unsigned int hooknum,
+/* those NF_LOG_* defines and struct nf_loginfo are legacy definitions that will
+ * disappear once iptables is replaced with pkttables.  Please DO NOT use them
+ * for any new code! */
+#define NF_LOG_TCPSEQ          0x01    /* Log TCP sequence numbers */
+#define NF_LOG_TCPOPT          0x02    /* Log TCP options */
+#define NF_LOG_IPOPT           0x04    /* Log IP options */
+#define NF_LOG_UID             0x08    /* Log UID owning local socket */
+#define NF_LOG_MASK            0x0f
+
+#define NF_LOG_TYPE_LOG                0x01
+#define NF_LOG_TYPE_ULOG       0x02
+
+struct nf_loginfo {
+       u_int8_t type;
+       union {
+               struct {
+                       u_int32_t copy_len;
+                       u_int16_t group;
+                       u_int16_t qthreshold;
+               } ulog;
+               struct {
+                       u_int8_t level;
+                       u_int8_t logflags;
+               } log;
+       } u;
+};
+
+typedef void nf_logfn(unsigned int pf,
+                     unsigned int hooknum,
                       const struct sk_buff *skb,
                       const struct net_device *in,
                       const struct net_device *out,
+                     const struct nf_loginfo *li,
                       const char *prefix);
  
+struct nf_logger {
+       struct module   *me;
+       nf_logfn        *logfn;
+       char            *name;
+};
+
  /* Function to register/unregister log function. */
-int nf_log_register(int pf, nf_logfn *logfn);
-void nf_log_unregister(int pf, nf_logfn *logfn);
+int nf_log_register(int pf, struct nf_logger *logger);
+int nf_log_unregister_pf(int pf);
+void nf_log_unregister_logger(struct nf_logger *logger);
  
  /* Calls the registered backend logging function */
  void nf_log_packet(int pf,
@@ -117,6 +166,7 @@ void nf_log_packet(int pf,
                    const struct sk_buff *skb,
                    const struct net_device *in,
                    const struct net_device *out,
+                  struct nf_loginfo *li,
                    const char *fmt, ...);
                     
  /* Activate hook; either okfn or kfree_skb called, unless a hook
@@ -175,11 +225,16 @@ int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt,
                   int *len);
  
  /* Packet queuing */
-typedef int (*nf_queue_outfn_t)(struct sk_buff *skb, 
-                                struct nf_info *info, void *data);
+struct nf_queue_handler {
+       int (*outfn)(struct sk_buff *skb, struct nf_info *info,
+                    unsigned int queuenum, void *data);
+       void *data;
+       char *name;
+};
  extern int nf_register_queue_handler(int pf, 
-                                     nf_queue_outfn_t outfn, void *data);
+                                     struct nf_queue_handler *qh);
  extern int nf_unregister_queue_handler(int pf);
+extern void nf_unregister_queue_handlers(struct nf_queue_handler *qh);
  extern void nf_reinject(struct sk_buff *skb,
                         struct nf_info *info,
                         unsigned int verdict);
@@ -190,6 +245,27 @@ extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
  /* FIXME: Before cache is ever used, this must be implemented for real. */
  extern void nf_invalidate_cache(int pf);
  
+/* Call this before modifying an existing packet: ensures it is
+   modifiable and linear to the point you care about (writable_len).
+   Returns true or false. */
+extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len);
+
+struct nf_queue_rerouter {
+       void (*save)(const struct sk_buff *skb, struct nf_info *info);
+       int (*reroute)(struct sk_buff **skb, const struct nf_info *info);
+       int rer_size;
+};
+
+#define nf_info_reroute(x) ((void *)(x) + sizeof(struct nf_info))
+
+extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer);
+extern int nf_unregister_queue_rerouter(int pf);
+
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+extern struct proc_dir_entry *proc_net_netfilter;
+#endif
+
  #else /* !CONFIG_NETFILTER */
  #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
  static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h

new file mode 100644 (file)

index 0000000..1d5b10a
--- /dev/null
+++ b/include/linux/netfilter/nfnetlink.h
@@ -0,0 +1,169 @@
+#ifndef _NFNETLINK_H
+#define _NFNETLINK_H
+#include <linux/types.h>
+
+#ifndef __KERNEL__
+/* nfnetlink groups: Up to 32 maximum - backwards compatibility for userspace */
+#define NF_NETLINK_CONNTRACK_NEW               0x00000001
+#define NF_NETLINK_CONNTRACK_UPDATE            0x00000002
+#define NF_NETLINK_CONNTRACK_DESTROY           0x00000004
+#define NF_NETLINK_CONNTRACK_EXP_NEW           0x00000008
+#define NF_NETLINK_CONNTRACK_EXP_UPDATE                0x00000010
+#define NF_NETLINK_CONNTRACK_EXP_DESTROY       0x00000020
+#endif
+
+enum nfnetlink_groups {
+       NFNLGRP_NONE,
+#define NFNLGRP_NONE                   NFNLGRP_NONE
+       NFNLGRP_CONNTRACK_NEW,
+#define NFNLGRP_CONNTRACK_NEW          NFNLGRP_CONNTRACK_NEW
+       NFNLGRP_CONNTRACK_UPDATE,
+#define NFNLGRP_CONNTRACK_UPDATE       NFNLGRP_CONNTRACK_UPDATE
+       NFNLGRP_CONNTRACK_DESTROY,
+#define NFNLGRP_CONNTRACK_DESTROY      NFNLGRP_CONNTRACK_DESTROY
+       NFNLGRP_CONNTRACK_EXP_NEW,
+#define        NFNLGRP_CONNTRACK_EXP_NEW       NFNLGRP_CONNTRACK_EXP_NEW
+       NFNLGRP_CONNTRACK_EXP_UPDATE,
+#define NFNLGRP_CONNTRACK_EXP_UPDATE   NFNLGRP_CONNTRACK_EXP_UPDATE
+       NFNLGRP_CONNTRACK_EXP_DESTROY,
+#define NFNLGRP_CONNTRACK_EXP_DESTROY  NFNLGRP_CONNTRACK_EXP_DESTROY
+       __NFNLGRP_MAX,
+};
+#define NFNLGRP_MAX    (__NFNLGRP_MAX - 1)
+
+/* Generic structure for encapsulating optional netfilter information.
+ * It is reminiscent of sockaddr, but with sa_family replaced
+ * with attribute type. 
+ * ! This should someday be put somewhere generic as now rtnetlink and
+ * ! nfnetlink use the same attributes methods. - J. Schulist.
+ */
+
+struct nfattr
+{
+       u_int16_t nfa_len;
+       u_int16_t nfa_type;
+} __attribute__ ((packed));
+
+/* FIXME: Shamelessly copied and pasted from rtnetlink.h, it's time
+ *       to put this in a generic file */
+
+#define NFA_ALIGNTO     4
+#define NFA_ALIGN(len) (((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1))
+#define NFA_OK(nfa,len)        ((len) > 0 && (nfa)->nfa_len >= sizeof(struct nfattr) \
+       && (nfa)->nfa_len <= (len))
+#define NFA_NEXT(nfa,attrlen)  ((attrlen) -= NFA_ALIGN((nfa)->nfa_len), \
+       (struct nfattr *)(((char *)(nfa)) + NFA_ALIGN((nfa)->nfa_len)))
+#define NFA_LENGTH(len)        (NFA_ALIGN(sizeof(struct nfattr)) + (len))
+#define NFA_SPACE(len) NFA_ALIGN(NFA_LENGTH(len))
+#define NFA_DATA(nfa)   ((void *)(((char *)(nfa)) + NFA_LENGTH(0)))
+#define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0))
+#define NFA_NEST(skb, type) \
+({     struct nfattr *__start = (struct nfattr *) (skb)->tail; \
+       NFA_PUT(skb, type, 0, NULL); \
+       __start;  })
+#define NFA_NEST_END(skb, start) \
+({      (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \
+        (skb)->len; })
+#define NFA_NEST_CANCEL(skb, start) \
+({      if (start) \
+                skb_trim(skb, (unsigned char *) (start) - (skb)->data); \
+        -1; })
+
+/* General form of address family dependent message.
+ */
+struct nfgenmsg {
+       u_int8_t  nfgen_family;         /* AF_xxx */
+       u_int8_t  version;              /* nfnetlink version */
+       u_int16_t res_id;               /* resource id */
+} __attribute__ ((packed));
+
+#define NFNETLINK_V0   0
+
+#define NFM_NFA(n)      ((struct nfattr *)(((char *)(n)) \
+        + NLMSG_ALIGN(sizeof(struct nfgenmsg))))
+#define NFM_PAYLOAD(n)  NLMSG_PAYLOAD(n, sizeof(struct nfgenmsg))
+
+/* netfilter netlink message types are split in two pieces:
+ * 8-bit subsystem, 8-bit operation.
+ */
+
+#define NFNL_SUBSYS_ID(x)      (((x) & 0xff00) >> 8)
+#define NFNL_MSG_TYPE(x)       ((x) & 0x00ff)
+
+/* No enum here, otherwise __stringify() trick of MODULE_ALIAS_NFNL_SUBSYS()
+ * won't work anymore */
+#define NFNL_SUBSYS_NONE               0
+#define NFNL_SUBSYS_CTNETLINK          1
+#define NFNL_SUBSYS_CTNETLINK_EXP      2
+#define NFNL_SUBSYS_QUEUE              3
+#define NFNL_SUBSYS_ULOG               4
+#define NFNL_SUBSYS_COUNT              5
+
+#ifdef __KERNEL__
+
+#include <linux/netlink.h>
+#include <linux/capability.h>
+
+struct nfnl_callback
+{
+       int (*call)(struct sock *nl, struct sk_buff *skb, 
+               struct nlmsghdr *nlh, struct nfattr *cda[], int *errp);
+       kernel_cap_t cap_required; /* capabilities required for this msg */
+       u_int16_t attr_count;   /* number of nfattr's */
+};
+
+struct nfnetlink_subsystem
+{
+       const char *name;
+       __u8 subsys_id;         /* nfnetlink subsystem ID */
+       __u8 cb_count;          /* number of callbacks */
+       struct nfnl_callback *cb; /* callback for individual types */
+};
+
+extern void __nfa_fill(struct sk_buff *skb, int attrtype,
+        int attrlen, const void *data);
+#define NFA_PUT(skb, attrtype, attrlen, data) \
+({ if (skb_tailroom(skb) < (int)NFA_SPACE(attrlen)) goto nfattr_failure; \
+   __nfa_fill(skb, attrtype, attrlen, data); })
+
+extern struct semaphore nfnl_sem;
+
+#define nfnl_shlock()          down(&nfnl_sem)
+#define nfnl_shlock_nowait()   down_trylock(&nfnl_sem)
+
+#define nfnl_shunlock()                do { up(&nfnl_sem); \
+                                    if(nfnl && nfnl->sk_receive_queue.qlen) \
+                                           nfnl->sk_data_ready(nfnl, 0); \
+                               } while(0)
+
+extern void nfnl_lock(void);
+extern void nfnl_unlock(void);
+
+extern int nfnetlink_subsys_register(struct nfnetlink_subsystem *n);
+extern int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n);
+
+extern int nfattr_parse(struct nfattr *tb[], int maxattr, 
+                       struct nfattr *nfa, int len);
+
+#define nfattr_parse_nested(tb, max, nfa) \
+       nfattr_parse((tb), (max), NFA_DATA((nfa)), NFA_PAYLOAD((nfa)))
+
+#define nfattr_bad_size(tb, max, cta_min)                              \
+({     int __i, __res = 0;                                             \
+       for (__i = 0; __i < (max); __i++)                               \
+               if ((tb)[__i] && NFA_PAYLOAD((tb)[__i]) < (cta_min)[__i]) { \
+                       __res = 1;                                      \
+                       break;                                          \
+               }                                                       \
+       __res;                                                          \
+})
+
+extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, 
+                         int echo);
+extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags);
+
+#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
+       MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
+
+#endif /* __KERNEL__ */
+#endif /* _NFNETLINK_H */
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h

new file mode 100644 (file)

index 0000000..5c55751
--- /dev/null
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -0,0 +1,124 @@
+#ifndef _IPCONNTRACK_NETLINK_H
+#define _IPCONNTRACK_NETLINK_H
+#include <linux/netfilter/nfnetlink.h>
+
+enum cntl_msg_types {
+       IPCTNL_MSG_CT_NEW,
+       IPCTNL_MSG_CT_GET,
+       IPCTNL_MSG_CT_DELETE,
+       IPCTNL_MSG_CT_GET_CTRZERO,
+
+       IPCTNL_MSG_MAX
+};
+
+enum ctnl_exp_msg_types {
+       IPCTNL_MSG_EXP_NEW,
+       IPCTNL_MSG_EXP_GET,
+       IPCTNL_MSG_EXP_DELETE,
+
+       IPCTNL_MSG_EXP_MAX
+};
+
+
+enum ctattr_type {
+       CTA_UNSPEC,
+       CTA_TUPLE_ORIG,
+       CTA_TUPLE_REPLY,
+       CTA_STATUS,
+       CTA_PROTOINFO,
+       CTA_HELP,
+       CTA_NAT,
+       CTA_TIMEOUT,
+       CTA_MARK,
+       CTA_COUNTERS_ORIG,
+       CTA_COUNTERS_REPLY,
+       CTA_USE,
+       CTA_ID,
+       __CTA_MAX
+};
+#define CTA_MAX (__CTA_MAX - 1)
+
+enum ctattr_tuple {
+       CTA_TUPLE_UNSPEC,
+       CTA_TUPLE_IP,
+       CTA_TUPLE_PROTO,
+       __CTA_TUPLE_MAX
+};
+#define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1)
+
+enum ctattr_ip {
+       CTA_IP_UNSPEC,
+       CTA_IP_V4_SRC,
+       CTA_IP_V4_DST,
+       CTA_IP_V6_SRC,
+       CTA_IP_V6_DST,
+       __CTA_IP_MAX
+};
+#define CTA_IP_MAX (__CTA_IP_MAX - 1)
+
+enum ctattr_l4proto {
+       CTA_PROTO_UNSPEC,
+       CTA_PROTO_NUM,
+       CTA_PROTO_SRC_PORT,
+       CTA_PROTO_DST_PORT,
+       CTA_PROTO_ICMP_ID,
+       CTA_PROTO_ICMP_TYPE,
+       CTA_PROTO_ICMP_CODE,
+       __CTA_PROTO_MAX
+};
+#define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1)
+
+enum ctattr_protoinfo {
+       CTA_PROTOINFO_UNSPEC,
+       CTA_PROTOINFO_TCP_STATE,
+       __CTA_PROTOINFO_MAX
+};
+#define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1)
+
+enum ctattr_counters {
+       CTA_COUNTERS_UNSPEC,
+       CTA_COUNTERS_PACKETS,
+       CTA_COUNTERS_BYTES,
+       __CTA_COUNTERS_MAX
+};
+#define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
+
+enum ctattr_nat {
+       CTA_NAT_UNSPEC,
+       CTA_NAT_MINIP,
+       CTA_NAT_MAXIP,
+       CTA_NAT_PROTO,
+       __CTA_NAT_MAX
+};
+#define CTA_NAT_MAX (__CTA_NAT_MAX - 1)
+
+enum ctattr_protonat {
+       CTA_PROTONAT_UNSPEC,
+       CTA_PROTONAT_PORT_MIN,
+       CTA_PROTONAT_PORT_MAX,
+       __CTA_PROTONAT_MAX
+};
+#define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1)
+
+enum ctattr_expect {
+       CTA_EXPECT_UNSPEC,
+       CTA_EXPECT_MASTER,
+       CTA_EXPECT_TUPLE,
+       CTA_EXPECT_MASK,
+       CTA_EXPECT_TIMEOUT,
+       CTA_EXPECT_ID,
+       CTA_EXPECT_HELP_NAME,
+       __CTA_EXPECT_MAX
+};
+#define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1)
+
+enum ctattr_help {
+       CTA_HELP_UNSPEC,
+       CTA_HELP_NAME,
+       __CTA_HELP_MAX
+};
+#define CTA_HELP_MAX (__CTA_HELP_MAX - 1)
+
+#define CTA_HELP_MAXNAMESIZE   32
+
+#endif /* _IPCONNTRACK_NETLINK_H */
diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h

new file mode 100644 (file)

index 0000000..b04b038
--- /dev/null
+++ b/include/linux/netfilter/nfnetlink_log.h
@@ -0,0 +1,88 @@
+#ifndef _NFNETLINK_LOG_H
+#define _NFNETLINK_LOG_H
+
+/* This file describes the netlink messages (i.e. 'protocol packets'),
+ * and not any kind of function definitions.  It is shared between kernel and
+ * userspace.  Don't put kernel specific stuff in here */
+
+#include <linux/types.h>
+#include <linux/netfilter/nfnetlink.h>
+
+enum nfulnl_msg_types {
+       NFULNL_MSG_PACKET,              /* packet from kernel to userspace */
+       NFULNL_MSG_CONFIG,              /* connect to a particular queue */
+
+       NFULNL_MSG_MAX
+};
+
+struct nfulnl_msg_packet_hdr {
+       u_int16_t       hw_protocol;    /* hw protocol (network order) */
+       u_int8_t        hook;           /* netfilter hook */
+       u_int8_t        _pad;
+} __attribute__ ((packed));
+
+struct nfulnl_msg_packet_hw {
+       u_int16_t       hw_addrlen;
+       u_int16_t       _pad;
+       u_int8_t        hw_addr[8];
+} __attribute__ ((packed));
+
+struct nfulnl_msg_packet_timestamp {
+       aligned_u64     sec;
+       aligned_u64     usec;
+} __attribute__ ((packed));
+
+#define NFULNL_PREFIXLEN       30      /* just like old log target */
+
+enum nfulnl_attr_type {
+       NFULA_UNSPEC,
+       NFULA_PACKET_HDR,
+       NFULA_MARK,                     /* u_int32_t nfmark */
+       NFULA_TIMESTAMP,                /* nfulnl_msg_packet_timestamp */
+       NFULA_IFINDEX_INDEV,            /* u_int32_t ifindex */
+       NFULA_IFINDEX_OUTDEV,           /* u_int32_t ifindex */
+       NFULA_IFINDEX_PHYSINDEV,        /* u_int32_t ifindex */
+       NFULA_IFINDEX_PHYSOUTDEV,       /* u_int32_t ifindex */
+       NFULA_HWADDR,                   /* nfulnl_msg_packet_hw */
+       NFULA_PAYLOAD,                  /* opaque data payload */
+       NFULA_PREFIX,                   /* string prefix */
+       NFULA_UID,                      /* user id of socket */
+
+       __NFULA_MAX
+};
+#define NFULA_MAX (__NFULA_MAX - 1)
+
+enum nfulnl_msg_config_cmds {
+       NFULNL_CFG_CMD_NONE,
+       NFULNL_CFG_CMD_BIND,
+       NFULNL_CFG_CMD_UNBIND,
+       NFULNL_CFG_CMD_PF_BIND,
+       NFULNL_CFG_CMD_PF_UNBIND,
+};
+
+struct nfulnl_msg_config_cmd {
+       u_int8_t        command;        /* nfulnl_msg_config_cmds */
+} __attribute__ ((packed));
+
+struct nfulnl_msg_config_mode {
+       u_int32_t       copy_range;
+       u_int8_t        copy_mode;
+       u_int8_t        _pad;
+} __attribute__ ((packed));
+
+enum nfulnl_attr_config {
+       NFULA_CFG_UNSPEC,
+       NFULA_CFG_CMD,                  /* nfulnl_msg_config_cmd */
+       NFULA_CFG_MODE,                 /* nfulnl_msg_config_mode */
+       NFULA_CFG_NLBUFSIZ,             /* u_int32_t buffer size */
+       NFULA_CFG_TIMEOUT,              /* u_int32_t in 1/100 s */
+       NFULA_CFG_QTHRESH,              /* u_int32_t */
+       __NFULA_CFG_MAX
+};
+#define NFULA_CFG_MAX (__NFULA_CFG_MAX -1)
+
+#define NFULNL_COPY_NONE       0x00
+#define NFULNL_COPY_META       0x01
+#define NFULNL_COPY_PACKET     0x02
+
+#endif /* _NFNETLINK_LOG_H */
diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h

new file mode 100644 (file)

index 0000000..9e77437
--- /dev/null
+++ b/include/linux/netfilter/nfnetlink_queue.h
@@ -0,0 +1,89 @@
+#ifndef _NFNETLINK_QUEUE_H
+#define _NFNETLINK_QUEUE_H
+
+#include <linux/types.h>
+#include <linux/netfilter/nfnetlink.h>
+
+enum nfqnl_msg_types {
+       NFQNL_MSG_PACKET,               /* packet from kernel to userspace */
+       NFQNL_MSG_VERDICT,              /* verdict from userspace to kernel */
+       NFQNL_MSG_CONFIG,               /* connect to a particular queue */
+
+       NFQNL_MSG_MAX
+};
+
+struct nfqnl_msg_packet_hdr {
+       u_int32_t       packet_id;      /* unique ID of packet in queue */
+       u_int16_t       hw_protocol;    /* hw protocol (network order) */
+       u_int8_t        hook;           /* netfilter hook */
+} __attribute__ ((packed));
+
+struct nfqnl_msg_packet_hw {
+       u_int16_t       hw_addrlen;
+       u_int16_t       _pad;
+       u_int8_t        hw_addr[8];
+} __attribute__ ((packed));
+
+struct nfqnl_msg_packet_timestamp {
+       aligned_u64     sec;
+       aligned_u64     usec;
+} __attribute__ ((packed));
+
+enum nfqnl_attr_type {
+       NFQA_UNSPEC,
+       NFQA_PACKET_HDR,
+       NFQA_VERDICT_HDR,               /* nfqnl_msg_verdict_hdr */
+       NFQA_MARK,                      /* u_int32_t nfmark */
+       NFQA_TIMESTAMP,                 /* nfqnl_msg_packet_timestamp */
+       NFQA_IFINDEX_INDEV,             /* u_int32_t ifindex */
+       NFQA_IFINDEX_OUTDEV,            /* u_int32_t ifindex */
+       NFQA_IFINDEX_PHYSINDEV,         /* u_int32_t ifindex */
+       NFQA_IFINDEX_PHYSOUTDEV,        /* u_int32_t ifindex */
+       NFQA_HWADDR,                    /* nfqnl_msg_packet_hw */
+       NFQA_PAYLOAD,                   /* opaque data payload */
+
+       __NFQA_MAX
+};
+#define NFQA_MAX (__NFQA_MAX - 1)
+
+struct nfqnl_msg_verdict_hdr {
+       u_int32_t verdict;
+       u_int32_t id;
+} __attribute__ ((packed));
+
+
+enum nfqnl_msg_config_cmds {
+       NFQNL_CFG_CMD_NONE,
+       NFQNL_CFG_CMD_BIND,
+       NFQNL_CFG_CMD_UNBIND,
+       NFQNL_CFG_CMD_PF_BIND,
+       NFQNL_CFG_CMD_PF_UNBIND,
+};
+
+struct nfqnl_msg_config_cmd {
+       u_int8_t        command;        /* nfqnl_msg_config_cmds */
+       u_int8_t        _pad;
+       u_int16_t       pf;             /* AF_xxx for PF_[UN]BIND */
+} __attribute__ ((packed));
+
+enum nfqnl_config_mode {
+       NFQNL_COPY_NONE,
+       NFQNL_COPY_META,
+       NFQNL_COPY_PACKET,
+};
+
+struct nfqnl_msg_config_params {
+       u_int32_t       copy_range;
+       u_int8_t        copy_mode;      /* enum nfqnl_config_mode */
+} __attribute__ ((packed));
+
+
+enum nfqnl_attr_config {
+       NFQA_CFG_UNSPEC,
+       NFQA_CFG_CMD,                   /* nfqnl_msg_config_cmd */
+       NFQA_CFG_PARAMS,                /* nfqnl_msg_config_params */
+       __NFQA_CFG_MAX
+};
+#define NFQA_CFG_MAX (__NFQA_CFG_MAX-1)
+
+#endif /* _NFNETLINK_QUEUE_H */
diff --git a/include/linux/netfilter_decnet.h b/include/linux/netfilter_decnet.h

index 3064eec9cb8e2691d3849bd3592c87c452c3bf7d..6f425369ee29b09a395652f6557ddecfe3f7ee4e 100644 (file)
--- a/include/linux/netfilter_decnet.h
+++ b/include/linux/netfilter_decnet.h
@@ -9,6 +9,8 @@
  
  #include <linux/netfilter.h>
  
+/* only for userspace compatibility */
+#ifndef __KERNEL__
  /* IP Cache bits. */
  /* Src IP address. */
  #define NFC_DN_SRC             0x0001
@@ -18,6 +20,7 @@
  #define NFC_DN_IF_IN           0x0004
  /* Output device. */
  #define NFC_DN_IF_OUT          0x0008
+#endif /* ! __KERNEL__ */
  
  /* DECnet Hooks */
  /* After promisc drops, checksum checks. */
@@ -53,7 +56,21 @@ struct nf_dn_rtmsg {
  
  #define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)))
  
+#ifndef __KERNEL__
+/* backwards compatibility for userspace */
  #define DNRMG_L1_GROUP 0x01
  #define DNRMG_L2_GROUP 0x02
+#endif
+
+enum {
+       DNRNG_NLGRP_NONE,
+#define DNRNG_NLGRP_NONE       DNRNG_NLGRP_NONE
+       DNRNG_NLGRP_L1,
+#define DNRNG_NLGRP_L1         DNRNG_NLGRP_L1
+       DNRNG_NLGRP_L2,
+#define DNRNG_NLGRP_L2         DNRNG_NLGRP_L2
+       __DNRNG_NLGRP_MAX
+};
+#define DNRNG_NLGRP_MAX        (__DNRNG_NLGRP_MAX - 1)
  
  #endif /*__LINUX_DECNET_NETFILTER_H*/
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h

index 3ebc36afae1a4746be0994d709b0f4e4f6f96ba7..fdc4a95273439edd826eb928e0207420db4ea373 100644 (file)
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -8,6 +8,8 @@
  #include <linux/config.h>
  #include <linux/netfilter.h>
  
+/* only for userspace compatibility */
+#ifndef __KERNEL__
  /* IP Cache bits. */
  /* Src IP address. */
  #define NFC_IP_SRC             0x0001
@@ -35,6 +37,7 @@
  #define NFC_IP_DST_PT          0x0400
  /* Something else about the proto */
  #define NFC_IP_PROTO_UNKNOWN   0x2000
+#endif /* ! __KERNEL__ */
  
  /* IP Hooks */
  /* After promisc drops, checksum checks. */
@@ -77,11 +80,6 @@ enum nf_ip_hook_priorities {
  #ifdef __KERNEL__
  extern int ip_route_me_harder(struct sk_buff **pskb);
  
-/* Call this before modifying an existing IP packet: ensures it is
-   modifiable and linear to the point you care about (writable_len).
-   Returns true or false. */
-extern int skb_ip_make_writable(struct sk_buff **pskb,
-                               unsigned int writable_len);
  #endif /*__KERNEL__*/
  
  #endif /*__LINUX_IP_NETFILTER_H*/
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h

index 08fe5f7d14a0b2e7801cf10de6ba948c5520f25b..088742befe4975dd3f9c08441c99da207f4aae20 100644 (file)
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -65,6 +65,63 @@ enum ip_conntrack_status {
  
         /* Both together */
         IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE),
+
+       /* Connection is dying (removed from lists), can not be unset. */
+       IPS_DYING_BIT = 9,
+       IPS_DYING = (1 << IPS_DYING_BIT),
+};
+
+/* Connection tracking event bits */
+enum ip_conntrack_events
+{
+       /* New conntrack */
+       IPCT_NEW_BIT = 0,
+       IPCT_NEW = (1 << IPCT_NEW_BIT),
+
+       /* Expected connection */
+       IPCT_RELATED_BIT = 1,
+       IPCT_RELATED = (1 << IPCT_RELATED_BIT),
+
+       /* Destroyed conntrack */
+       IPCT_DESTROY_BIT = 2,
+       IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
+
+       /* Timer has been refreshed */
+       IPCT_REFRESH_BIT = 3,
+       IPCT_REFRESH = (1 << IPCT_REFRESH_BIT),
+
+       /* Status has changed */
+       IPCT_STATUS_BIT = 4,
+       IPCT_STATUS = (1 << IPCT_STATUS_BIT),
+
+       /* Update of protocol info */
+       IPCT_PROTOINFO_BIT = 5,
+       IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
+
+       /* Volatile protocol info */
+       IPCT_PROTOINFO_VOLATILE_BIT = 6,
+       IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT),
+
+       /* New helper for conntrack */
+       IPCT_HELPER_BIT = 7,
+       IPCT_HELPER = (1 << IPCT_HELPER_BIT),
+
+       /* Update of helper info */
+       IPCT_HELPINFO_BIT = 8,
+       IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT),
+
+       /* Volatile helper info */
+       IPCT_HELPINFO_VOLATILE_BIT = 9,
+       IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT),
+
+       /* NAT info */
+       IPCT_NATINFO_BIT = 10,
+       IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
+};
+
+enum ip_conntrack_expect_events {
+       IPEXP_NEW_BIT = 0,
+       IPEXP_NEW = (1 << IPEXP_NEW_BIT),
  };
  
  #ifdef __KERNEL__
@@ -152,6 +209,9 @@ struct ip_conntrack
         /* Current number of expected connections */
         unsigned int expecting;
  
+       /* Unique ID that identifies this conntrack */
+       unsigned int id;
+
         /* Helper, if any. */
         struct ip_conntrack_helper *helper;
  
@@ -171,7 +231,7 @@ struct ip_conntrack
  #endif /* CONFIG_IP_NF_NAT_NEEDED */
  
  #if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-       unsigned long mark;
+       u_int32_t mark;
  #endif
  
         /* Traversed often, so hopefully in different cacheline to top */
@@ -200,6 +260,9 @@ struct ip_conntrack_expect
         /* Usage count. */
         atomic_t use;
  
+       /* Unique ID */
+       unsigned int id;
+
  #ifdef CONFIG_IP_NF_NAT_NEEDED
         /* This is the original per-proto part, used to map the
          * expected connection the way the recipient expects. */
@@ -239,7 +302,12 @@ ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
  }
  
  /* decrement reference count on a conntrack */
-extern void ip_conntrack_put(struct ip_conntrack *ct);
+static inline void
+ip_conntrack_put(struct ip_conntrack *ct)
+{
+       IP_NF_ASSERT(ct);
+       nf_conntrack_put(&ct->ct_general);
+}
  
  /* call to create an explicit dependency on ip_conntrack. */
  extern void need_ip_conntrack(void);
@@ -274,12 +342,50 @@ extern void
  ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *data),
                       void *data);
  
+extern struct ip_conntrack_helper *
+__ip_conntrack_helper_find_byname(const char *);
+extern struct ip_conntrack_helper *
+ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple);
+extern void ip_conntrack_helper_put(struct ip_conntrack_helper *helper);
+
+extern struct ip_conntrack_protocol *
+__ip_conntrack_proto_find(u_int8_t protocol);
+extern struct ip_conntrack_protocol *
+ip_conntrack_proto_find_get(u_int8_t protocol);
+extern void ip_conntrack_proto_put(struct ip_conntrack_protocol *proto);
+
+extern void ip_ct_remove_expectations(struct ip_conntrack *ct);
+
+extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *,
+                                              struct ip_conntrack_tuple *);
+
+extern void ip_conntrack_free(struct ip_conntrack *ct);
+
+extern void ip_conntrack_hash_insert(struct ip_conntrack *ct);
+
+extern struct ip_conntrack_expect *
+__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
+
+extern struct ip_conntrack_expect *
+ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple);
+
+extern struct ip_conntrack_tuple_hash *
+__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
+                    const struct ip_conntrack *ignored_conntrack);
+
+extern void ip_conntrack_flush(void);
+
  /* It's confirmed if it is, or has been in the hash table. */
  static inline int is_confirmed(struct ip_conntrack *ct)
  {
         return test_bit(IPS_CONFIRMED_BIT, &ct->status);
  }
  
+static inline int is_dying(struct ip_conntrack *ct)
+{
+       return test_bit(IPS_DYING_BIT, &ct->status);
+}
+
  extern unsigned int ip_conntrack_htable_size;
   
  struct ip_conntrack_stat
@@ -303,6 +409,85 @@ struct ip_conntrack_stat
  
  #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
  
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+#include <linux/notifier.h>
+#include <linux/interrupt.h>
+
+struct ip_conntrack_ecache {
+       struct ip_conntrack *ct;
+       unsigned int events;
+};
+DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
+
+#define CONNTRACK_ECACHE(x)    (__get_cpu_var(ip_conntrack_ecache).x)
+ 
+extern struct notifier_block *ip_conntrack_chain;
+extern struct notifier_block *ip_conntrack_expect_chain;
+
+static inline int ip_conntrack_register_notifier(struct notifier_block *nb)
+{
+       return notifier_chain_register(&ip_conntrack_chain, nb);
+}
+
+static inline int ip_conntrack_unregister_notifier(struct notifier_block *nb)
+{
+       return notifier_chain_unregister(&ip_conntrack_chain, nb);
+}
+
+static inline int 
+ip_conntrack_expect_register_notifier(struct notifier_block *nb)
+{
+       return notifier_chain_register(&ip_conntrack_expect_chain, nb);
+}
+
+static inline int
+ip_conntrack_expect_unregister_notifier(struct notifier_block *nb)
+{
+       return notifier_chain_unregister(&ip_conntrack_expect_chain, nb);
+}
+
+extern void ip_ct_deliver_cached_events(const struct ip_conntrack *ct);
+extern void __ip_ct_event_cache_init(struct ip_conntrack *ct);
+
+static inline void 
+ip_conntrack_event_cache(enum ip_conntrack_events event,
+                        const struct sk_buff *skb)
+{
+       struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
+       struct ip_conntrack_ecache *ecache;
+       
+       local_bh_disable();
+       ecache = &__get_cpu_var(ip_conntrack_ecache);
+       if (ct != ecache->ct)
+               __ip_ct_event_cache_init(ct);
+       ecache->events |= event;
+       local_bh_enable();
+}
+
+static inline void ip_conntrack_event(enum ip_conntrack_events event,
+                                     struct ip_conntrack *ct)
+{
+       if (is_confirmed(ct) && !is_dying(ct))
+               notifier_call_chain(&ip_conntrack_chain, event, ct);
+}
+
+static inline void 
+ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
+                         struct ip_conntrack_expect *exp)
+{
+       notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
+}
+#else /* CONFIG_IP_NF_CONNTRACK_EVENTS */
+static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, 
+                                           const struct sk_buff *skb) {}
+static inline void ip_conntrack_event(enum ip_conntrack_events event, 
+                                     struct ip_conntrack *ct) {}
+static inline void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) {}
+static inline void 
+ip_conntrack_expect_event(enum ip_conntrack_expect_events event, 
+                         struct ip_conntrack_expect *exp) {}
+#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
+
  #ifdef CONFIG_IP_NF_NAT_NEEDED
  static inline int ip_nat_initialized(struct ip_conntrack *conntrack,
                                      enum ip_nat_manip_type manip)
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h

index 694aec9b478469dafd09d061bd48d4280d0f9f75..dc4d2a0575de9c666ac45428b81d47e634ac1805 100644 (file)
--- a/include/linux/netfilter_ipv4/ip_conntrack_core.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h
@@ -2,6 +2,9 @@
  #define _IP_CONNTRACK_CORE_H
  #include <linux/netfilter.h>
  
+#define MAX_IP_CT_PROTO 256
+extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
+
  /* This header is used to share core functionality between the
     standalone connection tracking module, and the compatibility layer's use
     of connection tracking. */
@@ -38,12 +41,19 @@ extern int __ip_conntrack_confirm(struct sk_buff **pskb);
  /* Confirm a connection: returns NF_DROP if packet must be dropped. */
  static inline int ip_conntrack_confirm(struct sk_buff **pskb)
  {
-       if ((*pskb)->nfct
-           && !is_confirmed((struct ip_conntrack *)(*pskb)->nfct))
-               return __ip_conntrack_confirm(pskb);
-       return NF_ACCEPT;
+       struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct;
+       int ret = NF_ACCEPT;
+
+       if (ct) {
+               if (!is_confirmed(ct))
+                       ret = __ip_conntrack_confirm(pskb);
+               ip_ct_deliver_cached_events(ct);
+       }
+       return ret;
  }
  
+extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp);
+
  extern struct list_head *ip_conntrack_hash;
  extern struct list_head ip_conntrack_expect_list;
  extern rwlock_t ip_conntrack_lock;
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h

index 3692daa93decf3455368fc3bec5dec91fa1c40f0..8d69279ccfe46961d1356b822ba53f326bcc2f1f 100644 (file)
--- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h
@@ -24,6 +24,8 @@ struct ip_conntrack_helper
         int (*help)(struct sk_buff **pskb,
                     struct ip_conntrack *ct,
                     enum ip_conntrack_info conntrackinfo);
+
+       int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct);
  };
  
  extern int ip_conntrack_helper_register(struct ip_conntrack_helper *);
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h

index e20b57c5e1b7685053be9a0ff5a814d65dab1580..b6b99be8632a8709687e7b003858e1ee2f2de586 100644 (file)
--- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
@@ -2,6 +2,7 @@
  #ifndef _IP_CONNTRACK_PROTOCOL_H
  #define _IP_CONNTRACK_PROTOCOL_H
  #include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
  
  struct seq_file;
  
@@ -47,22 +48,22 @@ struct ip_conntrack_protocol
         int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
                      unsigned int hooknum);
  
+       /* convert protoinfo to nfnetlink attributes */
+       int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa,
+                        const struct ip_conntrack *ct);
+
+       int (*tuple_to_nfattr)(struct sk_buff *skb,
+                              const struct ip_conntrack_tuple *t);
+       int (*nfattr_to_tuple)(struct nfattr *tb[],
+                              struct ip_conntrack_tuple *t);
+
         /* Module (if any) which this is connected to. */
         struct module *me;
  };
  
-#define MAX_IP_CT_PROTO 256
-extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
-
  /* Protocol registration. */
  extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto);
  extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto);
-
-static inline struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
-{
-       return ip_ct_protos[protocol];
-}
-
  /* Existing built-in protocols */
  extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
  extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
@@ -73,6 +74,11 @@ extern int ip_conntrack_protocol_tcp_init(void);
  /* Log invalid packets */
  extern unsigned int ip_ct_log_invalid;
  
+extern int ip_ct_port_tuple_to_nfattr(struct sk_buff *,
+                                     const struct ip_conntrack_tuple *);
+extern int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
+                                     struct ip_conntrack_tuple *);
+
  #ifdef CONFIG_SYSCTL
  #ifdef DEBUG_INVALID_PACKETS
  #define LOG_INVALID(proto) \
diff --git a/include/linux/netfilter_ipv4/ip_logging.h b/include/linux/netfilter_ipv4/ip_logging.h

deleted file mode 100644 (file)

index 0c5c52c..0000000
--- a/include/linux/netfilter_ipv4/ip_logging.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* IPv4 macros for the internal logging interface. */
-#ifndef __IP_LOGGING_H
-#define __IP_LOGGING_H
-
-#ifdef __KERNEL__
-#include <linux/socket.h>
-#include <linux/netfilter_logging.h>
-
-#define nf_log_ip_packet(pskb,hooknum,in,out,fmt,args...) \
-       nf_log_packet(AF_INET,pskb,hooknum,in,out,fmt,##args)
-
-#define nf_log_ip(pfh,len,fmt,args...) \
-       nf_log(AF_INET,pfh,len,fmt,##args)
-
-#define nf_ip_log_register(logging) nf_log_register(AF_INET,logging)
-#define nf_ip_log_unregister(logging) nf_log_unregister(AF_INET,logging)
-       
-#endif /*__KERNEL__*/
-
-#endif /*__IP_LOGGING_H*/
diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h

index 129708c22386f5beb2d1276e818186dbb100b68f..ef63aa991a0669ffd02843ca332dc267b81c427c 100644 (file)
--- a/include/linux/netfilter_ipv4/ip_nat_protocol.h
+++ b/include/linux/netfilter_ipv4/ip_nat_protocol.h
@@ -4,6 +4,9 @@
  #include <linux/init.h>
  #include <linux/list.h>
  
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
  struct iphdr;
  struct ip_nat_range;
  
@@ -15,6 +18,8 @@ struct ip_nat_protocol
         /* Protocol number. */
         unsigned int protonum;
  
+       struct module *me;
+
         /* Translate a packet to the target according to manip type.
            Return true if succeeded. */
         int (*manip_pkt)(struct sk_buff **pskb,
@@ -43,19 +48,20 @@ struct ip_nat_protocol
  
         unsigned int (*print_range)(char *buffer,
                                     const struct ip_nat_range *range);
-};
  
-#define MAX_IP_NAT_PROTO 256
-extern struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
+       int (*range_to_nfattr)(struct sk_buff *skb,
+                              const struct ip_nat_range *range);
+
+       int (*nfattr_to_range)(struct nfattr *tb[],
+                              struct ip_nat_range *range);
+};
  
  /* Protocol registration. */
  extern int ip_nat_protocol_register(struct ip_nat_protocol *proto);
  extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto);
  
-static inline struct ip_nat_protocol *ip_nat_find_proto(u_int8_t protocol)
-{
-       return ip_nat_protos[protocol];
-}
+extern struct ip_nat_protocol *ip_nat_proto_find_get(u_int8_t protocol);
+extern void ip_nat_proto_put(struct ip_nat_protocol *proto);
  
  /* Built-in protocols. */
  extern struct ip_nat_protocol ip_nat_protocol_tcp;
@@ -67,4 +73,9 @@ extern int init_protocols(void) __init;
  extern void cleanup_protocols(void);
  extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum);
  
+extern int ip_nat_port_range_to_nfattr(struct sk_buff *skb,
+                                      const struct ip_nat_range *range);
+extern int ip_nat_port_nfattr_to_range(struct nfattr *tb[],
+                                      struct ip_nat_range *range);
+
  #endif /*_IP_NAT_PROTO_H*/
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h

index 12ce47808e7d24c3d734e3619d40d9994ee38c71..d19d65cf453046ba28724c80f7cea09d2fda4dc0 100644 (file)
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -109,7 +109,8 @@ struct ipt_counters
  
  /* Values for "flag" field in struct ipt_ip (general ip structure). */
  #define IPT_F_FRAG             0x01    /* Set if rule is a fragment rule */
-#define IPT_F_MASK             0x01    /* All possible flag bits mask. */
+#define IPT_F_GOTO             0x02    /* Set if jump is a goto */
+#define IPT_F_MASK             0x03    /* All possible flag bits mask. */
  
  /* Values for "inv" field in struct ipt_ip. */
  #define IPT_INV_VIA_IN         0x01    /* Invert the sense of IN IFACE. */
diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h

index d25f782e57d17acff2a6912777264236dd72b3be..22d16177319b9d46a335d11ede61f86c9fb28b8d 100644 (file)
--- a/include/linux/netfilter_ipv4/ipt_LOG.h
+++ b/include/linux/netfilter_ipv4/ipt_LOG.h
@@ -1,6 +1,7 @@
  #ifndef _IPT_LOG_H
  #define _IPT_LOG_H
  
+/* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */
  #define IPT_LOG_TCPSEQ         0x01    /* Log TCP sequence numbers */
  #define IPT_LOG_TCPOPT         0x02    /* Log TCP options */
  #define IPT_LOG_IPOPT          0x04    /* Log IP options */
diff --git a/include/linux/netfilter_ipv4/ipt_NFQUEUE.h b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h

new file mode 100644 (file)

index 0000000..b5b2943
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h
@@ -0,0 +1,16 @@
+/* iptables module for using NFQUEUE mechanism
+ *
+ * (C) 2005 Harald Welte <laforge@netfilter.org>
+ *
+ * This software is distributed under GNU GPL v2, 1991
+ * 
+*/
+#ifndef _IPT_NFQ_TARGET_H
+#define _IPT_NFQ_TARGET_H
+
+/* target info */
+struct ipt_NFQ_info {
+       u_int16_t queuenum;
+};
+
+#endif /* _IPT_NFQ_TARGET_H */
diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h

new file mode 100644 (file)

index 0000000..ee6611e
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_TTL.h
@@ -0,0 +1,21 @@
+/* TTL modification module for IP tables
+ * (C) 2000 by Harald Welte <laforge@netfilter.org> */
+
+#ifndef _IPT_TTL_H
+#define _IPT_TTL_H
+
+enum {
+       IPT_TTL_SET = 0,
+       IPT_TTL_INC,
+       IPT_TTL_DEC
+};
+
+#define IPT_TTL_MAXMODE        IPT_TTL_DEC
+
+struct ipt_TTL_info {
+       u_int8_t        mode;
+       u_int8_t        ttl;
+};
+
+
+#endif
diff --git a/include/linux/netfilter_ipv4/ipt_connbytes.h b/include/linux/netfilter_ipv4/ipt_connbytes.h

new file mode 100644 (file)

index 0000000..9e5532f
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_connbytes.h
@@ -0,0 +1,25 @@
+#ifndef _IPT_CONNBYTES_H
+#define _IPT_CONNBYTES_H
+
+enum ipt_connbytes_what {
+       IPT_CONNBYTES_PKTS,
+       IPT_CONNBYTES_BYTES,
+       IPT_CONNBYTES_AVGPKT,
+};
+
+enum ipt_connbytes_direction {
+       IPT_CONNBYTES_DIR_ORIGINAL,
+       IPT_CONNBYTES_DIR_REPLY,
+       IPT_CONNBYTES_DIR_BOTH,
+};
+
+struct ipt_connbytes_info
+{
+       struct {
+               aligned_u64 from;       /* count to be matched */
+               aligned_u64 to;         /* count to be matched */
+       } count;
+       u_int8_t what;          /* ipt_connbytes_what */
+       u_int8_t direction;     /* ipt_connbytes_direction */
+};
+#endif
diff --git a/include/linux/netfilter_ipv4/ipt_dccp.h b/include/linux/netfilter_ipv4/ipt_dccp.h

new file mode 100644 (file)

index 0000000..3cb3a52
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_dccp.h
@@ -0,0 +1,23 @@
+#ifndef _IPT_DCCP_H_
+#define _IPT_DCCP_H_
+
+#define IPT_DCCP_SRC_PORTS             0x01
+#define IPT_DCCP_DEST_PORTS            0x02
+#define IPT_DCCP_TYPE                  0x04
+#define IPT_DCCP_OPTION                        0x08
+
+#define IPT_DCCP_VALID_FLAGS           0x0f
+
+struct ipt_dccp_info {
+       u_int16_t dpts[2];  /* Min, Max */
+       u_int16_t spts[2];  /* Min, Max */
+
+       u_int16_t flags;
+       u_int16_t invflags;
+
+       u_int16_t typemask;
+       u_int8_t option;
+};
+
+#endif /* _IPT_DCCP_H_ */
+
diff --git a/include/linux/netfilter_ipv4/ipt_string.h b/include/linux/netfilter_ipv4/ipt_string.h

new file mode 100644 (file)

index 0000000..a265f6e
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_string.h
@@ -0,0 +1,18 @@
+#ifndef _IPT_STRING_H
+#define _IPT_STRING_H
+
+#define IPT_STRING_MAX_PATTERN_SIZE 128
+#define IPT_STRING_MAX_ALGO_NAME_SIZE 16
+
+struct ipt_string_info
+{
+       u_int16_t from_offset;
+       u_int16_t to_offset;
+       char      algo[IPT_STRING_MAX_ALGO_NAME_SIZE];
+       char      pattern[IPT_STRING_MAX_PATTERN_SIZE];
+       u_int8_t  patlen;
+       u_int8_t  invert;
+       struct ts_config __attribute__((aligned(8))) *config;
+};
+
+#endif /*_IPT_STRING_H*/
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h

index bee7a5ec7c663414579e9e4e522c8bbbdbdfaee0..edcc2c6eb5c702ce8f39c156b685023fbbe34025 100644 (file)
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -10,6 +10,8 @@
  
  #include <linux/netfilter.h>
  
+/* only for userspace compatibility */
+#ifndef __KERNEL__
  /* IP Cache bits. */
  /* Src IP address. */
  #define NFC_IP6_SRC              0x0001
@@ -38,6 +40,7 @@
  #define NFC_IP6_DST_PT           0x0400
  /* Something else about the proto */
  #define NFC_IP6_PROTO_UNKNOWN    0x2000
+#endif /* ! __KERNEL__ */
  
  
  /* IP6 Hooks */
@@ -68,4 +71,7 @@ enum nf_ip6_hook_priorities {
         NF_IP6_PRI_LAST = INT_MAX,
  };
  
+extern int ipv6_netfilter_init(void);
+extern void ipv6_netfilter_fini(void);
+
  #endif /*__LINUX_IP6_NETFILTER_H*/
diff --git a/include/linux/netfilter_ipv6/ip6_logging.h b/include/linux/netfilter_ipv6/ip6_logging.h

deleted file mode 100644 (file)

index a0b2ee3..0000000
--- a/include/linux/netfilter_ipv6/ip6_logging.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* IPv6 macros for the nternal logging interface. */
-#ifndef __IP6_LOGGING_H
-#define __IP6_LOGGING_H
-
-#ifdef __KERNEL__
-#include <linux/socket.h>
-#include <linux/netfilter_logging.h>
-
-#define nf_log_ip6_packet(pskb,hooknum,in,out,fmt,args...) \
-       nf_log_packet(AF_INET6,pskb,hooknum,in,out,fmt,##args)
-
-#define nf_log_ip6(pfh,len,fmt,args...) \
-       nf_log(AF_INET6,pfh,len,fmt,##args)
-
-#define nf_ip6_log_register(logging) nf_log_register(AF_INET6,logging)
-#define nf_ip6_log_unregister(logging) nf_log_unregister(AF_INET6,logging)
-       
-#endif /*__KERNEL__*/
-
-#endif /*__IP6_LOGGING_H*/
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h

index f1ce3b009853b0fe8d87b2c2a563d31d9d3e6e15..58c72a52dc657ea3028f6abded8b2acdbbff924d 100644 (file)
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -111,7 +111,8 @@ struct ip6t_counters
  #define IP6T_F_PROTO           0x01    /* Set if rule cares about upper 
                                            protocols */
  #define IP6T_F_TOS             0x02    /* Match the TOS. */
-#define IP6T_F_MASK            0x03    /* All possible flag bits mask. */
+#define IP6T_F_GOTO            0x04    /* Set if jump is a goto */
+#define IP6T_F_MASK            0x07    /* All possible flag bits mask. */
  
  /* Values for "inv" field in struct ip6t_ip6. */
  #define IP6T_INV_VIA_IN                0x01    /* Invert the sense of IN IFACE. */
diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h

new file mode 100644 (file)

index 0000000..afb7813
--- /dev/null
+++ b/include/linux/netfilter_ipv6/ip6t_HL.h
@@ -0,0 +1,22 @@
+/* Hop Limit modification module for ip6tables
+ * Maciej Soltysiak <solt@dns.toxicfilms.tv>
+ * Based on HW's TTL module */
+
+#ifndef _IP6T_HL_H
+#define _IP6T_HL_H
+
+enum {
+       IP6T_HL_SET = 0,
+       IP6T_HL_INC,
+       IP6T_HL_DEC
+};
+
+#define IP6T_HL_MAXMODE        IP6T_HL_DEC
+
+struct ip6t_HL_info {
+       u_int8_t        mode;
+       u_int8_t        hop_limit;
+};
+
+
+#endif
diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h

index 42996a43bb39c43ce8b33f7d910492a043f066ac..9008ff5c40aec70e209f8f9efd036e34db1621ab 100644 (file)
--- a/include/linux/netfilter_ipv6/ip6t_LOG.h
+++ b/include/linux/netfilter_ipv6/ip6t_LOG.h
@@ -1,6 +1,7 @@
  #ifndef _IP6T_LOG_H
  #define _IP6T_LOG_H
  
+/* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */
  #define IP6T_LOG_TCPSEQ                0x01    /* Log TCP sequence numbers */
  #define IP6T_LOG_TCPOPT                0x02    /* Log TCP options */
  #define IP6T_LOG_IPOPT         0x04    /* Log IP options */
diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h

new file mode 100644 (file)

index 0000000..6be6504
--- /dev/null
+++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h
@@ -0,0 +1,18 @@
+#ifndef _IP6T_REJECT_H
+#define _IP6T_REJECT_H
+
+enum ip6t_reject_with {
+       IP6T_ICMP6_NO_ROUTE,
+       IP6T_ICMP6_ADM_PROHIBITED,
+       IP6T_ICMP6_NOT_NEIGHBOUR,
+       IP6T_ICMP6_ADDR_UNREACH,
+       IP6T_ICMP6_PORT_UNREACH,
+       IP6T_ICMP6_ECHOREPLY,
+       IP6T_TCP_RESET
+};
+
+struct ip6t_reject_info {
+       u_int32_t       with;   /* reject type */
+};
+
+#endif /*_IP6T_REJECT_H*/
diff --git a/include/linux/netlink.h b/include/linux/netlink.h

index 70c2a9dc4b2b73bf6d61c2f17b6f83228c671451..1675186689361370861897bd7b12a1003c9e5af3 100644 (file)
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -8,19 +8,17 @@
  #define NETLINK_W1             1       /* 1-wire subsystem                             */
  #define NETLINK_USERSOCK       2       /* Reserved for user mode socket protocols      */
  #define NETLINK_FIREWALL       3       /* Firewalling hook                             */
-#define NETLINK_TCPDIAG                4       /* TCP socket monitoring                        */
+#define NETLINK_INET_DIAG      4       /* INET socket monitoring                       */
  #define NETLINK_NFLOG          5       /* netfilter/iptables ULOG */
  #define NETLINK_XFRM           6       /* ipsec */
  #define NETLINK_SELINUX                7       /* SELinux event notifications */
-#define NETLINK_ARPD           8
+#define NETLINK_ISCSI          8       /* Open-iSCSI */
  #define NETLINK_AUDIT          9       /* auditing */
  #define NETLINK_FIB_LOOKUP     10      
-#define NETLINK_ROUTE6         11      /* af_inet6 route comm channel */
  #define NETLINK_NETFILTER      12      /* netfilter subsystem */
  #define NETLINK_IP6_FW         13
  #define NETLINK_DNRTMSG                14      /* DECnet routing messages */
  #define NETLINK_KOBJECT_UEVENT 15      /* Kernel messages to userspace */
-#define NETLINK_TAPBASE                16      /* 16 to 31 are ethertap */
  
  #define MAX_LINKS 32           
  
@@ -92,6 +90,15 @@ struct nlmsgerr
         struct nlmsghdr msg;
  };
  
+#define NETLINK_ADD_MEMBERSHIP 1
+#define NETLINK_DROP_MEMBERSHIP        2
+#define NETLINK_PKTINFO                3
+
+struct nl_pktinfo
+{
+       __u32   group;
+};
+
  #define NET_MAJOR 36           /* Major 36 is reserved for networking                                          */
  
  enum {
@@ -108,9 +115,8 @@ struct netlink_skb_parms
  {
         struct ucred            creds;          /* Skb credentials      */
         __u32                   pid;
-       __u32                   groups;
         __u32                   dst_pid;
-       __u32                   dst_groups;
+       __u32                   dst_group;
         kernel_cap_t            eff_cap;
         __u32                   loginuid;       /* Login (audit) uid */
  };
@@ -119,11 +125,11 @@ struct netlink_skb_parms
  #define NETLINK_CREDS(skb)     (&NETLINK_CB((skb)).creds)
  
  
-extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len));
+extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module);
  extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
  extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
  extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid,
-                            __u32 group, int allocation);
+                            __u32 group, unsigned int __nocast allocation);
  extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code);
  extern int netlink_register_notifier(struct notifier_block *nb);
  extern int netlink_unregister_notifier(struct notifier_block *nb);
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h

index bcd0ac33f592543b9af4fefc95d10471010201d9..5ade54a78dbbb3a3618d10d5e3d21d5d67a5b2fe 100644 (file)
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -9,6 +9,7 @@
  
  #include <linux/netdevice.h>
  #include <linux/interrupt.h>
+#include <linux/rcupdate.h>
  #include <linux/list.h>
  
  struct netpoll;
@@ -26,6 +27,7 @@ struct netpoll {
  struct netpoll_info {
         spinlock_t poll_lock;
         int poll_owner;
+       int tries;
         int rx_flags;
         spinlock_t rx_lock;
         struct netpoll *rx_np; /* netpoll that registered an rx_hook */
@@ -60,25 +62,31 @@ static inline int netpoll_rx(struct sk_buff *skb)
         return ret;
  }
  
-static inline void netpoll_poll_lock(struct net_device *dev)
+static inline void *netpoll_poll_lock(struct net_device *dev)
  {
+       rcu_read_lock(); /* deal with race on ->npinfo */
         if (dev->npinfo) {
                 spin_lock(&dev->npinfo->poll_lock);
                 dev->npinfo->poll_owner = smp_processor_id();
+               return dev->npinfo;
         }
+       return NULL;
  }
  
-static inline void netpoll_poll_unlock(struct net_device *dev)
+static inline void netpoll_poll_unlock(void *have)
  {
-       if (dev->npinfo) {
-               dev->npinfo->poll_owner = -1;
-               spin_unlock(&dev->npinfo->poll_lock);
+       struct netpoll_info *npi = have;
+
+       if (npi) {
+               npi->poll_owner = -1;
+               spin_unlock(&npi->poll_lock);
         }
+       rcu_read_unlock();
  }
  
  #else
  #define netpoll_rx(a) 0
-#define netpoll_poll_lock(a)
+#define netpoll_poll_lock(a) 0
  #define netpoll_poll_unlock(a)
  #endif
  
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h

index 8ea249110fb0b5029c88b35717f2ce3e5387f507..9a6047ff1b25b198a869678463ed0561cbf417cc 100644 (file)
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -112,7 +112,8 @@ struct nfs_inode {
         /*
          * Various flags
          */
-       unsigned int            flags;
+       unsigned long           flags;                  /* atomic bit ops */
+       unsigned long           cache_validity;         /* bit mask */
  
         /*
          * read_cache_jiffies is when we started read-caching this inode,
@@ -174,8 +175,6 @@ struct nfs_inode {
         /* Open contexts for shared mmap writes */
         struct list_head        open_files;
  
-       wait_queue_head_t       nfs_i_wait;
-
  #ifdef CONFIG_NFS_V4
         struct nfs4_cached_acl  *nfs4_acl;
          /* NFSv4 state */
@@ -188,17 +187,21 @@ struct nfs_inode {
  };
  
  /*
- * Legal inode flag values
+ * Cache validity bit flags
   */
-#define NFS_INO_STALE          0x0001          /* possible stale inode */
-#define NFS_INO_ADVISE_RDPLUS   0x0002          /* advise readdirplus */
-#define NFS_INO_REVALIDATING   0x0004          /* revalidating attrs */
-#define NFS_INO_INVALID_ATTR   0x0008          /* cached attrs are invalid */
-#define NFS_INO_INVALID_DATA   0x0010          /* cached data is invalid */
-#define NFS_INO_INVALID_ATIME  0x0020          /* cached atime is invalid */
-#define NFS_INO_INVALID_ACCESS 0x0040          /* cached access cred invalid */
-#define NFS_INO_INVALID_ACL    0x0080          /* cached acls are invalid */
-#define NFS_INO_REVAL_PAGECACHE        0x1000          /* must revalidate pagecache */
+#define NFS_INO_INVALID_ATTR   0x0001          /* cached attrs are invalid */
+#define NFS_INO_INVALID_DATA   0x0002          /* cached data is invalid */
+#define NFS_INO_INVALID_ATIME  0x0004          /* cached atime is invalid */
+#define NFS_INO_INVALID_ACCESS 0x0008          /* cached access cred invalid */
+#define NFS_INO_INVALID_ACL    0x0010          /* cached acls are invalid */
+#define NFS_INO_REVAL_PAGECACHE        0x0020          /* must revalidate pagecache */
+
+/*
+ * Bit offsets in flags field
+ */
+#define NFS_INO_REVALIDATING   (0)             /* revalidating attrs */
+#define NFS_INO_ADVISE_RDPLUS  (1)             /* advise readdirplus */
+#define NFS_INO_STALE          (2)             /* possible stale inode */
  
  static inline struct nfs_inode *NFS_I(struct inode *inode)
  {
@@ -224,8 +227,7 @@ static inline struct nfs_inode *NFS_I(struct inode *inode)
  #define NFS_ATTRTIMEO_UPDATE(inode)    (NFS_I(inode)->attrtimeo_timestamp)
  
  #define NFS_FLAGS(inode)               (NFS_I(inode)->flags)
-#define NFS_REVALIDATING(inode)                (NFS_FLAGS(inode) & NFS_INO_REVALIDATING)
-#define NFS_STALE(inode)               (NFS_FLAGS(inode) & NFS_INO_STALE)
+#define NFS_STALE(inode)               (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode)))
  
  #define NFS_FILEID(inode)              (NFS_I(inode)->fileid)
  
@@ -236,8 +238,11 @@ static inline int nfs_caches_unstable(struct inode *inode)
  
  static inline void NFS_CACHEINV(struct inode *inode)
  {
-       if (!nfs_caches_unstable(inode))
-               NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
+       if (!nfs_caches_unstable(inode)) {
+               spin_lock(&inode->i_lock);
+               NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
+               spin_unlock(&inode->i_lock);
+       }
  }
  
  static inline int nfs_server_capable(struct inode *inode, int cap)
@@ -247,7 +252,7 @@ static inline int nfs_server_capable(struct inode *inode, int cap)
  
  static inline int NFS_USE_READDIRPLUS(struct inode *inode)
  {
-       return NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS;
+       return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
  }
  
  /**
@@ -292,6 +297,7 @@ extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
  extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
  extern void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
  extern int nfs_setattr(struct dentry *, struct iattr *);
+extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
  extern void nfs_begin_attr_update(struct inode *);
  extern void nfs_end_attr_update(struct inode *);
  extern void nfs_begin_data_update(struct inode *);
diff --git a/include/linux/pci.h b/include/linux/pci.h

index 8621cf42b46f36777fba25b1bf408e338d72af71..bc4c40000c0d7350b1ca9e5ce3f97e917c89aa5b 100644 (file)
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -556,7 +556,8 @@ struct pci_dev {
         /* keep track of device state */
         unsigned int    is_enabled:1;   /* pci_enable_device has been called */
         unsigned int    is_busmaster:1; /* device is busmaster */
-       
+       unsigned int    no_msi:1;       /* device may not use msi */
+
         u32             saved_config_space[16]; /* config space saved at suspend time */
         struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
         int rom_attr_enabled;           /* has display of the rom attribute been enabled? */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h

index bc4cc10fabe9521bd855fe4f9c5fc21dc88b801f..499a5325f67f75aca4ff64b6ae0f480e2e479344 100644 (file)
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -881,7 +881,7 @@
  #define PCI_DEVICE_ID_APPLE_UNI_N_PCI15        0x002e
  #define PCI_DEVICE_ID_APPLE_UNI_N_FW2  0x0030
  #define PCI_DEVICE_ID_APPLE_UNI_N_GMAC2        0x0032
-#define PCI_DEVIEC_ID_APPLE_UNI_N_ATA  0x0033
+#define PCI_DEVICE_ID_APPLE_UNI_N_ATA  0x0033
  #define PCI_DEVICE_ID_APPLE_UNI_N_AGP2 0x0034
  #define PCI_DEVICE_ID_APPLE_IPID_ATA100        0x003b
  #define PCI_DEVICE_ID_APPLE_KEYLARGO_I 0x003e
@@ -1249,6 +1249,7 @@
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 0x0266
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2        0x0267
  #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE  0x036E
+#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x036F
  #define PCI_DEVICE_ID_NVIDIA_NVENET_12         0x0268
  #define PCI_DEVICE_ID_NVIDIA_NVENET_13         0x0269
  #define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO       0x026B
@@ -1580,6 +1581,7 @@
  #define PCI_DEVICE_ID_SERVERWORKS_OSB4IDE 0x0211
  #define PCI_DEVICE_ID_SERVERWORKS_CSB5IDE 0x0212
  #define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE 0x0213
+#define PCI_DEVICE_ID_SERVERWORKS_HT1000IDE 0x0214
  #define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2 0x0217
  #define PCI_DEVICE_ID_SERVERWORKS_OSB4USB 0x0220
  #define PCI_DEVICE_ID_SERVERWORKS_CSB5USB PCI_DEVICE_ID_SERVERWORKS_OSB4USB
@@ -2184,6 +2186,9 @@
  #define PCI_VENDOR_ID_SIBYTE           0x166d
  #define PCI_DEVICE_ID_BCM1250_HT       0x0002
  
+#define PCI_VENDOR_ID_NETCELL          0x169c
+#define PCI_DEVICE_ID_REVOLUTION       0x0044
+
  #define PCI_VENDOR_ID_LINKSYS          0x1737
  #define PCI_DEVICE_ID_LINKSYS_EG1032   0x1032
  #define PCI_DEVICE_ID_LINKSYS_EG1064   0x1064
@@ -2281,6 +2286,11 @@
  #define PCI_VENDOR_ID_INTEL            0x8086
  #define PCI_DEVICE_ID_INTEL_EESSC      0x0008
  #define PCI_DEVICE_ID_INTEL_21145      0x0039
+#define PCI_DEVICE_ID_INTEL_PXHD_0     0x0320
+#define PCI_DEVICE_ID_INTEL_PXHD_1     0x0321
+#define PCI_DEVICE_ID_INTEL_PXH_0      0x0329
+#define PCI_DEVICE_ID_INTEL_PXH_1      0x032A
+#define PCI_DEVICE_ID_INTEL_PXHV       0x032C
  #define PCI_DEVICE_ID_INTEL_82375      0x0482
  #define PCI_DEVICE_ID_INTEL_82424      0x0483
  #define PCI_DEVICE_ID_INTEL_82378      0x0484
diff --git a/include/linux/phy.h b/include/linux/phy.h

new file mode 100644 (file)

index 0000000..72cb67b
--- /dev/null
+++ b/include/linux/phy.h
@@ -0,0 +1,377 @@
+/*
+ * include/linux/phy.h
+ *
+ * Framework and drivers for configuring and reading different PHYs
+ * Based on code in sungem_phy.c and gianfar_phy.c
+ *
+ * Author: Andy Fleming
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef __PHY_H
+#define __PHY_H
+
+#include <linux/spinlock.h>
+#include <linux/device.h>
+
+#define PHY_BASIC_FEATURES     (SUPPORTED_10baseT_Half | \
+                                SUPPORTED_10baseT_Full | \
+                                SUPPORTED_100baseT_Half | \
+                                SUPPORTED_100baseT_Full | \
+                                SUPPORTED_Autoneg | \
+                                SUPPORTED_TP | \
+                                SUPPORTED_MII)
+
+#define PHY_GBIT_FEATURES      (PHY_BASIC_FEATURES | \
+                                SUPPORTED_1000baseT_Half | \
+                                SUPPORTED_1000baseT_Full)
+
+/* Set phydev->irq to PHY_POLL if interrupts are not supported,
+ * or not desired for this PHY.  Set to PHY_IGNORE_INTERRUPT if
+ * the attached driver handles the interrupt
+ */
+#define PHY_POLL               -1
+#define PHY_IGNORE_INTERRUPT   -2
+
+#define PHY_HAS_INTERRUPT      0x00000001
+#define PHY_HAS_MAGICANEG      0x00000002
+
+#define MII_BUS_MAX 4
+
+
+#define PHY_INIT_TIMEOUT 100000
+#define PHY_STATE_TIME         1
+#define PHY_FORCE_TIMEOUT      10
+#define PHY_AN_TIMEOUT         10
+
+#define PHY_MAX_ADDR 32
+
+/* The Bus class for PHYs.  Devices which provide access to
+ * PHYs should register using this structure */
+struct mii_bus {
+       const char *name;
+       int id;
+       void *priv;
+       int (*read)(struct mii_bus *bus, int phy_id, int regnum);
+       int (*write)(struct mii_bus *bus, int phy_id, int regnum, u16 val);
+       int (*reset)(struct mii_bus *bus);
+
+       /* A lock to ensure that only one thing can read/write
+        * the MDIO bus at a time */
+       spinlock_t mdio_lock;
+
+       struct device *dev;
+
+       /* list of all PHYs on bus */
+       struct phy_device *phy_map[PHY_MAX_ADDR];
+
+       /* Pointer to an array of interrupts, each PHY's
+        * interrupt at the index matching its address */
+       int *irq;
+};
+
+#define PHY_INTERRUPT_DISABLED 0x0
+#define PHY_INTERRUPT_ENABLED 0x80000000
+
+/* PHY state machine states:
+ *
+ * DOWN: PHY device and driver are not ready for anything.  probe
+ * should be called if and only if the PHY is in this state,
+ * given that the PHY device exists.
+ * - PHY driver probe function will, depending on the PHY, set
+ * the state to STARTING or READY
+ *
+ * STARTING:  PHY device is coming up, and the ethernet driver is
+ * not ready.  PHY drivers may set this in the probe function.
+ * If they do, they are responsible for making sure the state is
+ * eventually set to indicate whether the PHY is UP or READY,
+ * depending on the state when the PHY is done starting up.
+ * - PHY driver will set the state to READY
+ * - start will set the state to PENDING
+ *
+ * READY: PHY is ready to send and receive packets, but the
+ * controller is not.  By default, PHYs which do not implement
+ * probe will be set to this state by phy_probe().  If the PHY
+ * driver knows the PHY is ready, and the PHY state is STARTING,
+ * then it sets this STATE.
+ * - start will set the state to UP
+ *
+ * PENDING: PHY device is coming up, but the ethernet driver is
+ * ready.  phy_start will set this state if the PHY state is
+ * STARTING.
+ * - PHY driver will set the state to UP when the PHY is ready
+ *
+ * UP: The PHY and attached device are ready to do work.
+ * Interrupts should be started here.
+ * - timer moves to AN
+ *
+ * AN: The PHY is currently negotiating the link state.  Link is
+ * therefore down for now.  phy_timer will set this state when it
+ * detects the state is UP.  config_aneg will set this state
+ * whenever called with phydev->autoneg set to AUTONEG_ENABLE.
+ * - If autonegotiation finishes, but there's no link, it sets
+ *   the state to NOLINK.
+ * - If aneg finishes with link, it sets the state to RUNNING,
+ *   and calls adjust_link
+ * - If autonegotiation did not finish after an arbitrary amount
+ *   of time, autonegotiation should be tried again if the PHY
+ *   supports "magic" autonegotiation (back to AN)
+ * - If it didn't finish, and no magic_aneg, move to FORCING.
+ *
+ * NOLINK: PHY is up, but not currently plugged in.
+ * - If the timer notes that the link comes back, we move to RUNNING
+ * - config_aneg moves to AN
+ * - phy_stop moves to HALTED
+ *
+ * FORCING: PHY is being configured with forced settings
+ * - if link is up, move to RUNNING
+ * - If link is down, we drop to the next highest setting, and
+ *   retry (FORCING) after a timeout
+ * - phy_stop moves to HALTED
+ *
+ * RUNNING: PHY is currently up, running, and possibly sending
+ * and/or receiving packets
+ * - timer will set CHANGELINK if we're polling (this ensures the
+ *   link state is polled every other cycle of this state machine,
+ *   which makes it every other second)
+ * - irq will set CHANGELINK
+ * - config_aneg will set AN
+ * - phy_stop moves to HALTED
+ *
+ * CHANGELINK: PHY experienced a change in link state
+ * - timer moves to RUNNING if link
+ * - timer moves to NOLINK if the link is down
+ * - phy_stop moves to HALTED
+ *
+ * HALTED: PHY is up, but no polling or interrupts are done. Or
+ * PHY is in an error state.
+ *
+ * - phy_start moves to RESUMING
+ *
+ * RESUMING: PHY was halted, but now wants to run again.
+ * - If we are forcing, or aneg is done, timer moves to RUNNING
+ * - If aneg is not done, timer moves to AN
+ * - phy_stop moves to HALTED
+ */
+enum phy_state {
+       PHY_DOWN=0,
+       PHY_STARTING,
+       PHY_READY,
+       PHY_PENDING,
+       PHY_UP,
+       PHY_AN,
+       PHY_RUNNING,
+       PHY_NOLINK,
+       PHY_FORCING,
+       PHY_CHANGELINK,
+       PHY_HALTED,
+       PHY_RESUMING
+};
+
+/* phy_device: An instance of a PHY
+ *
+ * drv: Pointer to the driver for this PHY instance
+ * bus: Pointer to the bus this PHY is on
+ * dev: driver model device structure for this PHY
+ * phy_id: UID for this device found during discovery
+ * state: state of the PHY for management purposes
+ * dev_flags: Device-specific flags used by the PHY driver.
+ * addr: Bus address of PHY
+ * link_timeout: The number of timer firings to wait before
+ * giving up on the current attempt at acquiring a link
+ * irq: IRQ number of the PHY's interrupt (-1 if none)
+ * phy_timer: The timer for handling the state machine
+ * phy_queue: A work_queue for the interrupt
+ * attached_dev: The attached enet driver's device instance ptr
+ * adjust_link: Callback for the enet controller to respond to
+ * changes in the link state.
+ * adjust_state: Callback for the enet driver to respond to
+ * changes in the state machine.
+ *
+ * speed, duplex, pause, supported, advertising, and
+ * autoneg are used like in mii_if_info
+ *
+ * interrupts currently only supports enabled or disabled,
+ * but could be changed in the future to support enabling
+ * and disabling specific interrupts
+ *
+ * Contains some infrastructure for polling and interrupt
+ * handling, as well as handling shifts in PHY hardware state
+ */
+struct phy_device {
+       /* Information about the PHY type */
+       /* And management functions */
+       struct phy_driver *drv;
+
+       struct mii_bus *bus;
+
+       struct device dev;
+
+       u32 phy_id;
+
+       enum phy_state state;
+
+       u32 dev_flags;
+
+       /* Bus address of the PHY (0-31) */
+       int addr;
+
+       /* forced speed & duplex (no autoneg)
+        * partner speed & duplex & pause (autoneg)
+        */
+       int speed;
+       int duplex;
+       int pause;
+       int asym_pause;
+
+       /* The most recently read link state */
+       int link;
+
+       /* Enabled Interrupts */
+       u32 interrupts;
+
+       /* Union of PHY and Attached devices' supported modes */
+       /* See mii.h for more info */
+       u32 supported;
+       u32 advertising;
+
+       int autoneg;
+
+       int link_timeout;
+
+       /* Interrupt number for this PHY
+        * -1 means no interrupt */
+       int irq;
+
+       /* private data pointer */
+       /* For use by PHYs to maintain extra state */
+       void *priv;
+
+       /* Interrupt and Polling infrastructure */
+       struct work_struct phy_queue;
+       struct timer_list phy_timer;
+
+       spinlock_t lock;
+
+       struct net_device *attached_dev;
+
+       void (*adjust_link)(struct net_device *dev);
+
+       void (*adjust_state)(struct net_device *dev);
+};
+#define to_phy_device(d) container_of(d, struct phy_device, dev)
+
+/* struct phy_driver: Driver structure for a particular PHY type
+ *
+ * phy_id: The result of reading the UID registers of this PHY
+ *   type, and ANDing them with the phy_id_mask.  This driver
+ *   only works for PHYs with IDs which match this field
+ * name: The friendly name of this PHY type
+ * phy_id_mask: Defines the important bits of the phy_id
+ * features: A list of features (speed, duplex, etc) supported
+ *   by this PHY
+ * flags: A bitfield defining certain other features this PHY
+ *   supports (like interrupts)
+ *
+ * The drivers must implement config_aneg and read_status.  All
+ * other functions are optional. Note that none of these
+ * functions should be called from interrupt time.  The goal is
+ * for the bus read/write functions to be able to block when the
+ * bus transaction is happening, and be freed up by an interrupt
+ * (The MPC85xx has this ability, though it is not currently
+ * supported in the driver).
+ */
+struct phy_driver {
+       u32 phy_id;
+       char *name;
+       unsigned int phy_id_mask;
+       u32 features;
+       u32 flags;
+
+       /* Called to initialize the PHY,
+        * including after a reset */
+       int (*config_init)(struct phy_device *phydev);
+
+       /* Called during discovery.  Used to set
+        * up device-specific structures, if any */
+       int (*probe)(struct phy_device *phydev);
+
+       /* PHY Power Management */
+       int (*suspend)(struct phy_device *phydev);
+       int (*resume)(struct phy_device *phydev);
+
+       /* Configures the advertisement and resets
+        * autonegotiation if phydev->autoneg is on,
+        * forces the speed to the current settings in phydev
+        * if phydev->autoneg is off */
+       int (*config_aneg)(struct phy_device *phydev);
+
+       /* Determines the negotiated speed and duplex */
+       int (*read_status)(struct phy_device *phydev);
+
+       /* Clears any pending interrupts */
+       int (*ack_interrupt)(struct phy_device *phydev);
+
+       /* Enables or disables interrupts */
+       int (*config_intr)(struct phy_device *phydev);
+
+       /* Clears up any memory if needed */
+       void (*remove)(struct phy_device *phydev);
+
+       struct device_driver driver;
+};
+#define to_phy_driver(d) container_of(d, struct phy_driver, driver)
+
+int phy_read(struct phy_device *phydev, u16 regnum);
+int phy_write(struct phy_device *phydev, u16 regnum, u16 val);
+struct phy_device* get_phy_device(struct mii_bus *bus, int addr);
+int phy_clear_interrupt(struct phy_device *phydev);
+int phy_config_interrupt(struct phy_device *phydev, u32 interrupts);
+struct phy_device * phy_attach(struct net_device *dev,
+               const char *phy_id, u32 flags);
+struct phy_device * phy_connect(struct net_device *dev, const char *phy_id,
+               void (*handler)(struct net_device *), u32 flags);
+void phy_disconnect(struct phy_device *phydev);
+void phy_detach(struct phy_device *phydev);
+void phy_start(struct phy_device *phydev);
+void phy_stop(struct phy_device *phydev);
+int phy_start_aneg(struct phy_device *phydev);
+
+int mdiobus_register(struct mii_bus *bus);
+void mdiobus_unregister(struct mii_bus *bus);
+void phy_sanitize_settings(struct phy_device *phydev);
+int phy_stop_interrupts(struct phy_device *phydev);
+
+static inline int phy_read_status(struct phy_device *phydev) {
+       return phydev->drv->read_status(phydev);
+}
+
+int genphy_config_advert(struct phy_device *phydev);
+int genphy_setup_forced(struct phy_device *phydev);
+int genphy_restart_aneg(struct phy_device *phydev);
+int genphy_config_aneg(struct phy_device *phydev);
+int genphy_update_link(struct phy_device *phydev);
+int genphy_read_status(struct phy_device *phydev);
+void phy_driver_unregister(struct phy_driver *drv);
+int phy_driver_register(struct phy_driver *new_driver);
+void phy_prepare_link(struct phy_device *phydev,
+               void (*adjust_link)(struct net_device *));
+void phy_start_machine(struct phy_device *phydev,
+               void (*handler)(struct net_device *));
+void phy_stop_machine(struct phy_device *phydev);
+int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+int phy_mii_ioctl(struct phy_device *phydev,
+               struct mii_ioctl_data *mii_data, int cmd);
+int phy_start_interrupts(struct phy_device *phydev);
+void phy_print_status(struct phy_device *phydev);
+
+extern struct bus_type mdio_bus_type;
+#endif /* __PHY_H */
diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h

index 6213e976eadedbd3385ba6e6ecad9f862259aea6..4bf1659f8aa87df1e9aad302f17c008f10850bd5 100644 (file)
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -248,6 +248,7 @@ struct bitmap {
  
  /* these are used only by md/bitmap */
  int  bitmap_create(mddev_t *mddev);
+void bitmap_flush(mddev_t *mddev);
  void bitmap_destroy(mddev_t *mddev);
  int  bitmap_active(struct bitmap *bitmap);
  
diff --git a/include/linux/random.h b/include/linux/random.h

index cc670344991606d5d23f94c8f23068528b951f51..7b2adb3322d5a6052504058e152cc0f94828c06e 100644 (file)
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -59,6 +59,8 @@ extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
                                         __u16 sport, __u16 dport);
  extern __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr,
                                           __u16 sport, __u16 dport);
+extern u64 secure_dccp_sequence_number(__u32 saddr, __u32 daddr,
+                                      __u16 sport, __u16 dport);
  
  #ifndef MODULE
  extern struct file_operations random_fops, urandom_fops;
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h

index 657c05ab8f9eb79c63422d495f105e7c91bb489b..c231e9a08f0bb2b2365b7f5f568de8c459385a59 100644 (file)
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -826,9 +826,8 @@ enum
  #define TCA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg))))
  #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg))
  
-
-/* RTnetlink multicast groups */
-
+#ifndef __KERNEL__
+/* RTnetlink multicast groups - backwards compatibility for userspace */
  #define RTMGRP_LINK            1
  #define RTMGRP_NOTIFY          2
  #define RTMGRP_NEIGH           4
@@ -847,6 +846,43 @@ enum
  #define RTMGRP_DECnet_ROUTE     0x4000
  
  #define RTMGRP_IPV6_PREFIX     0x20000
+#endif
+
+/* RTnetlink multicast groups */
+enum rtnetlink_groups {
+       RTNLGRP_NONE,
+#define RTNLGRP_NONE           RTNLGRP_NONE
+       RTNLGRP_LINK,
+#define RTNLGRP_LINK           RTNLGRP_LINK
+       RTNLGRP_NOTIFY,
+#define RTNLGRP_NOTIFY         RTNLGRP_NOTIFY
+       RTNLGRP_NEIGH,
+#define RTNLGRP_NEIGH          RTNLGRP_NEIGH
+       RTNLGRP_TC,
+#define RTNLGRP_TC             RTNLGRP_TC
+       RTNLGRP_IPV4_IFADDR,
+#define RTNLGRP_IPV4_IFADDR    RTNLGRP_IPV4_IFADDR
+       RTNLGRP_IPV4_MROUTE,
+#define        RTNLGRP_IPV4_MROUTE     RTNLGRP_IPV4_MROUTE
+       RTNLGRP_IPV4_ROUTE,
+#define RTNLGRP_IPV4_ROUTE     RTNLGRP_IPV4_ROUTE
+       RTNLGRP_IPV6_IFADDR,
+#define RTNLGRP_IPV6_IFADDR    RTNLGRP_IPV6_IFADDR
+       RTNLGRP_IPV6_MROUTE,
+#define RTNLGRP_IPV6_MROUTE    RTNLGRP_IPV6_MROUTE
+       RTNLGRP_IPV6_ROUTE,
+#define RTNLGRP_IPV6_ROUTE     RTNLGRP_IPV6_ROUTE
+       RTNLGRP_IPV6_IFINFO,
+#define RTNLGRP_IPV6_IFINFO    RTNLGRP_IPV6_IFINFO
+       RTNLGRP_DECnet_IFADDR,
+#define RTNLGRP_DECnet_IFADDR  RTNLGRP_DECnet_IFADDR
+       RTNLGRP_DECnet_ROUTE,
+#define RTNLGRP_DECnet_ROUTE   RTNLGRP_DECnet_ROUTE
+       RTNLGRP_IPV6_PREFIX,
+#define RTNLGRP_IPV6_PREFIX    RTNLGRP_IPV6_PREFIX
+       __RTNLGRP_MAX
+};
+#define RTNLGRP_MAX    (__RTNLGRP_MAX - 1)
  
  /* TC action piece */
  struct tcamsg
diff --git a/include/linux/security.h b/include/linux/security.h

index b42095a68b1c44e964d65c1a13541726a717f4ff..7aab6ab7c57febdadfd4d8dedea554594b14baa6 100644 (file)
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2727,7 +2727,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o
         return security_ops->socket_getpeersec(sock, optval, optlen, len);
  }
  
-static inline int security_sk_alloc(struct sock *sk, int family, int priority)
+static inline int security_sk_alloc(struct sock *sk, int family,
+                                   unsigned int __nocast priority)
  {
         return security_ops->sk_alloc_security(sk, family, priority);
  }
@@ -2844,7 +2845,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o
         return -ENOPROTOOPT;
  }
  
-static inline int security_sk_alloc(struct sock *sk, int family, int priority)
+static inline int security_sk_alloc(struct sock *sk, int family,
+                                   unsigned int __nocast priority)
  {
         return 0;
  }
diff --git a/include/linux/selinux_netlink.h b/include/linux/selinux_netlink.h

index 957e6ebca4e6ad93275699bd77cd6892b0bbb463..bbf489decd84df3009b79dc24eea7d81d147c9ca 100644 (file)
--- a/include/linux/selinux_netlink.h
+++ b/include/linux/selinux_netlink.h
@@ -20,10 +20,21 @@ enum {
         SELNL_MSG_MAX
  };
  
-/* Multicast groups */
+#ifndef __KERNEL__
+/* Multicast groups - backwards compatibility for userspace */
  #define SELNL_GRP_NONE         0x00000000
  #define SELNL_GRP_AVC          0x00000001      /* AVC notifications */
  #define SELNL_GRP_ALL          0xffffffff
+#endif
+
+enum selinux_nlgroups {
+       SELNLGRP_NONE,
+#define SELNLGRP_NONE  SELNLGRP_NONE
+       SELNLGRP_AVC,
+#define SELNLGRP_AVC   SELNLGRP_AVC
+       __SELNLGRP_MAX
+};
+#define SELNLGRP_MAX   (__SELNLGRP_MAX - 1)
  
  /* Message structures */
  struct selnl_msg_setenforce {
diff --git a/include/linux/serialP.h b/include/linux/serialP.h

index 2b2f35a64d7510e5436c40bd1ff5caa0b258c7f9..2b9e6b9554d577fa49130c4c4e2a4c745d7b3088 100644 (file)
--- a/include/linux/serialP.h
+++ b/include/linux/serialP.h
@@ -140,44 +140,4 @@ struct rs_multiport_struct {
  #define ALPHA_KLUDGE_MCR 0
  #endif
  
-/*
- * Definitions for PCI support.
- */
-#define SPCI_FL_BASE_MASK      0x0007
-#define SPCI_FL_BASE0  0x0000
-#define SPCI_FL_BASE1  0x0001
-#define SPCI_FL_BASE2  0x0002
-#define SPCI_FL_BASE3  0x0003
-#define SPCI_FL_BASE4  0x0004
-#define SPCI_FL_GET_BASE(x)    (x & SPCI_FL_BASE_MASK)
-
-#define SPCI_FL_IRQ_MASK       (0x0007 << 4)
-#define SPCI_FL_IRQBASE0       (0x0000 << 4)
-#define SPCI_FL_IRQBASE1       (0x0001 << 4)
-#define SPCI_FL_IRQBASE2       (0x0002 << 4)
-#define SPCI_FL_IRQBASE3       (0x0003 << 4)
-#define SPCI_FL_IRQBASE4       (0x0004 << 4)
-#define SPCI_FL_GET_IRQBASE(x)        ((x & SPCI_FL_IRQ_MASK) >> 4)
-
-/* Use successive BARs (PCI base address registers), 
-   else use offset into some specified BAR */
-#define SPCI_FL_BASE_TABLE     0x0100
-
-/* Use successive entries in the irq resource table */
-#define SPCI_FL_IRQ_TABLE      0x0200
-
-/* Use the irq resource table instead of dev->irq */
-#define SPCI_FL_IRQRESOURCE    0x0400
-
-/* Use the Base address register size to cap number of ports */
-#define SPCI_FL_REGION_SZ_CAP  0x0800
-
-/* Do not use irq sharing for this device */
-#define SPCI_FL_NO_SHIRQ       0x1000
-
-/* This is a PNP device */
-#define SPCI_FL_ISPNP          0x2000
-
-#define SPCI_FL_PNPDEFAULT     (SPCI_FL_IRQRESOURCE|SPCI_FL_ISPNP)
-
  #endif /* _LINUX_SERIAL_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h

index 0061c9470482df8e91ad877e7348dad4db731f5e..42edce6abe2349fc428c6321f81af81578ca5cb2 100644 (file)
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -155,16 +155,29 @@ struct skb_shared_info {
  #define SKB_DATAREF_SHIFT 16
  #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1)
  
+extern struct timeval skb_tv_base;
+
+struct skb_timeval {
+       u32     off_sec;
+       u32     off_usec;
+};
+
+
+enum {
+       SKB_FCLONE_UNAVAILABLE,
+       SKB_FCLONE_ORIG,
+       SKB_FCLONE_CLONE,
+};
+
  /** 
   *     struct sk_buff - socket buffer
   *     @next: Next buffer in list
   *     @prev: Previous buffer in list
   *     @list: List we are on
   *     @sk: Socket we are owned by
- *     @stamp: Time we arrived
+ *     @tstamp: Time we arrived stored as offset to skb_tv_base
   *     @dev: Device we arrived on/are leaving by
   *     @input_dev: Device we arrived on
- *      @real_dev: The real device we are using
   *     @h: Transport layer header
   *     @nh: Network layer header
   *     @mac: Link layer header
@@ -190,14 +203,11 @@ struct skb_shared_info {
   *     @end: End pointer
   *     @destructor: Destruct function
   *     @nfmark: Can be used for communication between hooks
- *     @nfcache: Cache info
   *     @nfct: Associated connection, if any
   *     @nfctinfo: Relationship of this skb to the connection
   *     @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
- *      @private: Data which is private to the HIPPI implementation
   *     @tc_index: Traffic control index
   *     @tc_verd: traffic control verdict
- *     @tc_classid: traffic control classid
   */
  
  struct sk_buff {
@@ -205,12 +215,10 @@ struct sk_buff {
         struct sk_buff          *next;
         struct sk_buff          *prev;
  
-       struct sk_buff_head     *list;
         struct sock             *sk;
-       struct timeval          stamp;
+       struct skb_timeval      tstamp;
         struct net_device       *dev;
         struct net_device       *input_dev;
-       struct net_device       *real_dev;
  
         union {
                 struct tcphdr   *th;
@@ -252,33 +260,28 @@ struct sk_buff {
         __u8                    local_df:1,
                                 cloned:1,
                                 ip_summed:2,
-                               nohdr:1;
-                               /* 3 bits spare */
-       __u8                    pkt_type;
-       __u16                   protocol;
+                               nohdr:1,
+                               nfctinfo:3;
+       __u8                    pkt_type:3,
+                               fclone:2;
+       __be16                  protocol;
  
         void                    (*destructor)(struct sk_buff *skb);
  #ifdef CONFIG_NETFILTER
-       unsigned long           nfmark;
-       __u32                   nfcache;
-       __u32                   nfctinfo;
+       __u32                   nfmark;
         struct nf_conntrack     *nfct;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+       __u8                    ipvs_property:1;
+#endif
  #ifdef CONFIG_BRIDGE_NETFILTER
         struct nf_bridge_info   *nf_bridge;
  #endif
  #endif /* CONFIG_NETFILTER */
-#if defined(CONFIG_HIPPI)
-       union {
-               __u32           ifield;
-       } private;
-#endif
  #ifdef CONFIG_NET_SCHED
-       __u32                   tc_index;        /* traffic control index */
+       __u16                   tc_index;       /* traffic control index */
  #ifdef CONFIG_NET_CLS_ACT
-       __u32           tc_verd;               /* traffic control verdict */
-       __u32           tc_classid;            /* traffic control classid */
+       __u16                   tc_verd;        /* traffic control verdict */
  #endif
-
  #endif
  
  
@@ -300,8 +303,20 @@ struct sk_buff {
  #include <asm/system.h>
  
  extern void           __kfree_skb(struct sk_buff *skb);
-extern struct sk_buff *alloc_skb(unsigned int size,
-                                unsigned int __nocast priority);
+extern struct sk_buff *__alloc_skb(unsigned int size,
+                                  unsigned int __nocast priority, int fclone);
+static inline struct sk_buff *alloc_skb(unsigned int size,
+                                       unsigned int __nocast priority)
+{
+       return __alloc_skb(size, priority, 0);
+}
+
+static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
+                                              unsigned int __nocast priority)
+{
+       return __alloc_skb(size, priority, 1);
+}
+
  extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                             unsigned int size,
                                             unsigned int __nocast priority);
@@ -597,7 +612,6 @@ static inline void __skb_queue_head(struct sk_buff_head *list,
  {
         struct sk_buff *prev, *next;
  
-       newsk->list = list;
         list->qlen++;
         prev = (struct sk_buff *)list;
         next = prev->next;
@@ -622,7 +636,6 @@ static inline void __skb_queue_tail(struct sk_buff_head *list,
  {
         struct sk_buff *prev, *next;
  
-       newsk->list = list;
         list->qlen++;
         next = (struct sk_buff *)list;
         prev = next->prev;
@@ -655,7 +668,6 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
                 next->prev   = prev;
                 prev->next   = next;
                 result->next = result->prev = NULL;
-               result->list = NULL;
         }
         return result;
  }
@@ -664,7 +676,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  /*
   *     Insert a packet on a list.
   */
-extern void        skb_insert(struct sk_buff *old, struct sk_buff *newsk);
+extern void        skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list);
  static inline void __skb_insert(struct sk_buff *newsk,
                                 struct sk_buff *prev, struct sk_buff *next,
                                 struct sk_buff_head *list)
@@ -672,24 +684,23 @@ static inline void __skb_insert(struct sk_buff *newsk,
         newsk->next = next;
         newsk->prev = prev;
         next->prev  = prev->next = newsk;
-       newsk->list = list;
         list->qlen++;
  }
  
  /*
   *     Place a packet after a given packet in a list.
   */
-extern void       skb_append(struct sk_buff *old, struct sk_buff *newsk);
-static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
+extern void       skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list);
+static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
  {
-       __skb_insert(newsk, old, old->next, old->list);
+       __skb_insert(newsk, old, old->next, list);
  }
  
  /*
   * remove sk_buff from list. _Must_ be called atomically, and with
   * the list known..
   */
-extern void       skb_unlink(struct sk_buff *skb);
+extern void       skb_unlink(struct sk_buff *skb, struct sk_buff_head *list);
  static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  {
         struct sk_buff *next, *prev;
@@ -698,7 +709,6 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
         next       = skb->next;
         prev       = skb->prev;
         skb->next  = skb->prev = NULL;
-       skb->list  = NULL;
         next->prev = prev;
         prev->next = next;
  }
@@ -1213,6 +1223,8 @@ extern void              skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
  extern void           skb_split(struct sk_buff *skb,
                                  struct sk_buff *skb1, const u32 len);
  
+extern void           skb_release_data(struct sk_buff *skb);
+
  static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
                                        int len, void *buffer)
  {
@@ -1230,6 +1242,42 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
  extern void skb_init(void);
  extern void skb_add_mtu(int mtu);
  
+/**
+ *     skb_get_timestamp - get timestamp from a skb
+ *     @skb: skb to get stamp from
+ *     @stamp: pointer to struct timeval to store stamp in
+ *
+ *     Timestamps are stored in the skb as offsets to a base timestamp.
+ *     This function converts the offset back to a struct timeval and stores
+ *     it in stamp.
+ */
+static inline void skb_get_timestamp(struct sk_buff *skb, struct timeval *stamp)
+{
+       stamp->tv_sec  = skb->tstamp.off_sec;
+       stamp->tv_usec = skb->tstamp.off_usec;
+       if (skb->tstamp.off_sec) {
+               stamp->tv_sec  += skb_tv_base.tv_sec;
+               stamp->tv_usec += skb_tv_base.tv_usec;
+       }
+}
+
+/**
+ *     skb_set_timestamp - set timestamp of a skb
+ *     @skb: skb to set stamp of
+ *     @stamp: pointer to struct timeval to get stamp from
+ *
+ *     Timestamps are stored in the skb as offsets to a base timestamp.
+ *     This function converts a struct timeval to an offset and stores
+ *     it in the skb.
+ */
+static inline void skb_set_timestamp(struct sk_buff *skb, struct timeval *stamp)
+{
+       skb->tstamp.off_sec  = stamp->tv_sec - skb_tv_base.tv_sec;
+       skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec;
+}
+
+extern void __net_timestamp(struct sk_buff *skb);
+
  #ifdef CONFIG_NETFILTER
  static inline void nf_conntrack_put(struct nf_conntrack *nfct)
  {
diff --git a/include/linux/socket.h b/include/linux/socket.h

index a5c7d96e4d2e12172e892b3eb3a3b806e2fa2ca9..1739c2d5b95b449669b887aa86aeb324d836c1f2 100644 (file)
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -26,6 +26,13 @@ struct __kernel_sockaddr_storage {
  #include <linux/types.h>               /* pid_t                        */
  #include <linux/compiler.h>            /* __user                       */
  
+extern int sysctl_somaxconn;
+extern void sock_init(void);
+#ifdef CONFIG_PROC_FS
+struct seq_file;
+extern void socket_seq_show(struct seq_file *seq);
+#endif
+
  typedef unsigned short sa_family_t;
  
  /*
@@ -271,6 +278,8 @@ struct ucred {
  #define SOL_IRDA        266
  #define SOL_NETBEUI    267
  #define SOL_LLC                268
+#define SOL_DCCP       269
+#define SOL_NETLINK    270
  
  /* IPX options */
  #define IPX_TYPE       1
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h

index 34ec3e8d99b3f04fb4e23631f84ce9d497529cf3..23448d0fb5bc522324f176859170874c6f2d0a1a 100644 (file)
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -177,6 +177,7 @@ typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem);
  struct xdr_array2_desc {
         unsigned int elem_size;
         unsigned int array_len;
+       unsigned int array_maxlen;
         xdr_xcode_elem_t xcode;
  };
  
diff --git a/include/linux/swap.h b/include/linux/swap.h

index 239f520cc49ec1c55561e2ac9e085629e489b03a..bfe3e763ccf283d6bc680877aa2f11a658c04afd 100644 (file)
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -7,7 +7,6 @@
  #include <linux/mmzone.h>
  #include <linux/list.h>
  #include <linux/sched.h>
-#include <linux/pagemap.h>
  
  #include <asm/atomic.h>
  #include <asm/page.h>
@@ -255,6 +254,8 @@ static inline void put_swap_token(struct mm_struct *mm)
  
  #define si_swapinfo(val) \
         do { (val)->freeswap = (val)->totalswap = 0; } while (0)
+/* only sparc can not include linux/pagemap.h in this file
+ * so leave page_cache_release and release_pages undeclared... */
  #define free_page_and_swap_cache(page) \
         page_cache_release(page)
  #define free_pages_and_swap_cache(pages, nr) \
diff --git a/include/linux/tcp.h b/include/linux/tcp.h

index e4fd82e4210428458754a263c7e09930c2cb7e8a..ac4ca44c75caec326729575ba9293aab0ca457f1 100644 (file)
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -55,24 +55,6 @@ struct tcphdr {
         __u16   urg_ptr;
  };
  
-
-enum {
-  TCP_ESTABLISHED = 1,
-  TCP_SYN_SENT,
-  TCP_SYN_RECV,
-  TCP_FIN_WAIT1,
-  TCP_FIN_WAIT2,
-  TCP_TIME_WAIT,
-  TCP_CLOSE,
-  TCP_CLOSE_WAIT,
-  TCP_LAST_ACK,
-  TCP_LISTEN,
-  TCP_CLOSING,  /* now a valid state */
-
-  TCP_MAX_STATES /* Leave at the end! */
-};
-
-#define TCP_STATE_MASK 0xF
  #define TCP_ACTION_FIN (1 << 7)
  
  enum {
@@ -195,8 +177,9 @@ struct tcp_info
  
  #include <linux/config.h>
  #include <linux/skbuff.h>
-#include <linux/ip.h>
  #include <net/sock.h>
+#include <net/inet_connection_sock.h>
+#include <net/inet_timewait_sock.h>
  
  /* This defines a selective acknowledgement block. */
  struct tcp_sack_block {
@@ -236,8 +219,8 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
  }
  
  struct tcp_sock {
-       /* inet_sock has to be the first member of tcp_sock */
-       struct inet_sock        inet;
+       /* inet_connection_sock has to be the first member of tcp_sock */
+       struct inet_connection_sock     inet_conn;
         int     tcp_header_len; /* Bytes of tcp header to send          */
  
  /*
@@ -258,19 +241,6 @@ struct tcp_sock {
         __u32   snd_sml;        /* Last byte of the most recently transmitted small packet */
         __u32   rcv_tstamp;     /* timestamp of last received ACK (for keepalives) */
         __u32   lsndtime;       /* timestamp of last sent data packet (for restart window) */
-       struct tcp_bind_bucket *bind_hash;
-       /* Delayed ACK control data */
-       struct {
-               __u8    pending;        /* ACK is pending */
-               __u8    quick;          /* Scheduled number of quick acks       */
-               __u8    pingpong;       /* The session is interactive           */
-               __u8    blocked;        /* Delayed ACK was blocked by socket lock*/
-               __u32   ato;            /* Predicted tick of soft clock         */
-               unsigned long timeout;  /* Currently scheduled timeout          */
-               __u32   lrcvtime;       /* timestamp of last received data packet*/
-               __u16   last_seg_size;  /* Size of last incoming segment        */
-               __u16   rcv_mss;        /* MSS used for delayed ACK decisions   */ 
-       } ack;
  
         /* Data for direct copy to user */
         struct {
@@ -288,19 +258,15 @@ struct tcp_sock {
         __u32   mss_cache;      /* Cached effective mss, not including SACKS */
         __u16   xmit_size_goal; /* Goal for segmenting output packets   */
         __u16   ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
-       __u8    ca_state;       /* State of fast-retransmit machine     */
-       __u8    retransmits;    /* Number of unrecovered RTO timeouts.  */
  
-       __u16   advmss;         /* Advertised MSS                       */
         __u32   window_clamp;   /* Maximal window to advertise          */
         __u32   rcv_ssthresh;   /* Current window clamp                 */
  
         __u32   frto_highmark;  /* snd_nxt when RTO occurred */
         __u8    reordering;     /* Packet reordering metric.            */
         __u8    frto_counter;   /* Number of new acks after RTO */
-
-       __u8    unused;
-       __u8    defer_accept;   /* User waits for some data after accept() */
+       __u8    nonagle;        /* Disable Nagle algorithm?             */
+       __u8    keepalive_probes; /* num of allowed keep alive probes   */
  
  /* RTT measurement */
         __u32   srtt;           /* smoothed round trip time << 3        */
@@ -308,19 +274,13 @@ struct tcp_sock {
         __u32   mdev_max;       /* maximal mdev for the last rtt period */
         __u32   rttvar;         /* smoothed mdev_max                    */
         __u32   rtt_seq;        /* sequence number to update rttvar     */
-       __u32   rto;            /* retransmit timeout                   */
  
         __u32   packets_out;    /* Packets which are "in flight"        */
         __u32   left_out;       /* Packets which leaved network */
         __u32   retrans_out;    /* Retransmitted packets out            */
-       __u8    backoff;        /* backoff                              */
  /*
   *      Options received (usually on last packet, some only on SYN packets).
   */
-       __u8    nonagle;        /* Disable Nagle algorithm?             */
-       __u8    keepalive_probes; /* num of allowed keep alive probes   */
-
-       __u8    probes_out;     /* unanswered 0 window probes           */
         struct tcp_options_received rx_opt;
  
  /*
@@ -333,11 +293,6 @@ struct tcp_sock {
         __u32   snd_cwnd_used;
         __u32   snd_cwnd_stamp;
  
-       /* Two commonly used timers in both sender and receiver paths. */
-       unsigned long           timeout;
-       struct timer_list       retransmit_timer;       /* Resend (no ack)      */
-       struct timer_list       delack_timer;           /* Ack delay            */
-
         struct sk_buff_head     out_of_order_queue; /* Out of order segments go here */
  
         struct tcp_func         *af_specific;   /* Operations which are AF_INET{4,6} specific   */
@@ -352,8 +307,7 @@ struct tcp_sock {
         struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
         struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
  
-       __u8    syn_retries;    /* num of allowed syn retries */
-       __u8    ecn_flags;      /* ECN status bits.                     */
+       __u16   advmss;         /* Advertised MSS                       */
         __u16   prior_ssthresh; /* ssthresh saved at recovery start     */
         __u32   lost_out;       /* Lost packets                 */
         __u32   sacked_out;     /* SACK'd packets                       */
@@ -367,14 +321,12 @@ struct tcp_sock {
         int     undo_retrans;   /* number of undoable retransmissions. */
         __u32   urg_seq;        /* Seq of received urgent pointer */
         __u16   urg_data;       /* Saved octet of OOB data and control flags */
-       __u8    pending;        /* Scheduled timer event        */
         __u8    urg_mode;       /* In urgent mode               */
+       __u8    ecn_flags;      /* ECN status bits.                     */
         __u32   snd_up;         /* Urgent pointer               */
  
         __u32   total_retrans;  /* Total retransmits for entire connection */
  
-       struct request_sock_queue accept_queue; /* FIFO of established children */
-
         unsigned int            keepalive_time;   /* time before keep alive takes place */
         unsigned int            keepalive_intvl;  /* time interval between keep alive probes */
         int                     linger2;
@@ -394,11 +346,6 @@ struct tcp_sock {
                 __u32   seq;
                 __u32   time;
         } rcvq_space;
-
-       /* Pluggable TCP congestion control hook */
-       struct tcp_congestion_ops *ca_ops;
-       u32     ca_priv[16];
-#define TCP_CA_PRIV_SIZE       (16*sizeof(u32))
  };
  
  static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@@ -406,9 +353,18 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk)
         return (struct tcp_sock *)sk;
  }
  
-static inline void *tcp_ca(const struct tcp_sock *tp)
+struct tcp_timewait_sock {
+       struct inet_timewait_sock tw_sk;
+       __u32                     tw_rcv_nxt;
+       __u32                     tw_snd_nxt;
+       __u32                     tw_rcv_wnd;
+       __u32                     tw_ts_recent;
+       long                      tw_ts_recent_stamp;
+};
+
+static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
  {
-       return (void *) tp->ca_priv;
+       return (struct tcp_timewait_sock *)sk;
  }
  
  #endif
diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h

deleted file mode 100644 (file)

index 7a59967..0000000
--- a/include/linux/tcp_diag.h
+++ /dev/null
@@ -1,127 +0,0 @@
-#ifndef _TCP_DIAG_H_
-#define _TCP_DIAG_H_ 1
-
-/* Just some random number */
-#define TCPDIAG_GETSOCK 18
-
-/* Socket identity */
-struct tcpdiag_sockid
-{
-       __u16   tcpdiag_sport;
-       __u16   tcpdiag_dport;
-       __u32   tcpdiag_src[4];
-       __u32   tcpdiag_dst[4];
-       __u32   tcpdiag_if;
-       __u32   tcpdiag_cookie[2];
-#define TCPDIAG_NOCOOKIE (~0U)
-};
-
-/* Request structure */
-
-struct tcpdiagreq
-{
-       __u8    tcpdiag_family;         /* Family of addresses. */
-       __u8    tcpdiag_src_len;
-       __u8    tcpdiag_dst_len;
-       __u8    tcpdiag_ext;            /* Query extended information */
-
-       struct tcpdiag_sockid id;
-
-       __u32   tcpdiag_states;         /* States to dump */
-       __u32   tcpdiag_dbs;            /* Tables to dump (NI) */
-};
-
-enum
-{
-       TCPDIAG_REQ_NONE,
-       TCPDIAG_REQ_BYTECODE,
-};
-
-#define TCPDIAG_REQ_MAX TCPDIAG_REQ_BYTECODE
-
-/* Bytecode is sequence of 4 byte commands followed by variable arguments.
- * All the commands identified by "code" are conditional jumps forward:
- * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be
- * length of the command and its arguments.
- */
- 
-struct tcpdiag_bc_op
-{
-       unsigned char   code;
-       unsigned char   yes;
-       unsigned short  no;
-};
-
-enum
-{
-       TCPDIAG_BC_NOP,
-       TCPDIAG_BC_JMP,
-       TCPDIAG_BC_S_GE,
-       TCPDIAG_BC_S_LE,
-       TCPDIAG_BC_D_GE,
-       TCPDIAG_BC_D_LE,
-       TCPDIAG_BC_AUTO,
-       TCPDIAG_BC_S_COND,
-       TCPDIAG_BC_D_COND,
-};
-
-struct tcpdiag_hostcond
-{
-       __u8    family;
-       __u8    prefix_len;
-       int     port;
-       __u32   addr[0];
-};
-
-/* Base info structure. It contains socket identity (addrs/ports/cookie)
- * and, alas, the information shown by netstat. */
-struct tcpdiagmsg
-{
-       __u8    tcpdiag_family;
-       __u8    tcpdiag_state;
-       __u8    tcpdiag_timer;
-       __u8    tcpdiag_retrans;
-
-       struct tcpdiag_sockid id;
-
-       __u32   tcpdiag_expires;
-       __u32   tcpdiag_rqueue;
-       __u32   tcpdiag_wqueue;
-       __u32   tcpdiag_uid;
-       __u32   tcpdiag_inode;
-};
-
-/* Extensions */
-
-enum
-{
-       TCPDIAG_NONE,
-       TCPDIAG_MEMINFO,
-       TCPDIAG_INFO,
-       TCPDIAG_VEGASINFO,
-       TCPDIAG_CONG,
-};
-
-#define TCPDIAG_MAX TCPDIAG_CONG
-
-
-/* TCPDIAG_MEM */
-
-struct tcpdiag_meminfo
-{
-       __u32   tcpdiag_rmem;
-       __u32   tcpdiag_wmem;
-       __u32   tcpdiag_fmem;
-       __u32   tcpdiag_tmem;
-};
-
-/* TCPDIAG_VEGASINFO */
-
-struct tcpvegas_info {
-       __u32   tcpv_enabled;
-       __u32   tcpv_rttcnt;
-       __u32   tcpv_rtt;
-       __u32   tcpv_minrtt;
-};
-
-#endif /* _TCP_DIAG_H_ */
diff --git a/include/linux/types.h b/include/linux/types.h

index dcb13f865df97ccee9a7764fa247fec754f0609a..2b678c22ca4a0dead6fac2672f76d365ed8945f6 100644 (file)
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -123,6 +123,9 @@ typedef             __u64           u_int64_t;
  typedef                __s64           int64_t;
  #endif
  
+/* this is a special 64bit data type that is 8-byte aligned */
+#define aligned_u64 unsigned long long __attribute__((aligned(8)))
+
  /*
   * The type used for indexing onto a disc or disc partition.
   * If required, asm/types.h can override it and define
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h

index f0d423300d84aead0467e222d4d3d3961627abef..0fb077d68441f864281d8473dd9744c37d9d80df 100644 (file)
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -258,9 +258,27 @@ struct xfrm_usersa_flush {
         __u8                            proto;
  };
  
+#ifndef __KERNEL__
+/* backwards compatibility for userspace */
  #define XFRMGRP_ACQUIRE                1
  #define XFRMGRP_EXPIRE         2
  #define XFRMGRP_SA             4
  #define XFRMGRP_POLICY         8
+#endif
+
+enum xfrm_nlgroups {
+       XFRMNLGRP_NONE,
+#define XFRMNLGRP_NONE         XFRMNLGRP_NONE
+       XFRMNLGRP_ACQUIRE,
+#define XFRMNLGRP_ACQUIRE      XFRMNLGRP_ACQUIRE
+       XFRMNLGRP_EXPIRE,
+#define XFRMNLGRP_EXPIRE       XFRMNLGRP_EXPIRE
+       XFRMNLGRP_SA,
+#define XFRMNLGRP_SA           XFRMNLGRP_SA
+       XFRMNLGRP_POLICY,
+#define XFRMNLGRP_POLICY       XFRMNLGRP_POLICY
+       __XFRMNLGRP_MAX
+};
+#define XFRMNLGRP_MAX  (__XFRMNLGRP_MAX - 1)
  
  #endif /* _LINUX_XFRM_H */
diff --git a/include/linux/zlib.h b/include/linux/zlib.h

index 850076ea14d318d43418a98d47876ec9ddee36c0..74f7b78c22d2fb46136999370edd154290a2f745 100644 (file)
--- a/include/linux/zlib.h
+++ b/include/linux/zlib.h
@@ -506,6 +506,11 @@ extern int zlib_deflateReset (z_streamp strm);
     stream state was inconsistent (such as zalloc or state being NULL).
  */
  
+static inline unsigned long deflateBound(unsigned long s)
+{
+       return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
+}
+
  extern int zlib_deflateParams (z_streamp strm, int level, int strategy);
  /*
       Dynamically update the compression level and compression strategy.  The
diff --git a/include/media/tuner.h b/include/media/tuner.h

index d8c0a5563289a11ff571e367aecf394059fb4b4b..eeaa15ddee8512e303671b1fd40e33f04be101f0 100644 (file)
--- a/include/media/tuner.h
+++ b/include/media/tuner.h
@@ -1,5 +1,5 @@
  
-/* $Id: tuner.h,v 1.42 2005/07/06 09:42:19 mchehab Exp $
+/* $Id: tuner.h,v 1.45 2005/07/28 18:41:21 mchehab Exp $
   *
      tuner.h - definition for different tuners
  
@@ -108,6 +108,8 @@
  
  #define TUNER_TEA5767         62       /* Only FM Radio Tuner */
  #define TUNER_PHILIPS_FMD1216ME_MK3 63
+#define TUNER_LG_TDVS_H062F   64       /* DViCO FusionHDTV 5 */
+#define TUNER_YMEC_TVF66T5_B_DFF 65    /* Acorp Y878F */
  
  #define NOTUNER 0
  #define PAL     1      /* PAL_BG */
diff --git a/include/net/act_api.h b/include/net/act_api.h

index ed00a995f576f1d612b526d75b0cb0cd1cb1dbf7..b55eb7c7f0339ce2893334da2ab674e994296d4d 100644 (file)
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -63,7 +63,7 @@ struct tc_action_ops
         __u32   type; /* TBD to match kind */
         __u32   capab;  /* capabilities includes 4 bit version */
         struct module           *owner;
-       int     (*act)(struct sk_buff **, struct tc_action *);
+       int     (*act)(struct sk_buff **, struct tc_action *, struct tcf_result *);
         int     (*get_stats)(struct sk_buff *, struct tc_action *);
         int     (*dump)(struct sk_buff *, struct tc_action *,int , int);
         int     (*cleanup)(struct tc_action *, int bind);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h

index a0ed9367217601cd1e1dbcb6266bf2e8d25f68cf..750e2508dd90627084e9757363f1d25832047d30 100644 (file)
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -45,6 +45,7 @@ struct prefix_info {
  
  #ifdef __KERNEL__
  
+#include <linux/config.h>
  #include <linux/netdevice.h>
  #include <net/if_inet6.h>
  #include <net/ipv6.h>
@@ -238,5 +239,10 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr)
                 addr->s6_addr32[3] == htonl(0x00000002));
  }
  
+#ifdef CONFIG_PROC_FS
+extern int if6_proc_init(void);
+extern void if6_proc_exit(void);
+#endif
+
  #endif
  #endif
diff --git a/include/net/af_unix.h b/include/net/af_unix.h

index b60b3846b9d165280d012f48d9d83e4017237ec6..b5d785ab4a0ea3cd41fe6ac2c82edcaf68274c0a 100644 (file)
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -1,5 +1,11 @@
  #ifndef __LINUX_NET_AFUNIX_H
  #define __LINUX_NET_AFUNIX_H
+
+#include <linux/config.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <net/sock.h>
+
  extern void unix_inflight(struct file *fp);
  extern void unix_notinflight(struct file *fp);
  extern void unix_gc(void);
@@ -74,5 +80,14 @@ struct unix_sock {
          wait_queue_head_t       peer_wait;
  };
  #define unix_sk(__sk) ((struct unix_sock *)__sk)
+
+#ifdef CONFIG_SYSCTL
+extern int sysctl_unix_max_dgram_qlen;
+extern void unix_sysctl_register(void);
+extern void unix_sysctl_unregister(void);
+#else
+static inline void unix_sysctl_register(void) {}
+static inline void unix_sysctl_unregister(void) {}
+#endif
  #endif
  #endif
diff --git a/include/net/arp.h b/include/net/arp.h

index a1f09fad6a52e57070d6a27ade661339bfc449c6..a13e30c35f4259e7ba835e7eda73b38995246437 100644 (file)
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -11,7 +11,7 @@ extern struct neigh_table arp_tbl;
  
  extern void    arp_init(void);
  extern int     arp_rcv(struct sk_buff *skb, struct net_device *dev,
-                       struct packet_type *pt);
+                       struct packet_type *pt, struct net_device *orig_dev);
  extern int     arp_find(unsigned char *haddr, struct sk_buff *skb);
  extern int     arp_ioctl(unsigned int cmd, void __user *arg);
  extern void     arp_send(int type, int ptype, u32 dest_ip, 
diff --git a/include/net/ax25.h b/include/net/ax25.h

index 828a3a93dda10e07ce851476894296f5b7e4a68f..926eed543023906328aa7a95908d9e54039bfc2c 100644 (file)
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -139,11 +139,25 @@ enum {
  #define AX25_DEF_DS_TIMEOUT    (3 * 60 * HZ)           /* DAMA timeout 3 minutes */
  
  typedef struct ax25_uid_assoc {
-       struct ax25_uid_assoc   *next;
+       struct hlist_node       uid_node;
+       atomic_t                refcount;
         uid_t                   uid;
         ax25_address            call;
  } ax25_uid_assoc;
  
+#define ax25_uid_for_each(__ax25, node, list) \
+       hlist_for_each_entry(__ax25, node, list, uid_node)
+
+#define ax25_uid_hold(ax25) \
+       atomic_inc(&((ax25)->refcount))
+
+static inline void ax25_uid_put(ax25_uid_assoc *assoc)
+{
+       if (atomic_dec_and_test(&assoc->refcount)) {
+               kfree(assoc);
+       }
+}
+
  typedef struct {
         ax25_address            calls[AX25_MAX_DIGIS];
         unsigned char           repeated[AX25_MAX_DIGIS];
@@ -302,7 +316,7 @@ extern int  ax25_protocol_is_registered(unsigned int);
  
  /* ax25_in.c */
  extern int  ax25_rx_iframe(ax25_cb *, struct sk_buff *);
-extern int  ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *);
+extern int  ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
  
  /* ax25_ip.c */
  extern int  ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int);
@@ -376,7 +390,7 @@ extern unsigned long ax25_display_timer(struct timer_list *);
  
  /* ax25_uid.c */
  extern int  ax25_uid_policy;
-extern ax25_address *ax25_findbyuid(uid_t);
+extern ax25_uid_assoc *ax25_findbyuid(uid_t);
  extern int  ax25_uid_ioctl(int, struct sockaddr_ax25 *);
  extern struct file_operations ax25_uid_fops;
  extern void ax25_uid_free(void);
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h

index 42a84c53678b1562811bec563d7bce81053f3a4c..6dfa4a61ffd04cbb59ff572bc001c0072fd8e5c6 100644 (file)
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -57,12 +57,6 @@
  #define BT_DBG(fmt, arg...)  printk(KERN_INFO "%s: " fmt "\n" , __FUNCTION__ , ## arg)
  #define BT_ERR(fmt, arg...)  printk(KERN_ERR  "%s: " fmt "\n" , __FUNCTION__ , ## arg)
  
-#ifdef HCI_DATA_DUMP
-#define BT_DMP(buf, len) bt_dump(__FUNCTION__, buf, len)
-#else
-#define BT_DMP(D...)
-#endif
-
  extern struct proc_dir_entry *proc_bt;
  
  /* Connection and socket states */
@@ -137,11 +131,12 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock);
  
  /* Skb helpers */
  struct bt_skb_cb {
-       int incoming;
+       __u8 pkt_type;
+       __u8 incoming;
  };
  #define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) 
  
-static inline struct sk_buff *bt_skb_alloc(unsigned int len, int how)
+static inline struct sk_buff *bt_skb_alloc(unsigned int len, unsigned int __nocast how)
  {
         struct sk_buff *skb;
  
@@ -174,8 +169,6 @@ static inline int skb_frags_no(struct sk_buff *skb)
         return n;
  }
  
-void bt_dump(char *pref, __u8 *buf, int count);
-
  int bt_err(__u16 code);
  
  #endif /* __BLUETOOTH_H */
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h

index 6f0706f4af68178db7852aa15c728822aefa4a80..371e7d3f2e6fe9574b5ef3418685f156fd879061 100644 (file)
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -453,6 +453,15 @@ struct inquiry_info_with_rssi {
         __u16    clock_offset;
         __s8     rssi;
  } __attribute__ ((packed));
+struct inquiry_info_with_rssi_and_pscan_mode {
+       bdaddr_t bdaddr;
+       __u8     pscan_rep_mode;
+       __u8     pscan_period_mode;
+       __u8     pscan_mode;
+       __u8     dev_class[3];
+       __u16    clock_offset;
+       __s8     rssi;
+} __attribute__ ((packed));
  
  #define HCI_EV_CONN_COMPLETE   0x03
  struct hci_ev_conn_complete {
@@ -584,6 +593,12 @@ struct hci_ev_clock_offset {
         __u16    clock_offset;
  } __attribute__ ((packed));
  
+#define HCI_EV_PSCAN_REP_MODE  0x20
+struct hci_ev_pscan_rep_mode {
+       bdaddr_t bdaddr;
+       __u8     pscan_rep_mode;
+} __attribute__ ((packed));
+
  /* Internal events generated by Bluetooth stack */
  #define HCI_EV_STACK_INTERNAL  0xFD
  struct hci_ev_stack_internal {
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h

index 6d63a47c731bc5bec4ed1f444ebe3b59fc5eda73..7f933f30207830333996a9d872acbbef536e636d 100644 (file)
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -404,7 +404,7 @@ static inline int hci_recv_frame(struct sk_buff *skb)
         bt_cb(skb)->incoming = 1;
  
         /* Time stamp */
-       do_gettimeofday(&skb->stamp);
+       __net_timestamp(skb);
  
         /* Queue frame for rx task */
         skb_queue_tail(&hdev->rx_q, skb);
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h

index 13669bad00b3737cf260f6a65c872894af3adb27..ffea9d54071f7eb65a4cb4ae089799ed4ceeedb4 100644 (file)
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -80,9 +80,9 @@
  #define RFCOMM_RPN_STOP_15     1
  
  #define RFCOMM_RPN_PARITY_NONE 0x0
-#define RFCOMM_RPN_PARITY_ODD  0x4
-#define RFCOMM_RPN_PARITY_EVEN 0x5
-#define RFCOMM_RPN_PARITY_MARK 0x6
+#define RFCOMM_RPN_PARITY_ODD  0x1
+#define RFCOMM_RPN_PARITY_EVEN 0x3
+#define RFCOMM_RPN_PARITY_MARK 0x5
  #define RFCOMM_RPN_PARITY_SPACE        0x7
  
  #define RFCOMM_RPN_FLOW_NONE   0x00
@@ -223,8 +223,14 @@ struct rfcomm_dlc {
  #define RFCOMM_CFC_DISABLED 0
  #define RFCOMM_CFC_ENABLED  RFCOMM_MAX_CREDITS
  
+/* ---- RFCOMM SEND RPN ---- */
+int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
+                       u8 bit_rate, u8 data_bits, u8 stop_bits,
+                       u8 parity, u8 flow_ctrl_settings, 
+                       u8 xon_char, u8 xoff_char, u16 param_mask);
+
  /* ---- RFCOMM DLCs (channels) ---- */
-struct rfcomm_dlc *rfcomm_dlc_alloc(int prio);
+struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio);
  void rfcomm_dlc_free(struct rfcomm_dlc *d);
  int  rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel);
  int  rfcomm_dlc_close(struct rfcomm_dlc *d, int reason);
diff --git a/include/net/datalink.h b/include/net/datalink.h

index 5797ba3d2eb5dbf0b5b8559282669dd57912e84e..deb7ca75db488f94b0c697cbdfa1946c3263b63b 100644 (file)
--- a/include/net/datalink.h
+++ b/include/net/datalink.h
@@ -9,7 +9,7 @@ struct datalink_proto {
          unsigned short  header_length;
  
          int     (*rcvfunc)(struct sk_buff *, struct net_device *,
-                                struct packet_type *);
+                                struct packet_type *, struct net_device *);
         int     (*request)(struct datalink_proto *, struct sk_buff *,
                                          unsigned char *);
         struct list_head node;
diff --git a/include/net/dn.h b/include/net/dn.h

index 5551c46db397ecd9eef41d2954db60de42fd7239..c1dbbd22279394863d3d7cb7d8b7ed948e8bcfa7 100644 (file)
--- a/include/net/dn.h
+++ b/include/net/dn.h
@@ -3,6 +3,7 @@
  
  #include <linux/dn.h>
  #include <net/sock.h>
+#include <net/tcp.h>
  #include <asm/byteorder.h>
  
  typedef unsigned short dn_address;
diff --git a/include/net/icmp.h b/include/net/icmp.h

index e5ef0d15fb45e8e7130c53ecdeb5c0f3279a7492..6cdebeee5f961318bc5c8e942b1a2f92e4b276ce 100644 (file)
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -57,4 +57,11 @@ static inline struct raw_sock *raw_sk(const struct sock *sk)
         return (struct raw_sock *)sk;
  }
  
+extern int sysctl_icmp_echo_ignore_all;
+extern int sysctl_icmp_echo_ignore_broadcasts;
+extern int sysctl_icmp_ignore_bogus_error_responses;
+extern int sysctl_icmp_errors_use_inbound_ifaddr;
+extern int sysctl_icmp_ratelimit;
+extern int sysctl_icmp_ratemask;
+
  #endif /* _ICMP_H */
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h

new file mode 100644 (file)

index 0000000..03df3b1
--- /dev/null
+++ b/include/net/inet6_hashtables.h
@@ -0,0 +1,130 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ * Authors:    Lotsa people, from code originally in tcp
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _INET6_HASHTABLES_H
+#define _INET6_HASHTABLES_H
+
+#include <linux/config.h>
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+
+#include <net/ipv6.h>
+
+struct inet_hashinfo;
+
+/* I have no idea if this is a good hash for v6 or not. -DaveM */
+static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
+                               const struct in6_addr *faddr, const u16 fport,
+                               const int ehash_size)
+{
+       int hashent = (lport ^ fport);
+
+       hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
+       hashent ^= hashent >> 16;
+       hashent ^= hashent >> 8;
+       return (hashent & (ehash_size - 1));
+}
+
+static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size)
+{
+       const struct inet_sock *inet = inet_sk(sk);
+       const struct ipv6_pinfo *np = inet6_sk(sk);
+       const struct in6_addr *laddr = &np->rcv_saddr;
+       const struct in6_addr *faddr = &np->daddr;
+       const __u16 lport = inet->num;
+       const __u16 fport = inet->dport;
+       return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size);
+}
+
+/*
+ * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
+ * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
+ *
+ * The sockhash lock must be held as a reader here.
+ */
+static inline struct sock *
+               __inet6_lookup_established(struct inet_hashinfo *hashinfo,
+                                          const struct in6_addr *saddr,
+                                          const u16 sport,
+                                          const struct in6_addr *daddr,
+                                          const u16 hnum,
+                                          const int dif)
+{
+       struct sock *sk;
+       const struct hlist_node *node;
+       const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
+       /* Optimize here for direct hit, only listening connections can
+        * have wildcards anyways.
+        */
+       const int hash = inet6_ehashfn(daddr, hnum, saddr, sport,
+                                      hashinfo->ehash_size);
+       struct inet_ehash_bucket *head = &hashinfo->ehash[hash];
+
+       read_lock(&head->lock);
+       sk_for_each(sk, node, &head->chain) {
+               /* For IPV6 do the cheaper port and family tests first. */
+               if (INET6_MATCH(sk, saddr, daddr, ports, dif))
+                       goto hit; /* You sunk my battleship! */
+       }
+       /* Must check for a TIME_WAIT'er before going to listener hash. */
+       sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
+               const struct inet_timewait_sock *tw = inet_twsk(sk);
+
+               if(*((__u32 *)&(tw->tw_dport))  == ports        &&
+                  sk->sk_family                == PF_INET6) {
+                       const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
+
+                       if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr)        &&
+                           ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)    &&
+                           (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
+                               goto hit;
+               }
+       }
+       read_unlock(&head->lock);
+       return NULL;
+
+hit:
+       sock_hold(sk);
+       read_unlock(&head->lock);
+       return sk;
+}
+
+extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
+                                         const struct in6_addr *daddr,
+                                         const unsigned short hnum,
+                                         const int dif);
+
+static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo,
+                                         const struct in6_addr *saddr,
+                                         const u16 sport,
+                                         const struct in6_addr *daddr,
+                                         const u16 hnum,
+                                         const int dif)
+{
+       struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport,
+                                                    daddr, hnum, dif);
+       if (sk)
+               return sk;
+
+       return inet6_lookup_listener(hashinfo, daddr, hnum, dif);
+}
+
+extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
+                                const struct in6_addr *saddr, const u16 sport,
+                                const struct in6_addr *daddr, const u16 dport,
+                                const int dif);
+#endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */
+#endif /* _INET6_HASHTABLES_H */
diff --git a/include/net/inet_common.h b/include/net/inet_common.h

index fbc1f4d140d82c1d7fe05092a8e12296ccff7284..f943306ce5ff3104092f604ed4d23c5c93de9855 100644 (file)
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -8,6 +8,11 @@ extern struct proto_ops                inet_dgram_ops;
   *     INET4 prototypes used by INET6
   */
  
+struct msghdr;
+struct sock;
+struct sockaddr;
+struct socket;
+
  extern void                    inet_remove_sock(struct sock *sk1);
  extern void                    inet_put_sock(unsigned short num, 
                                               struct sock *sk);
@@ -29,7 +34,6 @@ extern unsigned int           inet_poll(struct file * file, struct socket *sock, struct p
  extern int                     inet_listen(struct socket *sock, int backlog);
  
  extern void                    inet_sock_destruct(struct sock *sk);
-extern atomic_t                        inet_sock_nr;
  
  extern int                     inet_bind(struct socket *sock, 
                                           struct sockaddr *uaddr, int addr_len);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h

new file mode 100644 (file)

index 0000000..651f824
--- /dev/null
+++ b/include/net/inet_connection_sock.h
@@ -0,0 +1,276 @@
+/*
+ * NET         Generic infrastructure for INET connection oriented protocols.
+ *
+ *             Definitions for inet_connection_sock 
+ *
+ * Authors:    Many people, see the TCP sources
+ *
+ *             From code originally in TCP
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+#ifndef _INET_CONNECTION_SOCK_H
+#define _INET_CONNECTION_SOCK_H
+
+#include <linux/ip.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <net/request_sock.h>
+
+#define INET_CSK_DEBUG 1
+
+/* Cancel timers, when they are not required. */
+#undef INET_CSK_CLEAR_TIMERS
+
+struct inet_bind_bucket;
+struct inet_hashinfo;
+struct tcp_congestion_ops;
+
+/** inet_connection_sock - INET connection oriented sock
+ *
+ * @icsk_accept_queue:    FIFO of established children
+ * @icsk_bind_hash:       Bind node
+ * @icsk_timeout:         Timeout
+ * @icsk_retransmit_timer: Resend (no ack)
+ * @icsk_delack_timer:    Delayed ACK timer (armed for ICSK_TIME_DACK)
+ * @icsk_rto:             Retransmit timeout
+ * @icsk_ca_ops:          Pluggable congestion control hook
+ * @icsk_ca_state:        Congestion control state
+ * @icsk_retransmits:     Number of unrecovered [RTO] timeouts
+ * @icsk_pending:         Scheduled timer event
+ * @icsk_backoff:         Backoff
+ * @icsk_syn_retries:      Number of allowed SYN (or equivalent) retries
+ * @icsk_probes_out:      unanswered 0 window probes
+ * @icsk_ack:             Delayed ACK control data
+ * @icsk_ca_priv:         Storage handed out by inet_csk_ca()
+ */
+struct inet_connection_sock {
+       /* inet_sock has to be the first member! */
+       struct inet_sock          icsk_inet;
+       struct request_sock_queue icsk_accept_queue;
+       struct inet_bind_bucket   *icsk_bind_hash;
+       unsigned long             icsk_timeout;
+       struct timer_list         icsk_retransmit_timer;
+       struct timer_list         icsk_delack_timer;
+       __u32                     icsk_rto;
+       struct tcp_congestion_ops *icsk_ca_ops;
+       __u8                      icsk_ca_state;
+       __u8                      icsk_retransmits;
+       __u8                      icsk_pending;
+       __u8                      icsk_backoff;
+       __u8                      icsk_syn_retries;
+       __u8                      icsk_probes_out;
+       /* 2 BYTES HOLE, TRY TO PACK! */
+       struct {
+               __u8              pending;       /* ACK is pending                         */
+               __u8              quick;         /* Scheduled number of quick acks         */
+               __u8              pingpong;      /* The session is interactive             */
+               __u8              blocked;       /* Delayed ACK was blocked by socket lock */
+               __u32             ato;           /* Predicted tick of soft clock           */
+               unsigned long     timeout;       /* Currently scheduled timeout            */
+               __u32             lrcvtime;      /* timestamp of last received data packet */
+               __u16             last_seg_size; /* Size of last incoming segment          */
+               __u16             rcv_mss;       /* MSS used for delayed ACK decisions     */
+       } icsk_ack;
+       u32                       icsk_ca_priv[16];
+#define ICSK_CA_PRIV_SIZE      (16 * sizeof(u32))
+};
+
+#define ICSK_TIME_RETRANS      1       /* Retransmit timer */
+#define ICSK_TIME_DACK         2       /* Delayed ack timer */
+#define ICSK_TIME_PROBE0       3       /* Zero window probe timer */
+#define ICSK_TIME_KEEPOPEN     4       /* Keepalive timer */
+
+/* View @sk as the inet_connection_sock it is cast to; no checking is done. */
+static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
+{
+       struct inet_connection_sock *icsk = (struct inet_connection_sock *)sk;
+
+       return icsk;
+}
+
+/* Hand back the icsk_ca_priv array of @sk as an untyped pointer. */
+static inline void *inet_csk_ca(const struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       return (void *)icsk->icsk_ca_priv;
+}
+
+extern struct sock *inet_csk_clone(struct sock *sk,
+                                  const struct request_sock *req,
+                                  const unsigned int __nocast priority);
+
+/*
+ * Bit flags.  ICSK_ACK_SCHED and ICSK_ACK_TIMER are OR-ed into
+ * icsk_ack.pending by the inline helpers in this header.
+ */
+enum inet_csk_ack_state_t {
+       ICSK_ACK_SCHED  = 1,    /* set by inet_csk_schedule_ack() */
+       ICSK_ACK_TIMER  = 2,    /* set when the ICSK_TIME_DACK timer is armed */
+       ICSK_ACK_PUSHED = 4
+};
+
+extern void inet_csk_init_xmit_timers(struct sock *sk,
+                                     void (*retransmit_handler)(unsigned long),
+                                     void (*delack_handler)(unsigned long),
+                                     void (*keepalive_handler)(unsigned long));
+extern void inet_csk_clear_xmit_timers(struct sock *sk);
+
+/* Mark an ACK as scheduled by OR-ing ICSK_ACK_SCHED into icsk_ack.pending. */
+static inline void inet_csk_schedule_ack(struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       icsk->icsk_ack.pending |= ICSK_ACK_SCHED;
+}
+
+/* Non-zero when ICSK_ACK_SCHED is set in icsk_ack.pending, 0 otherwise. */
+static inline int inet_csk_ack_scheduled(const struct sock *sk)
+{
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+
+       return icsk->icsk_ack.pending & ICSK_ACK_SCHED;
+}
+
+/* Zero the whole icsk_ack sub-structure of @sk. */
+static inline void inet_csk_delack_init(struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       memset(&icsk->icsk_ack, 0, sizeof(icsk->icsk_ack));
+}
+
+extern void inet_csk_delete_keepalive_timer(struct sock *sk);
+extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
+
+#ifdef INET_CSK_DEBUG
+extern const char inet_csk_timer_bug_msg[];
+#endif
+
+/*
+ * Clear the pending state of the timer selected by @what.  The timer itself
+ * is only stopped when INET_CSK_CLEAR_TIMERS is defined.  Any value of @what
+ * other than ICSK_TIME_RETRANS, ICSK_TIME_PROBE0 or ICSK_TIME_DACK is
+ * reported through pr_debug() when INET_CSK_DEBUG is defined.
+ */
+static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       switch (what) {
+       case ICSK_TIME_RETRANS:
+       case ICSK_TIME_PROBE0:
+               icsk->icsk_pending = 0;
+#ifdef INET_CSK_CLEAR_TIMERS
+               sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
+#endif
+               break;
+       case ICSK_TIME_DACK:
+               icsk->icsk_ack.pending = 0;
+               icsk->icsk_ack.blocked = 0;
+#ifdef INET_CSK_CLEAR_TIMERS
+               sk_stop_timer(sk, &icsk->icsk_delack_timer);
+#endif
+               break;
+#ifdef INET_CSK_DEBUG
+       default:
+               pr_debug("%s", inet_csk_timer_bug_msg);
+               break;
+#endif
+       }
+}
+
+/*
+ *     (Re)arm one of the socket's transmit timers.  @when is relative to
+ *     jiffies and is clamped to @max_when.  ICSK_TIME_RETRANS and
+ *     ICSK_TIME_PROBE0 share icsk_retransmit_timer, ICSK_TIME_DACK uses
+ *     icsk_delack_timer.
+ */
+static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
+                                            unsigned long when,
+                                            const unsigned long max_when)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       if (when > max_when) {
+#ifdef INET_CSK_DEBUG
+               pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
+                        sk, what, when, current_text_addr());
+#endif
+               when = max_when;
+       }
+
+       switch (what) {
+       case ICSK_TIME_RETRANS:
+       case ICSK_TIME_PROBE0:
+               icsk->icsk_pending = what;
+               icsk->icsk_timeout = jiffies + when;
+               sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
+                              icsk->icsk_timeout);
+               break;
+       case ICSK_TIME_DACK:
+               icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
+               icsk->icsk_ack.timeout = jiffies + when;
+               sk_reset_timer(sk, &icsk->icsk_delack_timer,
+                              icsk->icsk_ack.timeout);
+               break;
+#ifdef INET_CSK_DEBUG
+       default:
+               pr_debug("%s", inet_csk_timer_bug_msg);
+               break;
+#endif
+       }
+}
+
+extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
+
+extern struct request_sock *inet_csk_search_req(const struct sock *sk,
+                                               struct request_sock ***prevp,
+                                               const __u16 rport,
+                                               const __u32 raddr,
+                                               const __u32 laddr);
+extern int inet_csk_get_port(struct inet_hashinfo *hashinfo,
+                            struct sock *sk, unsigned short snum);
+
+extern struct dst_entry* inet_csk_route_req(struct sock *sk,
+                                           const struct request_sock *req);
+
+/* Forward @req and @child to reqsk_queue_add() on @sk's accept queue. */
+static inline void inet_csk_reqsk_queue_add(struct sock *sk,
+                                           struct request_sock *req,
+                                           struct sock *child)
+{
+       struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+
+       reqsk_queue_add(queue, req, sk, child);
+}
+
+extern void inet_csk_reqsk_queue_hash_add(struct sock *sk,
+                                         struct request_sock *req,
+                                         const unsigned timeout);
+
+/*
+ * Account for @req leaving @sk's accept queue; the keepalive timer is
+ * deleted when reqsk_queue_removed() returns 0.
+ */
+static inline void inet_csk_reqsk_queue_removed(struct sock *sk,
+                                               struct request_sock *req)
+{
+       struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+
+       if (!reqsk_queue_removed(queue, req))
+               inet_csk_delete_keepalive_timer(sk);
+}
+
+/*
+ * Account for a request joining @sk's accept queue; the keepalive timer is
+ * reset to @timeout when reqsk_queue_added() returns 0.
+ */
+static inline void inet_csk_reqsk_queue_added(struct sock *sk,
+                                             const unsigned long timeout)
+{
+       struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+
+       if (!reqsk_queue_added(queue))
+               inet_csk_reset_keepalive_timer(sk, timeout);
+}
+
+/* Result of reqsk_queue_len() for @sk's accept queue. */
+static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
+{
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+
+       return reqsk_queue_len(&icsk->icsk_accept_queue);
+}
+
+/* Result of reqsk_queue_len_young() for @sk's accept queue. */
+static inline int inet_csk_reqsk_queue_young(const struct sock *sk)
+{
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+
+       return reqsk_queue_len_young(&icsk->icsk_accept_queue);
+}
+
+/* Result of reqsk_queue_is_full() for @sk's accept queue. */
+static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
+{
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+
+       return reqsk_queue_is_full(&icsk->icsk_accept_queue);
+}
+
+/* Forward @req and @prev to reqsk_queue_unlink() on @sk's accept queue. */
+static inline void inet_csk_reqsk_queue_unlink(struct sock *sk,
+                                              struct request_sock *req,
+                                              struct request_sock **prev)
+{
+       struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+
+       reqsk_queue_unlink(queue, req, prev);
+}
+
+/*
+ * Drop @req from @sk's accept queue: unlink it, update the queue accounting
+ * (which may delete the keepalive timer), then free the request.
+ */
+static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
+                                            struct request_sock *req,
+                                            struct request_sock **prev)
+{
+       /* Same call inet_csk_reqsk_queue_unlink() makes. */
+       reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev);
+       inet_csk_reqsk_queue_removed(sk, req);
+       reqsk_free(req);
+}
+
+extern void inet_csk_reqsk_queue_prune(struct sock *parent,
+                                      const unsigned long interval,
+                                      const unsigned long timeout,
+                                      const unsigned long max_rto);
+
+extern void inet_csk_destroy_sock(struct sock *sk);
+
+/*
+ * LISTEN is a special case for poll: report POLLIN | POLLRDNORM when
+ * reqsk_queue_empty() says the accept queue is not empty, 0 otherwise.
+ */
+static inline unsigned int inet_csk_listen_poll(const struct sock *sk)
+{
+       if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+               return 0;
+
+       return POLLIN | POLLRDNORM;
+}
+
+extern int  inet_csk_listen_start(struct sock *sk, const int nr_table_entries);
+extern void inet_csk_listen_stop(struct sock *sk);
+
+#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h

new file mode 100644 (file)

index 0000000..646b6ea
--- /dev/null
+++ b/include/net/inet_hashtables.h
@@ -0,0 +1,427 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ * Authors:    Lotsa people, from code originally in tcp
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _INET_HASHTABLES_H
+#define _INET_HASHTABLES_H
+
+#include <linux/config.h>
+
+#include <linux/interrupt.h>
+#include <linux/ipv6.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/route.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+
+#include <asm/atomic.h>
+#include <asm/byteorder.h>
+
+/* This is for all connections with a full identity, no wildcards.
+ * New scheme, half the table is for TIME_WAIT, the other half is
+ * for the rest.  I'll experiment with dynamic table growth later.
+ */
+struct inet_ehash_bucket {
+       /* Read-locked for lookups, write-locked for hash/unhash of 'chain'. */
+       rwlock_t          lock;
+       /* Sockets hashed into this bucket. */
+       struct hlist_head chain;
+} __attribute__((__aligned__(8)));
+
+/* There are a few simple rules, which allow for local port reuse by
+ * an application.  In essence:
+ *
+ *     1) Sockets bound to different interfaces may share a local port.
+ *        Failing that, goto test 2.
+ *     2) If all sockets have sk->sk_reuse set, and none of them are in
+ *        TCP_LISTEN state, the port may be shared.
+ *        Failing that, goto test 3.
+ *     3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
+ *        address, and none of them are the same, the port may be
+ *        shared.
+ *        Failing this, the port cannot be shared.
+ *
+ * The interesting point, is test #2.  This is what an FTP server does
+ * all day.  To optimize this case we use a specific flag bit defined
+ * below.  As we add sockets to a bind bucket list, we perform a
+ * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
+ * As long as all sockets added to a bind bucket pass this test,
+ * the flag bit will be set.
+ * The resulting situation is that tcp_v[46]_verify_bind() can just check
+ * for this flag bit, if it is set and the socket trying to bind has
+ * sk->sk_reuse set, we don't even have to walk the owners list at all,
+ * we return that it is ok to bind this socket to the requested local port.
+ *
+ * Sounds like a lot of work, but it is worth it.  In a more naive
+ * implementation (ie. current FreeBSD etc.) the entire list of ports
+ * must be walked for each data port opened by an ftp server.  Needless
+ * to say, this does not scale at all.  With a couple thousand FTP
+ * users logged onto your box, isn't it nice to know that new data
+ * ports are created in O(1) time?  I thought so. ;-)  -DaveM
+ */
+struct inet_bind_bucket {
+       unsigned short          port;
+       /* The fast-reuse flag described in the comment above. */
+       signed short            fastreuse;
+       /* Link walked by inet_bind_bucket_for_each(). */
+       struct hlist_node       node;
+       /* Sockets attached with sk_add_bind_node(). */
+       struct hlist_head       owners;
+};
+
+/* Walk every inet_bind_bucket linked on @head through its 'node' member. */
+#define inet_bind_bucket_for_each(tb, node, head) \
+       hlist_for_each_entry(tb, node, head, node)
+
+struct inet_bind_hashbucket {
+       /* Taken by __inet_inherit_port() around the owner-list update. */
+       spinlock_t              lock;
+       /* NOTE(review): presumably the inet_bind_bucket list of this slot -- confirm. */
+       struct hlist_head       chain;
+};
+
+/* This is for listening sockets, thus all sockets which possess wildcards. */
+#define INET_LHTABLE_SIZE      32      /* Yes, really, this is all you need. */
+
+struct inet_hashinfo {
+       /* This is for sockets with full identity only.  Sockets here will
+        * always be without wildcards and will have the following invariant:
+        *
+        *          TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
+        *
+        * First half of the table is for sockets not in TIME_WAIT, second half
+        * is for TIME_WAIT sockets only.
+        */
+       struct inet_ehash_bucket        *ehash;
+
+       /* Ok, let's try this, I give up, we do need a local binding
+        * TCP hash as well as the others for fast bind/connect.
+        */
+       struct inet_bind_hashbucket     *bhash;
+
+       int                             bhash_size;
+       int                             ehash_size;
+
+       /* All sockets in TCP_LISTEN state will be in here.  This is the only
+        * table where wildcard'd TCP sockets can exist.  Hash function here
+        * is just local port number.
+        */
+       struct hlist_head               listening_hash[INET_LHTABLE_SIZE];
+
+       /* All the above members are written once at bootup and
+        * never written again _or_ are predominantly read-access.
+        *
+        * Now align to a new cache line as all the following members
+        * are often dirty.
+        */
+       rwlock_t                        lhash_lock ____cacheline_aligned;
+       /* Incremented by inet_listen_lock(), dropped by inet_listen_unlock(). */
+       atomic_t                        lhash_users;
+       /* Woken when lhash_users reaches zero, and by __inet_hash() and
+        * inet_unhash() for TCP_LISTEN sockets.
+        */
+       wait_queue_head_t               lhash_wait;
+       spinlock_t                      portalloc_lock;
+       kmem_cache_t                    *bind_bucket_cachep;
+       int                             port_rover;
+};
+
+/*
+ * Fold the local/foreign address and port pairs into a bucket index.
+ * The final mask assumes @ehash_size is a power of two.
+ */
+static inline int inet_ehashfn(const __u32 laddr, const __u16 lport,
+                              const __u32 faddr, const __u16 fport,
+                              const int ehash_size)
+{
+       const int mask = ehash_size - 1;
+       int hash = (laddr ^ lport) ^ (faddr ^ fport);
+
+       hash ^= hash >> 16;
+       hash ^= hash >> 8;
+
+       return hash & mask;
+}
+
+/*
+ * Bucket index for @sk, computed by inet_ehashfn() from the socket's
+ * rcv_saddr/num (local side) and daddr/dport (foreign side).
+ */
+static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size)
+{
+       const struct inet_sock *inet = inet_sk(sk);
+
+       return inet_ehashfn(inet->rcv_saddr, inet->num,
+                           inet->daddr, inet->dport, ehash_size);
+}
+
+extern struct inet_bind_bucket *
+                   inet_bind_bucket_create(kmem_cache_t *cachep,
+                                           struct inet_bind_hashbucket *head,
+                                           const unsigned short snum);
+extern void inet_bind_bucket_destroy(kmem_cache_t *cachep,
+                                    struct inet_bind_bucket *tb);
+
+/*
+ * Bind-hash slot for @lport: the port masked with @bhash_size - 1, which
+ * assumes @bhash_size is a power of two.
+ */
+static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
+{
+       const int mask = bhash_size - 1;
+
+       return lport & mask;
+}
+
+extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
+                          const unsigned short snum);
+
+/*
+ * Listening sockets can have wildcards, so don't try too hard: the slot is
+ * just the port number masked with INET_LHTABLE_SIZE - 1.
+ */
+static inline int inet_lhashfn(const unsigned short num)
+{
+       const int mask = INET_LHTABLE_SIZE - 1;
+
+       return num & mask;
+}
+
+/* Listening-hash slot for @sk, derived from inet_sk(sk)->num. */
+static inline int inet_sk_listen_hashfn(const struct sock *sk)
+{
+       const struct inet_sock *inet = inet_sk(sk);
+
+       return inet_lhashfn(inet->num);
+}
+
+/*
+ * Attach @child to the bind bucket that @sk already uses.  The bind hash
+ * slot is selected from the child's port and its lock is held across the
+ * update.  Caller must disable local BH processing.
+ */
+static inline void __inet_inherit_port(struct inet_hashinfo *table,
+                                      struct sock *sk, struct sock *child)
+{
+       const int slot = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
+       struct inet_bind_hashbucket *bucket = &table->bhash[slot];
+       struct inet_bind_bucket *parent_tb;
+
+       spin_lock(&bucket->lock);
+       parent_tb = inet_csk(sk)->icsk_bind_hash;
+       sk_add_bind_node(child, &parent_tb->owners);
+       inet_csk(child)->icsk_bind_hash = parent_tb;
+       spin_unlock(&bucket->lock);
+}
+
+/* Same as __inet_inherit_port(), with local BHs disabled around the call. */
+static inline void inet_inherit_port(struct inet_hashinfo *table,
+                                    struct sock *sk, struct sock *child)
+{
+       local_bh_disable();
+       __inet_inherit_port(table, sk, child);
+       local_bh_enable();
+}
+
+extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk);
+
+extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);
+
+/*
+ * Register the caller as a user of the listening hash by bumping
+ * lhash_users; lhash_lock is only held while the counter is incremented.
+ *
+ * - We may sleep inside this lock.
+ * - If sleeping is not required (or called from BH),
+ *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
+ */
+static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
+{
+       /* read_lock synchronizes candidates to writers */
+       read_lock(&hashinfo->lhash_lock);
+       atomic_inc(&hashinfo->lhash_users);
+       read_unlock(&hashinfo->lhash_lock);
+}
+
+/*
+ * Drop the user count taken by inet_listen_lock(); when it reaches zero,
+ * wake anybody sleeping on lhash_wait.
+ */
+static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
+{
+       if (atomic_dec_and_test(&hashinfo->lhash_users))
+               wake_up(&hashinfo->lhash_wait);
+}
+
+/*
+ * Insert @sk into the listening hash when @listen_possible is set and the
+ * socket is in TCP_LISTEN, otherwise into the ehash bucket computed by
+ * inet_sk_ehashfn() (the slot is remembered in sk->sk_hashent).  The
+ * protocol use count is bumped before the table lock is released, and
+ * lhash_wait is woken after a listener has been added.  inet_hash() calls
+ * this with local BHs disabled.
+ */
+static inline void __inet_hash(struct inet_hashinfo *hashinfo,
+                              struct sock *sk, const int listen_possible)
+{
+       struct hlist_head *list;
+       rwlock_t *lock;
+
+       BUG_TRAP(sk_unhashed(sk));
+       if (listen_possible && sk->sk_state == TCP_LISTEN) {
+               list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+               lock = &hashinfo->lhash_lock;
+               /* Released by the write_unlock(lock) below. */
+               inet_listen_wlock(hashinfo);
+       } else {
+               sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size);
+               list = &hashinfo->ehash[sk->sk_hashent].chain;
+               lock = &hashinfo->ehash[sk->sk_hashent].lock;
+               write_lock(lock);
+       }
+       __sk_add_node(sk, list);
+       sock_prot_inc_use(sk->sk_prot);
+       write_unlock(lock);
+       if (listen_possible && sk->sk_state == TCP_LISTEN)
+               wake_up(&hashinfo->lhash_wait);
+}
+
+/*
+ * Hash @sk with local BHs disabled, allowing the listening hash to be used.
+ * Sockets in TCP_CLOSE state are left alone.
+ */
+static inline void inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
+{
+       if (sk->sk_state == TCP_CLOSE)
+               return;
+
+       local_bh_disable();
+       __inet_hash(hashinfo, sk, 1);
+       local_bh_enable();
+}
+
+/*
+ * Remove @sk from whichever table it is hashed in.  A TCP_LISTEN socket is
+ * removed under lhash_lock (obtained through inet_listen_wlock() with BHs
+ * disabled); any other socket is removed from the ehash bucket selected by
+ * sk->sk_hashent under that bucket's write lock.  The protocol use count is
+ * dropped only when __sk_del_node_init() returns non-zero.  For a
+ * TCP_LISTEN socket lhash_wait is woken on the way out, even when the
+ * socket was already unhashed.
+ */
+static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
+{
+       rwlock_t *lock;
+
+       if (sk_unhashed(sk))
+               goto out;
+
+       if (sk->sk_state == TCP_LISTEN) {
+               /* BH disable + wlock here pair with write_unlock_bh() below. */
+               local_bh_disable();
+               inet_listen_wlock(hashinfo);
+               lock = &hashinfo->lhash_lock;
+       } else {
+               struct inet_ehash_bucket *head = &hashinfo->ehash[sk->sk_hashent];
+               lock = &head->lock;
+               write_lock_bh(&head->lock);
+       }
+
+       if (__sk_del_node_init(sk))
+               sock_prot_dec_use(sk->sk_prot);
+       write_unlock_bh(lock);
+out:
+       if (sk->sk_state == TCP_LISTEN)
+               wake_up(&hashinfo->lhash_wait);
+}
+
+/* rt_iif of the route hanging off @skb; skb->dst is treated as an rtable. */
+static inline int inet_iif(const struct sk_buff *skb)
+{
+       const struct rtable *rt = (struct rtable *)skb->dst;
+
+       return rt->rt_iif;
+}
+
+extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
+                                          const u32 daddr,
+                                          const unsigned short hnum,
+                                          const int dif);
+
+/*
+ * Find a listening socket for daddr/hnum under lhash_lock (read side).
+ * The common case is optimized: when the first socket in the slot is also
+ * the only one and it matches without needing the device check, it is used
+ * directly; otherwise __inet_lookup_listener() scans the chain.  A
+ * reference is taken on the socket that is returned.
+ */
+static inline struct sock *
+               inet_lookup_listener(struct inet_hashinfo *hashinfo,
+                                    const u32 daddr,
+                                    const unsigned short hnum, const int dif)
+{
+       struct sock *sk = NULL;
+       const struct hlist_head *head;
+
+       read_lock(&hashinfo->lhash_lock);
+       head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+       if (!hlist_empty(head)) {
+               const struct inet_sock *inet;
+
+               sk = __sk_head(head);
+               inet = inet_sk(sk);
+
+               /* Not a lone, unbound, matching listener: do the full scan. */
+               if (!(inet->num == hnum && !sk->sk_node.next &&
+                     (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
+                     (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+                     !sk->sk_bound_dev_if))
+                       sk = __inet_lookup_listener(head, daddr, hnum, dif);
+       }
+       if (sk != NULL)
+               sock_hold(sk);
+       read_unlock(&hashinfo->lhash_lock);
+       return sk;
+}
+
+/* Socket demux engine toys. */
+/*
+ * INET_COMBINED_PORTS() packs two 16-bit ports into one __u32.  Which port
+ * lands in the upper half depends on byte order; the result is compared by
+ * INET_MATCH()/INET_TW_MATCH() against a single 32-bit load that starts at
+ * the socket's dport/tw_dport field.
+ */
+#ifdef __BIG_ENDIAN
+#define INET_COMBINED_PORTS(__sport, __dport) \
+       (((__u32)(__sport) << 16) | (__u32)(__dport))
+#else /* __LITTLE_ENDIAN */
+#define INET_COMBINED_PORTS(__sport, __dport) \
+       (((__u32)(__dport) << 16) | (__u32)(__sport))
+#endif
+
+/*
+ * 64-bit builds: INET_ADDR_COOKIE() declares a const __u64 holding both
+ * addresses (the macro supplies its own trailing semicolon) and the match
+ * macros compare it against one 64-bit load starting at daddr/tw_daddr.
+ * NOTE(review): this presumably relies on rcv_saddr/tw_rcv_saddr sitting
+ * directly after daddr/tw_daddr in the socket layout -- confirm.
+ *
+ * 32-bit builds: INET_ADDR_COOKIE() expands to nothing and the two
+ * addresses are compared individually.
+ *
+ * In both variants a socket with sk_bound_dev_if set only matches when it
+ * equals __dif.
+ */
+#if (BITS_PER_LONG == 64)
+#ifdef __BIG_ENDIAN
+#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
+       const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr));
+#else /* __LITTLE_ENDIAN */
+#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
+       const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
+#endif /* __BIG_ENDIAN */
+#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
+       (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie))  &&      \
+        ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))   &&      \
+        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
+       (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&  \
+        ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&   \
+        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+#else /* 32-bit arch */
+#define INET_ADDR_COOKIE(__name, __saddr, __daddr)
+#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)   \
+       ((inet_sk(__sk)->daddr          == (__saddr))           &&      \
+        (inet_sk(__sk)->rcv_saddr      == (__daddr))           &&      \
+        ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))   &&      \
+        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)        \
+       ((inet_twsk(__sk)->tw_daddr     == (__saddr))           &&      \
+        (inet_twsk(__sk)->tw_rcv_saddr == (__daddr))           &&      \
+        ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&   \
+        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+#endif /* 64-bit arch */
+
+/*
+ * Find a fully specified (no wildcard) socket for the given 4-tuple.
+ * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so the
+ * state needs no checking here, thanks Alexey. -DaveM
+ *
+ * Local BH must be disabled here.  A reference is taken on the socket that
+ * is returned; NULL means nothing matched.
+ */
+static inline struct sock *
+       __inet_lookup_established(struct inet_hashinfo *hashinfo,
+                                 const u32 saddr, const u16 sport,
+                                 const u32 daddr, const u16 hnum,
+                                 const int dif)
+{
+       INET_ADDR_COOKIE(acookie, saddr, daddr)
+       const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
+       struct sock *sk;
+       const struct hlist_node *node;
+       /* Only listening sockets can carry wildcards, so go straight for
+        * the bucket of the exact 4-tuple.
+        */
+       const int slot = inet_ehashfn(daddr, hnum, saddr, sport,
+                                     hashinfo->ehash_size);
+       struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+       struct inet_ehash_bucket *tw_head = head + hashinfo->ehash_size;
+
+       read_lock(&head->lock);
+       sk_for_each(sk, node, &head->chain) {
+               if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
+                       goto found;
+       }
+
+       /* TIME_WAIT sockets sit ehash_size buckets further on and must be
+        * checked before the caller goes to the listener hash.
+        */
+       sk_for_each(sk, node, &tw_head->chain) {
+               if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
+                       goto found;
+       }
+       read_unlock(&head->lock);
+       return NULL;
+
+found:
+       sock_hold(sk);
+       read_unlock(&head->lock);
+       return sk;
+}
+
+/*
+ * Try __inet_lookup_established() first; when it finds nothing, fall back
+ * to inet_lookup_listener() for daddr/hnum/dif.
+ */
+static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
+                                        const u32 saddr, const u16 sport,
+                                        const u32 daddr, const u16 hnum,
+                                        const int dif)
+{
+       struct sock *established;
+
+       established = __inet_lookup_established(hashinfo, saddr, sport,
+                                               daddr, hnum, dif);
+       if (established)
+               return established;
+
+       return inet_lookup_listener(hashinfo, daddr, hnum, dif);
+}
+
+/*
+ * BH-safe front end to __inet_lookup(): local BHs are disabled around the
+ * lookup and @dport is passed through ntohs() before being handed on as
+ * the hnum argument.
+ */
+static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
+                                      const u32 saddr, const u16 sport,
+                                      const u32 daddr, const u16 dport,
+                                      const int dif)
+{
+       struct sock *sk;
+
+       local_bh_disable();
+       sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
+       local_bh_enable();
+
+       return sk;
+}
+#endif /* _INET_HASHTABLES_H */
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h

new file mode 100644 (file)

index 0000000..3b07035
--- /dev/null
+++ b/include/net/inet_timewait_sock.h
@@ -0,0 +1,219 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Definitions for a generic INET TIMEWAIT sock
+ *
+ *             From code originally in net/tcp.h
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+#ifndef _INET_TIMEWAIT_SOCK_
+#define _INET_TIMEWAIT_SOCK_
+
+#include <linux/config.h>
+
+#include <linux/ip.h>
+#include <linux/list.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#include <net/sock.h>
+#include <net/tcp_states.h>
+
+#include <asm/atomic.h>
+
+struct inet_hashinfo;
+
+#define INET_TWDR_RECYCLE_SLOTS_LOG    5 /* log2 of the slot count below */
+#define INET_TWDR_RECYCLE_SLOTS                (1 << INET_TWDR_RECYCLE_SLOTS_LOG)
+
+/* Timeouts longer than about 4 seconds take the "slow" path, where no
+ * recycling is required, so the tick is selected to give a ~4 sec range.
+ * Each arm below is ceil(log2(HZ)) + 2 - INET_TWDR_RECYCLE_SLOTS_LOG.
+ */
+#if HZ <= 16 || HZ > 4096
+# error Unsupported: HZ <= 16 or HZ > 4096
+#elif HZ <= 32
+# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
+#elif HZ <= 64
+# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
+#elif HZ <= 128
+# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
+#elif HZ <= 256
+# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
+#elif HZ <= 512
+# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
+#elif HZ <= 1024
+# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
+#elif HZ <= 2048
+# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
+#else /* 2048 < HZ <= 4096 */
+# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
+#endif
+
+/* TIME_WAIT reaping mechanism. */
+#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */
+
+#define INET_TWDR_TWKILL_QUOTA 100
+
+struct inet_timewait_death_row {
+       /* Short-time timewait calendar (INET_TWDR_RECYCLE_SLOTS rows) */
+       int                     twcal_hand;
+       int                     twcal_jiffie;
+       struct timer_list       twcal_timer;
+       struct hlist_head       twcal_row[INET_TWDR_RECYCLE_SLOTS];
+       /* cells[] below: one list head per INET_TWDR_TWKILL_SLOTS slot */
+       spinlock_t              death_lock;
+       int                     tw_count;
+       int                     period;
+       u32                     thread_slots;
+       struct work_struct      twkill_work;
+       struct timer_list       tw_timer;
+       int                     slot;
+       struct hlist_head       cells[INET_TWDR_TWKILL_SLOTS];
+       struct inet_hashinfo    *hashinfo;
+       int                     sysctl_tw_recycle;
+       int                     sysctl_max_tw_buckets;
+};
+
+extern void inet_twdr_hangman(unsigned long data);
+extern void inet_twdr_twkill_work(void *data);
+extern void inet_twdr_twcal_tick(unsigned long data);
+
+#if (BITS_PER_LONG == 64)
+#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8
+#else
+#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4
+#endif
+
+struct inet_bind_bucket;
+
+/*
+ * This is a TIME_WAIT sock. It works around the memory consumption
+ * problems of sockets in such a state on heavily loaded servers, but
+ * without violating the protocol specification.
+ */
+struct inet_timewait_sock {
+       /*
+        * struct sock also starts with sock_common, so please do not add
+        * anything before this first member (__tw_common) --acme
+        */
+       struct sock_common      __tw_common;
+#define tw_family              __tw_common.skc_family
+#define tw_state               __tw_common.skc_state
+#define tw_reuse               __tw_common.skc_reuse
+#define tw_bound_dev_if                __tw_common.skc_bound_dev_if
+#define tw_node                        __tw_common.skc_node
+#define tw_bind_node           __tw_common.skc_bind_node
+#define tw_refcnt              __tw_common.skc_refcnt
+#define tw_prot                        __tw_common.skc_prot
+       volatile unsigned char  tw_substate;
+       /* 3 bits hole, try to pack */
+       unsigned char           tw_rcv_wscale;
+       /* Socket demultiplex comparisons on incoming packets. */
+       /* these five are also in inet_sock */
+       __u16                   tw_sport;
+       __u32                   tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES)));
+       __u32                   tw_rcv_saddr;
+       __u16                   tw_dport;
+       __u16                   tw_num;
+       /* And these are ours. */
+       __u8                    tw_ipv6only:1;
+       /* 31 bits hole, try to pack */
+       int                     tw_hashent;
+       int                     tw_timeout;
+       unsigned long           tw_ttd;
+       struct inet_bind_bucket *tw_tb;
+       struct hlist_node       tw_death_node; /* pprev NULL when unhashed */
+};
+
+static inline void inet_twsk_add_node(struct inet_timewait_sock *tw,
+                                     struct hlist_head *list)
+{
+       hlist_add_head(&tw->tw_node, list); /* tw_node aliases skc_node */
+}
+
+static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
+                                          struct hlist_head *list)
+{
+       hlist_add_head(&tw->tw_bind_node, list); /* aliases skc_bind_node */
+}
+
+static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw)
+{
+       return tw->tw_death_node.pprev != NULL; /* NULL means unhashed */
+}
+
+static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw)
+{
+       tw->tw_death_node.pprev = NULL; /* i.e. not dead-hashed */
+}
+
+static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw)
+{
+       __hlist_del(&tw->tw_death_node);
+       inet_twsk_dead_node_init(tw);   /* then mark it as unhashed */
+}
+
+static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw)
+{
+       /* Nothing to unlink unless tw_death_node is currently hashed. */
+       if (!inet_twsk_dead_hashed(tw))
+               return 0;
+       __inet_twsk_del_dead_node(tw);
+       return 1;
+}
+
+#define inet_twsk_for_each(tw, node, head) \
+       hlist_for_each_entry(tw, node, head, tw_node)
+
+#define inet_twsk_for_each_inmate(tw, node, jail) \
+       hlist_for_each_entry(tw, node, jail, tw_death_node)
+
+#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \
+       hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node)
+
+static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
+{
+       return (struct inet_timewait_sock *)sk; /* shared sock_common prefix */
+}
+
+static inline u32 inet_rcv_saddr(const struct sock *sk)
+{
+       return likely(sk->sk_state != TCP_TIME_WAIT) ? /* usual case: inet_sk */
+               inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr;
+}
+
+static inline void inet_twsk_put(struct inet_timewait_sock *tw)
+{
+       if (!atomic_dec_and_test(&tw->tw_refcnt))
+               return;         /* count did not reach zero */
+#ifdef SOCK_REFCNT_DEBUG
+       printk(KERN_DEBUG "%s timewait_sock %p released\n",
+              tw->tw_prot->name, tw);
+#endif
+       kmem_cache_free(tw->tw_prot->twsk_slab, tw); /* last ref: free it */
+}
+
+extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
+                                                 const int state);
+
+extern void __inet_twsk_kill(struct inet_timewait_sock *tw,
+                            struct inet_hashinfo *hashinfo);
+
+extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw,
+                                 struct sock *sk,
+                                 struct inet_hashinfo *hashinfo);
+
+extern void inet_twsk_schedule(struct inet_timewait_sock *tw,
+                              struct inet_timewait_death_row *twdr,
+                              const int timeo, const int timewait_len);
+extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
+                                struct inet_timewait_death_row *twdr);
+#endif /* _INET_TIMEWAIT_SOCK_ */
diff --git a/include/net/ip.h b/include/net/ip.h

index 32360bbe143faebcefe80eb9ae7a43d19585680c..e4563bbee6ea2baab2d6e61571dc2bd78cc1502e 100644 (file)
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -86,7 +86,7 @@ extern int            ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
                                               u32 saddr, u32 daddr,
                                               struct ip_options *opt);
  extern int             ip_rcv(struct sk_buff *skb, struct net_device *dev,
-                              struct packet_type *pt);
+                              struct packet_type *pt, struct net_device *orig_dev);
  extern int             ip_local_deliver(struct sk_buff *skb);
  extern int             ip_mr_input(struct sk_buff *skb);
  extern int             ip_output(struct sk_buff *skb);
@@ -140,8 +140,6 @@ struct ip_reply_arg {
  void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
                    unsigned int len); 
  
-extern int ip_finish_output(struct sk_buff *skb);
-
  struct ipv4_config
  {
         int     log_martians;
@@ -165,6 +163,24 @@ extern int sysctl_local_port_range[2];
  extern int sysctl_ip_default_ttl;
  extern int sysctl_ip_nonlocal_bind;
  
+/* From ip_fragment.c */
+extern int sysctl_ipfrag_high_thresh; 
+extern int sysctl_ipfrag_low_thresh;
+extern int sysctl_ipfrag_time;
+extern int sysctl_ipfrag_secret_interval;
+
+/* From inetpeer.c */
+extern int inet_peer_threshold;
+extern int inet_peer_minttl;
+extern int inet_peer_maxttl;
+extern int inet_peer_gc_mintime;
+extern int inet_peer_gc_maxtime;
+
+/* From ip_output.c */
+extern int sysctl_ip_dynaddr;
+
+extern void ipfrag_init(void);
+
  #ifdef CONFIG_INET
  /* The function in 2.2 was invalid, producing wrong result for
   * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */
@@ -319,7 +335,10 @@ extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, u32 da
  extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
  extern void ip_options_fragment(struct sk_buff *skb);
  extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb);
-extern int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user);
+extern int ip_options_get(struct ip_options **optp,
+                         unsigned char *data, int optlen);
+extern int ip_options_get_from_user(struct ip_options **optp,
+                                   unsigned char __user *data, int optlen);
  extern void ip_options_undo(struct ip_options * opt);
  extern void ip_forward_options(struct sk_buff *skb);
  extern int ip_options_rcv_srr(struct sk_buff *skb);
@@ -350,5 +369,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
                                   void __user *oldval, size_t __user *oldlenp,
                                   void __user *newval, size_t newlen, 
                                   void **context);
+#ifdef CONFIG_PROC_FS
+extern int ip_misc_proc_init(void);
+#endif
+
+extern struct ctl_table ipv4_table[];
  
  #endif /* _IP_H */
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h

index f920706d526b4397288240e9e11bf6bcaa07db54..1f2e428ca364d0dcbcf3031d68892b1a75fb09b7 100644 (file)
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -12,7 +12,6 @@
  #include <net/flow.h>
  #include <net/ip6_fib.h>
  #include <net/sock.h>
-#include <linux/tcp.h>
  #include <linux/ip.h>
  #include <linux/ipv6.h>
  
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h

index a4208a336ac09e58061af807e28a417f9ad1dd69..14de4ebd12113f4b31b05991159428d842cd78cf 100644 (file)
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -295,4 +295,9 @@ static inline void fib_res_put(struct fib_result *res)
  #endif
  }
  
+#ifdef CONFIG_PROC_FS
+extern int  fib_proc_init(void);
+extern void fib_proc_exit(void);
+#endif
+
  #endif  /* _NET_FIB_H */
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h

index 52da5d26617a2989f4c0c7a0c017b22388582b77..7a3c43711a17a1848c44e4d02d2514f2056ca9f5 100644 (file)
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -255,7 +255,6 @@ struct ip_vs_daemon_user {
  #include <asm/atomic.h>                 /* for struct atomic_t */
  #include <linux/netdevice.h>           /* for struct neighbour */
  #include <net/dst.h>                   /* for struct dst_entry */
-#include <net/tcp.h>
  #include <net/udp.h>
  #include <linux/compiler.h>
  
diff --git a/include/net/ipv6.h b/include/net/ipv6.h

index 69324465e8b357fd1d10e90eac83630319f6dac8..3203eaff4bd4b71dcdbd82fe5242c8cf22d8161c 100644 (file)
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -104,6 +104,7 @@ struct frag_hdr {
  
  #ifdef __KERNEL__
  
+#include <linux/config.h>
  #include <net/sock.h>
  
  /* sysctls */
@@ -145,7 +146,6 @@ DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6);
  #define UDP6_INC_STATS(field)          SNMP_INC_STATS(udp_stats_in6, field)
  #define UDP6_INC_STATS_BH(field)       SNMP_INC_STATS_BH(udp_stats_in6, field)
  #define UDP6_INC_STATS_USER(field)     SNMP_INC_STATS_USER(udp_stats_in6, field)
-extern atomic_t                        inet6_sock_nr;
  
  int snmp6_register_dev(struct inet6_dev *idev);
  int snmp6_unregister_dev(struct inet6_dev *idev);
@@ -346,7 +346,8 @@ static inline int ipv6_addr_any(const struct in6_addr *a)
  
  extern int                     ipv6_rcv(struct sk_buff *skb, 
                                          struct net_device *dev, 
-                                        struct packet_type *pt);
+                                        struct packet_type *pt,
+                                        struct net_device *orig_dev);
  
  /*
   *     upper-layer output functions
@@ -464,8 +465,38 @@ extern int sysctl_ip6frag_low_thresh;
  extern int sysctl_ip6frag_time;
  extern int sysctl_ip6frag_secret_interval;
  
-#endif /* __KERNEL__ */
-#endif /* _NET_IPV6_H */
+extern struct proto_ops inet6_stream_ops;
+extern struct proto_ops inet6_dgram_ops;
+
+extern int ip6_mc_source(int add, int omode, struct sock *sk,
+                        struct group_source_req *pgsr);
+extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
+extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
+                        struct group_filter __user *optval,
+                        int __user *optlen);
+
+#ifdef CONFIG_PROC_FS
+extern int  ac6_proc_init(void);
+extern void ac6_proc_exit(void);
+extern int  raw6_proc_init(void);
+extern void raw6_proc_exit(void);
+extern int  tcp6_proc_init(void);
+extern void tcp6_proc_exit(void);
+extern int  udp6_proc_init(void);
+extern void udp6_proc_exit(void);
+extern int  ipv6_misc_proc_init(void);
+extern void ipv6_misc_proc_exit(void);
+
+extern struct rt6_statistics rt6_stats;
+#endif
  
+#ifdef CONFIG_SYSCTL
+extern ctl_table ipv6_route_table[];
+extern ctl_table ipv6_icmp_table[];
  
+extern void ipv6_sysctl_register(void);
+extern void ipv6_sysctl_unregister(void);
+#endif
  
+#endif /* __KERNEL__ */
+#endif /* _NET_IPV6_H */
diff --git a/include/net/llc.h b/include/net/llc.h

index c9aed2a8b4e20590d5f882e1150e4c714b8e0afa..71769a5aeef3b929afd6b8dd0aa9fdab6f3c7f13 100644 (file)
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -46,7 +46,8 @@ struct llc_sap {
         unsigned char    f_bit;
         int              (*rcv_func)(struct sk_buff *skb,
                                      struct net_device *dev,
-                                    struct packet_type *pt);
+                                    struct packet_type *pt,
+                                    struct net_device *orig_dev);
         struct llc_addr  laddr;
         struct list_head node;
         struct {
@@ -64,7 +65,7 @@ extern rwlock_t llc_sap_list_lock;
  extern unsigned char llc_station_mac_sa[ETH_ALEN];
  
  extern int llc_rcv(struct sk_buff *skb, struct net_device *dev,
-                  struct packet_type *pt);
+                  struct packet_type *pt, struct net_device *orig_dev);
  
  extern int llc_mac_hdr_init(struct sk_buff *skb,
                             unsigned char *sa, unsigned char *da);
@@ -78,7 +79,8 @@ extern void llc_set_station_handler(void (*handler)(struct sk_buff *skb));
  extern struct llc_sap *llc_sap_open(unsigned char lsap,
                                     int (*rcv)(struct sk_buff *skb,
                                                struct net_device *dev,
-                                              struct packet_type *pt));
+                                              struct packet_type *pt,
+                                              struct net_device *orig_dev));
  extern void llc_sap_close(struct llc_sap *sap);
  
  extern struct llc_sap *llc_sap_find(unsigned char sap_value);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h

index 89809891e5ab2e8e9430ccac31255d22ee364231..34c07731933db95b38cbb05a4db4ccefde2d6d7e 100644 (file)
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -363,7 +363,14 @@ __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey,
         return neigh_create(tbl, pkey, dev);
  }
  
-#define LOCALLY_ENQUEUED -2
+struct neighbour_cb {
+       unsigned long sched_next;
+       unsigned int flags;
+};
+
+#define LOCALLY_ENQUEUED 0x1
+
+#define NEIGH_CB(skb)  ((struct neighbour_cb *)(skb)->cb)
  
  #endif
  #endif
diff --git a/include/net/p8022.h b/include/net/p8022.h

index 3c99a86c35812da1da1dc34c9cf424386a784f29..42e9fac51b3115f86d0a766d8d19c90cc812ac0e 100644 (file)
--- a/include/net/p8022.h
+++ b/include/net/p8022.h
@@ -4,7 +4,10 @@ extern struct datalink_proto *
         register_8022_client(unsigned char type,
                              int (*func)(struct sk_buff *skb,
                                          struct net_device *dev,
-                                        struct packet_type *pt));
+                                        struct packet_type *pt,
+                                        struct net_device *orig_dev));
  extern void unregister_8022_client(struct datalink_proto *proto);
  
+extern struct datalink_proto *make_8023_client(void);
+extern void destroy_8023_client(struct datalink_proto *dl);
  #endif
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h

index 4abda6aec05a8cc4ca2cde35d6115427d71044fc..b902d24a32563f424d0934a6fb04d504a0b1e3ca 100644 (file)
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -352,10 +352,10 @@ tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv)
  static inline int
  tcf_match_indev(struct sk_buff *skb, char *indev)
  {
-       if (0 != indev[0]) {
-               if  (NULL == skb->input_dev)
+       if (indev[0]) {
+               if  (!skb->input_dev)
                         return 0;
-               else if (0 != strcmp(indev, skb->input_dev->name))
+               if (strcmp(indev, skb->input_dev->name))
                         return 0;
         }
  
diff --git a/include/net/psnap.h b/include/net/psnap.h

index 9c94e8f98b36631a92477697defe5690d06b8f1b..b2e01cc3fc8a1c892bad42efc008d0c2ba80aa87 100644 (file)
--- a/include/net/psnap.h
+++ b/include/net/psnap.h
@@ -1,7 +1,7 @@
  #ifndef _NET_PSNAP_H
  #define _NET_PSNAP_H
  
-extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *));
+extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev));
  extern void unregister_snap_client(struct datalink_proto *proto);
  
  #endif
diff --git a/include/net/raw.h b/include/net/raw.h

index 1c411c45587a6a11862a6394fd8e6fa04c8391b4..f47917469b12121c9b3d770514f474ae5bf7a9d7 100644 (file)
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -17,10 +17,10 @@
  #ifndef _RAW_H
  #define _RAW_H
  
+#include <linux/config.h>
  
  extern struct proto raw_prot;
  
-
  extern void    raw_err(struct sock *, struct sk_buff *, u32 info);
  extern int     raw_rcv(struct sock *, struct sk_buff *);
  
@@ -37,6 +37,11 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
                                     unsigned long raddr, unsigned long laddr,
                                     int dif);
  
-extern void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
+extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
+
+#ifdef CONFIG_PROC_FS
+extern int  raw_proc_init(void);
+extern void raw_proc_exit(void);
+#endif
  
  #endif /* _RAW_H */
diff --git a/include/net/rawv6.h b/include/net/rawv6.h

index 23fd9a6a221a2094546fe66c7a17b619a906d0c3..14476a71725e56467fe934611d3ec2f7e659abb6 100644 (file)
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -7,10 +7,11 @@
  extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
  extern rwlock_t raw_v6_lock;
  
-extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr);
+extern int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr);
  
  extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
-                                   struct in6_addr *loc_addr, struct in6_addr *rmt_addr);
+                                   struct in6_addr *loc_addr, struct in6_addr *rmt_addr,
+                                   int dif);
  
  extern int                     rawv6_rcv(struct sock *sk,
                                           struct sk_buff *skb);
diff --git a/include/net/request_sock.h b/include/net/request_sock.h

index 72fd6f5e86b19bbb1fc2444620d4764456469403..b52cc52ffe39f1475488c0d904753519a6df531f 100644 (file)
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -89,6 +89,7 @@ struct listen_sock {
         int                     qlen_young;
         int                     clock_hand;
         u32                     hash_rnd;
+       u32                     nr_table_entries;
         struct request_sock     *syn_table[0];
  };
  
@@ -96,6 +97,7 @@ struct listen_sock {
   *
   * @rskq_accept_head - FIFO head of established children
   * @rskq_accept_tail - FIFO tail of established children
+ * @rskq_defer_accept - User waits for some data after accept()
   * @syn_wait_lock - serializer
   *
   * %syn_wait_lock is necessary only to avoid proc interface having to grab the main
@@ -111,6 +113,8 @@ struct request_sock_queue {
         struct request_sock     *rskq_accept_head;
         struct request_sock     *rskq_accept_tail;
         rwlock_t                syn_wait_lock;
+       u8                      rskq_defer_accept;
+       /* 3 bytes hole, try to pack */
         struct listen_sock      *listen_opt;
  };
  
@@ -129,11 +133,13 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock
         return lopt;
  }
  
-static inline void reqsk_queue_destroy(struct request_sock_queue *queue)
+static inline void __reqsk_queue_destroy(struct request_sock_queue *queue)
  {
         kfree(reqsk_queue_yank_listen_sk(queue));
  }
  
+extern void reqsk_queue_destroy(struct request_sock_queue *queue);
+
  static inline struct request_sock *
         reqsk_queue_yank_acceptq(struct request_sock_queue *queue)
  {
@@ -221,17 +227,17 @@ static inline int reqsk_queue_added(struct request_sock_queue *queue)
         return prev_qlen;
  }
  
-static inline int reqsk_queue_len(struct request_sock_queue *queue)
+static inline int reqsk_queue_len(const struct request_sock_queue *queue)
  {
         return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0;
  }
  
-static inline int reqsk_queue_len_young(struct request_sock_queue *queue)
+static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
  {
         return queue->listen_opt->qlen_young;
  }
  
-static inline int reqsk_queue_is_full(struct request_sock_queue *queue)
+static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
  {
         return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
  }
diff --git a/include/net/route.h b/include/net/route.h

index c3cd069a9aca5f17c0c09b6b6a0f2f7a53be6fa9..dbe79ca67d317cc1410b879e0898ae6ac52a3b03 100644 (file)
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -105,10 +105,6 @@ struct rt_cache_stat
          unsigned int out_hlist_search;
  };
  
-extern struct rt_cache_stat *rt_cache_stat;
-#define RT_CACHE_STAT_INC(field)                                         \
-               (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++)
-
  extern struct ip_rt_acct *ip_rt_acct;
  
  struct in_device;
@@ -199,4 +195,6 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt)
         return rt->peer;
  }
  
+extern ctl_table ipv4_route_table[];
+
  #endif /* _ROUTE_H */
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h

index 5999e5684bbfc5136f36964fb3ebe2c0e669f9b1..c51541ee0247cea21202369d8827be0b86db3132 100644 (file)
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -47,10 +47,10 @@
  #ifndef __sctp_constants_h__
  #define __sctp_constants_h__
  
-#include <linux/tcp.h>  /* For TCP states used in sctp_sock_state_t */
  #include <linux/sctp.h>
  #include <linux/ipv6.h> /* For ipv6hdr. */
  #include <net/sctp/user.h>
+#include <net/tcp_states.h>  /* For TCP states used in sctp_sock_state_t */
  
  /* Value used for stream negotiation. */
  enum { SCTP_MAX_STREAM = 0xffff };
diff --git a/include/net/sock.h b/include/net/sock.h

index a1042d08becd538c4bb330d32ee64cd550ca2f39..312cb25cbd18bf10bc090823aede8a2a0e373758 100644 (file)
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -88,6 +88,7 @@ do {  spin_lock_init(&((__sk)->sk_lock.slock)); \
  } while(0)
  
  struct sock;
+struct proto;
  
  /**
   *     struct sock_common - minimal network layer representation of sockets
@@ -98,10 +99,11 @@ struct sock;
   *     @skc_node: main hash linkage for various protocol lookup tables
   *     @skc_bind_node: bind hash linkage for various protocol lookup tables
   *     @skc_refcnt: reference count
+ *     @skc_prot: protocol handlers inside a network family
   *
   *     This is the minimal network layer representation of sockets, the header
- *     for struct sock and struct tcp_tw_bucket.
-  */
+ *     for struct sock and struct inet_timewait_sock.
+ */
  struct sock_common {
         unsigned short          skc_family;
         volatile unsigned char  skc_state;
@@ -110,11 +112,12 @@ struct sock_common {
         struct hlist_node       skc_node;
         struct hlist_node       skc_bind_node;
         atomic_t                skc_refcnt;
+       struct proto            *skc_prot;
  };
  
  /**
    *    struct sock - network layer representation of sockets
-  *    @__sk_common: shared layout with tcp_tw_bucket
+  *    @__sk_common: shared layout with inet_timewait_sock
    *    @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
    *    @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
    *    @sk_lock:       synchronizer
@@ -136,11 +139,10 @@ struct sock_common {
    *    @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
    *    @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
    *    @sk_lingertime: %SO_LINGER l_linger setting
-  *    @sk_hashent: hash entry in several tables (e.g. tcp_ehash)
+  *    @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash)
    *    @sk_backlog: always used with the per-socket spinlock held
    *    @sk_callback_lock: used with the callbacks in the end of this struct
    *    @sk_error_queue: rarely used
-  *    @sk_prot: protocol handlers inside a network family
    *    @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance)
    *    @sk_err: last error
    *    @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out'
@@ -173,7 +175,7 @@ struct sock_common {
   */
  struct sock {
         /*
-        * Now struct tcp_tw_bucket also uses sock_common, so please just
+        * Now struct inet_timewait_sock also uses sock_common, so please just
          * don't add nothing before this first member (__sk_common) --acme
          */
         struct sock_common      __sk_common;
@@ -184,6 +186,7 @@ struct sock {
  #define sk_node                        __sk_common.skc_node
  #define sk_bind_node           __sk_common.skc_bind_node
  #define sk_refcnt              __sk_common.skc_refcnt
+#define sk_prot                        __sk_common.skc_prot
         unsigned char           sk_shutdown : 2,
                                 sk_no_check : 2,
                                 sk_userlocks : 4;
@@ -218,7 +221,6 @@ struct sock {
                 struct sk_buff *tail;
         } sk_backlog;
         struct sk_buff_head     sk_error_queue;
-       struct proto            *sk_prot;
         struct proto            *sk_prot_creator;
         rwlock_t                sk_callback_lock;
         int                     sk_err,
@@ -253,28 +255,28 @@ struct sock {
  /*
   * Hashed lists helper routines
   */
-static inline struct sock *__sk_head(struct hlist_head *head)
+static inline struct sock *__sk_head(const struct hlist_head *head)
  {
         return hlist_entry(head->first, struct sock, sk_node);
  }
  
-static inline struct sock *sk_head(struct hlist_head *head)
+static inline struct sock *sk_head(const struct hlist_head *head)
  {
         return hlist_empty(head) ? NULL : __sk_head(head);
  }
  
-static inline struct sock *sk_next(struct sock *sk)
+static inline struct sock *sk_next(const struct sock *sk)
  {
         return sk->sk_node.next ?
                 hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL;
  }
  
-static inline int sk_unhashed(struct sock *sk)
+static inline int sk_unhashed(const struct sock *sk)
  {
         return hlist_unhashed(&sk->sk_node);
  }
  
-static inline int sk_hashed(struct sock *sk)
+static inline int sk_hashed(const struct sock *sk)
  {
         return sk->sk_node.pprev != NULL;
  }
@@ -384,6 +386,11 @@ enum sock_flags {
         SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
  };
  
+static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
+{
+       nsk->sk_flags = osk->sk_flags; /* wholesale copy, not per-flag */
+}
+
  static inline void sock_set_flag(struct sock *sk, enum sock_flags flag)
  {
         __set_bit(flag, &sk->sk_flags);
@@ -549,6 +556,10 @@ struct proto {
         kmem_cache_t            *slab;
         unsigned int            obj_size;
  
+       kmem_cache_t            *twsk_slab;
+       unsigned int            twsk_obj_size;
+       atomic_t                *orphan_count;
+
         struct request_sock_ops *rsk_prot;
  
         struct module           *owner;
@@ -556,7 +567,9 @@ struct proto {
         char                    name[32];
  
         struct list_head        node;
-
+#ifdef SOCK_REFCNT_DEBUG
+       atomic_t                socks;
+#endif
         struct {
                 int inuse;
                 u8  __pad[SMP_CACHE_BYTES - sizeof(int)];
@@ -566,6 +579,31 @@ struct proto {
  extern int proto_register(struct proto *prot, int alloc_slab);
  extern void proto_unregister(struct proto *prot);
  
+#ifdef SOCK_REFCNT_DEBUG
+static inline void sk_refcnt_debug_inc(struct sock *sk)
+{
+       atomic_inc(&sk->sk_prot->socks); /* per-protocol live count */
+}
+
+static inline void sk_refcnt_debug_dec(struct sock *sk)
+{
+       atomic_dec(&sk->sk_prot->socks); /* new count is logged below */
+       printk(KERN_DEBUG "%s socket %p released, %d are still alive\n",
+              sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
+}
+
+static inline void sk_refcnt_debug_release(const struct sock *sk)
+{
+       if (atomic_read(&sk->sk_refcnt) != 1) /* anything but exactly one ref */
+               printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
+                      sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt));
+}
+#else /* !SOCK_REFCNT_DEBUG: all three are no-ops */
+#define sk_refcnt_debug_inc(sk) do { } while (0)
+#define sk_refcnt_debug_dec(sk) do { } while (0)
+#define sk_refcnt_debug_release(sk) do { } while (0)
+#endif /* SOCK_REFCNT_DEBUG */
+
  /* Called with local bh disabled */
  static __inline__ void sock_prot_inc_use(struct proto *prot)
  {
@@ -577,6 +615,15 @@ static __inline__ void sock_prot_dec_use(struct proto *prot)
         prot->stats[smp_processor_id()].inuse--;
  }
  
+/* With per-bucket locks this operation is not atomic anyway, so
+ * this unhash-then-rehash version is no worse.
+ */
+static inline void __sk_prot_rehash(struct sock *sk)
+{
+       sk->sk_prot->unhash(sk);
+       sk->sk_prot->hash(sk);
+}
+
  /* About 10 seconds */
  #define SOCK_DESTROY_TIME (10*HZ)
  
@@ -688,6 +735,8 @@ extern struct sock          *sk_alloc(int family,
                                           unsigned int __nocast priority,
                                           struct proto *prot, int zero_it);
  extern void                    sk_free(struct sock *sk);
+extern struct sock             *sk_clone(const struct sock *sk,
+                                         const unsigned int __nocast priority);
  
  extern struct sk_buff          *sock_wmalloc(struct sock *sk,
                                               unsigned long size, int force,
@@ -981,6 +1030,16 @@ sk_dst_check(struct sock *sk, u32 cookie)
         return dst;
  }
  
+static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
+{
+       __sk_dst_set(sk, dst);
+       sk->sk_route_caps = dst->dev->features;
+       if (sk->sk_route_caps & NETIF_F_TSO) {
+               if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
+                       sk->sk_route_caps &= ~NETIF_F_TSO;
+       }
+}
+
  static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
  {
         sk->sk_wmem_queued   += skb->truesize;
@@ -1141,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
         int hdr_len;
  
         hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
-       skb = alloc_skb(size + hdr_len, gfp);
+       skb = alloc_skb_fclone(size + hdr_len, gfp);
         if (skb) {
                 skb->truesize += mem;
                 if (sk->sk_forward_alloc >= (int)skb->truesize ||
@@ -1223,16 +1282,19 @@ static inline int sock_intr_errno(long timeo)
  static __inline__ void
  sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
  {
-       struct timeval *stamp = &skb->stamp;
+       struct timeval stamp;
+
+       skb_get_timestamp(skb, &stamp);
         if (sock_flag(sk, SOCK_RCVTSTAMP)) {
                 /* Race occurred between timestamp enabling and packet
                    receiving.  Fill in the current time for now. */
-               if (stamp->tv_sec == 0)
-                       do_gettimeofday(stamp);
+               if (stamp.tv_sec == 0)
+                       do_gettimeofday(&stamp);
+               skb_set_timestamp(skb, &stamp);
                 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval),
-                        stamp);
+                        &stamp);
         } else
-               sk->sk_stamp = *stamp;
+               sk->sk_stamp = stamp;
  }
  
  /**
@@ -1257,11 +1319,11 @@ extern int sock_get_timestamp(struct sock *, struct timeval __user *);
   */
  
  #if 0
-#define NETDEBUG(x)    do { } while (0)
-#define LIMIT_NETDEBUG(x) do {} while(0)
+#define NETDEBUG(fmt, args...) do { } while (0)
+#define LIMIT_NETDEBUG(fmt, args...) do { } while(0)
  #else
-#define NETDEBUG(x)    do { x; } while (0)
-#define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0)
+#define NETDEBUG(fmt, args...) printk(fmt,##args)
+#define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0)
  #endif
  
  /*
@@ -1308,4 +1370,14 @@ static inline int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsign
  }
  #endif
  
+extern void sk_init(void);
+
+#ifdef CONFIG_SYSCTL
+extern struct ctl_table core_table[];
+extern int sysctl_optmem_max;
+#endif
+
+extern __u32 sysctl_wmem_default;
+extern __u32 sysctl_rmem_default;
+
  #endif /* _SOCK_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h

index f4f9aba07ac2ae59c6830f95ce7ae392d3bc2c42..d6bcf1317a6a90188905249feb0f9773a6d91a3b 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -21,360 +21,29 @@
  #define TCP_DEBUG 1
  #define FASTRETRANS_DEBUG 1
  
-/* Cancel timers, when they are not required. */
-#undef TCP_CLEAR_TIMERS
-
  #include <linux/config.h>
  #include <linux/list.h>
  #include <linux/tcp.h>
  #include <linux/slab.h>
  #include <linux/cache.h>
  #include <linux/percpu.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_timewait_sock.h>
+#include <net/inet_hashtables.h>
  #include <net/checksum.h>
  #include <net/request_sock.h>
  #include <net/sock.h>
  #include <net/snmp.h>
  #include <net/ip.h>
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-#include <linux/ipv6.h>
-#endif
-#include <linux/seq_file.h>
-
-/* This is for all connections with a full identity, no wildcards.
- * New scheme, half the table is for TIME_WAIT, the other half is
- * for the rest.  I'll experiment with dynamic table growth later.
- */
-struct tcp_ehash_bucket {
-       rwlock_t          lock;
-       struct hlist_head chain;
-} __attribute__((__aligned__(8)));
-
-/* This is for listening sockets, thus all sockets which possess wildcards. */
-#define TCP_LHTABLE_SIZE       32      /* Yes, really, this is all you need. */
-
-/* There are a few simple rules, which allow for local port reuse by
- * an application.  In essence:
- *
- *     1) Sockets bound to different interfaces may share a local port.
- *        Failing that, goto test 2.
- *     2) If all sockets have sk->sk_reuse set, and none of them are in
- *        TCP_LISTEN state, the port may be shared.
- *        Failing that, goto test 3.
- *     3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
- *        address, and none of them are the same, the port may be
- *        shared.
- *        Failing this, the port cannot be shared.
- *
- * The interesting point, is test #2.  This is what an FTP server does
- * all day.  To optimize this case we use a specific flag bit defined
- * below.  As we add sockets to a bind bucket list, we perform a
- * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
- * As long as all sockets added to a bind bucket pass this test,
- * the flag bit will be set.
- * The resulting situation is that tcp_v[46]_verify_bind() can just check
- * for this flag bit, if it is set and the socket trying to bind has
- * sk->sk_reuse set, we don't even have to walk the owners list at all,
- * we return that it is ok to bind this socket to the requested local port.
- *
- * Sounds like a lot of work, but it is worth it.  In a more naive
- * implementation (ie. current FreeBSD etc.) the entire list of ports
- * must be walked for each data port opened by an ftp server.  Needless
- * to say, this does not scale at all.  With a couple thousand FTP
- * users logged onto your box, isn't it nice to know that new data
- * ports are created in O(1) time?  I thought so. ;-)  -DaveM
- */
-struct tcp_bind_bucket {
-       unsigned short          port;
-       signed short            fastreuse;
-       struct hlist_node       node;
-       struct hlist_head       owners;
-};
-
-#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node)
-
-struct tcp_bind_hashbucket {
-       spinlock_t              lock;
-       struct hlist_head       chain;
-};
-
-static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head)
-{
-       return hlist_entry(head->chain.first, struct tcp_bind_bucket, node);
-}
-
-static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head)
-{
-       return hlist_empty(&head->chain) ? NULL : __tb_head(head);
-}
-
-extern struct tcp_hashinfo {
-       /* This is for sockets with full identity only.  Sockets here will
-        * always be without wildcards and will have the following invariant:
-        *
-        *          TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
-        *
-        * First half of the table is for sockets not in TIME_WAIT, second half
-        * is for TIME_WAIT sockets only.
-        */
-       struct tcp_ehash_bucket *__tcp_ehash;
-
-       /* Ok, let's try this, I give up, we do need a local binding
-        * TCP hash as well as the others for fast bind/connect.
-        */
-       struct tcp_bind_hashbucket *__tcp_bhash;
+#include <net/tcp_states.h>
  
-       int __tcp_bhash_size;
-       int __tcp_ehash_size;
-
-       /* All sockets in TCP_LISTEN state will be in here.  This is the only
-        * table where wildcard'd TCP sockets can exist.  Hash function here
-        * is just local port number.
-        */
-       struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE];
-
-       /* All the above members are written once at bootup and
-        * never written again _or_ are predominantly read-access.
-        *
-        * Now align to a new cache line as all the following members
-        * are often dirty.
-        */
-       rwlock_t __tcp_lhash_lock ____cacheline_aligned;
-       atomic_t __tcp_lhash_users;
-       wait_queue_head_t __tcp_lhash_wait;
-       spinlock_t __tcp_portalloc_lock;
-} tcp_hashinfo;
-
-#define tcp_ehash      (tcp_hashinfo.__tcp_ehash)
-#define tcp_bhash      (tcp_hashinfo.__tcp_bhash)
-#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size)
-#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size)
-#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)
-#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock)
-#define tcp_lhash_users        (tcp_hashinfo.__tcp_lhash_users)
-#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait)
-#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)
-
-extern kmem_cache_t *tcp_bucket_cachep;
-extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
-                                                unsigned short snum);
-extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb);
-extern void tcp_bucket_unlock(struct sock *sk);
-extern int tcp_port_rover;
-
-/* These are AF independent. */
-static __inline__ int tcp_bhashfn(__u16 lport)
-{
-       return (lport & (tcp_bhash_size - 1));
-}
-
-extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
-                         unsigned short snum);
-
-#if (BITS_PER_LONG == 64)
-#define TCP_ADDRCMP_ALIGN_BYTES 8
-#else
-#define TCP_ADDRCMP_ALIGN_BYTES 4
-#endif
-
-/* This is a TIME_WAIT bucket.  It works around the memory consumption
- * problems of sockets in such a state on heavily loaded servers, but
- * without violating the protocol specification.
- */
-struct tcp_tw_bucket {
-       /*
-        * Now struct sock also uses sock_common, so please just
-        * don't add nothing before this first member (__tw_common) --acme
-        */
-       struct sock_common      __tw_common;
-#define tw_family              __tw_common.skc_family
-#define tw_state               __tw_common.skc_state
-#define tw_reuse               __tw_common.skc_reuse
-#define tw_bound_dev_if                __tw_common.skc_bound_dev_if
-#define tw_node                        __tw_common.skc_node
-#define tw_bind_node           __tw_common.skc_bind_node
-#define tw_refcnt              __tw_common.skc_refcnt
-       volatile unsigned char  tw_substate;
-       unsigned char           tw_rcv_wscale;
-       __u16                   tw_sport;
-       /* Socket demultiplex comparisons on incoming packets. */
-       /* these five are in inet_sock */
-       __u32                   tw_daddr
-               __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES)));
-       __u32                   tw_rcv_saddr;
-       __u16                   tw_dport;
-       __u16                   tw_num;
-       /* And these are ours. */
-       int                     tw_hashent;
-       int                     tw_timeout;
-       __u32                   tw_rcv_nxt;
-       __u32                   tw_snd_nxt;
-       __u32                   tw_rcv_wnd;
-       __u32                   tw_ts_recent;
-       long                    tw_ts_recent_stamp;
-       unsigned long           tw_ttd;
-       struct tcp_bind_bucket  *tw_tb;
-       struct hlist_node       tw_death_node;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-       struct in6_addr         tw_v6_daddr;
-       struct in6_addr         tw_v6_rcv_saddr;
-       int                     tw_v6_ipv6only;
-#endif
-};
-
-static __inline__ void tw_add_node(struct tcp_tw_bucket *tw,
-                                  struct hlist_head *list)
-{
-       hlist_add_head(&tw->tw_node, list);
-}
-
-static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw,
-                                       struct hlist_head *list)
-{
-       hlist_add_head(&tw->tw_bind_node, list);
-}
-
-static inline int tw_dead_hashed(struct tcp_tw_bucket *tw)
-{
-       return tw->tw_death_node.pprev != NULL;
-}
-
-static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw)
-{
-       tw->tw_death_node.pprev = NULL;
-}
-
-static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw)
-{
-       __hlist_del(&tw->tw_death_node);
-       tw_dead_node_init(tw);
-}
-
-static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw)
-{
-       if (tw_dead_hashed(tw)) {
-               __tw_del_dead_node(tw);
-               return 1;
-       }
-       return 0;
-}
-
-#define tw_for_each(tw, node, head) \
-       hlist_for_each_entry(tw, node, head, tw_node)
-
-#define tw_for_each_inmate(tw, node, jail) \
-       hlist_for_each_entry(tw, node, jail, tw_death_node)
-
-#define tw_for_each_inmate_safe(tw, node, safe, jail) \
-       hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node)
-
-#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk))
-
-static inline u32 tcp_v4_rcv_saddr(const struct sock *sk)
-{
-       return likely(sk->sk_state != TCP_TIME_WAIT) ?
-               inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr;
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk)
-{
-       return likely(sk->sk_state != TCP_TIME_WAIT) ?
-               &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr;
-}
-
-static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
-{
-       return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;
-}
-
-#define tcptw_sk_ipv6only(__sk)        (tcptw_sk(__sk)->tw_v6_ipv6only)
-
-static inline int tcp_v6_ipv6only(const struct sock *sk)
-{
-       return likely(sk->sk_state != TCP_TIME_WAIT) ?
-               ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk);
-}
-#else
-# define __tcp_v6_rcv_saddr(__sk)      NULL
-# define tcp_v6_rcv_saddr(__sk)                NULL
-# define tcptw_sk_ipv6only(__sk)       0
-# define tcp_v6_ipv6only(__sk)         0
-#endif
+#include <linux/seq_file.h>
  
-extern kmem_cache_t *tcp_timewait_cachep;
-
-static inline void tcp_tw_put(struct tcp_tw_bucket *tw)
-{
-       if (atomic_dec_and_test(&tw->tw_refcnt)) {
-#ifdef INET_REFCNT_DEBUG
-               printk(KERN_DEBUG "tw_bucket %p released\n", tw);
-#endif
-               kmem_cache_free(tcp_timewait_cachep, tw);
-       }
-}
+extern struct inet_hashinfo tcp_hashinfo;
  
  extern atomic_t tcp_orphan_count;
-extern int tcp_tw_count;
  extern void tcp_time_wait(struct sock *sk, int state, int timeo);
-extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
-
-
-/* Socket demux engine toys. */
-#ifdef __BIG_ENDIAN
-#define TCP_COMBINED_PORTS(__sport, __dport) \
-       (((__u32)(__sport)<<16) | (__u32)(__dport))
-#else /* __LITTLE_ENDIAN */
-#define TCP_COMBINED_PORTS(__sport, __dport) \
-       (((__u32)(__dport)<<16) | (__u32)(__sport))
-#endif
-
-#if (BITS_PER_LONG == 64)
-#ifdef __BIG_ENDIAN
-#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
-       __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));
-#else /* __LITTLE_ENDIAN */
-#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
-       __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
-#endif /* __BIG_ENDIAN */
-#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
-       (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie))   &&      \
-        ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports))    &&      \
-        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
-       (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) &&   \
-        ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) &&    \
-        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#else /* 32-bit arch */
-#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
-#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
-       ((inet_sk(__sk)->daddr                  == (__saddr))   &&      \
-        (inet_sk(__sk)->rcv_saddr              == (__daddr))   &&      \
-        ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports))    &&      \
-        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
-       ((tcptw_sk(__sk)->tw_daddr              == (__saddr))   &&      \
-        (tcptw_sk(__sk)->tw_rcv_saddr          == (__daddr))   &&      \
-        ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) &&    \
-        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#endif /* 64-bit arch */
-
-#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif)    \
-       (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports))    && \
-        ((__sk)->sk_family             == AF_INET6)            && \
-        ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))     && \
-        ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
-        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-
-/* These can have wildcards, don't try too hard. */
-static __inline__ int tcp_lhashfn(unsigned short num)
-{
-       return num & (TCP_LHTABLE_SIZE - 1);
-}
-
-static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
-{
-       return tcp_lhashfn(inet_sk(sk)->num);
-}
  
  #define MAX_TCP_HEADER (128 + MAX_HEADER)
  
@@ -478,33 +147,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
                                          * timestamps. It must be less than
                                          * minimal timewait lifetime.
                                          */
-
-#define TCP_TW_RECYCLE_SLOTS_LOG       5
-#define TCP_TW_RECYCLE_SLOTS           (1<<TCP_TW_RECYCLE_SLOTS_LOG)
-
-/* If time > 4sec, it is "slow" path, no recycling is required,
-   so that we select tick to get range about 4 seconds.
- */
-
-#if HZ <= 16 || HZ > 4096
-# error Unsupported: HZ <= 16 or HZ > 4096
-#elif HZ <= 32
-# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
-#elif HZ <= 64
-# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
-#elif HZ <= 128
-# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
-#elif HZ <= 256
-# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
-#elif HZ <= 512
-# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
-#elif HZ <= 1024
-# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
-#elif HZ <= 2048
-# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
-#else
-# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
-#endif
  /*
   *     TCP option
   */
@@ -534,22 +176,18 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
  #define TCPOLEN_SACK_BASE_ALIGNED      4
  #define TCPOLEN_SACK_PERBLOCK          8
  
-#define TCP_TIME_RETRANS       1       /* Retransmit timer */
-#define TCP_TIME_DACK          2       /* Delayed ack timer */
-#define TCP_TIME_PROBE0                3       /* Zero window probe timer */
-#define TCP_TIME_KEEPOPEN      4       /* Keepalive timer */
-
  /* Flags in tp->nonagle */
  #define TCP_NAGLE_OFF          1       /* Nagle's algo is disabled */
  #define TCP_NAGLE_CORK         2       /* Socket is corked         */
  #define TCP_NAGLE_PUSH         4       /* Cork is overriden for already queued data */
  
+extern struct inet_timewait_death_row tcp_death_row;
+
  /* sysctl variables for tcp */
  extern int sysctl_tcp_timestamps;
  extern int sysctl_tcp_window_scaling;
  extern int sysctl_tcp_sack;
  extern int sysctl_tcp_fin_timeout;
-extern int sysctl_tcp_tw_recycle;
  extern int sysctl_tcp_keepalive_time;
  extern int sysctl_tcp_keepalive_probes;
  extern int sysctl_tcp_keepalive_intvl;
@@ -564,7 +202,6 @@ extern int sysctl_tcp_stdurg;
  extern int sysctl_tcp_rfc1337;
  extern int sysctl_tcp_abort_on_overflow;
  extern int sysctl_tcp_max_orphans;
-extern int sysctl_tcp_max_tw_buckets;
  extern int sysctl_tcp_fack;
  extern int sysctl_tcp_reordering;
  extern int sysctl_tcp_ecn;
@@ -585,12 +222,6 @@ extern atomic_t tcp_memory_allocated;
  extern atomic_t tcp_sockets_allocated;
  extern int tcp_memory_pressure;
  
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-#define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
-#else
-#define TCP_INET_FAMILY(fam) 1
-#endif
-
  /*
   *     Pointers to address related TCP functions
   *     (i.e. things that depend on the address family)
@@ -671,9 +302,6 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
  #define TCP_ADD_STATS_BH(field, val)   SNMP_ADD_STATS_BH(tcp_statistics, field, val)
  #define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val)
  
-extern void                    tcp_put_port(struct sock *sk);
-extern void                    tcp_inherit_port(struct sock *sk, struct sock *child);
-
  extern void                    tcp_v4_err(struct sk_buff *skb, u32);
  
  extern void                    tcp_shutdown (struct sock *sk, int how);
@@ -682,7 +310,7 @@ extern int                  tcp_v4_rcv(struct sk_buff *skb);
  
  extern int                     tcp_v4_remember_stamp(struct sock *sk);
  
-extern int                     tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw);
+extern int                     tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
  
  extern int                     tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
                                             struct msghdr *msg, size_t size);
@@ -704,42 +332,22 @@ extern int                        tcp_rcv_established(struct sock *sk,
  
  extern void                    tcp_rcv_space_adjust(struct sock *sk);
  
-enum tcp_ack_state_t
-{
-       TCP_ACK_SCHED = 1,
-       TCP_ACK_TIMER = 2,
-       TCP_ACK_PUSHED= 4
-};
-
-static inline void tcp_schedule_ack(struct tcp_sock *tp)
+static inline void tcp_dec_quickack_mode(struct sock *sk,
+                                        const unsigned int pkts)
  {
-       tp->ack.pending |= TCP_ACK_SCHED;
-}
-
-static inline int tcp_ack_scheduled(struct tcp_sock *tp)
-{
-       return tp->ack.pending&TCP_ACK_SCHED;
-}
-
-static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts)
-{
-       if (tp->ack.quick) {
-               if (pkts >= tp->ack.quick) {
-                       tp->ack.quick = 0;
+       struct inet_connection_sock *icsk = inet_csk(sk);
  
+       if (icsk->icsk_ack.quick) {
+               if (pkts >= icsk->icsk_ack.quick) {
+                       icsk->icsk_ack.quick = 0;
                         /* Leaving quickack mode we deflate ATO. */
-                       tp->ack.ato = TCP_ATO_MIN;
+                       icsk->icsk_ack.ato   = TCP_ATO_MIN;
                 } else
-                       tp->ack.quick -= pkts;
+                       icsk->icsk_ack.quick -= pkts;
         }
  }
  
-extern void tcp_enter_quickack_mode(struct tcp_sock *tp);
-
-static __inline__ void tcp_delack_init(struct tcp_sock *tp)
-{
-       memset(&tp->ack, 0, sizeof(tp->ack));
-}
+extern void tcp_enter_quickack_mode(struct sock *sk);
  
  static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
  {
@@ -755,10 +363,9 @@ enum tcp_tw_status
  };
  
  
-extern enum tcp_tw_status      tcp_timewait_state_process(struct tcp_tw_bucket *tw,
+extern enum tcp_tw_status      tcp_timewait_state_process(struct inet_timewait_sock *tw,
                                                            struct sk_buff *skb,
-                                                          struct tcphdr *th,
-                                                          unsigned len);
+                                                          const struct tcphdr *th);
  
  extern struct sock *           tcp_check_req(struct sock *sk,struct sk_buff *skb,
                                               struct request_sock *req,
@@ -773,7 +380,6 @@ extern void                 tcp_update_metrics(struct sock *sk);
  
  extern void                    tcp_close(struct sock *sk, 
                                           long timeout);
-extern struct sock *           tcp_accept(struct sock *sk, int flags, int *err);
  extern unsigned int            tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
  
  extern int                     tcp_getsockopt(struct sock *sk, int level, 
@@ -789,8 +395,6 @@ extern int                  tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
                                             size_t len, int nonblock, 
                                             int flags, int *addr_len);
  
-extern int                     tcp_listen_start(struct sock *sk);
-
  extern void                    tcp_parse_options(struct sk_buff *skb,
                                                   struct tcp_options_received *opt_rx,
                                                   int estab);
@@ -799,11 +403,6 @@ extern void                        tcp_parse_options(struct sk_buff *skb,
   *     TCP v4 functions exported for the inet6 API
   */
  
-extern int                     tcp_v4_rebuild_header(struct sock *sk);
-
-extern int                     tcp_v4_build_header(struct sock *sk, 
-                                                   struct sk_buff *skb);
-
  extern void                    tcp_v4_send_check(struct sock *sk, 
                                                   struct tcphdr *th, int len, 
                                                   struct sk_buff *skb);
@@ -872,18 +471,15 @@ extern void tcp_cwnd_application_limited(struct sock *sk);
  
  /* tcp_timer.c */
  extern void tcp_init_xmit_timers(struct sock *);
-extern void tcp_clear_xmit_timers(struct sock *);
+static inline void tcp_clear_xmit_timers(struct sock *sk)
+{
+       inet_csk_clear_xmit_timers(sk);
+}
  
-extern void tcp_delete_keepalive_timer(struct sock *);
-extern void tcp_reset_keepalive_timer(struct sock *, unsigned long);
  extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
  extern unsigned int tcp_current_mss(struct sock *sk, int large);
  
-#ifdef TCP_DEBUG
-extern const char tcp_timer_bug_msg[];
-#endif
-
-/* tcp_diag.c */
+/* tcp.c */
  extern void tcp_get_info(struct sock *, struct tcp_info *);
  
  /* Read 'sendfile()'-style from a TCP socket */
@@ -892,72 +488,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
  extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
                          sk_read_actor_t recv_actor);
  
-static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-       
-       switch (what) {
-       case TCP_TIME_RETRANS:
-       case TCP_TIME_PROBE0:
-               tp->pending = 0;
-
-#ifdef TCP_CLEAR_TIMERS
-               sk_stop_timer(sk, &tp->retransmit_timer);
-#endif
-               break;
-       case TCP_TIME_DACK:
-               tp->ack.blocked = 0;
-               tp->ack.pending = 0;
-
-#ifdef TCP_CLEAR_TIMERS
-               sk_stop_timer(sk, &tp->delack_timer);
-#endif
-               break;
-       default:
-#ifdef TCP_DEBUG
-               printk(tcp_timer_bug_msg);
-#endif
-               return;
-       };
-
-}
-
-/*
- *     Reset the retransmission timer
- */
-static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-
-       if (when > TCP_RTO_MAX) {
-#ifdef TCP_DEBUG
-               printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr());
-#endif
-               when = TCP_RTO_MAX;
-       }
-
-       switch (what) {
-       case TCP_TIME_RETRANS:
-       case TCP_TIME_PROBE0:
-               tp->pending = what;
-               tp->timeout = jiffies+when;
-               sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
-               break;
-
-       case TCP_TIME_DACK:
-               tp->ack.pending |= TCP_ACK_TIMER;
-               tp->ack.timeout = jiffies+when;
-               sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
-               break;
-
-       default:
-#ifdef TCP_DEBUG
-               printk(tcp_timer_bug_msg);
-#endif
-               return;
-       };
-}
-
  /* Initialize RCV_MSS value.
   * RCV_MSS is an our guess about MSS used by the peer.
   * We haven't any direct information about the MSS.
@@ -975,7 +505,7 @@ static inline void tcp_initialize_rcv_mss(struct sock *sk)
         hint = min(hint, TCP_MIN_RCVMSS);
         hint = max(hint, TCP_MIN_MSS);
  
-       tp->ack.rcv_mss = hint;
+       inet_csk(sk)->icsk_ack.rcv_mss = hint;
  }
  
  static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
@@ -1110,7 +640,8 @@ static inline void tcp_packets_out_inc(struct sock *sk,
  
         tp->packets_out += tcp_skb_pcount(skb);
         if (!orig)
-               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                         inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
  }
  
  static inline void tcp_packets_out_dec(struct tcp_sock *tp, 
@@ -1138,29 +669,29 @@ struct tcp_congestion_ops {
         struct list_head        list;
  
         /* initialize private data (optional) */
-       void (*init)(struct tcp_sock *tp);
+       void (*init)(struct sock *sk);
         /* cleanup private data  (optional) */
-       void (*release)(struct tcp_sock *tp);
+       void (*release)(struct sock *sk);
  
         /* return slow start threshold (required) */
-       u32 (*ssthresh)(struct tcp_sock *tp);
+       u32 (*ssthresh)(struct sock *sk);
         /* lower bound for congestion window (optional) */
-       u32 (*min_cwnd)(struct tcp_sock *tp);
+       u32 (*min_cwnd)(struct sock *sk);
         /* do new cwnd calculation (required) */
-       void (*cong_avoid)(struct tcp_sock *tp, u32 ack,
+       void (*cong_avoid)(struct sock *sk, u32 ack,
                            u32 rtt, u32 in_flight, int good_ack);
         /* round trip time sample per acked packet (optional) */
-       void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt);
+       void (*rtt_sample)(struct sock *sk, u32 usrtt);
         /* call before changing ca_state (optional) */
-       void (*set_state)(struct tcp_sock *tp, u8 new_state);
+       void (*set_state)(struct sock *sk, u8 new_state);
         /* call when cwnd event occurs (optional) */
-       void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev);
+       void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
         /* new value of cwnd after loss (optional) */
-       u32  (*undo_cwnd)(struct tcp_sock *tp);
+       u32  (*undo_cwnd)(struct sock *sk);
         /* hook for packet ack accounting (optional) */
-       void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked);
-       /* get info for tcp_diag (optional) */
-       void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb);
+       void (*pkts_acked)(struct sock *sk, u32 num_acked);
+       /* get info for inet_diag (optional) */
+       void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
  
         char            name[TCP_CA_NAME_MAX];
         struct module   *owner;
@@ -1169,30 +700,34 @@ struct tcp_congestion_ops {
  extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
  extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
  
-extern void tcp_init_congestion_control(struct tcp_sock *tp);
-extern void tcp_cleanup_congestion_control(struct tcp_sock *tp);
+extern void tcp_init_congestion_control(struct sock *sk);
+extern void tcp_cleanup_congestion_control(struct sock *sk);
  extern int tcp_set_default_congestion_control(const char *name);
  extern void tcp_get_default_congestion_control(char *name);
-extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name);
+extern int tcp_set_congestion_control(struct sock *sk, const char *name);
  
  extern struct tcp_congestion_ops tcp_init_congestion_ops;
-extern u32 tcp_reno_ssthresh(struct tcp_sock *tp);
-extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack,
+extern u32 tcp_reno_ssthresh(struct sock *sk);
+extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
                                 u32 rtt, u32 in_flight, int flag);
-extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp);
+extern u32 tcp_reno_min_cwnd(struct sock *sk);
  extern struct tcp_congestion_ops tcp_reno;
  
-static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state)
+static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
  {
-       if (tp->ca_ops->set_state)
-               tp->ca_ops->set_state(tp, ca_state);
-       tp->ca_state = ca_state;
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       if (icsk->icsk_ca_ops->set_state)
+               icsk->icsk_ca_ops->set_state(sk, ca_state);
+       icsk->icsk_ca_state = ca_state;
  }
  
-static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event)
+static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
  {
-       if (tp->ca_ops->cwnd_event)
-               tp->ca_ops->cwnd_event(tp, event);
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+
+       if (icsk->icsk_ca_ops->cwnd_event)
+               icsk->icsk_ca_ops->cwnd_event(sk, event);
  }
  
  /* This determines how many packets are "in the network" to the best
@@ -1218,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
   * The exception is rate halving phase, when cwnd is decreasing towards
   * ssthresh.
   */
-static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp)
+static inline __u32 tcp_current_ssthresh(const struct sock *sk)
  {
-       if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
+       const struct tcp_sock *tp = tcp_sk(sk);
+       if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
                 return tp->snd_ssthresh;
         else
                 return max(tp->snd_ssthresh,
@@ -1236,11 +772,14 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
         tp->left_out = tp->sacked_out + tp->lost_out;
  }
  
-/* Set slow start threshould and cwnd not falling to slow start */
-static inline void __tcp_enter_cwr(struct tcp_sock *tp)
+/* Set slow start threshold and cwnd not falling to slow start */
+static inline void __tcp_enter_cwr(struct sock *sk)
  {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+
         tp->undo_marker = 0;
-       tp->snd_ssthresh = tp->ca_ops->ssthresh(tp);
+       tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
         tp->snd_cwnd = min(tp->snd_cwnd,
                            tcp_packets_in_flight(tp) + 1U);
         tp->snd_cwnd_cnt = 0;
@@ -1249,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp)
         TCP_ECN_queue_cwr(tp);
  }
  
-static inline void tcp_enter_cwr(struct tcp_sock *tp)
+static inline void tcp_enter_cwr(struct sock *sk)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
+
         tp->prior_ssthresh = 0;
-       if (tp->ca_state < TCP_CA_CWR) {
-               __tcp_enter_cwr(tp);
-               tcp_set_ca_state(tp, TCP_CA_CWR);
+       if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+               __tcp_enter_cwr(sk);
+               tcp_set_ca_state(sk, TCP_CA_CWR);
         }
  }
  
@@ -1277,8 +818,10 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
  
  static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
  {
-       if (!tp->packets_out && !tp->pending)
-               tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       if (!tp->packets_out && !icsk->icsk_pending)
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+                                         icsk->icsk_rto, TCP_RTO_MAX);
  }
  
  static __inline__ void tcp_push_pending_frames(struct sock *sk,
@@ -1297,9 +840,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
         tp->snd_wl1 = seq;
  }
  
-extern void tcp_destroy_sock(struct sock *sk);
-
-
  /*
   * Calculate(/check) TCP checksum
   */
@@ -1359,8 +899,10 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
                         tp->ucopy.memory = 0;
                 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
                         wake_up_interruptible(sk->sk_sleep);
-                       if (!tcp_ack_scheduled(tp))
-                               tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4);
+                       if (!inet_csk_ack_scheduled(sk))
+                               inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                                         (3 * TCP_RTO_MIN) / 4,
+                                                         TCP_RTO_MAX);
                 }
                 return 1;
         }
@@ -1393,9 +935,9 @@ static __inline__ void tcp_set_state(struct sock *sk, int state)
                         TCP_INC_STATS(TCP_MIB_ESTABRESETS);
  
                 sk->sk_prot->unhash(sk);
-               if (tcp_sk(sk)->bind_hash &&
+               if (inet_csk(sk)->icsk_bind_hash &&
                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
-                       tcp_put_port(sk);
+                       inet_put_port(&tcp_hashinfo, sk);
                 /* fall through */
         default:
                 if (oldstate==TCP_ESTABLISHED)
@@ -1422,7 +964,7 @@ static __inline__ void tcp_done(struct sock *sk)
         if (!sock_flag(sk, SOCK_DEAD))
                 sk->sk_state_change(sk);
         else
-               tcp_destroy_sock(sk);
+               inet_csk_destroy_sock(sk);
  }
  
  static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
@@ -1524,54 +1066,6 @@ static inline int tcp_full_space(const struct sock *sk)
         return tcp_win_from_space(sk->sk_rcvbuf); 
  }
  
-static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req,
-                                        struct sock *child)
-{
-       reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child);
-}
-
-static inline void
-tcp_synq_removed(struct sock *sk, struct request_sock *req)
-{
-       if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0)
-               tcp_delete_keepalive_timer(sk);
-}
-
-static inline void tcp_synq_added(struct sock *sk)
-{
-       if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0)
-               tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
-}
-
-static inline int tcp_synq_len(struct sock *sk)
-{
-       return reqsk_queue_len(&tcp_sk(sk)->accept_queue);
-}
-
-static inline int tcp_synq_young(struct sock *sk)
-{
-       return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue);
-}
-
-static inline int tcp_synq_is_full(struct sock *sk)
-{
-       return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue);
-}
-
-static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req,
-                                  struct request_sock **prev)
-{
-       reqsk_queue_unlink(&tp->accept_queue, req, prev);
-}
-
-static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req,
-                                    struct request_sock **prev)
-{
-       tcp_synq_unlink(tcp_sk(sk), req, prev);
-       tcp_synq_removed(sk, req);
-       reqsk_free(req);
-}
-
  static __inline__ void tcp_openreq_init(struct request_sock *req,
                                         struct tcp_options_received *rx_opt,
                                         struct sk_buff *skb)
@@ -1593,27 +1087,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req,
  
  extern void tcp_enter_memory_pressure(void);
  
-extern void tcp_listen_wlock(void);
-
-/* - We may sleep inside this lock.
- * - If sleeping is not required (or called from BH),
- *   use plain read_(un)lock(&tcp_lhash_lock).
- */
-
-static inline void tcp_listen_lock(void)
-{
-       /* read_lock synchronizes to candidates to writers */
-       read_lock(&tcp_lhash_lock);
-       atomic_inc(&tcp_lhash_users);
-       read_unlock(&tcp_lhash_lock);
-}
-
-static inline void tcp_listen_unlock(void)
-{
-       if (atomic_dec_and_test(&tcp_lhash_users))
-               wake_up(&tcp_lhash_wait);
-}
-
  static inline int keepalive_intvl_when(const struct tcp_sock *tp)
  {
         return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
@@ -1624,12 +1097,13 @@ static inline int keepalive_time_when(const struct tcp_sock *tp)
         return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
  }
  
-static inline int tcp_fin_time(const struct tcp_sock *tp)
+static inline int tcp_fin_time(const struct sock *sk)
  {
-       int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout;
+       int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
+       const int rto = inet_csk(sk)->icsk_rto;
  
-       if (fin_timeout < (tp->rto<<2) - (tp->rto>>1))
-               fin_timeout = (tp->rto<<2) - (tp->rto>>1);
+       if (fin_timeout < (rto << 2) - (rto >> 1))
+               fin_timeout = (rto << 2) - (rto >> 1);
  
         return fin_timeout;
  }
@@ -1658,15 +1132,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
         return 1;
  }
  
-static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst)
-{
-       sk->sk_route_caps = dst->dev->features;
-       if (sk->sk_route_caps & NETIF_F_TSO) {
-               if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
-                       sk->sk_route_caps &= ~NETIF_F_TSO;
-       }
-}
-
  #define TCP_CHECK_TIMER(sk) do { } while (0)
  
  static inline int tcp_use_frto(const struct sock *sk)
@@ -1718,4 +1183,16 @@ struct tcp_iter_state {
  extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo);
  extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);
  
+extern struct request_sock_ops tcp_request_sock_ops;
+
+extern int tcp_v4_destroy_sock(struct sock *sk);
+
+#ifdef CONFIG_PROC_FS
+extern int  tcp4_proc_init(void);
+extern void tcp4_proc_exit(void);
+#endif
+
+extern void tcp_v4_init(struct net_proto_family *ops);
+extern void tcp_init(void);
+
  #endif /* _TCP_H */
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h

index 64980ee8c92a9ee77d6c57b1806f7acd290883cf..c6b84397448dac0b810079c96fe1c642773b16fd 100644 (file)
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -88,7 +88,7 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
                  * it is surely retransmit. It is not in ECN RFC,
                  * but Linux follows this rule. */
                 else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
-                       tcp_enter_quickack_mode(tp);
+                       tcp_enter_quickack_mode((struct sock *)tp);
         }
  }
  
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h

new file mode 100644 (file)

index 0000000..b9d4176
--- /dev/null
+++ b/include/net/tcp_states.h
@@ -0,0 +1,34 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Definitions for the TCP protocol sk_state field.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_TCP_STATES_H
+#define _LINUX_TCP_STATES_H
+
+enum {
+       TCP_ESTABLISHED = 1,
+       TCP_SYN_SENT,
+       TCP_SYN_RECV,
+       TCP_FIN_WAIT1,
+       TCP_FIN_WAIT2,
+       TCP_TIME_WAIT,
+       TCP_CLOSE,
+       TCP_CLOSE_WAIT,
+       TCP_LAST_ACK,
+       TCP_LISTEN,
+       TCP_CLOSING,    /* Now a valid state */
+
+       TCP_MAX_STATES  /* Leave at the end! */
+};
+
+#define TCP_STATE_MASK 0xF
+
+#endif /* _LINUX_TCP_STATES_H */
diff --git a/include/net/udp.h b/include/net/udp.h

index ac229b761dbc488b24fb031b1acf51192c6bb927..107b9d791a1f1d31fc478bc31b7fedbe25c2ba22 100644 (file)
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -94,6 +94,11 @@ struct udp_iter_state {
         struct seq_operations   seq_ops;
  };
  
+#ifdef CONFIG_PROC_FS
  extern int udp_proc_register(struct udp_seq_afinfo *afinfo);
  extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo);
+
+extern int  udp4_proc_init(void);
+extern void udp4_proc_exit(void);
+#endif
  #endif /* _UDP_H */
diff --git a/include/net/x25.h b/include/net/x25.h

index 8b39b98876e89bc4dd2fdf89be43c5d608d5bcee..fee62ff8c1946c78744f7ab880fa67315ad6cd5e 100644 (file)
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -175,7 +175,7 @@ extern void x25_kill_by_neigh(struct x25_neigh *);
  
  /* x25_dev.c */
  extern void x25_send_frame(struct sk_buff *, struct x25_neigh *);
-extern int  x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *);
+extern int  x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
  extern void x25_establish_link(struct x25_neigh *);
  extern void x25_terminate_link(struct x25_neigh *);
  
diff --git a/include/net/x25device.h b/include/net/x25device.h

index d45ae883bd1dabebf1e304bf29e6e5c382267a5f..1a318374faefc42e853ab4692b9aeb046aebda46 100644 (file)
--- a/include/net/x25device.h
+++ b/include/net/x25device.h
@@ -8,7 +8,6 @@
  static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev)
  {
         skb->mac.raw = skb->data;
-       skb->input_dev = skb->dev = dev;
         skb->pkt_type = PACKET_HOST;
         
         return htons(ETH_P_X25);
diff --git a/include/net/xfrm.h b/include/net/xfrm.h

index 868ef88ef9711c0fd3065deb9780537a301283ed..a9d0d8c5dfbffa04c1ca67ccb35e464940301833 100644 (file)
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -818,7 +818,6 @@ extern void xfrm6_init(void);
  extern void xfrm6_fini(void);
  extern void xfrm_state_init(void);
  extern void xfrm4_state_init(void);
-extern void xfrm4_state_fini(void);
  extern void xfrm6_state_init(void);
  extern void xfrm6_state_fini(void);
  
diff --git a/drivers/infiniband/include/ib_cache.h b/include/rdma/ib_cache.h

similarity index 96%

rename from drivers/infiniband/include/ib_cache.h

rename to include/rdma/ib_cache.h

index 44ef6bb9b9df49ec95cc9c0a73eb4429fb975983..5bf9834f7dcae588f126a8800f6685654e1bb881 100644 (file)
--- a/drivers/infiniband/include/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -1,5 +1,7 @@
  /*
   * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@ -35,7 +37,7 @@
  #ifndef _IB_CACHE_H
  #define _IB_CACHE_H
  
-#include <ib_verbs.h>
+#include <rdma/ib_verbs.h>
  
  /**
   * ib_get_cached_gid - Returns a cached GID table entry
diff --git a/drivers/infiniband/include/ib_cm.h b/include/rdma/ib_cm.h

similarity index 89%

rename from drivers/infiniband/include/ib_cm.h

rename to include/rdma/ib_cm.h

index e5d74a730a705e4f9a6a41cb32de4e14856264c2..77fe9039209b1612590fcf7923fc1faaf8b1c8a7 100644 (file)
--- a/drivers/infiniband/include/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -37,8 +37,8 @@
  #if !defined(IB_CM_H)
  #define IB_CM_H
  
-#include <ib_mad.h>
-#include <ib_sa.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_sa.h>
  
  enum ib_cm_state {
         IB_CM_IDLE,
@@ -115,7 +115,7 @@ struct ib_cm_req_event_param {
         struct ib_sa_path_rec   *primary_path;
         struct ib_sa_path_rec   *alternate_path;
  
-       u64                     remote_ca_guid;
+       __be64                  remote_ca_guid;
         u32                     remote_qkey;
         u32                     remote_qpn;
         enum ib_qp_type         qp_type;
@@ -132,7 +132,7 @@ struct ib_cm_req_event_param {
  };
  
  struct ib_cm_rep_event_param {
-       u64                     remote_ca_guid;
+       __be64                  remote_ca_guid;
         u32                     remote_qkey;
         u32                     remote_qpn;
         u32                     starting_psn;
@@ -146,38 +146,39 @@ struct ib_cm_rep_event_param {
  };
  
  enum ib_cm_rej_reason {
-       IB_CM_REJ_NO_QP                         = __constant_htons(1),
-       IB_CM_REJ_NO_EEC                        = __constant_htons(2),
-       IB_CM_REJ_NO_RESOURCES                  = __constant_htons(3),
-       IB_CM_REJ_TIMEOUT                       = __constant_htons(4),
-       IB_CM_REJ_UNSUPPORTED                   = __constant_htons(5),
-       IB_CM_REJ_INVALID_COMM_ID               = __constant_htons(6),
-       IB_CM_REJ_INVALID_COMM_INSTANCE         = __constant_htons(7),
-       IB_CM_REJ_INVALID_SERVICE_ID            = __constant_htons(8),
-       IB_CM_REJ_INVALID_TRANSPORT_TYPE        = __constant_htons(9),
-       IB_CM_REJ_STALE_CONN                    = __constant_htons(10),
-       IB_CM_REJ_RDC_NOT_EXIST                 = __constant_htons(11),
-       IB_CM_REJ_INVALID_GID                   = __constant_htons(12),
-       IB_CM_REJ_INVALID_LID                   = __constant_htons(13),
-       IB_CM_REJ_INVALID_SL                    = __constant_htons(14),
-       IB_CM_REJ_INVALID_TRAFFIC_CLASS         = __constant_htons(15),
-       IB_CM_REJ_INVALID_HOP_LIMIT             = __constant_htons(16),
-       IB_CM_REJ_INVALID_PACKET_RATE           = __constant_htons(17),
-       IB_CM_REJ_INVALID_ALT_GID               = __constant_htons(18),
-       IB_CM_REJ_INVALID_ALT_LID               = __constant_htons(19),
-       IB_CM_REJ_INVALID_ALT_SL                = __constant_htons(20),
-       IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS     = __constant_htons(21),
-       IB_CM_REJ_INVALID_ALT_HOP_LIMIT         = __constant_htons(22),
-       IB_CM_REJ_INVALID_ALT_PACKET_RATE       = __constant_htons(23),
-       IB_CM_REJ_PORT_REDIRECT                 = __constant_htons(24),
-       IB_CM_REJ_INVALID_MTU                   = __constant_htons(26),
-       IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES   = __constant_htons(27),
-       IB_CM_REJ_CONSUMER_DEFINED              = __constant_htons(28),
-       IB_CM_REJ_INVALID_RNR_RETRY             = __constant_htons(29),
-       IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID       = __constant_htons(30),
-       IB_CM_REJ_INVALID_CLASS_VERSION         = __constant_htons(31),
-       IB_CM_REJ_INVALID_FLOW_LABEL            = __constant_htons(32),
-       IB_CM_REJ_INVALID_ALT_FLOW_LABEL        = __constant_htons(33)
+       IB_CM_REJ_NO_QP                         = 1,
+       IB_CM_REJ_NO_EEC                        = 2,
+       IB_CM_REJ_NO_RESOURCES                  = 3,
+       IB_CM_REJ_TIMEOUT                       = 4,
+       IB_CM_REJ_UNSUPPORTED                   = 5,
+       IB_CM_REJ_INVALID_COMM_ID               = 6,
+       IB_CM_REJ_INVALID_COMM_INSTANCE         = 7,
+       IB_CM_REJ_INVALID_SERVICE_ID            = 8,
+       IB_CM_REJ_INVALID_TRANSPORT_TYPE        = 9,
+       IB_CM_REJ_STALE_CONN                    = 10,
+       IB_CM_REJ_RDC_NOT_EXIST                 = 11,
+       IB_CM_REJ_INVALID_GID                   = 12,
+       IB_CM_REJ_INVALID_LID                   = 13,
+       IB_CM_REJ_INVALID_SL                    = 14,
+       IB_CM_REJ_INVALID_TRAFFIC_CLASS         = 15,
+       IB_CM_REJ_INVALID_HOP_LIMIT             = 16,
+       IB_CM_REJ_INVALID_PACKET_RATE           = 17,
+       IB_CM_REJ_INVALID_ALT_GID               = 18,
+       IB_CM_REJ_INVALID_ALT_LID               = 19,
+       IB_CM_REJ_INVALID_ALT_SL                = 20,
+       IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS     = 21,
+       IB_CM_REJ_INVALID_ALT_HOP_LIMIT         = 22,
+       IB_CM_REJ_INVALID_ALT_PACKET_RATE       = 23,
+       IB_CM_REJ_PORT_CM_REDIRECT              = 24,
+       IB_CM_REJ_PORT_REDIRECT                 = 25,
+       IB_CM_REJ_INVALID_MTU                   = 26,
+       IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES   = 27,
+       IB_CM_REJ_CONSUMER_DEFINED              = 28,
+       IB_CM_REJ_INVALID_RNR_RETRY             = 29,
+       IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID       = 30,
+       IB_CM_REJ_INVALID_CLASS_VERSION         = 31,
+       IB_CM_REJ_INVALID_FLOW_LABEL            = 32,
+       IB_CM_REJ_INVALID_ALT_FLOW_LABEL        = 33
  };
  
  struct ib_cm_rej_event_param {
@@ -221,8 +222,7 @@ struct ib_cm_sidr_req_event_param {
         struct ib_cm_id         *listen_id;
         struct ib_device        *device;
         u8                      port;
-
-       u16     pkey;
+       u16                     pkey;
  };
  
  enum ib_cm_sidr_status {
@@ -284,12 +284,12 @@ typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id,
  struct ib_cm_id {
         ib_cm_handler           cm_handler;
         void                    *context;
-       u64                     service_id;
-       u64                     service_mask;
+       __be64                  service_id;
+       __be64                  service_mask;
         enum ib_cm_state        state;          /* internal CM/debug use */
         enum ib_cm_lap_state    lap_state;      /* internal CM/debug use */
-       u32                     local_id;
-       u32                     remote_id;
+       __be32                  local_id;
+       __be32                  remote_id;
  };
  
  /**
@@ -329,13 +329,13 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id);
   *   IB_CM_ASSIGN_SERVICE_ID.
   */
  int ib_cm_listen(struct ib_cm_id *cm_id,
-                u64 service_id,
-                u64 service_mask);
+                __be64 service_id,
+                __be64 service_mask);
  
  struct ib_cm_req_param {
         struct ib_sa_path_rec   *primary_path;
         struct ib_sa_path_rec   *alternate_path;
-       u64                     service_id;
+       __be64                  service_id;
         u32                     qp_num;
         enum ib_qp_type         qp_type;
         u32                     starting_psn;
@@ -527,7 +527,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id,
  
  struct ib_cm_sidr_req_param {
         struct ib_sa_path_rec   *path;
-       u64                     service_id;
+       __be64                  service_id;
         int                     timeout_ms;
         const void              *private_data;
         u8                      private_data_len;
diff --git a/drivers/infiniband/include/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h

similarity index 99%

rename from drivers/infiniband/include/ib_fmr_pool.h

rename to include/rdma/ib_fmr_pool.h

index 6c9e24d6e1448968e583577ede6972f982a2f46e..86b7e93f198b429959c70dbf20d5782bb24a351a 100644 (file)
--- a/drivers/infiniband/include/ib_fmr_pool.h
+++ b/include/rdma/ib_fmr_pool.h
@@ -36,7 +36,7 @@
  #if !defined(IB_FMR_POOL_H)
  #define IB_FMR_POOL_H
  
-#include <ib_verbs.h>
+#include <rdma/ib_verbs.h>
  
  struct ib_fmr_pool;
  
diff --git a/drivers/infiniband/include/ib_mad.h b/include/rdma/ib_mad.h

similarity index 98%

rename from drivers/infiniband/include/ib_mad.h

rename to include/rdma/ib_mad.h

index 491b6f25b3b854afa6ac796d6f1042e4d7778b03..fc6b1c18ffc698fae47992646c2abed213a9ac3a 100644 (file)
--- a/drivers/infiniband/include/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -41,7 +41,7 @@
  
  #include <linux/pci.h>
  
-#include <ib_verbs.h>
+#include <rdma/ib_verbs.h>
  
  /* Management base version */
  #define IB_MGMT_BASE_VERSION                   1
@@ -90,6 +90,7 @@
  
  #define        IB_MGMT_RMPP_STATUS_SUCCESS             0
  #define        IB_MGMT_RMPP_STATUS_RESX                1
+#define        IB_MGMT_RMPP_STATUS_ABORT_MIN           118
  #define        IB_MGMT_RMPP_STATUS_T2L                 118
  #define        IB_MGMT_RMPP_STATUS_BAD_LEN             119
  #define        IB_MGMT_RMPP_STATUS_BAD_SEG             120
@@ -100,6 +101,7 @@
  #define        IB_MGMT_RMPP_STATUS_UNV                 125
  #define        IB_MGMT_RMPP_STATUS_TMR                 126
  #define        IB_MGMT_RMPP_STATUS_UNSPEC              127
+#define        IB_MGMT_RMPP_STATUS_ABORT_MAX           127
  
  #define IB_QP0         0
  #define IB_QP1         __constant_htonl(1)
@@ -111,12 +113,12 @@ struct ib_mad_hdr {
         u8      mgmt_class;
         u8      class_version;
         u8      method;
-       u16     status;
-       u16     class_specific;
-       u64     tid;
-       u16     attr_id;
-       u16     resv;
-       u32     attr_mod;
+       __be16  status;
+       __be16  class_specific;
+       __be64  tid;
+       __be16  attr_id;
+       __be16  resv;
+       __be32  attr_mod;
  };
  
  struct ib_rmpp_hdr {
@@ -124,8 +126,8 @@ struct ib_rmpp_hdr {
         u8      rmpp_type;
         u8      rmpp_rtime_flags;
         u8      rmpp_status;
-       u32     seg_num;
-       u32     paylen_newwin;
+       __be32  seg_num;
+       __be32  paylen_newwin;
  };
  
  typedef u64 __bitwise ib_sa_comp_mask;
@@ -139,9 +141,9 @@ typedef u64 __bitwise ib_sa_comp_mask;
   * the wire so we can't change the layout)
   */
  struct ib_sa_hdr {
-       u64                     sm_key;
-       u16                     attr_offset;
-       u16                     reserved;
+       __be64                  sm_key;
+       __be16                  attr_offset;
+       __be16                  reserved;
         ib_sa_comp_mask         comp_mask;
  } __attribute__ ((packed));
  
diff --git a/drivers/infiniband/include/ib_pack.h b/include/rdma/ib_pack.h

similarity index 99%

rename from drivers/infiniband/include/ib_pack.h

rename to include/rdma/ib_pack.h

index fe480f3e8654d8afb1fee3d5a158e9860fc60f1b..f926020d63314dc2a5a43fd047ea4ef95d43bc36 100644 (file)
--- a/drivers/infiniband/include/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -35,7 +35,7 @@
  #ifndef IB_PACK_H
  #define IB_PACK_H
  
-#include <ib_verbs.h>
+#include <rdma/ib_verbs.h>
  
  enum {
         IB_LRH_BYTES  = 8,
diff --git a/drivers/infiniband/include/ib_sa.h b/include/rdma/ib_sa.h

similarity index 98%

rename from drivers/infiniband/include/ib_sa.h

rename to include/rdma/ib_sa.h

index 6d999f7b5d93fd5146ceb02b4988f0cc00433e5e..c022edfc49da013844509e4b4ff22368449cae9c 100644 (file)
--- a/drivers/infiniband/include/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -38,8 +38,8 @@
  
  #include <linux/compiler.h>
  
-#include <ib_verbs.h>
-#include <ib_mad.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
  
  enum {
         IB_SA_CLASS_VERSION             = 2,    /* IB spec version 1.1/1.2 */
@@ -133,16 +133,16 @@ struct ib_sa_path_rec {
         /* reserved */
         union ib_gid dgid;
         union ib_gid sgid;
-       u16          dlid;
-       u16          slid;
+       __be16       dlid;
+       __be16       slid;
         int          raw_traffic;
         /* reserved */
-       u32          flow_label;
+       __be32       flow_label;
         u8           hop_limit;
         u8           traffic_class;
         int          reversible;
         u8           numb_path;
-       u16          pkey;
+       __be16       pkey;
         /* reserved */
         u8           sl;
         u8           mtu_selector;
@@ -176,18 +176,18 @@ struct ib_sa_path_rec {
  struct ib_sa_mcmember_rec {
         union ib_gid mgid;
         union ib_gid port_gid;
-       u32          qkey;
-       u16          mlid;
+       __be32       qkey;
+       __be16       mlid;
         u8           mtu_selector;
         u8           mtu;
         u8           traffic_class;
-       u16          pkey;
+       __be16       pkey;
         u8           rate_selector;
         u8           rate;
         u8           packet_life_time_selector;
         u8           packet_life_time;
         u8           sl;
-       u32          flow_label;
+       __be32       flow_label;
         u8           hop_limit;
         u8           scope;
         u8           join_state;
@@ -238,7 +238,7 @@ struct ib_sa_mcmember_rec {
  struct ib_sa_service_rec {
         u64             id;
         union ib_gid    gid;
-       u16             pkey;
+       __be16          pkey;
         /* reserved */
         u32             lease;
         u8              key[16];
diff --git a/drivers/infiniband/include/ib_smi.h b/include/rdma/ib_smi.h

similarity index 95%

rename from drivers/infiniband/include/ib_smi.h

rename to include/rdma/ib_smi.h

index ca82165149639e1d4cba6591f63a2d33e90dcfb0..87f60737f69575e2b86426f3cbaa592638dca151 100644 (file)
--- a/drivers/infiniband/include/ib_smi.h
+++ b/include/rdma/ib_smi.h
@@ -39,9 +39,7 @@
  #if !defined( IB_SMI_H )
  #define IB_SMI_H
  
-#include <ib_mad.h>
-
-#define IB_LID_PERMISSIVE                      0xFFFF
+#include <rdma/ib_mad.h>
  
  #define IB_SMP_DATA_SIZE                       64
  #define IB_SMP_MAX_PATH_HOPS                   64
@@ -51,16 +49,16 @@ struct ib_smp {
         u8      mgmt_class;
         u8      class_version;
         u8      method;
-       u16     status;
+       __be16  status;
         u8      hop_ptr;
         u8      hop_cnt;
-       u64     tid;
-       u16     attr_id;
-       u16     resv;
-       u32     attr_mod;
-       u64     mkey;
-       u16     dr_slid;
-       u16     dr_dlid;
+       __be64  tid;
+       __be16  attr_id;
+       __be16  resv;
+       __be32  attr_mod;
+       __be64  mkey;
+       __be16  dr_slid;
+       __be16  dr_dlid;
         u8      reserved[28];
         u8      data[IB_SMP_DATA_SIZE];
         u8      initial_path[IB_SMP_MAX_PATH_HOPS];
diff --git a/drivers/infiniband/include/ib_user_cm.h b/include/rdma/ib_user_cm.h

similarity index 95%

rename from drivers/infiniband/include/ib_user_cm.h

rename to include/rdma/ib_user_cm.h

index 500b1af6ff773e00b78f7c3f765f8b94084d7fee..72182d16778b5dabd3a64157b17e8eb9cb0d267f 100644 (file)
--- a/drivers/infiniband/include/ib_user_cm.h
+++ b/include/rdma/ib_user_cm.h
@@ -88,15 +88,15 @@ struct ib_ucm_attr_id {
  };
  
  struct ib_ucm_attr_id_resp {
-       __u64 service_id;
-       __u64 service_mask;
-       __u32 local_id;
-       __u32 remote_id;
+       __be64 service_id;
+       __be64 service_mask;
+       __be32 local_id;
+       __be32 remote_id;
  };
  
  struct ib_ucm_listen {
-       __u64 service_id;
-       __u64 service_mask;
+       __be64 service_id;
+       __be64 service_mask;
         __u32 id;
  };
  
@@ -114,13 +114,13 @@ struct ib_ucm_private_data {
  struct ib_ucm_path_rec {
         __u8  dgid[16];
         __u8  sgid[16];
-       __u16 dlid;
-       __u16 slid;
+       __be16 dlid;
+       __be16 slid;
         __u32 raw_traffic;
-       __u32 flow_label;
+       __be32 flow_label;
         __u32 reversible;
         __u32 mtu;
-       __u16 pkey;
+       __be16 pkey;
         __u8  hop_limit;
         __u8  traffic_class;
         __u8  numb_path;
@@ -138,7 +138,7 @@ struct ib_ucm_req {
         __u32 qpn;
         __u32 qp_type;
         __u32 psn;
-       __u64 sid;
+       __be64 sid;
         __u64 data;
         __u64 primary_path;
         __u64 alternate_path;
@@ -200,7 +200,7 @@ struct ib_ucm_lap {
  struct ib_ucm_sidr_req {
         __u32 id;
         __u32 timeout;
-       __u64 sid;
+       __be64 sid;
         __u64 data;
         __u64 path;
         __u16 pkey;
@@ -237,7 +237,7 @@ struct ib_ucm_req_event_resp {
         /* port */
         struct ib_ucm_path_rec primary_path;
         struct ib_ucm_path_rec alternate_path;
-       __u64                  remote_ca_guid;
+       __be64                 remote_ca_guid;
         __u32                  remote_qkey;
         __u32                  remote_qpn;
         __u32                  qp_type;
@@ -253,7 +253,7 @@ struct ib_ucm_req_event_resp {
  };
  
  struct ib_ucm_rep_event_resp {
-       __u64 remote_ca_guid;
+       __be64 remote_ca_guid;
         __u32 remote_qkey;
         __u32 remote_qpn;
         __u32 starting_psn;
diff --git a/drivers/infiniband/include/ib_user_mad.h b/include/rdma/ib_user_mad.h

similarity index 97%

rename from drivers/infiniband/include/ib_user_mad.h

rename to include/rdma/ib_user_mad.h

index a9a56b50aacc229d3529274769e0fa17ec4e75d4..44537aa32e62319072c104058dfe020682576894 100644 (file)
--- a/drivers/infiniband/include/ib_user_mad.h
+++ b/include/rdma/ib_user_mad.h
@@ -70,8 +70,6 @@
   * @traffic_class - Traffic class in GRH
   * @gid - Remote GID in GRH
   * @flow_label - Flow label in GRH
- *
- * All multi-byte quantities are stored in network (big endian) byte order.
   */
  struct ib_user_mad_hdr {
         __u32   id;
@@ -79,9 +77,9 @@ struct ib_user_mad_hdr {
         __u32   timeout_ms;
         __u32   retries;
         __u32   length;
-       __u32   qpn;
-       __u32   qkey;
-       __u16   lid;
+       __be32  qpn;
+       __be32  qkey;
+       __be16  lid;
         __u8    sl;
         __u8    path_bits;
         __u8    grh_present;
@@ -89,7 +87,7 @@ struct ib_user_mad_hdr {
         __u8    hop_limit;
         __u8    traffic_class;
         __u8    gid[16];
-       __u32   flow_label;
+       __be32  flow_label;
  };
  
  /**
diff --git a/drivers/infiniband/include/ib_user_verbs.h b/include/rdma/ib_user_verbs.h

similarity index 92%

rename from drivers/infiniband/include/ib_user_verbs.h

rename to include/rdma/ib_user_verbs.h

index 7c613706af725f76c08c24903ec7aa1b6ebb7761..7ebb01c8f99638597123d4f4185ebd14be9ab1fa 100644 (file)
--- a/drivers/infiniband/include/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -78,7 +78,12 @@ enum {
         IB_USER_VERBS_CMD_POST_SEND,
         IB_USER_VERBS_CMD_POST_RECV,
         IB_USER_VERBS_CMD_ATTACH_MCAST,
-       IB_USER_VERBS_CMD_DETACH_MCAST
+       IB_USER_VERBS_CMD_DETACH_MCAST,
+       IB_USER_VERBS_CMD_CREATE_SRQ,
+       IB_USER_VERBS_CMD_MODIFY_SRQ,
+       IB_USER_VERBS_CMD_QUERY_SRQ,
+       IB_USER_VERBS_CMD_DESTROY_SRQ,
+       IB_USER_VERBS_CMD_POST_SRQ_RECV
  };
  
  /*
@@ -143,8 +148,8 @@ struct ib_uverbs_query_device {
  
  struct ib_uverbs_query_device_resp {
         __u64 fw_ver;
-       __u64 node_guid;
-       __u64 sys_image_guid;
+       __be64 node_guid;
+       __be64 sys_image_guid;
         __u64 max_mr_size;
         __u64 page_size_cap;
         __u32 vendor_id;
@@ -386,4 +391,32 @@ struct ib_uverbs_detach_mcast {
         __u64 driver_data[0];
  };
  
+struct ib_uverbs_create_srq {
+       __u64 response;
+       __u64 user_handle;
+       __u32 pd_handle;
+       __u32 max_wr;
+       __u32 max_sge;
+       __u32 srq_limit;
+       __u64 driver_data[0];
+};
+
+struct ib_uverbs_create_srq_resp {
+       __u32 srq_handle;
+};
+
+struct ib_uverbs_modify_srq {
+       __u32 srq_handle;
+       __u32 attr_mask;
+       __u32 max_wr;
+       __u32 max_sge;
+       __u32 srq_limit;
+       __u32 reserved;
+       __u64 driver_data[0];
+};
+
+struct ib_uverbs_destroy_srq {
+       __u32 srq_handle;
+};
+
  #endif /* IB_USER_VERBS_H */
diff --git a/drivers/infiniband/include/ib_verbs.h b/include/rdma/ib_verbs.h

similarity index 91%

rename from drivers/infiniband/include/ib_verbs.h

rename to include/rdma/ib_verbs.h

index 5d24edaa66e6381b29b32ee4f53995be5f58bdd0..e16cf94870f263936fdcbef502739791a709f11d 100644 (file)
--- a/drivers/infiniband/include/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4,6 +4,7 @@
   * Copyright (c) 2004 Intel Corporation.  All rights reserved.
   * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
   * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   * Copyright (c) 2005 Cisco Systems.  All rights reserved.
   *
   * This software is available to you under a choice of one of two
@@ -50,8 +51,8 @@
  union ib_gid {
         u8      raw[16];
         struct {
-               u64     subnet_prefix;
-               u64     interface_id;
+               __be64  subnet_prefix;
+               __be64  interface_id;
         } global;
  };
  
@@ -87,8 +88,8 @@ enum ib_atomic_cap {
  
  struct ib_device_attr {
         u64                     fw_ver;
-       u64                     node_guid;
-       u64                     sys_image_guid;
+       __be64                  node_guid;
+       __be64                  sys_image_guid;
         u64                     max_mr_size;
         u64                     page_size_cap;
         u32                     vendor_id;
@@ -255,7 +256,10 @@ enum ib_event_type {
         IB_EVENT_PORT_ERR,
         IB_EVENT_LID_CHANGE,
         IB_EVENT_PKEY_CHANGE,
-       IB_EVENT_SM_CHANGE
+       IB_EVENT_SM_CHANGE,
+       IB_EVENT_SRQ_ERR,
+       IB_EVENT_SRQ_LIMIT_REACHED,
+       IB_EVENT_QP_LAST_WQE_REACHED
  };
  
  struct ib_event {
@@ -263,6 +267,7 @@ struct ib_event {
         union {
                 struct ib_cq    *cq;
                 struct ib_qp    *qp;
+               struct ib_srq   *srq;
                 u8              port_num;
         } element;
         enum ib_event_type      event;
@@ -290,8 +295,8 @@ struct ib_global_route {
  };
  
  struct ib_grh {
-       u32             version_tclass_flow;
-       u16             paylen;
+       __be32          version_tclass_flow;
+       __be16          paylen;
         u8              next_hdr;
         u8              hop_limit;
         union ib_gid    sgid;
@@ -302,6 +307,8 @@ enum {
         IB_MULTICAST_QPN = 0xffffff
  };
  
+#define IB_LID_PERMISSIVE      __constant_htons(0xFFFF)
+
  enum ib_ah_flags {
         IB_AH_GRH       = 1
  };
@@ -383,6 +390,23 @@ enum ib_cq_notify {
         IB_CQ_NEXT_COMP
  };
  
+enum ib_srq_attr_mask {
+       IB_SRQ_MAX_WR   = 1 << 0,
+       IB_SRQ_LIMIT    = 1 << 1,
+};
+
+struct ib_srq_attr {
+       u32     max_wr;
+       u32     max_sge;
+       u32     srq_limit;
+};
+
+struct ib_srq_init_attr {
+       void                  (*event_handler)(struct ib_event *, void *);
+       void                   *srq_context;
+       struct ib_srq_attr      attr;
+};
+
  struct ib_qp_cap {
         u32     max_send_wr;
         u32     max_recv_wr;
@@ -710,10 +734,11 @@ struct ib_cq {
  };
  
  struct ib_srq {
-       struct ib_device        *device;
-       struct ib_uobject       *uobject;
-       struct ib_pd            *pd;
-       void                    *srq_context;
+       struct ib_device       *device;
+       struct ib_pd           *pd;
+       struct ib_uobject      *uobject;
+       void                  (*event_handler)(struct ib_event *, void *);
+       void                   *srq_context;
         atomic_t                usecnt;
  };
  
@@ -827,6 +852,18 @@ struct ib_device {
         int                        (*query_ah)(struct ib_ah *ah,
                                                struct ib_ah_attr *ah_attr);
         int                        (*destroy_ah)(struct ib_ah *ah);
+       struct ib_srq *            (*create_srq)(struct ib_pd *pd,
+                                                struct ib_srq_init_attr *srq_init_attr,
+                                                struct ib_udata *udata);
+       int                        (*modify_srq)(struct ib_srq *srq,
+                                                struct ib_srq_attr *srq_attr,
+                                                enum ib_srq_attr_mask srq_attr_mask);
+       int                        (*query_srq)(struct ib_srq *srq,
+                                               struct ib_srq_attr *srq_attr);
+       int                        (*destroy_srq)(struct ib_srq *srq);
+       int                        (*post_srq_recv)(struct ib_srq *srq,
+                                                   struct ib_recv_wr *recv_wr,
+                                                   struct ib_recv_wr **bad_recv_wr);
         struct ib_qp *             (*create_qp)(struct ib_pd *pd,
                                                 struct ib_qp_init_attr *qp_init_attr,
                                                 struct ib_udata *udata);
@@ -1038,6 +1075,65 @@ int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
   */
  int ib_destroy_ah(struct ib_ah *ah);
  
+/**
+ * ib_create_srq - Creates a SRQ associated with the specified protection
+ *   domain.
+ * @pd: The protection domain associated with the SRQ.
+ * @srq_init_attr: A list of initial attributes required to create the SRQ.
+ *
+ * srq_attr->max_wr and srq_attr->max_sge are read to determine the
+ * requested size of the SRQ, and set to the actual values allocated
+ * on return.  If ib_create_srq() succeeds, then max_wr and max_sge
+ * will always be at least as large as the requested values.
+ */
+struct ib_srq *ib_create_srq(struct ib_pd *pd,
+                            struct ib_srq_init_attr *srq_init_attr);
+
+/**
+ * ib_modify_srq - Modifies the attributes for the specified SRQ.
+ * @srq: The SRQ to modify.
+ * @srq_attr: On input, specifies the SRQ attributes to modify.  On output,
+ *   the current values of selected SRQ attributes are returned.
+ * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ
+ *   are being modified.
+ *
+ * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or
+ * IB_SRQ_LIMIT to set the SRQ's limit and request notification when
+ * the number of receives queued drops below the limit.
+ */
+int ib_modify_srq(struct ib_srq *srq,
+                 struct ib_srq_attr *srq_attr,
+                 enum ib_srq_attr_mask srq_attr_mask);
+
+/**
+ * ib_query_srq - Returns the attribute list and current values for the
+ *   specified SRQ.
+ * @srq: The SRQ to query.
+ * @srq_attr: The attributes of the specified SRQ.
+ */
+int ib_query_srq(struct ib_srq *srq,
+                struct ib_srq_attr *srq_attr);
+
+/**
+ * ib_destroy_srq - Destroys the specified SRQ.
+ * @srq: The SRQ to destroy.
+ */
+int ib_destroy_srq(struct ib_srq *srq);
+
+/**
+ * ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
+ * @srq: The SRQ to post the work request on.
+ * @recv_wr: A list of work requests to post on the receive queue.
+ * @bad_recv_wr: On an immediate failure, this parameter will reference
+ *   the work request that failed to be posted on the SRQ.
+ */
+static inline int ib_post_srq_recv(struct ib_srq *srq,
+                                  struct ib_recv_wr *recv_wr,
+                                  struct ib_recv_wr **bad_recv_wr)
+{
+       return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr);
+}
+
  /**
   * ib_create_qp - Creates a QP associated with the specified protection
   *   domain.
diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h

index a4f1837a33b150b925e471e1c2938d35a6c4eded..f6e0bb484c63efb9194c8be4ce90bc7b5da0d482 100644 (file)
--- a/include/scsi/scsi_transport.h
+++ b/include/scsi/scsi_transport.h
@@ -29,6 +29,14 @@ struct scsi_transport_template {
         struct transport_container target_attrs;
         struct transport_container device_attrs;
  
+       /*
+        * If set, call target_parent prior to allocating a scsi_target,
+        * so we get the appropriate parent for the target. This function
+        * is required for transports like FC and iSCSI that do not put the
+        * scsi_target under scsi_host.
+        */
+       struct device *(*target_parent)(struct Scsi_Host *, int, uint);
+
         /* The size of the specific transport attribute structure (a
          * space of this size will be left at the end of the
          * scsi_* structure */
diff --git a/include/sound/core.h b/include/sound/core.h

index 38b357fc8958e991ef4cb53abdc4e6c01993ba88..f72b3ef515e25e7d8ca1ee64f7f063a7b8d84f9f 100644 (file)
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -360,11 +360,13 @@ int snd_device_free_all(snd_card_t *card, snd_device_cmd_t cmd);
  
  /* isadma.c */
  
+#ifdef CONFIG_ISA_DMA_API
  #define DMA_MODE_NO_ENABLE     0x0100
  
  void snd_dma_program(unsigned long dma, unsigned long addr, unsigned int size, unsigned short mode);
  void snd_dma_disable(unsigned long dma);
  unsigned int snd_dma_pointer(unsigned long dma, unsigned int size);
+#endif
  
  /* misc.c */
  
diff --git a/init/main.c b/init/main.c

index c9c311cf1771362a349c177e5662140640370e36..ff410063e4e13ca6a296a680a136a15297d079de 100644 (file)
--- a/init/main.c
+++ b/init/main.c
@@ -47,6 +47,7 @@
  #include <linux/rmap.h>
  #include <linux/mempolicy.h>
  #include <linux/key.h>
+#include <net/sock.h>
  
  #include <asm/io.h>
  #include <asm/bugs.h>
@@ -80,7 +81,6 @@
  static int init(void *);
  
  extern void init_IRQ(void);
-extern void sock_init(void);
  extern void fork_init(unsigned long);
  extern void mca_init(void);
  extern void sbus_init(void);
diff --git a/ipc/sem.c b/ipc/sem.c

index 7e8a25c82ef3e550bdd200006d9dd711c41a3db8..70975ce0784a9082207659c31e4e1cd946f32c7b 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -895,7 +895,7 @@ static inline void lock_semundo(void)
         struct sem_undo_list *undo_list;
  
         undo_list = current->sysvsem.undo_list;
-       if ((undo_list != NULL) && (atomic_read(&undo_list->refcnt) != 1))
+       if (undo_list)
                 spin_lock(&undo_list->lock);
  }
  
@@ -915,7 +915,7 @@ static inline void unlock_semundo(void)
         struct sem_undo_list *undo_list;
  
         undo_list = current->sysvsem.undo_list;
-       if ((undo_list != NULL) && (atomic_read(&undo_list->refcnt) != 1))
+       if (undo_list)
                 spin_unlock(&undo_list->lock);
  }
  
@@ -943,9 +943,7 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
                 if (undo_list == NULL)
                         return -ENOMEM;
                 memset(undo_list, 0, size);
-               /* don't initialize unodhd->lock here.  It's done
-                * in copy_semundo() instead.
-                */
+               spin_lock_init(&undo_list->lock);
                 atomic_set(&undo_list->refcnt, 1);
                 current->sysvsem.undo_list = undo_list;
         }
@@ -1231,8 +1229,6 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
                 error = get_undo_list(&undo_list);
                 if (error)
                         return error;
-               if (atomic_read(&undo_list->refcnt) == 1)
-                       spin_lock_init(&undo_list->lock);
                 atomic_inc(&undo_list->refcnt);
                 tsk->sysvsem.undo_list = undo_list;
         } else 
diff --git a/ipc/shm.c b/ipc/shm.c

index cce022435dbc95ff562b28cd073b2d239206af52..1d6cf08d950b93c9f5223c7ead161b6f08ed9877 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -170,7 +170,7 @@ static struct vm_operations_struct shm_vm_ops = {
         .open   = shm_open,     /* callback for a new vm-area open */
         .close  = shm_close,    /* callback for when the vm-area is released */
         .nopage = shmem_nopage,
-#ifdef CONFIG_NUMA
+#if defined(CONFIG_NUMA) && defined(CONFIG_SHMEM)
         .set_policy = shmem_set_policy,
         .get_policy = shmem_get_policy,
  #endif
diff --git a/kernel/audit.c b/kernel/audit.c

index ef35166fdc29b459e0beb24505df6766972b261b..7f0699790d469ce218ff4ec91967608275871fb7 100644 (file)
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -514,7 +514,8 @@ static int __init audit_init(void)
  {
         printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
                audit_default ? "enabled" : "disabled");
-       audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive);
+       audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
+                                          THIS_MODULE);
         if (!audit_sock)
                 audit_panic("cannot initialize netlink socket");
  
diff --git a/kernel/cpuset.c b/kernel/cpuset.c

index 805fb9097318b761b60b98e8ad8f11a7d3756fcf..8ab1b4e518b8909a867595534957dbf3c12cf3d8 100644 (file)
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -398,21 +398,31 @@ static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
   * to continue to serve a useful existence.  Next time it's released,
   * we will get notified again, if it still has 'notify_on_release' set.
   *
- * Note final arg to call_usermodehelper() is 0 - that means
- * don't wait.  Since we are holding the global cpuset_sem here,
- * and we are asking another thread (started from keventd) to rmdir a
- * cpuset, we can't wait - or we'd deadlock with the removing thread
- * on cpuset_sem.
+ * The final arg to call_usermodehelper() is 0, which means don't
+ * wait.  The separate /sbin/cpuset_release_agent task is forked by
+ * call_usermodehelper(), then control in this thread returns here,
+ * without waiting for the release agent task.  We don't bother to
+ * wait because the caller of this routine has no use for the exit
+ * status of the /sbin/cpuset_release_agent task, so no sense holding
+ * our caller up for that.
+ *
+ * The simple act of forking that task might require more memory,
+ * which might need cpuset_sem.  So this routine must be called while
+ * cpuset_sem is not held, to avoid a possible deadlock.  See also
+ * comments for check_for_release(), below.
   */
  
-static int cpuset_release_agent(char *cpuset_str)
+static void cpuset_release_agent(const char *pathbuf)
  {
         char *argv[3], *envp[3];
         int i;
  
+       if (!pathbuf)
+               return;
+
         i = 0;
         argv[i++] = "/sbin/cpuset_release_agent";
-       argv[i++] = cpuset_str;
+       argv[i++] = (char *)pathbuf;
         argv[i] = NULL;
  
         i = 0;
@@ -421,17 +431,29 @@ static int cpuset_release_agent(char *cpuset_str)
         envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
         envp[i] = NULL;
  
-       return call_usermodehelper(argv[0], argv, envp, 0);
+       call_usermodehelper(argv[0], argv, envp, 0);
+       kfree(pathbuf);
  }
  
  /*
   * Either cs->count of using tasks transitioned to zero, or the
   * cs->children list of child cpusets just became empty.  If this
   * cs is notify_on_release() and now both the user count is zero and
- * the list of children is empty, send notice to user land.
+ * the list of children is empty, prepare cpuset path in a kmalloc'd
+ * buffer, to be returned via ppathbuf, so that the caller can invoke
+ * cpuset_release_agent() with it later on, once cpuset_sem is dropped.
+ * Call here with cpuset_sem held.
+ *
+ * This check_for_release() routine is responsible for kmalloc'ing
+ * pathbuf.  The above cpuset_release_agent() is responsible for
+ * kfree'ing pathbuf.  The caller of these routines is responsible
+ * for providing a pathbuf pointer, initialized to NULL, then
+ * calling check_for_release() with cpuset_sem held and the address
+ * of the pathbuf pointer, then dropping cpuset_sem, then calling
+ * cpuset_release_agent() with pathbuf, as set by check_for_release().
   */
  
-static void check_for_release(struct cpuset *cs)
+static void check_for_release(struct cpuset *cs, char **ppathbuf)
  {
         if (notify_on_release(cs) && atomic_read(&cs->count) == 0 &&
             list_empty(&cs->children)) {
@@ -441,10 +463,9 @@ static void check_for_release(struct cpuset *cs)
                 if (!buf)
                         return;
                 if (cpuset_path(cs, buf, PAGE_SIZE) < 0)
-                       goto out;
-               cpuset_release_agent(buf);
-out:
-               kfree(buf);
+                       kfree(buf);
+               else
+                       *ppathbuf = buf;
         }
  }
  
@@ -606,6 +627,14 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
   * Call with cpuset_sem held.  May nest a call to the
   * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
   */
+
+/*
+ * Hack to avoid 2.6.13 partial node dynamic sched domain bug.
+ * Disable letting 'cpu_exclusive' cpusets define dynamic sched
+ * domains, until the sched domain can handle partial nodes.
+ * Remove this #if hackery when sched domains fixed.
+ */
+#if 0
  static void update_cpu_domains(struct cpuset *cur)
  {
         struct cpuset *c, *par = cur->parent;
@@ -646,6 +675,11 @@ static void update_cpu_domains(struct cpuset *cur)
         partition_sched_domains(&pspan, &cspan);
         unlock_cpu_hotplug();
  }
+#else
+static void update_cpu_domains(struct cpuset *cur)
+{
+}
+#endif
  
  static int update_cpumask(struct cpuset *cs, char *buf)
  {
@@ -727,14 +761,14 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
         return 0;
  }
  
-static int attach_task(struct cpuset *cs, char *buf)
+static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
  {
         pid_t pid;
         struct task_struct *tsk;
         struct cpuset *oldcs;
         cpumask_t cpus;
  
-       if (sscanf(buf, "%d", &pid) != 1)
+       if (sscanf(pidbuf, "%d", &pid) != 1)
                 return -EIO;
         if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
                 return -ENOSPC;
@@ -777,7 +811,7 @@ static int attach_task(struct cpuset *cs, char *buf)
  
         put_task_struct(tsk);
         if (atomic_dec_and_test(&oldcs->count))
-               check_for_release(oldcs);
+               check_for_release(oldcs, ppathbuf);
         return 0;
  }
  
@@ -801,6 +835,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
         struct cftype *cft = __d_cft(file->f_dentry);
         cpuset_filetype_t type = cft->private;
         char *buffer;
+       char *pathbuf = NULL;
         int retval = 0;
  
         /* Crude upper limit on largest legitimate cpulist user might write. */
@@ -841,7 +876,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
                 retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
                 break;
         case FILE_TASKLIST:
-               retval = attach_task(cs, buffer);
+               retval = attach_task(cs, buffer, &pathbuf);
                 break;
         default:
                 retval = -EINVAL;
@@ -852,6 +887,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
                 retval = nbytes;
  out2:
         up(&cpuset_sem);
+       cpuset_release_agent(pathbuf);
  out1:
         kfree(buffer);
         return retval;
@@ -1357,6 +1393,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
         struct cpuset *cs = dentry->d_fsdata;
         struct dentry *d;
         struct cpuset *parent;
+       char *pathbuf = NULL;
  
         /* the vfs holds both inode->i_sem already */
  
@@ -1376,7 +1413,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
                 update_cpu_domains(cs);
         list_del(&cs->sibling); /* delete my sibling from parent->children */
         if (list_empty(&parent->children))
-               check_for_release(parent);
+               check_for_release(parent, &pathbuf);
         spin_lock(&cs->dentry->d_lock);
         d = dget(cs->dentry);
         cs->dentry = NULL;
@@ -1384,6 +1421,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
         cpuset_d_remove_dir(d);
         dput(d);
         up(&cpuset_sem);
+       cpuset_release_agent(pathbuf);
         return 0;
  }
  
@@ -1483,10 +1521,13 @@ void cpuset_exit(struct task_struct *tsk)
         task_unlock(tsk);
  
         if (notify_on_release(cs)) {
+               char *pathbuf = NULL;
+
                 down(&cpuset_sem);
                 if (atomic_dec_and_test(&cs->count))
-                       check_for_release(cs);
+                       check_for_release(cs, &pathbuf);
                 up(&cpuset_sem);
+               cpuset_release_agent(pathbuf);
         } else {
                 atomic_dec(&cs->count);
         }
diff --git a/kernel/exit.c b/kernel/exit.c

index 9d1b10ed0135139b8514f829f345c6525699db50..5b0fb9f09f212d05642dd343be2692a2b0a039b7 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -829,8 +829,10 @@ fastcall NORET_TYPE void do_exit(long code)
         acct_update_integrals(tsk);
         update_mem_hiwater(tsk);
         group_dead = atomic_dec_and_test(&tsk->signal->live);
-       if (group_dead)
+       if (group_dead) {
+               del_timer_sync(&tsk->signal->real_timer);
                 acct_process(code);
+       }
         exit_mm(tsk);
  
         exit_sem(tsk);
diff --git a/kernel/module.c b/kernel/module.c

index 068e271ab3a538761c9129ec4b5fae0fdbe50910..c32995fbd8fd14b608bb49e5b0f5899e41ffd979 100644 (file)
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -250,13 +250,18 @@ static inline unsigned int block_size(int val)
  /* Created by linker magic */
  extern char __per_cpu_start[], __per_cpu_end[];
  
-static void *percpu_modalloc(unsigned long size, unsigned long align)
+static void *percpu_modalloc(unsigned long size, unsigned long align,
+                            const char *name)
  {
         unsigned long extra;
         unsigned int i;
         void *ptr;
  
-       BUG_ON(align > SMP_CACHE_BYTES);
+       if (align > SMP_CACHE_BYTES) {
+               printk(KERN_WARNING "%s: per-cpu alignment %li > %i\n",
+                      name, align, SMP_CACHE_BYTES);
+               align = SMP_CACHE_BYTES;
+       }
  
         ptr = __per_cpu_start;
         for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
@@ -348,7 +353,8 @@ static int percpu_modinit(void)
  }      
  __initcall(percpu_modinit);
  #else /* ... !CONFIG_SMP */
-static inline void *percpu_modalloc(unsigned long size, unsigned long align)
+static inline void *percpu_modalloc(unsigned long size, unsigned long align,
+                                   const char *name)
  {
         return NULL;
  }
@@ -1644,7 +1650,8 @@ static struct module *load_module(void __user *umod,
         if (pcpuindex) {
                 /* We have a special allocation for this section. */
                 percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
-                                        sechdrs[pcpuindex].sh_addralign);
+                                        sechdrs[pcpuindex].sh_addralign,
+                                        mod->name);
                 if (!percpu) {
                         err = -ENOMEM;
                         goto free_mod;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c

index 10b2ad749d146de67f2e210adb5a4fb4846ba8c5..38798a2ff994e96c017010b8c2c62db578fe2002 100644 (file)
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -1166,7 +1166,6 @@ void exit_itimers(struct signal_struct *sig)
                 tmr = list_entry(sig->posix_timers.next, struct k_itimer, list);
                 itimer_delete(tmr);
         }
-       del_timer_sync(&sig->real_timer);
  }
  
  /*
diff --git a/kernel/sched.c b/kernel/sched.c

index a646e4f36c4136d49d3ba1444cc22133e21b4d92..5f889d0cbfcc2e614aef87963ec0bea5ea6a5f21 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3378,8 +3378,8 @@ EXPORT_SYMBOL(set_user_nice);
   */
  int can_nice(const task_t *p, const int nice)
  {
-       /* convert nice value [19,-20] to rlimit style value [0,39] */
-       int nice_rlim = 19 - nice;
+       /* convert nice value [19,-20] to rlimit style value [1,40] */
+       int nice_rlim = 20 - nice;
         return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
                 capable(CAP_SYS_NICE));
  }
diff --git a/kernel/signal.c b/kernel/signal.c

index ca1186eef9380cd5e633f644a0891d1dacc5bd16..d282fea8113815c441417f91c8c0a5374879a4a3 100644 (file)
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -692,7 +692,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
  {
         struct task_struct *t;
  
-       if (p->flags & SIGNAL_GROUP_EXIT)
+       if (p->signal->flags & SIGNAL_GROUP_EXIT)
                 /*
                  * The process is in the middle of dying already.
                  */
diff --git a/kernel/sys.c b/kernel/sys.c

index 000e81ad2c1d8320d6b5609e87baccba6a07ad52..0bcaed6560ac19f72ab308b2fe92839238e9ba70 100644 (file)
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -404,7 +404,6 @@ void kernel_halt(void)
  {
         notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
         system_state = SYSTEM_HALT;
-       device_suspend(PMSG_SUSPEND);
         device_shutdown();
         printk(KERN_EMERG "System halted.\n");
         machine_halt();
@@ -415,7 +414,6 @@ void kernel_power_off(void)
  {
         notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
         system_state = SYSTEM_POWER_OFF;
-       device_suspend(PMSG_SUSPEND);
         device_shutdown();
         printk(KERN_EMERG "Power down.\n");
         machine_power_off();
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c

index 42b40ae5eada0794eec9de1ed92f548d21c07b2e..1ab2370e2efaee04f62334ae98a778ed3bbf9398 100644 (file)
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -79,7 +79,6 @@ cond_syscall(sys_request_key);
  cond_syscall(sys_keyctl);
  cond_syscall(compat_sys_keyctl);
  cond_syscall(compat_sys_socketcall);
-cond_syscall(sys_set_zone_reclaim);
  cond_syscall(sys_inotify_init);
  cond_syscall(sys_inotify_add_watch);
  cond_syscall(sys_inotify_rm_watch);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index 3e0bbee549ea32270b523e58e76c1a5fb80cf70b..8e56e2495542be41fa688aa7a15604cb2e3e6897 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -31,6 +31,7 @@
  #include <linux/smp_lock.h>
  #include <linux/init.h>
  #include <linux/kernel.h>
+#include <linux/net.h>
  #include <linux/sysrq.h>
  #include <linux/highuid.h>
  #include <linux/writeback.h>
@@ -136,9 +137,6 @@ static struct ctl_table_header root_table_header =
  
  static ctl_table kern_table[];
  static ctl_table vm_table[];
-#ifdef CONFIG_NET
-extern ctl_table net_table[];
-#endif
  static ctl_table proc_table[];
  static ctl_table fs_table[];
  static ctl_table debug_table[];
diff --git a/kernel/timer.c b/kernel/timer.c

index f2a11887a72680605bc9af158132a4fd104fbe24..5377f40723ff0dc37f13897847ea040f8c317292 100644 (file)
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1023,7 +1023,7 @@ asmlinkage long sys_getppid(void)
         parent = me->group_leader->real_parent;
         for (;;) {
                 pid = parent->tgid;
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
  {
                 struct task_struct *old = parent;
  
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index 259cf55da3c9002cf390099fafa5f2de885ecc89..c7e36d4a70cabdb9711640575e68ac86d68a7da1 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -308,8 +308,6 @@ struct workqueue_struct *__create_workqueue(const char *name,
         struct workqueue_struct *wq;
         struct task_struct *p;
  
-       BUG_ON(strlen(name) > 10);
-
         wq = kmalloc(sizeof(*wq), GFP_KERNEL);
         if (!wq)
                 return NULL;
diff --git a/lib/Kconfig b/lib/Kconfig

index eeb429a52152bf51e34c5f67f96d8404d66ddf2e..e43197efeb9c559a459cf199a9908a876eb3a976 100644 (file)
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -72,6 +72,9 @@ config TEXTSEARCH
  config TEXTSEARCH_KMP
         tristate
  
+config TEXTSEARCH_BM
+       tristate
+
  config TEXTSEARCH_FSM
         tristate
  
diff --git a/lib/Makefile b/lib/Makefile

index f28d9031303c2cfebb2eb260ab48d6008951fd9e..52f83380f70426600d7f031c54e316cb04151c0a 100644 (file)
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
  
  obj-$(CONFIG_TEXTSEARCH) += textsearch.o
  obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
+obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
  obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
  
  hostprogs-y    := gen_crc32table
diff --git a/lib/crc32.c b/lib/crc32.c

index 58b222783f9c926049826b90f222d91d8b94578f..065198f98b3f5fc3aebb16ed8b9b81749ec8f741 100644 (file)
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -473,7 +473,7 @@ static u32 test_step(u32 init, unsigned char *buf, size_t len)
         init = bitreverse(init);
         crc2 = bitreverse(crc1);
         if (crc1 != bitreverse(crc2))
-               printf("\nBit reversal fail: 0x%08x -> %0x08x -> 0x%08x\n",
+               printf("\nBit reversal fail: 0x%08x -> 0x%08x -> 0x%08x\n",
                        crc1, crc2, bitreverse(crc2));
         crc1 = crc32_le(init, buf, len);
         if (crc1 != crc2)
diff --git a/lib/idr.c b/lib/idr.c

index c5be889de449555dc31b5c5d50a061f34187fdb6..6415d053e2bfa838e3a95313137d07a944261fd8 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -207,7 +207,7 @@ build_up:
  }
  
  /**
- * idr_get_new_above - allocate new idr entry above a start id
+ * idr_get_new_above - allocate new idr entry above or equal to a start id
   * @idp: idr handle
   * @ptr: pointer you want associated with the ide
   * @start_id: id to start search at
diff --git a/lib/inflate.c b/lib/inflate.c

index 75e7d303c72ed9faf1501cac47e562edd28e9552..6db6e98d1637b58764d3356a52ecff7216dfe3ed 100644 (file)
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -326,7 +326,7 @@ DEBG("huft1 ");
    {
      *t = (struct huft *)NULL;
      *m = 0;
-    return 0;
+    return 2;
    }
  
  DEBG("huft2 ");
@@ -374,6 +374,7 @@ DEBG("huft5 ");
      if ((j = *p++) != 0)
        v[x[j]++] = i;
    } while (++i < n);
+  n = x[g];                   /* set n to length of v */
  
  DEBG("h6 ");
  
@@ -410,12 +411,13 @@ DEBG1("1 ");
  DEBG1("2 ");
            f -= a + 1;           /* deduct codes from patterns left */
            xp = c + k;
-          while (++j < z)       /* try smaller tables up to z bits */
-          {
-            if ((f <<= 1) <= *++xp)
-              break;            /* enough codes to use up j bits */
-            f -= *xp;           /* else deduct codes from patterns */
-          }
+          if (j < z)
+            while (++j < z)       /* try smaller tables up to z bits */
+            {
+              if ((f <<= 1) <= *++xp)
+                break;            /* enough codes to use up j bits */
+              f -= *xp;           /* else deduct codes from patterns */
+            }
          }
  DEBG1("3 ");
          z = 1 << j;             /* table entries for j-bit table */
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c

index 8e49d21057e48f1294786a2acbc7f9d4c930c744..04ca4429ddfaf4a077f48c45bb2b718d647b4e42 100644 (file)
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -93,6 +93,7 @@ static int send_uevent(const char *signal, const char *obj,
                 }
         }
  
+       NETLINK_CB(skb).dst_group = 1;
         return netlink_broadcast(uevent_sock, skb, 0, 1, gfp_mask);
  }
  
@@ -153,7 +154,8 @@ EXPORT_SYMBOL_GPL(kobject_uevent_atomic);
  
  static int __init kobject_uevent_init(void)
  {
-       uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL);
+       uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL,
+                                           THIS_MODULE);
  
         if (!uevent_sock) {
                 printk(KERN_ERR
diff --git a/lib/ts_bm.c b/lib/ts_bm.c

new file mode 100644 (file)

index 0000000..2cc7911
--- /dev/null
+++ b/lib/ts_bm.c
@@ -0,0 +1,185 @@
+/*
+ * lib/ts_bm.c         Boyer-Moore text search implementation
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Pablo Neira Ayuso <pablo@eurodev.net>
+ *
+ * ==========================================================================
+ * 
+ *   Implements Boyer-Moore string matching algorithm:
+ *
+ *   [1] A Fast String Searching Algorithm, R.S. Boyer and J.S. Moore.
+ *       Communications of the Association for Computing Machinery, 
+ *       20(10), 1977, pp. 762-772.
+ *       http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf
+ *
+ *   [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004
+ *       http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf
+ *
+ *   Note: Since Boyer-Moore (BM) searches for matches from right to left,
+ *   a match that is spread over multiple blocks cannot be detected; in
+ *   that case this algorithm will not report it.
+ *
+ *   If you need to be sure that no such match is ever missed, use the
+ *   Knuth-Morris-Pratt (KMP) implementation instead. In conclusion, choose
+ *   the proper string search algorithm depending on your setting.
+ *
+ *   Say you're using the textsearch infrastructure for filtering, NIDS or 
+ *   any similar security focused purpose, then go KMP. Otherwise, if you 
+ *   really care about performance, say you're classifying packets to apply
+ *   Quality of Service (QoS) policies, and you don't mind about possible
+ *   matchings spread over multiple fragments, then go BM.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/textsearch.h>
+
+/* Alphabet size, use ASCII */
+#define ASIZE 256
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(args, format...)
+#endif
+
+/*
+ * Private data of one Boyer-Moore search configuration.
+ *
+ * good_shift must stay the last member: bm_init() sizes it at run time
+ * and stores the pattern bytes directly behind it.
+ */
+struct ts_bm
+{
+       /* pattern bytes, located right after the good_shift entries */
+       u8 *            pattern;
+       /* number of bytes in pattern */
+       unsigned int    patlen;
+       /* shift to apply, indexed by the text byte that failed to match */
+       unsigned int    bad_shift[ASIZE];
+       /* shift to apply, indexed by how many trailing bytes did match */
+       unsigned int    good_shift[0];
+};
+
+/**
+ * bm_find - search the text for the next occurrence of the pattern
+ * @conf: search configuration; its private area holds the struct ts_bm
+ * @state: search state; state->offset seeds the consumed-bytes counter
+ *
+ * Pulls blocks from conf->get_next_block() until it reports a length of
+ * zero and scans each block on its own, comparing the pattern right to
+ * left at every candidate position.
+ *
+ * Returns the consumed-byte count at the start of the matching block plus
+ * the offset of the match inside that block, or UINT_MAX if no block
+ * contains the pattern.
+ */
+static unsigned int bm_find(struct ts_config *conf, struct ts_state *state)
+{
+       struct ts_bm *bm = ts_config_priv(conf);
+       unsigned int i, text_len, consumed = state->offset;
+       const u8 *text;
+       int shift, bs;
+
+       for (;;) {
+               text_len = conf->get_next_block(consumed, &text, conf, state);
+
+               if (unlikely(text_len == 0))
+                       break;
+
+               /*
+                * shift indexes the text byte aligned with the LAST pattern
+                * byte.  Every block is a fresh buffer, so restart at
+                * patlen - 1, the window covering text[0..patlen-1];
+                * otherwise a match at the head of a block is never tested.
+                */
+               shift = bm->patlen - 1;
+
+               while (shift < text_len) {
+                       DEBUGP("Searching in position %d (%c)\n",
+                               shift, text[shift]);
+                       for (i = 0; i < bm->patlen; i++)
+                            if (text[shift-i] != bm->pattern[bm->patlen-1-i])
+                                    goto next;
+
+                       /* London calling... */
+                       DEBUGP("found!\n");
+                       return consumed + (shift-(bm->patlen-1));
+
+                       /* i trailing bytes matched, text[shift-i] did not */
+next:                  bs = bm->bad_shift[text[shift-i]];
+
+                       /* Now jumping to... */
+                       shift = max_t(int, shift-i+bs, shift+bm->good_shift[i]);
+               }
+               consumed += text_len;
+       }
+
+       return UINT_MAX;
+}
+
+/*
+ * Return non-zero if sliding the pattern @s bytes to the right can still
+ * produce a match after @g trailing pattern bytes matched the text and
+ * the byte in front of them did not.
+ *
+ * The caller guarantees 1 <= g < len and s >= 1.
+ */
+static int bm_shift_viable(const u8 *pattern, unsigned int len,
+                          unsigned int g, unsigned int s)
+{
+       unsigned int k;
+
+       /* The matched suffix must reappear s bytes further left; positions
+        * that slide off the front of the pattern impose no constraint. */
+       for (k = len - g; k < len; k++)
+               if (k >= s && pattern[k - s] != pattern[k])
+                       return 0;
+
+       /* The text byte at position k differed from pattern[k], so moving
+        * an identical byte into that place would only mismatch again. */
+       k = len - 1 - g;
+       if (k >= s && pattern[k - s] == pattern[k])
+               return 0;
+
+       return 1;
+}
+
+/**
+ * compute_prefix_tbl - fill the bad-character and good-suffix shift tables
+ * @bm: private data whose bad_shift[] and good_shift[] are written
+ * @pattern: pattern bytes the tables are derived from
+ * @len: number of bytes in @pattern; good_shift[] must hold @len entries
+ *
+ * Reads only @pattern and @len, so it does not depend on bm->pattern or
+ * bm->patlen having been filled in yet.
+ *
+ * bad_shift[c] is the distance from the last occurrence of byte c among
+ * the first @len - 1 pattern bytes to the end of the pattern, or @len if
+ * c does not occur there.
+ *
+ * good_shift[g] is the smallest shift that is still viable after exactly
+ * g trailing bytes matched, or @len if there is none.  The search is a
+ * plain scan with no scratch array; worst case is cubic in @len, which is
+ * paid once at setup time.
+ */
+static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern,
+                              unsigned int len)
+{
+       unsigned int i, g, s;
+
+       for (i = 0; i < ASIZE; i++)
+               bm->bad_shift[i] = len;
+       /* "i + 1 < len" instead of "i < len - 1": no wrap-around for len 0 */
+       for (i = 0; i + 1 < len; i++)
+               bm->bad_shift[pattern[i]] = len - 1 - i;
+
+       /* An empty pattern has no good_shift entries to fill. */
+       if (len == 0)
+               return;
+
+       /* Nothing matched yet: always advance by at least one byte. */
+       bm->good_shift[0] = 1;
+
+       for (g = 1; g < len; g++) {
+               for (s = 1; s < len; s++)
+                       if (bm_shift_viable(pattern, len, g, s))
+                               break;
+               /* s == len here means no shorter shift can match */
+               bm->good_shift[g] = s;
+       }
+}
+
+/**
+ * bm_init - allocate and set up a Boyer-Moore search configuration
+ * @pattern: bytes to search for
+ * @len: number of bytes in @pattern
+ * @gfp_mask: allocation flags handed to alloc_ts_config()
+ *
+ * The private area is laid out as struct ts_bm, then @len good_shift
+ * entries, then a private copy of the pattern.
+ *
+ * Returns the new configuration, or the alloc_ts_config() result
+ * unchanged when IS_ERR() flags it as a failure.
+ */
+static struct ts_config *bm_init(const void *pattern, unsigned int len,
+                                int gfp_mask)
+{
+       struct ts_config *conf;
+       struct ts_bm *bm;
+       unsigned int prefix_tbl_len = len * sizeof(unsigned int);
+       size_t priv_size = sizeof(*bm) + len + prefix_tbl_len;
+
+       conf = alloc_ts_config(priv_size, gfp_mask);
+       if (IS_ERR(conf))
+               return conf;
+
+       bm = ts_config_priv(conf);
+       bm->patlen = len;
+       bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len;
+
+       /* Store the pattern before building the shift tables, so the table
+        * builder never sees an unset bm->pattern. */
+       memcpy(bm->pattern, pattern, len);
+       compute_prefix_tbl(bm, pattern, len);
+
+       return conf;
+}
+
+/* Return the pattern pointer stored in the private area of @conf. */
+static void *bm_get_pattern(struct ts_config *conf)
+{
+       struct ts_bm *priv = ts_config_priv(conf);
+
+       return priv->pattern;
+}
+
+/* Return the pattern length stored in the private area of @conf. */
+static unsigned int bm_get_pattern_len(struct ts_config *conf)
+{
+       struct ts_bm *priv = ts_config_priv(conf);
+
+       return priv->patlen;
+}
+
+/* Operations table passed to textsearch_register() under the name "bm". */
+static struct ts_ops bm_ops = {
+       .owner            = THIS_MODULE,
+       .name             = "bm",
+       .init             = bm_init,
+       .find             = bm_find,
+       .get_pattern      = bm_get_pattern,
+       .get_pattern_len  = bm_get_pattern_len,
+       .list             = LIST_HEAD_INIT(bm_ops.list),
+};
+
+/* Module init: hand bm_ops to textsearch_register() and return its result. */
+static int __init init_bm(void)
+{
+       return textsearch_register(&bm_ops);
+}
+
+/* Module exit: withdraw bm_ops via textsearch_unregister(). */
+static void __exit exit_bm(void)
+{
+       textsearch_unregister(&bm_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_bm);
+module_exit(exit_bm);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c

index a9bda0a361f39cfd9de38914894cd182545f6bd6..e4e9031dd9c38709b82fd7de2da931ada0cec248 100644 (file)
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -269,6 +269,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
         int qualifier;          /* 'h', 'l', or 'L' for integer fields */
                                 /* 'z' support added 23/7/1999 S.H.    */
                                 /* 'z' changed to 'Z' --davidm 1/25/99 */
+                               /* 't' added for ptrdiff_t */
  
         /* Reject out-of-range values early */
         if (unlikely((int) size < 0)) {
@@ -339,7 +340,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
                 /* get the conversion qualifier */
                 qualifier = -1;
                 if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-                   *fmt =='Z' || *fmt == 'z') {
+                   *fmt =='Z' || *fmt == 'z' || *fmt == 't') {
                         qualifier = *fmt;
                         ++fmt;
                         if (qualifier == 'l' && *fmt == 'l') {
@@ -467,6 +468,8 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
                                 num = (signed long) num;
                 } else if (qualifier == 'Z' || qualifier == 'z') {
                         num = va_arg(args, size_t);
+               } else if (qualifier == 't') {
+                       num = va_arg(args, ptrdiff_t);
                 } else if (qualifier == 'h') {
                         num = (unsigned short) va_arg(args, int);
                         if (flags & SIGN)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index fbd1111ea1194fc106042d57d2115a743dce7ced..6bf720bc662c41983c1208e2aef409d4e0a276ac 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -301,6 +301,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  {
         struct mm_struct *mm = vma->vm_mm;
         unsigned long address;
+       pte_t *ptep;
         pte_t pte;
         struct page *page;
  
@@ -309,9 +310,17 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
         BUG_ON(end & ~HPAGE_MASK);
  
         for (address = start; address < end; address += HPAGE_SIZE) {
-               pte = huge_ptep_get_and_clear(mm, address, huge_pte_offset(mm, address));
+               ptep = huge_pte_offset(mm, address);
+               if (! ptep)
+                       /* This can happen on truncate, or if an
+                        * mmap() is aborted due to an error before
+                        * the prefault */
+                       continue;
+
+               pte = huge_ptep_get_and_clear(mm, address, ptep);
                 if (pte_none(pte))
                         continue;
+
                 page = pte_page(pte);
                 put_page(page);
         }
diff --git a/mm/memory.c b/mm/memory.c

index 6fe77acbc1cd7c3ae02fe8c732fca66c49e0f1d1..a596c1172248e56b8fb220408548330ebd1de538 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -498,6 +498,17 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         unsigned long addr = vma->vm_start;
         unsigned long end = vma->vm_end;
  
+       /*
+        * Don't copy ptes where a page fault will fill them correctly.
+        * Fork becomes much lighter when there are big shared or private
+        * readonly mappings. The tradeoff is that copy_page_range is more
+        * efficient than faulting.
+        */
+       if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) {
+               if (!vma->anon_vma)
+                       return 0;
+       }
+
         if (is_vm_hugetlb_page(vma))
                 return copy_hugetlb_page_range(dst_mm, src_mm, vma);
  
@@ -913,9 +924,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         pud = pud_offset(pgd, pg);
                         BUG_ON(pud_none(*pud));
                         pmd = pmd_offset(pud, pg);
-                       BUG_ON(pmd_none(*pmd));
+                       if (pmd_none(*pmd))
+                               return i ? : -EFAULT;
                         pte = pte_offset_map(pmd, pg);
-                       BUG_ON(pte_none(*pte));
+                       if (pte_none(*pte)) {
+                               pte_unmap(pte);
+                               return i ? : -EFAULT;
+                       }
                         if (pages) {
                                 pages[i] = pte_page(*pte);
                                 get_page(pages[i]);
@@ -940,11 +955,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 }
                 spin_lock(&mm->page_table_lock);
                 do {
+                       int write_access = write;
                         struct page *page;
-                       int lookup_write = write;
  
                         cond_resched_lock(&mm->page_table_lock);
-                       while (!(page = follow_page(mm, start, lookup_write))) {
+                       while (!(page = follow_page(mm, start, write_access))) {
+                               int ret;
+
                                 /*
                                  * Shortcut for anonymous pages. We don't want
                                  * to force the creation of pages tables for
@@ -952,13 +969,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                  * nobody touched so far. This is important
                                  * for doing a core dump for these mappings.
                                  */
-                               if (!lookup_write &&
-                                   untouched_anonymous_page(mm,vma,start)) {
+                               if (!write && untouched_anonymous_page(mm,vma,start)) {
                                         page = ZERO_PAGE(start);
                                         break;
                                 }
                                 spin_unlock(&mm->page_table_lock);
-                               switch (handle_mm_fault(mm,vma,start,write)) {
+                               ret = __handle_mm_fault(mm, vma, start, write_access);
+
+                               /*
+                                * The VM_FAULT_WRITE bit tells us that do_wp_page has
+                                * broken COW when necessary, even if maybe_mkwrite
+                                * decided not to set pte_write. We can thus safely do
+                                * subsequent page lookups as if they were reads.
+                                */
+                               if (ret & VM_FAULT_WRITE)
+                                       write_access = 0;
+                               
+                               switch (ret & ~VM_FAULT_WRITE) {
                                 case VM_FAULT_MINOR:
                                         tsk->min_flt++;
                                         break;
@@ -972,14 +999,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                 default:
                                         BUG();
                                 }
-                               /*
-                                * Now that we have performed a write fault
-                                * and surely no longer have a shared page we
-                                * shouldn't write, we shouldn't ignore an
-                                * unwritable page in the page table if
-                                * we are forcing write access.
-                                */
-                               lookup_write = write && !force;
                                 spin_lock(&mm->page_table_lock);
                         }
                         if (pages) {
@@ -1229,6 +1248,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
         struct page *old_page, *new_page;
         unsigned long pfn = pte_pfn(pte);
         pte_t entry;
+       int ret;
  
         if (unlikely(!pfn_valid(pfn))) {
                 /*
@@ -1256,7 +1276,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
                         lazy_mmu_prot_update(entry);
                         pte_unmap(page_table);
                         spin_unlock(&mm->page_table_lock);
-                       return VM_FAULT_MINOR;
+                       return VM_FAULT_MINOR|VM_FAULT_WRITE;
                 }
         }
         pte_unmap(page_table);
@@ -1283,6 +1303,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
         /*
          * Re-check the pte - we dropped the lock
          */
+       ret = VM_FAULT_MINOR;
         spin_lock(&mm->page_table_lock);
         page_table = pte_offset_map(pmd, address);
         if (likely(pte_same(*page_table, pte))) {
@@ -1299,12 +1320,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
  
                 /* Free the old page.. */
                 new_page = old_page;
+               ret |= VM_FAULT_WRITE;
         }
         pte_unmap(page_table);
         page_cache_release(new_page);
         page_cache_release(old_page);
         spin_unlock(&mm->page_table_lock);
-       return VM_FAULT_MINOR;
+       return ret;
  
  no_new_page:
         page_cache_release(old_page);
@@ -1996,7 +2018,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
         if (write_access) {
                 if (!pte_write(entry))
                         return do_wp_page(mm, vma, address, pte, pmd, entry);
-
                 entry = pte_mkdirty(entry);
         }
         entry = pte_mkyoung(entry);
@@ -2011,7 +2032,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
  /*
   * By the time we get here, we already hold the mm semaphore
   */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
                 unsigned long address, int write_access)
  {
         pgd_t *pgd;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index 1694845526be5db9c18ef0183110435a0ccf9ab7..b4eababc8198790961ead62cf421b2ac2c21aaf5 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -443,7 +443,7 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
         struct mempolicy *new;
         DECLARE_BITMAP(nodes, MAX_NUMNODES);
  
-       if (mode > MPOL_MAX)
+       if (mode < 0 || mode > MPOL_MAX)
                 return -EINVAL;
         err = get_nodes(nodes, nmask, maxnode, mode);
         if (err)
diff --git a/mm/mmap.c b/mm/mmap.c

index da3fa90a0aae2b9af04ec553209deb6a51b80b48..404319477e71d61679166bc954ed246719affe89 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -143,7 +143,11 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
            leave 3% of the size of this process for other processes */
         allowed -= current->mm->total_vm / 32;
  
-       if (atomic_read(&vm_committed_space) < allowed)
+       /*
+        * cast `allowed' as a signed long because vm_committed_space
+        * sometimes has a negative value
+        */
+       if (atomic_read(&vm_committed_space) < (long)allowed)
                 return 0;
  
         vm_unacct_memory(pages);
diff --git a/mm/mremap.c b/mm/mremap.c

index ec7238a78f36c5eeac4bf1954e215399428a858d..fc45dc9a617b013f4db5aea5fe7898bf6cd677b6 100644 (file)
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -229,6 +229,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
          * since do_munmap() will decrement it by old_len == new_len
          */
         mm->total_vm += new_len >> PAGE_SHIFT;
+       __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
  
         if (do_munmap(mm, old_addr, old_len) < 0) {
                 /* OOM: unable to split vma, just get accounts right */
@@ -243,7 +244,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                         vma->vm_next->vm_flags |= VM_ACCOUNT;
         }
  
-       __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
         if (vm_flags & VM_LOCKED) {
                 mm->locked_vm += new_len >> PAGE_SHIFT;
                 if (new_len > old_len)
diff --git a/mm/nommu.c b/mm/nommu.c

index ce74452c02d945e40e6cdb503499609f24e1cd64..fd4e8df0f02df979bb2a2de5a891e35f82754a7d 100644 (file)
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1167,7 +1167,11 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
            leave 3% of the size of this process for other processes */
         allowed -= current->mm->total_vm / 32;
  
-       if (atomic_read(&vm_committed_space) < allowed)
+       /*
+        * cast `allowed' as a signed long because vm_committed_space
+        * sometimes has a negative value
+        */
+       if (atomic_read(&vm_committed_space) < (long)allowed)
                 return 0;
  
         vm_unacct_memory(pages);
diff --git a/mm/shmem.c b/mm/shmem.c

index e64fa726a790e0c4162fec88db0e1032dd8a5be0..5a81b1ee4f7a43fc972d7ec762f2d51d7ac8aa42 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1773,32 +1773,27 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
         return 0;
  }
  
-static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
+static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
  {
         nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode));
-       return 0;
+       return NULL;
  }
  
-static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct page *page = NULL;
         int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
         nd_set_link(nd, res ? ERR_PTR(res) : kmap(page));
-       return 0;
+       return page;
  }
  
-static void shmem_put_link(struct dentry *dentry, struct nameidata *nd)
+static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
  {
         if (!IS_ERR(nd_get_link(nd))) {
-               struct page *page;
-
-               page = find_get_page(dentry->d_inode->i_mapping, 0);
-               if (!page)
-                       BUG();
+               struct page *page = cookie;
                 kunmap(page);
                 mark_page_accessed(page);
                 page_cache_release(page);
-               page_cache_release(page);
         }
  }
  
diff --git a/net/802/fc.c b/net/802/fc.c

index 640d34e026c2e819623543f48aae3ef8dea126d9..282c4ab1abe60836c79cccb39173cf2cbc918f92 100644 (file)
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -87,7 +87,7 @@ static int fc_rebuild_header(struct sk_buff *skb)
         struct fch_hdr *fch=(struct fch_hdr *)skb->data;
         struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr));
         if(fcllc->ethertype != htons(ETH_P_IP)) {
-               printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons(fcllc->ethertype));
+               printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype));
                 return 0;
         }
  #ifdef CONFIG_INET
diff --git a/net/802/fddi.c b/net/802/fddi.c

index 5ce24c4bb840c7171b5948d797cb98b5267a9307..ac242a4bc346b541a09fe63456b17dcb64335ca5 100644 (file)
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -108,8 +108,8 @@ static int fddi_rebuild_header(struct sk_buff       *skb)
         else
  #endif 
         {
-               printk("%s: Don't know how to resolve type %02X addresses.\n",
-                      skb->dev->name, htons(fddi->hdr.llc_snap.ethertype));
+               printk("%s: Don't know how to resolve type %04X addresses.\n",
+                      skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype));
                 return(0);
         }
  }
diff --git a/net/802/hippi.c b/net/802/hippi.c

index 051e8af56a7785f371b24e8ba9b4a3ae28c8f58e..6d7fed3dd99ac0bd635ab7256ab0fc210fbafe76 100644 (file)
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -51,6 +51,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
                         unsigned len)
  {
         struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN);
+       struct hippi_cb *hcb = (struct hippi_cb *) skb->cb;
  
         if (!len){
                 len = skb->len - HIPPI_HLEN;
@@ -84,9 +85,10 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
         if (daddr)
         {
                 memcpy(hip->le.dest_switch_addr, daddr + 3, 3);
-               memcpy(&skb->private.ifield, daddr + 2, 4);
+               memcpy(&hcb->ifield, daddr + 2, 4);
                 return HIPPI_HLEN;
         }
+       hcb->ifield = 0;
         return -((int)HIPPI_HLEN);
  }
  
@@ -122,7 +124,7 @@ static int hippi_rebuild_header(struct sk_buff *skb)
   *     Determine the packet's protocol ID.
   */
   
-unsigned short hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
+__be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
  {
         struct hippi_hdr *hip;
         
diff --git a/net/802/p8022.c b/net/802/p8022.c

index 5ae63416df6dfc1bcdd58c10004a3d4b49487df9..b24817c63ca8e31459a7b2c87b43bb2b05726ec8 100644 (file)
--- a/net/802/p8022.c
+++ b/net/802/p8022.c
@@ -35,7 +35,8 @@ static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb,
  struct datalink_proto *register_8022_client(unsigned char type,
                                             int (*func)(struct sk_buff *skb,
                                                         struct net_device *dev,
-                                                       struct packet_type *pt))
+                                                       struct packet_type *pt,
+                                                       struct net_device *orig_dev))
  {
         struct datalink_proto *proto;
  
diff --git a/net/802/p8023.c b/net/802/p8023.c

index a0b61b40225f40fe994e644c99e4ad6e0c17d356..6368d3dce444489ea41e654067aa92c012c0b924 100644 (file)
--- a/net/802/p8023.c
+++ b/net/802/p8023.c
@@ -20,6 +20,7 @@
  #include <linux/skbuff.h>
  
  #include <net/datalink.h>
+#include <net/p8022.h>
  
  /*
   *     Place an 802.3 header on a packet. The driver will do the mac
diff --git a/net/802/psnap.c b/net/802/psnap.c

index 1053821ddf933c922f01e5e8a9733046b0e64676..ab80b1fab53c877eb312e35bea9b98f7bc3b233b 100644 (file)
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -47,7 +47,7 @@ static struct datalink_proto *find_snap_client(unsigned char *desc)
   *     A SNAP packet has arrived
   */
  static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
-                   struct packet_type *pt)
+                   struct packet_type *pt, struct net_device *orig_dev)
  {
         int rc = 1;
         struct datalink_proto *proto;
@@ -61,7 +61,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
                 /* Pass the frame on. */
                 skb->h.raw  += 5;
                 skb_pull(skb, 5);
-               rc = proto->rcvfunc(skb, dev, &snap_packet_type);
+               rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev);
         } else {
                 skb->sk = NULL;
                 kfree_skb(skb);
@@ -118,7 +118,8 @@ module_exit(snap_exit);
  struct datalink_proto *register_snap_client(unsigned char *desc,
                                             int (*rcvfunc)(struct sk_buff *,
                                                            struct net_device *,
-                                                          struct packet_type *))
+                                                          struct packet_type *,
+                                                          struct net_device *))
  {
         struct datalink_proto *proto = NULL;
  
diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c

index 36079630c49f1fbcfb7b9a29d8c84d46618e2501..700129556c13145b16355d2400d33b6b207843d7 100644 (file)
--- a/net/802/sysctl_net_802.c
+++ b/net/802/sysctl_net_802.c
@@ -10,9 +10,10 @@
   *             2 of the License, or (at your option) any later version.
   */
  
+#include <linux/config.h>
  #include <linux/mm.h>
+#include <linux/if_tr.h>
  #include <linux/sysctl.h>
-#include <linux/config.h>
  
  #ifdef CONFIG_TR
  extern int sysctl_tr_rif_timeout;
diff --git a/net/802/tr.c b/net/802/tr.c

index a755e880f4bafa1d8847ab622d83ec189c95f5b4..1bb7dc1b85cd448d4de0b47485dd54b9212b535d 100644 (file)
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -251,10 +251,11 @@ void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device *
         unsigned int hash;
         struct rif_cache *entry;
         unsigned char *olddata;
+       unsigned long flags;
         static const unsigned char mcast_func_addr[] 
                 = {0xC0,0x00,0x00,0x04,0x00,0x00};
         
-       spin_lock_bh(&rif_lock);
+       spin_lock_irqsave(&rif_lock, flags);
  
         /*
          *      Broadcasts are single route as stated in RFC 1042 
@@ -323,7 +324,7 @@ printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0],
         else 
                 slack = 18 - ((ntohs(trh->rcf) & TR_RCF_LEN_MASK)>>8);
         olddata = skb->data;
-       spin_unlock_bh(&rif_lock);
+       spin_unlock_irqrestore(&rif_lock, flags);
  
         skb_pull(skb, slack);
         memmove(skb->data, olddata, sizeof(struct trh_hdr) - slack);
@@ -337,10 +338,11 @@ printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0],
  static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev)
  {
         unsigned int hash, rii_p = 0;
+       unsigned long flags;
         struct rif_cache *entry;
  
  
-       spin_lock_bh(&rif_lock);
+       spin_lock_irqsave(&rif_lock, flags);
         
         /*
          *      Firstly see if the entry exists
@@ -378,7 +380,7 @@ printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
                 if(!entry) 
                 {
                         printk(KERN_DEBUG "tr.c: Couldn't malloc rif cache entry !\n");
-                       spin_unlock_bh(&rif_lock);
+                       spin_unlock_irqrestore(&rif_lock, flags);
                         return;
                 }
  
@@ -420,7 +422,7 @@ printk("updating rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
                     }                                         
                 entry->last_used=jiffies;               
         }
-       spin_unlock_bh(&rif_lock);
+       spin_unlock_irqrestore(&rif_lock, flags);
  }
  
  /*
@@ -430,9 +432,9 @@ printk("updating rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
  static void rif_check_expire(unsigned long dummy) 
  {
         int i;
-       unsigned long next_interval = jiffies + sysctl_tr_rif_timeout/2;
+       unsigned long flags, next_interval = jiffies + sysctl_tr_rif_timeout/2;
  
-       spin_lock_bh(&rif_lock);
+       spin_lock_irqsave(&rif_lock, flags);
         
         for(i =0; i < RIF_TABLE_SIZE; i++) {
                 struct rif_cache *entry, **pentry;
@@ -454,7 +456,7 @@ static void rif_check_expire(unsigned long dummy)
                 }
         }
         
-       spin_unlock_bh(&rif_lock);
+       spin_unlock_irqrestore(&rif_lock, flags);
  
         mod_timer(&rif_timer, next_interval);
  
@@ -485,7 +487,7 @@ static struct rif_cache *rif_get_idx(loff_t pos)
  
  static void *rif_seq_start(struct seq_file *seq, loff_t *pos)
  {
-       spin_lock_bh(&rif_lock);
+       spin_lock_irq(&rif_lock);
  
         return *pos ? rif_get_idx(*pos - 1) : SEQ_START_TOKEN;
  }
@@ -516,7 +518,7 @@ static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  
  static void rif_seq_stop(struct seq_file *seq, void *v)
  {
-       spin_unlock_bh(&rif_lock);
+       spin_unlock_irq(&rif_lock);
  }
  
  static int rif_seq_show(struct seq_file *seq, void *v)
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h

index 508b1fa14546803c1c1f8300ba60e791f0e4dd71..9ae3a14dd016d3ed9da0c2fb603c6a8da900e182 100644 (file)
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -51,7 +51,7 @@ struct net_device *__find_vlan_dev(struct net_device* real_dev,
  /* found in vlan_dev.c */
  int vlan_dev_rebuild_header(struct sk_buff *skb);
  int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
-                  struct packet_type* ptype);
+                  struct packet_type *ptype, struct net_device *orig_dev);
  int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
                           unsigned short type, void *daddr, void *saddr,
                           unsigned len);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c

index 49c487413518c632204cbc0e409297949e8a49c3..145f5cde96cf7a0ece22d77da07e0c4fc0311e53 100644 (file)
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -113,7 +113,7 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
   *
   */
  int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
-                  struct packet_type* ptype)
+                  struct packet_type* ptype, struct net_device *orig_dev)
  {
         unsigned char *rawp = NULL;
         struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data);
diff --git a/net/Kconfig b/net/Kconfig

index 40a31ba86d2c9b689d4e59152539e4e4f10930db..c07aafb59a0fb5a829e68b14bf16c597137d15e3 100644 (file)
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -147,6 +147,7 @@ source "net/bridge/netfilter/Kconfig"
  
  endif
  
+source "net/dccp/Kconfig"
  source "net/sctp/Kconfig"
  source "net/atm/Kconfig"
  source "net/bridge/Kconfig"
@@ -205,6 +206,8 @@ config NET_PKTGEN
           To compile this code as a module, choose M here: the
           module will be called pktgen.
  
+source "net/netfilter/Kconfig"
+
  endmenu
  
  endmenu
diff --git a/net/Makefile b/net/Makefile

index 8e2bdc025ab852fccaf8b66c80175864c806b36a..7e6eff206c813f36d2df224f23d594637f40758b 100644 (file)
--- a/net/Makefile
+++ b/net/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_NET)             += $(tmp-y)
  obj-$(CONFIG_LLC)              += llc/
  obj-$(CONFIG_NET)              += ethernet/ 802/ sched/ netlink/
  obj-$(CONFIG_INET)             += ipv4/
+obj-$(CONFIG_NETFILTER)                += netfilter/
  obj-$(CONFIG_XFRM)             += xfrm/
  obj-$(CONFIG_UNIX)             += unix/
  ifneq ($(CONFIG_IPV6),)
@@ -41,6 +42,7 @@ obj-$(CONFIG_ATM)             += atm/
  obj-$(CONFIG_DECNET)           += decnet/
  obj-$(CONFIG_ECONET)           += econet/
  obj-$(CONFIG_VLAN_8021Q)       += 8021q/
+obj-$(CONFIG_IP_DCCP)          += dccp/
  obj-$(CONFIG_IP_SCTP)          += sctp/
  
  ifeq ($(CONFIG_NET),y)
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c

index c34614ea5fcedcdc898c75c014a6c36ecfe65d4d..7076097debc29e454e02856d232015ad44607f8c 100644 (file)
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -698,7 +698,7 @@ static void __aarp_resolved(struct aarp_entry **list, struct aarp_entry *a,
   *     frame. We currently only support Ethernet.
   */
  static int aarp_rcv(struct sk_buff *skb, struct net_device *dev,
-                   struct packet_type *pt)
+                   struct packet_type *pt, struct net_device *orig_dev)
  {
         struct elapaarp *ea = aarp_hdr(skb);
         int hash, ret = 0;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c

index 192b529f86a456e62864867d9a219b5eabb7f796..1d31b3a3f1e598c2da003d9dd4e5c50551719e39 100644 (file)
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -53,12 +53,12 @@
  
  #include <linux/config.h>
  #include <linux/module.h>
-#include <linux/tcp.h>
  #include <linux/if_arp.h>
  #include <linux/termios.h>     /* For TIOCOUTQ/INQ */
  #include <net/datalink.h>
  #include <net/psnap.h>
  #include <net/sock.h>
+#include <net/tcp_states.h>
  #include <net/route.h>
  #include <linux/atalk.h>
  
@@ -1390,7 +1390,7 @@ free_it:
   *     [ie ARPHRD_ETHERTALK]
   */
  static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
-                    struct packet_type *pt)
+                    struct packet_type *pt, struct net_device *orig_dev)
  {
         struct ddpehdr *ddp;
         struct sock *sock;
@@ -1482,7 +1482,7 @@ freeit:
   * header and append a long one.
   */
  static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
-                       struct packet_type *pt)
+                    struct packet_type *pt, struct net_device *orig_dev)
  {
         /* Expand any short form frames */
         if (skb->mac.raw[2] == 1) {
@@ -1528,7 +1528,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
         }
         skb->h.raw = skb->data;
  
-       return atalk_rcv(skb, dev, pt);
+       return atalk_rcv(skb, dev, pt, orig_dev);
  freeit:
         kfree_skb(skb);
         return 0;
diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c

index 181a3002d8adc5eebfc117e609ba4427ccf8ff2f..4b1faca5013ff8d573ef784253c77e867be1fcbc 100644 (file)
--- a/net/atm/ipcommon.c
+++ b/net/atm/ipcommon.c
@@ -34,7 +34,6 @@
  
  void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to)
  {
-       struct sk_buff *skb;
         unsigned long flags;
         struct sk_buff *skb_from = (struct sk_buff *) from;
         struct sk_buff *skb_to = (struct sk_buff *) to;
@@ -47,8 +46,6 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to)
         prev->next = skb_to;
         to->prev->next = from->next;
         to->prev = from->prev;
-       for (skb = from->next; skb != skb_to; skb = skb->next)
-               skb->list = to;
         to->qlen += from->qlen;
         spin_unlock(&to->lock);
         from->prev = skb_from;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c

index 707097deac3deb4957741204c7944e01521f3c0f..ea43dfb774e228c3e044c283a7c6be11fd4195eb 100644 (file)
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -45,7 +45,7 @@
  #include <linux/sysctl.h>
  #include <linux/init.h>
  #include <linux/spinlock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <net/ip.h>
  #include <net/arp.h>
  
@@ -875,12 +875,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
         sk->sk_sndbuf   = osk->sk_sndbuf;
         sk->sk_state    = TCP_ESTABLISHED;
         sk->sk_sleep    = osk->sk_sleep;
-
-       if (sock_flag(osk, SOCK_DBG))
-               sock_set_flag(sk, SOCK_DBG);
-
-       if (sock_flag(osk, SOCK_ZAPPED))
-               sock_set_flag(sk, SOCK_ZAPPED);
+       sock_copy_flags(sk, osk);
  
         oax25 = ax25_sk(osk);
  
@@ -1007,7 +1002,8 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         struct sock *sk = sock->sk;
         struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr;
         ax25_dev *ax25_dev = NULL;
-       ax25_address *call;
+       ax25_uid_assoc *user;
+       ax25_address call;
         ax25_cb *ax25;
         int err = 0;
  
@@ -1026,9 +1022,15 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         if (addr->fsa_ax25.sax25_family != AF_AX25)
                 return -EINVAL;
  
-       call = ax25_findbyuid(current->euid);
-       if (call == NULL && ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
-               return -EACCES;
+       user = ax25_findbyuid(current->euid);
+       if (user) {
+               call = user->call;
+               ax25_uid_put(user);
+       } else {
+               if (ax25_uid_policy && !capable(CAP_NET_ADMIN))
+                       return -EACCES;
+
+               call = addr->fsa_ax25.sax25_call;
         }
  
         lock_sock(sk);
@@ -1039,10 +1041,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                 goto out;
         }
  
-       if (call == NULL)
-               ax25->source_addr = addr->fsa_ax25.sax25_call;
-       else
-               ax25->source_addr = *call;
+       ax25->source_addr = call;
  
         /*
          * User already set interface with SO_BINDTODEVICE
diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c

index 8adc0022cf580fe60adc82fb5f58bed34dc089d9..edcaa897027cc031861b6d6bc66c5720a2c25b55 100644 (file)
--- a/net/ax25/ax25_ds_in.c
+++ b/net/ax25/ax25_ds_in.c
@@ -22,8 +22,7 @@
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/ip.h>                    /* For ip_rcv */
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/uaccess.h>
  #include <asm/system.h>
  #include <linux/fcntl.h>
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c

index 3a8b67316fc388d31360d999f0ebb35367328cff..061083efc1dcddc70cf900fbc43e77b69bfafa41 100644 (file)
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -18,7 +18,7 @@
  #include <linux/string.h>
  #include <linux/sockios.h>
  #include <linux/net.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <net/ax25.h>
  #include <linux/inet.h>
  #include <linux/netdevice.h>
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c

index 3dc808fde33ffad4f70fc87ef8a9df08c66ad94a..810c9c76c2e022b08d171e255a0bf1340159f97f 100644 (file)
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -9,7 +9,6 @@
   * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
   * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de)
   */
-#include <linux/config.h>
  #include <linux/errno.h>
  #include <linux/types.h>
  #include <linux/socket.h>
@@ -26,9 +25,7 @@
  #include <linux/skbuff.h>
  #include <linux/netfilter.h>
  #include <net/sock.h>
-#include <net/ip.h>                    /* For ip_rcv */
-#include <net/tcp.h>
-#include <net/arp.h>                   /* For arp_rcv */
+#include <net/tcp_states.h>
  #include <asm/uaccess.h>
  #include <asm/system.h>
  #include <linux/fcntl.h>
@@ -114,7 +111,6 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
  
         pid = *skb->data;
  
-#ifdef CONFIG_INET
         if (pid == AX25_P_IP) {
                 /* working around a TCP bug to keep additional listeners
                  * happy. TCP re-uses the buffer and destroys the original
@@ -132,10 +128,9 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
                 skb->dev      = ax25->ax25_dev->dev;
                 skb->pkt_type = PACKET_HOST;
                 skb->protocol = htons(ETH_P_IP);
-               ip_rcv(skb, skb->dev, NULL);    /* Wrong ptype */
+               netif_rx(skb);
                 return 1;
         }
-#endif
         if (pid == AX25_P_SEGMENT) {
                 skb_pull(skb, 1);       /* Remove PID */
                 return ax25_rx_fragment(ax25, skb);
@@ -250,7 +245,6 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
  
                 /* Now we are pointing at the pid byte */
                 switch (skb->data[1]) {
-#ifdef CONFIG_INET
                 case AX25_P_IP:
                         skb_pull(skb,2);                /* drop PID/CTRL */
                         skb->h.raw    = skb->data;
@@ -258,7 +252,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
                         skb->dev      = dev;
                         skb->pkt_type = PACKET_HOST;
                         skb->protocol = htons(ETH_P_IP);
-                       ip_rcv(skb, dev, ptype);        /* Note ptype here is the wrong one, fix me later */
+                       netif_rx(skb);
                         break;
  
                 case AX25_P_ARP:
@@ -268,9 +262,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
                         skb->dev      = dev;
                         skb->pkt_type = PACKET_HOST;
                         skb->protocol = htons(ETH_P_ARP);
-                       arp_rcv(skb, dev, ptype);       /* Note ptype here is wrong... */
+                       netif_rx(skb);
                         break;
-#endif
                 case AX25_P_TEXT:
                         /* Now find a suitable dgram socket */
                         sk = ax25_get_socket(&dest, &src, SOCK_DGRAM);
@@ -454,7 +447,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
   *     Receive an AX.25 frame via a SLIP interface.
   */
  int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev,
-                 struct packet_type *ptype)
+                 struct packet_type *ptype, struct net_device *orig_dev)
  {
         skb->sk = NULL;         /* Initially we don't know who it's for */
         skb->destructor = NULL; /* Who initializes this, dammit?! */
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c

index 44b99b1ff9f8c7c8a92568d0d4a665979288a268..c288526da4ce194c65f49878f40cd686cedd1216 100644 (file)
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -422,8 +422,8 @@ static inline void ax25_adjust_path(ax25_address *addr, ax25_digi *digipeat)
   */
  int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
  {
+       ax25_uid_assoc *user;
         ax25_route *ax25_rt;
-       ax25_address *call;
         int err;
  
         if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
@@ -434,16 +434,18 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
                 goto put;
         }
  
-       if ((call = ax25_findbyuid(current->euid)) == NULL) {
+       user = ax25_findbyuid(current->euid);
+       if (user) {
+               ax25->source_addr = user->call;
+               ax25_uid_put(user);
+       } else {
                 if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
                         err = -EPERM;
                         goto put;
                 }
-               call = (ax25_address *)ax25->ax25_dev->dev->dev_addr;
+               ax25->source_addr = *(ax25_address *)ax25->ax25_dev->dev->dev_addr;
         }
  
-       ax25->source_addr = *call;
-
         if (ax25_rt->digipeat != NULL) {
                 if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
                         err = -ENOMEM;
diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c

index 7131873322c4cec98554e5334b1899211aca835d..f6ed283e9de82fb2baee56e73580f5bc117fb5f4 100644 (file)
--- a/net/ax25/ax25_std_in.c
+++ b/net/ax25/ax25_std_in.c
@@ -29,8 +29,7 @@
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/ip.h>                    /* For ip_rcv */
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/uaccess.h>
  #include <asm/system.h>
  #include <linux/fcntl.h>
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c

index 066897bc074901965b83487c30e850ee27adc105..a29c480a4dc1f992f7f339c44f4b3d0ca9184526 100644 (file)
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -24,7 +24,7 @@
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/uaccess.h>
  #include <asm/system.h>
  #include <linux/fcntl.h>
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c

index 99694b57f6f565d36f6787f58c8faa43cced25d2..c41dbe5fadee53867d98e24967ce96f942a32c18 100644 (file)
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -24,7 +24,7 @@
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/uaccess.h>
  #include <asm/system.h>
  #include <linux/fcntl.h>
@@ -76,7 +76,7 @@ void ax25_requeue_frames(ax25_cb *ax25)
                 if (skb_prev == NULL)
                         skb_queue_head(&ax25->write_queue, skb);
                 else
-                       skb_append(skb_prev, skb);
+                       skb_append(skb_prev, skb, &ax25->write_queue);
                 skb_prev = skb;
         }
  }
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c

index cea6b7d1972905ddeb17d3f5c6ab8b38b8b30c30..a8b3822f3ee42155032c914c31e80e1354b1fbfb 100644 (file)
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -28,6 +28,7 @@
  #include <linux/fcntl.h>
  #include <linux/mm.h>
  #include <linux/interrupt.h>
+#include <linux/list.h>
  #include <linux/notifier.h>
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
@@ -41,38 +42,41 @@
   *     Callsign/UID mapper. This is in kernel space for security on multi-amateur machines.
   */
  
-static ax25_uid_assoc *ax25_uid_list;
+HLIST_HEAD(ax25_uid_list);
  static DEFINE_RWLOCK(ax25_uid_lock);
  
  int ax25_uid_policy = 0;
  
-ax25_address *ax25_findbyuid(uid_t uid)
+ax25_uid_assoc *ax25_findbyuid(uid_t uid)
  {
-       ax25_uid_assoc *ax25_uid;
-       ax25_address *res = NULL;
+       ax25_uid_assoc *ax25_uid, *res = NULL;
+       struct hlist_node *node;
  
         read_lock(&ax25_uid_lock);
-       for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) {
+       ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
                 if (ax25_uid->uid == uid) {
-                       res = &ax25_uid->call;
+                       ax25_uid_hold(ax25_uid);
+                       res = ax25_uid;
                         break;
                 }
         }
         read_unlock(&ax25_uid_lock);
  
-       return NULL;
+       return res;
  }
  
  int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
  {
-       ax25_uid_assoc *s, *ax25_uid;
+       ax25_uid_assoc *ax25_uid;
+       struct hlist_node *node;
+       ax25_uid_assoc *user;
         unsigned long res;
  
         switch (cmd) {
         case SIOCAX25GETUID:
                 res = -ENOENT;
                 read_lock(&ax25_uid_lock);
-               for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) {
+               ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
                         if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) {
                                 res = ax25_uid->uid;
                                 break;
@@ -85,19 +89,22 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
         case SIOCAX25ADDUID:
                 if (!capable(CAP_NET_ADMIN))
                         return -EPERM;
-               if (ax25_findbyuid(sax->sax25_uid))
+               user = ax25_findbyuid(sax->sax25_uid);
+               if (user) {
+                       ax25_uid_put(user);
                         return -EEXIST;
+               }
                 if (sax->sax25_uid == 0)
                         return -EINVAL;
                 if ((ax25_uid = kmalloc(sizeof(*ax25_uid), GFP_KERNEL)) == NULL)
                         return -ENOMEM;
  
+               atomic_set(&ax25_uid->refcount, 1);
                 ax25_uid->uid  = sax->sax25_uid;
                 ax25_uid->call = sax->sax25_call;
  
                 write_lock(&ax25_uid_lock);
-               ax25_uid->next = ax25_uid_list;
-               ax25_uid_list  = ax25_uid;
+               hlist_add_head(&ax25_uid->uid_node, &ax25_uid_list);
                 write_unlock(&ax25_uid_lock);
  
                 return 0;
@@ -106,34 +113,21 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
                 if (!capable(CAP_NET_ADMIN))
                         return -EPERM;
  
+               ax25_uid = NULL;
                 write_lock(&ax25_uid_lock);
-               for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) {
-                       if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) {
+               ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
+                       if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0)
                                 break;
-                       }
                 }
                 if (ax25_uid == NULL) {
                         write_unlock(&ax25_uid_lock);
                         return -ENOENT;
                 }
-               if ((s = ax25_uid_list) == ax25_uid) {
-                       ax25_uid_list = s->next;
-                       write_unlock(&ax25_uid_lock);
-                       kfree(ax25_uid);
-                       return 0;
-               }
-               while (s != NULL && s->next != NULL) {
-                       if (s->next == ax25_uid) {
-                               s->next = ax25_uid->next;
-                               write_unlock(&ax25_uid_lock);
-                               kfree(ax25_uid);
-                               return 0;
-                       }
-                       s = s->next;
-               }
+               hlist_del_init(&ax25_uid->uid_node);
+               ax25_uid_put(ax25_uid);
                 write_unlock(&ax25_uid_lock);
  
-               return -ENOENT;
+               return 0;
  
         default:
                 return -EINVAL;
@@ -147,13 +141,11 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
  static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
  {
         struct ax25_uid_assoc *pt;
-       int i = 1;
+       struct hlist_node *node;
+       int i = 0;
  
         read_lock(&ax25_uid_lock);
-       if (*pos == 0)
-               return SEQ_START_TOKEN;
-
-       for (pt = ax25_uid_list; pt != NULL; pt = pt->next) {
+       ax25_uid_for_each(pt, node, &ax25_uid_list) {
                 if (i == *pos)
                         return pt;
                 ++i;
@@ -164,8 +156,9 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
  static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  {
         ++*pos;
-       return (v == SEQ_START_TOKEN) ? ax25_uid_list : 
-               ((struct ax25_uid_assoc *) v)->next;
+
+       return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next,
+                          ax25_uid_assoc, uid_node);
  }
  
  static void ax25_uid_seq_stop(struct seq_file *seq, void *v)
@@ -179,7 +172,6 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v)
                 seq_printf(seq, "Policy: %d\n", ax25_uid_policy);
         else {
                 struct ax25_uid_assoc *pt = v;
-               
  
                 seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(&pt->call));
         }
@@ -213,16 +205,13 @@ struct file_operations ax25_uid_fops = {
   */
  void __exit ax25_uid_free(void)
  {
-       ax25_uid_assoc *s, *ax25_uid;
+       ax25_uid_assoc *ax25_uid;
+       struct hlist_node *node;
  
         write_lock(&ax25_uid_lock);
-       ax25_uid = ax25_uid_list;
-       while (ax25_uid != NULL) {
-               s        = ax25_uid;
-               ax25_uid = ax25_uid->next;
-
-               kfree(s);
+       ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
+               hlist_del_init(&ax25_uid->uid_node);
+               ax25_uid_put(ax25_uid);
         }
-       ax25_uid_list = NULL;
         write_unlock(&ax25_uid_lock);
  }
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c

index fb5524365bc2970c1430a39a7017bbf6d0223c91..55dc42eac92c090cee03ac1adb11946b9880c062 100644 (file)
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -191,7 +191,7 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
  
         /* Special commands */
         while ((skb = skb_dequeue(&hdev->driver_init))) {
-               skb->pkt_type = HCI_COMMAND_PKT;
+               bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
                 skb->dev = (void *) hdev;
                 skb_queue_tail(&hdev->cmd_q, skb);
                 hci_sched_cmd(hdev);
@@ -299,7 +299,6 @@ struct hci_dev *hci_dev_get(int index)
         read_unlock(&hci_dev_list_lock);
         return hdev;
  }
-EXPORT_SYMBOL(hci_dev_get);
  
  /* ---- Inquiry support ---- */
  static void inquiry_cache_flush(struct hci_dev *hdev)
@@ -996,11 +995,11 @@ static int hci_send_frame(struct sk_buff *skb)
                 return -ENODEV;
         }
  
-       BT_DBG("%s type %d len %d", hdev->name, skb->pkt_type, skb->len);
+       BT_DBG("%s type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len);
  
         if (atomic_read(&hdev->promisc)) {
                 /* Time stamp */
-               do_gettimeofday(&skb->stamp);
+               __net_timestamp(skb);
  
                 hci_send_to_sock(hdev, skb);
         }
@@ -1035,14 +1034,13 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p
  
         BT_DBG("skb len %d", skb->len);
  
-       skb->pkt_type = HCI_COMMAND_PKT;
+       bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
         skb->dev = (void *) hdev;
         skb_queue_tail(&hdev->cmd_q, skb);
         hci_sched_cmd(hdev);
  
         return 0;
  }
-EXPORT_SYMBOL(hci_send_cmd);
  
  /* Get data from the previously sent command */
  void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf)
@@ -1083,7 +1081,7 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
         BT_DBG("%s conn %p flags 0x%x", hdev->name, conn, flags);
  
         skb->dev = (void *) hdev;
-       skb->pkt_type = HCI_ACLDATA_PKT;
+       bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
         hci_add_acl_hdr(skb, conn->handle, flags | ACL_START);
  
         if (!(list = skb_shinfo(skb)->frag_list)) {
@@ -1105,7 +1103,7 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
                         skb = list; list = list->next;
                         
                         skb->dev = (void *) hdev;
-                       skb->pkt_type = HCI_ACLDATA_PKT;
+                       bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
                         hci_add_acl_hdr(skb, conn->handle, flags | ACL_CONT);
  
                         BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len);
@@ -1141,7 +1139,7 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
         memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE);
  
         skb->dev = (void *) hdev;
-       skb->pkt_type = HCI_SCODATA_PKT;
+       bt_cb(skb)->pkt_type = HCI_SCODATA_PKT;
         skb_queue_tail(&conn->data_q, skb);
         hci_sched_tx(hdev);
         return 0;
@@ -1371,7 +1369,7 @@ void hci_rx_task(unsigned long arg)
  
                 if (test_bit(HCI_INIT, &hdev->flags)) {
                         /* Don't process data packets in this states. */
-                       switch (skb->pkt_type) {
+                       switch (bt_cb(skb)->pkt_type) {
                         case HCI_ACLDATA_PKT:
                         case HCI_SCODATA_PKT:
                                 kfree_skb(skb);
@@ -1380,7 +1378,7 @@ void hci_rx_task(unsigned long arg)
                 }
  
                 /* Process frame */
-               switch (skb->pkt_type) {
+               switch (bt_cb(skb)->pkt_type) {
                 case HCI_EVENT_PKT:
                         hci_event_packet(hdev, skb);
                         break;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c

index c4b592b4ef10083fc5a270fb75ca419faf939e70..d6da0939216d292baf0088a179d92672a647a54d 100644 (file)
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -484,14 +484,18 @@ static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff
  /* Inquiry Result */
  static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
  {
+       struct inquiry_data data;
         struct inquiry_info *info = (struct inquiry_info *) (skb->data + 1);
         int num_rsp = *((__u8 *) skb->data);
  
         BT_DBG("%s num_rsp %d", hdev->name, num_rsp);
  
+       if (!num_rsp)
+               return;
+
         hci_dev_lock(hdev);
+
         for (; num_rsp; num_rsp--) {
-               struct inquiry_data data;
                 bacpy(&data.bdaddr, &info->bdaddr);
                 data.pscan_rep_mode     = info->pscan_rep_mode;
                 data.pscan_period_mode  = info->pscan_period_mode;
@@ -502,30 +506,55 @@ static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *
                 info++;
                 hci_inquiry_cache_update(hdev, &data);
         }
+
         hci_dev_unlock(hdev);
  }
  
  /* Inquiry Result With RSSI */
  static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct sk_buff *skb)
  {
-       struct inquiry_info_with_rssi *info = (struct inquiry_info_with_rssi *) (skb->data + 1);
+       struct inquiry_data data;
         int num_rsp = *((__u8 *) skb->data);
  
         BT_DBG("%s num_rsp %d", hdev->name, num_rsp);
  
+       if (!num_rsp)
+               return;
+
         hci_dev_lock(hdev);
-       for (; num_rsp; num_rsp--) {
-               struct inquiry_data data;
-               bacpy(&data.bdaddr, &info->bdaddr);
-               data.pscan_rep_mode     = info->pscan_rep_mode;
-               data.pscan_period_mode  = info->pscan_period_mode;
-               data.pscan_mode         = 0x00;
-               memcpy(data.dev_class, info->dev_class, 3);
-               data.clock_offset       = info->clock_offset;
-               data.rssi               = info->rssi;
-               info++;
-               hci_inquiry_cache_update(hdev, &data);
+
+       if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) {
+               struct inquiry_info_with_rssi_and_pscan_mode *info =
+                       (struct inquiry_info_with_rssi_and_pscan_mode *) (skb->data + 1);
+
+               for (; num_rsp; num_rsp--) {
+                       bacpy(&data.bdaddr, &info->bdaddr);
+                       data.pscan_rep_mode     = info->pscan_rep_mode;
+                       data.pscan_period_mode  = info->pscan_period_mode;
+                       data.pscan_mode         = info->pscan_mode;
+                       memcpy(data.dev_class, info->dev_class, 3);
+                       data.clock_offset       = info->clock_offset;
+                       data.rssi               = info->rssi;
+                       info++;
+                       hci_inquiry_cache_update(hdev, &data);
+               }
+       } else {
+               struct inquiry_info_with_rssi *info =
+                       (struct inquiry_info_with_rssi *) (skb->data + 1);
+
+               for (; num_rsp; num_rsp--) {
+                       bacpy(&data.bdaddr, &info->bdaddr);
+                       data.pscan_rep_mode     = info->pscan_rep_mode;
+                       data.pscan_period_mode  = info->pscan_period_mode;
+                       data.pscan_mode         = 0x00;
+                       memcpy(data.dev_class, info->dev_class, 3);
+                       data.clock_offset       = info->clock_offset;
+                       data.rssi               = info->rssi;
+                       info++;
+                       hci_inquiry_cache_update(hdev, &data);
+               }
         }
+
         hci_dev_unlock(hdev);
  }
  
@@ -865,6 +894,24 @@ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *sk
         hci_dev_unlock(hdev);
  }
  
+/* Page Scan Repetition Mode */
+static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+       struct hci_ev_pscan_rep_mode *ev = (struct hci_ev_pscan_rep_mode *) skb->data;
+       struct inquiry_entry *ie;
+
+       BT_DBG("%s", hdev->name);
+
+       hci_dev_lock(hdev);
+
+       if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr))) {
+               ie->data.pscan_rep_mode = ev->pscan_rep_mode;
+               ie->timestamp = jiffies;
+       }
+
+       hci_dev_unlock(hdev);
+}
+
  void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
  {
         struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data;
@@ -937,6 +984,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
                 hci_clock_offset_evt(hdev, skb);
                 break;
  
+       case HCI_EV_PSCAN_REP_MODE:
+               hci_pscan_rep_mode_evt(hdev, skb);
+               break;
+
         case HCI_EV_CMD_STATUS:
                 cs = (struct hci_ev_cmd_status *) skb->data;
                 skb_pull(skb, sizeof(cs));
@@ -1035,9 +1086,11 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data)
         ev->type = type;
         memcpy(ev->data, data, dlen);
  
-       skb->pkt_type = HCI_EVENT_PKT;
+       bt_cb(skb)->incoming = 1;
+       __net_timestamp(skb);
+
+       bt_cb(skb)->pkt_type = HCI_EVENT_PKT;
         skb->dev = (void *) hdev;
         hci_send_to_sock(hdev, skb);
         kfree_skb(skb);
  }
-EXPORT_SYMBOL(hci_si_event);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c

index ebdcce5e7ca0b6d67e8974a52ede4f934e2c1936..32ef7975a139149a31481711a34de2c27040ce17 100644 (file)
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -110,11 +110,11 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
                 /* Apply filter */
                 flt = &hci_pi(sk)->filter;
  
-               if (!test_bit((skb->pkt_type == HCI_VENDOR_PKT) ?
-                               0 : (skb->pkt_type & HCI_FLT_TYPE_BITS), &flt->type_mask))
+               if (!test_bit((bt_cb(skb)->pkt_type == HCI_VENDOR_PKT) ?
+                               0 : (bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS), &flt->type_mask))
                         continue;
  
-               if (skb->pkt_type == HCI_EVENT_PKT) {
+               if (bt_cb(skb)->pkt_type == HCI_EVENT_PKT) {
                         register int evt = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS);
  
                         if (!hci_test_bit(evt, &flt->event_mask))
@@ -131,7 +131,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
                         continue;
  
                 /* Put type byte before the data */
-               memcpy(skb_push(nskb, 1), &nskb->pkt_type, 1);
+               memcpy(skb_push(nskb, 1), &bt_cb(nskb)->pkt_type, 1);
  
                 if (sock_queue_rcv_skb(sk, nskb))
                         kfree_skb(nskb);
@@ -327,11 +327,17 @@ static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_
  {
         __u32 mask = hci_pi(sk)->cmsg_mask;
  
-       if (mask & HCI_CMSG_DIR)
-               put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(int), &bt_cb(skb)->incoming);
+       if (mask & HCI_CMSG_DIR) {
+               int incoming = bt_cb(skb)->incoming;
+               put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(incoming), &incoming);
+       }
+
+       if (mask & HCI_CMSG_TSTAMP) {
+               struct timeval tv;
  
-       if (mask & HCI_CMSG_TSTAMP)
-               put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(skb->stamp), &skb->stamp);
+               skb_get_timestamp(skb, &tv);
+               put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(tv), &tv);
+       }
  }
   
  static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, 
@@ -405,11 +411,11 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
                 goto drop;
         }
  
-       skb->pkt_type = *((unsigned char *) skb->data);
+       bt_cb(skb)->pkt_type = *((unsigned char *) skb->data);
         skb_pull(skb, 1);
         skb->dev = (void *) hdev;
  
-       if (skb->pkt_type == HCI_COMMAND_PKT) {
+       if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) {
                 u16 opcode = __le16_to_cpu(get_unaligned((u16 *)skb->data));
                 u16 ogf = hci_opcode_ogf(opcode);
                 u16 ocf = hci_opcode_ocf(opcode);
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c

index 32fccfb5bfa5de1b8c701fe0bb107792e2653534..d3d6bc547212f7d289928cdf393abce3a167eb08 100644 (file)
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -372,7 +372,7 @@ static struct proto l2cap_proto = {
         .obj_size       = sizeof(struct l2cap_pinfo)
  };
  
-static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, int prio)
+static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio)
  {
         struct sock *sk;
  
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c

index 9efb0a0936123fe4cf88cb8870eb1ac005855674..ee6a66979913c8fb4c0d4dca00335bcc3e89c235 100644 (file)
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -34,31 +34,6 @@
  
  #include <net/bluetooth/bluetooth.h>
  
-void bt_dump(char *pref, __u8 *buf, int count)
-{
-       char *ptr;
-       char line[100];
-       unsigned int i;
-
-       printk(KERN_INFO "%s: dump, len %d\n", pref, count);
-
-       ptr = line;
-       *ptr = 0;
-       for (i = 0; i < count; i++) {
-               ptr += sprintf(ptr, " %2.2X", buf[i]);
-
-               if (i && !((i + 1) % 20)) {
-                       printk(KERN_INFO "%s:%s\n", pref, line);
-                       ptr = line;
-                       *ptr = 0;
-               }
-       }
-
-       if (line[0])
-               printk(KERN_INFO "%s:%s\n", pref, line);
-}
-EXPORT_SYMBOL(bt_dump);
-
  void baswap(bdaddr_t *dst, bdaddr_t *src)
  {
         unsigned char *d = (unsigned char *) dst;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c

index e9e6fda66f1a3e221013c628612808ea5529e2a1..173f46e8cdaedbaffaf2a75852bd3522c7523421 100644 (file)
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -21,10 +21,6 @@
     SOFTWARE IS DISCLAIMED.
  */
  
-/* 
-   RPN support    -    Dirk Husemann <hud@zurich.ibm.com>
-*/
-
  /*
   * Bluetooth RFCOMM core.
   *
@@ -115,10 +111,10 @@ static void rfcomm_session_del(struct rfcomm_session *s);
  #define __get_mcc_len(b)  ((b & 0xfe) >> 1)
  
  /* RPN macros */
-#define __rpn_line_settings(data, stop, parity)  ((data & 0x3) | ((stop & 0x1) << 2) | ((parity & 0x3) << 3))
+#define __rpn_line_settings(data, stop, parity)  ((data & 0x3) | ((stop & 0x1) << 2) | ((parity & 0x7) << 3))
  #define __get_rpn_data_bits(line) ((line) & 0x3)
  #define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1)
-#define __get_rpn_parity(line)    (((line) >> 3) & 0x3)
+#define __get_rpn_parity(line)    (((line) >> 3) & 0x7)
  
  static inline void rfcomm_schedule(uint event)
  {
@@ -233,7 +229,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
         d->rx_credits = RFCOMM_DEFAULT_CREDITS;
  }
  
-struct rfcomm_dlc *rfcomm_dlc_alloc(int prio)
+struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio)
  {
         struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio);
         if (!d)
@@ -389,8 +385,6 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
                 rfcomm_dlc_unlock(d);
  
                 skb_queue_purge(&d->tx_queue);
-               rfcomm_session_put(s);
-
                 rfcomm_dlc_unlink(d);
         }
  
@@ -600,8 +594,6 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
                 goto failed;
         }
  
-       rfcomm_session_hold(s);
-
         s->initiator = 1;
  
         bacpy(&addr.l2_bdaddr, dst);
@@ -784,10 +776,10 @@ static int rfcomm_send_pn(struct rfcomm_session *s, int cr, struct rfcomm_dlc *d
         return rfcomm_send_frame(s, buf, ptr - buf);
  }
  
-static int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
-                          u8 bit_rate, u8 data_bits, u8 stop_bits,
-                          u8 parity, u8 flow_ctrl_settings, 
-                          u8 xon_char, u8 xoff_char, u16 param_mask)
+int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
+                       u8 bit_rate, u8 data_bits, u8 stop_bits,
+                       u8 parity, u8 flow_ctrl_settings, 
+                       u8 xon_char, u8 xoff_char, u16 param_mask)
  {
         struct rfcomm_hdr *hdr;
         struct rfcomm_mcc *mcc;
@@ -795,9 +787,9 @@ static int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
         u8 buf[16], *ptr = buf;
  
         BT_DBG("%p cr %d dlci %d bit_r 0x%x data_b 0x%x stop_b 0x%x parity 0x%x"
-              "flwc_s 0x%x xon_c 0x%x xoff_c 0x%x p_mask 0x%x", 
-                       s, cr, dlci, bit_rate, data_bits, stop_bits, parity, 
-                       flow_ctrl_settings, xon_char, xoff_char, param_mask);
+                       " flwc_s 0x%x xon_c 0x%x xoff_c 0x%x p_mask 0x%x", 
+               s, cr, dlci, bit_rate, data_bits, stop_bits, parity, 
+               flow_ctrl_settings, xon_char, xoff_char, param_mask);
  
         hdr = (void *) ptr; ptr += sizeof(*hdr);
         hdr->addr = __addr(s->initiator, 0);
@@ -1269,16 +1261,16 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
         u8 xon_char  = 0;
         u8 xoff_char = 0;
         u16 rpn_mask = RFCOMM_RPN_PM_ALL;
-       
-       BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x", 
-              dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl,
-              rpn->xon_char, rpn->xoff_char, rpn->param_mask);
-       
-       if (!cr) 
+
+       BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x",
+               dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl,
+               rpn->xon_char, rpn->xoff_char, rpn->param_mask);
+
+       if (!cr)
                 return 0;
-       
+
         if (len == 1) {
-               /* request: return default setting */
+               /* This is a request, return default settings */
                 bit_rate  = RFCOMM_RPN_BR_115200;
                 data_bits = RFCOMM_RPN_DATA_8;
                 stop_bits = RFCOMM_RPN_STOP_1;
@@ -1286,11 +1278,12 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
                 flow_ctrl = RFCOMM_RPN_FLOW_NONE;
                 xon_char  = RFCOMM_RPN_XON_CHAR;
                 xoff_char = RFCOMM_RPN_XOFF_CHAR;
-
                 goto rpn_out;
         }
-       /* check for sane values: ignore/accept bit_rate, 8 bits, 1 stop bit, no parity,
-                                 no flow control lines, normal XON/XOFF chars */
+
+       /* Check for sane values, ignore/accept bit_rate, 8 bits, 1 stop bit,
+        * no parity, no flow control lines, normal XON/XOFF chars */
+
         if (rpn->param_mask & RFCOMM_RPN_PM_BITRATE) {
                 bit_rate = rpn->bit_rate;
                 if (bit_rate != RFCOMM_RPN_BR_115200) {
@@ -1299,6 +1292,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
                         rpn_mask ^= RFCOMM_RPN_PM_BITRATE;
                 }
         }
+
         if (rpn->param_mask & RFCOMM_RPN_PM_DATA) {
                 data_bits = __get_rpn_data_bits(rpn->line_settings);
                 if (data_bits != RFCOMM_RPN_DATA_8) {
@@ -1307,6 +1301,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
                         rpn_mask ^= RFCOMM_RPN_PM_DATA;
                 }
         }
+
         if (rpn->param_mask & RFCOMM_RPN_PM_STOP) {
                 stop_bits = __get_rpn_stop_bits(rpn->line_settings);
                 if (stop_bits != RFCOMM_RPN_STOP_1) {
@@ -1315,6 +1310,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
                         rpn_mask ^= RFCOMM_RPN_PM_STOP;
                 }
         }
+
         if (rpn->param_mask & RFCOMM_RPN_PM_PARITY) {
                 parity = __get_rpn_parity(rpn->line_settings);
                 if (parity != RFCOMM_RPN_PARITY_NONE) {
@@ -1323,6 +1319,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
                         rpn_mask ^= RFCOMM_RPN_PM_PARITY;
                 }
         }
+
         if (rpn->param_mask & RFCOMM_RPN_PM_FLOW) {
                 flow_ctrl = rpn->flow_ctrl;
                 if (flow_ctrl != RFCOMM_RPN_FLOW_NONE) {
@@ -1331,6 +1328,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
                         rpn_mask ^= RFCOMM_RPN_PM_FLOW;
                 }
         }
+
         if (rpn->param_mask & RFCOMM_RPN_PM_XON) {
                 xon_char = rpn->xon_char;
                 if (xon_char != RFCOMM_RPN_XON_CHAR) {
@@ -1339,6 +1337,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
                         rpn_mask ^= RFCOMM_RPN_PM_XON;
                 }
         }
+
         if (rpn->param_mask & RFCOMM_RPN_PM_XOFF) {
                 xoff_char = rpn->xoff_char;
                 if (xoff_char != RFCOMM_RPN_XOFF_CHAR) {
@@ -1349,9 +1348,8 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
         }
  
  rpn_out:
-       rfcomm_send_rpn(s, 0, dlci, 
-                       bit_rate, data_bits, stop_bits, parity, flow_ctrl,
-                       xon_char, xoff_char, rpn_mask);
+       rfcomm_send_rpn(s, 0, dlci, bit_rate, data_bits, stop_bits,
+                       parity, flow_ctrl, xon_char, xoff_char, rpn_mask);
  
         return 0;
  }
@@ -1362,14 +1360,13 @@ static int rfcomm_recv_rls(struct rfcomm_session *s, int cr, struct sk_buff *skb
         u8 dlci = __get_dlci(rls->dlci);
  
         BT_DBG("dlci %d cr %d status 0x%x", dlci, cr, rls->status);
-       
+
         if (!cr)
                 return 0;
  
-       /* FIXME: We should probably do something with this
-          information here. But for now it's sufficient just
-          to reply -- Bluetooth 1.1 says it's mandatory to 
-          recognise and respond to RLS */
+       /* We should probably do something with this information here. But
+        * for now it's sufficient just to reply -- Bluetooth 1.1 says it's
+        * mandatory to recognise and respond to RLS */
  
         rfcomm_send_rls(s, 0, dlci, rls->status);
  
@@ -1385,7 +1382,7 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb
         BT_DBG("dlci %d cr %d v24 0x%x", dlci, cr, msc->v24_sig);
  
         d = rfcomm_dlc_get(s, dlci);
-       if (!d) 
+       if (!d)
                 return 0;
  
         if (cr) {
@@ -1393,7 +1390,7 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb
                         set_bit(RFCOMM_TX_THROTTLED, &d->flags);
                 else
                         clear_bit(RFCOMM_TX_THROTTLED, &d->flags);
-               
+
                 rfcomm_dlc_lock(d);
                 if (d->modem_status)
                         d->modem_status(d, msc->v24_sig);
@@ -1402,7 +1399,7 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb
                 rfcomm_send_msc(s, 0, dlci, msc->v24_sig);
  
                 d->mscex |= RFCOMM_MSCEX_RX;
-       } else 
+       } else
                 d->mscex |= RFCOMM_MSCEX_TX;
  
         return 0;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c

index 63a123c5c41b38dedef853b56c44ac4f55d55d16..90e19eb6d3cce0de84bb9a48796f9c260aacc71e 100644 (file)
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -284,7 +284,7 @@ static struct proto rfcomm_proto = {
         .obj_size       = sizeof(struct rfcomm_pinfo)
  };
  
-static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, int prio)
+static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio)
  {
         struct rfcomm_dlc *d;
         struct sock *sk;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c

index 6304590fd36a5f6d132f8a6262d5acdb616c0ebf..1bca860a6109fefc7e31216534996476826470ce 100644 (file)
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -286,7 +286,7 @@ static inline void rfcomm_set_owner_w(struct sk_buff *skb, struct rfcomm_dev *de
         skb->destructor = rfcomm_wfree;
  }
  
-static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, int priority)
+static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, unsigned int __nocast priority)
  {
         if (atomic_read(&dev->wmem_alloc) < rfcomm_room(dev->dlc)) {
                 struct sk_buff *skb = alloc_skb(size, priority);
@@ -528,9 +528,14 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
         struct rfcomm_dev *dev = dlc->owner;
         if (!dev)
                 return;
-       
+
         BT_DBG("dlc %p dev %p v24_sig 0x%02x", dlc, dev, v24_sig);
  
+       if ((dev->modem_status & TIOCM_CD) && !(v24_sig & RFCOMM_V24_DV)) {
+               if (dev->tty && !C_CLOCAL(dev->tty))
+                       tty_hangup(dev->tty);
+       }
+
         dev->modem_status = 
                 ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) |
                 ((v24_sig & RFCOMM_V24_RTR) ? (TIOCM_RTS | TIOCM_CTS) : 0) |
@@ -740,20 +745,143 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, struct file *filp, unsigned
         return -ENOIOCTLCMD;
  }
  
-#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
-
  static void rfcomm_tty_set_termios(struct tty_struct *tty, struct termios *old)
  {
-       BT_DBG("tty %p", tty);
+       struct termios *new = (struct termios *) tty->termios;
+       int old_baud_rate = tty_termios_baud_rate(old);
+       int new_baud_rate = tty_termios_baud_rate(new);
  
-       if ((tty->termios->c_cflag == old->c_cflag) &&
-               (RELEVANT_IFLAG(tty->termios->c_iflag) == RELEVANT_IFLAG(old->c_iflag)))
-               return;
+       u8 baud, data_bits, stop_bits, parity, x_on, x_off;
+       u16 changes = 0;
+
+       struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+
+       BT_DBG("tty %p termios %p", tty, old);
+
+       /* Handle turning off CRTSCTS */
+       if ((old->c_cflag & CRTSCTS) && !(new->c_cflag & CRTSCTS)) 
+               BT_DBG("Turning off CRTSCTS unsupported");
+
+       /* Parity on/off and when on, odd/even */
+       if (((old->c_cflag & PARENB) != (new->c_cflag & PARENB)) ||
+                       ((old->c_cflag & PARODD) != (new->c_cflag & PARODD)) ) {
+               changes |= RFCOMM_RPN_PM_PARITY;
+               BT_DBG("Parity change detected.");
+       }
+
+       /* Mark and space parity are not supported! */
+       if (new->c_cflag & PARENB) {
+               if (new->c_cflag & PARODD) {
+                       BT_DBG("Parity is ODD");
+                       parity = RFCOMM_RPN_PARITY_ODD;
+               } else {
+                       BT_DBG("Parity is EVEN");
+                       parity = RFCOMM_RPN_PARITY_EVEN;
+               }
+       } else {
+               BT_DBG("Parity is OFF");
+               parity = RFCOMM_RPN_PARITY_NONE;
+       }
+
+       /* Setting the x_on / x_off characters */
+       if (old->c_cc[VSTOP] != new->c_cc[VSTOP]) {
+               BT_DBG("XOFF custom");
+               x_on = new->c_cc[VSTOP];
+               changes |= RFCOMM_RPN_PM_XON;
+       } else {
+               BT_DBG("XOFF default");
+               x_on = RFCOMM_RPN_XON_CHAR;
+       }
+
+       if (old->c_cc[VSTART] != new->c_cc[VSTART]) {
+               BT_DBG("XON custom");
+               x_off = new->c_cc[VSTART];
+               changes |= RFCOMM_RPN_PM_XOFF;
+       } else {
+               BT_DBG("XON default");
+               x_off = RFCOMM_RPN_XOFF_CHAR;
+       }
+
+       /* Handle setting of stop bits */
+       if ((old->c_cflag & CSTOPB) != (new->c_cflag & CSTOPB))
+               changes |= RFCOMM_RPN_PM_STOP;
+
+       /* POSIX does not support 1.5 stop bits and RFCOMM does not
+        * support 2 stop bits. So a request for 2 stop bits gets
+        * translated to 1.5 stop bits */
+       if (new->c_cflag & CSTOPB) {
+               stop_bits = RFCOMM_RPN_STOP_15;
+       } else {
+               stop_bits = RFCOMM_RPN_STOP_1;
+       }
+
+       /* Handle number of data bits [5-8] */
+       if ((old->c_cflag & CSIZE) != (new->c_cflag & CSIZE)) 
+               changes |= RFCOMM_RPN_PM_DATA;
+
+       switch (new->c_cflag & CSIZE) {
+       case CS5:
+               data_bits = RFCOMM_RPN_DATA_5;
+               break;
+       case CS6:
+               data_bits = RFCOMM_RPN_DATA_6;
+               break;
+       case CS7:
+               data_bits = RFCOMM_RPN_DATA_7;
+               break;
+       case CS8:
+               data_bits = RFCOMM_RPN_DATA_8;
+               break;
+       default:
+               data_bits = RFCOMM_RPN_DATA_8;
+               break;
+       }
+
+       /* Handle baudrate settings */
+       if (old_baud_rate != new_baud_rate)
+               changes |= RFCOMM_RPN_PM_BITRATE;
  
-       /* handle turning off CRTSCTS */
-       if ((old->c_cflag & CRTSCTS) && !(tty->termios->c_cflag & CRTSCTS)) {
-               BT_DBG("turning off CRTSCTS");
+       switch (new_baud_rate) {
+       case 2400:
+               baud = RFCOMM_RPN_BR_2400;
+               break;
+       case 4800:
+               baud = RFCOMM_RPN_BR_4800;
+               break;
+       case 7200:
+               baud = RFCOMM_RPN_BR_7200;
+               break;
+       case 9600:
+               baud = RFCOMM_RPN_BR_9600;
+               break;
+       case 19200: 
+               baud = RFCOMM_RPN_BR_19200;
+               break;
+       case 38400:
+               baud = RFCOMM_RPN_BR_38400;
+               break;
+       case 57600:
+               baud = RFCOMM_RPN_BR_57600;
+               break;
+       case 115200:
+               baud = RFCOMM_RPN_BR_115200;
+               break;
+       case 230400:
+               baud = RFCOMM_RPN_BR_230400;
+               break;
+       default:
+               /* 9600 is standard according to the RFCOMM specification */
+               baud = RFCOMM_RPN_BR_9600;
+               break;
+       
         }
+
+       if (changes)
+               rfcomm_send_rpn(dev->dlc->session, 1, dev->dlc->dlci, baud,
+                               data_bits, stop_bits, parity,
+                               RFCOMM_RPN_FLOW_NONE, x_on, x_off, changes);
+
+       return;
  }
  
  static void rfcomm_tty_throttle(struct tty_struct *tty)
@@ -761,7 +889,7 @@ static void rfcomm_tty_throttle(struct tty_struct *tty)
         struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
  
         BT_DBG("tty %p dev %p", tty, dev);
-       
+
         rfcomm_dlc_throttle(dev->dlc);
  }
  
@@ -770,7 +898,7 @@ static void rfcomm_tty_unthrottle(struct tty_struct *tty)
         struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
  
         BT_DBG("tty %p dev %p", tty, dev);
-       
+
         rfcomm_dlc_unthrottle(dev->dlc);
  }
  
@@ -841,35 +969,35 @@ static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp)
  
  static int rfcomm_tty_tiocmset(struct tty_struct *tty, struct file *filp, unsigned int set, unsigned int clear)
  {
-       struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
-       struct rfcomm_dlc *dlc = dev->dlc;
-       u8 v24_sig;
+       struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+       struct rfcomm_dlc *dlc = dev->dlc;
+       u8 v24_sig;
  
         BT_DBG("tty %p dev %p set 0x%02x clear 0x%02x", tty, dev, set, clear);
  
-       rfcomm_dlc_get_modem_status(dlc, &v24_sig);
-
-       if (set & TIOCM_DSR || set & TIOCM_DTR)
-               v24_sig |= RFCOMM_V24_RTC;
-       if (set & TIOCM_RTS || set & TIOCM_CTS)
-               v24_sig |= RFCOMM_V24_RTR;
-       if (set & TIOCM_RI)
-               v24_sig |= RFCOMM_V24_IC;
-       if (set & TIOCM_CD)
-               v24_sig |= RFCOMM_V24_DV;
-
-       if (clear & TIOCM_DSR || clear & TIOCM_DTR)
-               v24_sig &= ~RFCOMM_V24_RTC;
-       if (clear & TIOCM_RTS || clear & TIOCM_CTS)
-               v24_sig &= ~RFCOMM_V24_RTR;
-       if (clear & TIOCM_RI)
-               v24_sig &= ~RFCOMM_V24_IC;
-       if (clear & TIOCM_CD)
-               v24_sig &= ~RFCOMM_V24_DV;
-
-       rfcomm_dlc_set_modem_status(dlc, v24_sig);
-
-       return 0;
+       rfcomm_dlc_get_modem_status(dlc, &v24_sig);
+
+       if (set & TIOCM_DSR || set & TIOCM_DTR)
+               v24_sig |= RFCOMM_V24_RTC;
+       if (set & TIOCM_RTS || set & TIOCM_CTS)
+               v24_sig |= RFCOMM_V24_RTR;
+       if (set & TIOCM_RI)
+               v24_sig |= RFCOMM_V24_IC;
+       if (set & TIOCM_CD)
+               v24_sig |= RFCOMM_V24_DV;
+
+       if (clear & TIOCM_DSR || clear & TIOCM_DTR)
+               v24_sig &= ~RFCOMM_V24_RTC;
+       if (clear & TIOCM_RTS || clear & TIOCM_CTS)
+               v24_sig &= ~RFCOMM_V24_RTR;
+       if (clear & TIOCM_RI)
+               v24_sig &= ~RFCOMM_V24_IC;
+       if (clear & TIOCM_CD)
+               v24_sig &= ~RFCOMM_V24_DV;
+
+       rfcomm_dlc_set_modem_status(dlc, v24_sig);
+
+       return 0;
  }
  
  /* ---- TTY structure ---- */
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c

index 746c11fc017e9b8e73320ac126e68a607d02e76a..ce7ab7dfa0b206f7ecfa3e76f15cbc0e71666f2d 100644 (file)
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -418,7 +418,7 @@ static struct proto sco_proto = {
         .obj_size       = sizeof(struct sco_pinfo)
  };
  
-static struct sock *sco_sock_alloc(struct socket *sock, int proto, int prio)
+static struct sock *sco_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio)
  {
         struct sock *sk;
  
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c

index e6c2200b7ca3f75824f029810221fbcb86d39d5b..24396b914d11634f6e3fe26d9c3c06a9d94ded5c 100644 (file)
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -23,7 +23,7 @@
  #include <asm/atomic.h>
  #include "br_private.h"
  
-static kmem_cache_t *br_fdb_cache;
+static kmem_cache_t *br_fdb_cache __read_mostly;
  static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
                       const unsigned char *addr);
  
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c

index 02c632b4d3259a97b91a972d97e882dc46959e09..c93d35ab95c02ac50bb7edfe139e2766a3bf6d2b 100644 (file)
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -23,10 +23,9 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
  {
         struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
  
-       if ((*pskb)->nfmark != info->mark) {
+       if ((*pskb)->nfmark != info->mark)
                 (*pskb)->nfmark = info->mark;
-               (*pskb)->nfcache |= NFC_ALTERED;
-       }
+
         return info->target;
  }
  
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c

index 01af4fcef26d0b6926604b2ba92a9d3f41e38bf5..aae26ae2e61f5ec990c0a7f5d23caeaa1e642e68 100644 (file)
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -78,8 +78,8 @@ static void ulog_send(unsigned int nlgroup)
         if (ub->qlen > 1)
                 ub->lastnlh->nlmsg_type = NLMSG_DONE;
  
-       NETLINK_CB(ub->skb).dst_groups = 1 << nlgroup;
-       netlink_broadcast(ebtulognl, ub->skb, 0, 1 << nlgroup, GFP_ATOMIC);
+       NETLINK_CB(ub->skb).dst_group = nlgroup + 1;
+       netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
  
         ub->qlen = 0;
         ub->skb = NULL;
@@ -162,7 +162,7 @@ static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
         pm->version = EBT_ULOG_VERSION;
         do_gettimeofday(&pm->stamp);
         if (ub->qlen == 1)
-               ub->skb->stamp = pm->stamp;
+               skb_set_timestamp(ub->skb, &pm->stamp);
         pm->data_len = copy_len;
         pm->mark = skb->nfmark;
         pm->hook = hooknr;
@@ -258,7 +258,8 @@ static int __init init(void)
                 spin_lock_init(&ulog_buffers[i].lock);
         }
  
-       ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL);
+       ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS,
+                                         NULL, THIS_MODULE);
         if (!ebtulognl)
                 ret = -ENOMEM;
         else if ((ret = ebt_register_watcher(&ulog)))
diff --git a/net/compat.c b/net/compat.c

index be5d936dc42396ae0c8a18d77f80b72d04772d83..d99ab969589397f9cc844db57aac0b7ab2f9711f 100644 (file)
--- a/net/compat.c
+++ b/net/compat.c
@@ -91,20 +91,11 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
         } else
                 kern_msg->msg_name = NULL;
  
-       if(kern_msg->msg_iovlen > UIO_FASTIOV) {
-               kern_iov = kmalloc(kern_msg->msg_iovlen * sizeof(struct iovec),
-                                  GFP_KERNEL);
-               if(!kern_iov)
-                       return -ENOMEM;
-       }
-
         tot_len = iov_from_user_compat_to_kern(kern_iov,
                                           (struct compat_iovec __user *)kern_msg->msg_iov,
                                           kern_msg->msg_iovlen);
         if(tot_len >= 0)
                 kern_msg->msg_iov = kern_iov;
-       else if(kern_msg->msg_iovlen > UIO_FASTIOV)
-               kfree(kern_iov);
  
         return tot_len;
  }
diff --git a/net/core/Makefile b/net/core/Makefile

index f5f5e58943e862e0dde31729cb2a370060c3dd76..630da0f0579e5c3bf26cc5391ee49f7551f00882 100644 (file)
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -12,7 +12,6 @@ obj-y              += dev.o ethtool.o dev_mcast.o dst.o \
  
  obj-$(CONFIG_XFRM) += flow.o
  obj-$(CONFIG_SYSFS) += net-sysfs.o
-obj-$(CONFIG_NETFILTER) += netfilter.o
  obj-$(CONFIG_NET_DIVERT) += dv.o
  obj-$(CONFIG_NET_PKTGEN) += pktgen.o
  obj-$(CONFIG_NET_RADIO) += wireless.o
diff --git a/net/core/datagram.c b/net/core/datagram.c

index fcee054b6f750a98063ec164f667e16d4043731e..da9bf71421a7ef98e6ade66ea8902d7cfaadb37f 100644 (file)
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -43,7 +43,6 @@
  #include <linux/errno.h>
  #include <linux/sched.h>
  #include <linux/inet.h>
-#include <linux/tcp.h>
  #include <linux/netdevice.h>
  #include <linux/rtnetlink.h>
  #include <linux/poll.h>
@@ -51,9 +50,10 @@
  
  #include <net/protocol.h>
  #include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/checksum.h>
  
+#include <net/checksum.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
  
  /*
   *     Is a socket 'connection oriented' ?
diff --git a/net/core/dev.c b/net/core/dev.c

index 52a3bf7ae177a803b5b77190ec1f34e8070175e5..c01511e3d0c14a417f4ac025749405c061446e19 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -267,10 +267,6 @@ void dev_add_pack(struct packet_type *pt)
         spin_unlock_bh(&ptype_lock);
  }
  
-extern void linkwatch_run_queue(void);
-
-
-
  /**
   *     __dev_remove_pack        - remove packet handler
   *     @pt: packet type declaration
@@ -1009,13 +1005,22 @@ void net_disable_timestamp(void)
         atomic_dec(&netstamp_needed);
  }
  
-static inline void net_timestamp(struct timeval *stamp)
+void __net_timestamp(struct sk_buff *skb)
+{
+       struct timeval tv;
+
+       do_gettimeofday(&tv);
+       skb_set_timestamp(skb, &tv);
+}
+EXPORT_SYMBOL(__net_timestamp);
+
+static inline void net_timestamp(struct sk_buff *skb)
  {
         if (atomic_read(&netstamp_needed))
-               do_gettimeofday(stamp);
+               __net_timestamp(skb);
         else {
-               stamp->tv_sec = 0;
-               stamp->tv_usec = 0;
+               skb->tstamp.off_sec = 0;
+               skb->tstamp.off_usec = 0;
         }
  }
  
@@ -1027,7 +1032,8 @@ static inline void net_timestamp(struct timeval *stamp)
  void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
  {
         struct packet_type *ptype;
-       net_timestamp(&skb->stamp);
+
+       net_timestamp(skb);
  
         rcu_read_lock();
         list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1058,7 +1064,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
  
                         skb2->h.raw = skb2->nh.raw;
                         skb2->pkt_type = PACKET_OUTGOING;
-                       ptype->func(skb2, skb->dev, ptype);
+                       ptype->func(skb2, skb->dev, ptype, skb->dev);
                 }
         }
         rcu_read_unlock();
@@ -1123,8 +1129,6 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
  #define illegal_highdma(dev, skb)      (0)
  #endif
  
-extern void skb_release_data(struct sk_buff *);
-
  /* Keep head the same: replace data */
  int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask)
  {
@@ -1379,8 +1383,8 @@ int netif_rx(struct sk_buff *skb)
         if (netpoll_rx(skb))
                 return NET_RX_DROP;
  
-       if (!skb->stamp.tv_sec)
-               net_timestamp(&skb->stamp);
+       if (!skb->tstamp.off_sec)
+               net_timestamp(skb);
  
         /*
          * The code is rearranged so that the path is the most
@@ -1425,14 +1429,14 @@ int netif_rx_ni(struct sk_buff *skb)
  
  EXPORT_SYMBOL(netif_rx_ni);
  
-static __inline__ void skb_bond(struct sk_buff *skb)
+static inline struct net_device *skb_bond(struct sk_buff *skb)
  {
         struct net_device *dev = skb->dev;
  
-       if (dev->master) {
-               skb->real_dev = skb->dev;
+       if (dev->master)
                 skb->dev = dev->master;
-       }
+
+       return dev;
  }
  
  static void net_tx_action(struct softirq_action *h)
@@ -1482,10 +1486,11 @@ static void net_tx_action(struct softirq_action *h)
  }
  
  static __inline__ int deliver_skb(struct sk_buff *skb,
-                                 struct packet_type *pt_prev)
+                                 struct packet_type *pt_prev,
+                                 struct net_device *orig_dev)
  {
         atomic_inc(&skb->users);
-       return pt_prev->func(skb, skb->dev, pt_prev);
+       return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
  }
  
  #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
@@ -1496,7 +1501,8 @@ struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
  void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
  
  static __inline__ int handle_bridge(struct sk_buff **pskb,
-                                   struct packet_type **pt_prev, int *ret)
+                                   struct packet_type **pt_prev, int *ret,
+                                   struct net_device *orig_dev)
  {
         struct net_bridge_port *port;
  
@@ -1505,14 +1511,14 @@ static __inline__ int handle_bridge(struct sk_buff **pskb,
                 return 0;
  
         if (*pt_prev) {
-               *ret = deliver_skb(*pskb, *pt_prev);
+               *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
                 *pt_prev = NULL;
         } 
         
         return br_handle_frame_hook(port, pskb);
  }
  #else
-#define handle_bridge(skb, pt_prev, ret)       (0)
+#define handle_bridge(skb, pt_prev, ret, orig_dev)     (0)
  #endif
  
  #ifdef CONFIG_NET_CLS_ACT
@@ -1534,17 +1540,14 @@ static int ing_filter(struct sk_buff *skb)
                 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
                 if (MAX_RED_LOOP < ttl++) {
                         printk("Redir loop detected Dropping packet (%s->%s)\n",
-                               skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
+                               skb->input_dev->name, skb->dev->name);
                         return TC_ACT_SHOT;
                 }
  
                 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
  
                 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
-               if (NULL == skb->input_dev) {
-                       skb->input_dev = skb->dev;
-                       printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
-               }
+
                 spin_lock(&dev->ingress_lock);
                 if ((q = dev->qdisc_ingress) != NULL)
                         result = q->enqueue(skb, q);
@@ -1559,6 +1562,7 @@ static int ing_filter(struct sk_buff *skb)
  int netif_receive_skb(struct sk_buff *skb)
  {
         struct packet_type *ptype, *pt_prev;
+       struct net_device *orig_dev;
         int ret = NET_RX_DROP;
         unsigned short type;
  
@@ -1566,10 +1570,13 @@ int netif_receive_skb(struct sk_buff *skb)
         if (skb->dev->poll && netpoll_rx(skb))
                 return NET_RX_DROP;
  
-       if (!skb->stamp.tv_sec)
-               net_timestamp(&skb->stamp);
+       if (!skb->tstamp.off_sec)
+               net_timestamp(skb);
+
+       if (!skb->input_dev)
+               skb->input_dev = skb->dev;
  
-       skb_bond(skb);
+       orig_dev = skb_bond(skb);
  
         __get_cpu_var(netdev_rx_stat).total++;
  
@@ -1590,14 +1597,14 @@ int netif_receive_skb(struct sk_buff *skb)
         list_for_each_entry_rcu(ptype, &ptype_all, list) {
                 if (!ptype->dev || ptype->dev == skb->dev) {
                         if (pt_prev) 
-                               ret = deliver_skb(skb, pt_prev);
+                               ret = deliver_skb(skb, pt_prev, orig_dev);
                         pt_prev = ptype;
                 }
         }
  
  #ifdef CONFIG_NET_CLS_ACT
         if (pt_prev) {
-               ret = deliver_skb(skb, pt_prev);
+               ret = deliver_skb(skb, pt_prev, orig_dev);
                 pt_prev = NULL; /* noone else should process this after*/
         } else {
                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
@@ -1616,7 +1623,7 @@ ncls:
  
         handle_diverter(skb);
  
-       if (handle_bridge(&skb, &pt_prev, &ret))
+       if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
                 goto out;
  
         type = skb->protocol;
@@ -1624,13 +1631,13 @@ ncls:
                 if (ptype->type == type &&
                     (!ptype->dev || ptype->dev == skb->dev)) {
                         if (pt_prev) 
-                               ret = deliver_skb(skb, pt_prev);
+                               ret = deliver_skb(skb, pt_prev, orig_dev);
                         pt_prev = ptype;
                 }
         }
  
         if (pt_prev) {
-               ret = pt_prev->func(skb, skb->dev, pt_prev);
+               ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
         } else {
                 kfree_skb(skb);
                 /* Jamal, now you will not able to escape explaining
@@ -1696,7 +1703,8 @@ static void net_rx_action(struct softirq_action *h)
         struct softnet_data *queue = &__get_cpu_var(softnet_data);
         unsigned long start_time = jiffies;
         int budget = netdev_budget;
-       
+       void *have;
+
         local_irq_disable();
  
         while (!list_empty(&queue->poll_list)) {
@@ -1709,10 +1717,10 @@ static void net_rx_action(struct softirq_action *h)
  
                 dev = list_entry(queue->poll_list.next,
                                  struct net_device, poll_list);
-               netpoll_poll_lock(dev);
+               have = netpoll_poll_lock(dev);
  
                 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
-                       netpoll_poll_unlock(dev);
+                       netpoll_poll_unlock(have);
                         local_irq_disable();
                         list_del(&dev->poll_list);
                         list_add_tail(&dev->poll_list, &queue->poll_list);
@@ -1721,7 +1729,7 @@ static void net_rx_action(struct softirq_action *h)
                         else
                                 dev->quota = dev->weight;
                 } else {
-                       netpoll_poll_unlock(dev);
+                       netpoll_poll_unlock(have);
                         dev_put(dev);
                         local_irq_disable();
                 }
diff --git a/net/core/dst.c b/net/core/dst.c

index fc434ade5270e1b99770a9a5f91b2a8747a50093..334790da9f160cd6852a35e5186db16cbb3d14b1 100644 (file)
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -45,6 +45,7 @@ static struct timer_list dst_gc_timer =
  static void dst_run_gc(unsigned long dummy)
  {
         int    delayed = 0;
+       int    work_performed;
         struct dst_entry * dst, **dstp;
  
         if (!spin_trylock(&dst_lock)) {
@@ -52,9 +53,9 @@ static void dst_run_gc(unsigned long dummy)
                 return;
         }
  
-
         del_timer(&dst_gc_timer);
         dstp = &dst_garbage_list;
+       work_performed = 0;
         while ((dst = *dstp) != NULL) {
                 if (atomic_read(&dst->__refcnt)) {
                         dstp = &dst->next;
@@ -62,6 +63,7 @@ static void dst_run_gc(unsigned long dummy)
                         continue;
                 }
                 *dstp = dst->next;
+               work_performed = 1;
  
                 dst = dst_destroy(dst);
                 if (dst) {
@@ -86,9 +88,14 @@ static void dst_run_gc(unsigned long dummy)
                 dst_gc_timer_inc = DST_GC_MAX;
                 goto out;
         }
-       if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
-               dst_gc_timer_expires = DST_GC_MAX;
-       dst_gc_timer_inc += DST_GC_INC;
+       if (!work_performed) {
+               if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
+                       dst_gc_timer_expires = DST_GC_MAX;
+               dst_gc_timer_inc += DST_GC_INC;
+       } else {
+               dst_gc_timer_inc = DST_GC_INC;
+               dst_gc_timer_expires = DST_GC_MIN;
+       }
         dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
  #if RT_CACHE_DEBUG >= 2
         printk("dst_total: %d/%d %ld\n",
diff --git a/net/core/ethtool.c b/net/core/ethtool.c

index a3eeb88e1c81fabe1eea26499958723955b08615..289c1b5a8e4a0bf4c497f999d35d5c4f9ea797e4 100644 (file)
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -81,6 +81,18 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
         return 0;
  }
  
+int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data)
+{
+       unsigned char len = dev->addr_len;
+       if ( addr->size < len )
+               return -ETOOSMALL;
+       
+       addr->size = len;
+       memcpy(data, dev->perm_addr, len);
+       return 0;
+}
+ 
+
  /* Handlers for each ethtool command */
  
  static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
@@ -683,6 +695,39 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
         return ret;
  }
  
+static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
+{
+       struct ethtool_perm_addr epaddr;
+       u8 *data;
+       int ret;
+
+       if (!dev->ethtool_ops->get_perm_addr)
+               return -EOPNOTSUPP;
+
+       if (copy_from_user(&epaddr,useraddr,sizeof(epaddr)))
+               return -EFAULT;
+
+       data = kmalloc(epaddr.size, GFP_USER);  /* size comes from the caller's header */
+       if (!data)
+               return -ENOMEM;
+
+       ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data);
+       if (ret)
+               goto out;       /* driver failed: data must still be freed */
+
+       ret = -EFAULT;
+       if (copy_to_user(useraddr, &epaddr, sizeof(epaddr)))
+               goto out;
+       useraddr += sizeof(epaddr);     /* address bytes are written right after the header */
+       if (copy_to_user(useraddr, data, epaddr.size))
+               goto out;
+       ret = 0;
+
+ out:
+       kfree(data);
+       return ret;
+}
+
  /* The main entry point in this file.  Called from net/core/dev.c */
  
  int dev_ethtool(struct ifreq *ifr)
@@ -806,6 +851,9 @@ int dev_ethtool(struct ifreq *ifr)
         case ETHTOOL_GSTATS:
                 rc = ethtool_get_stats(dev, useraddr);
                 break;
+       case ETHTOOL_GPERMADDR:
+               rc = ethtool_get_perm_addr(dev, useraddr);
+               break;
         default:
                 rc =  -EOPNOTSUPP;
         }
@@ -826,6 +874,7 @@ int dev_ethtool(struct ifreq *ifr)
  
  EXPORT_SYMBOL(dev_ethtool);
  EXPORT_SYMBOL(ethtool_op_get_link);
+EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr);
  EXPORT_SYMBOL(ethtool_op_get_sg);
  EXPORT_SYMBOL(ethtool_op_get_tso);
  EXPORT_SYMBOL(ethtool_op_get_tx_csum);
diff --git a/net/core/flow.c b/net/core/flow.c

index f289570b15a3b3e3ba442d14ca6e133a8dff575e..7e95b39de9fdd369d0ae0011a42c993c554f37f2 100644 (file)
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -42,7 +42,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
  
  #define flow_table(cpu) (per_cpu(flow_tables, cpu))
  
-static kmem_cache_t *flow_cachep;
+static kmem_cache_t *flow_cachep __read_mostly;
  
  static int flow_lwm, flow_hwm;
  
diff --git a/net/core/neighbour.c b/net/core/neighbour.c

index 1beb782ac41b6fe91eb91ddab02ef738ab7ee1d4..39fc55edf691aab986eb1cb53f134f65839c33a4 100644 (file)
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1217,7 +1217,7 @@ static void neigh_proxy_process(unsigned long arg)
  
         while (skb != (struct sk_buff *)&tbl->proxy_queue) {
                 struct sk_buff *back = skb;
-               long tdif = back->stamp.tv_usec - now;
+               long tdif = NEIGH_CB(back)->sched_next - now;
  
                 skb = skb->next;
                 if (tdif <= 0) {
@@ -1248,8 +1248,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
                 kfree_skb(skb);
                 return;
         }
-       skb->stamp.tv_sec  = LOCALLY_ENQUEUED;
-       skb->stamp.tv_usec = sched_next;
+
+       NEIGH_CB(skb)->sched_next = sched_next;
+       NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
  
         spin_lock(&tbl->proxy_queue.lock);
         if (del_timer(&tbl->proxy_timer)) {
@@ -2342,8 +2343,8 @@ void neigh_app_ns(struct neighbour *n)
         }
         nlh                        = (struct nlmsghdr *)skb->data;
         nlh->nlmsg_flags           = NLM_F_REQUEST;
-       NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
-       netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
+       netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
  }
  
  static void neigh_app_notify(struct neighbour *n)
@@ -2360,8 +2361,8 @@ static void neigh_app_notify(struct neighbour *n)
                 return;
         }
         nlh                        = (struct nlmsghdr *)skb->data;
-       NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
-       netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
+       netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
  }
  
  #endif /* CONFIG_ARPD */
diff --git a/net/core/netfilter.c b/net/core/netfilter.c

deleted file mode 100644 (file)

index 076c156..0000000
--- a/net/core/netfilter.c
+++ /dev/null
@@ -1,648 +0,0 @@
-/* netfilter.c: look after the filters for various protocols. 
- * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
- *
- * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
- * way.
- *
- * Rusty Russell (C)2000 -- This code is GPL.
- *
- * February 2000: Modified by James Morris to have 1 queue per protocol.
- * 15-Mar-2000:   Added NF_REPEAT --RR.
- * 08-May-2003:          Internal logging interface added by Jozsef Kadlecsik.
- */
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/netfilter.h>
-#include <net/protocol.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/wait.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/if.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <net/sock.h>
-#include <net/route.h>
-#include <linux/ip.h>
-
-/* In this code, we can be waiting indefinitely for userspace to
- * service a packet if a hook returns NF_QUEUE.  We could keep a count
- * of skbuffs queued for userspace, and not deregister a hook unless
- * this is zero, but that sucks.  Now, we simply check when the
- * packets come back: if the hook is gone, the packet is discarded. */
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...)  printk(format , ## args)
-#else
-#define NFDEBUG(format, args...)
-#endif
-
-/* Sockopts only registered and called from user context, so
-   net locking would be overkill.  Also, [gs]etsockopt calls may
-   sleep. */
-static DECLARE_MUTEX(nf_sockopt_mutex);
-
-struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
-static LIST_HEAD(nf_sockopts);
-static DEFINE_SPINLOCK(nf_hook_lock);
-
-/* 
- * A queue handler may be registered for each protocol.  Each is protected by
- * long term mutex.  The handler must provide an an outfn() to accept packets
- * for queueing and must reinject all packets it receives, no matter what.
- */
-static struct nf_queue_handler_t {
-       nf_queue_outfn_t outfn;
-       void *data;
-} queue_handler[NPROTO];
-static DEFINE_RWLOCK(queue_handler_lock);
-
-int nf_register_hook(struct nf_hook_ops *reg)
-{
-       struct list_head *i;
-
-       spin_lock_bh(&nf_hook_lock);
-       list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
-               if (reg->priority < ((struct nf_hook_ops *)i)->priority)
-                       break;
-       }
-       list_add_rcu(&reg->list, i->prev);
-       spin_unlock_bh(&nf_hook_lock);
-
-       synchronize_net();
-       return 0;
-}
-
-void nf_unregister_hook(struct nf_hook_ops *reg)
-{
-       spin_lock_bh(&nf_hook_lock);
-       list_del_rcu(&reg->list);
-       spin_unlock_bh(&nf_hook_lock);
-
-       synchronize_net();
-}
-
-/* Do exclusive ranges overlap? */
-static inline int overlap(int min1, int max1, int min2, int max2)
-{
-       return max1 > min2 && min1 < max2;
-}
-
-/* Functions to register sockopt ranges (exclusive). */
-int nf_register_sockopt(struct nf_sockopt_ops *reg)
-{
-       struct list_head *i;
-       int ret = 0;
-
-       if (down_interruptible(&nf_sockopt_mutex) != 0)
-               return -EINTR;
-
-       list_for_each(i, &nf_sockopts) {
-               struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
-               if (ops->pf == reg->pf
-                   && (overlap(ops->set_optmin, ops->set_optmax, 
-                               reg->set_optmin, reg->set_optmax)
-                       || overlap(ops->get_optmin, ops->get_optmax, 
-                                  reg->get_optmin, reg->get_optmax))) {
-                       NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
-                               ops->set_optmin, ops->set_optmax, 
-                               ops->get_optmin, ops->get_optmax, 
-                               reg->set_optmin, reg->set_optmax,
-                               reg->get_optmin, reg->get_optmax);
-                       ret = -EBUSY;
-                       goto out;
-               }
-       }
-
-       list_add(&reg->list, &nf_sockopts);
-out:
-       up(&nf_sockopt_mutex);
-       return ret;
-}
-
-void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
-{
-       /* No point being interruptible: we're probably in cleanup_module() */
- restart:
-       down(&nf_sockopt_mutex);
-       if (reg->use != 0) {
-               /* To be woken by nf_sockopt call... */
-               /* FIXME: Stuart Young's name appears gratuitously. */
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               reg->cleanup_task = current;
-               up(&nf_sockopt_mutex);
-               schedule();
-               goto restart;
-       }
-       list_del(&reg->list);
-       up(&nf_sockopt_mutex);
-}
-
-/* Call get/setsockopt() */
-static int nf_sockopt(struct sock *sk, int pf, int val, 
-                     char __user *opt, int *len, int get)
-{
-       struct list_head *i;
-       struct nf_sockopt_ops *ops;
-       int ret;
-
-       if (down_interruptible(&nf_sockopt_mutex) != 0)
-               return -EINTR;
-
-       list_for_each(i, &nf_sockopts) {
-               ops = (struct nf_sockopt_ops *)i;
-               if (ops->pf == pf) {
-                       if (get) {
-                               if (val >= ops->get_optmin
-                                   && val < ops->get_optmax) {
-                                       ops->use++;
-                                       up(&nf_sockopt_mutex);
-                                       ret = ops->get(sk, val, opt, len);
-                                       goto out;
-                               }
-                       } else {
-                               if (val >= ops->set_optmin
-                                   && val < ops->set_optmax) {
-                                       ops->use++;
-                                       up(&nf_sockopt_mutex);
-                                       ret = ops->set(sk, val, opt, *len);
-                                       goto out;
-                               }
-                       }
-               }
-       }
-       up(&nf_sockopt_mutex);
-       return -ENOPROTOOPT;
-       
- out:
-       down(&nf_sockopt_mutex);
-       ops->use--;
-       if (ops->cleanup_task)
-               wake_up_process(ops->cleanup_task);
-       up(&nf_sockopt_mutex);
-       return ret;
-}
-
-int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
-                 int len)
-{
-       return nf_sockopt(sk, pf, val, opt, &len, 0);
-}
-
-int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
-{
-       return nf_sockopt(sk, pf, val, opt, len, 1);
-}
-
-static unsigned int nf_iterate(struct list_head *head,
-                              struct sk_buff **skb,
-                              int hook,
-                              const struct net_device *indev,
-                              const struct net_device *outdev,
-                              struct list_head **i,
-                              int (*okfn)(struct sk_buff *),
-                              int hook_thresh)
-{
-       unsigned int verdict;
-
-       /*
-        * The caller must not block between calls to this
-        * function because of risk of continuing from deleted element.
-        */
-       list_for_each_continue_rcu(*i, head) {
-               struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
-
-               if (hook_thresh > elem->priority)
-                       continue;
-
-               /* Optimization: we don't need to hold module
-                   reference here, since function can't sleep. --RR */
-               verdict = elem->hook(hook, skb, indev, outdev, okfn);
-               if (verdict != NF_ACCEPT) {
-#ifdef CONFIG_NETFILTER_DEBUG
-                       if (unlikely(verdict > NF_MAX_VERDICT)) {
-                               NFDEBUG("Evil return from %p(%u).\n",
-                                       elem->hook, hook);
-                               continue;
-                       }
-#endif
-                       if (verdict != NF_REPEAT)
-                               return verdict;
-                       *i = (*i)->prev;
-               }
-       }
-       return NF_ACCEPT;
-}
-
-int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
-{      
-       int ret;
-
-       write_lock_bh(&queue_handler_lock);
-       if (queue_handler[pf].outfn)
-               ret = -EBUSY;
-       else {
-               queue_handler[pf].outfn = outfn;
-               queue_handler[pf].data = data;
-               ret = 0;
-       }
-       write_unlock_bh(&queue_handler_lock);
-
-       return ret;
-}
-
-/* The caller must flush their queue before this */
-int nf_unregister_queue_handler(int pf)
-{
-       write_lock_bh(&queue_handler_lock);
-       queue_handler[pf].outfn = NULL;
-       queue_handler[pf].data = NULL;
-       write_unlock_bh(&queue_handler_lock);
-       
-       return 0;
-}
-
-/* 
- * Any packet that leaves via this function must come back 
- * through nf_reinject().
- */
-static int nf_queue(struct sk_buff *skb, 
-                   struct list_head *elem, 
-                   int pf, unsigned int hook,
-                   struct net_device *indev,
-                   struct net_device *outdev,
-                   int (*okfn)(struct sk_buff *))
-{
-       int status;
-       struct nf_info *info;
-#ifdef CONFIG_BRIDGE_NETFILTER
-       struct net_device *physindev = NULL;
-       struct net_device *physoutdev = NULL;
-#endif
-
-       /* QUEUE == DROP if noone is waiting, to be safe. */
-       read_lock(&queue_handler_lock);
-       if (!queue_handler[pf].outfn) {
-               read_unlock(&queue_handler_lock);
-               kfree_skb(skb);
-               return 1;
-       }
-
-       info = kmalloc(sizeof(*info), GFP_ATOMIC);
-       if (!info) {
-               if (net_ratelimit())
-                       printk(KERN_ERR "OOM queueing packet %p\n",
-                              skb);
-               read_unlock(&queue_handler_lock);
-               kfree_skb(skb);
-               return 1;
-       }
-
-       *info = (struct nf_info) { 
-               (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
-
-       /* If it's going away, ignore hook. */
-       if (!try_module_get(info->elem->owner)) {
-               read_unlock(&queue_handler_lock);
-               kfree(info);
-               return 0;
-       }
-
-       /* Bump dev refs so they don't vanish while packet is out */
-       if (indev) dev_hold(indev);
-       if (outdev) dev_hold(outdev);
-
-#ifdef CONFIG_BRIDGE_NETFILTER
-       if (skb->nf_bridge) {
-               physindev = skb->nf_bridge->physindev;
-               if (physindev) dev_hold(physindev);
-               physoutdev = skb->nf_bridge->physoutdev;
-               if (physoutdev) dev_hold(physoutdev);
-       }
-#endif
-
-       status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
-       read_unlock(&queue_handler_lock);
-
-       if (status < 0) {
-               /* James M doesn't say fuck enough. */
-               if (indev) dev_put(indev);
-               if (outdev) dev_put(outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
-               if (physindev) dev_put(physindev);
-               if (physoutdev) dev_put(physoutdev);
-#endif
-               module_put(info->elem->owner);
-               kfree(info);
-               kfree_skb(skb);
-               return 1;
-       }
-       return 1;
-}
-
-/* Returns 1 if okfn() needs to be executed by the caller,
- * -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
-                struct net_device *indev,
-                struct net_device *outdev,
-                int (*okfn)(struct sk_buff *),
-                int hook_thresh)
-{
-       struct list_head *elem;
-       unsigned int verdict;
-       int ret = 0;
-
-       /* We may already have this, but read-locks nest anyway */
-       rcu_read_lock();
-
-       elem = &nf_hooks[pf][hook];
-next_hook:
-       verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
-                            outdev, &elem, okfn, hook_thresh);
-       if (verdict == NF_ACCEPT || verdict == NF_STOP) {
-               ret = 1;
-               goto unlock;
-       } else if (verdict == NF_DROP) {
-               kfree_skb(*pskb);
-               ret = -EPERM;
-       } else if (verdict == NF_QUEUE) {
-               NFDEBUG("nf_hook: Verdict = QUEUE.\n");
-               if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn))
-                       goto next_hook;
-       }
-unlock:
-       rcu_read_unlock();
-       return ret;
-}
-
-void nf_reinject(struct sk_buff *skb, struct nf_info *info,
-                unsigned int verdict)
-{
-       struct list_head *elem = &info->elem->list;
-       struct list_head *i;
-
-       rcu_read_lock();
-
-       /* Release those devices we held, or Alexey will kill me. */
-       if (info->indev) dev_put(info->indev);
-       if (info->outdev) dev_put(info->outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
-       if (skb->nf_bridge) {
-               if (skb->nf_bridge->physindev)
-                       dev_put(skb->nf_bridge->physindev);
-               if (skb->nf_bridge->physoutdev)
-                       dev_put(skb->nf_bridge->physoutdev);
-       }
-#endif
-
-       /* Drop reference to owner of hook which queued us. */
-       module_put(info->elem->owner);
-
-       list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
-               if (i == elem) 
-                       break;
-       }
-  
-       if (elem == &nf_hooks[info->pf][info->hook]) {
-               /* The module which sent it to userspace is gone. */
-               NFDEBUG("%s: module disappeared, dropping packet.\n",
-                       __FUNCTION__);
-               verdict = NF_DROP;
-       }
-
-       /* Continue traversal iff userspace said ok... */
-       if (verdict == NF_REPEAT) {
-               elem = elem->prev;
-               verdict = NF_ACCEPT;
-       }
-
-       if (verdict == NF_ACCEPT) {
-       next_hook:
-               verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
-                                    &skb, info->hook, 
-                                    info->indev, info->outdev, &elem,
-                                    info->okfn, INT_MIN);
-       }
-
-       switch (verdict) {
-       case NF_ACCEPT:
-               info->okfn(skb);
-               break;
-
-       case NF_QUEUE:
-               if (!nf_queue(skb, elem, info->pf, info->hook, 
-                             info->indev, info->outdev, info->okfn))
-                       goto next_hook;
-               break;
-       }
-       rcu_read_unlock();
-
-       if (verdict == NF_DROP)
-               kfree_skb(skb);
-
-       kfree(info);
-       return;
-}
-
-#ifdef CONFIG_INET
-/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff **pskb)
-{
-       struct iphdr *iph = (*pskb)->nh.iph;
-       struct rtable *rt;
-       struct flowi fl = {};
-       struct dst_entry *odst;
-       unsigned int hh_len;
-
-       /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
-        * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
-        */
-       if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
-               fl.nl_u.ip4_u.daddr = iph->daddr;
-               fl.nl_u.ip4_u.saddr = iph->saddr;
-               fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
-               fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
-#ifdef CONFIG_IP_ROUTE_FWMARK
-               fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
-#endif
-               fl.proto = iph->protocol;
-               if (ip_route_output_key(&rt, &fl) != 0)
-                       return -1;
-
-               /* Drop old route. */
-               dst_release((*pskb)->dst);
-               (*pskb)->dst = &rt->u.dst;
-       } else {
-               /* non-local src, find valid iif to satisfy
-                * rp-filter when calling ip_route_input. */
-               fl.nl_u.ip4_u.daddr = iph->saddr;
-               if (ip_route_output_key(&rt, &fl) != 0)
-                       return -1;
-
-               odst = (*pskb)->dst;
-               if (ip_route_input(*pskb, iph->daddr, iph->saddr,
-                                  RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
-                       dst_release(&rt->u.dst);
-                       return -1;
-               }
-               dst_release(&rt->u.dst);
-               dst_release(odst);
-       }
-       
-       if ((*pskb)->dst->error)
-               return -1;
-
-       /* Change in oif may mean change in hh_len. */
-       hh_len = (*pskb)->dst->dev->hard_header_len;
-       if (skb_headroom(*pskb) < hh_len) {
-               struct sk_buff *nskb;
-
-               nskb = skb_realloc_headroom(*pskb, hh_len);
-               if (!nskb) 
-                       return -1;
-               if ((*pskb)->sk)
-                       skb_set_owner_w(nskb, (*pskb)->sk);
-               kfree_skb(*pskb);
-               *pskb = nskb;
-       }
-
-       return 0;
-}
-EXPORT_SYMBOL(ip_route_me_harder);
-
-int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
-{
-       struct sk_buff *nskb;
-
-       if (writable_len > (*pskb)->len)
-               return 0;
-
-       /* Not exclusive use of packet?  Must copy. */
-       if (skb_shared(*pskb) || skb_cloned(*pskb))
-               goto copy_skb;
-
-       return pskb_may_pull(*pskb, writable_len);
-
-copy_skb:
-       nskb = skb_copy(*pskb, GFP_ATOMIC);
-       if (!nskb)
-               return 0;
-       BUG_ON(skb_is_nonlinear(nskb));
-
-       /* Rest of kernel will get very unhappy if we pass it a
-          suddenly-orphaned skbuff */
-       if ((*pskb)->sk)
-               skb_set_owner_w(nskb, (*pskb)->sk);
-       kfree_skb(*pskb);
-       *pskb = nskb;
-       return 1;
-}
-EXPORT_SYMBOL(skb_ip_make_writable);
-#endif /*CONFIG_INET*/
-
-/* Internal logging interface, which relies on the real 
-   LOG target modules */
-
-#define NF_LOG_PREFIXLEN               128
-
-static nf_logfn *nf_logging[NPROTO]; /* = NULL */
-static int reported = 0;
-static DEFINE_SPINLOCK(nf_log_lock);
-
-int nf_log_register(int pf, nf_logfn *logfn)
-{
-       int ret = -EBUSY;
-
-       /* Any setup of logging members must be done before
-        * substituting pointer. */
-       spin_lock(&nf_log_lock);
-       if (!nf_logging[pf]) {
-               rcu_assign_pointer(nf_logging[pf], logfn);
-               ret = 0;
-       }
-       spin_unlock(&nf_log_lock);
-       return ret;
-}              
-
-void nf_log_unregister(int pf, nf_logfn *logfn)
-{
-       spin_lock(&nf_log_lock);
-       if (nf_logging[pf] == logfn)
-               nf_logging[pf] = NULL;
-       spin_unlock(&nf_log_lock);
-
-       /* Give time to concurrent readers. */
-       synchronize_net();
-}              
-
-void nf_log_packet(int pf,
-                  unsigned int hooknum,
-                  const struct sk_buff *skb,
-                  const struct net_device *in,
-                  const struct net_device *out,
-                  const char *fmt, ...)
-{
-       va_list args;
-       char prefix[NF_LOG_PREFIXLEN];
-       nf_logfn *logfn;
-       
-       rcu_read_lock();
-       logfn = rcu_dereference(nf_logging[pf]);
-       if (logfn) {
-               va_start(args, fmt);
-               vsnprintf(prefix, sizeof(prefix), fmt, args);
-               va_end(args);
-               /* We must read logging before nf_logfn[pf] */
-               logfn(hooknum, skb, in, out, prefix);
-       } else if (!reported) {
-               printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
-                      "no backend logging module loaded in!\n");
-               reported++;
-       }
-       rcu_read_unlock();
-}
-EXPORT_SYMBOL(nf_log_register);
-EXPORT_SYMBOL(nf_log_unregister);
-EXPORT_SYMBOL(nf_log_packet);
-
-/* This does not belong here, but locally generated errors need it if connection
-   tracking in use: without this, connection may not be in hash table, and hence
-   manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
-
-void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
-{
-       void (*attach)(struct sk_buff *, struct sk_buff *);
-
-       if (skb->nfct && (attach = ip_ct_attach) != NULL) {
-               mb(); /* Just to be sure: must be read before executing this */
-               attach(new, skb);
-       }
-}
-
-void __init netfilter_init(void)
-{
-       int i, h;
-
-       for (i = 0; i < NPROTO; i++) {
-               for (h = 0; h < NF_MAX_HOOKS; h++)
-                       INIT_LIST_HEAD(&nf_hooks[i][h]);
-       }
-}
-
-EXPORT_SYMBOL(ip_ct_attach);
-EXPORT_SYMBOL(nf_ct_attach);
-EXPORT_SYMBOL(nf_getsockopt);
-EXPORT_SYMBOL(nf_hook_slow);
-EXPORT_SYMBOL(nf_hooks);
-EXPORT_SYMBOL(nf_register_hook);
-EXPORT_SYMBOL(nf_register_queue_handler);
-EXPORT_SYMBOL(nf_register_sockopt);
-EXPORT_SYMBOL(nf_reinject);
-EXPORT_SYMBOL(nf_setsockopt);
-EXPORT_SYMBOL(nf_unregister_hook);
-EXPORT_SYMBOL(nf_unregister_queue_handler);
-EXPORT_SYMBOL(nf_unregister_sockopt);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c

index c327c9edadc57f23fba034059232fcbab08c4fc8..a1a9a7abff50981c1e26fdcca3d2b7b2fa315d59 100644 (file)
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -33,6 +33,7 @@
  #define MAX_UDP_CHUNK 1460
  #define MAX_SKBS 32
  #define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
+#define MAX_RETRIES 20000
  
  static DEFINE_SPINLOCK(skb_list_lock);
  static int nr_skbs;
@@ -248,14 +249,14 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
         int status;
         struct netpoll_info *npinfo;
  
-repeat:
-       if(!np || !np->dev || !netif_running(np->dev)) {
+       if (!np || !np->dev || !netif_running(np->dev)) {
                 __kfree_skb(skb);
                 return;
         }
  
-       /* avoid recursion */
         npinfo = np->dev->npinfo;
+
+       /* avoid recursion */
         if (npinfo->poll_owner == smp_processor_id() ||
             np->dev->xmit_lock_owner == smp_processor_id()) {
                 if (np->drop)
@@ -265,30 +266,37 @@ repeat:
                 return;
         }
  
-       spin_lock(&np->dev->xmit_lock);
-       np->dev->xmit_lock_owner = smp_processor_id();
+       do {
+               npinfo->tries--;
+               spin_lock(&np->dev->xmit_lock);
+               np->dev->xmit_lock_owner = smp_processor_id();
  
-       /*
-        * network drivers do not expect to be called if the queue is
-        * stopped.
-        */
-       if (netif_queue_stopped(np->dev)) {
+               /*
+                * network drivers do not expect to be called if the queue is
+                * stopped.
+                */
+               if (netif_queue_stopped(np->dev)) {
+                       np->dev->xmit_lock_owner = -1;
+                       spin_unlock(&np->dev->xmit_lock);
+                       netpoll_poll(np);
+                       udelay(50);
+                       continue;
+               }
+
+               status = np->dev->hard_start_xmit(skb, np->dev);
                 np->dev->xmit_lock_owner = -1;
                 spin_unlock(&np->dev->xmit_lock);
  
-               netpoll_poll(np);
-               goto repeat;
-       }
-
-       status = np->dev->hard_start_xmit(skb, np->dev);
-       np->dev->xmit_lock_owner = -1;
-       spin_unlock(&np->dev->xmit_lock);
+               /* success */
+               if(!status) {
+                       npinfo->tries = MAX_RETRIES; /* reset */
+                       return;
+               }
  
-       /* transmit busy */
-       if(status) {
+               /* transmit busy */
                 netpoll_poll(np);
-               goto repeat;
-       }
+               udelay(50);
+       } while (npinfo->tries > 0);
  }
  
  void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
@@ -349,15 +357,11 @@ static void arp_reply(struct sk_buff *skb)
         unsigned char *arp_ptr;
         int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
         u32 sip, tip;
-       unsigned long flags;
         struct sk_buff *send_skb;
         struct netpoll *np = NULL;
  
-       spin_lock_irqsave(&npinfo->rx_lock, flags);
         if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
                 np = npinfo->rx_np;
-       spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-
         if (!np)
                 return;
  
@@ -639,9 +643,11 @@ int netpoll_setup(struct netpoll *np)
                 if (!npinfo)
                         goto release;
  
+               npinfo->rx_flags = 0;
                 npinfo->rx_np = NULL;
                 npinfo->poll_lock = SPIN_LOCK_UNLOCKED;
                 npinfo->poll_owner = -1;
+               npinfo->tries = MAX_RETRIES;
                 npinfo->rx_lock = SPIN_LOCK_UNLOCKED;
         } else
                 npinfo = ndev->npinfo;
@@ -718,9 +724,16 @@ int netpoll_setup(struct netpoll *np)
                 npinfo->rx_np = np;
                 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
         }
+
+       /* fill up the skb queue */
+       refill_skbs();
+
         /* last thing to do is link it to the net device structure */
         ndev->npinfo = npinfo;
  
+       /* avoid racing with NAPI reading npinfo */
+       synchronize_rcu();
+
         return 0;
  
   release:
diff --git a/net/core/request_sock.c b/net/core/request_sock.c

index bb55675f0685e1a52accb47c734d1b2f8eb6368d..b8203de5ff073c4e3bda5166ef1542b793141a0c 100644 (file)
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -32,7 +32,6 @@
   * Further increasing requires to change hash table size.
   */
  int sysctl_max_syn_backlog = 256;
-EXPORT_SYMBOL(sysctl_max_syn_backlog);
  
  int reqsk_queue_alloc(struct request_sock_queue *queue,
                       const int nr_table_entries)
@@ -53,6 +52,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
         get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
         rwlock_init(&queue->syn_wait_lock);
         queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+       queue->rskq_defer_accept = 0;
+       lopt->nr_table_entries = nr_table_entries;
  
         write_lock_bh(&queue->syn_wait_lock);
         queue->listen_opt = lopt;
@@ -62,3 +63,28 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
  }
  
  EXPORT_SYMBOL(reqsk_queue_alloc);
+
+void reqsk_queue_destroy(struct request_sock_queue *queue)
+{
+       /* make all the listen_opt local to us */
+       struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
+
+       if (lopt->qlen != 0) {
+               int i;
+
+               for (i = 0; i < lopt->nr_table_entries; i++) {
+                       struct request_sock *req;
+
+                       while ((req = lopt->syn_table[i]) != NULL) {
+                               lopt->syn_table[i] = req->dl_next;
+                               lopt->qlen--;
+                               reqsk_free(req);
+                       }
+               }
+       }
+
+       BUG_TRAP(lopt->qlen == 0);
+       kfree(lopt);
+}
+
+EXPORT_SYMBOL(reqsk_queue_destroy);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c

index 4b1bb30e6381fb9abf82b596852ec89b3c36827c..9bed7569ce3f30b7f13f522f973aa45e73a2840f 100644 (file)
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -148,7 +148,7 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
  {
         int err = 0;
  
-       NETLINK_CB(skb).dst_groups = group;
+       NETLINK_CB(skb).dst_group = group;
         if (echo)
                 atomic_inc(&skb->users);
         netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
@@ -458,8 +458,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
                 kfree_skb(skb);
                 return;
         }
-       NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
-       netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL);
+       NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
+       netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
  }
  
  static int rtnetlink_done(struct netlink_callback *cb)
@@ -708,7 +708,8 @@ void __init rtnetlink_init(void)
         if (!rta_buf)
                 panic("rtnetlink_init: cannot allocate rta_buf\n");
  
-       rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv);
+       rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
+                                    THIS_MODULE);
         if (rtnl == NULL)
                 panic("rtnetlink_init: cannot initialize rtnetlink\n");
         netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c

index 7eab867ede5938a4fb698df683876937ce106364..f80a28785610d62c180313310a47d2ad0fa11736 100644 (file)
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -68,7 +68,10 @@
  #include <asm/uaccess.h>
  #include <asm/system.h>
  
-static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_head_cache __read_mostly;
+static kmem_cache_t *skbuff_fclone_cache __read_mostly;
+
+struct timeval __read_mostly skb_tv_base;
  
  /*
   *     Keep out-of-line to prevent kernel bloat.
@@ -118,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
   */
  
  /**
- *     alloc_skb       -       allocate a network buffer
+ *     __alloc_skb     -       allocate a network buffer
   *     @size: size to allocate
   *     @gfp_mask: allocation mask
   *
@@ -129,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
   *     Buffers may only be allocated from interrupts using a @gfp_mask of
   *     %GFP_ATOMIC.
   */
-struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask,
+                           int fclone)
  {
         struct sk_buff *skb;
         u8 *data;
  
         /* Get the HEAD */
-       skb = kmem_cache_alloc(skbuff_head_cache,
-                              gfp_mask & ~__GFP_DMA);
+       if (fclone)
+               skb = kmem_cache_alloc(skbuff_fclone_cache,
+                                      gfp_mask & ~__GFP_DMA);
+       else
+               skb = kmem_cache_alloc(skbuff_head_cache,
+                                      gfp_mask & ~__GFP_DMA);
+
         if (!skb)
                 goto out;
  
@@ -153,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
         skb->data = data;
         skb->tail = data;
         skb->end  = data + size;
+       if (fclone) {
+               struct sk_buff *child = skb + 1;
+               atomic_t *fclone_ref = (atomic_t *) (child + 1);
  
+               skb->fclone = SKB_FCLONE_ORIG;
+               atomic_set(fclone_ref, 1);
+
+               child->fclone = SKB_FCLONE_UNAVAILABLE;
+       }
         atomic_set(&(skb_shinfo(skb)->dataref), 1);
         skb_shinfo(skb)->nr_frags  = 0;
         skb_shinfo(skb)->tso_size = 0;
@@ -266,8 +283,34 @@ void skb_release_data(struct sk_buff *skb)
   */
  void kfree_skbmem(struct sk_buff *skb)
  {
+       struct sk_buff *other;
+       atomic_t *fclone_ref;
+
         skb_release_data(skb);
-       kmem_cache_free(skbuff_head_cache, skb);
+       switch (skb->fclone) {
+       case SKB_FCLONE_UNAVAILABLE:
+               kmem_cache_free(skbuff_head_cache, skb);
+               break;
+
+       case SKB_FCLONE_ORIG:
+               fclone_ref = (atomic_t *) (skb + 2);
+               if (atomic_dec_and_test(fclone_ref))
+                       kmem_cache_free(skbuff_fclone_cache, skb);
+               break;
+
+       case SKB_FCLONE_CLONE:
+               fclone_ref = (atomic_t *) (skb + 1);
+               other = skb - 1;
+
+               /* The clone portion is available for
+                * fast-cloning again.
+                */
+               skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+               if (atomic_dec_and_test(fclone_ref))
+                       kmem_cache_free(skbuff_fclone_cache, other);
+               break;
+       };
  }
  
  /**
@@ -281,8 +324,6 @@ void kfree_skbmem(struct sk_buff *skb)
  
  void __kfree_skb(struct sk_buff *skb)
  {
-       BUG_ON(skb->list != NULL);
-
         dst_release(skb->dst);
  #ifdef CONFIG_XFRM
         secpath_put(skb->sp);
@@ -302,7 +343,6 @@ void __kfree_skb(struct sk_buff *skb)
         skb->tc_index = 0;
  #ifdef CONFIG_NET_CLS_ACT
         skb->tc_verd = 0;
-       skb->tc_classid = 0;
  #endif
  #endif
  
@@ -325,19 +365,27 @@ void __kfree_skb(struct sk_buff *skb)
  
  struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
  {
-       struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
-
-       if (!n) 
-               return NULL;
+       struct sk_buff *n;
+
+       n = skb + 1;
+       if (skb->fclone == SKB_FCLONE_ORIG &&
+           n->fclone == SKB_FCLONE_UNAVAILABLE) {
+               atomic_t *fclone_ref = (atomic_t *) (n + 1);
+               n->fclone = SKB_FCLONE_CLONE;
+               atomic_inc(fclone_ref);
+       } else {
+               n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+               if (!n)
+                       return NULL;
+               n->fclone = SKB_FCLONE_UNAVAILABLE;
+       }
  
  #define C(x) n->x = skb->x
  
         n->next = n->prev = NULL;
-       n->list = NULL;
         n->sk = NULL;
-       C(stamp);
+       C(tstamp);
         C(dev);
-       C(real_dev);
         C(h);
         C(nh);
         C(mac);
@@ -361,7 +409,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
         n->destructor = NULL;
  #ifdef CONFIG_NETFILTER
         C(nfmark);
-       C(nfcache);
         C(nfct);
         nf_conntrack_get(skb->nfct);
         C(nfctinfo);
@@ -370,9 +417,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
         nf_bridge_get(skb->nf_bridge);
  #endif
  #endif /*CONFIG_NETFILTER*/
-#if defined(CONFIG_HIPPI)
-       C(private);
-#endif
  #ifdef CONFIG_NET_SCHED
         C(tc_index);
  #ifdef CONFIG_NET_CLS_ACT
@@ -380,7 +424,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
         n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
         n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
         C(input_dev);
-       C(tc_classid);
  #endif
  
  #endif
@@ -404,10 +447,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
          */
         unsigned long offset = new->data - old->data;
  
-       new->list       = NULL;
         new->sk         = NULL;
         new->dev        = old->dev;
-       new->real_dev   = old->real_dev;
         new->priority   = old->priority;
         new->protocol   = old->protocol;
         new->dst        = dst_clone(old->dst);
@@ -419,12 +460,12 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
         new->mac.raw    = old->mac.raw + offset;
         memcpy(new->cb, old->cb, sizeof(old->cb));
         new->local_df   = old->local_df;
+       new->fclone     = SKB_FCLONE_UNAVAILABLE;
         new->pkt_type   = old->pkt_type;
-       new->stamp      = old->stamp;
+       new->tstamp     = old->tstamp;
         new->destructor = NULL;
  #ifdef CONFIG_NETFILTER
         new->nfmark     = old->nfmark;
-       new->nfcache    = old->nfcache;
         new->nfct       = old->nfct;
         nf_conntrack_get(old->nfct);
         new->nfctinfo   = old->nfctinfo;
@@ -1344,50 +1385,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
         __skb_queue_tail(list, newsk);
         spin_unlock_irqrestore(&list->lock, flags);
  }
+
  /**
   *     skb_unlink      -       remove a buffer from a list
   *     @skb: buffer to remove
+ *     @list: list to use
   *
- *     Place a packet after a given packet in a list. The list locks are taken
- *     and this function is atomic with respect to other list locked calls
+ *     Remove a packet from a list. The list locks are taken and this
+ *     function is atomic with respect to other list locked calls
   *
- *     Works even without knowing the list it is sitting on, which can be
- *     handy at times. It also means that THE LIST MUST EXIST when you
- *     unlink. Thus a list must have its contents unlinked before it is
- *     destroyed.
+ *     You must know what list the SKB is on.
   */
-void skb_unlink(struct sk_buff *skb)
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  {
-       struct sk_buff_head *list = skb->list;
-
-       if (list) {
-               unsigned long flags;
+       unsigned long flags;
  
-               spin_lock_irqsave(&list->lock, flags);
-               if (skb->list == list)
-                       __skb_unlink(skb, skb->list);
-               spin_unlock_irqrestore(&list->lock, flags);
-       }
+       spin_lock_irqsave(&list->lock, flags);
+       __skb_unlink(skb, list);
+       spin_unlock_irqrestore(&list->lock, flags);
  }
  
-
  /**
   *     skb_append      -       append a buffer
   *     @old: buffer to insert after
   *     @newsk: buffer to insert
+ *     @list: list to use
   *
   *     Place a packet after a given packet in a list. The list locks are taken
   *     and this function is atomic with respect to other list locked calls.
   *     A buffer cannot be placed on two lists at the same time.
   */
-
-void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
  {
         unsigned long flags;
  
-       spin_lock_irqsave(&old->list->lock, flags);
-       __skb_append(old, newsk);
-       spin_unlock_irqrestore(&old->list->lock, flags);
+       spin_lock_irqsave(&list->lock, flags);
+       __skb_append(old, newsk, list);
+       spin_unlock_irqrestore(&list->lock, flags);
  }
  
  
@@ -1395,19 +1429,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk)
   *     skb_insert      -       insert a buffer
   *     @old: buffer to insert before
   *     @newsk: buffer to insert
+ *     @list: list to use
+ *
+ *     Place a packet before a given packet in a list. The list locks are
+ *     taken and this function is atomic with respect to other list locked
+ *     calls.
   *
- *     Place a packet before a given packet in a list. The list locks are taken
- *     and this function is atomic with respect to other list locked calls
   *     A buffer cannot be placed on two lists at the same time.
   */
-
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
  {
         unsigned long flags;
  
-       spin_lock_irqsave(&old->list->lock, flags);
-       __skb_insert(newsk, old->prev, old, old->list);
-       spin_unlock_irqrestore(&old->list->lock, flags);
+       spin_lock_irqsave(&list->lock, flags);
+       __skb_insert(newsk, old->prev, old, list);
+       spin_unlock_irqrestore(&list->lock, flags);
  }
  
  #if 0
@@ -1663,12 +1699,23 @@ void __init skb_init(void)
                                               NULL, NULL);
         if (!skbuff_head_cache)
                 panic("cannot create skbuff cache");
+
+       skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+                                               (2*sizeof(struct sk_buff)) +
+                                               sizeof(atomic_t),
+                                               0,
+                                               SLAB_HWCACHE_ALIGN,
+                                               NULL, NULL);
+       if (!skbuff_fclone_cache)
+               panic("cannot create skbuff cache");
+
+       do_gettimeofday(&skb_tv_base);
  }
  
  EXPORT_SYMBOL(___pskb_trim);
  EXPORT_SYMBOL(__kfree_skb);
  EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
  EXPORT_SYMBOL(pskb_copy);
  EXPORT_SYMBOL(pskb_expand_head);
  EXPORT_SYMBOL(skb_checksum);
@@ -1696,3 +1743,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
  EXPORT_SYMBOL(skb_seq_read);
  EXPORT_SYMBOL(skb_abort_seq_read);
  EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(skb_tv_base);
diff --git a/net/core/sock.c b/net/core/sock.c

index 12f6d9a2a522c730ef1fbc268bcd50b6b28f6b9b..ccd10fd65682202fd36942105fee17cb25fb8f51 100644 (file)
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -260,7 +260,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
                            
                         if (val > sysctl_wmem_max)
                                 val = sysctl_wmem_max;
-
+set_sndbuf:
                         sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
                         if ((val * 2) < SOCK_MIN_SNDBUF)
                                 sk->sk_sndbuf = SOCK_MIN_SNDBUF;
@@ -274,6 +274,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
                         sk->sk_write_space(sk);
                         break;
  
+               case SO_SNDBUFFORCE:
+                       if (!capable(CAP_NET_ADMIN)) {
+                               ret = -EPERM;
+                               break;
+                       }
+                       goto set_sndbuf;
+
                 case SO_RCVBUF:
                         /* Don't error on this BSD doesn't and if you think
                            about it this is right. Otherwise apps have to
@@ -282,7 +289,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
                           
                         if (val > sysctl_rmem_max)
                                 val = sysctl_rmem_max;
-
+set_rcvbuf:
                         sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
                         /* FIXME: is this lower bound the right one? */
                         if ((val * 2) < SOCK_MIN_RCVBUF)
@@ -291,6 +298,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
                                 sk->sk_rcvbuf = val * 2;
                         break;
  
+               case SO_RCVBUFFORCE:
+                       if (!capable(CAP_NET_ADMIN)) {
+                               ret = -EPERM;
+                               break;
+                       }
+                       goto set_rcvbuf;
+
                 case SO_KEEPALIVE:
  #ifdef CONFIG_INET
                         if (sk->sk_protocol == IPPROTO_TCP)
@@ -686,6 +700,80 @@ void sk_free(struct sock *sk)
         module_put(owner);
  }
  
+struct sock *sk_clone(const struct sock *sk, const unsigned int __nocast priority)
+{
+       struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
+
+       if (newsk != NULL) {
+               struct sk_filter *filter;
+
+               memcpy(newsk, sk, sk->sk_prot->obj_size);
+
+               /* SANITY */
+               sk_node_init(&newsk->sk_node);
+               sock_lock_init(newsk);
+               bh_lock_sock(newsk);
+
+               atomic_set(&newsk->sk_rmem_alloc, 0);
+               atomic_set(&newsk->sk_wmem_alloc, 0);
+               atomic_set(&newsk->sk_omem_alloc, 0);
+               skb_queue_head_init(&newsk->sk_receive_queue);
+               skb_queue_head_init(&newsk->sk_write_queue);
+
+               rwlock_init(&newsk->sk_dst_lock);
+               rwlock_init(&newsk->sk_callback_lock);
+
+               newsk->sk_dst_cache     = NULL;
+               newsk->sk_wmem_queued   = 0;
+               newsk->sk_forward_alloc = 0;
+               newsk->sk_send_head     = NULL;
+               newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
+               newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+
+               sock_reset_flag(newsk, SOCK_DONE);
+               skb_queue_head_init(&newsk->sk_error_queue);
+
+               filter = newsk->sk_filter;
+               if (filter != NULL)
+                       sk_filter_charge(newsk, filter);
+
+               if (unlikely(xfrm_sk_clone_policy(newsk))) {
+                       /* It is still a raw copy of the parent, so invalidate
+                        * the destructor and do a plain sk_free() */
+                       newsk->sk_destruct = NULL;
+                       sk_free(newsk);
+                       newsk = NULL;
+                       goto out;
+               }
+
+               newsk->sk_err      = 0;
+               newsk->sk_priority = 0;
+               atomic_set(&newsk->sk_refcnt, 2);
+
+               /*
+                * Increment the counter in the same struct proto as the master
+                * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+                * is the same as sk->sk_prot->socks, as this field was copied
+                * with memcpy).
+                *
+                * This _changes_ the previous behaviour, where
+                * tcp_create_openreq_child always was incrementing the
+                * equivalent to tcp_prot->socks (inet_sock_nr), so this has
+                * to be taken into account in all callers. -acme
+                */
+               sk_refcnt_debug_inc(newsk);
+               newsk->sk_socket = NULL;
+               newsk->sk_sleep  = NULL;
+
+               if (newsk->sk_prot->sockets_allocated)
+                       atomic_inc(newsk->sk_prot->sockets_allocated);
+       }
+out:
+       return newsk;
+}
+
+EXPORT_SYMBOL_GPL(sk_clone);
+
  void __init sk_init(void)
  {
         if (num_physpages <= 4096) {
@@ -1353,11 +1441,7 @@ void sk_common_release(struct sock *sk)
  
         xfrm_sk_free_policy(sk);
  
-#ifdef INET_REFCNT_DEBUG
-       if (atomic_read(&sk->sk_refcnt) != 1)
-               printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
-                      sk, atomic_read(&sk->sk_refcnt));
-#endif
+       sk_refcnt_debug_release(sk);
         sock_put(sk);
  }
  
@@ -1368,7 +1452,8 @@ static LIST_HEAD(proto_list);
  
  int proto_register(struct proto *prot, int alloc_slab)
  {
-       char *request_sock_slab_name;
+       char *request_sock_slab_name = NULL;
+       char *timewait_sock_slab_name;
         int rc = -ENOBUFS;
  
         if (alloc_slab) {
@@ -1399,6 +1484,23 @@ int proto_register(struct proto *prot, int alloc_slab)
                                 goto out_free_request_sock_slab_name;
                         }
                 }
+
+               if (prot->twsk_obj_size) {
+                       static const char mask[] = "tw_sock_%s";
+
+                       timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+
+                       if (timewait_sock_slab_name == NULL)
+                               goto out_free_request_sock_slab;
+
+                       sprintf(timewait_sock_slab_name, mask, prot->name);
+                       prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name,
+                                                           prot->twsk_obj_size,
+                                                           0, SLAB_HWCACHE_ALIGN,
+                                                           NULL, NULL);
+                       if (prot->twsk_slab == NULL)
+                               goto out_free_timewait_sock_slab_name;
+               }
         }
  
         write_lock(&proto_list_lock);
@@ -1407,6 +1509,13 @@ int proto_register(struct proto *prot, int alloc_slab)
         rc = 0;
  out:
         return rc;
+out_free_timewait_sock_slab_name:
+       kfree(timewait_sock_slab_name);
+out_free_request_sock_slab:
+       if (prot->rsk_prot && prot->rsk_prot->slab) {
+               kmem_cache_destroy(prot->rsk_prot->slab);
+               prot->rsk_prot->slab = NULL;
+       }
  out_free_request_sock_slab_name:
         kfree(request_sock_slab_name);
  out_free_sock_slab:
@@ -1434,6 +1543,14 @@ void proto_unregister(struct proto *prot)
                 prot->rsk_prot->slab = NULL;
         }
  
+       if (prot->twsk_slab != NULL) {
+               const char *name = kmem_cache_name(prot->twsk_slab);
+
+               kmem_cache_destroy(prot->twsk_slab);
+               kfree(name);
+               prot->twsk_slab = NULL;
+       }
+
         list_del(&prot->node);
         write_unlock(&proto_list_lock);
  }
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c

index 8f817ad9f54629f61dcd5dc54115a04bf527d0f7..2f278c8e474370e422bdcc5b66b8c3fa16a42ca3 100644 (file)
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -9,23 +9,18 @@
  #include <linux/sysctl.h>
  #include <linux/config.h>
  #include <linux/module.h>
+#include <linux/socket.h>
+#include <net/sock.h>
  
  #ifdef CONFIG_SYSCTL
  
  extern int netdev_max_backlog;
-extern int netdev_budget;
  extern int weight_p;
-extern int net_msg_cost;
-extern int net_msg_burst;
  
  extern __u32 sysctl_wmem_max;
  extern __u32 sysctl_rmem_max;
-extern __u32 sysctl_wmem_default;
-extern __u32 sysctl_rmem_default;
  
  extern int sysctl_core_destroy_delay;
-extern int sysctl_optmem_max;
-extern int sysctl_somaxconn;
  
  #ifdef CONFIG_NET_DIVERT
  extern char sysctl_divert_version[];
diff --git a/net/core/utils.c b/net/core/utils.c

index 88eb8b68e26b2c21b18a6e5f1256f24d6104d36c..7b5970fc9e407f7886332cba4d95b8ee3cf2c011 100644 (file)
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -16,7 +16,9 @@
  #include <linux/module.h>
  #include <linux/jiffies.h>
  #include <linux/kernel.h>
+#include <linux/inet.h>
  #include <linux/mm.h>
+#include <linux/net.h>
  #include <linux/string.h>
  #include <linux/types.h>
  #include <linux/random.h>
diff --git a/net/core/wireless.c b/net/core/wireless.c

index 3ff5639c0b7886f77a92e42047733d6822db5e5c..5caae2399f3a26891cb29cc0d58f34ee0e3b7a11 100644 (file)
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -571,10 +571,6 @@ static int wireless_seq_show(struct seq_file *seq, void *v)
         return 0;
  }
  
-extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
-extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
-extern void dev_seq_stop(struct seq_file *seq, void *v);
-
  static struct seq_operations wireless_seq_ops = {
         .start = dev_seq_start,
         .next  = dev_seq_next,
@@ -1144,8 +1140,8 @@ static inline void rtmsg_iwinfo(struct net_device *       dev,
                 kfree_skb(skb);
                 return;
         }
-       NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
-       netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
+       netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
  }
  #endif /* WE_EVENT_NETLINK */
  
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig

new file mode 100644 (file)

index 0000000..187ac18
--- /dev/null
+++ b/net/dccp/Kconfig
@@ -0,0 +1,50 @@
+menu "DCCP Configuration (EXPERIMENTAL)"
+       depends on INET && EXPERIMENTAL
+
+config IP_DCCP
+       tristate "The DCCP Protocol (EXPERIMENTAL)"
+       ---help---
+         Datagram Congestion Control Protocol
+
+         From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.
+
+         The Datagram Congestion Control Protocol (DCCP) is a transport
+         protocol that implements bidirectional, unicast connections of
+         congestion-controlled, unreliable datagrams. It should be suitable
+         for use by applications such as streaming media, Internet telephony,
+         and on-line games.
+
+         To compile this protocol support as a module, choose M here: the
+         module will be called dccp.
+
+         If in doubt, say N.
+
+config INET_DCCP_DIAG
+       depends on IP_DCCP && INET_DIAG
+       def_tristate y if (IP_DCCP = y && INET_DIAG = y)
+       def_tristate m
+
+source "net/dccp/ccids/Kconfig"
+
+menu "DCCP Kernel Hacking"
+       depends on IP_DCCP && DEBUG_KERNEL=y
+
+config IP_DCCP_DEBUG
+       bool "DCCP debug messages"
+       ---help---
+         Only use this if you're hacking DCCP.
+
+         Just say N.
+
+config IP_DCCP_UNLOAD_HACK
+       depends on IP_DCCP=m && IP_DCCP_CCID3=m
+       bool "DCCP control sock unload hack"
+       ---help---
+         Enable this to be able to unload the dccp module when it
+         has only one refcount held, the control sock one. Just execute
+         "rmmod dccp_ccid3 dccp"
+
+         Just say N.
+endmenu
+
+endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile

new file mode 100644 (file)

index 0000000..fb97bb0
--- /dev/null
+++ b/net/dccp/Makefile
@@ -0,0 +1,10 @@
+obj-$(CONFIG_IP_DCCP) += dccp.o
+
+dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \
+         timer.o
+
+obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
+
+dccp_diag-y := diag.o
+
+obj-y += ccids/
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c

new file mode 100644 (file)

index 0000000..9d8fc0e
--- /dev/null
+++ b/net/dccp/ccid.c
@@ -0,0 +1,139 @@
+/*
+ *  net/dccp/ccid.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  CCID infrastructure
+ *
+ *     This program is free software; you can redistribute it and/or modify it
+ *     under the terms of the GNU General Public License version 2 as
+ *     published by the Free Software Foundation.
+ */
+
+#include "ccid.h"
+
+static struct ccid *ccids[CCID_MAX];
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+static atomic_t ccids_lockct = ATOMIC_INIT(0);
+static DEFINE_SPINLOCK(ccids_lock);
+
+/*
+ * The strategy is: modifications to the ccids vector are short, do not sleep
+ * and are very rare, but read access should be free of any exclusive locks.
+ */
+/*
+ * Writer side of the ccids[] protection: take ccids_lock, then wait until
+ * the reader count (ccids_lockct) reads zero.  While readers are still
+ * counted the spinlock is dropped and the CPU is yielded before retrying.
+ * Returns with ccids_lock held; pair with ccids_write_unlock().
+ *
+ * NOTE(review): yield() is called here, so this presumably must only be
+ * used from process context - confirm against callers.
+ */
+static void ccids_write_lock(void)
+{
+       spin_lock(&ccids_lock);
+       while (atomic_read(&ccids_lockct) != 0) {
+               /* let the outstanding readers finish before trying again */
+               spin_unlock(&ccids_lock);
+               yield();
+               spin_lock(&ccids_lock);
+       }
+}
+
+/* Release the spinlock taken by ccids_write_lock(). */
+static inline void ccids_write_unlock(void)
+{
+       spin_unlock(&ccids_lock);
+}
+
+/*
+ * Reader side: announce the reader by bumping ccids_lockct, then wait for
+ * ccids_lock to be free (spin_unlock_wait) without ever acquiring it.
+ * Pair with ccids_read_unlock().
+ */
+static inline void ccids_read_lock(void)
+{
+       atomic_inc(&ccids_lockct);
+       spin_unlock_wait(&ccids_lock);
+}
+
+/* Drop the reader count taken by ccids_read_lock(). */
+static inline void ccids_read_unlock(void)
+{
+       atomic_dec(&ccids_lockct);
+}
+
+#else
+#define ccids_write_lock() do { } while(0)
+#define ccids_write_unlock() do { } while(0)
+#define ccids_read_lock() do { } while(0)
+#define ccids_read_unlock() do { } while(0)
+#endif
+
+/*
+ * ccid_register - publish a CCID implementation in the ccids[] table
+ * @ccid: descriptor to publish; ccid->ccid_init must be set and
+ *        ccid->ccid_id must be below CCID_MAX
+ *
+ * Returns 0 on success, -EINVAL when the descriptor cannot be used (no
+ * ccid_init hook, or an id that does not fit the table) and -EEXIST when
+ * the id is already taken.
+ */
+int ccid_register(struct ccid *ccid)
+{
+       int err;
+
+       /*
+        * ccid_id is an unsigned char while ccids[] only has CCID_MAX
+        * entries, so the largest id would index one past the end of the
+        * table: refuse it instead of corrupting memory.
+        */
+       if (ccid->ccid_init == NULL || ccid->ccid_id >= CCID_MAX)
+               return -EINVAL;
+
+       ccids_write_lock();
+       err = -EEXIST;
+       if (ccids[ccid->ccid_id] == NULL) {
+               ccids[ccid->ccid_id] = ccid;
+               err = 0;
+       }
+       ccids_write_unlock();
+       /* log outside the lock; only successful registrations are reported */
+       if (err == 0)
+               pr_info("CCID: Registered CCID %d (%s)\n",
+                       ccid->ccid_id, ccid->ccid_name);
+       return err;
+}
+
+EXPORT_SYMBOL_GPL(ccid_register);
+
+/*
+ * ccid_unregister - remove a CCID implementation from the ccids[] table
+ * @ccid: descriptor whose ccid_id slot is to be cleared
+ *
+ * Returns 0 after clearing the slot, or -EINVAL when ccid_id does not fit
+ * the table (nothing can have been registered under such an id).
+ */
+int ccid_unregister(struct ccid *ccid)
+{
+       /* ccid_id is an unsigned char; ccids[] has only CCID_MAX entries */
+       if (ccid->ccid_id >= CCID_MAX)
+               return -EINVAL;
+
+       ccids_write_lock();
+       ccids[ccid->ccid_id] = NULL;
+       ccids_write_unlock();
+       pr_info("CCID: Unregistered CCID %d (%s)\n",
+               ccid->ccid_id, ccid->ccid_name);
+       return 0;
+}
+
+EXPORT_SYMBOL_GPL(ccid_unregister);
+
+/*
+ * ccid_init - look up CCID @id and initialise it for socket @sk
+ * @id: CCID number, used as index into ccids[]
+ * @sk: socket handed to the CCID's ccid_init hook
+ *
+ * With CONFIG_KMOD, an unregistered id first triggers a request for the
+ * module "net-dccp-ccid-<id>".  On success a reference on the owning
+ * module is held and the descriptor is returned.  Returns NULL when the
+ * id is out of range or not registered, when the module reference cannot
+ * be taken, or when the ccid_init hook fails (the module reference is
+ * dropped again in that case).
+ */
+struct ccid *ccid_init(unsigned char id, struct sock *sk)
+{
+       struct ccid *ccid;
+
+       /*
+        * id is an unsigned char while ccids[] only has CCID_MAX entries:
+        * the largest id would read one past the end of the table.
+        */
+       if (id >= CCID_MAX)
+               return NULL;
+
+#ifdef CONFIG_KMOD
+       /* done before taking the read lock */
+       if (ccids[id] == NULL)
+               request_module("net-dccp-ccid-%d", id);
+#endif
+       ccids_read_lock();
+
+       ccid = ccids[id];
+       if (ccid == NULL)
+               goto out;
+
+       if (!try_module_get(ccid->ccid_owner))
+               goto out_err;
+
+       if (ccid->ccid_init(sk) != 0)
+               goto out_module_put;
+out:
+       ccids_read_unlock();
+       return ccid;
+out_module_put:
+       module_put(ccid->ccid_owner);
+out_err:
+       ccid = NULL;
+       goto out;
+}
+
+EXPORT_SYMBOL_GPL(ccid_init);
+
+/*
+ * ccid_exit - run a CCID's optional ccid_exit hook for @sk and drop the
+ * reference on its owning module.
+ * @ccid: descriptor to tear down; NULL is accepted and ignored
+ * @sk:   socket passed to the ccid_exit hook
+ *
+ * Nothing is done (and no module reference is dropped) when the table slot
+ * for ccid->ccid_id is empty.
+ *
+ * NOTE(review): the slot is only tested for being non-NULL, not for
+ * holding this very @ccid, and a ccid_id equal to CCID_MAX would index one
+ * past the end of ccids[] - confirm that registration rules this out.
+ */
+void ccid_exit(struct ccid *ccid, struct sock *sk)
+{
+       if (ccid == NULL)
+               return;
+
+       ccids_read_lock();
+
+       if (ccids[ccid->ccid_id] != NULL) {
+               if (ccid->ccid_exit != NULL)
+                       ccid->ccid_exit(sk);
+               module_put(ccid->ccid_owner);
+       }
+
+       ccids_read_unlock();
+}
+
+EXPORT_SYMBOL_GPL(ccid_exit);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h

new file mode 100644 (file)

index 0000000..962f1e9
--- /dev/null
+++ b/net/dccp/ccid.h
@@ -0,0 +1,180 @@
+#ifndef _CCID_H
+#define _CCID_H
+/*
+ *  net/dccp/ccid.h
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  CCID infrastructure
+ *
+ *     This program is free software; you can redistribute it and/or modify it
+ *     under the terms of the GNU General Public License version 2 as
+ *     published by the Free Software Foundation.
+ */
+
+#include <net/sock.h>
+#include <linux/dccp.h>
+#include <linux/list.h>
+#include <linux/module.h>
+
+#define CCID_MAX 255
+
+/*
+ * struct ccid - descriptor of one CCID implementation
+ *
+ * ccid_id is the index of the implementation in the ccids[] table and
+ * ccid_name is used in the registration log messages.  ccid_owner is the
+ * module that ccid_init()/ccid_exit() take and drop references on.
+ *
+ * The ccid_init hook is mandatory (ccid_register() rejects a descriptor
+ * without it).  ccid_exit and every ccid_hc_* hook may be left NULL: the
+ * callers in ccid.c and the inline wrappers in this header test each
+ * pointer before calling through it.
+ */
+struct ccid {
+       unsigned char   ccid_id;
+       const char      *ccid_name;
+       struct module   *ccid_owner;
+       int             (*ccid_init)(struct sock *sk);
+       void            (*ccid_exit)(struct sock *sk);
+       int             (*ccid_hc_rx_init)(struct sock *sk);
+       int             (*ccid_hc_tx_init)(struct sock *sk);
+       void            (*ccid_hc_rx_exit)(struct sock *sk);
+       void            (*ccid_hc_tx_exit)(struct sock *sk);
+       void            (*ccid_hc_rx_packet_recv)(struct sock *sk,
+                                                 struct sk_buff *skb);
+       int             (*ccid_hc_rx_parse_options)(struct sock *sk,
+                                                   unsigned char option,
+                                                   unsigned char len, u16 idx,
+                                                   unsigned char* value);
+       void            (*ccid_hc_rx_insert_options)(struct sock *sk,
+                                                    struct sk_buff *skb);
+       void            (*ccid_hc_tx_insert_options)(struct sock *sk,
+                                                    struct sk_buff *skb);
+       void            (*ccid_hc_tx_packet_recv)(struct sock *sk,
+                                                 struct sk_buff *skb);
+       int             (*ccid_hc_tx_parse_options)(struct sock *sk,
+                                                   unsigned char option,
+                                                   unsigned char len, u16 idx,
+                                                   unsigned char* value);
+       int             (*ccid_hc_tx_send_packet)(struct sock *sk,
+                                                 struct sk_buff *skb, int len);
+       void            (*ccid_hc_tx_packet_sent)(struct sock *sk, int more,
+                                                 int len);
+       void            (*ccid_hc_rx_get_info)(struct sock *sk,
+                                              struct tcp_info *info);
+       void            (*ccid_hc_tx_get_info)(struct sock *sk,
+                                              struct tcp_info *info);
+};
+
+extern int        ccid_register(struct ccid *ccid);
+extern int        ccid_unregister(struct ccid *ccid);
+
+extern struct ccid *ccid_init(unsigned char id, struct sock *sk);
+extern void       ccid_exit(struct ccid *ccid, struct sock *sk);
+
+/* Take an extra reference on the module that owns @ccid. */
+static inline void __ccid_get(struct ccid *ccid)
+{
+       __module_get(ccid->ccid_owner);
+}
+
+/*
+ * Forward to the CCID's ccid_hc_tx_send_packet hook and return its result;
+ * a CCID without that hook yields 0.
+ */
+static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
+                                        struct sk_buff *skb, int len)
+{
+       if (ccid->ccid_hc_tx_send_packet == NULL)
+               return 0;
+
+       return ccid->ccid_hc_tx_send_packet(sk, skb, len);
+}
+
+/* Forward to the CCID's ccid_hc_tx_packet_sent hook, if it has one. */
+static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
+                                         int more, int len)
+{
+       if (ccid->ccid_hc_tx_packet_sent == NULL)
+               return;
+
+       ccid->ccid_hc_tx_packet_sent(sk, more, len);
+}
+
+/*
+ * Forward to the CCID's ccid_hc_rx_init hook and return its result; a CCID
+ * without that hook yields 0.
+ */
+static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk)
+{
+       if (ccid->ccid_hc_rx_init == NULL)
+               return 0;
+
+       return ccid->ccid_hc_rx_init(sk);
+}
+
+/*
+ * Forward to the CCID's ccid_hc_tx_init hook and return its result; a CCID
+ * without that hook yields 0.
+ */
+static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk)
+{
+       if (ccid->ccid_hc_tx_init == NULL)
+               return 0;
+
+       return ccid->ccid_hc_tx_init(sk);
+}
+
+/*
+ * Forward to the CCID's ccid_hc_rx_exit hook.  Skipped when the hook is
+ * absent or the socket has no dccps_hc_rx_ccid_private data.
+ */
+static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk)
+{
+       if (ccid->ccid_hc_rx_exit == NULL ||
+           dccp_sk(sk)->dccps_hc_rx_ccid_private == NULL)
+               return;
+
+       ccid->ccid_hc_rx_exit(sk);
+}
+
+/*
+ * Forward to the CCID's ccid_hc_tx_exit hook.  Skipped when the hook is
+ * absent or the socket has no dccps_hc_tx_ccid_private data.
+ */
+static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk)
+{
+       if (ccid->ccid_hc_tx_exit == NULL ||
+           dccp_sk(sk)->dccps_hc_tx_ccid_private == NULL)
+               return;
+
+       ccid->ccid_hc_tx_exit(sk);
+}
+
+/* Forward to the CCID's ccid_hc_rx_packet_recv hook, if it has one. */
+static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
+                                         struct sk_buff *skb)
+{
+       if (ccid->ccid_hc_rx_packet_recv == NULL)
+               return;
+
+       ccid->ccid_hc_rx_packet_recv(sk, skb);
+}
+
+/* Forward to the CCID's ccid_hc_tx_packet_recv hook, if it has one. */
+static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
+                                         struct sk_buff *skb)
+{
+       if (ccid->ccid_hc_tx_packet_recv == NULL)
+               return;
+
+       ccid->ccid_hc_tx_packet_recv(sk, skb);
+}
+
+/*
+ * Forward to the CCID's ccid_hc_tx_parse_options hook and return its
+ * result; a CCID without that hook yields 0.
+ */
+static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
+                                          unsigned char option,
+                                          unsigned char len, u16 idx,
+                                          unsigned char* value)
+{
+       if (ccid->ccid_hc_tx_parse_options == NULL)
+               return 0;
+
+       return ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value);
+}
+
+/*
+ * Forward to the CCID's ccid_hc_rx_parse_options hook and return its
+ * result; a CCID without that hook yields 0.
+ */
+static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
+                                          unsigned char option,
+                                          unsigned char len, u16 idx,
+                                          unsigned char* value)
+{
+       if (ccid->ccid_hc_rx_parse_options == NULL)
+               return 0;
+
+       return ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value);
+}
+
+/* Forward to the CCID's ccid_hc_tx_insert_options hook, if it has one. */
+static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
+                                            struct sk_buff *skb)
+{
+       if (ccid->ccid_hc_tx_insert_options == NULL)
+               return;
+
+       ccid->ccid_hc_tx_insert_options(sk, skb);
+}
+
+/* Forward to the CCID's ccid_hc_rx_insert_options hook, if it has one. */
+static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
+                                            struct sk_buff *skb)
+{
+       if (ccid->ccid_hc_rx_insert_options == NULL)
+               return;
+
+       ccid->ccid_hc_rx_insert_options(sk, skb);
+}
+
+/* Forward to the CCID's ccid_hc_rx_get_info hook, if it has one. */
+static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk,
+                                      struct tcp_info *info)
+{
+       if (ccid->ccid_hc_rx_get_info == NULL)
+               return;
+
+       ccid->ccid_hc_rx_get_info(sk, info);
+}
+
+/* Forward to the CCID's ccid_hc_tx_get_info hook, if it has one. */
+static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk,
+                                      struct tcp_info *info)
+{
+       if (ccid->ccid_hc_tx_get_info == NULL)
+               return;
+
+       ccid->ccid_hc_tx_get_info(sk, info);
+}
+#endif /* _CCID_H */
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig

new file mode 100644 (file)

index 0000000..7684d83
--- /dev/null
+++ b/net/dccp/ccids/Kconfig
@@ -0,0 +1,29 @@
+menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
+       depends on IP_DCCP && EXPERIMENTAL
+
+config IP_DCCP_CCID3
+       tristate "CCID3 (TFRC) (EXPERIMENTAL)"
+       depends on IP_DCCP
+       ---help---
+         CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
+         rate-controlled congestion control mechanism.  TFRC is designed to
+         be reasonably fair when competing for bandwidth with TCP-like flows,
+         where a flow is "reasonably fair" if its sending rate is generally
+         within a factor of two of the sending rate of a TCP flow under the
+         same conditions.  However, TFRC has a much lower variation of
+         throughput over time compared with TCP, which makes CCID 3 more
+         suitable than CCID 2 for applications such as streaming media where a
+         relatively smooth sending rate is of importance.
+
+         CCID 3 is further described in [CCID 3 PROFILE]. The TFRC
+         congestion control algorithms were initially described in RFC 3448.
+
+         This text was extracted from draft-ietf-dccp-spec-11.txt.
+         
+         If in doubt, say M.
+
+config IP_DCCP_TFRC_LIB
+       depends on IP_DCCP_CCID3
+       def_tristate IP_DCCP_CCID3
+
+endmenu
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile

new file mode 100644 (file)

index 0000000..956f79f
--- /dev/null
+++ b/net/dccp/ccids/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
+
+dccp_ccid3-y := ccid3.o
+
+obj-y += lib/
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c

new file mode 100644 (file)

index 0000000..7bf3b3a
--- /dev/null
+++ b/net/dccp/ccids/ccid3.c
@@ -0,0 +1,1221 @@
+/*
+ *  net/dccp/ccids/ccid3.c
+ *
+ *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *
+ *  An implementation of the DCCP protocol
+ *
+ *  This code has been developed by the University of Waikato WAND
+ *  research group. For further information please see http://www.wand.net.nz/
+ *
+ *  This code also uses code from Lulea University, rereleased as GPL by its
+ *  authors:
+ *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
+ *  and to make it work as a loadable module in the DCCP stack written by
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
+ *
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/config.h>
+#include "../ccid.h"
+#include "../dccp.h"
+#include "lib/packet_history.h"
+#include "lib/loss_interval.h"
+#include "lib/tfrc.h"
+#include "ccid3.h"
+
+/*
+ * Scaled division used for rate and interval computations.
+ *
+ * For b > 20 the result is a * (USEC_PER_SEC / 10) / (b / 10); working in
+ * tenths is meant to keep the intermediate product inside 32 bits when a
+ * is big.  For b <= 20 the scaled product is returned without dividing.
+ *
+ * NOTE(review): the product a * (USEC_PER_SEC / 10) is still computed in
+ * 32 bits and wraps for large a - confirm that callers stay in range.
+ * NOTE(review): the b <= 20 path avoids a division by zero, but its
+ * result is not a quotient at all - confirm this is intended.
+ */
+static inline u32 usecs_div(const u32 a, const u32 b)
+{
+       const u32 scaled = a * (USEC_PER_SEC / 10);
+
+       if (b > 20)
+               return scaled / (b / 10);
+
+       return scaled;
+}
+
+static int ccid3_debug;
+
+/*
+ * ccid3_pr_debug - debug printk prefixed with the calling function's name.
+ * With CCID3_DEBUG defined it prints only while ccid3_debug is non-zero.
+ * Without CCID3_DEBUG it expands to an empty statement, so that a use as
+ * the sole body of an if/else still forms a proper statement.
+ */
+#ifdef CCID3_DEBUG
+#define ccid3_pr_debug(format, a...) \
+       do { if (ccid3_debug) \
+               printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
+       } while (0)
+#else
+#define ccid3_pr_debug(format, a...) do { } while (0)
+#endif
+
+static struct dccp_tx_hist *ccid3_tx_hist;
+static struct dccp_rx_hist *ccid3_rx_hist;
+static struct dccp_li_hist *ccid3_li_hist;
+
+/* Per-socket init hook: only emits a debug message and always returns 0. */
+static int ccid3_init(struct sock *sk)
+{
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+       return 0;
+}
+
+/* Per-socket exit hook: only emits a debug message. */
+static void ccid3_exit(struct sock *sk)
+{
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+}
+
+/*
+ * TFRC sender states
+ *
+ * NO_SENT:  ccid3_hc_tx_send_packet() treats a socket in this state as
+ *           sending its first packet, arms the no-feedback timer and
+ *           moves to NO_FBACK.
+ * NO_FBACK: presumably no feedback has been received yet; the no-feedback
+ *           timer halves the send rate in this state.
+ * FBACK:    presumably feedback has been received - confirm against the
+ *           receive path.
+ * TERM:     ccid3_hc_tx_send_packet() refuses to send and the no-feedback
+ *           timer does not re-arm itself.
+ *
+ * Values start at 1, so 0 is not a valid state.
+ */
+enum ccid3_hc_tx_states {
+       TFRC_SSTATE_NO_SENT = 1,
+       TFRC_SSTATE_NO_FBACK,
+       TFRC_SSTATE_FBACK,
+       TFRC_SSTATE_TERM,
+};
+
+#ifdef CCID3_DEBUG
+/*
+ * Map a TFRC sender state to a short name for debug output.  The table is
+ * indexed by the enum value; entries that are not listed (such as index 0)
+ * are NULL.  No range check is done on @state.
+ */
+static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
+{
+       /* read-only lookup table: neither the pointers nor the text change */
+       static const char * const ccid3_state_names[] = {
+       [TFRC_SSTATE_NO_SENT]  = "NO_SENT",
+       [TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
+       [TFRC_SSTATE_FBACK]    = "FBACK",
+       [TFRC_SSTATE_TERM]     = "TERM",
+       };
+
+       return ccid3_state_names[state];
+}
+#endif
+
+/*
+ * Switch the TX half-connection of @sk to @state.  The transition is
+ * logged through ccid3_pr_debug and a warning is raised if the socket is
+ * already in @state.
+ */
+static inline void ccid3_hc_tx_set_state(struct sock *sk,
+                                        enum ccid3_hc_tx_states state)
+{
+       struct ccid3_hc_tx_sock *hctx = dccp_sk(sk)->dccps_hc_tx_ccid_private;
+       const enum ccid3_hc_tx_states prev = hctx->ccid3hctx_state;
+
+       ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
+                      dccp_role(sk), sk, ccid3_tx_state_name(prev),
+                      ccid3_tx_state_name(state));
+       WARN_ON(state == prev);
+       hctx->ccid3hctx_state = state;
+}
+
+/*
+ * Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst.
+ *
+ * In the NO_FBACK state t_ipi is left untouched: there the spec says
+ * t_ipi is 1 second (set elsewhere) and it then doubles after every
+ * no-feedback timer (separate function).
+ */
+static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx)
+{
+       if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK)
+               return;
+
+       hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s,
+                                         hctx->ccid3hctx_x);
+}
+
+/*
+ * Calculate new delta by delta = min(t_ipi / 2, t_gran / 2), where
+ * t_gran / 2 is supplied as TFRC_OPSYS_HALF_TIME_GRAN.
+ */
+static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx)
+{
+       const u32 half_ipi = hctx->ccid3hctx_t_ipi / 2;
+
+       hctx->ccid3hctx_delta = min_t(u32, half_ipi,
+                                     TFRC_OPSYS_HALF_TIME_GRAN);
+}
+
+/*
+ * Recompute the sending rate X of the TX half-connection of @sk:
+ *
+ *    If (p > 0)
+ *       X_calc = calcX(s, R, p);
+ *       X = max(min(X_calc, 2 * X_recv), s / t_mbi);
+ *    Else
+ *       If (now - tld >= R)
+ *          X = max(min(2 * X, 2 * X_recv), s / R);
+ *          tld = now;
+ *
+ * "p > 0" is tested as p >= TFRC_SMALLEST_P, to avoid a large error in
+ * calcX, and t_mbi is TFRC_MAX_BACK_OFF_TIME.
+ */
+static void ccid3_hc_tx_update_x(struct sock *sk)
+{
+       struct ccid3_hc_tx_sock *hctx = dccp_sk(sk)->dccps_hc_tx_ccid_private;
+       struct timeval now;
+       u32 capped, lower_bound;
+
+       if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
+               hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s,
+                                                    hctx->ccid3hctx_rtt,
+                                                    hctx->ccid3hctx_p);
+               capped      = min_t(u32, hctx->ccid3hctx_x_calc,
+                                        2 * hctx->ccid3hctx_x_recv);
+               lower_bound = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME;
+               hctx->ccid3hctx_x = max_t(u32, capped, lower_bound);
+               return;
+       }
+
+       /* no usable loss rate: X changes at most once per RTT */
+       do_gettimeofday(&now);
+       if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) < hctx->ccid3hctx_rtt)
+               return;
+
+       capped      = min_t(u32, hctx->ccid3hctx_x_recv,
+                                hctx->ccid3hctx_x) * 2;
+       lower_bound = usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt);
+       hctx->ccid3hctx_x    = max_t(u32, capped, lower_bound);
+       hctx->ccid3hctx_t_ld = now;
+}
+
+/*
+ * Handler of the TX no-feedback timer.
+ * @data: the struct sock the timer belongs to, cast to unsigned long
+ *
+ * If the socket is currently owned by user context the timer is simply
+ * re-armed HZ / 5 jiffies ahead.  Otherwise the action depends on the
+ * sender state:
+ *   TERM     - nothing is done and the timer is not re-armed;
+ *   NO_FBACK - X is halved, but not below s / TFRC_MAX_BACK_OFF_TIME;
+ *   FBACK    - X_recv is reduced and X recomputed, unless the sender was
+ *              idle and receiving fewer than 4 packets per RTT.
+ * For NO_FBACK and FBACK the timer is then re-armed (at least one jiffy
+ * ahead) and the sender is marked idle.
+ *
+ * NOTE(review): the final sock_put() presumably balances a reference held
+ * on behalf of the pending timer - confirm against sk_reset_timer().
+ */
+static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
+{
+       struct sock *sk = (struct sock *)data;
+       struct dccp_sock *dp = dccp_sk(sk);
+       unsigned long next_tmout = 0;
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+
+       bh_lock_sock(sk);
+       if (sock_owned_by_user(sk)) {
+               /* Try again later. */
+               /* XXX: set some sensible MIB */
+               sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+                              jiffies + HZ / 5);
+               goto out;
+       }
+
+       ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk,
+                      ccid3_tx_state_name(hctx->ccid3hctx_state));
+       
+       switch (hctx->ccid3hctx_state) {
+       case TFRC_SSTATE_TERM:
+               goto out;
+       case TFRC_SSTATE_NO_FBACK:
+               /* Halve send rate */
+               hctx->ccid3hctx_x /= 2;
+               if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s /
+                                        TFRC_MAX_BACK_OFF_TIME))
+                       hctx->ccid3hctx_x = (hctx->ccid3hctx_s /
+                                            TFRC_MAX_BACK_OFF_TIME);
+
+               ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d "
+                              "bytes/s\n",
+                              dccp_role(sk), sk,
+                              ccid3_tx_state_name(hctx->ccid3hctx_state),
+                              hctx->ccid3hctx_x);
+               next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s,
+                                                     hctx->ccid3hctx_x),
+                                       TFRC_INITIAL_TIMEOUT);
+               /*
+                * FIXME - not sure above calculation is correct. See section
+                * 5 of CCID3 11 should adjust tx_t_ipi and double that to
+                * achieve it really
+                */
+               break;
+       case TFRC_SSTATE_FBACK:
+               /*
+                * Check if IDLE since last timeout and recv rate is less than
+                * 4 packets per RTT
+                */
+               if (!hctx->ccid3hctx_idle ||
+                   (hctx->ccid3hctx_x_recv >=
+                    4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) {
+                       ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n",
+                                      dccp_role(sk), sk,
+                                      ccid3_tx_state_name(hctx->ccid3hctx_state));
+                       /* Halve sending rate */
+
+                       /*  If (X_calc > 2 * X_recv)
+                        *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
+                        *  Else
+                        *    X_recv = X_calc / 4;
+                        */
+                       BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P &&
+                              hctx->ccid3hctx_x_calc == 0);
+
+                       /* check also if p is zero -> x_calc is infinity? */
+                       if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
+                           hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
+                               hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
+                                                                   hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME));
+                       else
+                               hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;
+
+                       /* Update sending rate */
+                       ccid3_hc_tx_update_x(sk);
+               }
+               /*
+                * Schedule no feedback timer to expire in
+                * max(4 * R, 2 * s / X)
+                */
+               next_tmout = max_t(u32, hctx->ccid3hctx_t_rto, 
+                                       2 * usecs_div(hctx->ccid3hctx_s,
+                                                     hctx->ccid3hctx_x));
+               break;
+       default:
+               printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+                      __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+               dump_stack();
+               goto out;
+       }
+
+       /* next_tmout is in microseconds; never re-arm less than a jiffy ahead */
+       sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, 
+                     jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
+       hctx->ccid3hctx_idle = 1;
+out:
+       bh_unlock_sock(sk);
+       sock_put(sk);
+}
+
+/*
+ * ccid3_hc_tx_send_packet - decide whether a packet may be sent now
+ * @sk:  sending socket
+ * @skb: packet about to be sent; its control block receives the window
+ *       counter value when sending is allowed
+ * @len: payload length; 0 is rejected
+ *
+ * Makes sure an unsent entry sits at the head of the TX history, then:
+ *   NO_SENT         - first packet: arms the no-feedback timer, seeds
+ *                     t_ipi, t_nom and delta, moves to NO_FBACK;
+ *   NO_FBACK, FBACK - computes how long the packet still has to wait.
+ *
+ * Returns 0 when the packet may be sent now, a positive delay (in
+ * milliseconds, per the conversion below) when it has to wait, -ENOTCONN
+ * when there is no TX state, @len is 0 or the sender is in TERM, -ENOBUFS
+ * when no history entry could be allocated, and -EINVAL for an unknown
+ * state.
+ */
+static int ccid3_hc_tx_send_packet(struct sock *sk,
+                                  struct sk_buff *skb, int len)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+       struct dccp_tx_hist_entry *new_packet;
+       struct timeval now;
+       long delay;
+       int rc = -ENOTCONN;
+
+       /* Check if pure ACK or terminating */
+
+       /*
+        * XXX: We only call this function for DATA and DATAACK packets.
+        * These can have zero length, but why the comment about "pure ACK"?
+        */
+       if (hctx == NULL || len == 0 ||
+           hctx->ccid3hctx_state == TFRC_SSTATE_TERM)
+               goto out;
+
+       /* See if last packet allocated was not sent */
+       new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
+       if (new_packet == NULL || new_packet->dccphtx_sent) {
+               new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist,
+                                                   SLAB_ATOMIC);
+
+               rc = -ENOBUFS;
+               if (new_packet == NULL) {
+                       ccid3_pr_debug("%s, sk=%p, not enough mem to add "
+                                      "to history, send refused\n",
+                                      dccp_role(sk), sk);
+                       goto out;
+               }
+
+               dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet);
+       }
+
+       do_gettimeofday(&now);
+
+       switch (hctx->ccid3hctx_state) {
+       case TFRC_SSTATE_NO_SENT:
+               ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n",
+                              dccp_role(sk), sk, dp->dccps_gss);
+
+               hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
+               hctx->ccid3hctx_no_feedback_timer.data     = (unsigned long)sk;
+               sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+                              jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
+               hctx->ccid3hctx_last_win_count   = 0;
+               hctx->ccid3hctx_t_last_win_count = now;
+               ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
+               hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT;
+
+               /* Set nominal send time for initial packet */
+               hctx->ccid3hctx_t_nom = now;
+               timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+                                 hctx->ccid3hctx_t_ipi);
+               ccid3_calc_new_delta(hctx);
+               rc = 0;
+               break;
+       case TFRC_SSTATE_NO_FBACK:
+       case TFRC_SSTATE_FBACK:
+               delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) -
+                        hctx->ccid3hctx_delta);
+               ccid3_pr_debug("send_packet delay=%ld\n", delay);
+               delay /= -1000;
+               /* divide by -1000 is to convert to ms and get sign right */
+               rc = delay > 0 ? delay : 0;
+               break;
+       default:
+               printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+                      __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+               dump_stack();
+               rc = -EINVAL;
+               break;
+       }
+
+       /* Can we send? if so add options and add to packet history */
+       if (rc == 0)
+               new_packet->dccphtx_ccval =
+                       DCCP_SKB_CB(skb)->dccpd_ccval =
+                               hctx->ccid3hctx_last_win_count;
+out:
+       return rc;
+}
+
+/*
+ * Sender-side bookkeeping, run after a packet has been sent.
+ *
+ * @sk:   socket the packet was sent on
+ * @more: not used by this function
+ * @len:  > 0 marks a data packet, anything else is treated as "no data"
+ *
+ * For a data packet the entry at the head of the TX history (which must
+ * still be flagged as unsent) is stamped with the send time and the
+ * sequence number, the window counter is advanced when at least a quarter
+ * RTT has elapsed since it last changed, and - in the NO_FBACK/FBACK
+ * states - the nominal send time t_nom is moved to now + t_ipi.
+ */
+static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+       struct timeval now;
+
+       BUG_ON(hctx == NULL);
+
+       if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
+               ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n",
+                              dccp_role(sk), sk);
+               return;
+       }
+
+       do_gettimeofday(&now);
+
+       /* check if we have sent a data packet */
+       if (len > 0) {
+               unsigned long quarter_rtt;
+               struct dccp_tx_hist_entry *packet;
+
+               packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
+               if (packet == NULL) {
+                       printk(KERN_CRIT "%s: packet doesn't exists in "
+                                        "history!\n", __FUNCTION__);
+                       return;
+               }
+               if (packet->dccphtx_sent) {
+                       printk(KERN_CRIT "%s: no unsent packet in history!\n",
+                              __FUNCTION__);
+                       return;
+               }
+               packet->dccphtx_tstamp = now;
+               packet->dccphtx_seqno  = dp->dccps_gss;
+               /*
+                * Check if win_count have changed
+                * Algorithm in "8.1. Window Counter Value" in
+                * draft-ietf-dccp-ccid3-11.txt
+                */
+               quarter_rtt = timeval_delta(&now,
+                                           &hctx->ccid3hctx_t_last_win_count);
+               /* tiny RTTs are skipped so that rtt / 4 is never a zero divisor */
+               if (likely(hctx->ccid3hctx_rtt > 8))
+                       quarter_rtt /= hctx->ccid3hctx_rtt / 4;
+
+               if (quarter_rtt > 0) {
+                       hctx->ccid3hctx_t_last_win_count = now;
+                       /* advance by at most 5 steps, counter wraps at 16 */
+                       hctx->ccid3hctx_last_win_count =
+                               (hctx->ccid3hctx_last_win_count +
+                                min_t(unsigned long, quarter_rtt, 5)) % 16;
+                       ccid3_pr_debug("%s, sk=%p, window changed from "
+                                      "%u to %u!\n",
+                                      dccp_role(sk), sk,
+                                      packet->dccphtx_ccval,
+                                      hctx->ccid3hctx_last_win_count);
+               }
+
+               hctx->ccid3hctx_idle = 0;
+               packet->dccphtx_rtt  = hctx->ccid3hctx_rtt;
+               packet->dccphtx_sent = 1;
+       } else
+               /*
+                * The cast keeps the argument matched to %llu on
+                * architectures where 64-bit types are plain unsigned long.
+                */
+               ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n",
+                              dccp_role(sk), sk,
+                              (unsigned long long)dp->dccps_gss);
+
+       switch (hctx->ccid3hctx_state) {
+       case TFRC_SSTATE_NO_SENT:
+               /* if first wasn't pure ack */
+               if (len != 0)
+                       printk(KERN_CRIT "%s: %s, First packet sent is noted "
+                                        "as a data packet\n",
+                              __FUNCTION__, dccp_role(sk));
+               return;
+       case TFRC_SSTATE_NO_FBACK:
+       case TFRC_SSTATE_FBACK:
+               if (len > 0) {
+                       /* t_nom = now + t_ipi, using the refreshed t_ipi */
+                       hctx->ccid3hctx_t_nom = now;
+                       ccid3_calc_new_t_ipi(hctx);
+                       ccid3_calc_new_delta(hctx);
+                       timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+                                         hctx->ccid3hctx_t_ipi);
+               }
+               break;
+       default:
+               printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+                      __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+               dump_stack();
+               break;
+       }
+}
+
+/*
+ * Sender-side processing of a received packet.
+ *
+ * Only ACK and DATAACK packets are looked at.  In the NO_FBACK/FBACK
+ * states the acknowledged packet is looked up in the TX history and used
+ * to take a new RTT sample; then the RTO, the receive rate, the loss event
+ * rate, the sending rate and the send schedule (t_ipi, t_nom, delta) are
+ * refreshed, history entries older than the acked one are purged, writers
+ * are woken up and the no-feedback timer is re-armed.
+ *
+ * The order of the steps below matters (e.g. the timer is stopped before
+ * the rate is updated and re-armed last); do not reshuffle them.
+ */
+static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+       struct ccid3_options_received *opt_recv;
+       struct dccp_tx_hist_entry *packet;
+       unsigned long next_tmout;
+       u32 t_elapsed;
+       u32 pinv;
+       u32 x_recv;
+       u32 r_sample;
+
+       if (hctx == NULL)
+               return;
+
+       if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
+               ccid3_pr_debug("%s, sk=%p, received a packet when "
+                              "terminating!\n", dccp_role(sk), sk);
+               return;
+       }
+
+       /* we are only interested in ACKs */
+       if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
+             DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
+               return;
+
+       opt_recv = &hctx->ccid3hctx_options_received;
+
+       /* t_elapsed is only read here; see the FIXME at the RTT sample */
+       t_elapsed = dp->dccps_options_received.dccpor_elapsed_time;
+       x_recv = opt_recv->ccid3or_receive_rate;
+       pinv = opt_recv->ccid3or_loss_event_rate;
+
+       switch (hctx->ccid3hctx_state) {
+       case TFRC_SSTATE_NO_SENT:
+               /* FIXME: what to do here? */
+               return;
+       case TFRC_SSTATE_NO_FBACK:
+       case TFRC_SSTATE_FBACK:
+               /* Calculate new round trip sample by
+                * R_sample = (now - t_recvdata) - t_delay */
+               /* get t_recvdata from history */
+               packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
+                                                DCCP_SKB_CB(skb)->dccpd_ack_seq);
+               if (packet == NULL) {
+                       /* cast keeps the argument matched to %llu everywhere */
+                       ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't "
+                                      "exist in history!\n",
+                                      dccp_role(sk), sk,
+                                      (unsigned long long)
+                                      DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                                      dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
+                       return;
+               }
+
+               /* Update RTT */
+               r_sample = timeval_now_delta(&packet->dccphtx_tstamp);
+               /*
+                * FIXME: t_delay (t_elapsed above) is not subtracted yet, so
+                * the sample still includes the receiver's processing delay.
+                */
+
+               /* Update RTT estimate by
+                * If (No feedback recv)
+                *    R = R_sample;
+                * Else
+                *    R = q * R + (1 - q) * R_sample;
+                *
+                * q is a constant, RFC 3448 recommends 0.9
+                */
+               if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
+                       ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
+                       hctx->ccid3hctx_rtt = r_sample;
+               } else
+                       hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 +
+                                             r_sample / 10;
+
+               /* both values are in microseconds */
+               ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, "
+                              "r_sample=%uus\n", dccp_role(sk), sk,
+                              hctx->ccid3hctx_rtt, r_sample);
+
+               /* Update timeout interval: max(4 * R, one second) */
+               hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
+                                             USEC_PER_SEC);
+
+               /* Update receive rate */
+               hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */
+
+               /*
+                * Update loss event rate.  The peer reports the inverse of
+                * p; ~0 (the "nothing received" default) and 0 both map to
+                * p = 0, otherwise p is scaled by 1000000.
+                */
+               if (pinv == ~0 || pinv == 0)
+                       hctx->ccid3hctx_p = 0;
+               else {
+                       hctx->ccid3hctx_p = 1000000 / pinv;
+
+                       if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) {
+                               hctx->ccid3hctx_p = TFRC_SMALLEST_P;
+                               ccid3_pr_debug("%s, sk=%p, Smallest p used!\n",
+                                              dccp_role(sk), sk);
+                       }
+               }
+
+               /* unschedule no feedback timer */
+               sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
+
+               /* Update sending rate */
+               ccid3_hc_tx_update_x(sk);
+
+               /*
+                * Update next send time: take the old t_ipi out of t_nom,
+                * recompute t_ipi and add the new value back in.
+                */
+               timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
+                                 hctx->ccid3hctx_t_ipi);
+               ccid3_calc_new_t_ipi(hctx);
+               timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+                                 hctx->ccid3hctx_t_ipi);
+               ccid3_calc_new_delta(hctx);
+
+               /* remove all packets older than the one acked from history */
+               dccp_tx_hist_purge_older(ccid3_tx_hist,
+                                        &hctx->ccid3hctx_hist, packet);
+               /*
+                * As we have calculated new ipi, delta, t_nom it is possible that
+                * we now can send a packet, so wake up dccp_wait_for_ccids.
+                */
+               sk->sk_write_space(sk);
+
+               /*
+                * Schedule no feedback timer to expire in
+                * max(4 * R, 2 * s / X)
+                */
+               next_tmout = max(hctx->ccid3hctx_t_rto,
+                                2 * usecs_div(hctx->ccid3hctx_s,
+                                              hctx->ccid3hctx_x));
+
+               ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to "
+                              "expire in %lu jiffies (%luus)\n",
+                              dccp_role(sk), sk,
+                              usecs_to_jiffies(next_tmout), next_tmout);
+
+               /* never arm the timer for less than one jiffy */
+               sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+                              jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
+
+               /* set idle flag */
+               hctx->ccid3hctx_idle = 1;
+               break;
+       default:
+               printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+                      __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+               dump_stack();
+               break;
+       }
+}
+
+/*
+ * Stamp an outgoing packet with the sender's current window counter.
+ * Nothing is done without TX state or while the socket is in a state
+ * other than OPEN / PARTOPEN.
+ */
+static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
+{
+       const struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+
+       if (hctx == NULL)
+               return;
+
+       if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
+               return;
+
+       DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+}
+
+/*
+ * Parse one CCID3 option carried by a received packet (sender side).
+ *
+ * @sk:     socket the packet arrived on
+ * @option: option type
+ * @len:    length of the option value in bytes
+ * @idx:    recorded as the loss intervals index for
+ *          TFRC_OPT_LOSS_INTERVALS
+ * @value:  option value in network byte order; no alignment is assumed
+ *
+ * The values are cached in hctx->ccid3hctx_options_received.  The cache
+ * is reset to its defaults whenever its recorded sequence number differs
+ * from dp->dccps_gsr.
+ *
+ * Returns 0 on success (including unknown options and a missing TX
+ * state) or -EINVAL when a fixed-size option has the wrong length.
+ */
+static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
+                                    unsigned char len, u16 idx,
+                                    unsigned char *value)
+{
+       int rc = 0;
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+       struct ccid3_options_received *opt_recv;
+       u32 opt_val;
+
+       if (hctx == NULL)
+               return 0;
+
+       opt_recv = &hctx->ccid3hctx_options_received;
+
+       if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
+               opt_recv->ccid3or_seqno              = dp->dccps_gsr;
+               opt_recv->ccid3or_loss_event_rate    = ~0;
+               opt_recv->ccid3or_loss_intervals_idx = 0;
+               opt_recv->ccid3or_loss_intervals_len = 0;
+               opt_recv->ccid3or_receive_rate       = 0;
+       }
+
+       switch (option) {
+       case TFRC_OPT_LOSS_EVENT_RATE:
+               if (len != 4) {
+                       ccid3_pr_debug("%s, sk=%p, invalid len for "
+                                      "TFRC_OPT_LOSS_EVENT_RATE\n",
+                                      dccp_role(sk), sk);
+                       rc = -EINVAL;
+               } else {
+                       /*
+                        * value points into the option bytes and may not
+                        * be 32-bit aligned, so copy it out instead of
+                        * dereferencing it as a u32.
+                        */
+                       memcpy(&opt_val, value, sizeof(opt_val));
+                       opt_recv->ccid3or_loss_event_rate = ntohl(opt_val);
+                       ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
+                                      dccp_role(sk), sk,
+                                      opt_recv->ccid3or_loss_event_rate);
+               }
+               break;
+       case TFRC_OPT_LOSS_INTERVALS:
+               /* only position and length are recorded, not the payload */
+               opt_recv->ccid3or_loss_intervals_idx = idx;
+               opt_recv->ccid3or_loss_intervals_len = len;
+               ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n",
+                              dccp_role(sk), sk,
+                              opt_recv->ccid3or_loss_intervals_idx,
+                              opt_recv->ccid3or_loss_intervals_len);
+               break;
+       case TFRC_OPT_RECEIVE_RATE:
+               if (len != 4) {
+                       ccid3_pr_debug("%s, sk=%p, invalid len for "
+                                      "TFRC_OPT_RECEIVE_RATE\n",
+                                      dccp_role(sk), sk);
+                       rc = -EINVAL;
+               } else {
+                       /* same unaligned-safe load as above */
+                       memcpy(&opt_val, value, sizeof(opt_val));
+                       opt_recv->ccid3or_receive_rate = ntohl(opt_val);
+                       ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
+                                      dccp_role(sk), sk,
+                                      opt_recv->ccid3or_receive_rate);
+               }
+               break;
+       }
+
+       return rc;
+}
+
+/*
+ * Allocate and initialise the sender half-connection state of @sk.
+ *
+ * The state starts zeroed, in TFRC_SSTATE_NO_SENT, with a sending rate
+ * of one packet per second and an RTO of one second.
+ *
+ * Returns 0, or -ENOMEM if the allocation fails (the private pointer is
+ * left NULL in that case).
+ */
+static int ccid3_hc_tx_init(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx;
+
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+       hctx = kmalloc(sizeof(*hctx), gfp_any());
+       dp->dccps_hc_tx_ccid_private = hctx;
+       if (hctx == NULL)
+               return -ENOMEM;
+
+       memset(hctx, 0, sizeof(*hctx));
+
+       /* use the socket's packet size only if it is within TFRC bounds */
+       hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE;
+       if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
+           dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
+               hctx->ccid3hctx_s = dp->dccps_packet_size;
+
+       /* Set transmission rate to 1 packet per second */
+       hctx->ccid3hctx_x     = hctx->ccid3hctx_s;
+       hctx->ccid3hctx_t_rto = USEC_PER_SEC;
+       hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
+       INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
+       init_timer(&hctx->ccid3hctx_no_feedback_timer);
+
+       return 0;
+}
+
+/*
+ * Tear down the sender half-connection state of @sk: switch to the TERM
+ * state, stop the no-feedback timer, drop the packet history and free
+ * the private area.  The state must exist (BUG otherwise).
+ */
+static void ccid3_hc_tx_exit(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+       BUG_ON(hctx == NULL);
+
+       ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
+       sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
+
+       /* Empty packet history */
+       dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
+
+       /* hctx still aliases the private pointer, release it through it */
+       kfree(hctx);
+       dp->dccps_hc_tx_ccid_private = NULL;
+}
+
+/*
+ * RX Half Connection methods
+ */
+
+/*
+ * TFRC receiver states.  Values start at 1, so a zeroed state field does
+ * not match any of them.
+ */
+enum ccid3_hc_rx_states {
+       TFRC_RSTATE_NO_DATA = 1,
+       TFRC_RSTATE_DATA,
+       TFRC_RSTATE_TERM    = 127,
+};
+
+#ifdef CCID3_DEBUG
+/*
+ * Map a receiver state to a printable name for debug output.
+ *
+ * The table is sparse (indexed by state value), so both out-of-range
+ * values and the unnamed gaps yield "UNKNOWN" rather than an
+ * out-of-bounds read or a NULL string.
+ */
+static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
+{
+       static const char * const ccid3_rx_state_names[] = {
+       [TFRC_RSTATE_NO_DATA] = "NO_DATA",
+       [TFRC_RSTATE_DATA]    = "DATA",
+       [TFRC_RSTATE_TERM]    = "TERM",
+       };
+       const unsigned int nr_names = sizeof(ccid3_rx_state_names) /
+                                     sizeof(ccid3_rx_state_names[0]);
+       const unsigned int idx = state;
+
+       if (idx >= nr_names || ccid3_rx_state_names[idx] == NULL)
+               return "UNKNOWN";
+
+       return ccid3_rx_state_names[idx];
+}
+#endif
+
+/*
+ * Move the receiver half-connection of @sk to @state, logging the
+ * transition.  Setting the state it is already in triggers a warning.
+ */
+static inline void ccid3_hc_rx_set_state(struct sock *sk,
+                                        enum ccid3_hc_rx_states state)
+{
+       struct ccid3_hc_rx_sock *hcrx = dccp_sk(sk)->dccps_hc_rx_ccid_private;
+       const enum ccid3_hc_rx_states prev = hcrx->ccid3hcrx_state;
+
+       ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
+                      dccp_role(sk), sk, ccid3_rx_state_name(prev),
+                      ccid3_rx_state_name(state));
+       WARN_ON(prev == state);
+       hcrx->ccid3hcrx_state = state;
+}
+
+/*
+ * Prepare the feedback values for the sender and send an ACK.
+ *
+ * X_recv is 0 in NO_DATA state; in DATA state it is the number of bytes
+ * received since the last feedback, scaled to bytes per second.  The
+ * last-feedback bookkeeping is then refreshed from the most recent data
+ * packet in the RX history, the elapsed time (in units of 10us) and the
+ * inverse loss event rate are stored, and dccp_send_ack() is called.
+ *
+ * Nothing is sent in an unexpected state or without a data packet in the
+ * history.
+ */
+static void ccid3_hc_rx_send_feedback(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+       struct dccp_rx_hist_entry *packet;
+       struct timeval now;
+
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+       do_gettimeofday(&now);
+
+       switch (hcrx->ccid3hcrx_state) {
+       case TFRC_RSTATE_NO_DATA:
+               hcrx->ccid3hcrx_x_recv = 0;
+               break;
+       case TFRC_RSTATE_DATA: {
+               const u32 delta = timeval_delta(&now,
+                                       &hcrx->ccid3hcrx_tstamp_last_feedback);
+               /*
+                * bytes * USEC_PER_SEC no longer fits in 32 bits once more
+                * than ~4294 bytes were received, so scale in 64 bits and
+                * only narrow the result after the division.
+                */
+               u64 rate = (u64)hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC;
+
+               if (likely(delta > 1))
+                       do_div(rate, delta);
+               /* x_recv travels as a 32-bit option: saturate, don't wrap */
+               hcrx->ccid3hcrx_x_recv = rate > 0xffffffffULL ? ~0U : (u32)rate;
+       }
+               break;
+       default:
+               printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+                      __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
+               dump_stack();
+               return;
+       }
+
+       packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
+       if (packet == NULL) {
+               printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n",
+                      __FUNCTION__, dccp_role(sk), sk);
+               dump_stack();
+               return;
+       }
+
+       hcrx->ccid3hcrx_tstamp_last_feedback = now;
+       hcrx->ccid3hcrx_last_counter         = packet->dccphrx_ccval;
+       hcrx->ccid3hcrx_seqno_last_counter   = packet->dccphrx_seqno;
+       hcrx->ccid3hcrx_bytes_recv           = 0;
+
+       /* Convert to multiples of 10us */
+       hcrx->ccid3hcrx_elapsed_time =
+                       timeval_delta(&now, &packet->dccphrx_tstamp) / 10;
+       /* p is scaled by 1000000; p == 0 is reported as ~0 */
+       if (hcrx->ccid3hcrx_p == 0)
+               hcrx->ccid3hcrx_pinv = ~0;
+       else
+               hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
+       dccp_send_ack(sk);
+}
+
+/*
+ * Add the receiver's feedback to an outgoing packet.
+ *
+ * The window counter is always copied into the packet's control block.
+ * Packets that carry an acknowledgement additionally get the elapsed
+ * time (when non-zero), a timestamp, the loss event rate and the receive
+ * rate options, the last two in network byte order.
+ */
+static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
+{
+       const struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+       u32 x_recv, pinv;
+
+       if (hcrx == NULL)
+               return;
+
+       if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
+               return;
+
+       DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;
+
+       if (dccp_packet_without_ack(skb))
+               return;
+
+       if (hcrx->ccid3hcrx_elapsed_time != 0)
+               dccp_insert_option_elapsed_time(sk, skb,
+                                               hcrx->ccid3hcrx_elapsed_time);
+       dccp_insert_option_timestamp(sk, skb);
+
+       pinv = htonl(hcrx->ccid3hcrx_pinv);
+       dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
+                          &pinv, sizeof(pinv));
+
+       x_recv = htonl(hcrx->ccid3hcrx_x_recv);
+       dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
+                          &x_recv, sizeof(x_recv));
+}
+
+/*
+ * Calculate the first loss interval.
+ *
+ * The RTT is approximated from the RX history: starting from the first
+ * data packet found, the list is walked until a data packet whose window
+ * counter differs by more than 4 is seen (or the list ends), and the time
+ * between the two is scaled by 4 / counter difference.  Together with the
+ * current receive rate this gives the value that is looked up with
+ * tfrc_calc_x_reverse_lookup() to obtain the implied loss rate p.
+ *
+ * Returns 1000000 / p, or ~0 when p is 0, when the history holds no data
+ * packet, or when rate and RTT are too small to derive p from.
+ */
+static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+       struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
+       u32 rtt, delta, x_recv, fval, p, tmp2;
+       struct timeval tstamp = { 0, };
+       int interval = 0;
+       int win_count = 0;
+       int step = 0;
+       u64 tmp1;
+
+       list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
+                                dccphrx_node) {
+               if (dccp_rx_hist_entry_data_packet(entry)) {
+                       tail = entry;
+
+                       switch (step) {
+                       case 0:
+                               /* first data packet: reference point */
+                               tstamp    = entry->dccphrx_tstamp;
+                               win_count = entry->dccphrx_ccval;
+                               step = 1;
+                               break;
+                       case 1:
+                               /* counter distance, modulo the counter range */
+                               interval = win_count - entry->dccphrx_ccval;
+                               if (interval < 0)
+                                       interval += TFRC_WIN_COUNT_LIMIT;
+                               if (interval > 4)
+                                       goto found;
+                               break;
+                       }
+               }
+       }
+
+       if (step == 0) {
+               printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no "
+                                "data packets!\n",
+                      __FUNCTION__, dccp_role(sk), sk);
+               return ~0;
+       }
+
+       if (interval == 0) {
+               ccid3_pr_debug("%s, sk=%p, Could not find a win_count "
+                              "interval > 0. Defaulting to 1\n",
+                              dccp_role(sk), sk);
+               interval = 1;
+       }
+found:
+       /* tail is non-NULL here: step != 0 implies a data packet was seen */
+       rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval;
+       ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
+                      dccp_role(sk), sk, rtt);
+       if (rtt == 0)
+               rtt = 1;
+
+       delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback);
+       /*
+        * Scale to bytes per second in 64 bits: bytes * USEC_PER_SEC wraps
+        * a u32 once more than ~4294 bytes have been received.
+        */
+       tmp1 = (u64)hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC;
+       if (likely(delta > 1))
+               do_div(tmp1, delta);
+       x_recv = tmp1 > 0xffffffffULL ? ~0U : (u32)tmp1;
+
+       tmp1 = (u64)x_recv * (u64)rtt;
+       do_div(tmp1,10000000);
+       tmp2 = (u32)tmp1;
+       if (tmp2 == 0) {
+               /* a tiny rate * RTT product would divide by zero below */
+               ccid3_pr_debug("%s, sk=%p, x_recv=%u, rtt=%u too small to "
+                              "derive a loss rate\n",
+                              dccp_role(sk), sk, x_recv, rtt);
+               return ~0;
+       }
+       fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
+       /* do not alter order above or you will get overflow on 32 bit */
+       p = tfrc_calc_x_reverse_lookup(fval);
+       ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied "
+                      "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
+
+       if (p == 0)
+               return ~0;
+       else
+               return 1000000 / p;
+}
+
+/*
+ * Record a detected loss in the loss interval history.
+ *
+ * Only the very first loss is handled so far: when @seq_loss is a real
+ * sequence number (not DCCP_MAX_SEQNO + 1) and the history is still
+ * empty, a new entry is created and its interval is set from
+ * ccid3_hc_rx_calc_first_li().
+ */
+static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+       struct dccp_li_hist_entry *first;
+
+       if (seq_loss == DCCP_MAX_SEQNO + 1)
+               return;
+
+       /* FIXME: find end of interval */
+       if (!list_empty(&hcrx->ccid3hcrx_li_hist))
+               return;
+
+       first = dccp_li_hist_interval_new(ccid3_li_hist,
+                                         &hcrx->ccid3hcrx_li_hist,
+                                         seq_loss, win_loss);
+       if (first != NULL)
+               first->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
+}
+
+/*
+ * Ask the RX history for a lost sequence number (and the matching window
+ * counter) and pass the result on to the loss interval bookkeeping.
+ */
+static void ccid3_hc_rx_detect_loss(struct sock *sk)
+{
+       struct ccid3_hc_rx_sock *hcrx = dccp_sk(sk)->dccps_hc_rx_ccid_private;
+       u8 win_loss;
+       u64 seq_loss;
+
+       seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist,
+                                           &hcrx->ccid3hcrx_li_hist,
+                                           &win_loss);
+       ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
+}
+
+/*
+ * Receiver-side processing of an incoming packet.
+ *
+ * ACK/DATAACK packets carrying a timestamp echo update the RTT value.
+ * Every ACK, DATAACK and DATA packet that gets this far is added to the
+ * RX history.  For packets other than pure ACKs:
+ *  - in NO_DATA state the initial feedback is sent and the state becomes
+ *    DATA;
+ *  - in DATA state the received byte count grows; if
+ *    dccp_rx_hist_add_packet() returned 0, feedback is sent once at least
+ *    one RTT has passed since the last one, otherwise loss handling runs
+ *    and feedback is sent when the loss event rate went up.
+ */
+static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+       const struct dccp_options_received *opt_recv;
+       struct dccp_rx_hist_entry *packet;
+       struct timeval now;
+       u32 p_prev;
+       int ins;
+
+       if (hcrx == NULL)
+               return;
+
+       BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
+                hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
+
+       opt_recv = &dp->dccps_options_received;
+
+       switch (DCCP_SKB_CB(skb)->dccpd_type) {
+       case DCCP_PKT_ACK:
+               if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
+                       return;
+               /* fall through - ACKs are handled like DATAACKs from here */
+       case DCCP_PKT_DATAACK:
+               if (opt_recv->dccpor_timestamp_echo == 0)
+                       break;
+               /* p_prev temporarily holds the old RTT for the debug check */
+               p_prev = hcrx->ccid3hcrx_rtt;
+               do_gettimeofday(&now);
+               hcrx->ccid3hcrx_rtt = timeval_usecs(&now) -
+                                    (opt_recv->dccpor_timestamp_echo -
+                                     opt_recv->dccpor_elapsed_time) * 10;
+               if (p_prev != hcrx->ccid3hcrx_rtt)
+                       ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n",
+                                      dccp_role(sk), hcrx->ccid3hcrx_rtt,
+                                      opt_recv->dccpor_elapsed_time);
+               break;
+       case DCCP_PKT_DATA:
+               break;
+       default:
+               ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n",
+                              dccp_role(sk), sk,
+                              dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
+               return;
+       }
+
+       packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp,
+                                       skb, SLAB_ATOMIC);
+       if (packet == NULL) {
+               ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet "
+                              "to history (consider it lost)!\n",
+                              dccp_role(sk), sk);
+               return;
+       }
+
+       ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
+                                     &hcrx->ccid3hcrx_li_hist, packet);
+
+       if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
+               return;
+
+       switch (hcrx->ccid3hcrx_state) {
+       case TFRC_RSTATE_NO_DATA:
+               ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial "
+                              "feedback\n",
+                              dccp_role(sk), sk,
+                              dccp_state_name(sk->sk_state), skb);
+               ccid3_hc_rx_send_feedback(sk);
+               ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
+               return;
+       case TFRC_RSTATE_DATA:
+               /* count payload only: packet length minus the DCCP header */
+               hcrx->ccid3hcrx_bytes_recv += skb->len -
+                                             dccp_hdr(skb)->dccph_doff * 4;
+               /* non-zero: continue with the loss handling below */
+               if (ins != 0)
+                       break;
+
+               do_gettimeofday(&now);
+               if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >=
+                   hcrx->ccid3hcrx_rtt) {
+                       hcrx->ccid3hcrx_tstamp_last_ack = now;
+                       ccid3_hc_rx_send_feedback(sk);
+               }
+               return;
+       default:
+               printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+                      __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
+               dump_stack();
+               return;
+       }
+
+       /* Dealing with packet loss */
+       ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
+                      dccp_role(sk), sk, dccp_state_name(sk->sk_state));
+
+       ccid3_hc_rx_detect_loss(sk);
+       p_prev = hcrx->ccid3hcrx_p;
+
+       /* Calculate loss event rate */
+       if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
+               /* NOTE(review): assumes the mean fits a u32 - confirm */
+               const u32 i_mean =
+                       dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
+
+               /*
+                * Scaling up by 1000000 as fixed decimal; a zero mean
+                * would divide by zero, so p is left unchanged then.
+                */
+               if (i_mean != 0)
+                       hcrx->ccid3hcrx_p = 1000000 / i_mean;
+       }
+
+       /* tell the sender right away when the loss event rate went up */
+       if (hcrx->ccid3hcrx_p > p_prev)
+               ccid3_hc_rx_send_feedback(sk);
+}
+
+/*
+ * Allocate and initialise the receiver half-connection state of @sk.
+ *
+ * The state starts zeroed, in TFRC_RSTATE_NO_DATA, with empty packet and
+ * loss interval histories and an initial RTT of USEC_PER_SEC / 5.
+ *
+ * Returns 0, or -ENOMEM if the allocation fails (the private pointer is
+ * left NULL in that case).
+ */
+static int ccid3_hc_rx_init(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *hcrx;
+
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+       hcrx = kmalloc(sizeof(*hcrx), gfp_any());
+       dp->dccps_hc_rx_ccid_private = hcrx;
+       if (hcrx == NULL)
+               return -ENOMEM;
+
+       memset(hcrx, 0, sizeof(*hcrx));
+
+       /* use the socket's packet size only if it is within TFRC bounds */
+       hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE;
+       if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
+           dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
+               hcrx->ccid3hcrx_s = dp->dccps_packet_size;
+
+       hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
+       INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
+       INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
+       /*
+        * XXX this seems to be paranoid, need to think more about this, for
+        * now start with something different than zero. -acme
+        */
+       hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5;
+
+       return 0;
+}
+
+/*
+ * Tear down the receiver half-connection state of @sk: switch to the
+ * TERM state, drop both histories and free the private area.  A socket
+ * without receiver state is left alone.
+ */
+static void ccid3_hc_rx_exit(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+       if (hcrx == NULL)
+               return;
+
+       ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
+
+       /* Empty packet history */
+       dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
+
+       /* Empty loss interval history */
+       dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
+
+       /* hcrx still aliases the private pointer, release it through it */
+       kfree(hcrx);
+       dp->dccps_hc_rx_ccid_private = NULL;
+}
+
+/*
+ * Export receiver-side values through @info: the receiver state as
+ * tcpi_ca_state, the timestamps option flag and the RTT as tcpi_rcv_rtt.
+ * @info is left untouched when there is no receiver state.
+ */
+static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
+{
+       const struct ccid3_hc_rx_sock *hcrx =
+                               dccp_sk(sk)->dccps_hc_rx_ccid_private;
+
+       if (hcrx == NULL)
+               return;
+
+       info->tcpi_ca_state = hcrx->ccid3hcrx_state;
+       info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+       info->tcpi_rcv_rtt  = hcrx->ccid3hcrx_rtt;
+}
+
+/*
+ * Export sender-side values through @info: the RTO as tcpi_rto and the
+ * RTT estimate as tcpi_rtt.  @info is left untouched when there is no
+ * sender state.
+ */
+static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
+{
+       const struct ccid3_hc_tx_sock *hctx =
+                               dccp_sk(sk)->dccps_hc_tx_ccid_private;
+
+       if (hctx == NULL)
+               return;
+
+       info->tcpi_rto = hctx->ccid3hctx_t_rto;
+       info->tcpi_rtt = hctx->ccid3hctx_rtt;
+}
+
+/* Operations table handed to ccid_register() / ccid_unregister() */
+static struct ccid ccid3 = {
+       /* identification */
+       .ccid_id                   = 3,
+       .ccid_name                 = "ccid3",
+       .ccid_owner                = THIS_MODULE,
+
+       /* CCID-wide setup and teardown */
+       .ccid_init                 = ccid3_init,
+       .ccid_exit                 = ccid3_exit,
+
+       /* sender (TX) half connection */
+       .ccid_hc_tx_init           = ccid3_hc_tx_init,
+       .ccid_hc_tx_exit           = ccid3_hc_tx_exit,
+       .ccid_hc_tx_send_packet    = ccid3_hc_tx_send_packet,
+       .ccid_hc_tx_packet_sent    = ccid3_hc_tx_packet_sent,
+       .ccid_hc_tx_packet_recv    = ccid3_hc_tx_packet_recv,
+       .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
+       .ccid_hc_tx_parse_options  = ccid3_hc_tx_parse_options,
+       .ccid_hc_tx_get_info       = ccid3_hc_tx_get_info,
+
+       /* receiver (RX) half connection */
+       .ccid_hc_rx_init           = ccid3_hc_rx_init,
+       .ccid_hc_rx_exit           = ccid3_hc_rx_exit,
+       .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
+       .ccid_hc_rx_packet_recv    = ccid3_hc_rx_packet_recv,
+       .ccid_hc_rx_get_info       = ccid3_hc_rx_get_info,
+};
+ 
+module_param(ccid3_debug, int, 0444);
+MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
+
+/*
+ * Module entry point: create the RX, TX and loss interval history
+ * objects and register the CCID.
+ *
+ * Returns 0 on success, -ENOBUFS when one of the histories cannot be
+ * created, or the error from ccid_register().  Whatever was created
+ * before a failure is deleted again and its pointer reset to NULL.
+ */
+static __init int ccid3_module_init(void)
+{
+       int rc;
+
+       ccid3_rx_hist = dccp_rx_hist_new("ccid3");
+       if (ccid3_rx_hist == NULL)
+               return -ENOBUFS;
+
+       ccid3_tx_hist = dccp_tx_hist_new("ccid3");
+       if (ccid3_tx_hist == NULL) {
+               rc = -ENOBUFS;
+               goto out_free_rx;
+       }
+
+       ccid3_li_hist = dccp_li_hist_new("ccid3");
+       if (ccid3_li_hist == NULL) {
+               rc = -ENOBUFS;
+               goto out_free_tx;
+       }
+
+       rc = ccid_register(&ccid3);
+       if (rc == 0)
+               return 0;
+
+       /* registration failed: unwind in reverse order of creation */
+       dccp_li_hist_delete(ccid3_li_hist);
+       ccid3_li_hist = NULL;
+out_free_tx:
+       dccp_tx_hist_delete(ccid3_tx_hist);
+       ccid3_tx_hist = NULL;
+out_free_rx:
+       dccp_rx_hist_delete(ccid3_rx_hist);
+       ccid3_rx_hist = NULL;
+       return rc;
+}
+module_init(ccid3_module_init);
+
+/*
+ * Module exit point: unregister the CCID, then delete whichever of the
+ * three history objects still exist and reset their pointers.
+ */
+static __exit void ccid3_module_exit(void)
+{
+#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
+       /*
+        * Hack to use while developing, so that we get rid of the control
+        * sock, that is what keeps a refcount on dccp.ko -acme
+        */
+       extern void dccp_ctl_sock_exit(void);
+
+       dccp_ctl_sock_exit();
+#endif
+       ccid_unregister(&ccid3);
+
+       if (ccid3_tx_hist) {
+               dccp_tx_hist_delete(ccid3_tx_hist);
+               ccid3_tx_hist = NULL;
+       }
+
+       if (ccid3_rx_hist) {
+               dccp_rx_hist_delete(ccid3_rx_hist);
+               ccid3_rx_hist = NULL;
+       }
+
+       if (ccid3_li_hist) {
+               dccp_li_hist_delete(ccid3_li_hist);
+               ccid3_li_hist = NULL;
+       }
+}
+module_exit(ccid3_module_exit);
+
+MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
+             "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
+MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("net-dccp-ccid-3");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h

new file mode 100644 (file)

index 0000000..ee8cbac
--- /dev/null
+++ b/net/dccp/ccids/ccid3.h
@@ -0,0 +1,137 @@
+/*
+ *  net/dccp/ccids/ccid3.h
+ *
+ *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *
+ *  An implementation of the DCCP protocol
+ *
+ *  This code has been developed by the University of Waikato WAND
+ *  research group. For further information please see http://www.wand.net.nz/
+ *  or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *
+ *  This code also uses code from Lulea University, rereleased as GPL by its
+ *  authors:
+ *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
+ *  and to make it work as a loadable module in the DCCP stack written by
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
+ *
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _DCCP_CCID3_H_
+#define _DCCP_CCID3_H_
+
+#include <linux/config.h>
+#include <linux/list.h>
+#include <linux/time.h>
+#include <linux/types.h>
+
+/* NOTE(review): packet size limits, presumably in bytes - confirm */
+#define TFRC_MIN_PACKET_SIZE      16
+#define TFRC_STD_PACKET_SIZE     256
+#define TFRC_MAX_PACKET_SIZE   65535
+
+/* Two seconds as per CCID3 spec, expressed in usecs */
+#define TFRC_INITIAL_TIMEOUT      (2 * USEC_PER_SEC)
+
+/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
+#define TFRC_OPSYS_HALF_TIME_GRAN  (USEC_PER_SEC / (2 * HZ))
+
+/* In seconds */
+#define TFRC_MAX_BACK_OFF_TIME    64
+
+/*
+ * NOTE(review): presumably the smallest loss event rate p that is used, in
+ * the same scaling as ccid3hctx_p - confirm against the users in ccid3.c
+ */
+#define TFRC_SMALLEST_P                   40
+
+/* Option type values used by CCID3 (TFRC) */
+enum ccid3_options {
+       TFRC_OPT_LOSS_EVENT_RATE = 192,
+       TFRC_OPT_LOSS_INTERVALS  = 193,
+       TFRC_OPT_RECEIVE_RATE    = 194,
+};
+
+/*
+ * struct ccid3_options_received - CCID3 option values kept per sender sock
+ *
+ * Embedded in struct ccid3_hc_tx_sock as ccid3hctx_options_received.
+ * ccid3or_seqno (48 bits) and ccid3or_loss_intervals_idx (16 bits) share a
+ * single u64.
+ *
+ * NOTE(review): presumably filled in while parsing the options of a received
+ * packet; the code doing so is not part of this header - confirm.
+ */
+struct ccid3_options_received {
+       u64 ccid3or_seqno:48,
+           ccid3or_loss_intervals_idx:16;
+       u16 ccid3or_loss_intervals_len;
+       u32 ccid3or_loss_event_rate;
+       u32 ccid3or_receive_rate;
+};
+
+/** struct ccid3_hc_tx_sock - CCID3 sender half connection sock
+ *
+ * @ccid3hctx_state - Sender state
+ * @ccid3hctx_x - Current sending rate
+ * @ccid3hctx_x_recv - Receive rate
+ * @ccid3hctx_x_calc - Calculated send (?) rate
+ * @ccid3hctx_s - Packet size
+ * @ccid3hctx_rtt - Estimate of current round trip time in usecs
+ * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
+ * @ccid3hctx_last_win_count - Last window counter sent
+ * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
+ *                              with last_win_count value sent
+ * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
+ * @ccid3hctx_idle - FIXME
+ * @ccid3hctx_t_ld - Time last doubled during slow start
+ * @ccid3hctx_t_nom - Nominal send time of next packet
+ * @ccid3hctx_t_rto - NOTE(review): was undocumented; presumably a timeout
+ *                   value - confirm meaning and units
+ * @ccid3hctx_t_ipi - Interpacket (send) interval
+ * @ccid3hctx_delta - Send timer delta
+ * @ccid3hctx_hist - Packet history
+ * @ccid3hctx_options_received - Options received
+ *                              (see struct ccid3_options_received)
+ */
+struct ccid3_hc_tx_sock {
+       u32                             ccid3hctx_x;
+       u32                             ccid3hctx_x_recv;
+       u32                             ccid3hctx_x_calc;
+       u16                             ccid3hctx_s;
+       u32                             ccid3hctx_rtt;
+       u32                             ccid3hctx_p;
+       u8                              ccid3hctx_state;
+       u8                              ccid3hctx_last_win_count;
+       u8                              ccid3hctx_idle;
+       struct timeval                  ccid3hctx_t_last_win_count;
+       struct timer_list               ccid3hctx_no_feedback_timer;
+       struct timeval                  ccid3hctx_t_ld;
+       struct timeval                  ccid3hctx_t_nom;
+       u32                             ccid3hctx_t_rto;
+       u32                             ccid3hctx_t_ipi;
+       u32                             ccid3hctx_delta;
+       struct list_head                ccid3hctx_hist;
+       struct ccid3_options_received   ccid3hctx_options_received;
+};
+
+/*
+ * struct ccid3_hc_rx_sock - CCID3 receiver half connection sock
+ *
+ * The first three members are bitfields packed into one u64: a 48-bit
+ * sequence number, an 8-bit state and a 4-bit counter value.
+ *
+ * NOTE(review): the members below are not documented in the original; by
+ * analogy with struct ccid3_hc_tx_sock, ccid3hcrx_hist and ccid3hcrx_li_hist
+ * are presumably the packet history and loss interval history lists, and
+ * ccid3hcrx_s the packet size - confirm against ccid3.c.
+ */
+struct ccid3_hc_rx_sock {
+       u64                     ccid3hcrx_seqno_last_counter:48,
+                               ccid3hcrx_state:8,
+                               ccid3hcrx_last_counter:4;
+       unsigned long           ccid3hcrx_rtt;
+       u32                     ccid3hcrx_p;
+       u32                     ccid3hcrx_bytes_recv;
+       struct timeval          ccid3hcrx_tstamp_last_feedback;
+       struct timeval          ccid3hcrx_tstamp_last_ack;
+       struct list_head        ccid3hcrx_hist;
+       struct list_head        ccid3hcrx_li_hist;
+       u16                     ccid3hcrx_s;
+       u32                     ccid3hcrx_pinv;
+       u32                     ccid3hcrx_elapsed_time;
+       u32                     ccid3hcrx_x_recv;
+};
+
+/*
+ * ccid3_hc_tx_field(s, field) evaluates to member ccid3hctx_<field> of the
+ * struct ccid3_hc_tx_sock that s->dccps_hc_tx_ccid_private points to, or to 0
+ * when that pointer is NULL. ccid3_hc_rx_field() does the same for the
+ * receiver side (dccps_hc_rx_ccid_private / ccid3hcrx_<field>).
+ *
+ * The argument is parenthesized so that any pointer expression can be passed.
+ * Note that it is still evaluated twice, so it must be free of side effects.
+ */
+#define ccid3_hc_tx_field(s,field) ((s)->dccps_hc_tx_ccid_private == NULL ? 0 : \
+    ((struct ccid3_hc_tx_sock *)(s)->dccps_hc_tx_ccid_private)->ccid3hctx_##field)
+
+#define ccid3_hc_rx_field(s,field) ((s)->dccps_hc_rx_ccid_private == NULL ? 0 : \
+    ((struct ccid3_hc_rx_sock *)(s)->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
+
+#endif /* _DCCP_CCID3_H_ */
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile

new file mode 100644 (file)

index 0000000..5f940a6
--- /dev/null
+++ b/net/dccp/ccids/lib/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o
+
+dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c

new file mode 100644 (file)

index 0000000..4c01a54
--- /dev/null
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -0,0 +1,144 @@
+/*
+ *  net/dccp/ccids/lib/loss_interval.c
+ *
+ *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include "loss_interval.h"
+
+/*
+ * Allocate a loss interval history descriptor plus a slab cache for its
+ * entries. The cache is named "li_hist_<name>"; the buffer holding that name
+ * is allocated here and is not freed on success. Returns NULL when any of
+ * the allocations fails, after releasing what had been obtained so far.
+ */
+struct dccp_li_hist *dccp_li_hist_new(const char *name)
+{
+       static const char dccp_li_hist_mask[] = "li_hist_%s";
+       struct dccp_li_hist *hist;
+       char *slab_name;
+
+       hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
+       if (hist == NULL)
+               return NULL;
+
+       /* "%s" in the mask is replaced by name, so the NUL still fits */
+       slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1,
+                           GFP_ATOMIC);
+       if (slab_name == NULL) {
+               kfree(hist);
+               return NULL;
+       }
+
+       sprintf(slab_name, dccp_li_hist_mask, name);
+       hist->dccplih_slab = kmem_cache_create(slab_name,
+                                            sizeof(struct dccp_li_hist_entry),
+                                              0, SLAB_HWCACHE_ALIGN,
+                                              NULL, NULL);
+       if (hist->dccplih_slab == NULL) {
+               kfree(slab_name);
+               kfree(hist);
+               return NULL;
+       }
+
+       return hist;
+}
+
+EXPORT_SYMBOL_GPL(dccp_li_hist_new);
+
+/*
+ * Destroy the slab cache of @hist and free both the cache name string and
+ * @hist itself. The name pointer is fetched before the cache is destroyed.
+ * @hist must not be NULL.
+ */
+void dccp_li_hist_delete(struct dccp_li_hist *hist)
+{
+       kmem_cache_t *slab = hist->dccplih_slab;
+       const char *slab_name = kmem_cache_name(slab);
+
+       kmem_cache_destroy(slab);
+       kfree(slab_name);
+       kfree(hist);
+}
+
+EXPORT_SYMBOL_GPL(dccp_li_hist_delete);
+
+/*
+ * Unlink every entry of @list and hand it back to the slab cache of @hist,
+ * leaving @list empty.
+ */
+void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list)
+{
+       while (!list_empty(list)) {
+               struct dccp_li_hist_entry *victim =
+                       list_entry(list->next, struct dccp_li_hist_entry,
+                                  dccplih_node);
+
+               list_del_init(&victim->dccplih_node);
+               kmem_cache_free(hist->dccplih_slab, victim);
+       }
+}
+
+EXPORT_SYMBOL_GPL(dccp_li_hist_purge);
+
+/* Weights used to calculate loss event rate */
+/*
+ * These are integers as per section 8 of RFC3448. We can then divide by 4
+ * when we use them.
+ */
+static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = {
+       4, 4, 4, 4, 3, 2, 1, 1,
+};
+
+/*
+ * Compute a weighted mean over the loss intervals on @list.
+ *
+ * Two weighted sums are built while walking the list from its head:
+ *   i_tot0 - entries 0 .. LENGTH-1 weighted with dccp_li_hist_w[0 .. LENGTH-1]
+ *   i_tot1 - entries 1 .. LENGTH   weighted with dccp_li_hist_w[0 .. LENGTH-1]
+ * (LENGTH being DCCP_LI_HIST_IVAL_F_LENGTH). The larger of the two sums is
+ * multiplied by 4 and divided by the accumulated weight w_tot.
+ *
+ * Returns 0 unless the loop ends with i == DCCP_LI_HIST_IVAL_F_LENGTH.
+ *
+ * NOTE(review): when the list holds more than DCCP_LI_HIST_IVAL_F_LENGTH
+ * entries the loop breaks with i == LENGTH + 1, so 0 is returned as well;
+ * dccp_li_hist_interval_new() adds LENGTH + 1 entries per call - confirm
+ * that this is the intended behaviour.
+ */
+u32 dccp_li_hist_calc_i_mean(struct list_head *list)
+{
+       struct dccp_li_hist_entry *li_entry, *li_next;
+       int i = 0;
+       u32 i_tot;
+       u32 i_tot0 = 0;
+       u32 i_tot1 = 0;
+       u32 w_tot  = 0;
+
+       list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
+               if (i < DCCP_LI_HIST_IVAL_F_LENGTH) {
+                       i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
+                       w_tot  += dccp_li_hist_w[i];
+               }
+
+               /* Same weights, shifted by one entry */
+               if (i != 0)
+                       i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
+
+               /* Stop once LENGTH + 1 entries have been visited */
+               if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
+                       break;
+       }
+
+       if (i != DCCP_LI_HIST_IVAL_F_LENGTH)
+               return 0;
+
+       i_tot = max(i_tot0, i_tot1);
+
+       /* FIXME: Why do we do this? -Ian McDonald */
+       if (i_tot * 4 < w_tot)
+               i_tot = w_tot * 4;
+
+       return i_tot * 4 / w_tot;
+}
+
+EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
+
+/*
+ * Add DCCP_LI_HIST_IVAL_F_LENGTH + 1 freshly allocated entries to the head
+ * of @list. The entry allocated last (which ends up at the head of the list)
+ * records @seq_loss and @win_loss; the entry allocated first is returned.
+ * If an allocation fails the whole of @list is purged and NULL is returned.
+ */
+struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+                                                    struct list_head *list,
+                                                    const u64 seq_loss,
+                                                    const u8 win_loss)
+{
+       struct dccp_li_hist_entry *first = NULL, *newest = NULL;
+       int remaining = DCCP_LI_HIST_IVAL_F_LENGTH + 1;
+
+       while (remaining-- > 0) {
+               newest = dccp_li_hist_entry_new(hist, SLAB_ATOMIC);
+               if (newest == NULL) {
+                       dccp_li_hist_purge(hist, list);
+                       return NULL;
+               }
+
+               if (first == NULL)
+                       first = newest;
+
+               list_add(&newest->dccplih_node, list);
+       }
+
+       newest->dccplih_seqno     = seq_loss;
+       newest->dccplih_win_count = win_loss;
+
+       return first;
+}
+
+EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h

new file mode 100644 (file)

index 0000000..13ad47b
--- /dev/null
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -0,0 +1,61 @@
+#ifndef _DCCP_LI_HIST_
+#define _DCCP_LI_HIST_
+/*
+ *  net/dccp/ccids/lib/loss_interval.h
+ *
+ *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+
+#define DCCP_LI_HIST_IVAL_F_LENGTH  8
+
+/*
+ * struct dccp_li_hist - loss interval history descriptor
+ *
+ * @dccplih_slab - slab cache that struct dccp_li_hist_entry objects are
+ *                allocated from and returned to
+ */
+struct dccp_li_hist {
+       kmem_cache_t *dccplih_slab;
+};
+
+extern struct dccp_li_hist *dccp_li_hist_new(const char *name);
+extern void dccp_li_hist_delete(struct dccp_li_hist *hist);
+
+/*
+ * struct dccp_li_hist_entry - one node of a loss interval history list
+ *
+ * @dccplih_node - linkage into the list
+ * @dccplih_seqno - 48-bit sequence number
+ * @dccplih_win_count - 4-bit window counter value
+ * @dccplih_interval - interval value that dccp_li_hist_calc_i_mean() weights
+ */
+struct dccp_li_hist_entry {
+       struct list_head dccplih_node;
+       u64              dccplih_seqno:48,
+                        dccplih_win_count:4;
+       u32              dccplih_interval;
+};
+
+/*
+ * Allocate one entry from the slab cache of @hist using allocation flags
+ * @prio. Returns whatever kmem_cache_alloc() returns; the entry is not
+ * initialized here.
+ */
+static inline struct dccp_li_hist_entry *
+               dccp_li_hist_entry_new(struct dccp_li_hist *hist,
+                                      const unsigned int __nocast prio)
+{
+       kmem_cache_t *slab = hist->dccplih_slab;
+
+       return kmem_cache_alloc(slab, prio);
+}
+
+/*
+ * Return @entry to the slab cache of @hist. A NULL @entry is ignored.
+ */
+static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
+                                            struct dccp_li_hist_entry *entry)
+{
+       if (entry == NULL)
+               return;
+
+       kmem_cache_free(hist->dccplih_slab, entry);
+}
+
+extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
+                              struct list_head *list);
+
+extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
+
+extern struct dccp_li_hist_entry *
+                       dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+                                                 struct list_head *list,
+                                                 const u64 seq_loss,
+                                                 const u8 win_loss);
+#endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c

new file mode 100644 (file)

index 0000000..d3f9d20
--- /dev/null
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -0,0 +1,398 @@
+/*
+ *  net/dccp/ccids/lib/packet_history.c
+ *
+ *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *
+ *  An implementation of the DCCP protocol
+ *
+ *  This code has been developed by the University of Waikato WAND
+ *  research group. For further information please see http://www.wand.net.nz/
+ *  or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *
+ *  This code also uses code from Lulea University, rereleased as GPL by its
+ *  authors:
+ *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
+ *  and to make it work as a loadable module in the DCCP stack written by
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
+ *
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include "packet_history.h"
+
+/*
+ * Allocate a receive packet history descriptor plus a slab cache for its
+ * entries. The cache is named "rx_hist_<name>"; the buffer holding that name
+ * is allocated here and is not freed on success. Returns NULL when any of
+ * the allocations fails, after releasing what had been obtained so far.
+ */
+struct dccp_rx_hist *dccp_rx_hist_new(const char *name)
+{
+       static const char dccp_rx_hist_mask[] = "rx_hist_%s";
+       struct dccp_rx_hist *hist;
+       char *slab_name;
+
+       hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
+       if (hist == NULL)
+               return NULL;
+
+       /* "%s" in the mask is replaced by name, so the NUL still fits */
+       slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1,
+                           GFP_ATOMIC);
+       if (slab_name == NULL) {
+               kfree(hist);
+               return NULL;
+       }
+
+       sprintf(slab_name, dccp_rx_hist_mask, name);
+       hist->dccprxh_slab = kmem_cache_create(slab_name,
+                                            sizeof(struct dccp_rx_hist_entry),
+                                              0, SLAB_HWCACHE_ALIGN,
+                                              NULL, NULL);
+       if (hist->dccprxh_slab == NULL) {
+               kfree(slab_name);
+               kfree(hist);
+               return NULL;
+       }
+
+       return hist;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_new);
+
+/*
+ * Destroy the slab cache of @hist and free both the cache name string and
+ * @hist itself. The name pointer is fetched before the cache is destroyed.
+ * @hist must not be NULL.
+ */
+void dccp_rx_hist_delete(struct dccp_rx_hist *hist)
+{
+       kmem_cache_t *slab = hist->dccprxh_slab;
+       const char *slab_name = kmem_cache_name(slab);
+
+       kmem_cache_destroy(slab);
+       kfree(slab_name);
+       kfree(hist);
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_delete);
+
+/*
+ * Unlink every entry of @list and hand it back to the slab cache of @hist,
+ * leaving @list empty.
+ */
+void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
+{
+       while (!list_empty(list)) {
+               struct dccp_rx_hist_entry *victim =
+                       list_entry(list->next, struct dccp_rx_hist_entry,
+                                  dccphrx_node);
+
+               list_del_init(&victim->dccphrx_node);
+               kmem_cache_free(hist->dccprxh_slab, victim);
+       }
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
+
+/*
+ * Return the first entry of @list, walking from its head, whose type is
+ * DCCP_PKT_DATA or DCCP_PKT_DATAACK, or NULL when there is none.
+ */
+struct dccp_rx_hist_entry *
+               dccp_rx_hist_find_data_packet(const struct list_head *list)
+{
+       struct dccp_rx_hist_entry *cur;
+
+       list_for_each_entry(cur, list, dccphrx_node) {
+               if (cur->dccphrx_type == DCCP_PKT_DATA ||
+                   cur->dccphrx_type == DCCP_PKT_DATAACK)
+                       return cur;
+       }
+
+       return NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
+
+/*
+ * Insert @packet into the receive history @rx_list, then trim the history.
+ *
+ * The insertion point is chosen by comparing sequence numbers with after48().
+ * While walking past entries that @packet is not after, data packets are
+ * counted in num_later; once TFRC_RECV_NUM_LATE_LOSS of them have been seen
+ * @packet is freed and 1 is returned. In every other case the history is
+ * trimmed and 0 is returned. How much history is kept depends on whether
+ * @li_list is empty.
+ */
+int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+                           struct list_head *rx_list,
+                           struct list_head *li_list,
+                           struct dccp_rx_hist_entry *packet)
+{
+       struct dccp_rx_hist_entry *entry, *next, *iter;
+       u8 num_later = 0;
+
+       iter = dccp_rx_hist_head(rx_list);
+       if (iter == NULL)
+               dccp_rx_hist_add_entry(rx_list, packet);
+       else {
+               const u64 seqno = packet->dccphrx_seqno;
+
+               if (after48(seqno, iter->dccphrx_seqno))
+                       dccp_rx_hist_add_entry(rx_list, packet);
+               else {
+                       if (dccp_rx_hist_entry_data_packet(iter))
+                               num_later = 1;
+
+                       list_for_each_entry_continue(iter, rx_list,
+                                                    dccphrx_node) {
+                               if (after48(seqno, iter->dccphrx_seqno)) {
+                                       dccp_rx_hist_add_entry(&iter->dccphrx_node,
+                                                              packet);
+                                       goto trim_history;
+                               }
+
+                               if (dccp_rx_hist_entry_data_packet(iter))
+                                       num_later++;
+
+                               if (num_later == TFRC_RECV_NUM_LATE_LOSS) {
+                                       dccp_rx_hist_entry_delete(hist, packet);
+                                       return 1;
+                               }
+                       }
+
+                       if (num_later < TFRC_RECV_NUM_LATE_LOSS)
+                               dccp_rx_hist_add_entry(rx_list, packet);
+                       /*
+                        * FIXME: else what? should we destroy the packet
+                        * like above?
+                        */
+               }
+       }
+
+trim_history:
+       /*
+        * Trim history (remove all packets after the NUM_LATE_LOSS + 1
+        * data packets)
+        */
+       num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
+
+       if (!list_empty(li_list)) {
+               list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
+                       if (num_later == 0) {
+                               list_del_init(&entry->dccphrx_node);
+                               dccp_rx_hist_entry_delete(hist, entry);
+                       } else if (dccp_rx_hist_entry_data_packet(entry))
+                               --num_later;
+               }
+       } else {
+               int step = 0;
+               u8 win_count = 0; /* Not needed, but lets shut up gcc */
+               int tmp;
+               /*
+                * We have no loss interval history, so we need at least one
+                * RTT's worth of data packets to approximate the RTT.
+                */
+               list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
+                       if (num_later == 0) {
+                               switch (step) {
+                               case 0:
+                                       step = 1;
+                                       /* OK, find next data packet */
+                                       num_later = 1;
+                                       break;
+                               case 1:
+                                       step = 2;
+                                       /* OK, find next data packet */
+                                       num_later = 1;
+                                       win_count = entry->dccphrx_ccval;
+                                       break;
+                               case 2:
+                                       /* Window counter distance, modulo TFRC_WIN_COUNT_LIMIT */
+                                       tmp = win_count - entry->dccphrx_ccval;
+                                       if (tmp < 0)
+                                               tmp += TFRC_WIN_COUNT_LIMIT;
+                                       if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
+                                               /*
+                                                * We have found a packet older
+                                                * than one rtt remove the rest
+                                                */
+                                               step = 3;
+                                       } else /* OK, find next data packet */
+                                               num_later = 1;
+                                       break;
+                               case 3:
+                                       list_del_init(&entry->dccphrx_node);
+                                       dccp_rx_hist_entry_delete(hist, entry);
+                                       break;
+                               }
+                       } else if (dccp_rx_hist_entry_data_packet(entry))
+                               --num_later;
+               }
+       }
+
+       return 0;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
+
+/*
+ * Scan the receive history @rx_list for a lost data packet.
+ *
+ * b_loss is the entry reached right after TFRC_RECV_NUM_LATE_LOSS data
+ * packets have been counted from the head of @rx_list; a_loss is the entry
+ * reached right after one further data packet has been counted beyond
+ * b_loss. The entries from b_loss up to a_loss are then compared pairwise:
+ * where the sequence number delta between two neighbours is not explained by
+ * the difference of their ndp values, the sequence number following the
+ * later-visited entry is recorded as lost.
+ *
+ * Returns the recorded sequence number, or DCCP_MAX_SEQNO + 1 when no loss
+ * was found (including when the history is too short to decide). *@win_loss
+ * is set to the ccval of a_loss when a loss is reported and to 0 otherwise.
+ * @li_list is only consulted to pick the debug message.
+ */
+u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
+                            struct list_head *li_list, u8 *win_loss)
+{
+       struct dccp_rx_hist_entry *entry, *next, *packet;
+       struct dccp_rx_hist_entry *a_loss = NULL;
+       struct dccp_rx_hist_entry *b_loss = NULL;
+       u64 seq_loss = DCCP_MAX_SEQNO + 1;
+       u8 num_later = TFRC_RECV_NUM_LATE_LOSS;
+
+       list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
+               if (num_later == 0) {
+                       b_loss = entry;
+                       break;
+               } else if (dccp_rx_hist_entry_data_packet(entry))
+                       --num_later;
+       }
+
+       if (b_loss == NULL)
+               goto out;
+
+       num_later = 1;
+       list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
+               if (num_later == 0) {
+                       a_loss = entry;
+                       break;
+               } else if (dccp_rx_hist_entry_data_packet(entry))
+                       --num_later;
+       }
+
+       if (a_loss == NULL) {
+               if (list_empty(li_list)) {
+                       /* no loss events have occurred yet */
+                       LIMIT_NETDEBUG("%s: TODO: find a lost data packet by "
+                                      "comparing to initial seqno\n",
+                                      __FUNCTION__);
+                       goto out;
+               } else {
+                       /* Newline added so the next log message starts on its own line */
+                       LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!\n",
+                                      __FUNCTION__);
+                       goto out;
+               }
+       }
+
+       /* Locate a lost data packet */
+       entry = packet = b_loss;
+       list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
+               u64 delta = dccp_delta_seqno(entry->dccphrx_seqno,
+                                            packet->dccphrx_seqno);
+
+               if (delta != 0) {
+                       if (dccp_rx_hist_entry_data_packet(packet))
+                               --delta;
+                       /*
+                        * FIXME: check this, probably this % usage is because
+                        * in earlier drafts the ndp count was just 8 bits
+                        * long, but now it can be up to 24 bits long.
+                        */
+#if 0
+                       if (delta % DCCP_NDP_LIMIT !=
+                           (packet->dccphrx_ndp -
+                            entry->dccphrx_ndp) % DCCP_NDP_LIMIT)
+#endif
+                       if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) {
+                               seq_loss = entry->dccphrx_seqno;
+                               dccp_inc_seqno(&seq_loss);
+                       }
+               }
+               packet = entry;
+               if (packet == a_loss)
+                       break;
+       }
+out:
+       /* seq_loss is only ever changed after a_loss has been found */
+       if (seq_loss != DCCP_MAX_SEQNO + 1)
+               *win_loss = a_loss->dccphrx_ccval;
+       else
+               *win_loss = 0; /* Paranoia */
+
+       return seq_loss;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss);
+
+/*
+ * Allocate a transmit packet history descriptor plus a slab cache for its
+ * entries. The cache is named "tx_hist_<name>"; the buffer holding that name
+ * is allocated here and is not freed on success. Returns NULL when any of
+ * the allocations fails, after releasing what had been obtained so far.
+ */
+struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
+{
+       static const char dccp_tx_hist_mask[] = "tx_hist_%s";
+       struct dccp_tx_hist *hist;
+       char *slab_name;
+
+       hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
+       if (hist == NULL)
+               return NULL;
+
+       /* "%s" in the mask is replaced by name, so the NUL still fits */
+       slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
+                           GFP_ATOMIC);
+       if (slab_name == NULL) {
+               kfree(hist);
+               return NULL;
+       }
+
+       sprintf(slab_name, dccp_tx_hist_mask, name);
+       hist->dccptxh_slab = kmem_cache_create(slab_name,
+                                            sizeof(struct dccp_tx_hist_entry),
+                                              0, SLAB_HWCACHE_ALIGN,
+                                              NULL, NULL);
+       if (hist->dccptxh_slab == NULL) {
+               kfree(slab_name);
+               kfree(hist);
+               return NULL;
+       }
+
+       return hist;
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_new);
+
+/*
+ * Destroy the slab cache of @hist and free both the cache name string and
+ * @hist itself. The name pointer is fetched before the cache is destroyed.
+ * @hist must not be NULL.
+ */
+void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
+{
+       kmem_cache_t *slab = hist->dccptxh_slab;
+       const char *slab_name = kmem_cache_name(slab);
+
+       kmem_cache_destroy(slab);
+       kfree(slab_name);
+       kfree(hist);
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_delete);
+
+/*
+ * Return the first entry of @list, walking from its head, whose sequence
+ * number equals @seq, or NULL when there is none.
+ */
+struct dccp_tx_hist_entry *
+       dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
+{
+       struct dccp_tx_hist_entry *cur;
+
+       list_for_each_entry(cur, list, dccphtx_node) {
+               if (cur->dccphtx_seqno == seq)
+                       return cur;
+       }
+
+       return NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
+
+/*
+ * Unlink and free every entry that follows @packet on @list. @packet itself
+ * stays on the list: the _continue iterator starts at its successor.
+ */
+void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
+                             struct list_head *list,
+                             struct dccp_tx_hist_entry *packet)
+{
+       struct dccp_tx_hist_entry *cursor = packet, *following;
+
+       list_for_each_entry_safe_continue(cursor, following, list,
+                                         dccphtx_node) {
+               list_del_init(&cursor->dccphtx_node);
+               dccp_tx_hist_entry_delete(hist, cursor);
+       }
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
+
+/*
+ * Unlink every entry of @list and release it with
+ * dccp_tx_hist_entry_delete(), leaving @list empty.
+ */
+void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
+{
+       while (!list_empty(list)) {
+               struct dccp_tx_hist_entry *victim =
+                       list_entry(list->next, struct dccp_tx_hist_entry,
+                                  dccphtx_node);
+
+               list_del_init(&victim->dccphtx_node);
+               dccp_tx_hist_entry_delete(hist, victim);
+       }
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
+
+MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
+             "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
+MODULE_DESCRIPTION("DCCP TFRC library");
+MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h

new file mode 100644 (file)

index 0000000..fb90a91
--- /dev/null
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -0,0 +1,199 @@
+/*
+ *  net/dccp/ccids/lib/packet_history.h
+ *
+ *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *
+ *  An implementation of the DCCP protocol
+ *
+ *  This code has been developed by the University of Waikato WAND
+ *  research group. For further information please see http://www.wand.net.nz/
+ *  or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *
+ *  This code also uses code from Lulea University, rereleased as GPL by its
+ *  authors:
+ *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
+ *  and to make it work as a loadable module in the DCCP stack written by
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
+ *
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _DCCP_PKT_HIST_
+#define _DCCP_PKT_HIST_
+
+#include <linux/config.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+
+#include "../../dccp.h"
+
+/* Number of later packets received before one is considered lost */
+#define TFRC_RECV_NUM_LATE_LOSS         3
+
+#define TFRC_WIN_COUNT_PER_RTT  4
+#define TFRC_WIN_COUNT_LIMIT   16
+
+/*
+ * One record of the sender-side (TX) packet history.
+ *
+ * dccphtx_node   - list linkage used by the dccp_tx_hist_* helpers
+ * dccphtx_seqno  - 48-bit sequence number; dccp_tx_hist_find_entry()
+ *                  matches entries on this field
+ * dccphtx_ccval  - 4-bit value (presumably the CCVal of the packet -
+ *                  TODO confirm against the users of this struct)
+ * dccphtx_sent   - 1-bit flag, cleared by dccp_tx_hist_entry_new()
+ * dccphtx_rtt    - RTT value (units are not visible here - TODO confirm)
+ * dccphtx_tstamp - timestamp (presumably the send time - TODO confirm)
+ */
+struct dccp_tx_hist_entry {
+       struct list_head dccphtx_node;
+       u64              dccphtx_seqno:48,
+                        dccphtx_ccval:4,
+                        dccphtx_sent:1;
+       u32              dccphtx_rtt;
+       struct timeval   dccphtx_tstamp;
+};
+
+/*
+ * One record of the receiver-side (RX) packet history.  All fields except
+ * the list linkage are filled in by dccp_rx_hist_entry_new():
+ *
+ * dccphrx_node   - list linkage used by the dccp_rx_hist_* helpers
+ * dccphrx_seqno  - 48-bit sequence number, from DCCP_SKB_CB(skb)->dccpd_seq
+ * dccphrx_ccval  - 4-bit copy of the header's dccph_ccval
+ * dccphrx_type   - 4-bit copy of the header's dccph_type; tested by
+ *                  dccp_rx_hist_entry_data_packet()
+ * dccphrx_ndp    - the ndp value passed in by the caller
+ * dccphrx_tstamp - do_gettimeofday() result taken when the entry is created
+ */
+struct dccp_rx_hist_entry {
+       struct list_head dccphrx_node;
+       u64              dccphrx_seqno:48,
+                        dccphrx_ccval:4,
+                        dccphrx_type:4;
+       u32              dccphrx_ndp; /* In fact it is from 8 to 24 bits */
+       struct timeval   dccphrx_tstamp;
+};
+
+/*
+ * TX history handle: holds the slab cache that dccp_tx_hist_entry_new()
+ * allocates entries from and dccp_tx_hist_entry_delete() returns them to.
+ */
+struct dccp_tx_hist {
+       kmem_cache_t *dccptxh_slab;
+};
+
+extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name);
+extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist);
+
+/*
+ * RX history handle: holds the slab cache that dccp_rx_hist_entry_new()
+ * allocates entries from and dccp_rx_hist_entry_delete() returns them to.
+ */
+struct dccp_rx_hist {
+       kmem_cache_t *dccprxh_slab;
+};
+
+extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
+extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
+extern struct dccp_rx_hist_entry *
+               dccp_rx_hist_find_data_packet(const struct list_head *list);
+
+/*
+ * Allocate a TX history entry from @hist's slab cache with allocation flags
+ * @prio.  The only field written here is dccphtx_sent, which is set to 0.
+ * Returns NULL when the allocation fails.
+ */
+static inline struct dccp_tx_hist_entry *
+               dccp_tx_hist_entry_new(struct dccp_tx_hist *hist,
+                                      const unsigned int __nocast prio)
+{
+       struct dccp_tx_hist_entry *new_entry;
+
+       new_entry = kmem_cache_alloc(hist->dccptxh_slab, prio);
+       if (new_entry == NULL)
+               return NULL;
+
+       new_entry->dccphtx_sent = 0;
+       return new_entry;
+}
+
+/* Return @entry to @hist's slab cache; a NULL @entry is silently ignored. */
+static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
+                                            struct dccp_tx_hist_entry *entry)
+{
+       if (entry == NULL)
+               return;
+
+       kmem_cache_free(hist->dccptxh_slab, entry);
+}
+
+extern struct dccp_tx_hist_entry *
+                       dccp_tx_hist_find_entry(const struct list_head *list,
+                                               const u64 seq);
+
+/*
+ * Link @entry into @list with list_add(), i.e. directly after the list
+ * head, so the most recently added entry is the first one on the list.
+ */
+static inline void dccp_tx_hist_add_entry(struct list_head *list,
+                                         struct dccp_tx_hist_entry *entry)
+{
+       list_add(&entry->dccphtx_node, list);
+}
+
+/*
+ * Free every entry that follows @packet on @list, returning each one to
+ * @hist's slab cache; @packet itself is kept.  The parameter is named
+ * @packet to match the function definition.
+ */
+extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
+                                    struct list_head *list,
+                                    struct dccp_tx_hist_entry *packet);
+
+extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist,
+                              struct list_head *list);
+
+/* Return the first entry on @list, or NULL when the list is empty. */
+static inline struct dccp_tx_hist_entry *
+               dccp_tx_hist_head(struct list_head *list)
+{
+       if (list_empty(list))
+               return NULL;
+
+       return list_entry(list->next, struct dccp_tx_hist_entry,
+                         dccphtx_node);
+}
+
+/*
+ * Allocate an RX history entry from @hist's slab cache with allocation
+ * flags @prio and fill it in from @skb: sequence number from the skb
+ * control block, ccval and packet type from the DCCP header, @ndp as given,
+ * and a timestamp taken with do_gettimeofday().  Returns NULL when the
+ * allocation fails.
+ */
+static inline struct dccp_rx_hist_entry *
+                    dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
+                                           const u32 ndp,
+                                           const struct sk_buff *skb,
+                                           const unsigned int __nocast prio)
+{
+       const struct dccp_hdr *hdr;
+       struct dccp_rx_hist_entry *rec;
+
+       rec = kmem_cache_alloc(hist->dccprxh_slab, prio);
+       if (rec == NULL)
+               return NULL;
+
+       hdr = dccp_hdr(skb);
+
+       rec->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+       rec->dccphrx_ccval = hdr->dccph_ccval;
+       rec->dccphrx_type  = hdr->dccph_type;
+       rec->dccphrx_ndp   = ndp;
+       do_gettimeofday(&rec->dccphrx_tstamp);
+
+       return rec;
+}
+
+/* Return @entry to @hist's slab cache; a NULL @entry is silently ignored. */
+static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
+                                            struct dccp_rx_hist_entry *entry)
+{
+       if (entry == NULL)
+               return;
+
+       kmem_cache_free(hist->dccprxh_slab, entry);
+}
+
+extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
+                              struct list_head *list);
+
+/*
+ * Link @entry into @list with list_add(), i.e. directly after the list
+ * head, so the most recently added entry is the first one on the list.
+ */
+static inline void dccp_rx_hist_add_entry(struct list_head *list,
+                                         struct dccp_rx_hist_entry *entry)
+{
+       list_add(&entry->dccphrx_node, list);
+}
+
+/* Return the first entry on @list, or NULL when the list is empty. */
+static inline struct dccp_rx_hist_entry *
+               dccp_rx_hist_head(struct list_head *list)
+{
+       if (list_empty(list))
+               return NULL;
+
+       return list_entry(list->next, struct dccp_rx_hist_entry,
+                         dccphrx_node);
+}
+
+/*
+ * Return non-zero when @entry records a packet of type DCCP_PKT_DATA or
+ * DCCP_PKT_DATAACK, zero otherwise.
+ */
+static inline int
+       dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry)
+{
+       if (entry->dccphrx_type == DCCP_PKT_DATA)
+               return 1;
+
+       return entry->dccphrx_type == DCCP_PKT_DATAACK;
+}
+
+extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+                                  struct list_head *rx_list,
+                                  struct list_head *li_list,
+                                  struct dccp_rx_hist_entry *packet);
+
+extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
+                                   struct list_head *li_list, u8 *win_loss);
+
+#endif /* _DCCP_PKT_HIST_ */
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h

new file mode 100644 (file)

index 0000000..130c4c4
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -0,0 +1,22 @@
+#ifndef _TFRC_H_
+#define _TFRC_H_
+/*
+ *  net/dccp/ccids/lib/tfrc.h
+ *
+ *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ */
+
+#include <linux/types.h>
+
+/*
+ * tfrc_calc_x - send rate in bytes per second for packet size s (bytes),
+ *               round trip time R (usecs) and loss rate p (decimal
+ *               fraction multiplied by 1,000,000)
+ */
+extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
+/*
+ * tfrc_calc_x_reverse_lookup - map a function value back to a loss rate p;
+ *               both fvalue and the returned p are multiplied by 1,000,000
+ */
+extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
+
+#endif /* _TFRC_H_ */
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c

new file mode 100644 (file)

index 0000000..d2b5933
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -0,0 +1,644 @@
+/*
+ *  net/dccp/ccids/lib/tfrc_equation.c
+ *
+ *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <asm/bug.h>
+#include <asm/div64.h>
+
+#include "tfrc.h"
+
+#define TFRC_CALC_X_ARRSIZE 500
+
+#define TFRC_CALC_X_SPLIT 50000
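+/*
+ * TFRC_CALC_X_SPLIT is equivalent to p = 0.05 (p is scaled by 1,000,000).
+ *
+ * Layout of the table below, as used by the lookup functions in this file:
+ * column [1] is indexed in steps of TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE
+ * and serves p below the split; column [0] is indexed in steps of
+ * 1000000 / TFRC_CALC_X_ARRSIZE and serves p at or above the split.
+ * The stored function values are scaled by 1,000,000 as well.
+ */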
+
+static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
+       {     37172,   8172 },
+       {     53499,  11567 },
+       {     66664,  14180 },
+       {     78298,  16388 },
+       {     89021,  18339 },
+       {     99147,  20108 },
+       {    108858,  21738 },
+       {    118273,  23260 },
+       {    127474,  24693 },
+       {    136520,  26052 },
+       {    145456,  27348 },
+       {    154316,  28589 },
+       {    163130,  29783 },
+       {    171919,  30935 },
+       {    180704,  32049 },
+       {    189502,  33130 },
+       {    198328,  34180 },
+       {    207194,  35202 },
+       {    216114,  36198 },
+       {    225097,  37172 },
+       {    234153,  38123 },
+       {    243294,  39055 },
+       {    252527,  39968 },
+       {    261861,  40864 },
+       {    271305,  41743 },
+       {    280866,  42607 },
+       {    290553,  43457 },
+       {    300372,  44293 },
+       {    310333,  45117 },
+       {    320441,  45929 },
+       {    330705,  46729 },
+       {    341131,  47518 },
+       {    351728,  48297 },
+       {    362501,  49066 },
+       {    373460,  49826 },
+       {    384609,  50577 },
+       {    395958,  51320 },
+       {    407513,  52054 },
+       {    419281,  52780 },
+       {    431270,  53499 },
+       {    443487,  54211 },
+       {    455940,  54916 },
+       {    468635,  55614 },
+       {    481581,  56306 },
+       {    494785,  56991 },
+       {    508254,  57671 },
+       {    521996,  58345 },
+       {    536019,  59014 },
+       {    550331,  59677 },
+       {    564939,  60335 },
+       {    579851,  60988 },
+       {    595075,  61636 },
+       {    610619,  62279 },
+       {    626491,  62918 },
+       {    642700,  63553 },
+       {    659253,  64183 },
+       {    676158,  64809 },
+       {    693424,  65431 },
+       {    711060,  66050 },
+       {    729073,  66664 },
+       {    747472,  67275 },
+       {    766266,  67882 },
+       {    785464,  68486 },
+       {    805073,  69087 },
+       {    825103,  69684 },
+       {    845562,  70278 },
+       {    866460,  70868 },
+       {    887805,  71456 },
+       {    909606,  72041 },
+       {    931873,  72623 },
+       {    954614,  73202 },
+       {    977839,  73778 },
+       {   1001557,  74352 },
+       {   1025777,  74923 },
+       {   1050508,  75492 },
+       {   1075761,  76058 },
+       {   1101544,  76621 },
+       {   1127867,  77183 },
+       {   1154739,  77741 },
+       {   1182172,  78298 },
+       {   1210173,  78852 },
+       {   1238753,  79405 },
+       {   1267922,  79955 },
+       {   1297689,  80503 },
+       {   1328066,  81049 },
+       {   1359060,  81593 },
+       {   1390684,  82135 },
+       {   1422947,  82675 },
+       {   1455859,  83213 },
+       {   1489430,  83750 },
+       {   1523671,  84284 },
+       {   1558593,  84817 },
+       {   1594205,  85348 },
+       {   1630518,  85878 },
+       {   1667543,  86406 },
+       {   1705290,  86932 },
+       {   1743770,  87457 },
+       {   1782994,  87980 },
+       {   1822973,  88501 },
+       {   1863717,  89021 },
+       {   1905237,  89540 },
+       {   1947545,  90057 },
+       {   1990650,  90573 },
+       {   2034566,  91087 },
+       {   2079301,  91600 },
+       {   2124869,  92111 },
+       {   2171279,  92622 },
+       {   2218543,  93131 },
+       {   2266673,  93639 },
+       {   2315680,  94145 },
+       {   2365575,  94650 },
+       {   2416371,  95154 },
+       {   2468077,  95657 },
+       {   2520707,  96159 },
+       {   2574271,  96660 },
+       {   2628782,  97159 },
+       {   2684250,  97658 },
+       {   2740689,  98155 },
+       {   2798110,  98651 },
+       {   2856524,  99147 },
+       {   2915944,  99641 },
+       {   2976382, 100134 },
+       {   3037850, 100626 },
+       {   3100360, 101117 },
+       {   3163924, 101608 },
+       {   3228554, 102097 },
+       {   3294263, 102586 },
+       {   3361063, 103073 },
+       {   3428966, 103560 },
+       {   3497984, 104045 },
+       {   3568131, 104530 },
+       {   3639419, 105014 },
+       {   3711860, 105498 },
+       {   3785467, 105980 },
+       {   3860253, 106462 },
+       {   3936229, 106942 },
+       {   4013410, 107422 },
+       {   4091808, 107902 },
+       {   4171435, 108380 },
+       {   4252306, 108858 },
+       {   4334431, 109335 },
+       {   4417825, 109811 },
+       {   4502501, 110287 },
+       {   4588472, 110762 },
+       {   4675750, 111236 },
+       {   4764349, 111709 },
+       {   4854283, 112182 },
+       {   4945564, 112654 },
+       {   5038206, 113126 },
+       {   5132223, 113597 },
+       {   5227627, 114067 },
+       {   5324432, 114537 },
+       {   5422652, 115006 },
+       {   5522299, 115474 },
+       {   5623389, 115942 },
+       {   5725934, 116409 },
+       {   5829948, 116876 },
+       {   5935446, 117342 },
+       {   6042439, 117808 },
+       {   6150943, 118273 },
+       {   6260972, 118738 },
+       {   6372538, 119202 },
+       {   6485657, 119665 },
+       {   6600342, 120128 },
+       {   6716607, 120591 },
+       {   6834467, 121053 },
+       {   6953935, 121514 },
+       {   7075025, 121976 },
+       {   7197752, 122436 },
+       {   7322131, 122896 },
+       {   7448175, 123356 },
+       {   7575898, 123815 },
+       {   7705316, 124274 },
+       {   7836442, 124733 },
+       {   7969291, 125191 },
+       {   8103877, 125648 },
+       {   8240216, 126105 },
+       {   8378321, 126562 },
+       {   8518208, 127018 },
+       {   8659890, 127474 },
+       {   8803384, 127930 },
+       {   8948702, 128385 },
+       {   9095861, 128840 },
+       {   9244875, 129294 },
+       {   9395760, 129748 },
+       {   9548529, 130202 },
+       {   9703198, 130655 },
+       {   9859782, 131108 },
+       {  10018296, 131561 },
+       {  10178755, 132014 },
+       {  10341174, 132466 },
+       {  10505569, 132917 },
+       {  10671954, 133369 },
+       {  10840345, 133820 },
+       {  11010757, 134271 },
+       {  11183206, 134721 },
+       {  11357706, 135171 },
+       {  11534274, 135621 },
+       {  11712924, 136071 },
+       {  11893673, 136520 },
+       {  12076536, 136969 },
+       {  12261527, 137418 },
+       {  12448664, 137867 },
+       {  12637961, 138315 },
+       {  12829435, 138763 },
+       {  13023101, 139211 },
+       {  13218974, 139658 },
+       {  13417071, 140106 },
+       {  13617407, 140553 },
+       {  13819999, 140999 },
+       {  14024862, 141446 },
+       {  14232012, 141892 },
+       {  14441465, 142339 },
+       {  14653238, 142785 },
+       {  14867346, 143230 },
+       {  15083805, 143676 },
+       {  15302632, 144121 },
+       {  15523842, 144566 },
+       {  15747453, 145011 },
+       {  15973479, 145456 },
+       {  16201939, 145900 },
+       {  16432847, 146345 },
+       {  16666221, 146789 },
+       {  16902076, 147233 },
+       {  17140429, 147677 },
+       {  17381297, 148121 },
+       {  17624696, 148564 },
+       {  17870643, 149007 },
+       {  18119154, 149451 },
+       {  18370247, 149894 },
+       {  18623936, 150336 },
+       {  18880241, 150779 },
+       {  19139176, 151222 },
+       {  19400759, 151664 },
+       {  19665007, 152107 },
+       {  19931936, 152549 },
+       {  20201564, 152991 },
+       {  20473907, 153433 },
+       {  20748982, 153875 },
+       {  21026807, 154316 },
+       {  21307399, 154758 },
+       {  21590773, 155199 },
+       {  21876949, 155641 },
+       {  22165941, 156082 },
+       {  22457769, 156523 },
+       {  22752449, 156964 },
+       {  23049999, 157405 },
+       {  23350435, 157846 },
+       {  23653774, 158287 },
+       {  23960036, 158727 },
+       {  24269236, 159168 },
+       {  24581392, 159608 },
+       {  24896521, 160049 },
+       {  25214642, 160489 },
+       {  25535772, 160929 },
+       {  25859927, 161370 },
+       {  26187127, 161810 },
+       {  26517388, 162250 },
+       {  26850728, 162690 },
+       {  27187165, 163130 },
+       {  27526716, 163569 },
+       {  27869400, 164009 },
+       {  28215234, 164449 },
+       {  28564236, 164889 },
+       {  28916423, 165328 },
+       {  29271815, 165768 },
+       {  29630428, 166208 },
+       {  29992281, 166647 },
+       {  30357392, 167087 },
+       {  30725779, 167526 },
+       {  31097459, 167965 },
+       {  31472452, 168405 },
+       {  31850774, 168844 },
+       {  32232445, 169283 },
+       {  32617482, 169723 },
+       {  33005904, 170162 },
+       {  33397730, 170601 },
+       {  33792976, 171041 },
+       {  34191663, 171480 },
+       {  34593807, 171919 },
+       {  34999428, 172358 },
+       {  35408544, 172797 },
+       {  35821174, 173237 },
+       {  36237335, 173676 },
+       {  36657047, 174115 },
+       {  37080329, 174554 },
+       {  37507197, 174993 },
+       {  37937673, 175433 },
+       {  38371773, 175872 },
+       {  38809517, 176311 },
+       {  39250924, 176750 },
+       {  39696012, 177190 },
+       {  40144800, 177629 },
+       {  40597308, 178068 },
+       {  41053553, 178507 },
+       {  41513554, 178947 },
+       {  41977332, 179386 },
+       {  42444904, 179825 },
+       {  42916290, 180265 },
+       {  43391509, 180704 },
+       {  43870579, 181144 },
+       {  44353520, 181583 },
+       {  44840352, 182023 },
+       {  45331092, 182462 },
+       {  45825761, 182902 },
+       {  46324378, 183342 },
+       {  46826961, 183781 },
+       {  47333531, 184221 },
+       {  47844106, 184661 },
+       {  48358706, 185101 },
+       {  48877350, 185541 },
+       {  49400058, 185981 },
+       {  49926849, 186421 },
+       {  50457743, 186861 },
+       {  50992759, 187301 },
+       {  51531916, 187741 },
+       {  52075235, 188181 },
+       {  52622735, 188622 },
+       {  53174435, 189062 },
+       {  53730355, 189502 },
+       {  54290515, 189943 },
+       {  54854935, 190383 },
+       {  55423634, 190824 },
+       {  55996633, 191265 },
+       {  56573950, 191706 },
+       {  57155606, 192146 },
+       {  57741621, 192587 },
+       {  58332014, 193028 },
+       {  58926806, 193470 },
+       {  59526017, 193911 },
+       {  60129666, 194352 },
+       {  60737774, 194793 },
+       {  61350361, 195235 },
+       {  61967446, 195677 },
+       {  62589050, 196118 },
+       {  63215194, 196560 },
+       {  63845897, 197002 },
+       {  64481179, 197444 },
+       {  65121061, 197886 },
+       {  65765563, 198328 },
+       {  66414705, 198770 },
+       {  67068508, 199213 },
+       {  67726992, 199655 },
+       {  68390177, 200098 },
+       {  69058085, 200540 },
+       {  69730735, 200983 },
+       {  70408147, 201426 },
+       {  71090343, 201869 },
+       {  71777343, 202312 },
+       {  72469168, 202755 },
+       {  73165837, 203199 },
+       {  73867373, 203642 },
+       {  74573795, 204086 },
+       {  75285124, 204529 },
+       {  76001380, 204973 },
+       {  76722586, 205417 },
+       {  77448761, 205861 },
+       {  78179926, 206306 },
+       {  78916102, 206750 },
+       {  79657310, 207194 },
+       {  80403571, 207639 },
+       {  81154906, 208084 },
+       {  81911335, 208529 },
+       {  82672880, 208974 },
+       {  83439562, 209419 },
+       {  84211402, 209864 },
+       {  84988421, 210309 },
+       {  85770640, 210755 },
+       {  86558080, 211201 },
+       {  87350762, 211647 },
+       {  88148708, 212093 },
+       {  88951938, 212539 },
+       {  89760475, 212985 },
+       {  90574339, 213432 },
+       {  91393551, 213878 },
+       {  92218133, 214325 },
+       {  93048107, 214772 },
+       {  93883493, 215219 },
+       {  94724314, 215666 },
+       {  95570590, 216114 },
+       {  96422343, 216561 },
+       {  97279594, 217009 },
+       {  98142366, 217457 },
+       {  99010679, 217905 },
+       {  99884556, 218353 },
+       { 100764018, 218801 },
+       { 101649086, 219250 },
+       { 102539782, 219698 },
+       { 103436128, 220147 },
+       { 104338146, 220596 },
+       { 105245857, 221046 },
+       { 106159284, 221495 },
+       { 107078448, 221945 },
+       { 108003370, 222394 },
+       { 108934074, 222844 },
+       { 109870580, 223294 },
+       { 110812910, 223745 },
+       { 111761087, 224195 },
+       { 112715133, 224646 },
+       { 113675069, 225097 },
+       { 114640918, 225548 },
+       { 115612702, 225999 },
+       { 116590442, 226450 },
+       { 117574162, 226902 },
+       { 118563882, 227353 },
+       { 119559626, 227805 },
+       { 120561415, 228258 },
+       { 121569272, 228710 },
+       { 122583219, 229162 },
+       { 123603278, 229615 },
+       { 124629471, 230068 },
+       { 125661822, 230521 },
+       { 126700352, 230974 },
+       { 127745083, 231428 },
+       { 128796039, 231882 },
+       { 129853241, 232336 },
+       { 130916713, 232790 },
+       { 131986475, 233244 },
+       { 133062553, 233699 },
+       { 134144966, 234153 },
+       { 135233739, 234608 },
+       { 136328894, 235064 },
+       { 137430453, 235519 },
+       { 138538440, 235975 },
+       { 139652876, 236430 },
+       { 140773786, 236886 },
+       { 141901190, 237343 },
+       { 143035113, 237799 },
+       { 144175576, 238256 },
+       { 145322604, 238713 },
+       { 146476218, 239170 },
+       { 147636442, 239627 },
+       { 148803298, 240085 },
+       { 149976809, 240542 },
+       { 151156999, 241000 },
+       { 152343890, 241459 },
+       { 153537506, 241917 },
+       { 154737869, 242376 },
+       { 155945002, 242835 },
+       { 157158929, 243294 },
+       { 158379673, 243753 },
+       { 159607257, 244213 },
+       { 160841704, 244673 },
+       { 162083037, 245133 },
+       { 163331279, 245593 },
+       { 164586455, 246054 },
+       { 165848586, 246514 },
+       { 167117696, 246975 },
+       { 168393810, 247437 },
+       { 169676949, 247898 },
+       { 170967138, 248360 },
+       { 172264399, 248822 },
+       { 173568757, 249284 },
+       { 174880235, 249747 },
+       { 176198856, 250209 },
+       { 177524643, 250672 },
+       { 178857621, 251136 },
+       { 180197813, 251599 },
+       { 181545242, 252063 },
+       { 182899933, 252527 },
+       { 184261908, 252991 },
+       { 185631191, 253456 },
+       { 187007807, 253920 },
+       { 188391778, 254385 },
+       { 189783129, 254851 },
+       { 191181884, 255316 },
+       { 192588065, 255782 },
+       { 194001698, 256248 },
+       { 195422805, 256714 },
+       { 196851411, 257181 },
+       { 198287540, 257648 },
+       { 199731215, 258115 },
+       { 201182461, 258582 },
+       { 202641302, 259050 },
+       { 204107760, 259518 },
+       { 205581862, 259986 },
+       { 207063630, 260454 },
+       { 208553088, 260923 },
+       { 210050262, 261392 },
+       { 211555174, 261861 },
+       { 213067849, 262331 },
+       { 214588312, 262800 },
+       { 216116586, 263270 },
+       { 217652696, 263741 },
+       { 219196666, 264211 },
+       { 220748520, 264682 },
+       { 222308282, 265153 },
+       { 223875978, 265625 },
+       { 225451630, 266097 },
+       { 227035265, 266569 },
+       { 228626905, 267041 },
+       { 230226576, 267514 },
+       { 231834302, 267986 },
+       { 233450107, 268460 },
+       { 235074016, 268933 },
+       { 236706054, 269407 },
+       { 238346244, 269881 },
+       { 239994613, 270355 },
+       { 241651183, 270830 },
+       { 243315981, 271305 }
+};
+
+/* Calculate the send rate as per section 3.1 of RFC3448
+
+Returns send rate in bytes per second
+
+Integer maths and lookups are used as floating point is not allowed in kernel
+
+The function for Xcalc as per section 3.1 of RFC3448 is:
+
+X =                            s
+     -------------------------------------------------------------
+     R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2)))
+
+where
+X is the transmit rate in bytes/second
+s is the packet size in bytes
+R is the round trip time in seconds
+p is the loss event rate, between 0 and 1.0, of the number of loss events
+  as a fraction of the number of packets transmitted
+t_RTO is the TCP retransmission timeout value in seconds
+b is the number of packets acknowledged by a single TCP acknowledgement
+
+we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes:
+
+X =                            s
+     -----------------------------------------------------------------------
+     R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2)))
+
+
+which we can break down into:
+
+X =     s
+     --------
+     R * f(p)
+
+where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p))
+
+Function parameters:
+s - bytes
+R - RTT in usecs
+p - loss rate (decimal fraction multiplied by 1,000,000)
+
+Returns Xcalc in bytes per second
+
+Edge cases:
+R == 0 is treated as R == 1
+p smaller than one table step uses the first table entry
+p large enough to index past the end of the table trips the BUG_ON() below
+a scaled R * f(p) that rounds down to 0 is treated as 1
+a result that does not fit in 32 bits is returned as the largest u32 value
+
+DON'T alter this code unless you run test cases against it as the code
+has been manipulated to stop underflow/overflow.
+
+*/
+u32 tfrc_calc_x(u16 s, u32 R, u32 p)
+{
+       int index;
+       u32 f;
+       u64 tmp1, tmp2;
+
+       if (p < TFRC_CALC_X_SPLIT)
+               index = (int)(p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1;
+       else
+               index = (int)(p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1;
+
+       if (index < 0)
+               /* p is smaller than one table step: use the first entry */
+               index = 0;
+
+       if (R == 0)
+               R = 1; /* RTT can't be zero or else divide by zero */
+
+       BUG_ON(index >= TFRC_CALC_X_ARRSIZE);
+
+       if (p >= TFRC_CALC_X_SPLIT)
+               f = tfrc_calc_x_lookup[index][0];
+       else
+               f = tfrc_calc_x_lookup[index][1];
+
+       tmp1 = ((u64)s * 100000000);
+       tmp2 = ((u64)R * (u64)f);
+       do_div(tmp2, 10000);
+       /* Don't alter above math unless you test due to overflow on 32 bit */
+
+       /* R * f below 10000 scales down to 0: never divide by zero */
+       if (tmp2 == 0)
+               tmp2 = 1;
+
+       /*
+        * do_div() only takes a 32-bit divisor.  When the divisor is wider,
+        * halve dividend and divisor together until it fits; the quotient
+        * is small in that case, so the lost low bits do not matter.
+        */
+       while (tmp2 > 0xffffffffULL) {
+               tmp1 >>= 1;
+               tmp2 >>= 1;
+       }
+
+       do_div(tmp1, (u32)tmp2);
+
+       /* Saturate instead of silently truncating to the low 32 bits */
+       if (tmp1 > 0xffffffffULL)
+               return ~0U;
+
+       return (u32)tmp1;
+}
+
+EXPORT_SYMBOL_GPL(tfrc_calc_x);
+
+/*
+ * args: fvalue - function value to match
+ * returns: the smallest tabulated p whose function value is >= fvalue;
+ *          0 if fvalue is below the first table entry and 1000000 if it
+ *          is above the last one
+ *
+ * both fvalue and p are multiplied by 1,000,000 to use ints
+ *
+ * Row i of the table holds the function value for step (i + 1), which is
+ * how tfrc_calc_x() indexes it (p / step - 1), so the row index has to be
+ * incremented by one when it is converted back into p.
+ */
+u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
+{
+       int ctr = 0;
+       int small;
+
+       if (fvalue < tfrc_calc_x_lookup[0][1])
+               return 0;
+
+       if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1])
+               small = 1;
+       else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0])
+               return 1000000;
+       else
+               small = 0;
+
+       /* Bounded: fvalue is <= the last entry of the selected column */
+       while (fvalue > tfrc_calc_x_lookup[ctr][small])
+               ctr++;
+
+       if (small)
+               return TFRC_CALC_X_SPLIT * (ctr + 1) / TFRC_CALC_X_ARRSIZE;
+       else
+               return 1000000 * (ctr + 1) / TFRC_CALC_X_ARRSIZE;
+}
+
+EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h

new file mode 100644 (file)

index 0000000..33456c0
--- /dev/null
+++ b/net/dccp/dccp.h
@@ -0,0 +1,493 @@
+#ifndef _DCCP_H
+#define _DCCP_H
+/*
+ *  net/dccp/dccp.h
+ *
+ *  An implementation of the DCCP protocol
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *
+ *     This program is free software; you can redistribute it and/or modify it
+ *     under the terms of the GNU General Public License version 2 as
+ *     published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <net/snmp.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+extern int dccp_debug;
+
+/*
+ * Debug helpers: both print through printk() only while dccp_debug is
+ * non-zero.  dccp_pr_debug() prefixes the message with KERN_DEBUG and the
+ * name of the calling function; dccp_pr_debug_cat() prints the format as
+ * given, with no prefix.
+ */
+#define dccp_pr_debug(format, a...) \
+       do { if (dccp_debug) \
+               printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
+       } while (0)
+#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \
+                                            printk(format, ##a); } while (0)
+#else
+/* Without CONFIG_IP_DCCP_DEBUG both helpers expand to nothing */
+#define dccp_pr_debug(format, a...)
+#define dccp_pr_debug_cat(format, a...)
+#endif
+
+extern struct inet_hashinfo dccp_hashinfo;
+
+extern atomic_t dccp_orphan_count;
+extern int dccp_tw_count;
+extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
+
+extern void dccp_time_wait(struct sock *sk, int state, int timeo);
+
+/* FIXME: Right size this */
+#define DCCP_MAX_OPT_LEN 128
+
+#define DCCP_MAX_PACKET_HDR 32
+
+#define MAX_DCCP_HEADER  (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
+
+#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
+                                    * state, about 60 seconds */
+
+/* draft-ietf-dccp-spec-11.txt initial RTO value */
+#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
+
+/* Maximal interval between probes for local resources.  */
+#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
+
+#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+
+extern struct proto dccp_v4_prot;
+
+/*
+ * Non-zero when seq1 precedes seq2.  Both values are shifted into the top
+ * 48 bits so that the sign of the 64-bit difference gives the ordering.
+ */
+static inline int before48(const u64 seq1, const u64 seq2)
+{
+       const s64 distance = (s64)((seq1 << 16) - (seq2 << 16));
+
+       return distance < 0;
+}
+
+/*
+ * Non-zero when seq1 follows seq2; the mirror image of before48(), using
+ * the same shift-into-the-top-48-bits comparison.
+ */
+static inline int after48(const u64 seq1, const u64 seq2)
+{
+       const s64 distance = (s64)((seq2 << 16) - (seq1 << 16));
+
+       return distance < 0;
+}
+
+/*
+ * Non-zero when seq2 <= seq1 <= seq3.  Both distances are measured forward
+ * from seq2 in unsigned arithmetic on the shifted values.
+ */
+static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
+{
+       const u64 low = seq2 << 16;
+       const u64 window = (seq3 << 16) - low;
+       const u64 offset = (seq1 << 16) - low;
+
+       return window >= offset;
+}
+
+/* Returns seq1 when after48() says it follows seq2, otherwise seq2 */
+static inline u64 max48(const u64 seq1, const u64 seq2)
+{
+       if (after48(seq1, seq2))
+               return seq1;
+
+       return seq2;
+}
+
+/*
+ * Identifiers of the DCCP statistics counters.  __DCCP_MIB_MAX is not a
+ * counter: through DCCP_MIB_MAX it sizes the mibs[] array of
+ * struct dccp_mib, so it must stay last.
+ */
+enum {
+       DCCP_MIB_NUM = 0,
+       DCCP_MIB_ACTIVEOPENS,                   /* ActiveOpens */
+       DCCP_MIB_ESTABRESETS,                   /* EstabResets */
+       DCCP_MIB_CURRESTAB,                     /* CurrEstab */
+       DCCP_MIB_OUTSEGS,                       /* OutSegs */ 
+       DCCP_MIB_OUTRSTS,
+       DCCP_MIB_ABORTONTIMEOUT,
+       DCCP_MIB_TIMEOUTS,
+       DCCP_MIB_ABORTFAILED,
+       DCCP_MIB_PASSIVEOPENS,
+       DCCP_MIB_ATTEMPTFAILS,
+       DCCP_MIB_OUTDATAGRAMS,
+       DCCP_MIB_INERRS,
+       DCCP_MIB_OPTMANDATORYERROR,
+       DCCP_MIB_INVALIDOPT,
+       __DCCP_MIB_MAX
+};
+
+#define DCCP_MIB_MAX   __DCCP_MIB_MAX
+struct dccp_mib {
+       unsigned long   mibs[DCCP_MIB_MAX];
+} __SNMP_MIB_ALIGN__;
+
+DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
+#define DCCP_INC_STATS(field)      SNMP_INC_STATS(dccp_statistics, field)
+#define DCCP_INC_STATS_BH(field)    SNMP_INC_STATS_BH(dccp_statistics, field)
+#define DCCP_INC_STATS_USER(field)  SNMP_INC_STATS_USER(dccp_statistics, field)
+#define DCCP_DEC_STATS(field)      SNMP_DEC_STATS(dccp_statistics, field)
+#define DCCP_ADD_STATS_BH(field, val) \
+                       SNMP_ADD_STATS_BH(dccp_statistics, field, val)
+#define DCCP_ADD_STATS_USER(field, val)        \
+                       SNMP_ADD_STATS_USER(dccp_statistics, field, val)
+
+extern int  dccp_transmit_skb(struct sock *sk, struct sk_buff *skb);
+extern int  dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
+
+extern int dccp_send_response(struct sock *sk);
+extern void dccp_send_ack(struct sock *sk);
+extern void dccp_send_delayed_ack(struct sock *sk);
+extern void dccp_send_sync(struct sock *sk, const u64 seq,
+                          const enum dccp_pkt_type pkt_type);
+
+extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo);
+extern void dccp_write_space(struct sock *sk);
+
+extern void dccp_init_xmit_timers(struct sock *sk);
+/* DCCP-named wrapper: forwards to inet_csk_clear_xmit_timers() for @sk */
+static inline void dccp_clear_xmit_timers(struct sock *sk)
+{
+       inet_csk_clear_xmit_timers(sk);
+}
+
+extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
+
+extern const char *dccp_packet_name(const int type);
+extern const char *dccp_state_name(const int state);
+
+/*
+ * Move @sk to @state.
+ *
+ * Keeps the DCCP_MIB_CURRESTAB and DCCP_MIB_ESTABRESETS counters in step
+ * with the transition and, when the new state is DCCP_CLOSED, unhashes the
+ * socket and releases its bound port before the state is stored.
+ * Warns (WARN_ON) if the socket is already in @state.
+ */
+static inline void dccp_set_state(struct sock *sk, const int state)
+{
+       const int oldstate = sk->sk_state;
+
+       dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
+                     dccp_role(sk), sk,
+                     dccp_state_name(oldstate), dccp_state_name(state));
+       WARN_ON(state == oldstate);
+
+       switch (state) {
+       case DCCP_OPEN:
+               /* Entering OPEN from any other state: one more established */
+               if (oldstate != DCCP_OPEN)
+                       DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
+               break;
+
+       case DCCP_CLOSED:
+               /* Closing straight from CLOSING or OPEN counts as a reset */
+               if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
+                       DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
+
+               sk->sk_prot->unhash(sk);
+               /* Keep the port if userspace locked the binding */
+               if (inet_csk(sk)->icsk_bind_hash != NULL &&
+                   !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
+                       inet_put_port(&dccp_hashinfo, sk);
+               /* fall through */
+       default:
+               /* Leaving OPEN for any other state: one fewer established */
+               if (oldstate == DCCP_OPEN)
+                       DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
+       }
+
+       /* Change state AFTER socket is unhashed to avoid closed
+        * socket sitting in hash tables.
+        */
+       sk->sk_state = state;
+}
+
+/*
+ * Finish off @sk: switch it to DCCP_CLOSED, stop its transmit timers and
+ * mark both directions shut down.  A socket flagged SOCK_DEAD is destroyed
+ * here; any other socket gets its sk_state_change() callback instead.
+ */
+static inline void dccp_done(struct sock *sk)
+{
+       dccp_set_state(sk, DCCP_CLOSED);
+       dccp_clear_xmit_timers(sk);
+       sk->sk_shutdown = SHUTDOWN_MASK;
+
+       if (sock_flag(sk, SOCK_DEAD)) {
+               inet_csk_destroy_sock(sk);
+               return;
+       }
+
+       sk->sk_state_change(sk);
+}
+
+/*
+ * Initialise the request sock @req from the received packet @skb: the
+ * remote port is taken from the DCCP header's source port, and the
+ * "acked" flag and receive window are cleared.  @dp is currently unused.
+ */
+static inline void dccp_openreq_init(struct request_sock *req,
+                                    struct dccp_sock *dp,
+                                    struct sk_buff *skb)
+{
+       /*
+        * FIXME: fill in the other req fields from the DCCP options
+        * received
+        */
+       inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
+       inet_rsk(req)->acked    = 0;
+       req->rcv_wnd = 0;
+}
+
+extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
+
+extern struct sock *dccp_create_openreq_child(struct sock *sk,
+                                             const struct request_sock *req,
+                                             const struct sk_buff *skb);
+
+extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
+
+extern void dccp_v4_err(struct sk_buff *skb, u32);
+
+extern int dccp_v4_rcv(struct sk_buff *skb);
+
+extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
+                                             struct sk_buff *skb,
+                                             struct request_sock *req,
+                                             struct dst_entry *dst);
+extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
+                                  struct request_sock *req,
+                                  struct request_sock **prev);
+
+extern int dccp_child_process(struct sock *parent, struct sock *child,
+                             struct sk_buff *skb);
+extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+                                 struct dccp_hdr *dh, unsigned len);
+extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
+                               const struct dccp_hdr *dh, const unsigned len);
+
+extern void            dccp_close(struct sock *sk, long timeout);
+extern struct sk_buff  *dccp_make_response(struct sock *sk,
+                                           struct dst_entry *dst,
+                                           struct request_sock *req);
+extern struct sk_buff  *dccp_make_reset(struct sock *sk,
+                                        struct dst_entry *dst,
+                                        enum dccp_reset_codes code);
+
+extern int        dccp_connect(struct sock *sk);
+extern int        dccp_disconnect(struct sock *sk, int flags);
+extern int        dccp_getsockopt(struct sock *sk, int level, int optname,
+                                  char __user *optval, int __user *optlen);
+extern int        dccp_setsockopt(struct sock *sk, int level, int optname,
+                                  char __user *optval, int optlen);
+extern int        dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+extern int        dccp_sendmsg(struct kiocb *iocb, struct sock *sk,
+                               struct msghdr *msg, size_t size);
+extern int        dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
+                               struct msghdr *msg, size_t len, int nonblock,
+                               int flags, int *addr_len);
+extern void       dccp_shutdown(struct sock *sk, int how);
+
+extern int        dccp_v4_checksum(const struct sk_buff *skb,
+                                   const u32 saddr, const u32 daddr);
+
+extern int        dccp_v4_send_reset(struct sock *sk,
+                                     enum dccp_reset_codes code);
+extern void       dccp_send_close(struct sock *sk, const int active);
+
+/*
+ * Per-packet DCCP state, stored in the skb's cb[] area and reached through
+ * DCCP_SKB_CB().  dccpd_type is compared against the DCCP_PKT_* values;
+ * dccpd_ack_seq holds DCCP_PKT_WITHOUT_ACK_SEQ when the packet carries no
+ * acknowledgement number.
+ */
+struct dccp_skb_cb {
+       __u8 dccpd_type;
+       __u8 dccpd_reset_code;
+       __u8 dccpd_service;
+       __u8 dccpd_ccval;
+       __u64 dccpd_seq;
+       __u64 dccpd_ack_seq;
+       int  dccpd_opt_len;
+};
+
+/* View the start of an skb's control buffer as a struct dccp_skb_cb */
+#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
+
+/*
+ * Returns 1 when the packet type recorded in the skb's control block is
+ * Ack, Close, CloseReq, Reset, Sync or SyncAck, and 0 for any other type.
+ */
+static inline int dccp_non_data_packet(const struct sk_buff *skb)
+{
+       switch (DCCP_SKB_CB(skb)->dccpd_type) {
+       case DCCP_PKT_ACK:
+       case DCCP_PKT_CLOSE:
+       case DCCP_PKT_CLOSEREQ:
+       case DCCP_PKT_RESET:
+       case DCCP_PKT_SYNC:
+       case DCCP_PKT_SYNCACK:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/*
+ * Returns 1 when the packet type recorded in the skb's control block is
+ * Data or Request, and 0 for any other type.
+ */
+static inline int dccp_packet_without_ack(const struct sk_buff *skb)
+{
+       const __u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
+
+       if (pkt_type == DCCP_PKT_DATA)
+               return 1;
+
+       return pkt_type == DCCP_PKT_REQUEST;
+}
+
+/* Largest sequence number: 2^48 - 1, i.e. sequence numbers are 48 bits wide */
+#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
+/*
+ * Value outside the 48-bit range, compared against dccpd_ack_seq to tell
+ * that a packet has no acknowledgement number.
+ */
+#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
+
+/*
+ * Store @value in *@seqno reduced modulo 2^48.
+ *
+ * Callers compute @value with plain u64 arithmetic, e.g.
+ * "gsr + 1 - window / 4" or "gss - window + 1".  For small sequence
+ * numbers those expressions wrap to just below 2^64, and subtracting 2^48
+ * once leaves them far out of range.  Masking handles overflow and
+ * underflow alike, because 2^64 is a multiple of 2^48.
+ */
+static inline void dccp_set_seqno(u64 *seqno, u64 value)
+{
+       *seqno = value & DCCP_MAX_SEQNO;
+}
+
+/*
+ * Distance from seqno1 forward to seqno2.  The subtraction is done on the
+ * values shifted into the top 48 bits, then shifted back down.
+ */
+static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
+{
+       const u64 from = seqno1 << 16;
+       const u64 to = seqno2 << 16;
+
+       return (to - from) >> 16;
+}
+
+/* Advance *seqno by one, restarting at 0 once it passes DCCP_MAX_SEQNO */
+static inline void dccp_inc_seqno(u64 *seqno)
+{
+       const u64 next = *seqno + 1;
+
+       *seqno = next > DCCP_MAX_SEQNO ? 0 : next;
+}
+
+/*
+ * Write the sequence number @gss into a DCCP header: the bits above the
+ * low 32 go into dh->dccph_seq, the low 32 bits into dccph_seq_low of the
+ * extension header that directly follows @dh in memory.
+ */
+static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
+{
+       /* The extension header starts right after the generic header */
+       struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh +
+                                                          sizeof(*dh));
+
+       /*
+        * NOTE(review): the extra ">> 8" on little-endian bitfield layouts
+        * presumably compensates for dccph_seq being narrower than 32 bits;
+        * confirm against the struct dccp_hdr definition.
+        */
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       dh->dccph_seq      = htonl((gss >> 32)) >> 8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       dh->dccph_seq      = htonl((gss >> 32));
+#else
+#error  "Adjust your <asm/byteorder.h> defines"
+#endif
+       dhx->dccph_seq_low = htonl(gss & 0xffffffff);
+}
+
+/*
+ * Write the acknowledgement number @gsr into @dhack: the bits above the
+ * low 32 go into dccph_ack_nr_high, the low 32 bits into dccph_ack_nr_low.
+ * Both parts are converted with htonl(); little-endian bitfield layouts
+ * additionally shift the high part right by 8.
+ */
+static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
+                                   const u64 gsr)
+{
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       dhack->dccph_ack_nr_high = htonl((gsr >> 32));
+#else
+#error  "Adjust your <asm/byteorder.h> defines"
+#endif
+       dhack->dccph_ack_nr_low  = htonl(gsr & 0xffffffff);
+}
+
+/*
+ * Record @seq as the socket's GSR and recompute the sequence window around
+ * it, with W being dccpo_sequence_window:
+ *      SWL = GSR + 1 - W / 4
+ *      SWH = GSR + 3 * W / 4
+ * Both bounds are stored through dccp_set_seqno().
+ */
+static inline void dccp_update_gsr(struct sock *sk, u64 seq)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       dp->dccps_gsr = seq;
+       dccp_set_seqno(&dp->dccps_swl,
+                      (dp->dccps_gsr + 1 -
+                       (dp->dccps_options.dccpo_sequence_window / 4)));
+       dccp_set_seqno(&dp->dccps_swh,
+                      (dp->dccps_gsr +
+                       (3 * dp->dccps_options.dccpo_sequence_window) / 4));
+}
+
+/*
+ * Record @seq as the socket's GSS and recompute the acknowledgement
+ * window, with W being dccpo_sequence_window:
+ *      AWH = GSS
+ *      AWL = GSS - W + 1
+ * AWL is stored through dccp_set_seqno().
+ */
+static inline void dccp_update_gss(struct sock *sk, u64 seq)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       dp->dccps_awh = dp->dccps_gss = seq;
+       dccp_set_seqno(&dp->dccps_awl,
+                      (dp->dccps_gss -
+                       dp->dccps_options.dccpo_sequence_window + 1));
+}
+
+extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
+extern void dccp_insert_option_elapsed_time(struct sock *sk,
+                                           struct sk_buff *skb,
+                                           u32 elapsed_time);
+extern void dccp_insert_option_timestamp(struct sock *sk,
+                                        struct sk_buff *skb);
+extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
+                              unsigned char option,
+                              const void *value, unsigned char len);
+
+extern struct socket *dccp_ctl_socket;
+
+#define DCCP_ACKPKTS_STATE_RECEIVED    0
+#define DCCP_ACKPKTS_STATE_ECN_MARKED  (1 << 6)
+#define DCCP_ACKPKTS_STATE_NOT_RECEIVED        (3 << 6)
+
+#define DCCP_ACKPKTS_STATE_MASK                0xC0 /* 11000000 */
+#define DCCP_ACKPKTS_LEN_MASK          0x3F /* 00111111 */
+
+/** struct dccp_ackpkts - acknowledgeable packets
+ *
+ * This data structure is the one defined in the DCCP draft
+ * Appendix A.
+ *
+ * @dccpap_buf_head - circular buffer head
+ * @dccpap_buf_tail - circular buffer tail
+ * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the
+ *                    buffer (i.e. %dccpap_buf_head)
+ * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
+ *                    by the buffer with State 0
+ *
+ * Additionally, the HC-Receiver must keep some information about the
+ * Ack Vectors it has recently sent. For each packet sent carrying an
+ * Ack Vector, it remembers four variables:
+ *
+ * @dccpap_ack_seqno - the Sequence Number used for the packet
+ *                    (HC-Receiver seqno)
+ * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
+ * @dccpap_ack_ackno - the Acknowledgement Number used for the packet
+ *                    (HC-Sender seqno)
+ * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ *
+ * @dccpap_buf_vector_len - NOTE(review): undocumented; presumably the
+ *                    length of the vector currently held in the buffer -
+ *                    confirm against the implementation
+ * @dccpap_ack_vector_len - NOTE(review): undocumented; presumably the
+ *                    vector length at the time of the last ack - confirm
+ * @dccpap_buf_len - circular buffer length
+ * @dccpap_time                - a timestamp, kept as a struct timeval
+ * @dccpap_buf - circular buffer of acknowledgeable packets; a trailing
+ *              zero-length array, so the storage follows the struct
+ */
+struct dccp_ackpkts {
+       unsigned int            dccpap_buf_head;
+       unsigned int            dccpap_buf_tail;
+       u64                     dccpap_buf_ackno;
+       u64                     dccpap_ack_seqno;
+       u64                     dccpap_ack_ackno;
+       unsigned int            dccpap_ack_ptr;
+       unsigned int            dccpap_buf_vector_len;
+       unsigned int            dccpap_ack_vector_len;
+       unsigned int            dccpap_buf_len;
+       struct timeval          dccpap_time;
+       u8                      dccpap_buf_nonce;
+       u8                      dccpap_ack_nonce;
+       u8                      dccpap_buf[0];
+};
+
+extern struct dccp_ackpkts *
+               dccp_ackpkts_alloc(unsigned int len,
+                                 const unsigned int __nocast priority);
+extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
+extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state);
+extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
+                                        struct sock *sk, u64 ackno);
+
+/* Convert @tv to a single microsecond count: seconds scaled plus tv_usec */
+static inline suseconds_t timeval_usecs(const struct timeval *tv)
+{
+       return tv->tv_sec * USEC_PER_SEC + tv->tv_usec;
+}
+
+/*
+ * Returns @large - @small in microseconds.  The two fields are subtracted
+ * separately, borrowing one second when the microsecond difference is
+ * negative.
+ */
+static inline suseconds_t timeval_delta(const struct timeval *large,
+                                       const struct timeval *small)
+{
+       time_t      secs  = large->tv_sec  - small->tv_sec;
+       suseconds_t usecs = large->tv_usec - small->tv_usec;
+
+       /* Borrow a second so that usecs ends up non-negative */
+       if (usecs < 0) {
+               secs--;
+               usecs += USEC_PER_SEC;
+       }
+       return secs * USEC_PER_SEC + usecs;
+}
+
+/*
+ * Add @usecs microseconds to @tv in place, carrying whole seconds out of
+ * tv_usec into tv_sec until tv_usec is below USEC_PER_SEC again.
+ */
+static inline void timeval_add_usecs(struct timeval *tv,
+                                    const suseconds_t usecs)
+{
+       for (tv->tv_usec += usecs;
+            tv->tv_usec >= USEC_PER_SEC;
+            tv->tv_usec -= USEC_PER_SEC)
+               tv->tv_sec++;
+}
+
+/*
+ * Subtract @usecs microseconds from @tv in place, borrowing whole seconds
+ * from tv_sec until tv_usec is no longer negative.
+ */
+static inline void timeval_sub_usecs(struct timeval *tv,
+                                    const suseconds_t usecs)
+{
+       for (tv->tv_usec -= usecs;
+            tv->tv_usec < 0;
+            tv->tv_usec += USEC_PER_SEC)
+               tv->tv_sec--;
+}
+
+/*
+ * Microseconds elapsed between the timeval passed in and the current
+ * time, as reported by do_gettimeofday().
+ */
+static inline suseconds_t timeval_now_delta(const struct timeval *tv)
+{
+       struct timeval current_time;
+
+       do_gettimeofday(&current_time);
+
+       return timeval_delta(&current_time, tv);
+}
+
+/*
+ * Ack vector / ackpkts print helpers.  They are real functions, defined
+ * elsewhere, only when CONFIG_IP_DCCP_DEBUG is set; otherwise they are
+ * empty inline stubs so callers need no #ifdef of their own.
+ */
+#ifdef CONFIG_IP_DCCP_DEBUG
+extern void dccp_ackvector_print(const u64 ackno,
+                                const unsigned char *vector, int len);
+extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
+#else
+static inline void dccp_ackvector_print(const u64 ackno,
+                                       const unsigned char *vector,
+                                       int len) { }
+static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
+#endif
+
+#endif /* _DCCP_H */
diff --git a/net/dccp/diag.c b/net/dccp/diag.c

new file mode 100644 (file)

index 0000000..f675d8e
--- /dev/null
+++ b/net/dccp/diag.c
@@ -0,0 +1,71 @@
+/*
+ *  net/dccp/diag.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@mandriva.com>
+ *
+ *     This program is free software; you can redistribute it and/or modify it
+ *     under the terms of the GNU General Public License version 2 as
+ *     published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+
+#include <linux/module.h>
+#include <linux/inet_diag.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+/*
+ * Fill the tcp_info structure @info for the DCCP socket @sk.  The structure
+ * is zeroed first, the generic connection fields are copied, ack vector
+ * use is reported as TCPI_OPT_SACK, and finally the RX and TX CCIDs add
+ * their own information.
+ */
+static void dccp_get_info(struct sock *sk, struct tcp_info *info)
+{
+       const struct inet_connection_sock *csk = inet_csk(sk);
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       /* Anything not set below is reported as 0 */
+       memset(info, 0, sizeof(*info));
+
+       info->tcpi_pmtu         = dp->dccps_pmtu_cookie;
+       info->tcpi_backoff      = csk->icsk_backoff;
+       info->tcpi_probes       = csk->icsk_probes_out;
+       info->tcpi_retransmits  = csk->icsk_retransmits;
+       info->tcpi_state        = sk->sk_state;
+
+       if (dp->dccps_options.dccpo_send_ack_vector)
+               info->tcpi_options |= TCPI_OPT_SACK;
+
+       ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
+       ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info);
+}
+
+/*
+ * inet_diag callback: reports both queue sizes as 0 and, when the caller
+ * supplied an info buffer, fills it through dccp_get_info().
+ */
+static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+                              void *_info)
+{
+       r->idiag_wqueue = 0;
+       r->idiag_rqueue = 0;
+
+       if (_info == NULL)
+               return;
+
+       dccp_get_info(sk, _info);
+}
+
+/*
+ * Handler registered with inet_diag for DCCPDIAG_GETSOCK requests: points
+ * at the DCCP hash tables and reports socket details as a struct tcp_info.
+ */
+static struct inet_diag_handler dccp_diag_handler = {
+       .idiag_hashinfo  = &dccp_hashinfo,
+       .idiag_get_info  = dccp_diag_get_info,
+       .idiag_type      = DCCPDIAG_GETSOCK,
+       .idiag_info_size = sizeof(struct tcp_info),
+};
+
+/* Module init: register the DCCP handler, returning inet_diag's result */
+static int __init dccp_diag_init(void)
+{
+       return inet_diag_register(&dccp_diag_handler);
+}
+
+/* Module exit: unregister the DCCP handler from inet_diag */
+static void __exit dccp_diag_fini(void)
+{
+       inet_diag_unregister(&dccp_diag_handler);
+}
+
+module_init(dccp_diag_init);
+module_exit(dccp_diag_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
+MODULE_DESCRIPTION("DCCP inet_diag handler");
diff --git a/net/dccp/input.c b/net/dccp/input.c

new file mode 100644 (file)

index 0000000..ef29cef
--- /dev/null
+++ b/net/dccp/input.c
@@ -0,0 +1,600 @@
+/*
+ *  net/dccp/input.c
+ * 
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+
+#include <net/sock.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+/*
+ * Mark the receive side of @sk as shut down and hand @skb to the socket:
+ * the DCCP header (dccph_doff 32-bit words) is pulled off, the skb is
+ * appended to sk_receive_queue and charged to @sk, and the socket's
+ * sk_data_ready() callback is invoked.
+ */
+static void dccp_fin(struct sock *sk, struct sk_buff *skb)
+{
+       sk->sk_shutdown |= RCV_SHUTDOWN;
+       sock_set_flag(sk, SOCK_DONE);
+       __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
+       __skb_queue_tail(&sk->sk_receive_queue, skb);
+       skb_set_owner_r(skb, sk);
+       sk->sk_data_ready(sk, 0);
+}
+
+/*
+ * Handle a received Close: answer with a Reset carrying code "Closed",
+ * queue @skb through dccp_fin(), move the socket to DCCP_CLOSED and send
+ * an asynchronous POLL_HUP notification.
+ */
+static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
+{
+       dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED);
+       dccp_fin(sk, skb);
+       dccp_set_state(sk, DCCP_CLOSED);
+       sk_wake_async(sk, 1, POLL_HUP);
+}
+
+/*
+ * Handle a received CloseReq.  A client moves to DCCP_CLOSING and sends a
+ * Close.  Any other role treats the packet as unexpected and only sends a
+ * Sync acknowledging its sequence number.
+ */
+static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
+{
+       if (dccp_sk(sk)->dccps_role == DCCP_ROLE_CLIENT) {
+               dccp_set_state(sk, DCCP_CLOSING);
+               dccp_send_close(sk, 0);
+               return;
+       }
+
+       /*
+        *   Step 7: Check for unexpected packet types
+        *      If (S.is_server and P.type == CloseReq)
+        *        Send Sync packet acknowledging P.seqno
+        *        Drop packet and return
+        */
+       dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
+}
+
+/*
+ * When ack vectors are enabled on @sk, pass the acknowledgement number of
+ * @skb to the receiver-side ackpkts buffer.  Does nothing otherwise.
+ */
+static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       if (!dp->dccps_options.dccpo_send_ack_vector)
+               return;
+
+       dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk,
+                                    DCCP_SKB_CB(skb)->dccpd_ack_seq);
+}
+
+/*
+ * Validate the sequence and acknowledgement numbers of @skb against the
+ * socket's windows (Steps 5 and 6 quoted below) and update GSR/SWL/SWH
+ * and GAR when they are acceptable.
+ *
+ * Returns 0 when the packet passes, -1 when the caller must drop it.  On
+ * a Step 6 failure a Sync acknowledging the packet's seqno is sent first.
+ */
+static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
+{
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+       struct dccp_sock *dp = dccp_sk(sk);
+       u64 lswl, lawl;
+
+       /*
+        *   Step 5: Prepare sequence numbers for Sync
+        *     If P.type == Sync or P.type == SyncAck,
+        *        If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
+        *           / * P is valid, so update sequence number variables
+        *               accordingly.  After this update, P will pass the tests
+        *               in Step 6.  A SyncAck is generated if necessary in
+        *               Step 15 * /
+        *           Update S.GSR, S.SWL, S.SWH
+        *        Otherwise,
+        *           Drop packet and return
+        */
+       if (dh->dccph_type == DCCP_PKT_SYNC || 
+           dh->dccph_type == DCCP_PKT_SYNCACK) {
+               if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                             dp->dccps_awl, dp->dccps_awh) &&
+                   !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
+                       dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+               else
+                       return -1;
+       }
+       
+       /*
+        *   Step 6: Check sequence numbers
+        *      Let LSWL = S.SWL and LAWL = S.AWL
+        *      If P.type == CloseReq or P.type == Close or P.type == Reset,
+        *        LSWL := S.GSR + 1, LAWL := S.GAR
+        *      If LSWL <= P.seqno <= S.SWH
+        *           and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
+        *        Update S.GSR, S.SWL, S.SWH
+        *        If P.type != Sync,
+        *           Update S.GAR
+        *      Otherwise,
+        *        Send Sync packet acknowledging P.seqno
+        *        Drop packet and return
+        */
+       lswl = dp->dccps_swl;
+       lawl = dp->dccps_awl;
+
+       /* Connection-ending packets are checked against tighter bounds */
+       if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
+           dh->dccph_type == DCCP_PKT_CLOSE ||
+           dh->dccph_type == DCCP_PKT_RESET) {
+               lswl = dp->dccps_gsr;
+               dccp_inc_seqno(&lswl);
+               lawl = dp->dccps_gar;
+       }
+
+       /* DCCP_PKT_WITHOUT_ACK_SEQ stands for "P.ackno does not exist" */
+       if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
+           (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
+            between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                      lawl, dp->dccps_awh))) {
+               dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+
+               if (dh->dccph_type != DCCP_PKT_SYNC &&
+                   (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
+                    DCCP_PKT_WITHOUT_ACK_SEQ))
+                       dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
+       } else {
+               /*
+                * NOTE(review): the second parenthesis opened in this
+                * message is never closed; cosmetic only.
+                */
+               LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, "
+                                           "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
+                                           "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
+                                           "sending SYNC...\n",
+                              dccp_packet_name(dh->dccph_type),
+                              (unsigned long long) lswl,
+                              (unsigned long long)
+                              DCCP_SKB_CB(skb)->dccpd_seq,
+                              (unsigned long long) dp->dccps_swh,
+                              (DCCP_SKB_CB(skb)->dccpd_ack_seq ==
+                               DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists",
+                              (unsigned long long) lawl,
+                              (unsigned long long)
+                              DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                              (unsigned long long) dp->dccps_awh);
+               dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Handle the incoming packet @skb for @sk.
+ *
+ * The packet is first checked by dccp_check_seqno() and
+ * dccp_parse_options(), then recorded in the ack vector buffer when ack
+ * vectors are enabled, shown to both CCIDs and finally dispatched on its
+ * type.  @len is not used here.
+ *
+ * Always returns 0.  @skb is either queued on sk_receive_queue (Data,
+ * DataAck, Reset, Close) or freed before returning.
+ */
+int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
+                        const struct dccp_hdr *dh, const unsigned len)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       if (dccp_check_seqno(sk, skb))
+               goto discard;
+
+       if (dccp_parse_options(sk, skb))
+               goto discard;
+
+       if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+               dccp_event_ack_recv(sk, skb);
+
+       /*
+        * FIXME: check ECN to see if we should use
+        * DCCP_ACKPKTS_STATE_ECN_MARKED
+        */
+       if (dp->dccps_options.dccpo_send_ack_vector) {
+               struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
+
+               /* A non-zero return means the packet could not be recorded */
+               if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
+                                    DCCP_SKB_CB(skb)->dccpd_seq,
+                                    DCCP_ACKPKTS_STATE_RECEIVED)) {
+                       LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable "
+                                                   "packets buffer full!\n");
+                       /* DCCP_MAX_SEQNO + 1 is outside the valid range */
+                       ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
+                       inet_csk_schedule_ack(sk);
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                                 TCP_DELACK_MIN,
+                                                 DCCP_RTO_MAX);
+                       goto discard;
+               }
+
+               /*
+                * FIXME: this activation is probably wrong, have to study more
+                * TCP delack machinery and how it fits into DCCP draft, but
+                * for now it kinda "works" 8)
+                */
+               if (!inet_csk_ack_scheduled(sk)) {
+                       inet_csk_schedule_ack(sk);
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ,
+                                                 DCCP_RTO_MAX);
+               }
+       }
+
+       ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+       ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+
+       switch (dccp_hdr(skb)->dccph_type) {
+       case DCCP_PKT_DATAACK:
+       case DCCP_PKT_DATA:
+               /*
+                * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED
+                * option if it is.
+                */
+               __skb_pull(skb, dh->dccph_doff * 4);
+               __skb_queue_tail(&sk->sk_receive_queue, skb);
+               skb_set_owner_r(skb, sk);
+               sk->sk_data_ready(sk, 0);
+               return 0;
+       case DCCP_PKT_ACK:
+               goto discard;
+       case DCCP_PKT_RESET:
+               /*
+                *  Step 9: Process Reset
+                *      If P.type == Reset,
+                *              Tear down connection
+                *              S.state := TIMEWAIT
+                *              Set TIMEWAIT timer
+                *              Drop packet and return
+               */
+               dccp_fin(sk, skb);
+               dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
+               return 0;
+       case DCCP_PKT_CLOSEREQ:
+               dccp_rcv_closereq(sk, skb);
+               goto discard;
+       case DCCP_PKT_CLOSE:
+               dccp_rcv_close(sk, skb);
+               return 0;
+       case DCCP_PKT_REQUEST:
+               /* Step 7 
+                *   or (S.is_server and P.type == Response)
+                *   or (S.is_client and P.type == Request)
+                *   or (S.state >= OPEN and P.type == Request
+                *      and P.seqno >= S.OSR)
+                *    or (S.state >= OPEN and P.type == Response
+                *      and P.seqno >= S.OSR)
+                *    or (S.state == RESPOND and P.type == Data),
+                *  Send Sync packet acknowledging P.seqno
+                *  Drop packet and return
+                */
+               if (dp->dccps_role != DCCP_ROLE_LISTEN)
+                       goto send_sync;
+               goto check_seq;
+       case DCCP_PKT_RESPONSE:
+               if (dp->dccps_role != DCCP_ROLE_CLIENT)
+                       goto send_sync;
+               /*
+                * The check_seq and send_sync labels below are shared by the
+                * Request and Response cases.
+                */
+check_seq:
+               if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
+send_sync:
+                       dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
+                                      DCCP_PKT_SYNC);
+               }
+               break;
+       case DCCP_PKT_SYNC:
+               dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
+                              DCCP_PKT_SYNCACK);
+               /*
+                * From the draft:
+                *
+                * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets
+                * MAY have non-zero-length application data areas, whose
+                * contents * receivers MUST ignore.
+                */
+               goto discard;
+       }
+
+       /*
+        * Reached by the cases that "break" (Request, Response) and by any
+        * packet type without a case label above: count it as an input error.
+        */
+       DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
+discard:
+       __kfree_skb(skb);
+       return 0;
+}
+
+/*
+ * Handle a packet that arrives while the socket is in the REQUEST state.
+ *
+ * Only a DCCP-Response whose acknowledgement number lies within
+ * [S.AWL, S.AWH] is accepted; every other packet is rejected.
+ *
+ * Return values:
+ *    1 - packet rejected, skb untouched (see out_invalid_packet);
+ *    0 - Response accepted, skb already freed here;
+ *   -1 - Response accepted and an Ack sent, skb not freed by this function.
+ */
+static int dccp_rcv_request_sent_state_process(struct sock *sk,
+                                              struct sk_buff *skb,
+                                              const struct dccp_hdr *dh,
+                                              const unsigned len)
+{
+       /*
+        *  Step 4: Prepare sequence numbers in REQUEST
+        *     If S.state == REQUEST,
+        *        If (P.type == Response or P.type == Reset)
+        *              and S.AWL <= P.ackno <= S.AWH,
+        *           / * Set sequence number variables corresponding to the
+        *              other endpoint, so P will pass the tests in Step 6 * /
+        *           Set S.GSR, S.ISR, S.SWL, S.SWH
+        *           / * Response processing continues in Step 10; Reset
+        *              processing continues in Step 9 * /
+        */
+       if (dh->dccph_type == DCCP_PKT_RESPONSE) {
+               const struct inet_connection_sock *icsk = inet_csk(sk);
+               struct dccp_sock *dp = dccp_sk(sk);
+
+               /*
+                * Validate P.ackno before touching any connection state: a
+                * Response that fails this test must neither stop the REQUEST
+                * retransmit timer nor release the packet held in
+                * sk_send_head, otherwise a bogus packet would leave the
+                * socket in REQUEST with nothing left to retransmit.
+                */
+               if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                              dp->dccps_awl, dp->dccps_awh)) {
+                       dccp_pr_debug("invalid ackno: S.AWL=%llu, "
+                                     "P.ackno=%llu, S.AWH=%llu \n",
+                                     (unsigned long long)dp->dccps_awl,
+                          (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                                     (unsigned long long)dp->dccps_awh);
+                       goto out_invalid_packet;
+               }
+
+               /* P passed the ackno test: now stop the REQUEST timer */
+               inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
+               BUG_TRAP(sk->sk_send_head != NULL);
+               __kfree_skb(sk->sk_send_head);
+               sk->sk_send_head = NULL;
+
+               dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
+               dccp_update_gsr(sk, dp->dccps_isr);
+               /*
+                * SWL and AWL are initially adjusted so that they are not less than
+                * the initial Sequence Numbers received and sent, respectively:
+                *      SWL := max(GSR + 1 - floor(W/4), ISR),
+                *      AWL := max(GSS - W' + 1, ISS).
+                * These adjustments MUST be applied only at the beginning of the
+                * connection.
+                *
+                * AWL was adjusted in dccp_v4_connect -acme
+                */
+               dccp_set_seqno(&dp->dccps_swl,
+                              max48(dp->dccps_swl, dp->dccps_isr));
+
+               if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 ||
+                   ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) {
+                       ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
+                       ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
+                       /* FIXME: send appropriate RESET code */
+                       goto out_invalid_packet;
+               }
+
+               dccp_sync_mss(sk, dp->dccps_pmtu_cookie);
+
+               /*
+                *    Step 10: Process REQUEST state (second part)
+                *       If S.state == REQUEST,
+                *        / * If we get here, P is a valid Response from the
+                *            server (see Step 4), and we should move to
+                *            PARTOPEN state. PARTOPEN means send an Ack,
+                *            don't send Data packets, retransmit Acks
+                *            periodically, and always include any Init Cookie
+                *            from the Response * /
+                *        S.state := PARTOPEN
+                *        Set PARTOPEN timer
+                *        Continue with S.state == PARTOPEN
+                *        / * Step 12 will send the Ack completing the
+                *            three-way handshake * /
+                */
+               dccp_set_state(sk, DCCP_PARTOPEN);
+
+               /* Make sure socket is routed, for correct metrics. */
+               inet_sk_rebuild_header(sk);
+
+               /* Wake up whoever is waiting on the socket, if anyone is */
+               if (!sock_flag(sk, SOCK_DEAD)) {
+                       sk->sk_state_change(sk);
+                       sk_wake_async(sk, 0, POLL_OUT);
+               }
+
+               if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
+                   icsk->icsk_accept_queue.rskq_defer_accept) {
+                       /* Save one ACK. Data will be ready after
+                        * several ticks, if write_pending is set.
+                        *
+                        * It may be deleted, but with this feature tcpdumps
+                        * look so _wonderfully_ clever, that I was not able
+                        * to stand against the temptation 8)     --ANK
+                        */
+                       /*
+                        * OK, in DCCP we can as well do a similar trick, its
+                        * even in the draft, but there is no need for us to
+                        * schedule an ack here, as dccp_sendmsg does this for
+                        * us, also stated in the draft. -acme
+                        */
+                       __kfree_skb(skb);
+                       return 0;
+               }
+               dccp_send_ack(sk);
+               /* skb is deliberately not freed on this path (see -1 above) */
+               return -1;
+       }
+
+out_invalid_packet:
+       return 1; /* dccp_v4_do_rcv will send a reset, but...
+                    FIXME: the reset code should be
+                           DCCP_RESET_CODE_PACKET_ERROR */
+}
+
+/*
+ * Handle a packet received in the RESPOND or PARTOPEN state.
+ *
+ * A Reset only cancels the DACK timer.  An Ack or DataAck records the
+ * packet's sequence number in dccps_osr and moves the socket to OPEN; a
+ * DataAck is additionally handed to dccp_rcv_established().  All other
+ * packet types are ignored.
+ *
+ * Returns 1 if the packet was passed to dccp_rcv_established() (and is
+ * therefore considered queued), 0 otherwise.
+ */
+static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
+                                                  struct sk_buff *skb,
+                                                  const struct dccp_hdr *dh,
+                                                  const unsigned len)
+{
+       const int type = dh->dccph_type;
+
+       if (type == DCCP_PKT_RESET) {
+               inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
+               return 0;
+       }
+
+       if (type != DCCP_PKT_ACK && type != DCCP_PKT_DATAACK)
+               return 0;
+
+       /*
+        * FIXME: the PARTOPEN (DELACK) timer should be reset here, but only
+        * when the DELACK timer is not in use for something else (a delayed
+        * ack for a TIMESTAMP echo, etc).  For now it is not cleared in that
+        * case: sending one extra ACK when DELACK has nothing else to do is
+        * not a big deal after all.
+        */
+
+       /* The PARTOPEN timer is only stopped when we really are in PARTOPEN */
+       if (sk->sk_state == DCCP_PARTOPEN)
+               inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
+
+       dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
+       dccp_set_state(sk, DCCP_OPEN);
+
+       if (type != DCCP_PKT_DATAACK)
+               return 0;
+
+       /* packet is queued by dccp_rcv_established */
+       dccp_rcv_established(sk, skb, dh, len);
+       return 1;
+}
+
+/*
+ * State-dependent receive processing.
+ *
+ * Handles the LISTEN state first, then (for every state except REQUESTING)
+ * runs the sequence number check, option parsing, ack and CCID receive
+ * hooks, then deals with Reset / unexpected types / CloseReq / Close, and
+ * finally dispatches on sk->sk_state to the per-state helpers.
+ *
+ * Returns 1 on the paths where the caller is expected to answer with a
+ * Reset (see the "Caller (dccp_v4_do_rcv) will send Reset" note below) and
+ * 0 otherwise.  On every path that reaches the "discard" label the skb is
+ * freed here.
+ */
+int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+                          struct dccp_hdr *dh, unsigned len)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       /* state on entry; the per-state helpers below may change sk_state */
+       const int old_state = sk->sk_state;
+       int queued = 0;
+
+       /*
+        *  Step 3: Process LISTEN state
+        *      (Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv)
+        *
+        *     If S.state == LISTEN,
+        *        If P.type == Request or P contains a valid Init Cookie
+        *              option,
+        *           * Must scan the packet's options to check for an Init
+        *              Cookie.  Only the Init Cookie is processed here,
+        *              however; other options are processed in Step 8.  This
+        *              scan need only be performed if the endpoint uses Init
+        *              Cookies *
+        *           * Generate a new socket and switch to that socket *
+        *           Set S := new socket for this port pair
+        *           S.state = RESPOND
+        *           Choose S.ISS (initial seqno) or set from Init Cookie
+        *           Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+        *           Continue with S.state == RESPOND
+        *           * A Response packet will be generated in Step 11 *
+        *        Otherwise,
+        *           Generate Reset(No Connection) unless P.type == Reset
+        *           Drop packet and return
+        *
+        * NOTE: the check for the packet types is done in
+        *       dccp_rcv_state_process
+        */
+       if (sk->sk_state == DCCP_LISTEN) {
+               if (dh->dccph_type == DCCP_PKT_REQUEST) {
+                       if (dccp_v4_conn_request(sk, skb) < 0)
+                               return 1;
+
+                       /* FIXME: do congestion control initialization */
+                       goto discard;
+               }
+               /* A Reset is dropped silently, it is never answered */
+               if (dh->dccph_type == DCCP_PKT_RESET)
+                       goto discard;
+
+               /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/
+               return 1;
+       }
+
+       /* In REQUESTING all of this is skipped; see the switch further down */
+       if (sk->sk_state != DCCP_REQUESTING) {
+               if (dccp_check_seqno(sk, skb))
+                       goto discard;
+
+               /*
+                * Step 8: Process options and mark acknowledgeable
+                */
+               if (dccp_parse_options(sk, skb))
+                       goto discard;
+
+               /* Only packets that carry an ackno are reported as ack events */
+               if (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
+                   DCCP_PKT_WITHOUT_ACK_SEQ)
+                       dccp_event_ack_recv(sk, skb);
+
+               ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+               ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+
+               /*
+                * FIXME: check ECN to see if we should use
+                * DCCP_ACKPKTS_STATE_ECN_MARKED
+                */
+               if (dp->dccps_options.dccpo_send_ack_vector) {
+                       if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
+                                            DCCP_SKB_CB(skb)->dccpd_seq,
+                                            DCCP_ACKPKTS_STATE_RECEIVED))
+                               goto discard;
+                       /*
+                        * FIXME: this activation is probably wrong, have to
+                        * study more TCP delack machinery and how it fits into
+                        * DCCP draft, but for now it kinda "works" 8)
+                        */
+                       if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno ==
+                            DCCP_MAX_SEQNO + 1) &&
+                           !inet_csk_ack_scheduled(sk)) {
+                               inet_csk_schedule_ack(sk);
+                               inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                                         TCP_DELACK_MIN,
+                                                         DCCP_RTO_MAX);
+                       }
+               }
+       }
+
+       /*
+        *  Step 9: Process Reset
+        *      If P.type == Reset,
+        *              Tear down connection
+        *              S.state := TIMEWAIT
+        *              Set TIMEWAIT timer
+        *              Drop packet and return
+       */
+       if (dh->dccph_type == DCCP_PKT_RESET) {
+               /*
+                * Queue the equivalent of TCP fin so that dccp_recvmsg
+                * exits the loop
+                */
+               dccp_fin(sk, skb);
+               dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
+               /* skb is not freed on this path */
+               return 0;
+               /*
+                *   Step 7: Check for unexpected packet types
+                *      If (S.is_server and P.type == CloseReq)
+                *          or (S.is_server and P.type == Response)
+                *          or (S.is_client and P.type == Request)
+                *          or (S.state == RESPOND and P.type == Data),
+                *        Send Sync packet acknowledging P.seqno
+                *        Drop packet and return
+                */
+       } else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
+                   (dh->dccph_type == DCCP_PKT_RESPONSE ||
+                    dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
+                   (dp->dccps_role == DCCP_ROLE_CLIENT &&
+                    dh->dccph_type == DCCP_PKT_REQUEST) ||
+                   (sk->sk_state == DCCP_RESPOND &&
+                    dh->dccph_type == DCCP_PKT_DATA)) {
+               dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
+                              DCCP_PKT_SYNC);
+               goto discard;
+       } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
+               /* Only reached when the role is DCCP_ROLE_CLIENT (see above) */
+               dccp_rcv_closereq(sk, skb);
+               goto discard;
+       } else if (dh->dccph_type == DCCP_PKT_CLOSE) {
+               dccp_rcv_close(sk, skb);
+               /* skb is not freed on this path */
+               return 0;
+       }
+
+       switch (sk->sk_state) {
+       case DCCP_CLOSED:
+               return 1;
+
+       case DCCP_REQUESTING:
+               /* FIXME: do congestion control initialization */
+
+               queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
+               /* 0 and 1 are final results; a negative value means the
+                * helper accepted the packet but left the skb for us to free */
+               if (queued >= 0)
+                       return queued;
+
+               __kfree_skb(skb);
+               return 0;
+
+       case DCCP_RESPOND:
+       case DCCP_PARTOPEN:
+               queued = dccp_rcv_respond_partopen_state_process(sk, skb,
+                                                                dh, len);
+               break;
+       }
+
+       /* An Ack/DataAck received while in PARTOPEN: notify the socket owner */
+       if (dh->dccph_type == DCCP_PKT_ACK ||
+           dh->dccph_type == DCCP_PKT_DATAACK) {
+               switch (old_state) {
+               case DCCP_PARTOPEN:
+                       sk->sk_state_change(sk);
+                       sk_wake_async(sk, 0, POLL_OUT);
+                       break;
+               }
+       }
+
+       /* The label sits inside the block so that "goto discard" frees the
+        * skb whatever the value of queued is */
+       if (!queued) { 
+discard:
+               __kfree_skb(skb);
+       }
+       return 0;
+}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c

new file mode 100644 (file)

index 0000000..3fc75db
--- /dev/null
+++ b/net/dccp/ipv4.c
@@ -0,0 +1,1356 @@
+/*
+ *  net/dccp/ipv4.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/icmp.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+
+#include <net/icmp.h>
+#include <net/inet_hashtables.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/xfrm.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+/*
+ * Socket hash tables shared by the DCCP code in this file; exported (GPL
+ * only) for use outside of it.
+ *
+ * port_rover is the last local port handed out by dccp_v4_hash_connect(),
+ * which increments it before use, so the initial value of 1024 - 1 makes
+ * 1024 the first candidate (unless that is outside the configured range).
+ */
+struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
+       .lhash_lock     = RW_LOCK_UNLOCKED,
+       .lhash_users    = ATOMIC_INIT(0),
+       .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
+       .portalloc_lock = SPIN_LOCK_UNLOCKED,
+       .port_rover     = 1024 - 1,
+};
+
+EXPORT_SYMBOL_GPL(dccp_hashinfo);
+
+/*
+ * Acquire local port snum for sk: thin wrapper that passes DCCP's own hash
+ * tables (dccp_hashinfo) to inet_csk_get_port() and returns its result.
+ */
+static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
+{
+       return inet_csk_get_port(&dccp_hashinfo, sk, snum);
+}
+
+/* Hash sk into DCCP's tables: inet_hash() applied to dccp_hashinfo. */
+static void dccp_v4_hash(struct sock *sk)
+{
+       inet_hash(&dccp_hashinfo, sk);
+}
+
+/* Remove sk from DCCP's tables: inet_unhash() applied to dccp_hashinfo. */
+static void dccp_v4_unhash(struct sock *sk)
+{
+       inet_unhash(&dccp_hashinfo, sk);
+}
+
+/*
+ * Check that the connection identity sk would get with local port lport
+ * (addresses, ports and bound device) is not already used by a socket in
+ * the established hash bucket or in its TIME-WAIT counterpart and, if it is
+ * free, record lport in sk and add sk to the established chain.
+ *
+ * Returns 0 on success or -EADDRNOTAVAIL if a matching socket exists.
+ *
+ * called with local bh disabled
+ */
+static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
+                                     struct inet_timewait_sock **twp)
+{
+       struct inet_sock *inet = inet_sk(sk);
+       /* Note the crossing: daddr is taken from our rcv_saddr and saddr
+        * from our daddr */
+       const u32 daddr = inet->rcv_saddr;
+       const u32 saddr = inet->daddr;
+       const int dif = sk->sk_bound_dev_if;
+       INET_ADDR_COOKIE(acookie, saddr, daddr)
+       const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+       const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport,
+                                     dccp_hashinfo.ehash_size);
+       struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash];
+       const struct sock *sk2;
+       const struct hlist_node *node;
+       struct inet_timewait_sock *tw;
+
+       write_lock(&head->lock);
+
+       /* Check TIME-WAIT sockets first. */
+       sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
+               tw = inet_twsk(sk2);
+
+               if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+                       goto not_unique;
+       }
+       /* Every TIME-WAIT match left through not_unique, so no TIME-WAIT
+        * socket is carried past this point */
+       tw = NULL;
+
+       /* And established part... */
+       sk_for_each(sk2, node, &head->chain) {
+               if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+                       goto not_unique;
+       }
+
+       /* Must record num and sport now. Otherwise we will see
+        * in hash table socket with a funny identity. */
+       inet->num = lport;
+       inet->sport = htons(lport);
+       sk->sk_hashent = hash;
+       BUG_TRAP(sk_unhashed(sk));
+       __sk_add_node(sk, &head->chain);
+       sock_prot_inc_use(sk->sk_prot);
+       write_unlock(&head->lock);
+
+       /*
+        * NOTE(review): tw is always NULL here (see above), so *twp is always
+        * set to NULL and the "else if" branch cannot run; the
+        * TIMEWAITRECYCLED counter is nevertheless incremented whenever twp
+        * is non-NULL.
+        */
+       if (twp != NULL) {
+               *twp = tw;
+               NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+       } else if (tw != NULL) {
+               /* Silly. Should hash-dance instead... */
+               inet_twsk_deschedule(tw, &dccp_death_row);
+               NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+               inet_twsk_put(tw);
+       }
+
+       return 0;
+
+not_unique:
+       write_unlock(&head->lock);
+       return -EADDRNOTAVAIL;
+}
+
+/*
+ * Bind a port for a connect operation and hash it.
+ *
+ * If the socket has no local port yet (inet_sk(sk)->num == 0) a port is
+ * searched in sysctl_local_port_range, starting after
+ * dccp_hashinfo.port_rover.  Otherwise the already bound port is used and,
+ * unless sk is the only owner of its bind bucket, uniqueness is verified
+ * with __dccp_v4_check_established().
+ *
+ * Returns 0 on success, -EADDRNOTAVAIL when no usable port was found, or
+ * the result of __dccp_v4_check_established().
+ */
+static int dccp_v4_hash_connect(struct sock *sk)
+{
+       const unsigned short snum = inet_sk(sk)->num;
+       struct inet_bind_hashbucket *head;
+       struct inet_bind_bucket *tb;
+       int ret;
+
+       if (snum == 0) {
+               int rover;
+               int low = sysctl_local_port_range[0];
+               int high = sysctl_local_port_range[1];
+               /* number of candidate ports, bounds the search loop below */
+               int remaining = (high - low) + 1;
+               struct hlist_node *node;
+               struct inet_timewait_sock *tw = NULL;
+
+               /* bhs stay disabled until the local_bh_enable() calls below */
+               local_bh_disable();
+
+               /* TODO. Actually it is not so bad idea to remove
+                * dccp_hashinfo.portalloc_lock before next submission to
+                * Linus.
+                * As soon as we touch this place at all it is time to think.
+                *
+                * Now it protects single _advisory_ variable
+                * dccp_hashinfo.port_rover, hence it is mostly useless.
+                * Code will work nicely if we just delete it, but
+                * I am afraid in contented case it will work not better or
+                * even worse: another cpu just will hit the same bucket
+                * and spin there.
+                * So some cpu salt could remove both contention and
+                * memory pingpong. Any ideas how to do this in a nice way?
+                */
+               spin_lock(&dccp_hashinfo.portalloc_lock);
+               rover = dccp_hashinfo.port_rover;
+
+               do {
+                       /* advance, wrapping back to low when out of range */
+                       rover++;
+                       if ((rover < low) || (rover > high))
+                               rover = low;
+                       head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
+                                                   dccp_hashinfo.bhash_size)];
+                       spin_lock(&head->lock);
+
+                       /* Does not bother with rcv_saddr checks,
+                        * because the established check is already
+                        * unique enough.
+                        */
+                       inet_bind_bucket_for_each(tb, node, &head->chain) {
+                               if (tb->port == rover) {
+                                       BUG_TRAP(!hlist_empty(&tb->owners));
+                                       /* only buckets marked with a negative
+                                        * fastreuse (as done for the ones
+                                        * created below) are considered */
+                                       if (tb->fastreuse >= 0)
+                                               goto next_port;
+                                       if (!__dccp_v4_check_established(sk,
+                                                                        rover,
+                                                                        &tw))
+                                               goto ok;
+                                       goto next_port;
+                               }
+                       }
+
+                       /* no bucket for this port yet: create one */
+                       tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
+                                                    head, rover);
+                       if (tb == NULL) {
+                               /* give up: ends in the -EADDRNOTAVAIL
+                                * return after the loop */
+                               spin_unlock(&head->lock);
+                               break;
+                       }
+                       tb->fastreuse = -1;
+                       goto ok;
+
+               next_port:
+                       spin_unlock(&head->lock);
+               } while (--remaining > 0);
+               /* search failed: remember where we stopped and bail out */
+               dccp_hashinfo.port_rover = rover;
+               spin_unlock(&dccp_hashinfo.portalloc_lock);
+
+               local_bh_enable();
+
+               return -EADDRNOTAVAIL;
+
+ok:
+               /* All locks still held and bhs disabled */
+               dccp_hashinfo.port_rover = rover;
+               spin_unlock(&dccp_hashinfo.portalloc_lock);
+
+               inet_bind_hash(sk, tb, rover);
+               /* not yet hashed when we came here through the newly
+                * created bucket rather than __dccp_v4_check_established */
+               if (sk_unhashed(sk)) {
+                       inet_sk(sk)->sport = htons(rover);
+                       __inet_hash(&dccp_hashinfo, sk, 0);
+               }
+               spin_unlock(&head->lock);
+
+               if (tw != NULL) {
+                       inet_twsk_deschedule(tw, &dccp_death_row);
+                       inet_twsk_put(tw);
+               }
+
+               ret = 0;
+               /* bhs are re-enabled at out: */
+               goto out;
+       }
+
+       head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
+                                                dccp_hashinfo.bhash_size)];
+       tb   = inet_csk(sk)->icsk_bind_hash;
+       spin_lock_bh(&head->lock);
+       /* sk is the first and only owner of its bind bucket */
+       if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
+               __inet_hash(&dccp_hashinfo, sk, 0);
+               spin_unlock_bh(&head->lock);
+               return 0;
+       } else {
+               /* plain unlock: bhs stay disabled for the check below and
+                * are re-enabled at out: */
+               spin_unlock(&head->lock);
+               /* No definite answer... Walk to established hash table */
+               ret = __dccp_v4_check_established(sk, snum, NULL);
+out:
+               local_bh_enable();
+               return ret;
+       }
+}
+
+/*
+ * Start an active (client side) connection to the IPv4 address in uaddr.
+ *
+ * Routes to the destination, fills in the socket addresses and ports, moves
+ * the socket to DCCP_REQUESTING, selects/hashes the local port, picks the
+ * initial sequence number and finally calls dccp_connect().
+ *
+ * Returns 0 on success or a negative errno; after the state change any
+ * failure puts the socket back into DCCP_CLOSED.
+ */
+static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
+                          int addr_len)
+{
+       const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct inet_sock *inet = inet_sk(sk);
+       struct rtable *rt;
+       u32 daddr;
+       u32 nexthop;
+       int rc;
+       int err;
+
+       dp->dccps_role = DCCP_ROLE_CLIENT;
+
+       if (addr_len < sizeof(struct sockaddr_in))
+               return -EINVAL;
+       if (usin->sin_family != AF_INET)
+               return -EAFNOSUPPORT;
+
+       daddr = usin->sin_addr.s_addr;
+       nexthop = daddr;
+       /* With a source route option the first hop comes from the option */
+       if (inet->opt != NULL && inet->opt->srr) {
+               if (daddr == 0)
+                       return -EINVAL;
+               nexthop = inet->opt->faddr;
+       }
+
+       rc = ip_route_connect(&rt, nexthop, inet->saddr,
+                             RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
+                             IPPROTO_DCCP,
+                             inet->sport, usin->sin_port, sk);
+       if (rc < 0)
+               return rc;
+
+       /* Multicast and broadcast destinations are refused */
+       if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
+               ip_rt_put(rt);
+               return -ENETUNREACH;
+       }
+
+       if (inet->opt == NULL || !inet->opt->srr)
+               daddr = rt->rt_dst;
+
+       if (inet->saddr == 0)
+               inet->saddr = rt->rt_src;
+       inet->rcv_saddr = inet->saddr;
+
+       inet->dport = usin->sin_port;
+       inet->daddr = daddr;
+
+       dp->dccps_ext_header_len = inet->opt != NULL ? inet->opt->optlen : 0;
+
+       /*
+        * The identity of the socket is not known yet (sport may still be
+        * zero).  Nevertheless the state is switched to DCCP_REQUESTING now
+        * and, without releasing the socket lock, the source port is chosen,
+        * the socket is entered into the hash tables and the rest of the
+        * initialization is completed afterwards.
+        */
+       dccp_set_state(sk, DCCP_REQUESTING);
+       err = dccp_v4_hash_connect(sk);
+       if (err != 0)
+               goto failure;
+
+       err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
+       if (err != 0)
+               goto failure;
+
+       /* OK, now commit destination to socket.  */
+       sk_setup_caps(sk, &rt->u.dst);
+
+       dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
+                                                   inet->daddr,
+                                                   inet->sport,
+                                                   usin->sin_port);
+       dp->dccps_gar = dp->dccps_iss;
+       dccp_update_gss(sk, dp->dccps_iss);
+
+       /*
+        * SWL and AWL start out no lower than the initial Sequence Numbers
+        * received and sent, respectively:
+        *      SWL := max(GSR + 1 - floor(W/4), ISR),
+        *      AWL := max(GSS - W' + 1, ISS).
+        * These adjustments MUST be applied only at the beginning of the
+        * connection.
+        */
+       dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
+
+       inet->id = dp->dccps_iss ^ jiffies;
+
+       err = dccp_connect(sk);
+       /* From here on the failure path must not drop the route again */
+       rt = NULL;
+       if (err == 0)
+               return 0;
+
+failure:
+       /*
+        * This unhashes the socket and releases the local port, if necessary.
+        */
+       dccp_set_state(sk, DCCP_CLOSED);
+       ip_rt_put(rt);
+       sk->sk_route_caps = 0;
+       inet->dport = 0;
+       return err;
+}
+
+/*
+ * Path MTU discovery as defined in RFC1191: react to a reported path MTU
+ * of mtu for socket sk.  iph is not used by this function.
+ */
+static inline void dccp_do_pmtu_discovery(struct sock *sk,
+                                         const struct iphdr *iph,
+                                         u32 mtu)
+{
+       const struct dccp_sock *dp = dccp_sk(sk);
+       const struct inet_sock *inet = inet_sk(sk);
+       struct dst_entry *route;
+
+       /* DCCP_LISTEN and request_socks are of no interest here (RESPONSEs
+        * sent out by Linux are always < 576bytes so they should go through
+        * unfragmented).
+        */
+       if (sk->sk_state == DCCP_LISTEN)
+               return;
+
+       /* The destentry is not consulted to see whether pmtu discovery is
+        * forbidden on this route; it is simply assumed that no
+        * packet_to_big packets come back when pmtu discovery is not active.
+        * There is a small race when the user changes this flag in the
+        * route, but that is considered acceptable.
+        */
+       route = __sk_dst_check(sk, 0);
+       if (route == NULL)
+               return;
+
+       route->ops->update_pmtu(route, mtu);
+
+       /* Something is about to go wrong... Remember a soft error in case
+        * this connection is not able to recover.
+        */
+       if (mtu < dst_mtu(route) && ip_dont_fragment(sk, route))
+               sk->sk_err_soft = EMSGSIZE;
+
+       /* Continue with the MTU the route reports after the update */
+       mtu = dst_mtu(route);
+
+       /* Otherwise: let the usual retransmit timer handle it */
+       if (inet->pmtudisc == IP_PMTUDISC_DONT ||
+           dp->dccps_pmtu_cookie <= mtu)
+               return;
+
+       dccp_sync_mss(sk, mtu);
+
+       /*
+        * From: draft-ietf-dccp-spec-11.txt
+        *
+        *      DCCP-Sync packets are the best choice for upward
+        *      probing, since DCCP-Sync probes do not risk application
+        *      data loss.
+        */
+       dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
+}
+
+/*
+ * Send a stand-alone DCCP-Ack in reply to rxskb through the control socket
+ * (dccp_ctl_socket).
+ *
+ * The reply swaps the ports and addresses of rxskb, uses rxskb's ackno as
+ * its own sequence number and acknowledges rxskb's seqno.  Nothing is sent
+ * unless the route attached to rxskb is of type RTN_LOCAL; an skb
+ * allocation failure is silently ignored.
+ */
+static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
+{
+       int err;
+       struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+       /* generic header + extended seqno part + acknowledgement subheader */
+       const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
+                                    sizeof(struct dccp_hdr_ext) +
+                                    sizeof(struct dccp_hdr_ack_bits);
+       struct sk_buff *skb;
+
+       if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
+               return;
+
+       skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
+       if (skb == NULL)
+               return;
+
+       /* Reserve space for headers. */
+       skb_reserve(skb, MAX_DCCP_HEADER);
+
+       skb->dst = dst_clone(rxskb->dst);
+
+       skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
+       dh = dccp_hdr(skb);
+       memset(dh, 0, dccp_hdr_ack_len);
+
+       /* Build DCCP header and checksum it. */
+       dh->dccph_type     = DCCP_PKT_ACK;
+       dh->dccph_sport    = rxdh->dccph_dport;
+       dh->dccph_dport    = rxdh->dccph_sport;
+       dh->dccph_doff     = dccp_hdr_ack_len / 4;
+       dh->dccph_x        = 1;
+
+       dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
+       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
+                        DCCP_SKB_CB(rxskb)->dccpd_seq);
+
+       bh_lock_sock(dccp_ctl_socket->sk);
+       err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
+                                   rxskb->nh.iph->daddr,
+                                   rxskb->nh.iph->saddr, NULL);
+       bh_unlock_sock(dccp_ctl_socket->sk);
+
+       /*
+        * This packet is an Ack, not a Reset: account it as an outgoing
+        * segment only and leave the "resets sent" counter alone.
+        */
+       if (err == NET_XMIT_CN || err == 0)
+               DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
+}
+
+/*
+ * Send an Ack in reply to skb on behalf of a request sock: simply forwards
+ * to dccp_v4_ctl_send_ack(); req is not used.
+ */
+static void dccp_v4_reqsk_send_ack(struct sk_buff *skb,
+                                  struct request_sock *req)
+{
+       dccp_v4_ctl_send_ack(skb);
+}
+
+/*
+ * Build the Response for req with dccp_make_response() and transmit it
+ * from sk.
+ *
+ * When dst is NULL a route is obtained with inet_csk_route_req().  The dst
+ * is passed to dst_release() before returning in every case.
+ *
+ * Returns 0 on success (NET_XMIT_CN counts as success), -1 when no route or
+ * no response skb could be obtained, otherwise the value returned by
+ * ip_build_and_send_pkt().
+ */
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
+                                struct dst_entry *dst)
+{
+       const struct inet_request_sock *ireq;
+       struct sk_buff *response;
+       int err = -1;
+
+       /* First, grab a route. */
+       if (dst == NULL) {
+               dst = inet_csk_route_req(sk, req);
+               if (dst == NULL)
+                       goto out;
+       }
+
+       response = dccp_make_response(sk, dst, req);
+       if (response == NULL)
+               goto out;
+
+       ireq = inet_rsk(req);
+       err = ip_build_and_send_pkt(response, sk, ireq->loc_addr,
+                                   ireq->rmt_addr, ireq->opt);
+       /* Congestion notification still means the packet went out */
+       if (err == NET_XMIT_CN)
+               err = 0;
+
+out:
+       dst_release(dst);
+       return err;
+}
+
+/*
+ * ICMP error handler for DCCP over IPv4.
+ *
+ * This routine is called by the ICMP module when it gets some sort of error
+ * condition. If err < 0 then the socket should be closed and the error
+ * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code.
+ * After adjustment header points to the first 8 bytes of the DCCP header. We
+ * need to find the appropriate port.
+ *
+ * The locking strategy used here is very "optimistic". When someone else
+ * accesses the socket the ICMP is just dropped and for some paths there is no
+ * check at all. A more general error queue to queue errors for later handling
+ * is probably better.
+ *
+ * @skb:  the ICMP error; skb->data is read as the IP header of the quoted
+ *        datagram, the DCCP header is taken to follow ihl * 4 bytes later,
+ *        and skb->h.icmph supplies the ICMP type and code.
+ * @info: only consumed by dccp_do_pmtu_discovery() on ICMP_FRAG_NEEDED.
+ *
+ * Outline: look up the socket matching the quoted addresses and ports, check
+ * the quoted sequence number, translate the ICMP type/code into an errno
+ * value, then act on it according to the socket state:
+ *   - DCCP_LISTEN:                  drop the matching pending request;
+ *   - DCCP_REQUESTING/DCCP_RESPOND: set sk_err, report it and call
+ *                                   dccp_done(), or only record sk_err_soft
+ *                                   while the user owns the socket;
+ *   - any other state:              set sk_err and report it only when
+ *                                   inet->recverr is set and the user does
+ *                                   not own the socket, else sk_err_soft.
+ */
+void dccp_v4_err(struct sk_buff *skb, u32 info)
+{
+       const struct iphdr *iph = (struct iphdr *)skb->data;
+       const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data +
+                                                       (iph->ihl << 2));
+       struct dccp_sock *dp;
+       struct inet_sock *inet;
+       const int type = skb->h.icmph->type;
+       const int code = skb->h.icmph->code;
+       struct sock *sk;
+       __u64 seq;
+       int err;
+
+       if (skb->len < (iph->ihl << 2) + 8) { /* IP header + 8 bytes needed */
+               ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+               return;
+       }
+
+       sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport,
+                        iph->saddr, dh->dccph_sport, inet_iif(skb));
+       if (sk == NULL) {
+               ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+               return;
+       }
+
+       if (sk->sk_state == DCCP_TIME_WAIT) { /* nothing to report; just put */
+               inet_twsk_put((struct inet_timewait_sock *)sk);
+               return;
+       }
+
+       bh_lock_sock(sk);
+       /* If too many ICMPs get dropped on busy
+        * servers this needs to be solved differently.
+        *
+        * Note that this branch only bumps the counter; processing carries
+        * on and the individual paths below re-check sock_owned_by_user().
+        */
+       if (sock_owned_by_user(sk))
+               NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+
+       if (sk->sk_state == DCCP_CLOSED)
+               goto out;
+
+       dp = dccp_sk(sk);
+       seq = dccp_hdr_seq(skb); /* NOTE(review): taken from skb, not from dh;
+                                 * type/code above come from skb->h.icmph, so
+                                 * confirm dccp_hdr_seq() really reads the
+                                 * quoted DCCP header here - TODO verify */
+       if (sk->sk_state != DCCP_LISTEN &&
+           !between48(seq, dp->dccps_swl, dp->dccps_swh)) {
+               NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
+               goto out;
+       }
+
+       switch (type) { /* map the ICMP type/code to an errno value in err */
+       case ICMP_SOURCE_QUENCH:
+               /* Just silently ignore these. */
+               goto out;
+       case ICMP_PARAMETERPROB:
+               err = EPROTO;
+               break;
+       case ICMP_DEST_UNREACH:
+               if (code > NR_ICMP_UNREACH)
+                       goto out;
+
+               if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
+                       if (!sock_owned_by_user(sk))
+                               dccp_do_pmtu_discovery(sk, iph, info);
+                       goto out;
+               }
+
+               err = icmp_err_convert[code].errno;
+               break;
+       case ICMP_TIME_EXCEEDED:
+               err = EHOSTUNREACH;
+               break;
+       default:
+               goto out;
+       }
+
+       switch (sk->sk_state) {
+               struct request_sock *req , **prev;
+       case DCCP_LISTEN:
+               if (sock_owned_by_user(sk))
+                       goto out;
+               req = inet_csk_search_req(sk, &prev, dh->dccph_dport,
+                                         iph->daddr, iph->saddr);
+               if (!req)
+                       goto out;
+
+               /*
+                * ICMPs are not backlogged, hence we cannot get an established
+                * socket here.
+                */
+               BUG_TRAP(!req->sk);
+
+               if (seq != dccp_rsk(req)->dreq_iss) { /* must quote our ISS */
+                       NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+                       goto out;
+               }
+               /*
+                * Still in RESPOND, just remove it silently.
+                * There is no good way to pass the error to the newly
+                * created socket, and POSIX does not want network
+                * errors returned from accept().
+                */
+               inet_csk_reqsk_queue_drop(sk, req, prev);
+               goto out;
+
+       case DCCP_REQUESTING:
+       case DCCP_RESPOND:
+               if (!sock_owned_by_user(sk)) {
+                       DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+                       sk->sk_err = err;
+
+                       sk->sk_error_report(sk);
+
+                       dccp_done(sk);
+               } else
+                       sk->sk_err_soft = err;
+               goto out;
+       }
+
+       /* If we've already connected we will keep trying
+        * until we time out, or the user gives up.
+        *
+        * rfc1122 4.2.3.9 allows to consider as hard errors
+        * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
+        * but it is obsoleted by pmtu discovery).
+        *
+        * Note, that in modern internet, where routing is unreliable
+        * and in each dark corner broken firewalls sit, sending random
+        * errors ordered by their masters even this two messages finally lose
+        * their original sense (even Linux sends invalid PORT_UNREACHs)
+        *
+        * Now we are in compliance with RFCs.
+        *                                                      --ANK (980905)
+        */
+
+       inet = inet_sk(sk);
+       if (!sock_owned_by_user(sk) && inet->recverr) {
+               sk->sk_err = err;
+               sk->sk_error_report(sk);
+       } else /* Only an error on timeout */
+               sk->sk_err_soft = err;
+out:
+       bh_unlock_sock(sk);
+       sock_put(sk);
+}
+
+/*
+ * Build a DCCP Reset carrying @code for @sk, transmit it over the socket's
+ * cached route and then run the rx/tx half-connection CCID exit hooks.
+ *
+ * Returns the error from inet_sk_rebuild_header() if that fails, otherwise
+ * the transmit status with NET_XMIT_CN mapped to 0.  When dccp_make_reset()
+ * yields no packet, 0 is returned and the CCID exit hooks are not run.
+ *
+ * FIXME: what if rebuild_header fails?
+ * Should we be doing a rebuild_header here?
+ */
+int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
+{
+       const struct dccp_sock *dp;
+       const struct inet_sock *inet;
+       struct sk_buff *reset;
+       int rc;
+
+       rc = inet_sk_rebuild_header(sk);
+       if (rc != 0)
+               return rc;
+
+       reset = dccp_make_reset(sk, sk->sk_dst_cache, code);
+       if (reset == NULL)
+               return rc;      /* rc is still 0 at this point */
+
+       dp   = dccp_sk(sk);
+       inet = inet_sk(sk);
+
+       rc = ip_build_and_send_pkt(reset, sk, inet->saddr, inet->daddr, NULL);
+       if (rc == NET_XMIT_CN)
+               rc = 0;
+
+       ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
+       ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
+
+       return rc;
+}
+
+/*
+ * Derive an initial sequence number from the addresses and ports of the
+ * received packet @skb via secure_dccp_sequence_number(); the packet's
+ * destination address and port are passed first.  @sk is not used.
+ */
+static inline u64 dccp_v4_init_sequence(const struct sock *sk,
+                                       const struct sk_buff *skb)
+{
+       const struct iphdr *iph = skb->nh.iph;
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+
+       return secure_dccp_sequence_number(iph->daddr, iph->saddr,
+                                          dh->dccph_dport, dh->dccph_sport);
+}
+
+/*
+ * Handle a connection request @skb received on the listening socket @sk:
+ * allocate a request sock, fill it in from the packet, send the response and
+ * hash the request with an initial timeout of DCCP_TIMEOUT_INIT.
+ *
+ * Returns 0 once the request has been queued.  Returns -1, after bumping
+ * DCCP_MIB_ATTEMPTFAILS, when the request is dropped.
+ */
+int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+{
+       const __u32 peer_addr  = skb->nh.iph->saddr;
+       const __u32 local_addr = skb->nh.iph->daddr;
+       struct dst_entry *dst = NULL;
+       struct inet_request_sock *ireq;
+       struct dccp_request_sock *dreq;
+       struct request_sock *req;
+       struct dccp_sock dp;
+
+       /* Never answer to DCCP_PKT_REQUESTs sent to broadcast or multicast */
+       if (((struct rtable *)skb->dst)->rt_flags &
+           (RTCF_BROADCAST | RTCF_MULTICAST))
+               goto drop;
+
+       /*
+        * TW buckets are converted to open requests without
+        * limitations, they conserve resources and peer is
+        * evidently real one.
+        */
+       if (inet_csk_reqsk_queue_is_full(sk))
+               goto drop;
+
+       /*
+        * Accept backlog is full. If we have already queued enough
+        * of warm entries in syn queue, drop request. It is better than
+        * clogging syn queue with openreqs with exponentially increasing
+        * timeout.
+        */
+       if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+               goto drop;
+
+       req = reqsk_alloc(sk->sk_prot->rsk_prot);
+       if (!req)
+               goto drop;
+
+       /* FIXME: process options */
+
+       dccp_openreq_init(req, &dp, skb);
+
+       /* FIXME: Merge Aristeu's option parsing code when ready */
+       req->rcv_wnd = 100;     /* Fake, option parsing will get the
+                                  right value */
+
+       ireq           = inet_rsk(req);
+       ireq->loc_addr = local_addr;
+       ireq->rmt_addr = peer_addr;
+       ireq->opt      = NULL;
+
+       /*
+        * Step 3: Process LISTEN state
+        *
+        * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+        *
+        * In fact we defer setting S.GSR, S.SWL, S.SWH to
+        * dccp_create_openreq_child.
+        */
+       dreq               = dccp_rsk(req);
+       dreq->dreq_isr     = DCCP_SKB_CB(skb)->dccpd_seq;
+       dreq->dreq_iss     = dccp_v4_init_sequence(sk, skb);
+       dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;
+
+       if (dccp_v4_send_response(sk, req, dst) == 0) {
+               inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+               return 0;
+       }
+
+       /*
+        * The response could not be sent: release the request sock.
+        * FIXME: should be reqsk_free after implementing req->rsk_ops
+        */
+       __reqsk_free(req);
+drop:
+       DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+       return -1;
+}
+
+/*
+ * The three way handshake has completed - we got a valid ACK or DATAACK -
+ * now create the new socket.
+ *
+ * This is the equivalent of TCP's tcp_v4_syn_recv_sock
+ *
+ * @dst may be NULL, in which case a route is looked up with
+ * inet_csk_route_req().  Returns the new, hashed child socket, or NULL (with
+ * LINUX_MIB_LISTENDROPS bumped and @dst released) on failure.
+ */
+struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
+                                      struct request_sock *req,
+                                      struct dst_entry *dst)
+{
+       struct inet_request_sock *ireq = inet_rsk(req);
+       struct inet_sock *newinet;
+       struct sock *child;
+
+       if (sk_acceptq_is_full(sk)) {
+               NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+               goto drop;
+       }
+
+       if (dst == NULL) {
+               dst = inet_csk_route_req(sk, req);
+               if (dst == NULL)
+                       goto drop;
+       }
+
+       child = dccp_create_openreq_child(sk, req, skb);
+       if (child == NULL)
+               goto drop;
+
+       sk_setup_caps(child, dst);
+
+       /* Copy the addressing recorded in the request into the child. */
+       newinet            = inet_sk(child);
+       newinet->daddr     = ireq->rmt_addr;
+       newinet->rcv_saddr = ireq->loc_addr;
+       newinet->saddr     = ireq->loc_addr;
+       newinet->mc_index  = inet_iif(skb);
+       newinet->mc_ttl    = skb->nh.iph->ttl;
+       newinet->id        = jiffies;
+
+       /* The IP options pointer moves from the request to the child. */
+       newinet->opt       = ireq->opt;
+       ireq->opt          = NULL;
+
+       dccp_sync_mss(child, dst_mtu(dst));
+
+       __inet_hash(&dccp_hashinfo, child, 0);
+       __inet_inherit_port(&dccp_hashinfo, sk, child);
+
+       return child;
+
+drop:
+       NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+       dst_release(dst);
+       return NULL;
+}
+
+/*
+ * Work out which socket should process @skb, which arrived on the listening
+ * socket @sk.
+ *
+ * Returns:
+ *   - the result of dccp_check_req() when a pending request matches;
+ *   - otherwise a matching established socket, locked with bh_lock_sock();
+ *   - NULL when the match is in DCCP_TIME_WAIT (its reference is put);
+ *   - @sk itself when nothing matches.
+ */
+static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
+{
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+       const struct iphdr *iph = skb->nh.iph;
+       struct request_sock **prev;
+       struct request_sock *pending;
+       struct sock *est;
+
+       /* Find possible connection requests. */
+       pending = inet_csk_search_req(sk, &prev, dh->dccph_sport,
+                                     iph->saddr, iph->daddr);
+       if (pending != NULL)
+               return dccp_check_req(sk, skb, pending, prev);
+
+       est = __inet_lookup_established(&dccp_hashinfo,
+                                       iph->saddr, dh->dccph_sport,
+                                       iph->daddr, ntohs(dh->dccph_dport),
+                                       inet_iif(skb));
+       if (est == NULL)
+               return sk;
+
+       if (est->sk_state == DCCP_TIME_WAIT) {
+               inet_twsk_put((struct inet_timewait_sock *)est);
+               return NULL;
+       }
+
+       bh_lock_sock(est);
+       return est;
+}
+
+/*
+ * Compute the DCCP checksum of @skb over the IPv4 pseudo-header built from
+ * @saddr and @daddr.
+ *
+ * The whole packet (skb->len bytes) is covered when dccph_cscov is 0.
+ * Otherwise (dccph_cscov + dccph_x) 32-bit words are covered, limited to
+ * skb->len.  Returns the value produced by csum_tcpudp_magic().
+ */
+int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr,
+                    const u32 daddr)
+{
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+       int covered = skb->len;
+       u32 partial;
+
+       if (dh->dccph_cscov != 0) {
+               covered = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
+               if (covered >= skb->len)
+                       covered = skb->len;
+       }
+
+       partial = csum_partial((unsigned char *)dh, covered, 0);
+
+       return csum_tcpudp_magic(saddr, daddr, covered, IPPROTO_DCCP,
+                                partial);
+}
+
+/*
+ * Verify the DCCP checksum of a received @skb against the pseudo-header built
+ * from @saddr and @daddr, using the same coverage rule as dccp_v4_checksum():
+ * the whole packet when dccph_cscov is 0, otherwise
+ * (dccph_cscov + dccph_x) 32-bit words limited to skb->len.
+ *
+ * Returns 0 when the checksum folds to zero, -1 otherwise.
+ */
+static int dccp_v4_verify_checksum(struct sk_buff *skb,
+                                  const u32 saddr, const u32 daddr)
+{
+       struct dccp_hdr *dh = dccp_hdr(skb);
+       int covered = skb->len;
+       u32 partial;
+
+       if (dh->dccph_cscov != 0) {
+               covered = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
+               if (covered >= skb->len)
+                       covered = skb->len;
+       }
+
+       partial = csum_partial((unsigned char *)dh, covered, 0);
+
+       if (csum_tcpudp_magic(saddr, daddr, covered,
+                             IPPROTO_DCCP, partial) != 0)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Find a route for a reply to the received packet @skb: the flow key swaps
+ * the packet's source and destination addresses and ports, and the output
+ * interface is the one recorded in the packet's incoming route (rt_iif).
+ *
+ * Returns the route's dst entry, or NULL (after bumping
+ * IPSTATS_MIB_OUTNOROUTES) when ip_route_output_flow() fails.
+ */
+static struct dst_entry *dccp_v4_route_skb(struct sock *sk,
+                                          struct sk_buff *skb)
+{
+       const struct iphdr *iph = skb->nh.iph;
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+       struct rtable *rt;
+       struct flowi fl = {
+               .oif   = ((struct rtable *)skb->dst)->rt_iif,
+               .nl_u  = { .ip4_u = { .daddr = iph->saddr,
+                                     .saddr = iph->daddr,
+                                     .tos   = RT_CONN_FLAGS(sk) } },
+               .proto = sk->sk_protocol,
+               .uli_u = { .ports = { .sport = dh->dccph_dport,
+                                     .dport = dh->dccph_sport } },
+       };
+
+       if (ip_route_output_flow(&rt, &fl, sk, 0) != 0) {
+               IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+               return NULL;
+       }
+
+       return &rt->u.dst;
+}
+
+/*
+ * Answer the received packet @rxskb with a DCCP Reset sent from the DCCP
+ * control socket.  The reset code is taken from
+ * DCCP_SKB_CB(rxskb)->dccpd_reset_code.
+ *
+ * Nothing is sent when @rxskb is itself a Reset, when its route is not of
+ * type RTN_LOCAL, or when no route or skb can be obtained.
+ */
+static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
+{
+       const struct dccp_hdr *rxdh = dccp_hdr(rxskb);
+       const int reset_hdr_len = sizeof(struct dccp_hdr) +
+                                 sizeof(struct dccp_hdr_ext) +
+                                 sizeof(struct dccp_hdr_reset);
+       const struct iphdr *rxiph;
+       struct sock *ctl_sk;
+       struct dccp_hdr *dh;
+       struct dst_entry *dst;
+       struct sk_buff *skb;
+       u64 seqno = 0;
+       int rc;
+
+       /* Never send a reset in response to a reset. */
+       if (rxdh->dccph_type == DCCP_PKT_RESET ||
+           ((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
+               return;
+
+       ctl_sk = dccp_ctl_socket->sk;
+
+       dst = dccp_v4_route_skb(ctl_sk, rxskb);
+       if (dst == NULL)
+               return;
+
+       skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
+       if (skb == NULL)
+               goto out;
+
+       /* Reserve space for headers. */
+       skb_reserve(skb, MAX_DCCP_HEADER);
+       skb->dst = dst_clone(dst);
+
+       skb->h.raw = skb_push(skb, reset_hdr_len);
+       dh = dccp_hdr(skb);
+       memset(dh, 0, reset_hdr_len);
+
+       /* Build DCCP header and checksum it. */
+       dh->dccph_type  = DCCP_PKT_RESET;
+       dh->dccph_sport = rxdh->dccph_dport;
+       dh->dccph_dport = rxdh->dccph_sport;
+       dh->dccph_doff  = reset_hdr_len / 4;
+       dh->dccph_x     = 1;
+       dccp_hdr_reset(skb)->dccph_reset_code =
+                               DCCP_SKB_CB(rxskb)->dccpd_reset_code;
+
+       /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */
+       if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+               dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);
+
+       dccp_hdr_set_seq(dh, seqno);
+       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
+                        DCCP_SKB_CB(rxskb)->dccpd_seq);
+
+       rxiph = rxskb->nh.iph;
+       dh->dccph_checksum = dccp_v4_checksum(skb, rxiph->saddr,
+                                             rxiph->daddr);
+
+       /* The reply goes from the received packet's daddr to its saddr. */
+       bh_lock_sock(ctl_sk);
+       rc = ip_build_and_send_pkt(skb, ctl_sk, rxiph->daddr, rxiph->saddr,
+                                  NULL);
+       bh_unlock_sock(ctl_sk);
+
+       if (rc == 0 || rc == NET_XMIT_CN) {
+               DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
+               DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+       }
+out:
+       dst_release(dst);
+}
+
+/*
+ * Process the packet @skb for socket @sk.
+ *
+ * DCCP_OPEN sockets go through dccp_rcv_established().  Listening sockets
+ * first look for a matching request or child socket.  Everything else is
+ * handed to dccp_rcv_state_process().  When a handler returns non-zero, a
+ * Reset(No Connection) is sent and the skb is freed.  Always returns 0.
+ */
+int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_hdr *dh = dccp_hdr(skb);
+       struct sock *child;
+
+       if (sk->sk_state == DCCP_OPEN) { /* Fast path */
+               if (!dccp_rcv_established(sk, skb, dh, skb->len))
+                       return 0;
+               goto reset;
+       }
+
+       /*
+        *  Step 3: Process LISTEN state
+        *     If S.state == LISTEN,
+        *        If P.type == Request or P contains a valid Init Cookie
+        *              option,
+        *           * Must scan the packet's options to check for an Init
+        *              Cookie.  Only the Init Cookie is processed here,
+        *              however; other options are processed in Step 8.  This
+        *              scan need only be performed if the endpoint uses Init
+        *              Cookies *
+        *           * Generate a new socket and switch to that socket *
+        *           Set S := new socket for this port pair
+        *           S.state = RESPOND
+        *           Choose S.ISS (initial seqno) or set from Init Cookie
+        *           Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+        *           Continue with S.state == RESPOND
+        *           * A Response packet will be generated in Step 11 *
+        *        Otherwise,
+        *           Generate Reset(No Connection) unless P.type == Reset
+        *           Drop packet and return
+        *
+        * NOTE: the check for the packet types is done in
+        *       dccp_rcv_state_process
+        */
+       if (sk->sk_state == DCCP_LISTEN) {
+               child = dccp_v4_hnd_req(sk, skb);
+               if (child == NULL)
+                       goto discard;
+
+               if (child != sk) {
+                       if (!dccp_child_process(sk, child, skb))
+                               return 0;
+                       goto reset;
+               }
+       }
+
+       if (!dccp_rcv_state_process(sk, skb, dh, skb->len))
+               return 0;
+
+reset:
+       DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
+       dccp_v4_ctl_send_reset(skb);
+discard:
+       kfree_skb(skb);
+       return 0;
+}
+
+/*
+ * Step 1 of packet reception: check the basics of the DCCP header in @skb.
+ *
+ * The packet is rejected when any of the following holds:
+ *   - it is not addressed to this host (pkt_type != PACKET_HOST);
+ *   - the generic DCCP header cannot be pulled;
+ *   - the packet type is not a known one;
+ *   - P.Data Offset is smaller than the header length for this packet type;
+ *   - P.Data Offset is larger than the data available in the packet;
+ *   - short sequence numbers (P.X == 0) are used on a packet that is not
+ *     Data, Ack or DataAck;
+ *   - the checksum does not verify.
+ *
+ * Returns 1 if the packet must be dropped, 0 if it passed all checks.
+ */
+static inline int dccp_invalid_packet(struct sk_buff *skb)
+{
+       const struct dccp_hdr *dh;
+
+       if (skb->pkt_type != PACKET_HOST)
+               return 1;
+
+       if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
+               LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n");
+               return 1;
+       }
+
+       dh = dccp_hdr(skb);
+
+       /* If the packet type is not understood, drop packet and return */
+       if (dh->dccph_type >= DCCP_PKT_INVALID) {
+               LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n");
+               return 1;
+       }
+
+       /*
+        * If P.Data Offset is too small for packet type, drop packet and
+        * return
+        */
+       if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
+               LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
+                                           "too small\n",
+                              dh->dccph_doff);
+               return 1;
+       }
+
+       /*
+        * If P.Data Offset is too large for packet, drop packet and return.
+        * This is the case when the pull of doff 32-bit words fails, so the
+        * message must say "too large", not "too small".
+        */
+       if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
+               LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
+                                           "too large\n",
+                              dh->dccph_doff);
+               return 1;
+       }
+
+       /* Fetch the header pointer again after the second pull. */
+       dh = dccp_hdr(skb);
+
+       /*
+        * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
+        * has short sequence numbers), drop packet and return
+        */
+       if (dh->dccph_x == 0 &&
+           dh->dccph_type != DCCP_PKT_DATA &&
+           dh->dccph_type != DCCP_PKT_ACK &&
+           dh->dccph_type != DCCP_PKT_DATAACK) {
+               LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack "
+                                           "nor DataAck and P.X == 0\n",
+                              dccp_packet_name(dh->dccph_type));
+               return 1;
+       }
+
+       /* If the header checksum is incorrect, drop packet and return */
+       if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
+                                   skb->nh.iph->daddr) < 0) {
+               LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is "
+                                           "incorrect\n");
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * this is called when real data arrives
+ *
+ * Receive entry point for a DCCP packet carried over IPv4.
+ *
+ * The packet is first validated with dccp_invalid_packet().  Its sequence
+ * number, type and (if present) acknowledgement number are then stored in
+ * DCCP_SKB_CB(skb) and the owning socket is looked up.
+ *
+ *   - No socket, or a socket in DCCP_TIME_WAIT: a Reset(No Connection) is
+ *     sent unless the packet is itself a Reset, and the skb is freed.
+ *   - xfrm4_policy_check() or sk_filter() rejects the packet: the socket
+ *     reference is put and the skb is freed.
+ *   - Otherwise, under bh_lock_sock(), the packet is handed to
+ *     dccp_v4_do_rcv() or, while the user owns the socket, added to the
+ *     socket backlog.
+ *
+ * Returns the value of dccp_v4_do_rcv() when that was called, 0 otherwise.
+ */
+int dccp_v4_rcv(struct sk_buff *skb)
+{
+       const struct dccp_hdr *dh;
+       struct sock *sk;
+       int rc;
+
+       /* Step 1: Check header basics: */
+
+       if (dccp_invalid_packet(skb))
+               goto discard_it;
+
+       dh = dccp_hdr(skb);
+#if 0
+       /*
+        * Use something like this to simulate some DATA/DATAACK loss to test
+        * dccp_ackpkts_add, you'll get something like this on a session that
+        * sends 10 DATA/DATAACK packets:
+        *
+        * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1|
+        *
+        * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet
+        * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets
+        *                                                 with the same state
+        * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet
+        *
+        * So...
+        *
+        * 281473596467422 was received
+        * 281473596467421 was not received
+        * 281473596467420 was received
+        * 281473596467419 was not received
+        * 281473596467418 was received
+        * 281473596467417 was not received
+        * 281473596467416 was received
+        * 281473596467415 was not received
+        * 281473596467414 was received
+        * 281473596467413 was received (this one was the 3way handshake
+        *                               RESPONSE)
+        *
+        */
+       if (dh->dccph_type == DCCP_PKT_DATA ||
+           dh->dccph_type == DCCP_PKT_DATAACK) {
+               static int discard = 0;
+
+               if (discard) {
+                       discard = 0;
+                       goto discard_it;
+               }
+               discard = 1;
+       }
+#endif
+       DCCP_SKB_CB(skb)->dccpd_seq  = dccp_hdr_seq(skb);
+       DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
+
+       dccp_pr_debug("%8.8s "
+                     "src=%u.%u.%u.%u@%-5d "
+                     "dst=%u.%u.%u.%u@%-5d seq=%llu",
+                     dccp_packet_name(dh->dccph_type),
+                     NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport),
+                     NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport),
+                     (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
+
+       if (dccp_packet_without_ack(skb)) { /* no ack field: store marker */
+               DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
+               dccp_pr_debug_cat("\n");
+       } else {
+               DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
+               dccp_pr_debug_cat(", ack=%llu\n",
+                                 (unsigned long long)
+                                 DCCP_SKB_CB(skb)->dccpd_ack_seq);
+       }
+
+       /* Step 2:
+        *      Look up flow ID in table and get corresponding socket */
+       sk = __inet_lookup(&dccp_hashinfo,
+                          skb->nh.iph->saddr, dh->dccph_sport,
+                          skb->nh.iph->daddr, ntohs(dh->dccph_dport),
+                          inet_iif(skb));
+
+       /* 
+        * Step 2:
+        *      If no socket ...
+        *              Generate Reset(No Connection) unless P.type == Reset
+        *              Drop packet and return
+        */
+       if (sk == NULL) {
+               dccp_pr_debug("failed to look up flow ID in table and "
+                             "get corresponding socket\n");
+               goto no_dccp_socket;
+       }
+
+       /* 
+        * Step 2:
+        *      ... or S.state == TIMEWAIT,
+        *              Generate Reset(No Connection) unless P.type == Reset
+        *              Drop packet and return
+        */
+              
+       if (sk->sk_state == DCCP_TIME_WAIT) {
+               dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: "
+                             "do_time_wait\n");
+                goto do_time_wait;
+       }
+
+       if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+               dccp_pr_debug("xfrm4_policy_check failed\n");
+               goto discard_and_relse;
+       }
+
+        if (sk_filter(sk, skb, 0)) {
+               dccp_pr_debug("sk_filter failed\n");
+                goto discard_and_relse;
+       }
+
+       skb->dev = NULL;
+
+       bh_lock_sock(sk);
+       rc = 0; /* stays 0 when the packet is only queued on the backlog */
+       if (!sock_owned_by_user(sk))
+               rc = dccp_v4_do_rcv(sk, skb);
+       else
+               sk_add_backlog(sk, skb);
+       bh_unlock_sock(sk);
+
+       sock_put(sk);
+       return rc;
+
+no_dccp_socket:
+       if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+               goto discard_it;
+       /*
+        * Step 2:
+        *              Generate Reset(No Connection) unless P.type == Reset
+        *              Drop packet and return
+        */
+       if (dh->dccph_type != DCCP_PKT_RESET) {
+               DCCP_SKB_CB(skb)->dccpd_reset_code =
+                                       DCCP_RESET_CODE_NO_CONNECTION;
+               dccp_v4_ctl_send_reset(skb);
+       }
+
+discard_it:
+       /* Discard frame. */
+       kfree_skb(skb);
+       return 0;
+
+discard_and_relse:
+       sock_put(sk);
+       goto discard_it;
+
+do_time_wait:
+       inet_twsk_put((struct inet_timewait_sock *)sk);
+       goto no_dccp_socket;
+}
+
+/*
+ * Protocol ->init hook: set up the DCCP specific state of a new socket.
+ *
+ * Initialises the options, allocates the ack vector when
+ * dccpo_send_ack_vector is set and attaches rx/tx CCIDs.  CCID setup is
+ * skipped on the very first call (tracked by a static flag).  Finally the
+ * timers, initial RTO, state, write_space callback, MSS cache and role are
+ * set.
+ *
+ * Returns 0 on success or -ENOMEM when an allocation fails.
+ */
+static int dccp_v4_init_sock(struct sock *sk)
+{
+       static int dccp_ctl_socket_init = 1;
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       dccp_options_init(&dp->dccps_options);
+
+       if (dp->dccps_options.dccpo_send_ack_vector) {
+               dp->dccps_hc_rx_ackpkts =
+                       dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
+                                          GFP_KERNEL);
+               if (dp->dccps_hc_rx_ackpkts == NULL)
+                       return -ENOMEM;
+       }
+
+       /*
+        * FIXME: We're hardcoding the CCID, and doing this at this point makes
+        * the listening (master) sock get CCID control blocks, which is not
+        * necessary, but for now, to not mess with the test userspace apps,
+        * lets leave it here, later the real solution is to do this in a
+        * setsockopt(CCIDs-I-want/accept). -acme
+        */
+       if (likely(!dccp_ctl_socket_init)) {
+               dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
+                                                sk);
+               dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
+                                                sk);
+               if (dp->dccps_hc_rx_ccid == NULL ||
+                   dp->dccps_hc_tx_ccid == NULL)
+                       goto ccid_failed;
+       } else
+               dccp_ctl_socket_init = 0;
+
+       dccp_init_xmit_timers(sk);
+       inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT;
+       sk->sk_state           = DCCP_CLOSED;
+       sk->sk_write_space     = dccp_write_space;
+       dp->dccps_mss_cache    = 536;
+       dp->dccps_role         = DCCP_ROLE_UNDEFINED;
+
+       return 0;
+
+ccid_failed:
+       /* Undo whatever was set up above before reporting the failure. */
+       ccid_exit(dp->dccps_hc_rx_ccid, sk);
+       ccid_exit(dp->dccps_hc_tx_ccid, sk);
+       dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
+       dp->dccps_hc_rx_ackpkts = NULL;
+       dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+       return -ENOMEM;
+}
+
+/*
+ * Protocol ->destroy hook: free the packet kept for retransmission, give
+ * back the bound port, and tear down the ack vector and both CCIDs.
+ * Always returns 0.
+ */
+static int dccp_v4_destroy_sock(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct sk_buff *pending = sk->sk_send_head;
+
+       /*
+        * DCCP doesn't use sk_write_queue, just sk_send_head
+        * for retransmissions
+        */
+       if (pending != NULL) {
+               kfree_skb(pending);
+               sk->sk_send_head = NULL;
+       }
+
+       /* Clean up a referenced DCCP bind bucket. */
+       if (inet_csk(sk)->icsk_bind_hash != NULL)
+               inet_put_port(&dccp_hashinfo, sk);
+
+       /* Half-connection state first ... */
+       ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
+       ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
+
+       dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
+       dp->dccps_hc_rx_ackpkts = NULL;
+
+       /* ... then the CCIDs themselves. */
+       ccid_exit(dp->dccps_hc_rx_ccid, sk);
+       ccid_exit(dp->dccps_hc_tx_ccid, sk);
+       dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+
+       return 0;
+}
+
+/* Request sock destructor: free the options attached to @req. */
+static void dccp_v4_reqsk_destructor(struct request_sock *req)
+{
+       struct inet_request_sock *ireq = inet_rsk(req);
+
+       kfree(ireq->opt);
+}
+
+/* Request sock operations used for DCCP over IPv4 connection requests. */
+static struct request_sock_ops dccp_request_sock_ops = {
+       /* identity and allocation size */
+       .family         = PF_INET,
+       .obj_size       = sizeof(struct dccp_request_sock),
+
+       /* packet generation callbacks */
+       .rtx_syn_ack    = dccp_v4_send_response,
+       .send_ack       = dccp_v4_reqsk_send_ack,
+       .send_reset     = dccp_v4_ctl_send_reset,
+
+       /* teardown */
+       .destructor     = dccp_v4_reqsk_destructor,
+};
+
+/* Protocol descriptor for DCCP sockets over IPv4. */
+struct proto dccp_v4_prot = {
+       .name                   = "DCCP",
+       .owner                  = THIS_MODULE,
+
+       /* socket lifetime */
+       .init                   = dccp_v4_init_sock,
+       .destroy                = dccp_v4_destroy_sock,
+       .close                  = dccp_close,
+       .shutdown               = dccp_shutdown,
+
+       /* connection management */
+       .connect                = dccp_v4_connect,
+       .disconnect             = dccp_disconnect,
+       .accept                 = inet_csk_accept,
+
+       /* data transfer and control */
+       .sendmsg                = dccp_sendmsg,
+       .recvmsg                = dccp_recvmsg,
+       .backlog_rcv            = dccp_v4_do_rcv,
+       .ioctl                  = dccp_ioctl,
+       .setsockopt             = dccp_setsockopt,
+       .getsockopt             = dccp_getsockopt,
+
+       /* hash tables and port allocation */
+       .hash                   = dccp_v4_hash,
+       .unhash                 = dccp_v4_unhash,
+       .get_port               = dccp_v4_get_port,
+
+       /* sizes, limits and associated objects */
+       .orphan_count           = &dccp_orphan_count,
+       .max_header             = MAX_DCCP_HEADER,
+       .obj_size               = sizeof(struct dccp_sock),
+       .twsk_obj_size          = sizeof(struct inet_timewait_sock),
+       .rsk_prot               = &dccp_request_sock_ops,
+};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c

new file mode 100644 (file)

index 0000000..ce5dff4
--- /dev/null
+++ b/net/dccp/minisocks.c
@@ -0,0 +1,264 @@
+/*
+ *  net/dccp/minisocks.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+#include <linux/timer.h>
+
+#include <net/sock.h>
+#include <net/xfrm.h>
+#include <net/inet_timewait_sock.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+struct inet_timewait_death_row dccp_death_row = {
+       .sysctl_max_tw_buckets = NR_FILE * 2,
+       .period         = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
+       .death_lock     = SPIN_LOCK_UNLOCKED,
+       .hashinfo       = &dccp_hashinfo,
+       .tw_timer       = TIMER_INITIALIZER(inet_twdr_hangman, 0,
+                                           (unsigned long)&dccp_death_row),
+       .twkill_work    = __WORK_INITIALIZER(dccp_death_row.twkill_work,
+                                            inet_twdr_twkill_work,
+                                            &dccp_death_row),
+/* Short-time timewait calendar */
+
+       .twcal_hand     = -1,
+       .twcal_timer    = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
+                                           (unsigned long)&dccp_death_row),
+};
+
+void dccp_time_wait(struct sock *sk, int state, int timeo)
+{
+       struct inet_timewait_sock *tw = NULL;
+
+       if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets)
+               tw = inet_twsk_alloc(sk, state);
+
+       if (tw != NULL) {
+               const struct inet_connection_sock *icsk = inet_csk(sk);
+               const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
+
+               /* Linkage updates. */
+               __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
+
+               /* Get the TIME_WAIT timeout firing. */
+               if (timeo < rto)
+                       timeo = rto;
+
+               tw->tw_timeout = DCCP_TIMEWAIT_LEN;
+               if (state == DCCP_TIME_WAIT)
+                       timeo = DCCP_TIMEWAIT_LEN;
+
+               inet_twsk_schedule(tw, &dccp_death_row, timeo,
+                                  DCCP_TIMEWAIT_LEN);
+               inet_twsk_put(tw);
+       } else {
+               /* Sorry, if we're out of memory, just CLOSE this
+                * socket up.  We've got bigger problems than
+                * non-graceful socket closings.
+                */
+               LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket "
+                                        "table overflow\n");
+       }
+
+       dccp_done(sk);
+}
+
+struct sock *dccp_create_openreq_child(struct sock *sk,
+                                      const struct request_sock *req,
+                                      const struct sk_buff *skb)
+{
+       /*
+        * Step 3: Process LISTEN state
+        *
+        * // Generate a new socket and switch to that socket
+        * Set S := new socket for this port pair
+        */
+       struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
+
+       if (newsk != NULL) {
+               const struct dccp_request_sock *dreq = dccp_rsk(req);
+               /* The child's connection sock, not the listening parent's */
+               struct inet_connection_sock *newicsk = inet_csk(newsk);
+               struct dccp_sock *newdp = dccp_sk(newsk);
+
+               newdp->dccps_hc_rx_ackpkts = NULL;
+               newdp->dccps_role = DCCP_ROLE_SERVER;
+               newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
+
+               if (newdp->dccps_options.dccpo_send_ack_vector) {
+                       newdp->dccps_hc_rx_ackpkts =
+                               dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
+                                                  GFP_ATOMIC);
+                       if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL))
+                               goto out_free;
+               }
+
+               if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid,
+                                            newsk) != 0 ||
+                            ccid_hc_tx_init(newdp->dccps_hc_tx_ccid,
+                                            newsk) != 0)) {
+                       dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts);
+                       ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
+                       ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
+out_free:
+                       /* It is still raw copy of parent, so invalidate
+                        * destructor and make plain sk_free() */
+                       newsk->sk_destruct = NULL;
+                       sk_free(newsk);
+                       return NULL;
+               }
+
+               /*
+                * XXX: We're using the same CCIDs set on the parent, i.e.
+                * sk_clone copied the master sock and left the CCID pointers
+                * for this child, that is why we do the __ccid_get calls.
+                */
+               __ccid_get(newdp->dccps_hc_rx_ccid);
+               __ccid_get(newdp->dccps_hc_tx_ccid);
+
+               /*
+                * Step 3: Process LISTEN state
+                *
+                *      Choose S.ISS (initial seqno) or set from Init Cookie
+                *      Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init
+                *      Cookie
+                */
+
+               /* See dccp_v4_conn_request */
+               newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd;
+
+               newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
+               dccp_update_gsr(newsk, dreq->dreq_isr);
+
+               newdp->dccps_iss = dreq->dreq_iss;
+               dccp_update_gss(newsk, dreq->dreq_iss);
+
+               /*
+                * SWL and AWL are initially adjusted so that they are not less than
+                * the initial Sequence Numbers received and sent, respectively:
+                *      SWL := max(GSR + 1 - floor(W/4), ISR),
+                *      AWL := max(GSS - W' + 1, ISS).
+                * These adjustments MUST be applied only at the beginning of the
+                * connection.
+                */
+               dccp_set_seqno(&newdp->dccps_swl,
+                              max48(newdp->dccps_swl, newdp->dccps_isr));
+               dccp_set_seqno(&newdp->dccps_awl,
+                              max48(newdp->dccps_awl, newdp->dccps_iss));
+
+               dccp_init_xmit_timers(newsk);
+
+               DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
+       }
+       return newsk;
+}
+
+/* 
+ * Process an incoming packet for RESPOND sockets represented
+ * as a request_sock.
+ */
+struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
+                           struct request_sock *req,
+                           struct request_sock **prev)
+{
+       struct sock *child = NULL;
+
+       /* Check for retransmitted REQUEST */
+       if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
+               if (after48(DCCP_SKB_CB(skb)->dccpd_seq,
+                           dccp_rsk(req)->dreq_isr)) {
+                       struct dccp_request_sock *dreq = dccp_rsk(req);
+
+                       dccp_pr_debug("Retransmitted REQUEST\n");
+                       /* Send another RESPONSE packet */
+                       dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
+                       dccp_set_seqno(&dreq->dreq_isr,
+                                      DCCP_SKB_CB(skb)->dccpd_seq);
+                       req->rsk_ops->rtx_syn_ack(sk, req, NULL);
+               }
+               /* Network Duplicate, discard packet */
+               return NULL;
+       }
+
+       DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
+
+       if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK &&
+           dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK)
+               goto drop;
+
+       /* Invalid ACK */
+       if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) {
+               dccp_pr_debug("Invalid ACK number: ack_seq=%llu, "
+                             "dreq_iss=%llu\n",
+                             (unsigned long long)
+                             DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                             (unsigned long long)
+                             dccp_rsk(req)->dreq_iss);
+               goto drop;
+       }
+
+       child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
+       if (child == NULL)
+               goto listen_overflow;
+
+       /* FIXME: deal with options */
+
+       inet_csk_reqsk_queue_unlink(sk, req, prev);
+       inet_csk_reqsk_queue_removed(sk, req);
+       inet_csk_reqsk_queue_add(sk, req, child);
+out:
+       return child;
+listen_overflow:
+       dccp_pr_debug("listen_overflow!\n");
+       DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
+drop:
+       if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
+               req->rsk_ops->send_reset(skb);
+
+       inet_csk_reqsk_queue_drop(sk, req, prev);
+       goto out;
+}
+
+/*
+ *  Queue segment on the new socket if the new socket is active,
+ *  otherwise we just shortcircuit this and continue with
+ *  the new socket.
+ */
+int dccp_child_process(struct sock *parent, struct sock *child,
+                      struct sk_buff *skb)
+{
+       int ret = 0;
+       const int state = child->sk_state;
+
+       if (!sock_owned_by_user(child)) {
+               ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb),
+                                            skb->len);
+
+               /* Wakeup parent, send SIGIO */
+               if (state == DCCP_RESPOND && child->sk_state != state)
+                       parent->sk_data_ready(parent, 0);
+       } else {
+               /* Alas, it is possible again, because we do lookup
+                * in main socket hash table and lock on listening
+                * socket does not protect us more.
+                */
+               sk_add_backlog(child, skb);
+       }
+
+       bh_unlock_sock(child);
+       sock_put(child);
+       return ret;
+}
diff --git a/net/dccp/options.c b/net/dccp/options.c

new file mode 100644 (file)

index 0000000..382c589
--- /dev/null
+++ b/net/dccp/options.c
@@ -0,0 +1,855 @@
+/*
+ *  net/dccp/options.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
+ *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
+                                            struct sock *sk,
+                                            const u64 ackno,
+                                            const unsigned char len,
+                                            const unsigned char *vector);
+
+/* stores the default values for new connection. may be changed with sysctl */
+static const struct dccp_options dccpo_default_values = {
+       .dccpo_sequence_window    = DCCPF_INITIAL_SEQUENCE_WINDOW,
+       .dccpo_ccid               = DCCPF_INITIAL_CCID,
+       .dccpo_send_ack_vector    = DCCPF_INITIAL_SEND_ACK_VECTOR,
+       .dccpo_send_ndp_count     = DCCPF_INITIAL_SEND_NDP_COUNT,
+};
+
+void dccp_options_init(struct dccp_options *dccpo)
+{
+       memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo));
+}
+
+static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
+{
+       u32 value = 0;
+       /* Widen each byte to u32 first: a promoted int overflows at << 24 */
+       if (len > 3)
+               value += (u32)*bf++ << 24;
+       if (len > 2)
+               value += (u32)*bf++ << 16;
+       if (len > 1)
+               value += (u32)*bf++ << 8;
+       if (len > 0)
+               value += *bf;
+
+       return value;
+}
+
+int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+#ifdef CONFIG_IP_DCCP_DEBUG
+       const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+                                       "CLIENT rx opt: " : "server rx opt: ";
+#endif
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+       const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
+       unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
+       unsigned char *opt_ptr = options;
+       const unsigned char *opt_end = (unsigned char *)dh +
+                                       (dh->dccph_doff * 4);
+       struct dccp_options_received *opt_recv = &dp->dccps_options_received;
+       unsigned char opt, len;
+       unsigned char *value;
+
+       memset(opt_recv, 0, sizeof(*opt_recv));
+
+       while (opt_ptr != opt_end) {
+               opt   = *opt_ptr++;
+               len   = 0;
+               value = NULL;
+
+               /* Check if this isn't a single byte option */
+               if (opt > DCCPO_MAX_RESERVED) {
+                       if (opt_ptr == opt_end)
+                               goto out_invalid_option;
+
+                       len = *opt_ptr++;
+                       if (len < 3)
+                               goto out_invalid_option;
+                       /*
+                        * Remove the type and len fields, leaving
+                        * just the value size
+                        */
+                       len     -= 2;
+                       value   = opt_ptr;
+                       opt_ptr += len;
+
+                       if (opt_ptr > opt_end)
+                               goto out_invalid_option;
+               }
+
+               switch (opt) {
+               case DCCPO_PADDING:
+                       break;
+               case DCCPO_NDP_COUNT:
+                       if (len > 3)
+                               goto out_invalid_option;
+
+                       opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
+                       dccp_pr_debug("%sNDP count=%d\n", debug_prefix,
+                                     opt_recv->dccpor_ndp);
+                       break;
+               case DCCPO_ACK_VECTOR_0:
+                       if (len > DCCP_MAX_ACK_VECTOR_LEN)
+                               goto out_invalid_option;
+
+                       if (pkt_type == DCCP_PKT_DATA)
+                               continue;
+
+                       opt_recv->dccpor_ack_vector_len = len;
+                       opt_recv->dccpor_ack_vector_idx = value - options;
+
+                       dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
+                                     debug_prefix, len,
+                                     (unsigned long long)
+                                     DCCP_SKB_CB(skb)->dccpd_ack_seq);
+                       dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                                            value, len);
+                       dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts,
+                                                        sk,
+                                                DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                                                        len, value);
+                       break;
+               case DCCPO_TIMESTAMP:
+                       if (len != 4)
+                               goto out_invalid_option;
+
+                       opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);
+
+                       dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
+                       do_gettimeofday(&dp->dccps_timestamp_time);
+
+                       dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
+                                     debug_prefix, opt_recv->dccpor_timestamp,
+                                     (unsigned long long)
+                                     DCCP_SKB_CB(skb)->dccpd_ack_seq);
+                       break;
+               case DCCPO_TIMESTAMP_ECHO:
+                       if (len != 4 && len != 6 && len != 8)
+                               goto out_invalid_option;
+
+                       opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value);
+
+                       dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ",
+                                     debug_prefix,
+                                     opt_recv->dccpor_timestamp_echo,
+                                     len + 2,
+                                     (unsigned long long)
+                                     DCCP_SKB_CB(skb)->dccpd_ack_seq);
+
+                       if (len > 4) {
+                               if (len == 6)
+                                       opt_recv->dccpor_elapsed_time =
+                                                ntohs(*(u16 *)(value + 4));
+                               else
+                                       opt_recv->dccpor_elapsed_time =
+                                                ntohl(*(u32 *)(value + 4));
+
+                               dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n",
+                                     debug_prefix,
+                                     opt_recv->dccpor_elapsed_time);
+                       }
+                       break;
+               case DCCPO_ELAPSED_TIME:
+                       if (len != 2 && len != 4)
+                               goto out_invalid_option;
+
+                       if (pkt_type == DCCP_PKT_DATA)
+                               continue;
+
+                       if (len == 2)
+                               opt_recv->dccpor_elapsed_time =
+                                                       ntohs(*(u16 *)value);
+                       else
+                               opt_recv->dccpor_elapsed_time =
+                                                       ntohl(*(u32 *)value);
+
+                       dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
+                                     opt_recv->dccpor_elapsed_time);
+                       break;
+                       /*
+                        * From draft-ietf-dccp-spec-11.txt:
+                        *
+                        *      Option numbers 128 through 191 are for
+                        *      options sent from the HC-Sender to the
+                        *      HC-Receiver; option numbers 192 through 255
+                        *      are for options sent from the HC-Receiver to
+                        *      the HC-Sender.
+                        */
+               case 128 ... 191: {
+                       const u16 idx = value - options;
+
+                       if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
+                                                    opt, len, idx,
+                                                    value) != 0)
+                               goto out_invalid_option;
+               }
+                       break;
+               case 192 ... 255: {
+                       const u16 idx = value - options;
+
+                       if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
+                                                    opt, len, idx,
+                                                    value) != 0)
+                               goto out_invalid_option;
+               }
+                       break;
+               default:
+                       pr_info("DCCP(%p): option %d(len=%d) not "
+                               "implemented, ignoring\n",
+                               sk, opt, len);
+                       break;
+               }
+       }
+
+       return 0;
+
+out_invalid_option:
+       DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
+       DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
+       pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len);
+       return -1;
+}
+
+static void dccp_encode_value_var(const u32 value, unsigned char *to,
+                                 const unsigned int len)
+{
+       if (len > 3)
+               *to++ = (value & 0xFF000000) >> 24;
+       if (len > 2)
+               *to++ = (value & 0xFF0000) >> 16;
+       if (len > 1)
+               *to++ = (value & 0xFF00) >> 8;
+       if (len > 0)
+               *to++ = (value & 0xFF);
+}
+
+static inline int dccp_ndp_len(const int ndp)
+{
+       return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 2 : 3;
+}
+
+void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
+                       const unsigned char option,
+                       const void *value, const unsigned char len)
+{
+       unsigned char *to;
+
+       if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) {
+               LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
+                              "%d option!\n", option);
+               return;
+       }
+
+       DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2;
+
+       to    = skb_push(skb, len + 2);
+       *to++ = option;
+       *to++ = len + 2;
+
+       memcpy(to, value, len);
+}
+
+EXPORT_SYMBOL_GPL(dccp_insert_option);
+
+static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       int ndp = dp->dccps_ndp_count;
+
+       if (dccp_non_data_packet(skb))
+               ++dp->dccps_ndp_count;
+       else
+               dp->dccps_ndp_count = 0;
+
+       if (ndp > 0) {
+               unsigned char *ptr;
+               const int ndp_len = dccp_ndp_len(ndp);
+               const int len = ndp_len + 2;
+
+               if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+                       return;
+
+               DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+               ptr = skb_push(skb, len);
+               *ptr++ = DCCPO_NDP_COUNT;
+               *ptr++ = len;
+               dccp_encode_value_var(ndp, ptr, ndp_len);
+       }
+}
+
+static inline int dccp_elapsed_time_len(const u32 elapsed_time)
+{
+       return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
+}
+
+void dccp_insert_option_elapsed_time(struct sock *sk,
+                                    struct sk_buff *skb,
+                                    u32 elapsed_time)
+{
+#ifdef CONFIG_IP_DCCP_DEBUG
+       struct dccp_sock *dp = dccp_sk(sk);
+       const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+                                       "CLIENT TX opt: " : "server TX opt: ";
+#endif
+       const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
+       const int len = 2 + elapsed_time_len;
+       unsigned char *to;
+
+       if (elapsed_time_len == 0)
+               return;
+
+       if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
+               LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
+                                        "insert elapsed time!\n");
+               return;
+       }
+
+       DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+       to    = skb_push(skb, len);
+       *to++ = DCCPO_ELAPSED_TIME;
+       *to++ = len;
+
+       if (elapsed_time_len == 2) {
+               const u16 var16 = htons((u16)elapsed_time);
+               memcpy(to, &var16, 2);
+       } else {
+               const u32 var32 = htonl(elapsed_time);
+               memcpy(to, &var32, 4);
+       }
+
+       dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n",
+                     debug_prefix, elapsed_time,
+                     len,
+                     (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
+}
+
+EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
+
+static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+#ifdef CONFIG_IP_DCCP_DEBUG
+       const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+                                       "CLIENT TX opt: " : "server TX opt: ";
+#endif
+       struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
+       int len = ap->dccpap_buf_vector_len + 2;
+       const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10;
+       unsigned char *to, *from;
+
+       if (elapsed_time != 0)
+               dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
+
+       if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
+               LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
+                                        "insert ACK Vector!\n");
+               return;
+       }
+
+       /*
+        * XXX: now we have just one ack vector sent record, so
+        * we have to wait for it to be cleared.
+        *
+        * Of course this is not acceptable, but this is just for
+        * basic testing now.
+        */
+       if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
+               return;
+
+       DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+       to    = skb_push(skb, len);
+       *to++ = DCCPO_ACK_VECTOR_0;
+       *to++ = len;
+
+       len  = ap->dccpap_buf_vector_len;
+       from = ap->dccpap_buf + ap->dccpap_buf_head;
+
+       /* Check if buf_head wraps */
+       if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) {
+               const unsigned int tailsize = (ap->dccpap_buf_len -
+                                              ap->dccpap_buf_head);
+
+               memcpy(to, from, tailsize);
+               to   += tailsize;
+               len  -= tailsize;
+               from = ap->dccpap_buf;
+       }
+
+       memcpy(to, from, len);
+       /*
+        *      From draft-ietf-dccp-spec-11.txt:
+        *
+        *      For each acknowledgement it sends, the HC-Receiver will add an
+        *      acknowledgement record.  ack_seqno will equal the HC-Receiver
+        *      sequence number it used for the ack packet; ack_ptr will equal
+        *      buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
+        *      equal buf_nonce.
+        *
+        * This implementation uses just one ack record for now.
+        */
+       ap->dccpap_ack_seqno      = DCCP_SKB_CB(skb)->dccpd_seq;
+       ap->dccpap_ack_ptr        = ap->dccpap_buf_head;
+       ap->dccpap_ack_ackno      = ap->dccpap_buf_ackno;
+       ap->dccpap_ack_nonce      = ap->dccpap_buf_nonce;
+       ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;
+
+       dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
+                     "ack_ackno=%llu\n",
+                     debug_prefix, ap->dccpap_ack_vector_len,
+                     (unsigned long long) ap->dccpap_ack_seqno,
+                     (unsigned long long) ap->dccpap_ack_ackno);
+}
+
+void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
+{
+       struct timeval tv;
+       u32 now;
+       
+       do_gettimeofday(&tv);
+       now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10;
+       /* yes this will overflow but that is the point as we want a
+        * 10 usec 32 bit timer which means it wraps every 11.9 hours */
+
+       now = htonl(now);
+       dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
+}
+
+EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
+
+static void dccp_insert_option_timestamp_echo(struct sock *sk,
+                                             struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+#ifdef CONFIG_IP_DCCP_DEBUG
+       const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+                                       "CLIENT TX opt: " : "server TX opt: ";
+#endif
+       u32 tstamp_echo;
+       const u32 elapsed_time =
+                       timeval_now_delta(&dp->dccps_timestamp_time) / 10;
+       const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
+       const int len = 6 + elapsed_time_len;
+       unsigned char *to;
+
+       if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
+               LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
+                                        "timestamp echo!\n");
+               return;
+       }
+
+       DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+       to    = skb_push(skb, len);
+       *to++ = DCCPO_TIMESTAMP_ECHO;
+       *to++ = len;
+
+       tstamp_echo = htonl(dp->dccps_timestamp_echo);
+       memcpy(to, &tstamp_echo, 4);
+       to += 4;
+       
+       if (elapsed_time_len == 2) {
+               const u16 var16 = htons((u16)elapsed_time);
+               memcpy(to, &var16, 2);
+       } else if (elapsed_time_len == 4) {
+               const u32 var32 = htonl(elapsed_time);
+               memcpy(to, &var32, 4);
+       }
+
+       dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n",
+                     debug_prefix, dp->dccps_timestamp_echo,
+                     len,
+                     (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
+
+       dp->dccps_timestamp_echo = 0;
+       dp->dccps_timestamp_time.tv_sec = 0;
+       dp->dccps_timestamp_time.tv_usec = 0;
+}
+
+void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
+
+       if (dp->dccps_options.dccpo_send_ndp_count)
+               dccp_insert_option_ndp(sk, skb);
+
+       if (!dccp_packet_without_ack(skb)) {
+               if (dp->dccps_options.dccpo_send_ack_vector &&
+                   (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno !=
+                    DCCP_MAX_SEQNO + 1))
+                       dccp_insert_option_ack_vector(sk, skb);
+
+               if (dp->dccps_timestamp_echo != 0)
+                       dccp_insert_option_timestamp_echo(sk, skb);
+       }
+
+       ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
+       ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);
+
+       /* XXX: insert other options when appropriate */
+
+       if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
+               /* The length of all options has to be a multiple of 4 */
+               int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
+
+               if (padding != 0) {
+                       padding = 4 - padding;
+                       memset(skb_push(skb, padding), 0, padding);
+                       DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
+               }
+       }
+}
+
+struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len,
+                                       const unsigned int __nocast priority)
+{
+       struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority);
+
+       if (ap != NULL) {
+#ifdef CONFIG_IP_DCCP_DEBUG
+               memset(ap->dccpap_buf, 0xFF, len);
+#endif
+               ap->dccpap_buf_len   = len;
+               ap->dccpap_buf_head  = ap->dccpap_buf_tail =
+                       ap->dccpap_buf_len - 1;
+               /* DCCP_MAX_SEQNO + 1 marks "no ackno/seqno recorded yet" */
+               ap->dccpap_buf_ackno = ap->dccpap_ack_ackno =
+                       ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
+               /* kmalloc() does not zero memory: clear both nonce fields */
+               ap->dccpap_buf_nonce = ap->dccpap_ack_nonce = 0;
+               ap->dccpap_ack_ptr   = 0;
+               ap->dccpap_time.tv_sec = 0;
+               ap->dccpap_time.tv_usec = 0;
+               ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0;
+       }
+
+       return ap;
+}
+
+void dccp_ackpkts_free(struct dccp_ackpkts *ap)
+{
+       if (ap != NULL) {
+#ifdef CONFIG_IP_DCCP_DEBUG
+               memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
+#endif
+               kfree(ap);
+       }
+}
+
+static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap,
+                                   const unsigned int index)
+{
+       return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; /* unshifted */
+}
+
+static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap,
+                                 const unsigned int index)
+{
+       return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; /* run length */
+}
+
+/*
+ * Move the buffer head back by @packets slots and store @state there. The
+ * packets - 1 slots skipped over are entered as NOT_RECEIVED bytes with run
+ * length 0, which simplifies table updates if a missing packet arrives.
+ */
+static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
+                                                 const unsigned int packets,
+                                                 const unsigned char state)
+{
+       unsigned int gap = packets - 1;
+       signed long new_head;
+
+       if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
+               return -ENOBUFS;        /* no room for @packets more slots */
+
+       /* Subtract as signed so a wrap below 0 is also seen on 64 bits */
+       new_head = (signed long)ap->dccpap_buf_head - (signed long)packets;
+
+       if (new_head < 0) {
+               if (gap > 0) {
+                       memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
+                              gap + new_head + 1);
+                       gap = -new_head - 1; /* rest, up to buffer end */
+               }
+               new_head += ap->dccpap_buf_len;
+       }
+
+       ap->dccpap_buf_head = new_head;
+
+       if (gap > 0)
+               memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
+                      DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);
+
+       ap->dccpap_buf[ap->dccpap_buf_head] = state;
+       ap->dccpap_buf_vector_len += packets;
+       return 0;
+}
+
+/* Record @ackno with @state as in draft-ietf-dccp-spec-11.txt Appendix A.
+ * Returns 0, -ENOBUFS if the buffer is full or -EILSEQ for a duplicate.
+ */
+int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state)
+{
+       /*
+        * Check at the right places if the buffer is full, if it is, tell the
+        * caller to start dropping packets till the HC-Sender acks our ACK
+        * vectors, when we will free up space in dccpap_buf.
+        *
+        * We may well decide to do buffer compression, etc, but for now lets
+        * just drop.
+        *
+        * From Appendix A:
+        *
+        *      Of course, the circular buffer may overflow, either when the
+        *      HC-Sender is sending data at a very high rate, when the
+        *      HC-Receiver's acknowledgements are not reaching the HC-Sender,
+        *      or when the HC-Sender is forgetting to acknowledge those acks
+        *      (so the HC-Receiver is unable to clean up old state). In this
+        *      case, the HC-Receiver should either compress the buffer (by
+        *      increasing run lengths when possible), transfer its state to
+        *      a larger buffer, or, as a last resort, drop all received
+        *      packets, without processing them whatsoever, until its buffer
+        *      shrinks again.
+        */
+
+       /* See if this is the first ackno being inserted */
+       if (ap->dccpap_buf_vector_len == 0) {
+               ap->dccpap_buf[ap->dccpap_buf_head] = state;
+               ap->dccpap_buf_vector_len = 1;
+       } else if (after48(ackno, ap->dccpap_buf_ackno)) {
+               const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno,
+                                                  ackno);
+
+               /*
+                * Look if the state of this packet is the same as the
+                * previous ackno and if so if we can bump the head len.
+                */
+               if (delta == 1 &&
+                   dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state &&
+                   (dccp_ackpkts_len(ap, ap->dccpap_buf_head) <
+                    DCCP_ACKPKTS_LEN_MASK))
+                       ap->dccpap_buf[ap->dccpap_buf_head]++;
+               else if (dccp_ackpkts_set_buf_head_state(ap, delta, state))
+                       return -ENOBUFS;
+       } else {
+               /*
+                * A.1.2.  Old Packets
+                *
+                *      When a packet with Sequence Number S arrives, and
+                *      S <= buf_ackno, the HC-Receiver will scan the table
+                *      for the byte corresponding to S. (Indexing structures
+                *      could reduce the complexity of this scan.)
+                */
+               u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno);
+               unsigned int index = ap->dccpap_buf_head;
+
+               while (1) {
+                       const u8 len = dccp_ackpkts_len(ap, index);
+                       const u8 buf_state = dccp_ackpkts_state(ap, index);
+                       /*
+                        * Packets not yet in dccpap_buf have a reserved entry
+                        * with len 0: store the state passed in by the caller.
+                        */
+                       if (buf_state == DCCP_ACKPKTS_STATE_NOT_RECEIVED &&
+                           len == 0 && delta == 0) { /* Found our
+                                                        reserved seat! */
+                               dccp_pr_debug("Found %llu reserved seat!\n",
+                                             (unsigned long long) ackno);
+                               ap->dccpap_buf[index] = state;
+                               goto out;
+                       }
+                       /* len == 0 means one packet */
+                       if (delta < len + 1)
+                               goto out_duplicate;
+
+                       delta -= len + 1;
+                       if (++index == ap->dccpap_buf_len)
+                               index = 0;
+               }
+       }
+
+       ap->dccpap_buf_ackno = ackno;
+       do_gettimeofday(&ap->dccpap_time);
+out:
+       dccp_pr_debug("");
+       dccp_ackpkts_print(ap);
+       return 0;
+
+out_duplicate:
+       /* Duplicate packet */
+       dccp_pr_debug("Received a dup or already considered lost "
+                     "packet: %llu\n", (unsigned long long) ackno);
+       return -EILSEQ;
+}
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+void dccp_ackvector_print(const u64 ackno, const unsigned char *vector,
+                         int len)
+{
+       if (!dccp_debug)
+               return;
+
+       printk("ACK vector len=%d, ackno=%llu |", len,
+              (unsigned long long) ackno);
+
+       /* One "state,run length|" pair is printed per vector byte */
+       for (; len-- != 0; ++vector) {
+               const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
+               const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
+
+               printk("%d,%d|", state, rl);
+       }
+
+       printk("\n");
+}
+
+void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
+{      /* XXX: prints linearly from head, ignoring buffer wrap-around */
+       dccp_ackvector_print(ap->dccpap_buf_ackno,
+                            ap->dccpap_buf + ap->dccpap_buf_head,
+                            ap->dccpap_buf_vector_len);
+}
+#endif
+
+static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap)
+{
+       /*
+        * Shrink the vector by the size of the ack vector we sent. As we
+        * track both the ack vector size (dccpap_buf_vector_len) and the sent
+        * ack vector size (dccpap_ack_vector_len) we don't need
+        * dccpap_buf_tail at all, but the code below is kept as in the future
+        * we'll implement a vector of ack records, as suggested in
+        * draft-ietf-dccp-spec-11.txt Appendix A. -acme
+        */
+#if 0
+       ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
+       if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
+               ap->dccpap_buf_tail -= ap->dccpap_buf_len;
+#endif
+       ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
+}
+
+void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
+                                u64 ackno)
+{
+       /* DCCP_MAX_SEQNO + 1 here means we have no ACK vector outstanding */
+       if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
+               return;
+
+       if (ackno == ap->dccpap_ack_seqno) { /* our ACK vector got acked */
+#ifdef CONFIG_IP_DCCP_DEBUG
+               struct dccp_sock *dp = dccp_sk(sk);
+               const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+                                       "CLIENT rx ack: " : "server rx ack: ";
+#endif
+               dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
+                             "ack_ackno=%llu, ACKED!\n",
+                             debug_prefix, 1,
+                             (unsigned long long) ap->dccpap_ack_seqno,
+                             (unsigned long long) ap->dccpap_ack_ackno);
+               dccp_ackpkts_trow_away_ack_record(ap);
+               ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; /* none pending */
+       }
+}
+
+static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
+                                            struct sock *sk, u64 ackno,
+                                            const unsigned char len,
+                                            const unsigned char *vector)
+{
+       unsigned char i;        /* vector bytes left to scan */
+
+       /* Check if we actually sent an ACK vector */
+       if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
+               return;
+       /*
+        * We're in the receiver half connection, so if we received an ACK
+        * vector whose ackno (e.g. 50) is before dccpap_ack_seqno (e.g. 52),
+        * we're not interested.
+        *
+        * Extra explanation with example:
+        *
+        * if we received an ACK vector with ackno 50, it can only be acking
+        * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
+        */
+       /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */
+       if (before48(ackno, ap->dccpap_ack_seqno)) {
+               /* dccp_pr_debug_cat("yes\n"); */
+               return;
+       }
+       /* dccp_pr_debug_cat("no\n"); */
+
+       i = len;
+       while (i--) {
+               const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
+               u64 ackno_end_rl;
+
+               dccp_set_seqno(&ackno_end_rl, ackno - rl); /* end of run */
+
+               /*
+                * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
+                * ap->dccpap_ack_seqno, ackno);
+                */
+               if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
+                       const u8 state = (*vector &
+                                         DCCP_ACKPKTS_STATE_MASK) >> 6;
+                       /* dccp_pr_debug_cat("yes\n"); */
+
+                       if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
+#ifdef CONFIG_IP_DCCP_DEBUG
+                               struct dccp_sock *dp = dccp_sk(sk);
+                               const char *debug_prefix =
+                                       dp->dccps_role == DCCP_ROLE_CLIENT ?
+                                       "CLIENT rx ack: " : "server rx ack: ";
+#endif
+                               dccp_pr_debug("%sACK vector 0, len=%d, "
+                                             "ack_seqno=%llu, ack_ackno=%llu, "
+                                             "ACKED!\n",
+                                             debug_prefix, len,
+                                             (unsigned long long)
+                                             ap->dccpap_ack_seqno,
+                                             (unsigned long long)
+                                             ap->dccpap_ack_ackno);
+                               dccp_ackpkts_trow_away_ack_record(ap);
+                       }
+                       /*
+                        * If dccpap_ack_seqno was not received, no problem
+                        * we'll send another ACK vector.
+                        */
+                       ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
+                       break;
+               }
+               /* dccp_pr_debug_cat("no\n"); */
+
+               dccp_set_seqno(&ackno, ackno_end_rl - 1); /* next run */
+               ++vector;
+       }
+}
diff --git a/net/dccp/output.c b/net/dccp/output.c

new file mode 100644 (file)

index 0000000..28de157
--- /dev/null
+++ b/net/dccp/output.c
@@ -0,0 +1,528 @@
+/*
+ *  net/dccp/output.c
+ * 
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+
+#include <net/sock.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+static inline void dccp_event_ack_sent(struct sock *sk)
+{      /* used by dccp_transmit_skb() for packets that carry an ackno */
+       inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
+}
+
+/*
+ * All SKB's seen here are completely headerless. It is our
+ * job to build the DCCP header, and pass the packet down to
+ * IP so it can do the same plus pass the packet off to the
+ * device.
+ */
+int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
+{
+       if (likely(skb != NULL)) {
+               const struct inet_sock *inet = inet_sk(sk);
+               struct dccp_sock *dp = dccp_sk(sk);
+               struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+               struct dccp_hdr *dh;
+               /* XXX For now we're using only 48 bits sequence numbers */
+               const int dccp_header_size = sizeof(*dh) +
+                                            sizeof(struct dccp_hdr_ext) +
+                                         dccp_packet_hdr_len(dcb->dccpd_type);
+               int err, set_ack = 1;
+               u64 ackno = dp->dccps_gsr;
+
+               dccp_inc_seqno(&dp->dccps_gss); /* seqno for this packet */
+
+               switch (dcb->dccpd_type) {
+               case DCCP_PKT_DATA:
+                       set_ack = 0;    /* plain data carries no ackno */
+                       break;
+               case DCCP_PKT_SYNC:
+               case DCCP_PKT_SYNCACK:
+                       ackno = dcb->dccpd_seq; /* set by dccp_send_sync() */
+                       break;
+               }
+
+               dcb->dccpd_seq = dp->dccps_gss;
+               dccp_insert_options(sk, skb);
+               
+               skb->h.raw = skb_push(skb, dccp_header_size);
+               dh = dccp_hdr(skb);
+               /*
+                * Data packets are not cloned as they are never retransmitted
+                */
+               if (skb_cloned(skb))
+                       skb_set_owner_w(skb, sk);
+
+               /* Build DCCP header and checksum it. */
+               memset(dh, 0, dccp_header_size);
+               dh->dccph_type  = dcb->dccpd_type;
+               dh->dccph_sport = inet->sport;
+               dh->dccph_dport = inet->dport;
+               dh->dccph_doff  = (dccp_header_size + dcb->dccpd_opt_len) / 4;
+               dh->dccph_ccval = dcb->dccpd_ccval;
+               /* XXX For now we're using only 48 bits sequence numbers */
+               dh->dccph_x     = 1;
+
+               dp->dccps_awh = dp->dccps_gss;
+               dccp_hdr_set_seq(dh, dp->dccps_gss);
+               if (set_ack)
+                       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);
+
+               switch (dcb->dccpd_type) {
+               case DCCP_PKT_REQUEST:
+                       dccp_hdr_request(skb)->dccph_req_service =
+                                                       dcb->dccpd_service;
+                       break;
+               case DCCP_PKT_RESET:
+                       dccp_hdr_reset(skb)->dccph_reset_code =
+                                                       dcb->dccpd_reset_code;
+                       break;
+               }
+
+               dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr,
+                                                     inet->daddr);
+
+               if (set_ack)
+                       dccp_event_ack_sent(sk);
+
+               DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+
+               err = ip_queue_xmit(skb, 0);
+               if (err <= 0)
+                       return err;
+
+               /* NET_XMIT_CN is special. It does not guarantee,
+                * that this packet is lost. It tells that device
+                * is about to start to drop packets or already
+                * drops some packets of the same priority and
+                * invokes us to send less aggressively.
+                */
+               return err == NET_XMIT_CN ? 0 : err;
+       }
+       return -ENOBUFS;        /* skb == NULL, e.g. a failed skb_clone() */
+}
+
+unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       int mss_now;
+
+       /*
+        * FIXME: we really should be using the af_specific thing to support
+        *        IPv6.
+        * mss_now = pmtu - tp->af_specific->net_header_len -
+        *           sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
+        */
+       mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) -
+                 sizeof(struct dccp_hdr_ext);
+
+       /* Now subtract optional transport overhead */
+       mss_now -= dp->dccps_ext_header_len;
+
+       /*
+        * FIXME: this should come from the CCID infrastructure, where, say,
+        * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
+        * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
+        * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
+        * make it a multiple of 4
+        */
+
+       mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; /* == 40 */
+
+       /* And store cached results */
+       dp->dccps_pmtu_cookie = pmtu;
+       dp->dccps_mss_cache = mss_now;
+
+       return mss_now;
+}
+
+void dccp_write_space(struct sock *sk)
+{
+       read_lock(&sk->sk_callback_lock); /* held while sk_sleep is used */
+
+       if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+               wake_up_interruptible(sk->sk_sleep);
+       /* Should agree with poll, otherwise some programs break */
+       if (sock_writeable(sk))
+               sk_wake_async(sk, 2, POLL_OUT);
+
+       read_unlock(&sk->sk_callback_lock);
+}
+
+/**
+ * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
+ * @sk: socket to wait for
+ * @skb: packet the ccid is asked about
+ * @timeo: for how long, reduced by the time actually slept
+ */
+static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
+                             long *timeo)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       DEFINE_WAIT(wait);
+       long delay;
+       int rc;
+
+       while (1) {
+               prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+               if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+                       goto do_error;
+               if (!*timeo)
+                       goto do_nonblock;
+               if (signal_pending(current))
+                       goto do_interrupted;
+               rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
+                                           skb->len);
+               if (rc <= 0)            /* 0: go ahead, < 0: error */
+                       break;
+               delay = msecs_to_jiffies(rc);   /* rc > 0: wait, in msecs */
+               if (delay > *timeo || delay < 0)
+                       goto do_nonblock;
+
+               sk->sk_write_pending++;
+               release_sock(sk);
+               *timeo -= delay - schedule_timeout(delay); /* time slept */
+               lock_sock(sk);
+               sk->sk_write_pending--;
+       }
+out:
+       finish_wait(sk->sk_sleep, &wait);
+       return rc;
+
+do_error:
+       rc = -EPIPE;            /* socket error or send side shut down */
+       goto out;
+do_nonblock:
+       rc = -EAGAIN;           /* no (or not enough) time left to wait */
+       goto out;
+do_interrupted:
+       rc = sock_intr_errno(*timeo);
+       goto out;
+}
+
+int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
+{
+       const struct dccp_sock *dp = dccp_sk(sk);
+       int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
+                                        skb->len);
+
+       if (err > 0)            /* not yet: wait for the ccid */
+               err = dccp_wait_for_ccid(sk, skb, timeo);
+
+       if (err == 0) {
+               const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
+               struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+               const int len = skb->len; /* saved before skb is handed off */
+
+               if (sk->sk_state == DCCP_PARTOPEN) {
+                       /* See 8.1.5.  Handshake Completion */
+                       inet_csk_schedule_ack(sk);
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                                 inet_csk(sk)->icsk_rto,
+                                                 DCCP_RTO_MAX);
+                       dcb->dccpd_type = DCCP_PKT_DATAACK;
+                       /*
+                        * FIXME: we really should have a
+                        * dccps_ack_pending or use icsk.
+                        */
+               } else if (inet_csk_ack_scheduled(sk) ||
+                          dp->dccps_timestamp_echo != 0 ||
+                          (dp->dccps_options.dccpo_send_ack_vector &&
+                           ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
+                           ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
+                       dcb->dccpd_type = DCCP_PKT_DATAACK;
+               else
+                       dcb->dccpd_type = DCCP_PKT_DATA;
+
+               err = dccp_transmit_skb(sk, skb);
+               ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
+       }
+
+       return err;
+}
+
+int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+{
+       if (inet_sk_rebuild_header(sk) != 0)
+               return -EHOSTUNREACH; /* Routing failure or similar. */
+       /* Send a pskb_copy() of an already cloned skb, a clone otherwise */
+       if (skb_cloned(skb))
+               return dccp_transmit_skb(sk, pskb_copy(skb, GFP_ATOMIC));
+       return dccp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+}
+
+struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
+                                  struct request_sock *req)
+{
+       struct dccp_hdr *dh;
+       const int dccp_header_size = sizeof(struct dccp_hdr) +
+                                    sizeof(struct dccp_hdr_ext) +
+                                    sizeof(struct dccp_hdr_response);
+       struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
+                                              dccp_header_size, 1,
+                                          GFP_ATOMIC);
+       if (skb == NULL)
+               return NULL;    /* allocation failed */
+
+       /* Reserve space for headers. */
+       skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
+
+       skb->dst = dst_clone(dst);
+       skb->csum = 0;
+
+       DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
+       DCCP_SKB_CB(skb)->dccpd_seq  = dccp_rsk(req)->dreq_iss;
+       dccp_insert_options(sk, skb);
+
+       skb->h.raw = skb_push(skb, dccp_header_size);
+
+       dh = dccp_hdr(skb);
+       memset(dh, 0, dccp_header_size);
+
+       dh->dccph_sport = inet_sk(sk)->sport;
+       dh->dccph_dport = inet_rsk(req)->rmt_port;
+       dh->dccph_doff  = (dccp_header_size +
+                          DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
+       dh->dccph_type  = DCCP_PKT_RESPONSE;
+       dh->dccph_x     = 1;    /* as in dccp_transmit_skb(): 48 bit seqnos */
+       dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
+       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);
+
+       dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
+                                             inet_rsk(req)->rmt_addr);
+
+       DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+       return skb;             /* built only, sending is up to the caller */
+}
+
+struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
+                               const enum dccp_reset_codes code)
+                                  
+{
+       struct dccp_hdr *dh;
+       struct dccp_sock *dp = dccp_sk(sk);
+       const int dccp_header_size = sizeof(struct dccp_hdr) +
+                                    sizeof(struct dccp_hdr_ext) +
+                                    sizeof(struct dccp_hdr_reset);
+       struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
+                                              dccp_header_size, 1,
+                                          GFP_ATOMIC);
+       if (skb == NULL)
+               return NULL;    /* allocation failed */
+
+       /* Reserve space for headers. */
+       skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
+
+       skb->dst = dst_clone(dst);
+       skb->csum = 0;
+
+       dccp_inc_seqno(&dp->dccps_gss); /* seqno used for this reset */
+
+       DCCP_SKB_CB(skb)->dccpd_reset_code = code;
+       DCCP_SKB_CB(skb)->dccpd_type       = DCCP_PKT_RESET;
+       DCCP_SKB_CB(skb)->dccpd_seq        = dp->dccps_gss;
+       dccp_insert_options(sk, skb);
+
+       skb->h.raw = skb_push(skb, dccp_header_size);
+
+       dh = dccp_hdr(skb);
+       memset(dh, 0, dccp_header_size);
+
+       dh->dccph_sport = inet_sk(sk)->sport;
+       dh->dccph_dport = inet_sk(sk)->dport;
+       dh->dccph_doff  = (dccp_header_size +
+                          DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
+       dh->dccph_type  = DCCP_PKT_RESET;
+       dh->dccph_x     = 1;    /* as in dccp_transmit_skb(): 48 bit seqnos */
+       dccp_hdr_set_seq(dh, dp->dccps_gss);
+       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
+
+       dccp_hdr_reset(skb)->dccph_reset_code = code;
+
+       dh->dccph_checksum = dccp_v4_checksum(skb, inet_sk(sk)->saddr,
+                                             inet_sk(sk)->daddr);
+
+       DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+       return skb;             /* built only, sending is up to the caller */
+}
+
+/*
+ * Do all connect socket setups that can be done AF independent.
+ */
+static inline void dccp_connect_init(struct sock *sk)
+{
+       struct dst_entry *dst = __sk_dst_get(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       sk->sk_err = 0;
+       sock_reset_flag(sk, SOCK_DONE);
+       
+       dccp_sync_mss(sk, dst_mtu(dst)); /* NOTE(review): dst != NULL? */
+
+       /*
+        * FIXME: set dp->{dccps_swh,dccps_swl}, with
+        * something like dccp_inc_seq
+        */
+
+       icsk->icsk_retransmits = 0;
+}
+
+int dccp_connect(struct sock *sk)
+{
+       struct sk_buff *skb;
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       dccp_connect_init(sk);
+
+       skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
+       if (unlikely(skb == NULL))
+               return -ENOBUFS;
+
+       /* Reserve space for headers. */
+       skb_reserve(skb, MAX_DCCP_HEADER);
+
+       DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
+       /* FIXME: set service to something meaningful, coming
+        * from userspace */
+       DCCP_SKB_CB(skb)->dccpd_service = 0;
+       skb->csum = 0;
+       skb_set_owner_w(skb, sk);
+
+       BUG_TRAP(sk->sk_send_head == NULL);
+       sk->sk_send_head = skb; /* original is kept, a clone is sent */
+       dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); /* result unused */
+       DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
+
+       /* Timer for repeating the REQUEST until an answer. */
+       inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                 icsk->icsk_rto, DCCP_RTO_MAX);
+       return 0;
+}
+
+void dccp_send_ack(struct sock *sk)
+{
+       /* If we have been reset, we may not send again. */
+       if (sk->sk_state != DCCP_CLOSED) {
+               struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
+
+               if (skb == NULL) {      /* retry later via the DACK timer */
+                       inet_csk_schedule_ack(sk);
+                       inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                                 TCP_DELACK_MAX,
+                                                 DCCP_RTO_MAX);
+                       return;
+               }
+
+               /* Reserve space for headers */
+               skb_reserve(skb, MAX_DCCP_HEADER);
+               skb->csum = 0;
+               DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
+               skb_set_owner_w(skb, sk);
+               dccp_transmit_skb(sk, skb);     /* result is not checked */
+       }
+}
+
+EXPORT_SYMBOL_GPL(dccp_send_ack);
+
+void dccp_send_delayed_ack(struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       /*
+        * FIXME: tune this timer. elapsed time fixes the skew, so no problem
+        * with using 2s, and active senders also piggyback the ACK into a
+        * DATAACK packet, so this is really for quiescent senders.
+        */
+       unsigned long timeout = jiffies + 2 * HZ;
+
+       /* Use new timeout only if there wasn't an older one earlier. */
+       if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
+               /* If delack timer was blocked or is about to expire,
+                * send ACK now.
+                *
+                * FIXME: check the "about to expire" part
+                */
+               if (icsk->icsk_ack.blocked) {
+                       dccp_send_ack(sk);
+                       return;
+               }
+
+               if (!time_before(timeout, icsk->icsk_ack.timeout))
+                       timeout = icsk->icsk_ack.timeout; /* keep earlier */
+       }
+       icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+       icsk->icsk_ack.timeout = timeout;
+       sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
+}
+
+void dccp_send_sync(struct sock *sk, const u64 seq,
+                   const enum dccp_pkt_type pkt_type)
+{
+       /*
+        * We are not putting this on the write queue, so the
+        * ownership is set to this sock right here, before the
+        * skb is handed to dccp_transmit_skb().
+        */
+       struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
+
+       if (skb == NULL)
+               /* FIXME: how to make sure the sync is sent? */
+               return;
+
+       /* Reserve space for headers and prepare control bits. */
+       skb_reserve(skb, MAX_DCCP_HEADER);
+       skb->csum = 0;
+       DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
+       DCCP_SKB_CB(skb)->dccpd_seq = seq; /* sent as ackno for SYNC/SYNCACK */
+
+       skb_set_owner_w(skb, sk);
+       dccp_transmit_skb(sk, skb);
+}
+
+/*
+ * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
+ * should not be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame,
+ * but XXX: for now an allocation failure below just returns silently.
+ */
+void dccp_send_close(struct sock *sk, const int active)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct sk_buff *skb;
+       const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC;
+
+       skb = alloc_skb(sk->sk_prot->max_header, prio);
+       if (skb == NULL)
+               return;
+
+       /* Reserve space for headers and prepare control bits. */
+       skb_reserve(skb, sk->sk_prot->max_header);
+       skb->csum = 0;
+       DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
+                                       DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
+
+       skb_set_owner_w(skb, sk);
+       if (active) {
+               BUG_TRAP(sk->sk_send_head == NULL);
+               sk->sk_send_head = skb; /* original is kept, a clone is sent */
+               dccp_transmit_skb(sk, skb_clone(skb, prio));
+       } else
+               dccp_transmit_skb(sk, skb);
+
+       ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
+       ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
+}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c

new file mode 100644 (file)

index 0000000..18a0e69
--- /dev/null
+++ b/net/dccp/proto.c
@@ -0,0 +1,826 @@
+/*
+ *  net/dccp/proto.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or modify it
+ *     under the terms of the GNU General Public License version 2 as
+ *     published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <net/checksum.h>
+
+#include <net/inet_common.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/sock.h>
+#include <net/xfrm.h>
+
+#include <asm/semaphore.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <linux/poll.h>
+#include <linux/dccp.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
+
+atomic_t dccp_orphan_count = ATOMIC_INIT(0);
+
+static struct net_protocol dccp_protocol = {
+       .handler        = dccp_v4_rcv,
+       .err_handler    = dccp_v4_err,
+};
+
+/*
+ * dccp_packet_name - map a DCCP packet type to a printable name
+ * @type: packet type value, normally one of the DCCP_PKT_* constants
+ *
+ * Returns a pointer to a static string. Values outside
+ * [0, DCCP_NR_PKT_TYPES), and in-range values that have no entry in the
+ * table, yield "INVALID": the result is never NULL and a negative @type
+ * never indexes in front of the table.
+ */
+const char *dccp_packet_name(const int type)
+{
+       static const char *const dccp_packet_names[] = {
+               [DCCP_PKT_REQUEST]  = "REQUEST",
+               [DCCP_PKT_RESPONSE] = "RESPONSE",
+               [DCCP_PKT_DATA]     = "DATA",
+               [DCCP_PKT_ACK]      = "ACK",
+               [DCCP_PKT_DATAACK]  = "DATAACK",
+               [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
+               [DCCP_PKT_CLOSE]    = "CLOSE",
+               [DCCP_PKT_RESET]    = "RESET",
+               [DCCP_PKT_SYNC]     = "SYNC",
+               [DCCP_PKT_SYNCACK]  = "SYNCACK",
+       };
+
+       /* @type is signed: reject negative values before indexing. */
+       if (type < 0 || type >= DCCP_NR_PKT_TYPES ||
+           dccp_packet_names[type] == NULL)
+               return "INVALID";
+
+       return dccp_packet_names[type];
+}
+
+EXPORT_SYMBOL_GPL(dccp_packet_name);
+
+/*
+ * dccp_state_name - map a DCCP socket state to a printable name
+ * @state: socket state value, normally one of the DCCP_* state constants
+ *
+ * Returns a pointer to a static string. Values outside
+ * [0, DCCP_MAX_STATES), and in-range values that have no entry in the
+ * table, yield "INVALID STATE!": the result is never NULL and a negative
+ * @state never indexes in front of the table.
+ */
+const char *dccp_state_name(const int state)
+{
+       static const char *const dccp_state_names[] = {
+       [DCCP_OPEN]       = "OPEN",
+       [DCCP_REQUESTING] = "REQUESTING",
+       [DCCP_PARTOPEN]   = "PARTOPEN",
+       [DCCP_LISTEN]     = "LISTEN",
+       [DCCP_RESPOND]    = "RESPOND",
+       [DCCP_CLOSING]    = "CLOSING",
+       [DCCP_TIME_WAIT]  = "TIME_WAIT",
+       [DCCP_CLOSED]     = "CLOSED",
+       };
+
+       /*
+        * @state is signed: reject negative values before indexing. Slots
+        * not named by the designated initializers above are NULL and are
+        * reported as invalid too.
+        */
+       if (state < 0 || state >= DCCP_MAX_STATES ||
+           dccp_state_names[state] == NULL)
+               return "INVALID STATE!";
+
+       return dccp_state_names[state];
+}
+
+EXPORT_SYMBOL_GPL(dccp_state_name);
+
+/*
+ * Put the socket into the listening role and start the generic
+ * connection-sock listen machinery. Returns whatever
+ * inet_csk_listen_start() returns.
+ */
+static inline int dccp_listen_start(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       dp->dccps_role = DCCP_ROLE_LISTEN;
+
+       return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
+}
+
+/*
+ * dccp_disconnect - abort the connection and reset the socket for reuse
+ * @sk:    socket to disconnect
+ * @flags: not used by this implementation
+ *
+ * Moves the socket to DCCP_CLOSED, stops listening or flags ECONNRESET
+ * depending on the previous state, then drops timers, queued packets, the
+ * destination port, the cached route and (unless the address is locked by
+ * the user) the source address.
+ *
+ * Always returns 0: err is never modified after its initialisation.
+ */
+int dccp_disconnect(struct sock *sk, int flags)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct inet_sock *inet = inet_sk(sk);
+       int err = 0;
+       const int old_state = sk->sk_state;
+
+       if (old_state != DCCP_CLOSED)
+               dccp_set_state(sk, DCCP_CLOSED);
+
+       /* ABORT function of RFC793 */
+       if (old_state == DCCP_LISTEN) {
+               inet_csk_listen_stop(sk);
+       /* FIXME: do the active reset thing */
+       } else if (old_state == DCCP_REQUESTING)
+               sk->sk_err = ECONNRESET;
+
+       /* Drop pending timers and everything still queued on the socket. */
+       dccp_clear_xmit_timers(sk);
+       __skb_queue_purge(&sk->sk_receive_queue);
+       if (sk->sk_send_head != NULL) {
+               __kfree_skb(sk->sk_send_head);
+               sk->sk_send_head = NULL;
+       }
+
+       inet->dport = 0;
+
+       /* Keep the source address only if the user bound it explicitly. */
+       if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+               inet_reset_saddr(sk);
+
+       sk->sk_shutdown = 0;
+       sock_reset_flag(sk, SOCK_DONE);
+
+       icsk->icsk_backoff = 0;
+       inet_csk_delack_init(sk);
+       __sk_dst_reset(sk);
+
+       /* A socket that still has a local port must still be bind-hashed. */
+       BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
+
+       sk->sk_error_report(sk);
+       return err;
+}
+
+/*
+ *     Wait for a DCCP event.
+ *
+ *     Note that we don't need to lock the socket, as the upper poll layers
+ *     take care of normal races (between the test and the event) and we don't
+ *     go look at any of the socket buffers directly.
+ *
+ *     Returns the POLL* event mask for the socket; a listening socket is
+ *     delegated to inet_csk_listen_poll().
+ */
+static unsigned int dccp_poll(struct file *file, struct socket *sock,
+                             poll_table *wait)
+{
+       unsigned int mask;
+       struct sock *sk = sock->sk;
+
+       poll_wait(file, sk->sk_sleep, wait);
+       if (sk->sk_state == DCCP_LISTEN)
+               return inet_csk_listen_poll(sk);
+
+       /* Socket is not locked. We are protected from async events
+          by poll logic, and correct handling of state changes
+          made by other threads is impossible in any case.
+        */
+
+       mask = 0;
+       if (sk->sk_err)
+               mask = POLLERR;
+
+       /* Hang-up: both directions shut down, or the socket is closed. */
+       if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
+               mask |= POLLHUP;
+       /* A receive shutdown is reported as readable. */
+       if (sk->sk_shutdown & RCV_SHUTDOWN)
+               mask |= POLLIN | POLLRDNORM;
+
+       /* Connected? */
+       if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
+               /* Readable as soon as any receive memory is charged. */
+               if (atomic_read(&sk->sk_rmem_alloc) > 0)
+                       mask |= POLLIN | POLLRDNORM;
+
+               if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+                       if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+                               mask |= POLLOUT | POLLWRNORM;
+                       } else {  /* send SIGIO later */
+                               set_bit(SOCK_ASYNC_NOSPACE,
+                                       &sk->sk_socket->flags);
+                               set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+
+                               /* Race breaker. If space is freed after
+                                * wspace test but before the flags are set,
+                                * IO signal will be lost.
+                                */
+                               if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+                                       mask |= POLLOUT | POLLWRNORM;
+                       }
+               }
+       }
+       return mask;
+}
+
+/*
+ * dccp_ioctl - protocol-level ioctl handler (stub)
+ *
+ * No DCCP-specific ioctl is implemented: every command is answered with
+ * -ENOIOCTLCMD after emitting a debug message.
+ */
+int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+       dccp_pr_debug("entry\n");
+       return -ENOIOCTLCMD;
+}
+
+/*
+ * dccp_setsockopt - set a DCCP-level socket option
+ * @sk:      socket to operate on
+ * @level:   option level; anything other than SOL_DCCP is forwarded to
+ *           ip_setsockopt()
+ * @optname: option to set; only DCCP_SOCKOPT_PACKET_SIZE is handled
+ * @optval:  user pointer to the new value, read as an int
+ * @optlen:  size of the user buffer; must be at least sizeof(int)
+ *
+ * Returns 0 on success, -EINVAL if @optlen is too small (negative values
+ * included), -EFAULT if @optval cannot be read and -ENOPROTOOPT for an
+ * unknown @optname.
+ */
+int dccp_setsockopt(struct sock *sk, int level, int optname,
+                   char __user *optval, int optlen)
+{
+       struct dccp_sock *dp;
+       int err;
+       int val;
+
+       if (level != SOL_DCCP)
+               return ip_setsockopt(sk, level, optname, optval, optlen);
+
+       /*
+        * Compare as signed: sizeof() is unsigned, so without the cast a
+        * negative optlen would be converted to a huge value and accepted.
+        */
+       if (optlen < (int)sizeof(int))
+               return -EINVAL;
+
+       if (get_user(val, (int __user *)optval))
+               return -EFAULT;
+
+       lock_sock(sk);
+
+       dp = dccp_sk(sk);
+       err = 0;
+
+       switch (optname) {
+       case DCCP_SOCKOPT_PACKET_SIZE:
+               /* NOTE(review): the value is stored without a range check. */
+               dp->dccps_packet_size = val;
+               break;
+       default:
+               err = -ENOPROTOOPT;
+               break;
+       }
+
+       release_sock(sk);
+       return err;
+}
+
+/*
+ * dccp_getsockopt - read a DCCP-level socket option
+ * @sk:      socket to operate on
+ * @level:   option level; anything other than SOL_DCCP is forwarded to
+ *           ip_getsockopt()
+ * @optname: option to read; only DCCP_SOCKOPT_PACKET_SIZE is handled
+ * @optval:  user buffer receiving the value
+ * @optlen:  in: size of @optval; out: number of bytes written, at most
+ *           sizeof(int)
+ *
+ * Returns 0 on success, -EINVAL for a negative length, -EFAULT if a user
+ * pointer cannot be accessed and -ENOPROTOOPT for an unknown @optname.
+ * The socket lock is not taken.
+ */
+int dccp_getsockopt(struct sock *sk, int level, int optname,
+                   char __user *optval, int __user *optlen)
+{
+       struct dccp_sock *dp;
+       int val, len;
+
+       if (level != SOL_DCCP)
+               return ip_getsockopt(sk, level, optname, optval, optlen);
+
+       if (get_user(len, optlen))
+               return -EFAULT;
+
+       /*
+        * Reject negative lengths before clamping: min_t(unsigned int, ...)
+        * turns a negative length into sizeof(int), so a check placed
+        * after the clamp can never trigger.
+        */
+       if (len < 0)
+               return -EINVAL;
+
+       len = min_t(unsigned int, len, sizeof(int));
+
+       dp = dccp_sk(sk);
+
+       switch (optname) {
+       case DCCP_SOCKOPT_PACKET_SIZE:
+               val = dp->dccps_packet_size;
+               break;
+       default:
+               return -ENOPROTOOPT;
+       }
+
+       if (put_user(len, optlen) || copy_to_user(optval, &val, len))
+               return -EFAULT;
+
+       return 0;
+}
+
+/*
+ * dccp_sendmsg - send one datagram on a DCCP socket
+ * @iocb: not used by this implementation
+ * @sk:   socket to send on
+ * @msg:  message header; msg_iov supplies the payload and msg_flags is
+ *        checked for MSG_DONTWAIT
+ * @len:  payload length; must not exceed dp->dccps_mss_cache
+ *
+ * The whole payload goes into a single skb, so packet boundaries are kept.
+ * Returns @len on success, -EMSGSIZE if the payload is too large, or the
+ * error reported by the connect wait, the skb allocation, the copy from
+ * user space or dccp_write_xmit().
+ */
+int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+                size_t len)
+{
+       const struct dccp_sock *dp = dccp_sk(sk);
+       const int flags = msg->msg_flags;
+       const int noblock = flags & MSG_DONTWAIT;
+       struct sk_buff *skb;
+       int rc, size;
+       long timeo;
+
+       if (len > dp->dccps_mss_cache)
+               return -EMSGSIZE;
+
+       lock_sock(sk);
+       timeo = sock_sndtimeo(sk, noblock);
+
+       /*
+        * We have to use sk_stream_wait_connect here to set sk_write_pending,
+        * so that the trick in dccp_rcv_request_sent_state_process works.
+        */
+       /* Wait for a connection to finish. */
+       if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
+               if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
+                       goto out_release;
+
+       size = sk->sk_prot->max_header + len;
+       /* The socket lock is dropped around the skb allocation. */
+       release_sock(sk);
+       skb = sock_alloc_send_skb(sk, size, noblock, &rc);
+       lock_sock(sk);
+       if (skb == NULL)
+               goto out_release;
+
+       skb_reserve(skb, sk->sk_prot->max_header);
+       rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+       if (rc != 0)
+               goto out_discard;
+
+       rc = dccp_write_xmit(sk, skb, &timeo);
+       /*
+        * XXX we don't use sk_write_queue, so just discard the packet.
+        *     Current plan however is to _use_ sk_write_queue with
+        *     an algorithm similar to tcp_sendmsg, where the main difference
+        *     is that in DCCP we have to respect packet boundaries, so
+        *     no coalescing of skbs.
+        *
+        *     This bug was _quickly_ found & fixed by just looking at an OSTRA
+        *     generated callgraph 8) -acme
+        */
+       if (rc != 0)
+               goto out_discard;
+out_release:
+       release_sock(sk);
+       /* rc is 0 only on success, in which case the full length is reported. */
+       return rc ? : len;
+out_discard:
+       kfree_skb(skb);
+       goto out_release;
+}
+
+/*
+ * dccp_recvmsg - receive one datagram from a DCCP socket
+ * @iocb:     not used by this implementation
+ * @sk:       socket to receive from
+ * @msg:      message header; msg_iov receives the payload and MSG_TRUNC is
+ *            set in msg_flags when the packet is longer than @len
+ * @len:      size of the user buffer
+ * @nonblock: non-zero for a non-blocking receive
+ * @flags:    receive flags; only MSG_PEEK is examined
+ * @addr_len: not used by this implementation
+ *
+ * Looks at the head of the receive queue: DATA/DATAACK packets are copied
+ * to the user, RESET/CLOSE packets end the stream (0 is returned), any
+ * other packet type is dropped and the socket status is checked before
+ * waiting for more data.
+ *
+ * Returns the number of bytes copied, 0 at end of stream, or a negative
+ * errno. Note that @len is a size_t that is also used to carry the
+ * negative error codes back through the int return value.
+ */
+int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+                size_t len, int nonblock, int flags, int *addr_len)
+{
+       const struct dccp_hdr *dh;
+       long timeo;
+
+       lock_sock(sk);
+
+       if (sk->sk_state == DCCP_LISTEN) {
+               len = -ENOTCONN;
+               goto out;
+       }
+
+       timeo = sock_rcvtimeo(sk, nonblock);
+
+       do {
+               struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+
+               if (skb == NULL)
+                       goto verify_sock_status;
+
+               dh = dccp_hdr(skb);
+
+               if (dh->dccph_type == DCCP_PKT_DATA ||
+                   dh->dccph_type == DCCP_PKT_DATAACK)
+                       goto found_ok_skb;
+
+               if (dh->dccph_type == DCCP_PKT_RESET ||
+                   dh->dccph_type == DCCP_PKT_CLOSE) {
+                       dccp_pr_debug("found fin ok!\n");
+                       len = 0;
+                       goto found_fin_ok;
+               }
+               /* Neither data nor a termination packet: drop it. */
+               dccp_pr_debug("packet_type=%s\n",
+                             dccp_packet_name(dh->dccph_type));
+               sk_eat_skb(sk, skb);
+verify_sock_status:
+               if (sock_flag(sk, SOCK_DONE)) {
+                       len = 0;
+                       break;
+               }
+
+               if (sk->sk_err) {
+                       len = sock_error(sk);
+                       break;
+               }
+
+               if (sk->sk_shutdown & RCV_SHUTDOWN) {
+                       len = 0;
+                       break;
+               }
+
+               if (sk->sk_state == DCCP_CLOSED) {
+                       if (!sock_flag(sk, SOCK_DONE)) {
+                               /* This occurs when user tries to read
+                                * from never connected socket.
+                                */
+                               len = -ENOTCONN;
+                               break;
+                       }
+                       len = 0;
+                       break;
+               }
+
+               /* Non-blocking, or the receive timeout has expired. */
+               if (!timeo) {
+                       len = -EAGAIN;
+                       break;
+               }
+
+               if (signal_pending(current)) {
+                       len = sock_intr_errno(timeo);
+                       break;
+               }
+
+               sk_wait_data(sk, &timeo);
+               continue;
+       found_ok_skb:
+               /* Never copy more than the packet holds; flag truncation. */
+               if (len > skb->len)
+                       len = skb->len;
+               else if (len < skb->len)
+                       msg->msg_flags |= MSG_TRUNC;
+
+               if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
+                       /* Exception. Bailout! */
+                       len = -EFAULT;
+                       break;
+               }
+       found_fin_ok:
+               /* With MSG_PEEK the packet stays at the head of the queue. */
+               if (!(flags & MSG_PEEK))
+                       sk_eat_skb(sk, skb);
+               break;
+       } while (1);
+out:
+       release_sock(sk);
+       return len;
+}
+
+/*
+ * inet_dccp_listen - listen() handler for DCCP sockets
+ * @sock:    socket to switch to (or keep in) the listening state
+ * @backlog: new accept backlog limit
+ *
+ * Only an unconnected SOCK_DCCP socket whose state is CLOSED or LISTEN is
+ * accepted; anything else yields -EINVAL. A socket that already listens
+ * merely gets its backlog adjusted. Returns 0 on success or the error from
+ * dccp_listen_start().
+ */
+static int inet_dccp_listen(struct socket *sock, int backlog)
+{
+       struct sock *sk = sock->sk;
+       int err = -EINVAL;
+
+       lock_sock(sk);
+
+       if (sock->state == SS_UNCONNECTED && sock->type == SOCK_DCCP) {
+               const unsigned char cur_state = sk->sk_state;
+
+               if ((1 << cur_state) & (DCCPF_CLOSED | DCCPF_LISTEN)) {
+                       /*
+                        * FIXME: here it probably should be
+                        * sk->sk_prot->listen_start, see tcp_listen_start.
+                        * An already listening socket skips the start.
+                        */
+                       if (cur_state == DCCP_LISTEN)
+                               err = 0;
+                       else
+                               err = dccp_listen_start(sk);
+
+                       if (!err)
+                               sk->sk_max_ack_backlog = backlog;
+               }
+       }
+
+       release_sock(sk);
+       return err;
+}
+
+/*
+ * Close-time state transition table, indexed by the current sk_state.
+ * Each entry holds the state to move to, optionally OR-ed with
+ * DCCP_ACTION_FIN; dccp_close_state() splits the two with DCCP_STATE_MASK.
+ */
+static const unsigned char dccp_new_state[] = {
+       /* current state:   new state:      action:     */
+       [0]               = DCCP_CLOSED,
+       [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
+       [DCCP_REQUESTING] = DCCP_CLOSED,
+       [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
+       [DCCP_LISTEN]     = DCCP_CLOSED,
+       [DCCP_RESPOND]    = DCCP_CLOSED,
+       [DCCP_CLOSING]    = DCCP_CLOSED,
+       [DCCP_TIME_WAIT]  = DCCP_CLOSED,
+       [DCCP_CLOSED]     = DCCP_CLOSED,
+};
+
+/*
+ * Apply the close-time transition for the socket's current state, as
+ * listed in dccp_new_state[]. Returns non-zero (DCCP_ACTION_FIN) when the
+ * table entry carries that action bit, 0 otherwise.
+ */
+static int dccp_close_state(struct sock *sk)
+{
+       const int entry = dccp_new_state[sk->sk_state];
+       const int target = entry & DCCP_STATE_MASK;
+
+       /* Only touch the state when it actually changes. */
+       if (sk->sk_state != target)
+               dccp_set_state(sk, target);
+
+       return entry & DCCP_ACTION_FIN;
+}
+
+/*
+ * dccp_close - close a DCCP socket on behalf of the user
+ * @sk:      socket being closed
+ * @timeout: passed to sk_stream_wait_close()
+ *
+ * Shuts both directions down, discards unread data, then either
+ * disconnects immediately (SO_LINGER with a zero linger time) or sends a
+ * CLOSE/CLOSEREQ when dccp_close_state() asks for it. Afterwards the socket
+ * is orphaned; it is destroyed right away if it has reached DCCP_CLOSED,
+ * otherwise it is left for the protocol to finish.
+ */
+void dccp_close(struct sock *sk, long timeout)
+{
+       struct sk_buff *skb;
+
+       lock_sock(sk);
+
+       sk->sk_shutdown = SHUTDOWN_MASK;
+
+       if (sk->sk_state == DCCP_LISTEN) {
+               dccp_set_state(sk, DCCP_CLOSED);
+
+               /* Special case. */
+               inet_csk_listen_stop(sk);
+
+               goto adjudge_to_death;
+       }
+
+       /*
+        * We need to flush the recv. buffs.  We do this only on the
+        * descriptor close, not protocol-sourced closes, because the
+        * reader process may not have drained the data yet!
+        */
+       /* FIXME: check for unread data */
+       while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+               __kfree_skb(skb);
+       }
+
+       if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+               /* Check zero linger _after_ checking for unread data. */
+               sk->sk_prot->disconnect(sk, 0);
+       } else if (dccp_close_state(sk)) {
+               dccp_send_close(sk, 1);
+       }
+
+       sk_stream_wait_close(sk, timeout);
+
+adjudge_to_death:
+       /*
+        * It is the last release_sock in its life. It will remove backlog.
+        */
+       release_sock(sk);
+       /*
+        * Now socket is owned by kernel and we acquire BH lock
+        * to finish close. No need to check for user refs.
+        */
+       local_bh_disable();
+       bh_lock_sock(sk);
+       BUG_TRAP(!sock_owned_by_user(sk));
+
+       /* Extra reference, dropped by the sock_put() at the end. */
+       sock_hold(sk);
+       sock_orphan(sk);
+
+       /*
+        * The last release_sock may have processed the CLOSE or RESET
+        * packet moving sock to CLOSED state, if not we have to fire
+        * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
+        * in draft-ietf-dccp-spec-11. -acme
+        */
+       if (sk->sk_state == DCCP_CLOSING) {
+               /* FIXME: should start at 2 * RTT */
+               /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                         inet_csk(sk)->icsk_rto,
+                                         DCCP_RTO_MAX);
+#if 0
+               /* Yeah, we should use sk->sk_prot->orphan_count, etc */
+               dccp_set_state(sk, DCCP_CLOSED);
+#endif
+       }
+
+       atomic_inc(sk->sk_prot->orphan_count);
+       if (sk->sk_state == DCCP_CLOSED)
+               inet_csk_destroy_sock(sk);
+
+       /* Otherwise, socket is reprieved until protocol close. */
+
+       bh_unlock_sock(sk);
+       local_bh_enable();
+       sock_put(sk);
+}
+
+/*
+ * dccp_shutdown - protocol-level shutdown handler (stub)
+ *
+ * Not implemented: only emits a debug message; @how is ignored.
+ */
+void dccp_shutdown(struct sock *sk, int how)
+{
+       dccp_pr_debug("entry\n");
+}
+
+/*
+ * Socket-layer operations for PF_INET DCCP sockets. Everything is served
+ * by the generic inet_*, sock_common_* and sock_no_* helpers except poll
+ * and listen, which have DCCP-specific implementations in this file.
+ */
+static struct proto_ops inet_dccp_ops = {
+       .family         = PF_INET,
+       .owner          = THIS_MODULE,
+       .release        = inet_release,
+       .bind           = inet_bind,
+       .connect        = inet_stream_connect,
+       .socketpair     = sock_no_socketpair,
+       .accept         = inet_accept,
+       .getname        = inet_getname,
+       /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
+       .poll           = dccp_poll,
+       .ioctl          = inet_ioctl,
+       /* FIXME: work on inet_listen to rename it to sock_common_listen */
+       .listen         = inet_dccp_listen,
+       .shutdown       = inet_shutdown,
+       .setsockopt     = sock_common_setsockopt,
+       .getsockopt     = sock_common_getsockopt,
+       .sendmsg        = inet_sendmsg,
+       .recvmsg        = sock_common_recvmsg,
+       .mmap           = sock_no_mmap,
+       .sendpage       = sock_no_sendpage,
+};
+
+extern struct net_proto_family inet_family_ops;
+
+/*
+ * Protocol switch entry tying (SOCK_DCCP, IPPROTO_DCCP) to the DCCP proto
+ * and socket operations; registered in dccp_init() and unregistered in
+ * dccp_fini().
+ */
+static struct inet_protosw dccp_v4_protosw = {
+       .type           = SOCK_DCCP,
+       .protocol       = IPPROTO_DCCP,
+       .prot           = &dccp_v4_prot,
+       .ops            = &inet_dccp_ops,
+       .capability     = -1,
+       .no_check       = 0,
+       .flags          = 0,
+};
+
+/*
+ * This is the global socket data structure used for responding to
+ * the Out-of-the-blue (OOTB) packets. A control sock will be created
+ * for this socket at the initialization time.
+ */
+struct socket *dccp_ctl_socket;
+
+static char dccp_ctl_socket_err_msg[] __initdata =
+       KERN_ERR "DCCP: Failed to create the control socket.\n";
+
+/*
+ * Create the kernel-internal control socket stored in dccp_ctl_socket.
+ * Returns the result of sock_create_kern(); a failure is also logged.
+ */
+static int __init dccp_ctl_sock_init(void)
+{
+       struct sock *ctl_sk;
+       int rc;
+
+       rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
+                             &dccp_ctl_socket);
+       if (rc < 0) {
+               printk(dccp_ctl_socket_err_msg);
+               return rc;
+       }
+
+       ctl_sk = dccp_ctl_socket->sk;
+       ctl_sk->sk_allocation = GFP_ATOMIC;
+       inet_sk(ctl_sk)->uc_ttl = -1;
+
+       /*
+        * Take the socket out of the hash tables: it must never be found
+        * by IP input processing and so never sees incoming packets.
+        */
+       ctl_sk->sk_prot->unhash(ctl_sk);
+
+       return rc;
+}
+
+#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
+/* Release the control socket, if one exists, and forget about it. */
+void dccp_ctl_sock_exit(void)
+{
+       if (dccp_ctl_socket == NULL)
+               return;
+
+       sock_release(dccp_ctl_socket);
+       dccp_ctl_socket = NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
+#endif
+
+/*
+ * Allocate both per-cpu halves of the DCCP MIB counters.
+ * Returns 0 on success or -ENOMEM, in which case nothing stays allocated
+ * and dccp_statistics[0] is left NULL.
+ */
+static int __init init_dccp_v4_mibs(void)
+{
+       dccp_statistics[0] = alloc_percpu(struct dccp_mib);
+       if (dccp_statistics[0] == NULL)
+               return -ENOMEM;
+
+       dccp_statistics[1] = alloc_percpu(struct dccp_mib);
+       if (dccp_statistics[1] == NULL) {
+               /* Undo the first allocation so the caller sees all-or-nothing. */
+               free_percpu(dccp_statistics[0]);
+               dccp_statistics[0] = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static int thash_entries;
+module_param(thash_entries, int, 0444);
+MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+int dccp_debug;
+module_param(dccp_debug, int, 0444);
+MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
+#endif
+
+/*
+ * dccp_init - module initialisation
+ *
+ * Registers the DCCP proto, creates the bind-bucket slab cache, sizes and
+ * allocates the established and bind hash tables, allocates the MIB
+ * counters, hooks the protocol into the IPv4 stack and creates the control
+ * socket.
+ *
+ * Returns 0 on success or a negative errno; on failure everything set up
+ * so far is torn down again in reverse order.
+ */
+static int __init dccp_init(void)
+{
+       unsigned long goal;
+       int ehash_order, bhash_order, i;
+       int rc = proto_register(&dccp_v4_prot, 1);
+
+       if (rc)
+               goto out;
+
+       /*
+        * rc is 0 at this point: preload an error code so that the
+        * allocation failures below do not unwind everything and then
+        * report success to the module loader.
+        */
+       rc = -ENOMEM;
+
+       dccp_hashinfo.bind_bucket_cachep =
+               kmem_cache_create("dccp_bind_bucket",
+                                 sizeof(struct inet_bind_bucket), 0,
+                                 SLAB_HWCACHE_ALIGN, NULL, NULL);
+       if (!dccp_hashinfo.bind_bucket_cachep)
+               goto out_proto_unregister;
+
+       /*
+        * Size and allocate the main established and bind bucket
+        * hash tables.
+        *
+        * The methodology is similar to that of the buffer cache.
+        */
+       if (num_physpages >= (128 * 1024))
+               goal = num_physpages >> (21 - PAGE_SHIFT);
+       else
+               goal = num_physpages >> (23 - PAGE_SHIFT);
+
+       /* The thash_entries module parameter overrides the computed goal. */
+       if (thash_entries)
+               goal = (thash_entries *
+                       sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
+       for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
+               ;
+       /*
+        * Try progressively smaller orders until an allocation succeeds.
+        * ehash_size is half the number of buckets that fit, rounded down
+        * to a power of two; the table itself holds 2 * ehash_size buckets.
+        */
+       do {
+               dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
+                                       sizeof(struct inet_ehash_bucket);
+               dccp_hashinfo.ehash_size >>= 1;
+               while (dccp_hashinfo.ehash_size &
+                      (dccp_hashinfo.ehash_size - 1))
+                       dccp_hashinfo.ehash_size--;
+               dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
+                       __get_free_pages(GFP_ATOMIC, ehash_order);
+       } while (!dccp_hashinfo.ehash && --ehash_order > 0);
+
+       if (!dccp_hashinfo.ehash) {
+               printk(KERN_CRIT "Failed to allocate DCCP "
+                                "established hash table\n");
+               goto out_free_bind_bucket_cachep;
+       }
+
+       for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
+               rwlock_init(&dccp_hashinfo.ehash[i].lock);
+               INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
+       }
+
+       bhash_order = ehash_order;
+
+       /* Same retry scheme, with the bind hash capped at 64K buckets. */
+       do {
+               dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
+                                       sizeof(struct inet_bind_hashbucket);
+               if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
+                   bhash_order > 0)
+                       continue;
+               dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
+                       __get_free_pages(GFP_ATOMIC, bhash_order);
+       } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
+
+       if (!dccp_hashinfo.bhash) {
+               printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
+               goto out_free_dccp_ehash;
+       }
+
+       for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
+               spin_lock_init(&dccp_hashinfo.bhash[i].lock);
+               INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
+       }
+
+       /* Propagate the MIB allocation error instead of dropping it. */
+       rc = init_dccp_v4_mibs();
+       if (rc)
+               goto out_free_dccp_bhash;
+
+       rc = -EAGAIN;
+       if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
+               goto out_free_dccp_v4_mibs;
+
+       inet_register_protosw(&dccp_v4_protosw);
+
+       rc = dccp_ctl_sock_init();
+       if (rc)
+               goto out_unregister_protosw;
+out:
+       return rc;
+out_unregister_protosw:
+       inet_unregister_protosw(&dccp_v4_protosw);
+       inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
+out_free_dccp_v4_mibs:
+       free_percpu(dccp_statistics[0]);
+       free_percpu(dccp_statistics[1]);
+       dccp_statistics[0] = dccp_statistics[1] = NULL;
+out_free_dccp_bhash:
+       free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
+       dccp_hashinfo.bhash = NULL;
+out_free_dccp_ehash:
+       free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
+       dccp_hashinfo.ehash = NULL;
+out_free_bind_bucket_cachep:
+       kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+       dccp_hashinfo.bind_bucket_cachep = NULL;
+out_proto_unregister:
+       proto_unregister(&dccp_v4_prot);
+       goto out;
+}
+
+static const char dccp_del_proto_err_msg[] __exitdata =
+       KERN_ERR "can't remove dccp net_protocol\n";
+
+/*
+ * dccp_fini - module unload
+ *
+ * Undoes dccp_init(): unregisters the protocol from the IPv4 stack, then
+ * frees the MIB counters, both hash tables, the bind-bucket slab cache and
+ * finally the proto registration.
+ */
+static void __exit dccp_fini(void)
+{
+       inet_unregister_protosw(&dccp_v4_protosw);
+
+       if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
+               printk(dccp_del_proto_err_msg);
+
+       free_percpu(dccp_statistics[0]);
+       free_percpu(dccp_statistics[1]);
+       free_pages((unsigned long)dccp_hashinfo.bhash,
+                  get_order(dccp_hashinfo.bhash_size *
+                            sizeof(struct inet_bind_hashbucket)));
+       /*
+        * dccp_init() halves ehash_size after sizing the allocation and
+        * initialises ehash_size << 1 buckets, so the table really holds
+        * twice ehash_size entries. Compute the order from the full table,
+        * otherwise part of the pages would never be freed.
+        */
+       free_pages((unsigned long)dccp_hashinfo.ehash,
+                  get_order((dccp_hashinfo.ehash_size << 1) *
+                            sizeof(struct inet_ehash_bucket)));
+       kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+       proto_unregister(&dccp_v4_prot);
+}
+
+module_init(dccp_init);
+module_exit(dccp_fini);
+
+/*
+ * __stringify doesn't like enums, so use the SOCK_DCCP (6) and
+ * IPPROTO_DCCP (33) values directly. Also cover the case where the protocol
+ * is not specified, i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP.
+ */
+MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
+MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
+MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/timer.c b/net/dccp/timer.c

new file mode 100644 (file)

index 0000000..aa34b57
--- /dev/null
+++ b/net/dccp/timer.c
@@ -0,0 +1,255 @@
+/*
+ *  net/dccp/timer.c
+ * 
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+
+#include "dccp.h"
+
+static void dccp_write_timer(unsigned long data);
+static void dccp_keepalive_timer(unsigned long data);
+static void dccp_delack_timer(unsigned long data);
+
+/*
+ * Install the DCCP write, delayed-ACK and keepalive timer handlers on the
+ * connection socket.
+ */
+void dccp_init_xmit_timers(struct sock *sk)
+{
+       inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
+                                 &dccp_keepalive_timer);
+}
+
+/*
+ * Abort the connection after a write timeout: report the pending soft
+ * error (or ETIMEDOUT when there is none) to the user, send a
+ * RESET(aborted), finish the socket and count the event.
+ */
+static void dccp_write_err(struct sock *sk)
+{
+       sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
+       sk->sk_error_report(sk);
+
+       dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED);
+       dccp_done(sk);
+       DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
+}
+
+/*
+ * A write timeout has occurred. Process the after effects.
+ *
+ * Picks the retry limit for the current state (hard-coded stand-ins for the
+ * TCP sysctls for now) and gives negative advice on the cached route where
+ * appropriate.
+ *
+ * Returns 1 if the limit was reached and the connection has been aborted
+ * through dccp_write_err(), 0 if retransmission may go on.
+ */
+static int dccp_write_timeout(struct sock *sk)
+{
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       int retry_until;
+
+       if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
+               if (icsk->icsk_retransmits != 0)
+                       dst_negative_advice(&sk->sk_dst_cache);
+               retry_until = icsk->icsk_syn_retries ? :
+                           /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */;
+       } else {
+               if (icsk->icsk_retransmits >=
+                    /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) {
+                       /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu
+                          black hole detection. :-(
+
+                          It is place to make it. It is not made. I do not want
+                          to make it. It is disgusting. It does not work in any
+                          case. Let me to cite the same draft, which requires for
+                          us to implement this:
+
+   "The one security concern raised by this memo is that ICMP black holes
+   are often caused by over-zealous security administrators who block
+   all ICMP messages.  It is vitally important that those who design and
+   deploy security systems understand the impact of strict filtering on
+   upper-layer protocols.  The safest web site in the world is worthless
+   if most TCP implementations cannot transfer data from it.  It would
+   be far nicer to have all of the black holes fixed rather than fixing
+   all of the TCP implementations."
+
+                           Golden words :-).
+                  */
+
+                       dst_negative_advice(&sk->sk_dst_cache);
+               }
+
+               retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */;
+               /*
+                * FIXME: see tcp_write_timeout and tcp_out_of_resources
+                */
+       }
+
+       if (icsk->icsk_retransmits >= retry_until) {
+               /* Has it gone just too far? */
+               dccp_write_err(sk);
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff.
+ *
+ * @data: the struct sock pointer, cast to unsigned long by the timer code.
+ *
+ * Sends the delayed ACK if one is still scheduled. Re-arms itself when the
+ * socket is owned by the user or the deadline has not been reached yet.
+ * Drops one socket reference on exit.
+ */
+static void dccp_delack_timer(unsigned long data)
+{
+       struct sock *sk = (struct sock *)data;
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       bh_lock_sock(sk);
+       if (sock_owned_by_user(sk)) {
+               /* Try again later. */
+               icsk->icsk_ack.blocked = 1;
+               NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
+               sk_reset_timer(sk, &icsk->icsk_delack_timer,
+                              jiffies + TCP_DELACK_MIN);
+               goto out;
+       }
+
+       /* Nothing to do for a closed socket or when no ACK timer is pending. */
+       if (sk->sk_state == DCCP_CLOSED ||
+           !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+               goto out;
+       /* Fired early: re-arm for the real deadline. */
+       if (time_after(icsk->icsk_ack.timeout, jiffies)) {
+               sk_reset_timer(sk, &icsk->icsk_delack_timer,
+                              icsk->icsk_ack.timeout);
+               goto out;
+       }
+
+       icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
+
+       if (inet_csk_ack_scheduled(sk)) {
+               if (!icsk->icsk_ack.pingpong) {
+                       /* Delayed ACK missed: inflate ATO. */
+                       icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
+                                                icsk->icsk_rto);
+               } else {
+                       /* Delayed ACK missed: leave pingpong mode and
+                        * deflate ATO.
+                        */
+                       icsk->icsk_ack.pingpong = 0;
+                       icsk->icsk_ack.ato = TCP_ATO_MIN;
+               }
+               dccp_send_ack(sk);
+               NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
+       }
+out:
+       bh_unlock_sock(sk);
+       sock_put(sk);
+}
+
+/*
+ *     The DCCP retransmit timer.
+ *
+ *     Retransmits sk->sk_send_head unless dccp_write_timeout() has aborted
+ *     the connection. After a successful retransmission the RTO is doubled
+ *     (capped at DCCP_RTO_MAX) and the timer re-armed; after a failed one
+ *     the timer is re-armed without backing off.
+ */
+static void dccp_retransmit_timer(struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       /*
+        * sk->sk_send_head has to have one skb with
+        * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
+        * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake
+        * (PARTOPEN timer), etc).
+        */
+       BUG_TRAP(sk->sk_send_head != NULL);
+
+       /*
+        * More than 4MSL (8 minutes) has passed, a RESET(aborted) was
+        * sent, no need to retransmit, this sock is dead.
+        */
+       if (dccp_write_timeout(sk))
+               goto out;
+
+       /*
+        * We want to know the number of packets retransmitted, not the
+        * total number of retransmissions of clones of original packets.
+        */
+       if (icsk->icsk_retransmits == 0)
+               DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);
+
+       if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) {
+               /*
+                * Retransmission failed because of local congestion,
+                * do not backoff.
+                */
+               if (icsk->icsk_retransmits == 0)
+                       icsk->icsk_retransmits = 1;
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                         min(icsk->icsk_rto,
+                                             TCP_RESOURCE_PROBE_INTERVAL),
+                                         DCCP_RTO_MAX);
+               goto out;
+       }
+
+       icsk->icsk_backoff++;
+       icsk->icsk_retransmits++;
+
+       /* Exponential backoff, capped at DCCP_RTO_MAX. */
+       icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
+       inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
+                                 DCCP_RTO_MAX);
+       /* Drop the cached route after repeated retransmissions. */
+       if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */)
+               __sk_dst_reset(sk);
+out:;
+}
+
+/*
+ * Write (retransmit) timer handler.
+ *
+ * @data: the struct sock pointer, cast to unsigned long by the timer code.
+ *
+ * Re-arms itself when the socket is owned by the user or the deadline has
+ * not been reached; otherwise clears the pending event and, if it was
+ * ICSK_TIME_RETRANS, runs the retransmit logic. Drops one socket reference
+ * on exit.
+ */
+static void dccp_write_timer(unsigned long data)
+{
+       struct sock *sk = (struct sock *)data;
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       bh_lock_sock(sk);
+
+       if (sock_owned_by_user(sk)) {
+               /* Socket busy in process context: retry shortly. */
+               sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
+                              jiffies + (HZ / 20));
+       } else if (sk->sk_state != DCCP_CLOSED && icsk->icsk_pending) {
+               if (time_after(icsk->icsk_timeout, jiffies)) {
+                       /* Fired early: re-arm for the real deadline. */
+                       sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
+                                      icsk->icsk_timeout);
+               } else {
+                       const int pending = icsk->icsk_pending;
+
+                       icsk->icsk_pending = 0;
+                       if (pending == ICSK_TIME_RETRANS)
+                               dccp_retransmit_timer(sk);
+               }
+       }
+
+       bh_unlock_sock(sk);
+       sock_put(sk);
+}
+
+/*
+ *     Timer for listening sockets
+ *
+ *     Prunes the request-sock queue of @sk; called from
+ *     dccp_keepalive_timer() when the socket is in DCCP_LISTEN.
+ */
+static void dccp_response_timer(struct sock *sk)
+{
+       inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT,
+                                  DCCP_RTO_MAX);
+}
+
+/*
+ * Keepalive timer handler.
+ *
+ * @data: the struct sock pointer, cast to unsigned long by the timer code.
+ *
+ * Only listening sockets get any work done here (the response timer);
+ * a socket owned by the user just has the timer pushed back. Drops one
+ * socket reference on exit.
+ */
+static void dccp_keepalive_timer(unsigned long data)
+{
+       struct sock *sk = (struct sock *)data;
+
+       bh_lock_sock(sk);
+
+       if (sock_owned_by_user(sk))
+               /* Socket is in use: try again later. */
+               inet_csk_reset_keepalive_timer(sk, HZ / 20);
+       else if (sk->sk_state == DCCP_LISTEN)
+               dccp_response_timer(sk);
+
+       bh_unlock_sock(sk);
+       sock_put(sk);
+}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c

index 96a02800cd283648e7d92d24c05e863ac4706353..621680f127af940ce48fb3fc4486601f019eb371 100644 (file)
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -118,7 +118,7 @@ Version 0.0.6    2.1.110   07-aug-98   Eduardo Marcelo Serrat
  #include <linux/netfilter.h>
  #include <linux/seq_file.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <net/flow.h>
  #include <asm/system.h>
  #include <asm/ioctls.h>
@@ -1763,7 +1763,7 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
                 nskb = skb->next;
  
                 if (skb->len == 0) {
-                       skb_unlink(skb);
+                       skb_unlink(skb, queue);
                         kfree_skb(skb);
                         /* 
                          * N.B. Don't refer to skb or cb after this point
@@ -1876,15 +1876,6 @@ static inline unsigned int dn_current_mss(struct sock *sk, int flags)
         return mss_now;
  }
  
-static int dn_error(struct sock *sk, int flags, int err)
-{
-       if (err == -EPIPE)
-               err = sock_error(sk) ? : -EPIPE;
-       if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
-               send_sig(SIGPIPE, current, 0);
-       return err;
-}
-
  static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
            struct msghdr *msg, size_t size)
  {
@@ -2045,7 +2036,7 @@ out:
         return sent ? sent : err;
  
  out_err:
-       err = dn_error(sk, flags, err);
+       err = sk_stream_error(sk, flags, err);
         release_sock(sk);
         return err;
  }
@@ -2073,7 +2064,7 @@ static struct notifier_block dn_dev_notifier = {
         .notifier_call = dn_device_event,
  };
  
-extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *);
+extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
  
  static struct packet_type dn_dix_packet_type = {
         .type =         __constant_htons(ETH_P_DNA_RT),
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c

index 00233ecbc9cba98022396af66bafaf1b09fb21b4..5610bb16dbf941db2307076b31f9a8e8914c70bf 100644 (file)
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -752,16 +752,16 @@ static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
  
         skb = alloc_skb(size, GFP_KERNEL);
         if (!skb) {
-               netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS);
+               netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS);
                 return;
         }
         if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
                 kfree_skb(skb);
-               netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL);
+               netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL);
                 return;
         }
-       NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_IFADDR;
-       netlink_broadcast(rtnl, skb, 0, RTMGRP_DECnet_IFADDR, GFP_KERNEL);
+       NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR;
+       netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL);
  }
  
  static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c

index f32dba9e26fe45942fc1334e08410abbc2722b46..8d0cc3cf3e491a636f258af31262b36f54a1efa4 100644 (file)
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -148,12 +148,12 @@ static int dn_neigh_construct(struct neighbour *neigh)
  
         __neigh_parms_put(neigh->parms);
         neigh->parms = neigh_parms_clone(parms);
-       rcu_read_unlock();
  
         if (dn_db->use_long)
                 neigh->ops = &dn_long_ops;
         else
                 neigh->ops = &dn_short_ops;
+       rcu_read_unlock();
  
         if (dn->flags & DN_NDFLAG_P3)
                 neigh->ops = &dn_phase3_ops;
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c

index 202dbde9850d4dfee2eee86ed05ff9f39c14048f..369f25b60f3f3a3966ccfb2ba93982cece27dd21 100644 (file)
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -60,7 +60,7 @@
  #include <linux/inet.h>
  #include <linux/route.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/system.h>
  #include <linux/fcntl.h>
  #include <linux/mm.h>
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c

index 8cce1fdbda907ac29dc29c2ea530102cc8f36d86..e0bebf4bbcadf60998f8d54f429af41eb4cfe08d 100644 (file)
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -479,7 +479,7 @@ int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff
                 xmit_count = cb2->xmit_count;
                 segnum = cb2->segnum;
                 /* Remove and drop ack'ed packet */
-               skb_unlink(ack);
+               skb_unlink(ack, q);
                 kfree_skb(ack);
                 ack = NULL;
  
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c

index 2399fa8a3f86e8db94dc753acd5c7240bf1f500f..2c915f305be37ffef87997ae5fbab5a166103c3b 100644 (file)
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -572,7 +572,7 @@ static int dn_route_ptp_hello(struct sk_buff *skb)
         return NET_RX_SUCCESS;
  }
  
-int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
  {
         struct dn_skb_cb *cb;
         unsigned char flags = 0;
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c

index 28ba5777a25ac58ff21e4c7ba9ecae08c96c251f..eeba56f99323b0cf82131b0fa3f6c9614dc2927e 100644 (file)
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -79,7 +79,7 @@ for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_n
  static DEFINE_RWLOCK(dn_fib_tables_lock);
  struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1];
  
-static kmem_cache_t *dn_hash_kmem;
+static kmem_cache_t *dn_hash_kmem __read_mostly;
  static int dn_fib_hash_zombies;
  
  static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz)
@@ -349,10 +349,10 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
                  kfree_skb(skb);
                  return;
          }
-        NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_ROUTE;
+        NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE;
          if (nlh->nlmsg_flags & NLM_F_ECHO)
                  atomic_inc(&skb->users);
-        netlink_broadcast(rtnl, skb, pid, RTMGRP_DECnet_ROUTE, GFP_KERNEL);
+        netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL);
          if (nlh->nlmsg_flags & NLM_F_ECHO)
                  netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
  }
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c

index 284a9998e53d756f02fe84a3cfd16ef2276cf617..1ab94c6e22ed504bba14c81c42470371870bc99b 100644 (file)
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -19,6 +19,7 @@
  #include <linux/netfilter.h>
  #include <linux/spinlock.h>
  #include <linux/netlink.h>
+#include <linux/netfilter_decnet.h>
  
  #include <net/sock.h>
  #include <net/flow.h>
@@ -71,10 +72,10 @@ static void dnrmg_send_peer(struct sk_buff *skb)
  
         switch(flags & DN_RT_CNTL_MSK) {
                 case DN_RT_PKT_L1RT:
-                       group = DNRMG_L1_GROUP;
+                       group = DNRNG_NLGRP_L1;
                         break;
                 case DN_RT_PKT_L2RT:
-                       group = DNRMG_L2_GROUP;
+                       group = DNRNG_NLGRP_L2;
                         break;
                 default:
                         return;
@@ -83,7 +84,7 @@ static void dnrmg_send_peer(struct sk_buff *skb)
         skb2 = dnrmg_build_message(skb, &status);
         if (skb2 == NULL)
                 return;
-       NETLINK_CB(skb2).dst_groups = group;
+       NETLINK_CB(skb2).dst_group = group;
         netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC);
  }
  
@@ -138,7 +139,8 @@ static int __init init(void)
  {
         int rv = 0;
  
-       dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk);
+       dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
+                                     dnrmg_receive_user_sk, THIS_MODULE);
         if (dnrmg == NULL) {
                 printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
                 return -ENOMEM;
@@ -162,6 +164,7 @@ static void __exit fini(void)
  MODULE_DESCRIPTION("DECnet Routing Message Grabulator");
  MODULE_AUTHOR("Steven Whitehouse <steve@chygwyn.com>");
  MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG);
  
  module_init(init);
  module_exit(fini);
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c

index de691e119e173fab412e8e90be336f9d3312e25f..4a62093eb343afae44042592e75271c40f0fc2fd 100644 (file)
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -159,7 +159,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock,
         err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
         if (err)
                 goto out_free;
-       sk->sk_stamp = skb->stamp;
+       skb_get_timestamp(skb, &sk->sk_stamp);
  
         if (msg->msg_name)
                 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
@@ -869,7 +869,7 @@ static void aun_tx_ack(unsigned long seq, int result)
  
  foundit:
         tx_result(skb->sk, eb->cookie, result);
-       skb_unlink(skb);
+       skb_unlink(skb, &aun_queue);
         spin_unlock_irqrestore(&aun_queue_lock, flags);
         kfree_skb(skb);
  }
@@ -947,7 +947,7 @@ static void ab_cleanup(unsigned long h)
                 {
                         tx_result(skb->sk, eb->cookie, 
                                   ECTYPE_TRANSMIT_NOT_PRESENT);
-                       skb_unlink(skb);
+                       skb_unlink(skb, &aun_queue);
                         kfree_skb(skb);
                 }
                 skb = newskb;
@@ -1009,7 +1009,7 @@ release:
   *     Receive an Econet frame from a device.
   */
  
-static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
  {
         struct ec_framehdr *hdr;
         struct sock *sk;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c

index f6dbfb99b14ddf76db0d26eae2d904fda3ccb5c0..87a052a9a84f9b4025ebf430f0c6197afd3a759c 100644 (file)
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -62,8 +62,6 @@
  #include <asm/system.h>
  #include <asm/checksum.h>
  
-extern int __init netdev_boot_setup(char *str);
-
  __setup("ether=", netdev_boot_setup);
  
  /*
@@ -163,7 +161,6 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
         skb->mac.raw=skb->data;
         skb_pull(skb,ETH_HLEN);
         eth = eth_hdr(skb);
-       skb->input_dev = dev;
         
         if(*eth->h_dest&1)
         {
diff --git a/net/ethernet/sysctl_net_ether.c b/net/ethernet/sysctl_net_ether.c

index b81a6d532342a3bbb610670174918e02935bcaf2..66b39fc342d20e8a8088857d8ee4d06244f0d8e7 100644 (file)
--- a/net/ethernet/sysctl_net_ether.c
+++ b/net/ethernet/sysctl_net_ether.c
@@ -7,6 +7,7 @@
  
  #include <linux/mm.h>
  #include <linux/sysctl.h>
+#include <linux/if_ether.h>
  
  ctl_table ether_table[] = {
         {0}
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig

index 0b3d9f1d806952b35f9603fcd4dc0163bcfdcc53..e55136ae09f40708bbcdd50004abe97822ba0a71 100644 (file)
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -413,20 +413,19 @@ config INET_TUNNEL
           
           If unsure, say Y.
  
-config IP_TCPDIAG
-       tristate "IP: TCP socket monitoring interface"
+config INET_DIAG
+       tristate "INET: socket monitoring interface"
         default y
         ---help---
-         Support for TCP socket monitoring interface used by native Linux
-         tools such as ss. ss is included in iproute2, currently downloadable
-         at <http://developer.osdl.org/dev/iproute2>. If you want IPv6 support
-         and have selected IPv6 as a module, you need to build this as a
-         module too.
+         Support for INET (TCP, DCCP, etc) socket monitoring interface used by
+         native Linux tools such as ss. ss is included in iproute2, currently
+         downloadable at <http://developer.osdl.org/dev/iproute2>. 
           
           If unsure, say Y.
  
-config IP_TCPDIAG_IPV6
-       def_bool (IP_TCPDIAG=y && IPV6=y) || (IP_TCPDIAG=m && IPV6)
+config INET_TCP_DIAG
+       depends on INET_DIAG
+       def_tristate INET_DIAG
  
  config TCP_CONG_ADVANCED
         bool "TCP: advanced congestion control"
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile

index 55dc6cca1e7bb865b445430786740d46ba1bfce9..f0435d00db6befe152f8afaf730d6bdf1b975d6b 100644 (file)
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -4,11 +4,12 @@
  
  obj-y     := route.o inetpeer.o protocol.o \
              ip_input.o ip_fragment.o ip_forward.o ip_options.o \
-            ip_output.o ip_sockglue.o \
+            ip_output.o ip_sockglue.o inet_hashtables.o \
+            inet_timewait_sock.o inet_connection_sock.o \
              tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
              tcp_minisocks.o tcp_cong.o \
              datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
-            sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
+            sysctl_net_ipv4.o fib_frontend.o fib_semantics.o netfilter.o
  
  obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
  obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
@@ -29,8 +30,9 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
  obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
  obj-$(CONFIG_NETFILTER)        += netfilter/
  obj-$(CONFIG_IP_VS) += ipvs/
-obj-$(CONFIG_IP_TCPDIAG) += tcp_diag.o 
+obj-$(CONFIG_INET_DIAG) += inet_diag.o 
  obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
+obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
  obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
  obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o
  obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c

index 163ae4068b5f3acfb70c1434bfd0e82c94478cb4..bf147f8db399489260d2292409ba5749b1ef93e8 100644 (file)
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -99,6 +99,7 @@
  #include <net/arp.h>
  #include <net/route.h>
  #include <net/ip_fib.h>
+#include <net/inet_connection_sock.h>
  #include <net/tcp.h>
  #include <net/udp.h>
  #include <linux/skbuff.h>
@@ -112,11 +113,7 @@
  #include <linux/mroute.h>
  #endif
  
-DEFINE_SNMP_STAT(struct linux_mib, net_statistics);
-
-#ifdef INET_REFCNT_DEBUG
-atomic_t inet_sock_nr;
-#endif
+DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
  
  extern void ip_mc_drop_socket(struct sock *sk);
  
@@ -153,11 +150,7 @@ void inet_sock_destruct(struct sock *sk)
         if (inet->opt)
                 kfree(inet->opt);
         dst_release(sk->sk_dst_cache);
-#ifdef INET_REFCNT_DEBUG
-       atomic_dec(&inet_sock_nr);
-       printk(KERN_DEBUG "INET socket %p released, %d are still alive\n",
-              sk, atomic_read(&inet_sock_nr));
-#endif
+       sk_refcnt_debug_dec(sk);
  }
  
  /*
@@ -210,7 +203,7 @@ int inet_listen(struct socket *sock, int backlog)
          * we can only allow the backlog to be adjusted.
          */
         if (old_state != TCP_LISTEN) {
-               err = tcp_listen_start(sk);
+               err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
                 if (err)
                         goto out;
         }
@@ -235,12 +228,14 @@ static int inet_create(struct socket *sock, int protocol)
         struct proto *answer_prot;
         unsigned char answer_flags;
         char answer_no_check;
-       int err;
+       int try_loading_module = 0;
+       int err = -ESOCKTNOSUPPORT;
  
         sock->state = SS_UNCONNECTED;
  
         /* Look for the requested type/protocol pair. */
         answer = NULL;
+lookup_protocol:
         rcu_read_lock();
         list_for_each_rcu(p, &inetsw[sock->type]) {
                 answer = list_entry(p, struct inet_protosw, list);
@@ -261,9 +256,28 @@ static int inet_create(struct socket *sock, int protocol)
                 answer = NULL;
         }
  
-       err = -ESOCKTNOSUPPORT;
-       if (!answer)
-               goto out_rcu_unlock;
+       if (unlikely(answer == NULL)) {
+               if (try_loading_module < 2) {
+                       rcu_read_unlock();
+                       /*
+                        * Be more specific, e.g. net-pf-2-proto-132-type-1
+                        * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
+                        */
+                       if (++try_loading_module == 1)
+                               request_module("net-pf-%d-proto-%d-type-%d",
+                                              PF_INET, protocol, sock->type);
+                       /*
+                        * Fall back to generic, e.g. net-pf-2-proto-132
+                        * (net-pf-PF_INET-proto-IPPROTO_SCTP)
+                        */
+                       else
+                               request_module("net-pf-%d-proto-%d",
+                                              PF_INET, protocol);
+                       goto lookup_protocol;
+               } else
+                       goto out_rcu_unlock;
+       }
+
         err = -EPERM;
         if (answer->capability > 0 && !capable(answer->capability))
                 goto out_rcu_unlock;
@@ -317,9 +331,7 @@ static int inet_create(struct socket *sock, int protocol)
         inet->mc_index  = 0;
         inet->mc_list   = NULL;
  
-#ifdef INET_REFCNT_DEBUG
-       atomic_inc(&inet_sock_nr);
-#endif
+       sk_refcnt_debug_inc(sk);
  
         if (inet->num) {
                 /* It assumes that any protocol which allows
@@ -847,10 +859,6 @@ static struct net_proto_family inet_family_ops = {
         .owner  = THIS_MODULE,
  };
  
-
-extern void tcp_init(void);
-extern void tcp_v4_init(struct net_proto_family *);
-
  /* Upon startup we insert all the elements in inetsw_array[] into
   * the linked list inetsw.
   */
@@ -961,6 +969,119 @@ void inet_unregister_protosw(struct inet_protosw *p)
         }
  }
  
+/*
+ *      Shall we try to damage output packets if routing dev changes?
+ */
+
+int sysctl_ip_dynaddr; /* 0: never reselect the source address; > 1: also log each change */
+
+static int inet_sk_reselect_saddr(struct sock *sk)
+{
+       struct inet_sock *inet = inet_sk(sk);
+       int err;
+       struct rtable *rt;
+       __u32 old_saddr = inet->saddr; /* remembered to detect whether the address changes */
+       __u32 new_saddr;
+       __u32 daddr = inet->daddr;
+
+       if (inet->opt && inet->opt->srr)
+               daddr = inet->opt->faddr; /* srr option set: route towards opt->faddr instead */
+
+       /* Query a new route for daddr; its rt_src is the candidate source address. */
+       err = ip_route_connect(&rt, daddr, 0,
+                              RT_CONN_FLAGS(sk),
+                              sk->sk_bound_dev_if,
+                              sk->sk_protocol,
+                              inet->sport, inet->dport, sk);
+       if (err)
+               return err; /* lookup failed: socket left untouched */
+
+       sk_setup_caps(sk, &rt->u.dst);
+
+       new_saddr = rt->rt_src;
+
+       if (new_saddr == old_saddr)
+               return 0; /* same source address: no rehash needed */
+
+       if (sysctl_ip_dynaddr > 1) { /* values above 1 additionally log the change */
+               printk(KERN_INFO "%s(): shifting inet->"
+                                "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
+                      __FUNCTION__,
+                      NIPQUAD(old_saddr),
+                      NIPQUAD(new_saddr));
+       }
+
+       inet->saddr = inet->rcv_saddr = new_saddr;
+
+       /*
+        * XXX The only one ugly spot where we need to
+        * XXX really change the socket's identity after
+        * XXX it has entered the hashes. -DaveM
+        *
+        * Besides that, it does not check for connection
+        * uniqueness. Wait for troubles.
+        */
+       __sk_prot_rehash(sk);
+       return 0;
+}
+
+int inet_sk_rebuild_header(struct sock *sk)
+{
+       struct inet_sock *inet = inet_sk(sk);
+       struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
+       u32 daddr;
+       int err;
+
+       /* __sk_dst_check() still returned a route: it is OK, nothing to do. */
+       if (rt)
+               return 0;
+
+       /* Reroute, towards opt->faddr when the srr option is set. */
+       daddr = inet->daddr;
+       if (inet->opt && inet->opt->srr)
+               daddr = inet->opt->faddr;
+{ /* nested scope so the flow key can be declared after the statements above */
+       struct flowi fl = {
+               .oif = sk->sk_bound_dev_if,
+               .nl_u = {
+                       .ip4_u = {
+                               .daddr  = daddr,
+                               .saddr  = inet->saddr,
+                               .tos    = RT_CONN_FLAGS(sk),
+                       },
+               },
+               .proto = sk->sk_protocol,
+               .uli_u = {
+                       .ports = {
+                               .sport = inet->sport,
+                               .dport = inet->dport,
+                       },
+               },
+       };
+                                               
+       err = ip_route_output_flow(&rt, &fl, sk, 0);
+}
+       if (!err)
+               sk_setup_caps(sk, &rt->u.dst);
+       else {
+               /* Routing failed: clear the socket's route capabilities. */
+               sk->sk_route_caps = 0;
+               /*
+                * Other protocols have to map their equivalent state to TCP_SYN_SENT.
+                * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
+                */
+               if (!sysctl_ip_dynaddr ||
+                   sk->sk_state != TCP_SYN_SENT ||
+                   (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
+                   (err = inet_sk_reselect_saddr(sk)) != 0) /* err becomes 0 if reselection works */
+                       sk->sk_err_soft = -err; /* record the failure as a soft error */
+       }
+
+       return err; /* 0 on success, otherwise the error from the failing call above */
+}
+
+EXPORT_SYMBOL(inet_sk_rebuild_header);
+
  #ifdef CONFIG_IP_MULTICAST
  static struct net_protocol igmp_protocol = {
         .handler =      igmp_rcv,
@@ -1007,7 +1128,6 @@ static int __init init_ipv4_mibs(void)
  }
  
  static int ipv4_proc_init(void);
-extern void ipfrag_init(void);
  
  /*
   *     IP protocol layer initialiser
@@ -1128,19 +1248,10 @@ module_init(inet_init);
  /* ------------------------------------------------------------------------ */
  
  #ifdef CONFIG_PROC_FS
-extern int  fib_proc_init(void);
-extern void fib_proc_exit(void);
  #ifdef CONFIG_IP_FIB_TRIE
  extern int  fib_stat_proc_init(void);
  extern void fib_stat_proc_exit(void);
  #endif
-extern int  ip_misc_proc_init(void);
-extern int  raw_proc_init(void);
-extern void raw_proc_exit(void);
-extern int  tcp4_proc_init(void);
-extern void tcp4_proc_exit(void);
-extern int  udp4_proc_init(void);
-extern void udp4_proc_exit(void);
  
  static int __init ipv4_proc_init(void)
  {
@@ -1205,7 +1316,3 @@ EXPORT_SYMBOL(inet_stream_ops);
  EXPORT_SYMBOL(inet_unregister_protosw);
  EXPORT_SYMBOL(net_statistics);
  EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
-
-#ifdef INET_REFCNT_DEBUG
-EXPORT_SYMBOL(inet_sock_nr);
-#endif
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c

index a642fd6128533810e96a159723f2c337fedb6dd9..8bf312bdea13c50f48291df5481affb6cfc10f4a 100644 (file)
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -700,7 +700,7 @@ void arp_send(int type, int ptype, u32 dest_ip,
  static void parp_redo(struct sk_buff *skb)
  {
         nf_reset(skb);
-       arp_rcv(skb, skb->dev, NULL);
+       arp_rcv(skb, skb->dev, NULL, skb->dev);
  }
  
  /*
@@ -865,7 +865,7 @@ static int arp_process(struct sk_buff *skb)
                                 if (n)
                                         neigh_release(n);
  
-                               if (skb->stamp.tv_sec == LOCALLY_ENQUEUED || 
+                               if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || 
                                     skb->pkt_type == PACKET_HOST ||
                                     in_dev->arp_parms->proxy_delay == 0) {
                                         arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
@@ -927,7 +927,7 @@ out:
   *     Receive an arp request from the device layer.
   */
  
-int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
  {
         struct arphdr *arp;
  
@@ -948,6 +948,8 @@ int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
         if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
                 goto out_of_mem;
  
+       memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
+
         return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
  
  freeskb:
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c

index b1db561f25423c3575a2e03ff56699fe5bba2c0e..c1b42b5257f8455d54c0388ac1ac25e76595b659 100644 (file)
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -16,9 +16,10 @@
  #include <linux/module.h>
  #include <linux/ip.h>
  #include <linux/in.h>
+#include <net/ip.h>
  #include <net/sock.h>
-#include <net/tcp.h>
  #include <net/route.h>
+#include <net/tcp_states.h>
  
  int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
  {
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c

index d8a10e3dd77d2090d3729ef39a41633bbe8fbda9..ba2895ae81514ab1aea769339876acce7bf6a76c 100644 (file)
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1111,13 +1111,12 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
         struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
  
         if (!skb)
-               netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS);
+               netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS);
         else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
                 kfree_skb(skb);
-               netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL);
+               netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL);
         } else {
-               NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_IFADDR;
-               netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV4_IFADDR, GFP_KERNEL);
+               netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL);
         }
  }
  
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c

index ba57446d5d1f4a1831f926859dca12c40369067e..b31ffc5053d2efd36c5e4e149c644d1e6c9f1235 100644 (file)
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -331,8 +331,8 @@ static void esp4_err(struct sk_buff *skb, u32 info)
         x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
         if (!x)
                 return;
-       NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
-                       ntohl(esph->spi), ntohl(iph->daddr)));
+       NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
+                ntohl(esph->spi), ntohl(iph->daddr));
         xfrm_state_put(x);
  }
  
@@ -395,10 +395,10 @@ static int esp_init_state(struct xfrm_state *x)
  
                 if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
                     crypto_tfm_alg_digestsize(esp->auth.tfm)) {
-                       NETDEBUG(printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
-                              x->aalg->alg_name,
-                              crypto_tfm_alg_digestsize(esp->auth.tfm),
-                              aalg_desc->uinfo.auth.icv_fullbits/8));
+                       NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
+                                x->aalg->alg_name,
+                                crypto_tfm_alg_digestsize(esp->auth.tfm),
+                                aalg_desc->uinfo.auth.icv_fullbits/8);
                         goto error;
                 }
  
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c

index cd8e45ab95807116a18fe1649422766ef636da3c..4e1379f712696e82be169fd4b69dde6456cee855 100644 (file)
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -558,16 +558,15 @@ static void nl_fib_input(struct sock *sk, int len)
         nl_fib_lookup(frn, tb);
         
         pid = nlh->nlmsg_pid;           /*pid of sending process */
-       NETLINK_CB(skb).groups = 0;     /* not in mcast group */
         NETLINK_CB(skb).pid = 0;         /* from kernel */
         NETLINK_CB(skb).dst_pid = pid;
-       NETLINK_CB(skb).dst_groups = 0;  /* unicast */
+       NETLINK_CB(skb).dst_group = 0;  /* unicast */
         netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
  }    
  
  static void nl_fib_lookup_init(void)
  {
-      netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input);
+      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
  }
  
  static void fib_disable_ip(struct net_device *dev, int force)
@@ -662,5 +661,4 @@ void __init ip_fib_init(void)
  }
  
  EXPORT_SYMBOL(inet_addr_type);
-EXPORT_SYMBOL(ip_dev_find);
  EXPORT_SYMBOL(ip_rt_ioctl);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c

index b10d6bb5ef3d67f5bc125243fda1a391f57460d2..2a8c9afc3695d88a673e2f35cba900d0aa63c409 100644 (file)
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -45,8 +45,8 @@
  
  #include "fib_lookup.h"
  
-static kmem_cache_t *fn_hash_kmem;
-static kmem_cache_t *fn_alias_kmem;
+static kmem_cache_t *fn_hash_kmem __read_mostly;
+static kmem_cache_t *fn_alias_kmem __read_mostly;
  
  struct fib_node {
         struct hlist_node       fn_hash;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h

index b729d97cfa9344655e1842530aeb55b5609f308e..ef6609ea0eb757e8b3daa8117f38e7837819d197 100644 (file)
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -7,6 +7,7 @@
  
  struct fib_alias {
         struct list_head        fa_list;
+       struct rcu_head rcu;
         struct fib_info         *fa_info;
         u8                      fa_tos;
         u8                      fa_type;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c

index c886b28ba9f500d7fc80019f6a66fbd72077b8a5..d41219e8037c7a9f8a9c69dc1bd720f36d357d0b 100644 (file)
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -290,10 +290,10 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
                 kfree_skb(skb);
                 return;
         }
-       NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
+       NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
         if (n->nlmsg_flags&NLM_F_ECHO)
                 atomic_inc(&skb->users);
-       netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
+       netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
         if (n->nlmsg_flags&NLM_F_ECHO)
                 netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
  }
@@ -593,10 +593,13 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
                           struct hlist_head *new_laddrhash,
                           unsigned int new_size)
  {
+       struct hlist_head *old_info_hash, *old_laddrhash;
         unsigned int old_size = fib_hash_size;
-       unsigned int i;
+       unsigned int i, bytes;
  
         write_lock(&fib_info_lock);
+       old_info_hash = fib_info_hash;
+       old_laddrhash = fib_info_laddrhash;
         fib_hash_size = new_size;
  
         for (i = 0; i < old_size; i++) {
@@ -636,6 +639,10 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
         fib_info_laddrhash = new_laddrhash;
  
         write_unlock(&fib_info_lock);
+
+       bytes = old_size * sizeof(struct hlist_head *);
+       fib_hash_free(old_info_hash, bytes);
+       fib_hash_free(old_laddrhash, bytes);
  }
  
  struct fib_info *
@@ -847,6 +854,7 @@ failure:
         return NULL;
  }
  
+/* Note! fib_semantic_match intentionally uses RCU list functions. */
  int fib_semantic_match(struct list_head *head, const struct flowi *flp,
                        struct fib_result *res, __u32 zone, __u32 mask, 
                         int prefixlen)
@@ -854,7 +862,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
         struct fib_alias *fa;
         int nh_sel = 0;
  
-       list_for_each_entry(fa, head, fa_list) {
+       list_for_each_entry_rcu(fa, head, fa_list) {
                 int err;
  
                 if (fa->fa_tos &&
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c

index a701405fab0b3413389dc1899e00b90c9eb028d3..b2dea4e5da77cfe2f00bf09cfaa5a0ec0e171ed5 100644 (file)
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
   *             2 of the License, or (at your option) any later version.
   */
  
-#define VERSION "0.325"
+#define VERSION "0.402"
  
  #include <linux/config.h>
  #include <asm/uaccess.h>
@@ -62,6 +62,7 @@
  #include <linux/netdevice.h>
  #include <linux/if_arp.h>
  #include <linux/proc_fs.h>
+#include <linux/rcupdate.h>
  #include <linux/skbuff.h>
  #include <linux/netlink.h>
  #include <linux/init.h>
@@ -77,56 +78,55 @@
  #undef CONFIG_IP_FIB_TRIE_STATS
  #define MAX_CHILDS 16384
  
-#define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n)))
  #define KEYLENGTH (8*sizeof(t_key))
  #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
  #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
  
-static DEFINE_RWLOCK(fib_lock);
-
  typedef unsigned int t_key;
  
  #define T_TNODE 0
  #define T_LEAF  1
  #define NODE_TYPE_MASK 0x1UL
-#define NODE_PARENT(_node) \
-       ((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK))
-#define NODE_SET_PARENT(_node, _ptr) \
-       ((_node)->_parent = (((unsigned long)(_ptr)) | \
-                     ((_node)->_parent & NODE_TYPE_MASK)))
-#define NODE_INIT_PARENT(_node, _type) \
-       ((_node)->_parent = (_type))
-#define NODE_TYPE(_node) \
-       ((_node)->_parent & NODE_TYPE_MASK)
-
-#define IS_TNODE(n) (!(n->_parent & T_LEAF))
-#define IS_LEAF(n) (n->_parent & T_LEAF)
+#define NODE_PARENT(node) \
+       ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK)))
+
+#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
+
+#define NODE_SET_PARENT(node, ptr)             \
+       rcu_assign_pointer((node)->parent,      \
+                          ((unsigned long)(ptr)) | NODE_TYPE(node))
+
+#define IS_TNODE(n) (!(n->parent & T_LEAF))
+#define IS_LEAF(n) (n->parent & T_LEAF)
  
  struct node {
-        t_key key;
-       unsigned long _parent;
+       t_key key;
+       unsigned long parent;
  };
  
  struct leaf {
-        t_key key;
-       unsigned long _parent;
+       t_key key;
+       unsigned long parent;
         struct hlist_head list;
+       struct rcu_head rcu;
  };
  
  struct leaf_info {
         struct hlist_node hlist;
+       struct rcu_head rcu;
         int plen;
         struct list_head falh;
  };
  
  struct tnode {
-        t_key key;
-       unsigned long _parent;
-        unsigned short pos:5;        /* 2log(KEYLENGTH) bits needed */
-        unsigned short bits:5;       /* 2log(KEYLENGTH) bits needed */
-        unsigned short full_children;  /* KEYLENGTH bits needed */
-        unsigned short empty_children; /* KEYLENGTH bits needed */
-        struct node *child[0];
+       t_key key;
+       unsigned long parent;
+       unsigned short pos:5;           /* 2log(KEYLENGTH) bits needed */
+       unsigned short bits:5;          /* 2log(KEYLENGTH) bits needed */
+       unsigned short full_children;   /* KEYLENGTH bits needed */
+       unsigned short empty_children;  /* KEYLENGTH bits needed */
+       struct rcu_head rcu;
+       struct node *child[0];
  };
  
  #ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -150,77 +150,45 @@ struct trie_stat {
  };
  
  struct trie {
-        struct node *trie;
+       struct node *trie;
  #ifdef CONFIG_IP_FIB_TRIE_STATS
         struct trie_use_stats stats;
  #endif
-        int size;
+       int size;
         unsigned int revision;
  };
  
-static int trie_debug = 0;
-
-static int tnode_full(struct tnode *tn, struct node *n);
  static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
  static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
-static int tnode_child_length(struct tnode *tn);
  static struct node *resize(struct trie *t, struct tnode *tn);
-static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err);
-static struct tnode *halve(struct trie *t, struct tnode *tn, int *err);
+static struct tnode *inflate(struct trie *t, struct tnode *tn);
+static struct tnode *halve(struct trie *t, struct tnode *tn);
  static void tnode_free(struct tnode *tn);
  static void trie_dump_seq(struct seq_file *seq, struct trie *t);
-extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
-extern int fib_detect_death(struct fib_info *fi, int order,
-                            struct fib_info **last_resort, int *last_idx, int *dflt);
-
-extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id,
-               struct nlmsghdr *n, struct netlink_skb_parms *req);
  
-static kmem_cache_t *fn_alias_kmem;
+static kmem_cache_t *fn_alias_kmem __read_mostly;
  static struct trie *trie_local = NULL, *trie_main = NULL;
  
-static void trie_bug(char *err)
-{
-       printk("Trie Bug: %s\n", err);
-       BUG();
-}
+
+/* rcu_read_lock needs to be held by the caller on the read side */
  
  static inline struct node *tnode_get_child(struct tnode *tn, int i)
  {
-        if (i >= 1<<tn->bits)
-                trie_bug("tnode_get_child");
+       BUG_ON(i >= 1 << tn->bits);
  
-        return tn->child[i];
+       return rcu_dereference(tn->child[i]);
  }
  
-static inline int tnode_child_length(struct tnode *tn)
+static inline int tnode_child_length(const struct tnode *tn)
  {
-        return 1<<tn->bits;
+       return 1 << tn->bits;
  }
  
-/*
-  _________________________________________________________________
-  | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
-  ----------------------------------------------------------------
-    0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
-
-  _________________________________________________________________
-  | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
-  -----------------------------------------------------------------
-   16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31
-
-  tp->pos = 7
-  tp->bits = 3
-  n->pos = 15
-  n->bits=4
-  KEYLENGTH=32
-*/
-
  static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
  {
-        if (offset < KEYLENGTH)
+       if (offset < KEYLENGTH)
                 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
-        else
+       else
                 return 0;
  }
  
@@ -233,8 +201,8 @@ static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
  {
         if (bits == 0 || offset >= KEYLENGTH)
                 return 1;
-        bits = bits > KEYLENGTH ? KEYLENGTH : bits;
-        return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
+       bits = bits > KEYLENGTH ? KEYLENGTH : bits;
+       return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
  }
  
  static inline int tkey_mismatch(t_key a, int offset, t_key b)
@@ -249,14 +217,6 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b)
         return i;
  }
  
-/* Candiate for fib_semantics */
-
-static void fn_free_alias(struct fib_alias *fa)
-{
-       fib_release_info(fa->fa_info);
-       kmem_cache_free(fn_alias_kmem, fa);
-}
-
  /*
    To understand this stuff, an understanding of keys and all their bits is 
    necessary. Every node in the trie has a key associated with it, but not 
@@ -295,7 +255,7 @@ static void fn_free_alias(struct fib_alias *fa)
    tp->pos = 7
    tp->bits = 3
    n->pos = 15
-  n->bits=4
+  n->bits = 4
  
    First, let's just ignore the bits that come before the parent tp, that is 
    the bits from 0 to (tp->pos-1). They are *known* but at this point we do 
@@ -320,60 +280,65 @@ static void fn_free_alias(struct fib_alias *fa)
  
  */
  
-static void check_tnode(struct tnode *tn)
+static inline void check_tnode(const struct tnode *tn)
  {
-       if (tn && tn->pos+tn->bits > 32) {
-               printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits);
-       }
+       WARN_ON(tn && tn->pos+tn->bits > 32);
  }
  
  static int halve_threshold = 25;
  static int inflate_threshold = 50;
  
-static struct leaf *leaf_new(void)
+
+static void __alias_free_mem(struct rcu_head *head)
  {
-       struct leaf *l = kmalloc(sizeof(struct leaf),  GFP_KERNEL);
-       if (l) {
-               NODE_INIT_PARENT(l, T_LEAF);
-               INIT_HLIST_HEAD(&l->list);
-       }
-       return l;
+       struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
+       kmem_cache_free(fn_alias_kmem, fa);
  }
  
-static struct leaf_info *leaf_info_new(int plen)
+static inline void alias_free_mem_rcu(struct fib_alias *fa)
  {
-       struct leaf_info *li = kmalloc(sizeof(struct leaf_info),  GFP_KERNEL);
-       if (li) {
-               li->plen = plen;
-               INIT_LIST_HEAD(&li->falh);
-       }
-       return li;
+       call_rcu(&fa->rcu, __alias_free_mem);
+}
+
+static void __leaf_free_rcu(struct rcu_head *head)
+{
+       kfree(container_of(head, struct leaf, rcu));
+}
+
+static inline void free_leaf(struct leaf *leaf)
+{
+       call_rcu(&leaf->rcu, __leaf_free_rcu);
  }
  
-static inline void free_leaf(struct leaf *l)
+static void __leaf_info_free_rcu(struct rcu_head *head)
  {
-       kfree(l);
+       kfree(container_of(head, struct leaf_info, rcu));
  }
  
-static inline void free_leaf_info(struct leaf_info *li)
+static inline void free_leaf_info(struct leaf_info *leaf)
  {
-       kfree(li);
+       call_rcu(&leaf->rcu, __leaf_info_free_rcu);
  }
  
  static struct tnode *tnode_alloc(unsigned int size)
  {
-       if (size <= PAGE_SIZE) {
-               return kmalloc(size, GFP_KERNEL);
-       } else {
-               return (struct tnode *)
-                       __get_free_pages(GFP_KERNEL, get_order(size));
-       }
+       struct page *pages;
+
+       if (size <= PAGE_SIZE)
+               return kcalloc(size, 1, GFP_KERNEL);
+
+       pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
+       if (!pages)
+               return NULL;
+
+       return page_address(pages);
  }
  
-static void __tnode_free(struct tnode *tn)
+static void __tnode_free_rcu(struct rcu_head *head)
  {
+       struct tnode *tn = container_of(head, struct tnode, rcu);
         unsigned int size = sizeof(struct tnode) +
-                           (1<<tn->bits) * sizeof(struct node *);
+               (1 << tn->bits) * sizeof(struct node *);
  
         if (size <= PAGE_SIZE)
                 kfree(tn);
@@ -381,15 +346,40 @@ static void __tnode_free(struct tnode *tn)
                 free_pages((unsigned long)tn, get_order(size));
  }
  
+static inline void tnode_free(struct tnode *tn)
+{
+       call_rcu(&tn->rcu, __tnode_free_rcu);
+}
+
+static struct leaf *leaf_new(void)
+{
+       struct leaf *l = kmalloc(sizeof(struct leaf),  GFP_KERNEL);
+       if (l) {
+               l->parent = T_LEAF;
+               INIT_HLIST_HEAD(&l->list);
+       }
+       return l;
+}
+
+static struct leaf_info *leaf_info_new(int plen)
+{
+       struct leaf_info *li = kmalloc(sizeof(struct leaf_info),  GFP_KERNEL);
+       if (li) {
+               li->plen = plen;
+               INIT_LIST_HEAD(&li->falh);
+       }
+       return li;
+}
+
  static struct tnode* tnode_new(t_key key, int pos, int bits)
  {
         int nchildren = 1<<bits;
         int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
         struct tnode *tn = tnode_alloc(sz);
  
-       if (tn)  {
+       if (tn) {
                 memset(tn, 0, sz);
-               NODE_INIT_PARENT(tn, T_TNODE);
+               tn->parent = T_TNODE;
                 tn->pos = pos;
                 tn->bits = bits;
                 tn->key = key;
@@ -397,38 +387,17 @@ static struct tnode* tnode_new(t_key key, int pos, int bits)
                 tn->empty_children = 1<<bits;
         }
  
-       if (trie_debug > 0)
-               printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
-                      (unsigned int) (sizeof(struct node) * 1<<bits));
+       pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
+                (unsigned int) (sizeof(struct node) * 1<<bits));
         return tn;
  }
  
-static void tnode_free(struct tnode *tn)
-{
-       if (!tn) {
-               trie_bug("tnode_free\n");
-       }
-       if (IS_LEAF(tn)) {
-               free_leaf((struct leaf *)tn);
-               if (trie_debug > 0 )
-                       printk("FL %p \n", tn);
-       }
-       else if (IS_TNODE(tn)) {
-               __tnode_free(tn);
-               if (trie_debug > 0 )
-                       printk("FT %p \n", tn);
-       }
-       else {
-               trie_bug("tnode_free\n");
-       }
-}
-
  /*
   * Check whether a tnode 'n' is "full", i.e. it is an internal node
   * and no bits are skipped. See discussion in dyntree paper p. 6
   */
  
-static inline int tnode_full(struct tnode *tn, struct node *n)
+static inline int tnode_full(const struct tnode *tn, const struct node *n)
  {
         if (n == NULL || IS_LEAF(n))
                 return 0;
@@ -448,15 +417,11 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, struct nod
  
  static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
  {
-       struct node *chi;
+       struct node *chi = tn->child[i];
         int isfull;
  
-       if (i >= 1<<tn->bits) {
-               printk("bits=%d, i=%d\n", tn->bits, i);
-               trie_bug("tnode_put_child_reorg bits");
-       }
-       write_lock_bh(&fib_lock);
-       chi = tn->child[i];
+       BUG_ON(i >= 1<<tn->bits);
+
  
         /* update emptyChildren */
         if (n == NULL && chi != NULL)
@@ -465,33 +430,32 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w
                 tn->empty_children--;
  
         /* update fullChildren */
-        if (wasfull == -1)
+       if (wasfull == -1)
                 wasfull = tnode_full(tn, chi);
  
         isfull = tnode_full(tn, n);
         if (wasfull && !isfull)
                 tn->full_children--;
-
         else if (!wasfull && isfull)
                 tn->full_children++;
+
         if (n)
                 NODE_SET_PARENT(n, tn);
  
-       tn->child[i] = n;
-       write_unlock_bh(&fib_lock);
+       rcu_assign_pointer(tn->child[i], n);
  }
  
  static struct node *resize(struct trie *t, struct tnode *tn)
  {
         int i;
         int err = 0;
+       struct tnode *old_tn;
  
         if (!tn)
                 return NULL;
  
-       if (trie_debug)
-               printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
-                     tn, inflate_threshold, halve_threshold);
+       pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
+                tn, inflate_threshold, halve_threshold);
  
         /* No children */
         if (tn->empty_children == tnode_child_length(tn)) {
@@ -501,20 +465,16 @@ static struct node *resize(struct trie *t, struct tnode *tn)
         /* One child */
         if (tn->empty_children == tnode_child_length(tn) - 1)
                 for (i = 0; i < tnode_child_length(tn); i++) {
+                       struct node *n;
  
-                       write_lock_bh(&fib_lock);
-                       if (tn->child[i] != NULL) {
-
-                               /* compress one level */
-                               struct node *n = tn->child[i];
-                               if (n)
-                                       NODE_INIT_PARENT(n, NODE_TYPE(n));
+                       n = tn->child[i];
+                       if (!n)
+                               continue;
  
-                               write_unlock_bh(&fib_lock);
-                               tnode_free(tn);
-                               return n;
-                       }
-                       write_unlock_bh(&fib_lock);
+                       /* compress one level */
+                       NODE_SET_PARENT(n, NULL);
+                       tnode_free(tn);
+                       return n;
                 }
         /*
          * Double as long as the resulting node has a number of
@@ -566,16 +526,16 @@ static struct node *resize(struct trie *t, struct tnode *tn)
          *
          * expand not_to_be_doubled and to_be_doubled, and shorten:
          * 100 * (tnode_child_length(tn) - tn->empty_children +
-        *    tn->full_children ) >= inflate_threshold * new_child_length
+        *    tn->full_children) >= inflate_threshold * new_child_length
          *
          * expand new_child_length:
          * 100 * (tnode_child_length(tn) - tn->empty_children +
-        *    tn->full_children ) >=
+        *    tn->full_children) >=
          *      inflate_threshold * tnode_child_length(tn) * 2
          *
          * shorten again:
          * 50 * (tn->full_children + tnode_child_length(tn) -
-        *    tn->empty_children ) >= inflate_threshold *
+        *    tn->empty_children) >= inflate_threshold *
          *    tnode_child_length(tn)
          *
          */
@@ -587,9 +547,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
                50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
                                 inflate_threshold * tnode_child_length(tn))) {
  
-               tn = inflate(t, tn, &err);
-
-               if (err) {
+               old_tn = tn;
+               tn = inflate(t, tn);
+               if (IS_ERR(tn)) {
+                       tn = old_tn;
  #ifdef CONFIG_IP_FIB_TRIE_STATS
                         t->stats.resize_node_skipped++;
  #endif
@@ -609,9 +570,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
                100 * (tnode_child_length(tn) - tn->empty_children) <
                halve_threshold * tnode_child_length(tn)) {
  
-               tn = halve(t, tn, &err);
-
-               if (err) {
+               old_tn = tn;
+               tn = halve(t, tn);
+               if (IS_ERR(tn)) {
+                       tn = old_tn;
  #ifdef CONFIG_IP_FIB_TRIE_STATS
                         t->stats.resize_node_skipped++;
  #endif
@@ -621,44 +583,37 @@ static struct node *resize(struct trie *t, struct tnode *tn)
  
  
         /* Only one child remains */
-
         if (tn->empty_children == tnode_child_length(tn) - 1)
                 for (i = 0; i < tnode_child_length(tn); i++) {
-               
-                       write_lock_bh(&fib_lock);
-                       if (tn->child[i] != NULL) {
-                               /* compress one level */
-                               struct node *n = tn->child[i];
-
-                               if (n)
-                                       NODE_INIT_PARENT(n, NODE_TYPE(n));
-
-                               write_unlock_bh(&fib_lock);
-                               tnode_free(tn);
-                               return n;
-                       }
-                       write_unlock_bh(&fib_lock);
+                       struct node *n;
+
+                       n = tn->child[i];
+                       if (!n)
+                               continue;
+
+                       /* compress one level */
+
+                       NODE_SET_PARENT(n, NULL);
+                       tnode_free(tn);
+                       return n;
                 }
  
         return (struct node *) tn;
  }
  
-static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
+static struct tnode *inflate(struct trie *t, struct tnode *tn)
  {
         struct tnode *inode;
         struct tnode *oldtnode = tn;
         int olen = tnode_child_length(tn);
         int i;
  
-       if (trie_debug)
-               printk("In inflate\n");
+       pr_debug("In inflate\n");
  
         tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
  
-       if (!tn) {
-               *err = -ENOMEM;
-               return oldtnode;
-       }
+       if (!tn)
+               return ERR_PTR(-ENOMEM);
  
         /*
          * Preallocate and store tnodes before the actual work so we
@@ -666,8 +621,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
          * fails. In case of failure we return the oldnode and  inflate
          * of tnode is ignored.
          */
-               
-       for(i = 0; i < olen; i++) {
+
+       for (i = 0; i < olen; i++) {
                 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
  
                 if (inode &&
@@ -675,46 +630,30 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
                     inode->pos == oldtnode->pos + oldtnode->bits &&
                     inode->bits > 1) {
                         struct tnode *left, *right;
-
                         t_key m = TKEY_GET_MASK(inode->pos, 1);
  
                         left = tnode_new(inode->key&(~m), inode->pos + 1,
                                          inode->bits - 1);
+                       if (!left)
+                               goto nomem;
  
-                       if (!left) {
-                               *err = -ENOMEM;
-                               break;
-                       }
-               
                         right = tnode_new(inode->key|m, inode->pos + 1,
                                           inode->bits - 1);
  
-                       if (!right) {
-                               *err = -ENOMEM;
-                               break;
-                       }
+                        if (!right) {
+                               tnode_free(left);
+                               goto nomem;
+                        }
  
                         put_child(t, tn, 2*i, (struct node *) left);
                         put_child(t, tn, 2*i+1, (struct node *) right);
                 }
         }
  
-       if (*err) {
-               int size = tnode_child_length(tn);
-               int j;
-
-               for(j = 0; j < size; j++)
-                       if (tn->child[j])
-                               tnode_free((struct tnode *)tn->child[j]);
-
-               tnode_free(tn);
-       
-               *err = -ENOMEM;
-               return oldtnode;
-       }
-
-       for(i = 0; i < olen; i++) {
+       for (i = 0; i < olen; i++) {
                 struct node *node = tnode_get_child(oldtnode, i);
+               struct tnode *left, *right;
+               int size, j;
  
                 /* An empty child */
                 if (node == NULL)
@@ -740,76 +679,82 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
                         put_child(t, tn, 2*i+1, inode->child[1]);
  
                         tnode_free(inode);
+                       continue;
                 }
  
-                       /* An internal node with more than two children */
-               else {
-                       struct tnode *left, *right;
-                       int size, j;
-
-                       /* We will replace this node 'inode' with two new
-                        * ones, 'left' and 'right', each with half of the
-                        * original children. The two new nodes will have
-                        * a position one bit further down the key and this
-                        * means that the "significant" part of their keys
-                        * (see the discussion near the top of this file)
-                        * will differ by one bit, which will be "0" in
-                        * left's key and "1" in right's key. Since we are
-                        * moving the key position by one step, the bit that
-                        * we are moving away from - the bit at position
-                        * (inode->pos) - is the one that will differ between
-                        * left and right. So... we synthesize that bit in the
-                        * two  new keys.
-                        * The mask 'm' below will be a single "one" bit at
-                        * the position (inode->pos)
-                        */
-
-                       /* Use the old key, but set the new significant
-                        *   bit to zero.
-                        */
+               /* An internal node with more than two children */
+
+               /* We will replace this node 'inode' with two new
+                * ones, 'left' and 'right', each with half of the
+                * original children. The two new nodes will have
+                * a position one bit further down the key and this
+                * means that the "significant" part of their keys
+                * (see the discussion near the top of this file)
+                * will differ by one bit, which will be "0" in
+                * left's key and "1" in right's key. Since we are
+                * moving the key position by one step, the bit that
+                * we are moving away from - the bit at position
+                * (inode->pos) - is the one that will differ between
+                * left and right. So... we synthesize that bit in the
+                * two  new keys.
+                * The mask 'm' below will be a single "one" bit at
+                * the position (inode->pos)
+                */
  
-                       left = (struct tnode *) tnode_get_child(tn, 2*i);
-                       put_child(t, tn, 2*i, NULL);
+               /* Use the old key, but set the new significant
+                *   bit to zero.
+                */
  
-                       if (!left)
-                               BUG();
+               left = (struct tnode *) tnode_get_child(tn, 2*i);
+               put_child(t, tn, 2*i, NULL);
  
-                       right = (struct tnode *) tnode_get_child(tn, 2*i+1);
-                       put_child(t, tn, 2*i+1, NULL);
+               BUG_ON(!left);
  
-                       if (!right)
-                               BUG();
+               right = (struct tnode *) tnode_get_child(tn, 2*i+1);
+               put_child(t, tn, 2*i+1, NULL);
  
-                       size = tnode_child_length(left);
-                       for(j = 0; j < size; j++) {
-                               put_child(t, left, j, inode->child[j]);
-                               put_child(t, right, j, inode->child[j + size]);
-                       }
-                       put_child(t, tn, 2*i, resize(t, left));
-                       put_child(t, tn, 2*i+1, resize(t, right));
+               BUG_ON(!right);
  
-                       tnode_free(inode);
+               size = tnode_child_length(left);
+               for (j = 0; j < size; j++) {
+                       put_child(t, left, j, inode->child[j]);
+                       put_child(t, right, j, inode->child[j + size]);
                 }
+               put_child(t, tn, 2*i, resize(t, left));
+               put_child(t, tn, 2*i+1, resize(t, right));
+
+               tnode_free(inode);
         }
         tnode_free(oldtnode);
         return tn;
+nomem:
+       {
+               int size = tnode_child_length(tn);
+               int j;
+
+               for (j = 0; j < size; j++)
+                       if (tn->child[j])
+                               tnode_free((struct tnode *)tn->child[j]);
+
+               tnode_free(tn);
+
+               return ERR_PTR(-ENOMEM);
+       }
  }
  
-static struct tnode *halve(struct trie *t, struct tnode *tn, int *err)
+static struct tnode *halve(struct trie *t, struct tnode *tn)
  {
         struct tnode *oldtnode = tn;
         struct node *left, *right;
         int i;
         int olen = tnode_child_length(tn);
  
-       if (trie_debug) printk("In halve\n");
+       pr_debug("In halve\n");
  
         tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
  
-       if (!tn) {
-               *err = -ENOMEM;
-               return oldtnode;
-       }
+       if (!tn)
+               return ERR_PTR(-ENOMEM);
  
         /*
          * Preallocate and store tnodes before the actual work so we
@@ -818,38 +763,27 @@ static struct tnode *halve(struct trie *t, struct tnode *tn, int *err)
          * of tnode is ignored.
          */
  
-       for(i = 0; i < olen; i += 2) {
+       for (i = 0; i < olen; i += 2) {
                 left = tnode_get_child(oldtnode, i);
                 right = tnode_get_child(oldtnode, i+1);
  
                 /* Two nonempty children */
-               if (left && right)  {
-                       struct tnode *newBinNode =
-                               tnode_new(left->key, tn->pos + tn->bits, 1);
+               if (left && right) {
+                       struct tnode *newn;
  
-                       if (!newBinNode) {
-                               *err = -ENOMEM;
-                               break;
-                       }
-                       put_child(t, tn, i/2, (struct node *)newBinNode);
-               }
-       }
+                       newn = tnode_new(left->key, tn->pos + tn->bits, 1);
  
-       if (*err) {
-               int size = tnode_child_length(tn);
-               int j;
+                       if (!newn)
+                               goto nomem;
  
-               for(j = 0; j < size; j++)
-                       if (tn->child[j])
-                               tnode_free((struct tnode *)tn->child[j]);
+                       put_child(t, tn, i/2, (struct node *)newn);
+               }
  
-               tnode_free(tn);
-       
-               *err = -ENOMEM;
-               return oldtnode;
         }
  
-       for(i = 0; i < olen; i += 2) {
+       for (i = 0; i < olen; i += 2) {
+               struct tnode *newBinNode;
+
                 left = tnode_get_child(oldtnode, i);
                 right = tnode_get_child(oldtnode, i+1);
  
@@ -858,88 +792,99 @@ static struct tnode *halve(struct trie *t, struct tnode *tn, int *err)
                         if (right == NULL)    /* Both are empty */
                                 continue;
                         put_child(t, tn, i/2, right);
-               } else if (right == NULL)
+                       continue;
+               }
+
+               if (right == NULL) {
                         put_child(t, tn, i/2, left);
+                       continue;
+               }
  
                 /* Two nonempty children */
-               else {
-                       struct tnode *newBinNode =
-                               (struct tnode *) tnode_get_child(tn, i/2);
-                       put_child(t, tn, i/2, NULL);
-
-                       if (!newBinNode)
-                               BUG();
-
-                       put_child(t, newBinNode, 0, left);
-                       put_child(t, newBinNode, 1, right);
-                       put_child(t, tn, i/2, resize(t, newBinNode));
-               }
+               newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
+               put_child(t, tn, i/2, NULL);
+               put_child(t, newBinNode, 0, left);
+               put_child(t, newBinNode, 1, right);
+               put_child(t, tn, i/2, resize(t, newBinNode));
         }
         tnode_free(oldtnode);
         return tn;
+nomem:
+       {
+               int size = tnode_child_length(tn);
+               int j;
+
+               for (j = 0; j < size; j++)
+                       if (tn->child[j])
+                               tnode_free((struct tnode *)tn->child[j]);
+
+               tnode_free(tn);
+
+               return ERR_PTR(-ENOMEM);
+       }
  }
  
-static void *trie_init(struct trie *t)
+static void trie_init(struct trie *t)
  {
-       if (t) {
-               t->size = 0;
-               t->trie = NULL;
-               t->revision = 0;
+       if (!t)
+               return;
+
+       t->size = 0;
+       rcu_assign_pointer(t->trie, NULL);
+       t->revision = 0;
  #ifdef CONFIG_IP_FIB_TRIE_STATS
-                       memset(&t->stats, 0, sizeof(struct trie_use_stats));
+       memset(&t->stats, 0, sizeof(struct trie_use_stats));
  #endif
-       }
-       return t;
  }
  
+/* The read side must use rcu_read_lock; currently the dump routines
+ come in via get_fa_head and dump */
+
  static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
  {
         struct hlist_node *node;
         struct leaf_info *li;
  
-       hlist_for_each_entry(li, node, head, hlist) {
+       hlist_for_each_entry_rcu(li, node, head, hlist)
                 if (li->plen == plen)
                         return li;
-       }
+
         return NULL;
  }
  
  static inline struct list_head * get_fa_head(struct leaf *l, int plen)
  {
-       struct list_head *fa_head = NULL;
         struct leaf_info *li = find_leaf_info(&l->list, plen);
  
-       if (li)
-               fa_head = &li->falh;
+       if (!li)
+               return NULL;
  
-       return fa_head;
+       return &li->falh;
  }
  
  static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
  {
-       struct leaf_info *li = NULL, *last = NULL;
-       struct hlist_node *node, *tmp;
-
-       write_lock_bh(&fib_lock);
-
-       if (hlist_empty(head))
-               hlist_add_head(&new->hlist, head);
-       else {
-               hlist_for_each_entry_safe(li, node, tmp, head, hlist) {
-               
-                       if (new->plen > li->plen)
-                               break;
-               
-                       last = li;
-               }
-               if (last)
-                       hlist_add_after(&last->hlist, &new->hlist);
-               else
-                       hlist_add_before(&new->hlist, &li->hlist);
-       }
-       write_unlock_bh(&fib_lock);
+        struct leaf_info *li = NULL, *last = NULL;
+        struct hlist_node *node;
+
+        if (hlist_empty(head)) {
+                hlist_add_head_rcu(&new->hlist, head);
+        } else {
+                hlist_for_each_entry(li, node, head, hlist) {
+                        if (new->plen > li->plen)
+                                break;
+
+                        last = li;
+                }
+                if (last)
+                        hlist_add_after_rcu(&last->hlist, &new->hlist);
+                else
+                        hlist_add_before_rcu(&new->hlist, &li->hlist);
+        }
  }
  
+/* rcu_read_lock needs to be held by the caller on the read side */
+
  static struct leaf *
  fib_find_node(struct trie *t, u32 key)
  {
@@ -948,61 +893,43 @@ fib_find_node(struct trie *t, u32 key)
         struct node *n;
  
         pos = 0;
-       n = t->trie;
+       n = rcu_dereference(t->trie);
  
         while (n != NULL &&  NODE_TYPE(n) == T_TNODE) {
                 tn = (struct tnode *) n;
-               
+
                 check_tnode(tn);
-               
+
                 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
-                       pos=tn->pos + tn->bits;
+                       pos = tn->pos + tn->bits;
                         n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
-               }
-               else
+               } else
                         break;
         }
         /* Case we have found a leaf. Compare prefixes */
  
-       if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
-               struct leaf *l = (struct leaf *) n;
-               return l;
-       }
+       if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
+               return (struct leaf *)n;
+
         return NULL;
  }
  
  static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
  {
-       int i = 0;
         int wasfull;
         t_key cindex, key;
         struct tnode *tp = NULL;
  
-       if (!tn)
-               BUG();
-
         key = tn->key;
-       i = 0;
  
         while (tn != NULL && NODE_PARENT(tn) != NULL) {
  
-               if (i > 10) {
-                       printk("Rebalance tn=%p \n", tn);
-                       if (tn)                 printk("tn->parent=%p \n", NODE_PARENT(tn));
-               
-                       printk("Rebalance tp=%p \n", tp);
-                       if (tp)                 printk("tp->parent=%p \n", NODE_PARENT(tp));
-               }
-
-               if (i > 12) BUG();
-               i++;
-
                 tp = NODE_PARENT(tn);
                 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
                 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
                 tn = (struct tnode *) resize (t, (struct tnode *)tn);
                 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
-       
+
                 if (!NODE_PARENT(tn))
                         break;
  
@@ -1015,6 +942,8 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
         return (struct node*) tn;
  }
  
+/* only used from updater-side */
+
  static  struct list_head *
  fib_insert_node(struct trie *t, int *err, u32 key, int plen)
  {
@@ -1050,20 +979,16 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
  
         while (n != NULL &&  NODE_TYPE(n) == T_TNODE) {
                 tn = (struct tnode *) n;
-               
+
                 check_tnode(tn);
-       
+
                 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
                         tp = tn;
-                       pos=tn->pos + tn->bits;
+                       pos = tn->pos + tn->bits;
                         n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
  
-                       if (n && NODE_PARENT(n) != tn) {
-                               printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
-                               BUG();
-                       }
-               }
-               else
+                       BUG_ON(n && NODE_PARENT(n) != tn);
+               } else
                         break;
         }
  
@@ -1073,17 +998,15 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
          * tp is n's (parent) ----> NULL or TNODE
          */
  
-       if (tp && IS_LEAF(tp))
-               BUG();
-
+       BUG_ON(tp && IS_LEAF(tp));
  
         /* Case 1: n is a leaf. Compare prefixes */
  
         if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
-               struct leaf *l = ( struct leaf *)  n;
-       
+               struct leaf *l = (struct leaf *) n;
+
                 li = leaf_info_new(plen);
-       
+
                 if (!li) {
                         *err = -ENOMEM;
                         goto err;
@@ -1113,35 +1036,29 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
         fa_head = &li->falh;
         insert_leaf_info(&l->list, li);
  
-       /* Case 2: n is NULL, and will just insert a new leaf */
         if (t->trie && n == NULL) {
+               /* Case 2: n is NULL, and will just insert a new leaf */
  
                 NODE_SET_PARENT(l, tp);
-       
-               if (!tp)
-                       BUG();
  
-               else {
-                       cindex = tkey_extract_bits(key, tp->pos, tp->bits);
-                       put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
-               }
-       }
-       /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
-       else {
+               cindex = tkey_extract_bits(key, tp->pos, tp->bits);
+               put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
+       } else {
+               /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
                 /*
                  *  Add a new tnode here
                  *  first tnode need some special handling
                  */
  
                 if (tp)
-                       pos=tp->pos+tp->bits;
+                       pos = tp->pos+tp->bits;
                 else
-                       pos=0;
+                       pos = 0;
+
                 if (n) {
                         newpos = tkey_mismatch(key, pos, n->key);
                         tn = tnode_new(n->key, newpos, 1);
-               }
-               else {
+               } else {
                         newpos = 0;
                         tn = tnode_new(key, newpos, 1); /* First tnode */
                 }
@@ -1151,32 +1068,33 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
                         tnode_free((struct tnode *) l);
                         *err = -ENOMEM;
                         goto err;
-               }               
-               
+               }
+
                 NODE_SET_PARENT(tn, tp);
  
-               missbit=tkey_extract_bits(key, newpos, 1);
+               missbit = tkey_extract_bits(key, newpos, 1);
                 put_child(t, tn, missbit, (struct node *)l);
                 put_child(t, tn, 1-missbit, n);
  
                 if (tp) {
                         cindex = tkey_extract_bits(key, tp->pos, tp->bits);
                         put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
-               }
-               else {
-                       t->trie = (struct node*) tn; /* First tnode */
+               } else {
+                       rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */
                         tp = tn;
                 }
         }
-       if (tp && tp->pos+tp->bits > 32) {
+
+       if (tp && tp->pos + tp->bits > 32)
                 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
                        tp, tp->pos, tp->bits, key, plen);
-       }
+
         /* Rebalance the trie */
-       t->trie = trie_rebalance(t, tp);
+
+       rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
  done:
         t->revision++;
-err:;
+err:
         return fa_head;
  }
  
@@ -1204,17 +1122,18 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
  
         key = ntohl(key);
  
-       if (trie_debug)
-               printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
+       pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
  
-       mask = ntohl( inet_make_mask(plen) );
+       mask = ntohl(inet_make_mask(plen));
  
         if (key & ~mask)
                 return -EINVAL;
  
         key = key & mask;
  
-       if  ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL)
+       fi = fib_create_info(r, rta, nlhdr, &err);
+
+       if (!fi)
                 goto err;
  
         l = fib_find_node(t, key);
@@ -1236,8 +1155,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
          * and we need to allocate a new one of those as well.
          */
  
-       if (fa &&
-           fa->fa_info->fib_priority == fi->fib_priority) {
+       if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
                 struct fib_alias *fa_orig;
  
                 err = -EEXIST;
@@ -1248,22 +1166,27 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
                         struct fib_info *fi_drop;
                         u8 state;
  
-                       write_lock_bh(&fib_lock);
+                       err = -ENOBUFS;
+                       new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
+                       if (new_fa == NULL)
+                               goto out;
  
                         fi_drop = fa->fa_info;
-                       fa->fa_info = fi;
-                       fa->fa_type = type;
-                       fa->fa_scope = r->rtm_scope;
+                       new_fa->fa_tos = fa->fa_tos;
+                       new_fa->fa_info = fi;
+                       new_fa->fa_type = type;
+                       new_fa->fa_scope = r->rtm_scope;
                         state = fa->fa_state;
-                       fa->fa_state &= ~FA_S_ACCESSED;
+                       new_fa->fa_state &= ~FA_S_ACCESSED;
  
-                       write_unlock_bh(&fib_lock);
+                       list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
+                       alias_free_mem_rcu(fa);
  
                         fib_release_info(fi_drop);
                         if (state & FA_S_ACCESSED)
-                         rt_cache_flush(-1);
+                               rt_cache_flush(-1);
  
-                           goto succeeded;
+                       goto succeeded;
                 }
                 /* Error if we find a perfect match which
                  * uses the same scope, type, and nexthop
@@ -1285,7 +1208,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
                         fa = fa_orig;
         }
         err = -ENOENT;
-       if (!(nlhdr->nlmsg_flags&NLM_F_CREATE))
+       if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
                 goto out;
  
         err = -ENOBUFS;
@@ -1298,9 +1221,6 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
         new_fa->fa_type = type;
         new_fa->fa_scope = r->rtm_scope;
         new_fa->fa_state = 0;
-#if 0
-       new_fa->dst = NULL;
-#endif
         /*
          * Insert new entry to the list.
          */
@@ -1312,12 +1232,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
                         goto out_free_new_fa;
         }
  
-       write_lock_bh(&fib_lock);
-
-       list_add_tail(&new_fa->fa_list,
-                (fa ? &fa->fa_list : fa_head));
-
-       write_unlock_bh(&fib_lock);
+       list_add_tail_rcu(&new_fa->fa_list,
+                         (fa ? &fa->fa_list : fa_head));
  
         rt_cache_flush(-1);
         rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
@@ -1328,38 +1244,40 @@ out_free_new_fa:
         kmem_cache_free(fn_alias_kmem, new_fa);
  out:
         fib_release_info(fi);
-err:;
+err:
         return err;
  }
  
-static inline int check_leaf(struct trie *t, struct leaf *l,  t_key key, int *plen, const struct flowi *flp,
-                            struct fib_result *res, int *err)
+
+/* should be called with rcu_read_lock */
+static inline int check_leaf(struct trie *t, struct leaf *l,
+                            t_key key, int *plen, const struct flowi *flp,
+                            struct fib_result *res)
  {
-       int i;
+       int err, i;
         t_key mask;
         struct leaf_info *li;
         struct hlist_head *hhead = &l->list;
         struct hlist_node *node;
  
-       hlist_for_each_entry(li, node, hhead, hlist) {
-
+       hlist_for_each_entry_rcu(li, node, hhead, hlist) {
                 i = li->plen;
                 mask = ntohl(inet_make_mask(i));
                 if (l->key != (key & mask))
                         continue;
  
-               if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) {
+               if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
                         *plen = i;
  #ifdef CONFIG_IP_FIB_TRIE_STATS
                         t->stats.semantic_match_passed++;
  #endif
-                       return 1;
+                       return err;
                 }
  #ifdef CONFIG_IP_FIB_TRIE_STATS
                 t->stats.semantic_match_miss++;
  #endif
         }
-       return 0;
+       return 1;
  }
  
  static int
@@ -1370,13 +1288,17 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
         struct node *n;
         struct tnode *pn;
         int pos, bits;
-       t_key key=ntohl(flp->fl4_dst);
+       t_key key = ntohl(flp->fl4_dst);
         int chopped_off;
         t_key cindex = 0;
         int current_prefix_length = KEYLENGTH;
-       n = t->trie;
+       struct tnode *cn;
+       t_key node_prefix, key_prefix, pref_mismatch;
+       int mp;
+
+       rcu_read_lock();
  
-       read_lock(&fib_lock);
+       n = rcu_dereference(t->trie);
         if (!n)
                 goto failed;
  
@@ -1386,15 +1308,14 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
  
         /* Just a leaf? */
         if (IS_LEAF(n)) {
-               if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
+               if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
                         goto found;
                 goto failed;
         }
         pn = (struct tnode *) n;
         chopped_off = 0;
  
-        while (pn) {
-
+       while (pn) {
                 pos = pn->pos;
                 bits = pn->bits;
  
@@ -1410,130 +1331,129 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
                         goto backtrace;
                 }
  
-               if (IS_TNODE(n)) {
+               if (IS_LEAF(n)) {
+                       if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
+                               goto found;
+                       else
+                               goto backtrace;
+               }
+
  #define HL_OPTIMIZE
  #ifdef HL_OPTIMIZE
-                       struct tnode *cn = (struct tnode *)n;
-                       t_key node_prefix, key_prefix, pref_mismatch;
-                       int mp;
+               cn = (struct tnode *)n;
  
-                       /*
-                        * It's a tnode, and we can do some extra checks here if we
-                        * like, to avoid descending into a dead-end branch.
-                        * This tnode is in the parent's child array at index
-                        * key[p_pos..p_pos+p_bits] but potentially with some bits
-                        * chopped off, so in reality the index may be just a
-                        * subprefix, padded with zero at the end.
-                        * We can also take a look at any skipped bits in this
-                        * tnode - everything up to p_pos is supposed to be ok,
-                        * and the non-chopped bits of the index (se previous
-                        * paragraph) are also guaranteed ok, but the rest is
-                        * considered unknown.
-                        *
-                        * The skipped bits are key[pos+bits..cn->pos].
-                        */
-               
-                       /* If current_prefix_length < pos+bits, we are already doing
-                        * actual prefix  matching, which means everything from
-                        * pos+(bits-chopped_off) onward must be zero along some
-                        * branch of this subtree - otherwise there is *no* valid
-                        * prefix present. Here we can only check the skipped
-                        * bits. Remember, since we have already indexed into the
-                        * parent's child array, we know that the bits we chopped of
-                        * *are* zero.
-                        */
+               /*
+                * It's a tnode, and we can do some extra checks here if we
+                * like, to avoid descending into a dead-end branch.
+                * This tnode is in the parent's child array at index
+                * key[p_pos..p_pos+p_bits] but potentially with some bits
+                * chopped off, so in reality the index may be just a
+                * subprefix, padded with zero at the end.
+                * We can also take a look at any skipped bits in this
+                * tnode - everything up to p_pos is supposed to be ok,
+                * and the non-chopped bits of the index (see previous
+                * paragraph) are also guaranteed ok, but the rest is
+                * considered unknown.
+                *
+                * The skipped bits are key[pos+bits..cn->pos].
+                */
  
-                       /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
-               
-                       if (current_prefix_length < pos+bits) {
-                               if (tkey_extract_bits(cn->key, current_prefix_length,
-                                                     cn->pos - current_prefix_length) != 0 ||
-                                   !(cn->child[0]))
-                                       goto backtrace;
-                       }
+               /* If current_prefix_length < pos+bits, we are already doing
+                * actual prefix  matching, which means everything from
+                * pos+(bits-chopped_off) onward must be zero along some
+                * branch of this subtree - otherwise there is *no* valid
+                * prefix present. Here we can only check the skipped
+                * bits. Remember, since we have already indexed into the
+                * parent's child array, we know that the bits we chopped off
+                * *are* zero.
+                */
  
-                       /*
-                        * If chopped_off=0, the index is fully validated and we
-                        * only need to look at the skipped bits for this, the new,
-                        * tnode. What we actually want to do is to find out if
-                        * these skipped bits match our key perfectly, or if we will
-                        * have to count on finding a matching prefix further down,
-                        * because if we do, we would like to have some way of
-                        * verifying the existence of such a prefix at this point.
-                        */
+               /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
  
-                       /* The only thing we can do at this point is to verify that
-                        * any such matching prefix can indeed be a prefix to our
-                        * key, and if the bits in the node we are inspecting that
-                        * do not match our key are not ZERO, this cannot be true.
-                        * Thus, find out where there is a mismatch (before cn->pos)
-                        * and verify that all the mismatching bits are zero in the
-                        * new tnode's key.
-                        */
+               if (current_prefix_length < pos+bits) {
+                       if (tkey_extract_bits(cn->key, current_prefix_length,
+                                               cn->pos - current_prefix_length) != 0 ||
+                           !(cn->child[0]))
+                               goto backtrace;
+               }
  
-                       /* Note: We aren't very concerned about the piece of the key
-                        * that precede pn->pos+pn->bits, since these have already been
-                        * checked. The bits after cn->pos aren't checked since these are
-                        * by definition "unknown" at this point. Thus, what we want to
-                        * see is if we are about to enter the "prefix matching" state,
-                        * and in that case verify that the skipped bits that will prevail
-                        * throughout this subtree are zero, as they have to be if we are
-                        * to find a matching prefix.
-                        */
+               /*
+                * If chopped_off=0, the index is fully validated and we
+                * only need to look at the skipped bits for this, the new,
+                * tnode. What we actually want to do is to find out if
+                * these skipped bits match our key perfectly, or if we will
+                * have to count on finding a matching prefix further down,
+                * because if we do, we would like to have some way of
+                * verifying the existence of such a prefix at this point.
+                */
  
-                       node_prefix = MASK_PFX(cn->key, cn->pos);
-                       key_prefix = MASK_PFX(key, cn->pos);
-                       pref_mismatch = key_prefix^node_prefix;
-                       mp = 0;
+               /* The only thing we can do at this point is to verify that
+                * any such matching prefix can indeed be a prefix to our
+                * key, and if the bits in the node we are inspecting that
+                * do not match our key are not ZERO, this cannot be true.
+                * Thus, find out where there is a mismatch (before cn->pos)
+                * and verify that all the mismatching bits are zero in the
+                * new tnode's key.
+                */
  
-                       /* In short: If skipped bits in this node do not match the search
-                        * key, enter the "prefix matching" state.directly.
-                        */
-                       if (pref_mismatch) {
-                               while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
-                                       mp++;
-                                       pref_mismatch = pref_mismatch <<1;
-                               }
-                               key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
-                       
-                               if (key_prefix != 0)
-                                       goto backtrace;
-
-                               if (current_prefix_length >= cn->pos)
-                                       current_prefix_length=mp;
-                      }
-#endif
-                      pn = (struct tnode *)n; /* Descend */
-                      chopped_off = 0;
-                      continue;
+               /* Note: We aren't very concerned about the piece of the key
+                * that precede pn->pos+pn->bits, since these have already been
+                * checked. The bits after cn->pos aren't checked since these are
+                * by definition "unknown" at this point. Thus, what we want to
+                * see is if we are about to enter the "prefix matching" state,
+                * and in that case verify that the skipped bits that will prevail
+                * throughout this subtree are zero, as they have to be if we are
+                * to find a matching prefix.
+                */
+
+               node_prefix = MASK_PFX(cn->key, cn->pos);
+               key_prefix = MASK_PFX(key, cn->pos);
+               pref_mismatch = key_prefix^node_prefix;
+               mp = 0;
+
+               /* In short: If skipped bits in this node do not match the search
+                * key, enter the "prefix matching" state directly.
+                */
+               if (pref_mismatch) {
+                       while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
+                               mp++;
+                               pref_mismatch = pref_mismatch <<1;
+                       }
+                       key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
+
+                       if (key_prefix != 0)
+                               goto backtrace;
+
+                       if (current_prefix_length >= cn->pos)
+                               current_prefix_length = mp;
                 }
-               if (IS_LEAF(n)) {
-                       if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
-                               goto found;
-              }
+#endif
+               pn = (struct tnode *)n; /* Descend */
+               chopped_off = 0;
+               continue;
+
  backtrace:
                 chopped_off++;
  
                 /* As zero don't change the child key (cindex) */
-               while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) {
+               while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
                         chopped_off++;
-               }
  
                 /* Decrease current_... with bits chopped off */
                 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
                         current_prefix_length = pn->pos + pn->bits - chopped_off;
-       
+
                 /*
                  * Either we do the actual chop off according or if we have
                  * chopped off all bits in this tnode walk up to our parent.
                  */
  
-               if (chopped_off <= pn->bits)
+               if (chopped_off <= pn->bits) {
                         cindex &= ~(1 << (chopped_off-1));
-               else {
+               } else {
                         if (NODE_PARENT(pn) == NULL)
                                 goto failed;
-               
+
                         /* Get Child's index */
                         cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
                         pn = NODE_PARENT(pn);
@@ -1548,10 +1468,11 @@ backtrace:
  failed:
         ret = 1;
  found:
-       read_unlock(&fib_lock);
+       rcu_read_unlock();
         return ret;
  }
  
+/* only called from updater side */
  static int trie_leaf_remove(struct trie *t, t_key key)
  {
         t_key cindex;
@@ -1559,24 +1480,20 @@ static int trie_leaf_remove(struct trie *t, t_key key)
         struct node *n = t->trie;
         struct leaf *l;
  
-       if (trie_debug)
-               printk("entering trie_leaf_remove(%p)\n", n);
+       pr_debug("entering trie_leaf_remove(%p)\n", n);
  
         /* Note that in the case skipped bits, those bits are *not* checked!
          * When we finish this, we will have NULL or a T_LEAF, and the
          * T_LEAF may or may not match our key.
          */
  
-        while (n != NULL && IS_TNODE(n)) {
+       while (n != NULL && IS_TNODE(n)) {
                 struct tnode *tn = (struct tnode *) n;
                 check_tnode(tn);
                 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
  
-                       if (n && NODE_PARENT(n) != tn) {
-                               printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
-                               BUG();
-                       }
-        }
+               BUG_ON(n && NODE_PARENT(n) != tn);
+       }
         l = (struct leaf *) n;
  
         if (!n || !tkey_equals(l->key, key))
@@ -1590,23 +1507,24 @@ static int trie_leaf_remove(struct trie *t, t_key key)
         t->revision++;
         t->size--;
  
+       preempt_disable();
         tp = NODE_PARENT(n);
         tnode_free((struct tnode *) n);
  
         if (tp) {
                 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
                 put_child(t, (struct tnode *)tp, cindex, NULL);
-               t->trie = trie_rebalance(t, tp);
-       }
-       else
-               t->trie = NULL;
+               rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
+       } else
+               rcu_assign_pointer(t->trie, NULL);
+       preempt_enable();
  
         return 1;
  }
  
  static int
  fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-              struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+               struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
  {
         struct trie *t = (struct trie *) tb->tb_data;
         u32 key, mask;
@@ -1615,6 +1533,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
         struct fib_alias *fa, *fa_to_delete;
         struct list_head *fa_head;
         struct leaf *l;
+       struct leaf_info *li;
+
  
         if (plen > 32)
                 return -EINVAL;
@@ -1624,7 +1544,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
                 memcpy(&key, rta->rta_dst, 4);
  
         key = ntohl(key);
-       mask = ntohl( inet_make_mask(plen) );
+       mask = ntohl(inet_make_mask(plen));
  
         if (key & ~mask)
                 return -EINVAL;
@@ -1641,11 +1561,11 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
         if (!fa)
                 return -ESRCH;
  
-       if (trie_debug)
-               printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
+       pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
  
         fa_to_delete = NULL;
         fa_head = fa->fa_list.prev;
+
         list_for_each_entry(fa, fa_head, fa_list) {
                 struct fib_info *fi = fa->fa_info;
  
@@ -1664,39 +1584,31 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
                 }
         }
  
-       if (fa_to_delete) {
-               int kill_li = 0;
-               struct leaf_info *li;
-
-               fa = fa_to_delete;
-               rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
+       if (!fa_to_delete)
+               return -ESRCH;
  
-               l = fib_find_node(t, key);
-               li = find_leaf_info(&l->list, plen);
+       fa = fa_to_delete;
+       rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
  
-               write_lock_bh(&fib_lock);
+       l = fib_find_node(t, key);
+       li = find_leaf_info(&l->list, plen);
  
-               list_del(&fa->fa_list);
+       list_del_rcu(&fa->fa_list);
  
-               if (list_empty(fa_head)) {
-                       hlist_del(&li->hlist);
-                       kill_li = 1;
-               }
-               write_unlock_bh(&fib_lock);
-       
-               if (kill_li)
-                       free_leaf_info(li);
+       if (list_empty(fa_head)) {
+               hlist_del_rcu(&li->hlist);
+               free_leaf_info(li);
+       }
  
-               if (hlist_empty(&l->list))
-                       trie_leaf_remove(t, key);
+       if (hlist_empty(&l->list))
+               trie_leaf_remove(t, key);
  
-               if (fa->fa_state & FA_S_ACCESSED)
-                       rt_cache_flush(-1);
+       if (fa->fa_state & FA_S_ACCESSED)
+               rt_cache_flush(-1);
  
-               fn_free_alias(fa);
-               return 0;
-       }
-       return -ESRCH;
+       fib_release_info(fa->fa_info);
+       alias_free_mem_rcu(fa);
+       return 0;
  }
  
  static int trie_flush_list(struct trie *t, struct list_head *head)
@@ -1706,14 +1618,11 @@ static int trie_flush_list(struct trie *t, struct list_head *head)
  
         list_for_each_entry_safe(fa, fa_node, head, fa_list) {
                 struct fib_info *fi = fa->fa_info;
-       
-               if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
-
-                       write_lock_bh(&fib_lock);
-                       list_del(&fa->fa_list);
-                       write_unlock_bh(&fib_lock);
  
-                       fn_free_alias(fa);
+               if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
+                       list_del_rcu(&fa->fa_list);
+                       fib_release_info(fa->fa_info);
+                       alias_free_mem_rcu(fa);
                         found++;
                 }
         }
@@ -1728,37 +1637,34 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l)
         struct leaf_info *li = NULL;
  
         hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
-               
                 found += trie_flush_list(t, &li->falh);
  
                 if (list_empty(&li->falh)) {
-
-                       write_lock_bh(&fib_lock);
-                       hlist_del(&li->hlist);
-                       write_unlock_bh(&fib_lock);
-
+                       hlist_del_rcu(&li->hlist);
                         free_leaf_info(li);
                 }
         }
         return found;
  }
  
+/* rcu_read_lock needs to be held by the caller on the read side */
+
  static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
  {
         struct node *c = (struct node *) thisleaf;
         struct tnode *p;
         int idx;
+       struct node *trie = rcu_dereference(t->trie);
  
         if (c == NULL) {
-               if (t->trie == NULL)
+               if (trie == NULL)
                         return NULL;
  
-               if (IS_LEAF(t->trie))          /* trie w. just a leaf */
-                       return (struct leaf *) t->trie;
+               if (IS_LEAF(trie))          /* trie w. just a leaf */
+                       return (struct leaf *) trie;
  
-               p = (struct tnode*) t->trie;  /* Start */
-       }
-       else
+               p = (struct tnode*) trie;  /* Start */
+       } else
                 p = (struct tnode *) NODE_PARENT(c);
  
         while (p) {
@@ -1771,29 +1677,31 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
                         pos = 0;
  
                 last = 1 << p->bits;
-               for(idx = pos; idx < last ; idx++) {
-                       if (p->child[idx]) {
-
-                               /* Decend if tnode */
-
-                               while (IS_TNODE(p->child[idx])) {
-                                       p = (struct tnode*) p->child[idx];
-                                       idx = 0;
-                               
-                                       /* Rightmost non-NULL branch */
-                                       if (p && IS_TNODE(p))
-                                               while (p->child[idx] == NULL && idx < (1 << p->bits)) idx++;
-
-                                       /* Done with this tnode? */
-                                       if (idx >= (1 << p->bits) || p->child[idx] == NULL )
-                                               goto up;
-                               }
-                               return (struct leaf*) p->child[idx];
+               for (idx = pos; idx < last ; idx++) {
+                       c = rcu_dereference(p->child[idx]);
+
+                       if (!c)
+                               continue;
+
+                       /* Descend if tnode */
+                       while (IS_TNODE(c)) {
+                               p = (struct tnode *) c;
+                               idx = 0;
+
+                               /* First non-NULL child, scanning up from index 0 */
+                               if (p && IS_TNODE(p))
+                                       while (!(c = rcu_dereference(p->child[idx]))
+                                              && idx < (1<<p->bits)) idx++;
+
+                               /* Done with this tnode? */
+                               if (idx >= (1 << p->bits) || !c)
+                                       goto up;
                         }
+                       return (struct leaf *) c;
                 }
  up:
                 /* No more children go up one step  */
-               c = (struct node*) p;
+               c = (struct node *) p;
                 p = (struct tnode *) NODE_PARENT(p);
         }
         return NULL; /* Ready. Root of trie */
@@ -1807,23 +1715,24 @@ static int fn_trie_flush(struct fib_table *tb)
  
         t->revision++;
  
-       for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
+       rcu_read_lock();
+       for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
                 found += trie_flush_leaf(t, l);
  
                 if (ll && hlist_empty(&ll->list))
                         trie_leaf_remove(t, ll->key);
                 ll = l;
         }
+       rcu_read_unlock();  
  
         if (ll && hlist_empty(&ll->list))
                 trie_leaf_remove(t, ll->key);
  
-       if (trie_debug)
-               printk("trie_flush found=%d\n", found);
+       pr_debug("trie_flush found=%d\n", found);
         return found;
  }
  
-static int trie_last_dflt=-1;
+static int trie_last_dflt = -1;
  
  static void
  fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
@@ -1840,7 +1749,7 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
         last_resort = NULL;
         order = -1;
  
-       read_lock(&fib_lock);
+       rcu_read_lock();
  
         l = fib_find_node(t, 0);
         if (!l)
@@ -1853,20 +1762,20 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
         if (list_empty(fa_head))
                 goto out;
  
-       list_for_each_entry(fa, fa_head, fa_list) {
+       list_for_each_entry_rcu(fa, fa_head, fa_list) {
                 struct fib_info *next_fi = fa->fa_info;
-       
+
                 if (fa->fa_scope != res->scope ||
                     fa->fa_type != RTN_UNICAST)
                         continue;
-       
+
                 if (next_fi->fib_priority > res->fi->fib_priority)
                         break;
                 if (!next_fi->fib_nh[0].nh_gw ||
                     next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
                         continue;
                 fa->fa_state |= FA_S_ACCESSED;
-       
+
                 if (fi == NULL) {
                         if (next_fi != res->fi)
                                 break;
@@ -1904,7 +1813,7 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
         }
         trie_last_dflt = last_idx;
   out:;
-       read_unlock(&fib_lock);
+       rcu_read_unlock();
  }
  
  static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
@@ -1913,12 +1822,14 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
         int i, s_i;
         struct fib_alias *fa;
  
-       u32 xkey=htonl(key);
+       u32 xkey = htonl(key);
  
-       s_i=cb->args[3];
+       s_i = cb->args[3];
         i = 0;
  
-       list_for_each_entry(fa, fah, fa_list) {
+       /* rcu_read_lock is held by caller */
+
+       list_for_each_entry_rcu(fa, fah, fa_list) {
                 if (i < s_i) {
                         i++;
                         continue;
@@ -1946,10 +1857,10 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
                                   fa->fa_info, 0) < 0) {
                         cb->args[3] = i;
                         return -1;
-                       }
+               }
                 i++;
         }
-       cb->args[3]=i;
+       cb->args[3] = i;
         return skb->len;
  }
  
@@ -1959,10 +1870,10 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
         int h, s_h;
         struct list_head *fa_head;
         struct leaf *l = NULL;
-       s_h=cb->args[2];
  
-       for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
+       s_h = cb->args[2];
  
+       for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
                 if (h < s_h)
                         continue;
                 if (h > s_h)
@@ -1970,7 +1881,7 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
                                sizeof(cb->args) - 3*sizeof(cb->args[0]));
  
                 fa_head = get_fa_head(l, plen);
-       
+
                 if (!fa_head)
                         continue;
  
@@ -1978,11 +1889,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
                         continue;
  
                 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
-                       cb->args[2]=h;
+                       cb->args[2] = h;
                         return -1;
                 }
         }
-       cb->args[2]=h;
+       cb->args[2] = h;
         return skb->len;
  }
  
@@ -1993,25 +1904,24 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
  
         s_m = cb->args[1];
  
-       read_lock(&fib_lock);
-       for (m=0; m<=32; m++) {
-
+       rcu_read_lock();
+       for (m = 0; m <= 32; m++) {
                 if (m < s_m)
                         continue;
                 if (m > s_m)
                         memset(&cb->args[2], 0,
-                              sizeof(cb->args) - 2*sizeof(cb->args[0]));
+                               sizeof(cb->args) - 2*sizeof(cb->args[0]));
  
                 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
                         cb->args[1] = m;
                         goto out;
                 }
         }
-       read_unlock(&fib_lock);
+       rcu_read_unlock();
         cb->args[1] = m;
         return skb->len;
- out:
-       read_unlock(&fib_lock);
+out:
+       rcu_read_unlock();
         return -1;
  }
  
@@ -2051,9 +1961,9 @@ struct fib_table * __init fib_hash_init(int id)
         trie_init(t);
  
         if (id == RT_TABLE_LOCAL)
-                trie_local = t;
+               trie_local = t;
         else if (id == RT_TABLE_MAIN)
-                trie_main = t;
+               trie_main = t;
  
         if (id == RT_TABLE_LOCAL)
                 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
@@ -2065,7 +1975,8 @@ struct fib_table * __init fib_hash_init(int id)
  
  static void putspace_seq(struct seq_file *seq, int n)
  {
-       while (n--) seq_printf(seq, " ");
+       while (n--)
+               seq_printf(seq, " ");
  }
  
  static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
@@ -2086,29 +1997,22 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
                 seq_printf(seq, "%d/", cindex);
                 printbin_seq(seq, cindex, bits);
                 seq_printf(seq, ": ");
-       }
-       else
+       } else
                 seq_printf(seq, "<root>: ");
         seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
  
-       if (IS_LEAF(n))
-               seq_printf(seq, "key=%d.%d.%d.%d\n",
-                          n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
-       else {
-               int plen = ((struct tnode *)n)->pos;
-               t_key prf=MASK_PFX(n->key, plen);
-               seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
-                          prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
-       }
         if (IS_LEAF(n)) {
-               struct leaf *l=(struct leaf *)n;
+               struct leaf *l = (struct leaf *)n;
                 struct fib_alias *fa;
                 int i;
-               for (i=32; i>=0; i--)
-                 if (find_leaf_info(&l->list, i)) {
-               
+
+               seq_printf(seq, "key=%d.%d.%d.%d\n",
+                          n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
+
+               for (i = 32; i >= 0; i--)
+                       if (find_leaf_info(&l->list, i)) {
                                 struct list_head *fa_head = get_fa_head(l, i);
-                       
+
                                 if (!fa_head)
                                         continue;
  
@@ -2118,17 +2022,16 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
                                 putspace_seq(seq, indent+2);
                                 seq_printf(seq, "{/%d...dumping}\n", i);
  
-
-                               list_for_each_entry(fa, fa_head, fa_list) {
+                               list_for_each_entry_rcu(fa, fa_head, fa_list) {
                                         putspace_seq(seq, indent+2);
-                                       if (fa->fa_info->fib_nh == NULL) {
-                                               seq_printf(seq, "Error _fib_nh=NULL\n");
-                                               continue;
-                                       }
                                         if (fa->fa_info == NULL) {
                                                 seq_printf(seq, "Error fa_info=NULL\n");
                                                 continue;
                                         }
+                                       if (fa->fa_info->fib_nh == NULL) {
+                                               seq_printf(seq, "Error _fib_nh=NULL\n");
+                                               continue;
+                                       }
  
                                         seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
                                               fa->fa_type,
@@ -2136,11 +2039,16 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
                                               fa->fa_tos);
                                 }
                         }
-       }
-       else if (IS_TNODE(n)) {
+       } else {
                 struct tnode *tn = (struct tnode *)n;
+               int plen = ((struct tnode *)n)->pos;
+               t_key prf = MASK_PFX(n->key, plen);
+
+               seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
+                          prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
+
                 putspace_seq(seq, indent); seq_printf(seq, "|    ");
-               seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos));
+               seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos));
                 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
                 seq_printf(seq, "}\n");
                 putspace_seq(seq, indent); seq_printf(seq, "|    ");
@@ -2154,194 +2062,196 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
  
  static void trie_dump_seq(struct seq_file *seq, struct trie *t)
  {
-       struct node *n = t->trie;
-       int cindex=0;
-       int indent=1;
-       int pend=0;
+       struct node *n;
+       int cindex = 0;
+       int indent = 1;
+       int pend = 0;
         int depth = 0;
+       struct tnode *tn;
  
-       read_lock(&fib_lock);
-
+       rcu_read_lock();
+       n = rcu_dereference(t->trie);
         seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
-       if (n) {
-               printnode_seq(seq, indent, n, pend, cindex, 0);
-               if (IS_TNODE(n)) {
-                       struct tnode *tn = (struct tnode *)n;
-                       pend = tn->pos+tn->bits;
-                       putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
-                       indent += 3;
-                       depth++;
-
-                       while (tn && cindex < (1 << tn->bits)) {
-                               if (tn->child[cindex]) {
-                               
-                                       /* Got a child */
-                               
-                                       printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
-                                       if (IS_LEAF(tn->child[cindex])) {
-                                               cindex++;
-                                       
-                                       }
-                                       else {
-                                               /*
-                                                * New tnode. Decend one level
-                                                */
-                                       
-                                               depth++;
-                                               n = tn->child[cindex];
-                                               tn = (struct tnode *)n;
-                                               pend = tn->pos+tn->bits;
-                                               putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
-                                               indent+=3;
-                                               cindex=0;
-                                       }
-                               }
-                               else
-                                       cindex++;
  
+       if (!n) {
+               seq_printf(seq, "------ trie is empty\n");
+
+               rcu_read_unlock();
+               return;
+       }
+
+       printnode_seq(seq, indent, n, pend, cindex, 0);
+
+       if (!IS_TNODE(n)) {
+               rcu_read_unlock();
+               return;
+       }
+
+       tn = (struct tnode *)n;
+       pend = tn->pos+tn->bits;
+       putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
+       indent += 3;
+       depth++;
+
+       while (tn && cindex < (1 << tn->bits)) {
+               struct node *child = rcu_dereference(tn->child[cindex]);
+               if (!child)
+                       cindex++;
+               else {
+                       /* Got a child */
+                       printnode_seq(seq, indent, child, pend,
+                                     cindex, tn->bits);
+
+                       if (IS_LEAF(child))
+                               cindex++;
+
+                       else {
                                 /*
-                                * Test if we are done
+                                * New tnode. Descend one level
                                  */
-                       
-                               while (cindex >= (1 << tn->bits)) {
  
-                                       /*
-                                        * Move upwards and test for root
-                                        * pop off all traversed  nodes
-                                        */
-                               
-                                       if (NODE_PARENT(tn) == NULL) {
-                                               tn = NULL;
-                                               n = NULL;
-                                               break;
-                                       }
-                                       else {
-                                               cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
-                                               tn = NODE_PARENT(tn);
-                                               cindex++;
-                                               n = (struct node *)tn;
-                                               pend = tn->pos+tn->bits;
-                                               indent-=3;
-                                               depth--;
-                                       }
-                               }
+                               depth++;
+                               n = child;
+                               tn = (struct tnode *)n;
+                               pend = tn->pos+tn->bits;
+                               putspace_seq(seq, indent);
+                               seq_printf(seq, "\\--\n");
+                               indent += 3;
+                               cindex = 0;
                         }
                 }
-               else n = NULL;
-       }
-       else seq_printf(seq, "------ trie is empty\n");
  
-       read_unlock(&fib_lock);
+               /*
+                * Test if we are done
+                */
+
+               while (cindex >= (1 << tn->bits)) {
+                       /*
+                        * Move upwards and test for root
+                        * pop off all traversed  nodes
+                        */
+
+                       if (NODE_PARENT(tn) == NULL) {
+                               tn = NULL;
+                               break;
+                       }
+
+                       cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
+                       cindex++;
+                       tn = NODE_PARENT(tn);
+                       pend = tn->pos + tn->bits;
+                       indent -= 3;
+                       depth--;
+               }
+       }
+       rcu_read_unlock();
  }
  
  static struct trie_stat *trie_stat_new(void)
  {
-       struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
+       struct trie_stat *s;
         int i;
  
-       if (s) {
-               s->totdepth = 0;
-               s->maxdepth = 0;
-               s->tnodes = 0;
-               s->leaves = 0;
-               s->nullpointers = 0;
-       
-               for(i=0; i< MAX_CHILDS; i++)
-                       s->nodesizes[i] = 0;
-       }
+       s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
+       if (!s)
+               return NULL;
+
+       s->totdepth = 0;
+       s->maxdepth = 0;
+       s->tnodes = 0;
+       s->leaves = 0;
+       s->nullpointers = 0;
+
+       for (i = 0; i < MAX_CHILDS; i++)
+               s->nodesizes[i] = 0;
+
         return s;
  }
  
  static struct trie_stat *trie_collect_stats(struct trie *t)
  {
-       struct node *n = t->trie;
+       struct node *n;
         struct trie_stat *s = trie_stat_new();
         int cindex = 0;
-       int indent = 1;
         int pend = 0;
         int depth = 0;
  
-       read_lock(&fib_lock);   
+       if (!s)
+               return NULL;
  
-       if (s) {
-               if (n) {
-                       if (IS_TNODE(n)) {
-                               struct tnode *tn = (struct tnode *)n;
-                               pend = tn->pos+tn->bits;
-                               indent += 3;
-                               s->nodesizes[tn->bits]++;
-                               depth++;
+       rcu_read_lock();
+       n = rcu_dereference(t->trie);
  
-                               while (tn && cindex < (1 << tn->bits)) {
-                                       if (tn->child[cindex]) {
-                                               /* Got a child */
-                               
-                                               if (IS_LEAF(tn->child[cindex])) {
-                                                       cindex++;
-                                       
-                                                       /* stats */
-                                                       if (depth > s->maxdepth)
-                                                               s->maxdepth = depth;
-                                                       s->totdepth += depth;
-                                                       s->leaves++;
-                                               }
-                               
-                                               else {
-                                                       /*
-                                                        * New tnode. Decend one level
-                                                        */
-                                       
-                                                       s->tnodes++;
-                                                       s->nodesizes[tn->bits]++;
-                                                       depth++;
-                                       
-                                                       n = tn->child[cindex];
-                                                       tn = (struct tnode *)n;
-                                                       pend = tn->pos+tn->bits;
-
-                                                       indent += 3;
-                                                       cindex = 0;
-                                               }
-                                       }
-                                       else {
-                                               cindex++;
-                                               s->nullpointers++;
-                                       }
+       if (!n)
+               return s;
+
+       if (IS_TNODE(n)) {
+               struct tnode *tn = (struct tnode *)n;
+               pend = tn->pos+tn->bits;
+               s->nodesizes[tn->bits]++;
+               depth++;
+
+               while (tn && cindex < (1 << tn->bits)) {
+                       struct node *ch = rcu_dereference(tn->child[cindex]);
+                       if (ch) {
  
+                               /* Got a child */
+
+                               if (IS_LEAF(tn->child[cindex])) {
+                                       cindex++;
+
+                                       /* stats */
+                                       if (depth > s->maxdepth)
+                                               s->maxdepth = depth;
+                                       s->totdepth += depth;
+                                       s->leaves++;
+                               } else {
                                         /*
-                                        * Test if we are done
+                                        * New tnode. Descend one level
                                          */
-                       
-                                       while (cindex >= (1 << tn->bits)) {
-
-                                               /*
-                                                * Move upwards and test for root
-                                                * pop off all traversed  nodes
-                                                */
-
-                                       
-                                               if (NODE_PARENT(tn) == NULL) {
-                                                       tn = NULL;
-                                                       n = NULL;
-                                                       break;
-                                               }
-                                               else {
-                                                       cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
-                                                       tn = NODE_PARENT(tn);
-                                                       cindex++;
-                                                       n = (struct node *)tn;
-                                                       pend = tn->pos+tn->bits;
-                                                       indent -= 3;
-                                                       depth--;
-                                               }
-                                       }
+
+                                       s->tnodes++;
+                                       s->nodesizes[tn->bits]++;
+                                       depth++;
+
+                                       n = ch;
+                                       tn = (struct tnode *)n;
+                                       pend = tn->pos+tn->bits;
+
+                                       cindex = 0;
                                 }
+                       } else {
+                               cindex++;
+                               s->nullpointers++;
                         }
-                       else n = NULL;
+
+                       /*
+                        * Test if we are done
+                        */
+
+                       while (cindex >= (1 << tn->bits)) {
+                               /*
+                                * Move upwards and test for root
+                                * pop off all traversed  nodes
+                                */
+
+                               if (NODE_PARENT(tn) == NULL) {
+                                       tn = NULL;
+                                       n = NULL;
+                                       break;
+                               }
+
+                               cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
+                               tn = NODE_PARENT(tn);
+                               cindex++;
+                               n = (struct node *)tn;
+                               pend = tn->pos+tn->bits;
+                               depth--;
+                       }
                 }
         }
  
-       read_unlock(&fib_lock); 
+       rcu_read_unlock();
         return s;
  }
  
@@ -2359,17 +2269,22 @@ static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
  
  static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
  {
-       void *v = NULL;
+       if (!ip_fib_main_table)
+               return NULL;
  
-       if (ip_fib_main_table)
-               v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN;
-       return v;
+       if (*pos)
+               return fib_triestat_get_next(seq);
+       else
+               return SEQ_START_TOKEN;
  }
  
  static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  {
         ++*pos;
-       return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq);
+       if (v == SEQ_START_TOKEN)
+               return fib_triestat_get_first(seq);
+       else
+               return fib_triestat_get_next(seq);
  }
  
  static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
@@ -2388,22 +2303,22 @@ static void collect_and_show(struct trie *t, struct seq_file *seq)
  {
         int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
         int i, max, pointers;
-        struct trie_stat *stat;
+       struct trie_stat *stat;
         int avdepth;
  
         stat = trie_collect_stats(t);
  
-       bytes=0;
+       bytes = 0;
         seq_printf(seq, "trie=%p\n", t);
  
         if (stat) {
                 if (stat->leaves)
-                       avdepth=stat->totdepth*100 / stat->leaves;
+                       avdepth = stat->totdepth*100 / stat->leaves;
                 else
-                       avdepth=0;
-               seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
+                       avdepth = 0;
+               seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100);
                 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
-                       
+
                 seq_printf(seq, "Leaves: %d\n", stat->leaves);
                 bytes += sizeof(struct leaf) * stat->leaves;
                 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
@@ -2455,11 +2370,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
  
                 if (trie_main)
                         collect_and_show(trie_main, seq);
-       }
-       else {
-               snprintf(bf, sizeof(bf),
-                        "*\t%08X\t%08X", 200, 400);
-       
+       } else {
+               snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400);
+
                 seq_printf(seq, "%-127s\n", bf);
         }
         return 0;
@@ -2520,22 +2433,27 @@ static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
  
  static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
  {
-       void *v = NULL;
+       if (!ip_fib_main_table)
+               return NULL;
  
-       if (ip_fib_main_table)
-               v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN;
-       return v;
+       if (*pos)
+               return fib_trie_get_next(seq);
+       else
+               return SEQ_START_TOKEN;
  }
  
  static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  {
         ++*pos;
-       return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq);
+       if (v == SEQ_START_TOKEN)
+               return fib_trie_get_first(seq);
+       else
+               return fib_trie_get_next(seq);
+
  }
  
  static void fib_trie_seq_stop(struct seq_file *seq, void *v)
  {
-
  }
  
  /*
@@ -2555,9 +2473,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
  
                 if (trie_main)
                         trie_dump_seq(seq, trie_main);
-       }
-
-       else {
+       } else {
                 snprintf(bf, sizeof(bf),
                          "*\t%08X\t%08X", 200, 400);
                 seq_printf(seq, "%-127s\n", bf);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c

index 279f57abfecb5f3bbe8c10c23a4445f7180bd01e..24eb56ae1b5ac4e5d4f6235654a899ea676e4039 100644 (file)
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -114,7 +114,7 @@ struct icmp_bxm {
  /*
   *     Statistics
   */
-DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
+DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly;
  
  /* An array of errno for error messages from dest unreach. */
  /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
@@ -349,12 +349,12 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
  {
         struct sk_buff *skb;
  
-       ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
-                      icmp_param->data_len+icmp_param->head_len,
-                      icmp_param->head_len,
-                      ipc, rt, MSG_DONTWAIT);
-
-       if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
+       if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
+                          icmp_param->data_len+icmp_param->head_len,
+                          icmp_param->head_len,
+                          ipc, rt, MSG_DONTWAIT) < 0)
+               ip_flush_pending_frames(icmp_socket->sk);
+       else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
                 struct icmphdr *icmph = skb->h.icmph;
                 unsigned int csum = 0;
                 struct sk_buff *skb1;
@@ -627,11 +627,10 @@ static void icmp_unreach(struct sk_buff *skb)
                         break;
                 case ICMP_FRAG_NEEDED:
                         if (ipv4_config.no_pmtu_disc) {
-                               LIMIT_NETDEBUG(
-                                       printk(KERN_INFO "ICMP: %u.%u.%u.%u: "
+                               LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: "
                                                          "fragmentation needed "
                                                          "and DF set.\n",
-                                              NIPQUAD(iph->daddr)));
+                                              NIPQUAD(iph->daddr));
                         } else {
                                 info = ip_rt_frag_needed(iph,
                                                      ntohs(icmph->un.frag.mtu));
@@ -640,10 +639,9 @@ static void icmp_unreach(struct sk_buff *skb)
                         }
                         break;
                 case ICMP_SR_FAILED:
-                       LIMIT_NETDEBUG(
-                               printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source "
+                       LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source "
                                                  "Route Failed.\n",
-                                      NIPQUAD(iph->daddr)));
+                                      NIPQUAD(iph->daddr));
                         break;
                 default:
                         break;
@@ -936,8 +934,7 @@ int icmp_rcv(struct sk_buff *skb)
         case CHECKSUM_HW:
                 if (!(u16)csum_fold(skb->csum))
                         break;
-               NETDEBUG(if (net_ratelimit())
-                               printk(KERN_DEBUG "icmp v4 hw csum failure\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "icmp v4 hw csum failure\n");
         case CHECKSUM_NONE:
                 if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))
                         goto error;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c

index 5088f90835ae00694a31a8cdfa9b91829a4c5396..44607f4767b871820f5d33ab9dfd230e59933745 100644 (file)
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -904,7 +904,7 @@ int igmp_rcv(struct sk_buff *skb)
         case IGMP_MTRACE_RESP:
                 break;
         default:
-               NETDEBUG(printk(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type));
+               NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type);
         }
         in_dev_put(in_dev);
         kfree_skb(skb);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c

new file mode 100644 (file)

index 0000000..fe3c6d3
--- /dev/null
+++ b/net/ipv4/inet_connection_sock.c
@@ -0,0 +1,641 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Support for INET connection oriented protocols.
+ *
+ * Authors:    See the TCP sources
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/jhash.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/inet_timewait_sock.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/tcp_states.h>
+#include <net/xfrm.h>
+
+#ifdef INET_CSK_DEBUG
+const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
+EXPORT_SYMBOL(inet_csk_timer_bug_msg);
+#endif
+
+/*
+ * This array holds the first and last local port number.
+ * For high-usage systems, use sysctl to change this to
+ * 32768-61000
+ */
+int sysctl_local_port_range[2] = { 1024, 4999 };
+
+static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb)
+{      /* Return nonzero if some other owner of bind bucket tb conflicts with sk. */
+       const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
+       struct sock *sk2;
+       struct hlist_node *node;
+       int reuse = sk->sk_reuse;
+
+       sk_for_each_bound(sk2, node, &tb->owners) {
+               if (sk != sk2 &&
+                   !inet_v6_ipv6only(sk2) &&
+                   (!sk->sk_bound_dev_if ||
+                    !sk2->sk_bound_dev_if ||
+                    sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { /* either side has no device binding, or both name the same device */
+                       if (!reuse || !sk2->sk_reuse ||
+                           sk2->sk_state == TCP_LISTEN) { /* sharing is not permitted by the reuse flags, or sk2 is listening */
+                               const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
+                               if (!sk2_rcv_saddr || !sk_rcv_saddr ||
+                                   sk2_rcv_saddr == sk_rcv_saddr)
+                                       break;  /* either address is zero, or both are equal: conflict */
+                       }
+               }
+       }
+       return node != NULL;    /* presumably the iterator leaves node NULL when the walk ends without break -- confirm */
+}
+
+/* Obtain a reference to a local port for the given sock; a zero snum
+ * means select any available local port.  Returns 0 on success, 1 on failure.
+ */
+int inet_csk_get_port(struct inet_hashinfo *hashinfo,
+                     struct sock *sk, unsigned short snum)
+{
+       struct inet_bind_hashbucket *head;
+       struct hlist_node *node;
+       struct inet_bind_bucket *tb;
+       int ret;
+
+       local_bh_disable();
+       if (!snum) {
+               int low = sysctl_local_port_range[0];
+               int high = sysctl_local_port_range[1];
+               int remaining = (high - low) + 1;       /* number of candidate ports to try */
+               int rover;
+
+               spin_lock(&hashinfo->portalloc_lock);
+               if (hashinfo->port_rover < low)
+                       rover = low;
+               else
+                       rover = hashinfo->port_rover;
+               do {
+                       rover++;
+                       if (rover > high)
+                               rover = low;    /* wrap around to the start of the range */
+                       head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
+                       spin_lock(&head->lock);
+                       inet_bind_bucket_for_each(tb, node, &head->chain)
+                               if (tb->port == rover)
+                                       goto next;      /* a bucket already exists for this port: try another */
+                       break;  /* no bucket for rover: leave the loop with head->lock still held */
+               next:
+                       spin_unlock(&head->lock);
+               } while (--remaining > 0);
+               hashinfo->port_rover = rover;   /* remember where to resume the next search */
+               spin_unlock(&hashinfo->portalloc_lock);
+
+               /* Exhausted local port range during search?  It is not
+                * possible for us to be holding one of the bind hash
+                * locks if this test triggers, because if 'remaining'
+                * drops to zero, we broke out of the do/while loop at
+                * the top level, not from the 'break;' statement.
+                */
+               ret = 1;
+               if (remaining <= 0)
+                       goto fail;
+
+               /* OK, here is the one we will use.  HEAD is
+                * non-NULL and we hold its lock.
+                */
+               snum = rover;
+       } else {
+               head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
+               spin_lock(&head->lock);
+               inet_bind_bucket_for_each(tb, node, &head->chain)
+                       if (tb->port == snum)
+                               goto tb_found;
+       }
+       tb = NULL;      /* no existing bucket for snum; one is created below */
+       goto tb_not_found;
+tb_found:
+       if (!hlist_empty(&tb->owners)) {
+               if (sk->sk_reuse > 1)
+                       goto success;
+               if (tb->fastreuse > 0 &&
+                   sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
+                       goto success;   /* bucket is flagged fastreuse and sk qualifies: skip the owner walk */
+               } else {
+                       ret = 1;
+                       if (inet_csk_bind_conflict(sk, tb))
+                               goto fail_unlock;
+               }
+       }
+tb_not_found:
+       ret = 1;
+       if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
+               goto fail_unlock;
+       if (hlist_empty(&tb->owners)) {
+               if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
+                       tb->fastreuse = 1;
+               else
+                       tb->fastreuse = 0;
+       } else if (tb->fastreuse &&
+                  (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
+               tb->fastreuse = 0;      /* a non-reuse or listening owner clears the fastreuse flag */
+success:
+       if (!inet_csk(sk)->icsk_bind_hash)
+               inet_bind_hash(sk, tb, snum);
+       BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
+       ret = 0;
+
+fail_unlock:
+       spin_unlock(&head->lock);
+fail:
+       local_bh_enable();
+       return ret;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_get_port);
+
+/*
+ * Wait for an incoming connection, avoiding race conditions. Must be called with
+ * the socket locked. Returns 0 once the accept queue is non-empty, else an errno.
+ */
+static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       DEFINE_WAIT(wait);
+       int err;
+
+       /*
+        * True wake-one mechanism for incoming connections: only
+        * one process gets woken up, not the 'whole herd'.
+        * Since we do not 'race & poll' for established sockets
+        * anymore, the common case will execute the loop only once.
+        *
+        * Subtle issue: "add_wait_queue_exclusive()" will be added
+        * after any current non-exclusive waiters, and we know that
+        * it will always _stay_ after any new non-exclusive waiters
+        * because all non-exclusive waiters are added at the
+        * beginning of the wait-queue. As such, it's ok to "drop"
+        * our exclusiveness temporarily when we get woken up without
+        * having to remove and re-insert us on the wait queue.
+        */
+       for (;;) {
+               prepare_to_wait_exclusive(sk->sk_sleep, &wait,
+                                         TASK_INTERRUPTIBLE);
+               release_sock(sk);       /* the socket lock is dropped while we may sleep */
+               if (reqsk_queue_empty(&icsk->icsk_accept_queue))
+                       timeo = schedule_timeout(timeo);
+               lock_sock(sk);
+               err = 0;
+               if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
+                       break;  /* a connection is queued: success */
+               err = -EINVAL;
+               if (sk->sk_state != TCP_LISTEN)
+                       break;  /* socket is no longer listening */
+               err = sock_intr_errno(timeo);
+               if (signal_pending(current))
+                       break;  /* interrupted by a signal */
+               err = -EAGAIN;
+               if (!timeo)
+                       break;  /* timeout expired */
+       }
+       finish_wait(sk->sk_sleep, &wait);
+       return err;
+}
+
+/*
+ * Accept the next outstanding connection: returns it, or NULL with *err set.
+ */
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct sock *newsk;
+       int error;
+
+       lock_sock(sk);
+
+       /* We need to make sure that this socket is listening,
+        * and that it has something pending.
+        */
+       error = -EINVAL;
+       if (sk->sk_state != TCP_LISTEN)
+               goto out_err;
+
+       /* Find already established connection */
+       if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
+               long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+
+               /* If this is a non blocking socket don't sleep */
+               error = -EAGAIN;
+               if (!timeo)
+                       goto out_err;
+
+               error = inet_csk_wait_for_connect(sk, timeo);
+               if (error)
+                       goto out_err;
+       }
+
+       newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
+       BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
+out:
+       release_sock(sk);
+       return newsk;
+out_err:
+       newsk = NULL;
+       *err = error;   /* *err is written on the failure path only */
+       goto out;
+}
+
+EXPORT_SYMBOL(inet_csk_accept);
+
+/*
+ * Uses different timers for retransmit, delayed acks and probes.
+ * We may wish to use just one timer maintaining a list of expiry jiffies
+ * to optimize.
+ */
+void inet_csk_init_xmit_timers(struct sock *sk,
+                              void (*retransmit_handler)(unsigned long),
+                              void (*delack_handler)(unsigned long),
+                              void (*keepalive_handler)(unsigned long))
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       init_timer(&icsk->icsk_retransmit_timer);
+       init_timer(&icsk->icsk_delack_timer);
+       init_timer(&sk->sk_timer);
+
+       icsk->icsk_retransmit_timer.function = retransmit_handler;
+       icsk->icsk_delack_timer.function     = delack_handler;
+       sk->sk_timer.function                = keepalive_handler;
+
+       icsk->icsk_retransmit_timer.data = 
+               icsk->icsk_delack_timer.data =
+                       sk->sk_timer.data  = (unsigned long)sk; /* every handler receives the sock pointer as its argument */
+
+       icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+}
+
+EXPORT_SYMBOL(inet_csk_init_xmit_timers);
+
+void inet_csk_clear_xmit_timers(struct sock *sk)
+{      /* Zero the pending/blocked state, then stop all three timers of sk. */
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0;
+
+       sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
+       sk_stop_timer(sk, &icsk->icsk_delack_timer);
+       sk_stop_timer(sk, &sk->sk_timer);
+}
+
+EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
+
+void inet_csk_delete_keepalive_timer(struct sock *sk)
+{
+       sk_stop_timer(sk, &sk->sk_timer);       /* sk_timer is the timer that was given keepalive_handler at init */
+}
+
+EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);
+
+void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
+{
+       sk_reset_timer(sk, &sk->sk_timer, jiffies + len);       /* len is a delay in jiffies counted from now */
+}
+
+EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
+
+struct dst_entry* inet_csk_route_req(struct sock *sk,
+                                    const struct request_sock *req)
+{      /* Look up an output route for req; returns its dst entry, or NULL if there is no usable route. */
+       struct rtable *rt;
+       const struct inet_request_sock *ireq = inet_rsk(req);
+       struct ip_options *opt = inet_rsk(req)->opt;
+       struct flowi fl = { .oif = sk->sk_bound_dev_if,
+                           .nl_u = { .ip4_u =
+                                     { .daddr = ((opt && opt->srr) ?
+                                                 opt->faddr :
+                                                 ireq->rmt_addr), /* with an srr option set, route towards opt->faddr instead of the peer */
+                                       .saddr = ireq->loc_addr,
+                                       .tos = RT_CONN_FLAGS(sk) } },
+                           .proto = sk->sk_protocol,
+                           .uli_u = { .ports =
+                                      { .sport = inet_sk(sk)->sport,
+                                        .dport = ireq->rmt_port } } };
+
+       if (ip_route_output_flow(&rt, &fl, sk, 0)) {
+               IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+               return NULL;
+       }
+       if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
+               ip_rt_put(rt);  /* drop the route obtained above before reporting failure */
+               IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+               return NULL;
+       }
+       return &rt->u.dst;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_route_req);
+
+static inline u32 inet_synq_hash(const u32 raddr, const u16 rport,
+                                const u32 rnd, const u16 synq_hsize)
+{      /* Hash (raddr, rport) with rnd as the jhash initial value and reduce it to a table index. */
+       return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); /* mask presumably relies on synq_hsize being a power of two -- confirm */
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
+#else
+#define AF_INET_FAMILY(fam) 1
+#endif
+
+struct request_sock *inet_csk_search_req(const struct sock *sk,
+                                        struct request_sock ***prevp,
+                                        const __u16 rport, const __u32 raddr,
+                                        const __u32 laddr)
+{      /* Find the pending request matching (rport, raddr, laddr) in sk's SYN table; NULL if none. */
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
+       struct request_sock *req, **prev;
+
+       for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd,
+                                                   lopt->nr_table_entries)];
+            (req = *prev) != NULL;
+            prev = &req->dl_next) {
+               const struct inet_request_sock *ireq = inet_rsk(req);
+
+               if (ireq->rmt_port == rport &&
+                   ireq->rmt_addr == raddr &&
+                   ireq->loc_addr == laddr &&
+                   AF_INET_FAMILY(req->rsk_ops->family)) {
+                       BUG_TRAP(!req->sk);
+                       *prevp = prev;  /* link slot pointing at req; written only when a match is found */
+                       break;
+               }
+       }
+
+       return req;     /* NULL here when the chain was walked to its end without a match */
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_search_req);
+
+void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+                                  const unsigned timeout)
+{      /* Hash req by its remote address and port, insert it into sk's request queue, and account for it. */
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
+       const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port,
+                                    lopt->hash_rnd, lopt->nr_table_entries);
+
+       reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
+       inet_csk_reqsk_queue_added(sk, timeout);
+}
+
+/* Only thing we need from tcp.h */
+extern int sysctl_tcp_synack_retries;
+
+EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
+
+void inet_csk_reqsk_queue_prune(struct sock *parent,
+                               const unsigned long interval,
+                               const unsigned long timeout,
+                               const unsigned long max_rto)
+{      /* Scan part of parent's SYN table: re-arm expired requests that may still be retried, drop the rest. */
+       struct inet_connection_sock *icsk = inet_csk(parent);
+       struct request_sock_queue *queue = &icsk->icsk_accept_queue;
+       struct listen_sock *lopt = queue->listen_opt;
+       int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; /* the sysctl value is used when icsk_syn_retries is 0 */
+       int thresh = max_retries;
+       unsigned long now = jiffies;
+       struct request_sock **reqp, *req;
+       int i, budget;
+
+       if (lopt == NULL || lopt->qlen == 0)
+               return;
+
+       /* Normally all the openreqs are young and become mature
+        * (i.e. converted to established socket) for first timeout.
+        * If synack was not acknowledged for 3 seconds, it means
+        * one of the following things: synack was lost, ack was lost,
+        * rtt is high or nobody planned to ack (i.e. synflood).
+        * When server is a bit loaded, queue is populated with old
+        * open requests, reducing effective size of queue.
+        * When server is well loaded, queue size reduces to zero
+        * after several minutes of work. It is not synflood,
+        * it is normal operation. The solution is pruning
+        * too old entries overriding normal timeout, when
+        * situation becomes dangerous.
+        *
+        * Essentially, we reserve half of room for young
+        * embryos; and abort old ones without pity, if old
+        * ones are about to clog our table.
+        */
+       if (lopt->qlen>>(lopt->max_qlen_log-1)) {
+               int young = (lopt->qlen_young<<1);
+
+               while (thresh > 2) {
+                       if (lopt->qlen < young)
+                               break;
+                       thresh--;
+                       young <<= 1;
+               }
+       }
+
+       if (queue->rskq_defer_accept)
+               max_retries = queue->rskq_defer_accept;
+
+       budget = 2 * (lopt->nr_table_entries / (timeout / interval)); /* NOTE(review): divides by timeout / interval; assumes timeout >= interval -- confirm against callers */
+       i = lopt->clock_hand;   /* resume where the previous call stopped */
+
+       do {
+               reqp=&lopt->syn_table[i];
+               while ((req = *reqp) != NULL) {
+                       if (time_after_eq(now, req->expires)) {
+                               if ((req->retrans < thresh ||
+                                    (inet_rsk(req)->acked && req->retrans < max_retries))
+                                   && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
+                                       unsigned long timeo;
+
+                                       if (req->retrans++ == 0)
+                                               lopt->qlen_young--;     /* first retransmit: the request stops counting as young */
+                                       timeo = min((timeout << req->retrans), max_rto); /* the timeout doubles per retransmit, capped at max_rto */
+                                       req->expires = now + timeo;
+                                       reqp = &req->dl_next;
+                                       continue;
+                               }
+
+                               /* Drop this request */
+                               inet_csk_reqsk_queue_unlink(parent, req, reqp);
+                               reqsk_queue_removed(queue, req);
+                               reqsk_free(req);
+                               continue;       /* reqp is not advanced: presumably the unlink made *reqp the next entry -- confirm */
+                       }
+                       reqp = &req->dl_next;
+               }
+
+               i = (i + 1) & (lopt->nr_table_entries - 1);
+
+       } while (--budget > 0);
+
+       lopt->clock_hand = i;
+
+       if (lopt->qlen)
+               inet_csk_reset_keepalive_timer(parent, interval);       /* requests remain: run again after interval */
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
+
+struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
+                           const unsigned int __nocast priority)
+{      /* Clone sk for the connection described by req; returns the new sock, or NULL if sk_clone() fails. */
+       struct sock *newsk = sk_clone(sk, priority);
+
+       if (newsk != NULL) {
+               struct inet_connection_sock *newicsk = inet_csk(newsk);
+
+               newsk->sk_state = TCP_SYN_RECV;
+               newicsk->icsk_bind_hash = NULL;
+
+               inet_sk(newsk)->dport = inet_rsk(req)->rmt_port;
+               newsk->sk_write_space = sk_stream_write_space;
+
+               newicsk->icsk_retransmits = 0;
+               newicsk->icsk_backoff     = 0;
+               newicsk->icsk_probes_out  = 0;
+
+               /* Deinitialize accept_queue to trap illegal accesses. */
+               memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
+       }
+       return newsk;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_clone);
+
+/*
+ * At this point, there should be no process reference to this
+ * socket, and thus no user references at all.  Therefore we
+ * can assume the socket waitqueue is inactive and nobody will
+ * try to jump onto it.
+ */
+void inet_csk_destroy_sock(struct sock *sk)
+{
+       BUG_TRAP(sk->sk_state == TCP_CLOSE);
+       BUG_TRAP(sock_flag(sk, SOCK_DEAD));
+
+       /* It cannot be in hash table! */
+       BUG_TRAP(sk_unhashed(sk));
+
+       /* If inet_sk(sk)->num is nonzero, the socket must be bound */
+       BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
+
+       sk->sk_prot->destroy(sk);
+
+       sk_stream_kill_queues(sk);
+
+       xfrm_sk_free_policy(sk);
+
+       sk_refcnt_debug_release(sk);
+
+       atomic_dec(sk->sk_prot->orphan_count);
+       sock_put(sk);
+}
+
+EXPORT_SYMBOL(inet_csk_destroy_sock);
+
+int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
+{      /* Move sk to TCP_LISTEN; returns 0, the reqsk_queue_alloc() error, or -EADDRINUSE. */
+       struct inet_sock *inet = inet_sk(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
+
+       if (rc != 0)
+               return rc;
+
+       sk->sk_max_ack_backlog = 0;
+       sk->sk_ack_backlog = 0;
+       inet_csk_delack_init(sk);
+
+       /* There is a race window here: we announce ourselves as listening,
+        * but this transition has not yet been validated by get_port().
+        * That is OK, because this socket enters the hash table only
+        * after validation is complete.
+        */
+       sk->sk_state = TCP_LISTEN;
+       if (!sk->sk_prot->get_port(sk, inet->num)) {
+               inet->sport = htons(inet->num);
+
+               sk_dst_reset(sk);
+               sk->sk_prot->hash(sk);
+
+               return 0;
+       }
+
+       sk->sk_state = TCP_CLOSE;       /* get_port() failed: undo the state change and release the queue */
+       __reqsk_queue_destroy(&icsk->icsk_accept_queue);
+       return -EADDRINUSE;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_start);
+
+/*
+ *     This routine closes sockets which have been at least partially
+ *     opened, but not yet accepted.
+ */
+void inet_csk_listen_stop(struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct request_sock *acc_req;
+       struct request_sock *req;
+
+       inet_csk_delete_keepalive_timer(sk);
+
+       /* make all the listen_opt local to us */
+       acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
+
+       /* Following specs, it would be better either to send FIN
+        * (and enter FIN-WAIT-1, it is normal close)
+        * or to send active reset (abort).
+        * Certainly, it is pretty dangerous while synflood, but it is
+        * bad justification for our negligence 8)
+        * To be honest, we are not able to make either
+        * of the variants now.                 --ANK
+        */
+       reqsk_queue_destroy(&icsk->icsk_accept_queue);
+
+       while ((req = acc_req) != NULL) {
+               struct sock *child = req->sk;
+
+               acc_req = req->dl_next; /* fetch the next entry before req is freed below */
+
+               local_bh_disable();
+               bh_lock_sock(child);
+               BUG_TRAP(!sock_owned_by_user(child));
+               sock_hold(child);       /* paired with the sock_put(child) after the unlock below */
+
+               sk->sk_prot->disconnect(child, O_NONBLOCK);
+
+               sock_orphan(child);
+
+               atomic_inc(sk->sk_prot->orphan_count);  /* inet_csk_destroy_sock() decrements this counter again */
+
+               inet_csk_destroy_sock(child);
+
+               bh_unlock_sock(child);
+               local_bh_enable();
+               sock_put(child);
+
+               sk_acceptq_removed(sk);
+               __reqsk_free(req);
+       }
+       BUG_TRAP(!sk->sk_ack_backlog);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c

new file mode 100644 (file)

index 0000000..71f3c73
--- /dev/null
+++ b/net/ipv4/inet_diag.c
@@ -0,0 +1,868 @@
+/*
+ * inet_diag.c Module for monitoring INET transport protocols sockets.
+ *
+ * Version:    $Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/random.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+#include <linux/time.h>
+
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/ipv6.h>
+#include <net/inet_common.h>
+#include <net/inet_connection_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/inet_timewait_sock.h>
+#include <net/inet6_hashtables.h>
+
+#include <linux/inet.h>
+#include <linux/stddef.h>
+
+#include <linux/inet_diag.h>
+
+static const struct inet_diag_handler **inet_diag_table;
+/* Per-socket values that the filter bytecode is evaluated against. */
+struct inet_diag_entry {
+       u32 *saddr;     /* source address words (one for IPv4, s6_addr32[] for IPv6) */
+       u32 *daddr;     /* destination address words, same layout as saddr */
+       u16 sport;      /* filled from inet->num by the dump helpers */
+       u16 dport;      /* host byte order: the dump helpers apply ntohs() */
+       u16 family;     /* sk->sk_family of the socket being matched */
+       u16 userlocks;  /* sk->sk_userlocks; tested by INET_DIAG_BC_AUTO */
+};
+
+static struct sock *idiagnl;
+/* Put an attribute of attrlen bytes into skb; evaluates to its payload pointer. */
+#define INET_DIAG_PUT(skb, attrtype, attrlen) \
+       RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
+/* Emit one inet_diag_msg for sk into skb; skb->len on success, else -1. */
+static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
+                       int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+                       const struct nlmsghdr *unlh)
+{
+       const struct inet_sock *inet = inet_sk(sk);
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       struct inet_diag_msg *r;
+       struct nlmsghdr  *nlh;
+       void *info = NULL;
+       struct inet_diag_meminfo  *minfo = NULL;
+       unsigned char    *b = skb->tail;        /* start of this message, for rollback */
+       const struct inet_diag_handler *handler;
+
+       handler = inet_diag_table[unlh->nlmsg_type];
+       BUG_ON(handler == NULL);
+
+       nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
+       nlh->nlmsg_flags = nlmsg_flags;
+
+       r = NLMSG_DATA(nlh);
+       if (sk->sk_state != TCP_TIME_WAIT) {    /* ext bit (N - 1) requests attribute N */
+               if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
+                       minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO,
+                                             sizeof(*minfo));
+               if (ext & (1 << (INET_DIAG_INFO - 1)))
+                       info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
+                                          handler->idiag_info_size);
+               
+               if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
+                       size_t len = strlen(icsk->icsk_ca_ops->name);
+                       strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
+                              icsk->icsk_ca_ops->name);
+               }
+       }
+       r->idiag_family = sk->sk_family;
+       r->idiag_state = sk->sk_state;
+       r->idiag_timer = 0;
+       r->idiag_retrans = 0;
+
+       r->id.idiag_if = sk->sk_bound_dev_if;
+       r->id.idiag_cookie[0] = (u32)(unsigned long)sk; /* cookie = sock pointer, low half */
+       r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); /* high half; no shift by 32 */
+
+       if (r->idiag_state == TCP_TIME_WAIT) {
+               const struct inet_timewait_sock *tw = inet_twsk(sk);
+               long tmo = tw->tw_ttd - jiffies;
+               if (tmo < 0)
+                       tmo = 0;
+
+               r->id.idiag_sport = tw->tw_sport;
+               r->id.idiag_dport = tw->tw_dport;
+               r->id.idiag_src[0] = tw->tw_rcv_saddr;
+               r->id.idiag_dst[0] = tw->tw_daddr;
+               r->idiag_state = tw->tw_substate;
+               r->idiag_timer = 3;     /* timer code used for TIME_WAIT sockets */
+               r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;  /* jiffies -> ms, rounded up */
+               r->idiag_rqueue = 0;
+               r->idiag_wqueue = 0;
+               r->idiag_uid = 0;
+               r->idiag_inode = 0;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+               if (r->idiag_family == AF_INET6) {
+                       const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
+
+                       ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
+                                      &tcp6tw->tw_v6_rcv_saddr);
+                       ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
+                                      &tcp6tw->tw_v6_daddr);
+               }
+#endif
+               nlh->nlmsg_len = skb->tail - b;
+               return skb->len;
+       }
+
+       r->id.idiag_sport = inet->sport;
+       r->id.idiag_dport = inet->dport;
+       r->id.idiag_src[0] = inet->rcv_saddr;
+       r->id.idiag_dst[0] = inet->daddr;
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+       if (r->idiag_family == AF_INET6) {
+               struct ipv6_pinfo *np = inet6_sk(sk);
+
+               ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
+                              &np->rcv_saddr);
+               ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
+                              &np->daddr);
+       }
+#endif
+
+#define EXPIRES_IN_MS(tmo)  ((tmo - jiffies) * 1000 + HZ - 1) / HZ
+
+       if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+               r->idiag_timer = 1;     /* retransmit timer pending */
+               r->idiag_retrans = icsk->icsk_retransmits;
+               r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
+       } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+               r->idiag_timer = 4;     /* probe0 timer pending */
+               r->idiag_retrans = icsk->icsk_probes_out;
+               r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
+       } else if (timer_pending(&sk->sk_timer)) {
+               r->idiag_timer = 2;     /* sk->sk_timer pending */
+               r->idiag_retrans = icsk->icsk_probes_out;
+               r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires);
+       } else {
+               r->idiag_timer = 0;
+               r->idiag_expires = 0;
+       }
+#undef EXPIRES_IN_MS
+
+       r->idiag_uid = sock_i_uid(sk);
+       r->idiag_inode = sock_i_ino(sk);
+
+       if (minfo) {
+               minfo->idiag_rmem = atomic_read(&sk->sk_rmem_alloc);
+               minfo->idiag_wmem = sk->sk_wmem_queued;
+               minfo->idiag_fmem = sk->sk_forward_alloc;
+               minfo->idiag_tmem = atomic_read(&sk->sk_wmem_alloc);
+       }
+
+       handler->idiag_get_info(sk, r, info);   /* info stays NULL unless INET_DIAG_INFO was requested */
+
+       if (sk->sk_state < TCP_TIME_WAIT &&
+           icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
+               icsk->icsk_ca_ops->get_info(sk, ext, skb);
+
+       nlh->nlmsg_len = skb->tail - b;
+       return skb->len;
+
+rtattr_failure:        /* presumably the jump target of INET_DIAG_PUT/__RTA_PUT - confirm */
+nlmsg_failure:         /* presumably the jump target of NLMSG_PUT - confirm */
+       skb_trim(skb, b - skb->data);   /* drop the partially built message */
+       return -1;
+}
+/* Answer a non-dump request: look up the single socket named by req->id. */
+static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
+{
+       int err;
+       struct sock *sk;
+       struct inet_diag_req *req = NLMSG_DATA(nlh);
+       struct sk_buff *rep;
+       struct inet_hashinfo *hashinfo;
+       const struct inet_diag_handler *handler;
+
+       handler = inet_diag_table[nlh->nlmsg_type];
+       BUG_ON(handler == NULL);
+       hashinfo = handler->idiag_hashinfo;
+
+       if (req->idiag_family == AF_INET) {
+               sk = inet_lookup(hashinfo, req->id.idiag_dst[0],
+                                req->id.idiag_dport, req->id.idiag_src[0],
+                                req->id.idiag_sport, req->id.idiag_if);
+       }
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+       else if (req->idiag_family == AF_INET6) {
+               sk = inet6_lookup(hashinfo,
+                                 (struct in6_addr *)req->id.idiag_dst,
+                                 req->id.idiag_dport,
+                                 (struct in6_addr *)req->id.idiag_src,
+                                 req->id.idiag_sport,
+                                 req->id.idiag_if);
+       }
+#endif
+       else {
+               return -EINVAL;
+       }
+
+       if (sk == NULL)
+               return -ENOENT;
+
+       err = -ESTALE;  /* a supplied cookie must equal the sock pointer's two halves */
+       if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE ||
+            req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) &&
+           ((u32)(unsigned long)sk != req->id.idiag_cookie[0] ||
+            (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1]))
+               goto out;
+
+       err = -ENOMEM;  /* reply sized for msg + meminfo + handler info + 64 spare bytes */
+       rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
+                                    sizeof(struct inet_diag_meminfo) +
+                                    handler->idiag_info_size + 64)),
+                       GFP_KERNEL);
+       if (!rep)
+               goto out;
+
+       if (inet_diag_fill(rep, sk, req->idiag_ext,
+                        NETLINK_CB(in_skb).pid,
+                        nlh->nlmsg_seq, 0, nlh) <= 0)
+               BUG();  /* filling the freshly sized reply is treated as unable to fail */
+
+       err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid,
+                             MSG_DONTWAIT);
+       if (err > 0)
+               err = 0;        /* positive results are collapsed to plain success */
+
+out:
+       if (sk) {       /* release sk with the put that matches its state */
+               if (sk->sk_state == TCP_TIME_WAIT)
+                       inet_twsk_put((struct inet_timewait_sock *)sk);
+               else
+                       sock_put(sk);
+       }
+       return err;
+}
+/* Return 1 if a1 and a2 agree in their first `bits` bits, 0 otherwise. */
+static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
+{
+       int words = bits >> 5;  /* number of whole 32-bit words to compare */
+
+       bits &= 0x1f;           /* leftover bits in the following word */
+
+       if (words) {
+               if (memcmp(a1, a2, words << 2))
+                       return 0;
+       }
+       if (bits) {
+               __u32 w1, w2;
+               __u32 mask;
+
+               w1 = a1[words];
+               w2 = a2[words];
+
+               mask = htonl((0xffffffff) << (32 - bits));      /* top `bits` bits, via htonl() */
+
+               if ((w1 ^ w2) & mask)
+                       return 0;
+       }
+
+       return 1;
+}
+
+/* Run audited bytecode bc against entry; nonzero iff the socket matches. */
+static int inet_diag_bc_run(const void *bc, int len,
+                         const struct inet_diag_entry *entry)
+{
+       while (len > 0) {
+               int yes = 1;
+               const struct inet_diag_bc_op *op = bc;
+
+               switch (op->code) {
+               case INET_DIAG_BC_NOP:
+                       break;
+               case INET_DIAG_BC_JMP:
+                       yes = 0;        /* unconditional: always take op->no */
+                       break;
+               case INET_DIAG_BC_S_GE:
+                       yes = entry->sport >= op[1].no; /* bound is in the next op slot */
+                       break;
+               case INET_DIAG_BC_S_LE:
+                       yes = entry->sport <= op[1].no; /* source port (was dport) */
+                       break;
+               case INET_DIAG_BC_D_GE:
+                       yes = entry->dport >= op[1].no;
+                       break;
+               case INET_DIAG_BC_D_LE:
+                       yes = entry->dport <= op[1].no;
+                       break;
+               case INET_DIAG_BC_AUTO:
+                       yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
+                       break;
+               case INET_DIAG_BC_S_COND:
+               case INET_DIAG_BC_D_COND: {
+                       struct inet_diag_hostcond *cond;
+                       u32 *addr;
+
+                       cond = (struct inet_diag_hostcond *)(op + 1);
+                       if (cond->port != -1 && /* -1 means any port */
+                           cond->port != (op->code == INET_DIAG_BC_S_COND ?
+                                            entry->sport : entry->dport)) {
+                               yes = 0;
+                               break;
+                       }
+
+                       if (cond->prefix_len == 0)      /* empty prefix matches any address */
+                               break;
+
+                       if (op->code == INET_DIAG_BC_S_COND)
+                               addr = entry->saddr;
+                       else
+                               addr = entry->daddr;
+
+                       if (bitstring_match(addr, cond->addr, cond->prefix_len))
+                               break;
+                       if (entry->family == AF_INET6 &&
+                           cond->family == AF_INET) {
+                               if (addr[0] == 0 && addr[1] == 0 &&     /* ::ffff:a.b.c.d form */
+                                   addr[2] == htonl(0xffff) &&
+                                   bitstring_match(addr + 3, cond->addr,
+                                                   cond->prefix_len))
+                                       break;
+                       }
+                       yes = 0;
+                       break;
+               }
+               }
+
+               if (yes) {
+                       len -= op->yes;
+                       bc += op->yes;
+               } else {
+                       len -= op->no;
+                       bc += op->no;
+               }
+       }
+       return (len == 0);      /* len < 0: a branch jumped past the end, no match */
+}
+/* 1 if stepping by op->yes from bc reaches the point with cc bytes left. */
+static int valid_cc(const void *bc, int len, int cc)
+{
+       while (len >= 0) {
+               const struct inet_diag_bc_op *op = bc;
+
+               if (cc > len)   /* stepped past the target: not an op boundary */
+                       return 0;
+               if (cc == len)  /* landed exactly on the target */
+                       return 1;
+               if (op->yes < 4)        /* a step shorter than 4 bytes is rejected */
+                       return 0;
+               len -= op->yes;
+               bc  += op->yes;
+       }
+       return 0;
+}
+/* Validate user bytecode before it is run; 0 if well-formed, else -EINVAL. */
+static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
+{
+       const unsigned char *bc = bytecode;
+       int  len = bytecode_len;
+
+       while (len > 0) {
+               struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc;
+
+               /* Every opcode, JMP included, advances by op->yes below;
+                * a zero "yes" would make this loop spin forever. */
+               if (op->yes < 4 || op->yes > len + 4)
+                       return -EINVAL;
+               switch (op->code) {
+               case INET_DIAG_BC_AUTO:
+               case INET_DIAG_BC_S_COND:
+               case INET_DIAG_BC_D_COND:
+               case INET_DIAG_BC_S_GE:
+               case INET_DIAG_BC_S_LE:
+               case INET_DIAG_BC_D_GE:
+               case INET_DIAG_BC_D_LE:
+               case INET_DIAG_BC_JMP:
+                       if (op->no < 4 || op->no > len + 4)
+                               return -EINVAL;
+                       /* a "no" branch inside the program must hit an op boundary */
+                       if (op->no < len &&
+                           !valid_cc(bytecode, bytecode_len, len - op->no))
+                               return -EINVAL;
+                       break;
+               case INET_DIAG_BC_NOP:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               bc += op->yes;
+               len -= op->yes;
+       }
+       return len == 0 ? 0 : -EINVAL;
+}
+/* Filter sk through the request bytecode (if any), then emit it into skb. */
+static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
+                            struct netlink_callback *cb)
+{
+       struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+
+       if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { /* data follows the request: treated as bytecode */
+               struct inet_diag_entry entry;
+               struct rtattr *bc = (struct rtattr *)(r + 1);
+               struct inet_sock *inet = inet_sk(sk);
+
+               entry.family = sk->sk_family;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+               if (entry.family == AF_INET6) {
+                       struct ipv6_pinfo *np = inet6_sk(sk);
+
+                       entry.saddr = np->rcv_saddr.s6_addr32;
+                       entry.daddr = np->daddr.s6_addr32;
+               } else
+#endif
+               {
+                       entry.saddr = &inet->rcv_saddr;
+                       entry.daddr = &inet->daddr;
+               }
+               entry.sport = inet->num;
+               entry.dport = ntohs(inet->dport);
+               entry.userlocks = sk->sk_userlocks;
+
+               if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+                       return 0;       /* filtered out: nothing emitted, not an error */
+       }
+
+       return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid,
+                           cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+}
+/* Emit a TCP_SYN_RECV inet_diag_msg for request req of listener sk. */
+static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
+                           struct request_sock *req,
+                           u32 pid, u32 seq,
+                           const struct nlmsghdr *unlh)
+{
+       const struct inet_request_sock *ireq = inet_rsk(req);
+       struct inet_sock *inet = inet_sk(sk);
+       unsigned char *b = skb->tail;   /* start of this message, for rollback */
+       struct inet_diag_msg *r;
+       struct nlmsghdr *nlh;
+       long tmo;
+
+       nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
+       nlh->nlmsg_flags = NLM_F_MULTI;
+       r = NLMSG_DATA(nlh);
+
+       r->idiag_family = sk->sk_family;
+       r->idiag_state = TCP_SYN_RECV;
+       r->idiag_timer = 1;
+       r->idiag_retrans = req->retrans;
+
+       r->id.idiag_if = sk->sk_bound_dev_if;
+       r->id.idiag_cookie[0] = (u32)(unsigned long)req;        /* cookie = req pointer, low half */
+       r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1); /* high half; no shift by 32 */
+
+       tmo = req->expires - jiffies;
+       if (tmo < 0)
+               tmo = 0;        /* already expired: report zero, not a negative time */
+
+       r->id.idiag_sport = inet->sport;        /* local port comes from the listener */
+       r->id.idiag_dport = ireq->rmt_port;
+       r->id.idiag_src[0] = ireq->loc_addr;
+       r->id.idiag_dst[0] = ireq->rmt_addr;
+       r->idiag_expires = jiffies_to_msecs(tmo);
+       r->idiag_rqueue = 0;
+       r->idiag_wqueue = 0;
+       r->idiag_uid = sock_i_uid(sk);
+       r->idiag_inode = 0;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+       if (r->idiag_family == AF_INET6) {
+               ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
+                              &tcp6_rsk(req)->loc_addr);
+               ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
+                              &tcp6_rsk(req)->rmt_addr);
+       }
+#endif
+       nlh->nlmsg_len = skb->tail - b;
+
+       return skb->len;
+
+nlmsg_failure:         /* presumably the jump target of NLMSG_PUT - confirm */
+       skb_trim(skb, b - skb->data);   /* drop the partially built message */
+       return -1;
+}
+/* Dump listener sk's pending connection requests that pass the filter. */
+static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
+                            struct netlink_callback *cb)
+{
+       struct inet_diag_entry entry;
+       struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct listen_sock *lopt;
+       struct rtattr *bc = NULL;
+       struct inet_sock *inet = inet_sk(sk);
+       int j, s_j;
+       int reqnum, s_reqnum;
+       int err = 0;
+
+       s_j = cb->args[3];
+       s_reqnum = cb->args[4];
+
+       if (s_j > 0)
+               s_j--;  /* args[3] is saved as j + 1, so 0 means "not started" */
+
+       entry.family = sk->sk_family;
+
+       read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+
+       lopt = icsk->icsk_accept_queue.listen_opt;
+       if (!lopt || !lopt->qlen)
+               goto out;
+
+       if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { /* data follows the request: treated as bytecode */
+               bc = (struct rtattr *)(r + 1);
+               entry.sport = inet->num;
+               entry.userlocks = sk->sk_userlocks;
+       }
+
+       for (j = s_j; j < lopt->nr_table_entries; j++) {
+               struct request_sock *req, *head = lopt->syn_table[j];
+
+               reqnum = 0;
+               for (req = head; req; reqnum++, req = req->dl_next) {
+                       struct inet_request_sock *ireq = inet_rsk(req);
+
+                       if (reqnum < s_reqnum)  /* already dumped by an earlier call */
+                               continue;
+                       if (r->id.idiag_dport != ireq->rmt_port &&
+                           r->id.idiag_dport)
+                               continue;
+
+                       if (bc) {
+                               entry.saddr =
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+                                       (entry.family == AF_INET6) ?
+                                       tcp6_rsk(req)->loc_addr.s6_addr32 :
+#endif
+                                       &ireq->loc_addr;
+                               entry.daddr = 
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+                                       (entry.family == AF_INET6) ?
+                                       tcp6_rsk(req)->rmt_addr.s6_addr32 :
+#endif
+                                       &ireq->rmt_addr;
+                               entry.dport = ntohs(ireq->rmt_port);
+
+                               if (!inet_diag_bc_run(RTA_DATA(bc),
+                                                   RTA_PAYLOAD(bc), &entry))
+                                       continue;
+                       }
+
+                       err = inet_diag_fill_req(skb, sk, req,
+                                              NETLINK_CB(cb->skb).pid,
+                                              cb->nlh->nlmsg_seq, cb->nlh);
+                       if (err < 0) {  /* fill failed: record where to resume */
+                               cb->args[3] = j + 1;
+                               cb->args[4] = reqnum;
+                               goto out;
+                       }
+               }
+
+               s_reqnum = 0;   /* the resume offset only applies to the first row */
+       }
+
+out:
+       read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+
+       return err;
+}
+/* Netlink dump callback; cb->args[] keeps the resume position across calls. */
+static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       int i, num;
+       int s_i, s_num;
+       struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+       const struct inet_diag_handler *handler;
+       struct inet_hashinfo *hashinfo;
+
+       handler = inet_diag_table[cb->nlh->nlmsg_type];
+       BUG_ON(handler == NULL);
+       hashinfo = handler->idiag_hashinfo;
+               
+       s_i = cb->args[1];              /* bucket to resume from */
+       s_num = num = cb->args[2];      /* entry to resume from within that bucket */
+
+       if (cb->args[0] == 0) {         /* phase 0: listening hash not finished yet */
+               if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV)))
+                       goto skip_listen_ht;
+
+               inet_listen_lock(hashinfo);
+               for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
+                       struct sock *sk;
+                       struct hlist_node *node;
+
+                       num = 0;
+                       sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
+                               struct inet_sock *inet = inet_sk(sk);
+
+                               if (num < s_num) {      /* already dumped by an earlier call */
+                                       num++;
+                                       continue;
+                               }
+
+                               if (r->id.idiag_sport != inet->sport &&
+                                   r->id.idiag_sport)
+                                       goto next_listen;
+
+                               if (!(r->idiag_states & TCPF_LISTEN) ||
+                                   r->id.idiag_dport ||
+                                   cb->args[3] > 0)    /* resuming inside this listener's requests */
+                                       goto syn_recv;
+
+                               if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+                                       inet_listen_unlock(hashinfo);
+                                       goto done;
+                               }
+
+syn_recv:
+                               if (!(r->idiag_states & TCPF_SYN_RECV))
+                                       goto next_listen;
+
+                               if (inet_diag_dump_reqs(skb, sk, cb) < 0) {
+                                       inet_listen_unlock(hashinfo);
+                                       goto done;
+                               }
+
+next_listen:
+                               cb->args[3] = 0;        /* request-queue position belongs to one listener */
+                               cb->args[4] = 0;
+                               ++num;
+                       }
+
+                       s_num = 0;
+                       cb->args[3] = 0;
+                       cb->args[4] = 0;
+               }
+               inet_listen_unlock(hashinfo);
+skip_listen_ht:
+               cb->args[0] = 1;        /* phase 1: the ehash tables */
+               s_i = num = s_num = 0;
+       }
+
+       if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV)))
+               return skb->len;        /* no other state requested: ehash walk skipped */
+
+       for (i = s_i; i < hashinfo->ehash_size; i++) {
+               struct inet_ehash_bucket *head = &hashinfo->ehash[i];
+               struct sock *sk;
+               struct hlist_node *node;
+
+               if (i > s_i)
+                       s_num = 0;      /* the resume offset only applies to the first bucket */
+
+               read_lock_bh(&head->lock);
+
+               num = 0;
+               sk_for_each(sk, node, &head->chain) {
+                       struct inet_sock *inet = inet_sk(sk);
+
+                       if (num < s_num)
+                               goto next_normal;
+                       if (!(r->idiag_states & (1 << sk->sk_state)))
+                               goto next_normal;
+                       if (r->id.idiag_sport != inet->sport &&
+                           r->id.idiag_sport)
+                               goto next_normal;
+                       if (r->id.idiag_dport != inet->dport && r->id.idiag_dport)
+                               goto next_normal;
+                       if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+                               read_unlock_bh(&head->lock);
+                               goto done;
+                       }
+next_normal:
+                       ++num;
+               }
+
+               if (r->idiag_states & TCPF_TIME_WAIT) { /* walks the chain at i + ehash_size, still under head->lock */
+                       sk_for_each(sk, node,
+                                   &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
+                               struct inet_sock *inet = inet_sk(sk);
+
+                               if (num < s_num)
+                                       goto next_dying;
+                               if (r->id.idiag_sport != inet->sport &&
+                                   r->id.idiag_sport)
+                                       goto next_dying;
+                               if (r->id.idiag_dport != inet->dport &&
+                                   r->id.idiag_dport)
+                                       goto next_dying;
+                               if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+                                       read_unlock_bh(&head->lock);
+                                       goto done;
+                               }
+next_dying:
+                               ++num;
+                       }
+               }
+               read_unlock_bh(&head->lock);
+       }
+
+done:
+       cb->args[1] = i;        /* save the position for the next call */
+       cb->args[2] = num;
+       return skb->len;
+}
+/* Dump-completion callback handed to netlink_dump_start(); nothing to release. */
+static int inet_diag_dump_done(struct netlink_callback *cb)
+{
+       return 0;
+}
+
+/* Validate one request, then start a dump or answer an exact lookup. */
+static __inline__ int
+inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+       if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
+               return 0;       /* not a request: ignored without an error */
+
+       if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX)
+               goto err_inval;
+
+       if (inet_diag_table[nlh->nlmsg_type] == NULL)
+               return -ENOENT; /* no handler registered for this type */
+
+       if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len)
+               goto err_inval;
+
+       if (nlh->nlmsg_flags&NLM_F_DUMP) {
+               if (nlh->nlmsg_len >
+                   (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) {  /* an attribute follows the request */
+                       struct rtattr *rta = (void *)(NLMSG_DATA(nlh) +
+                                                sizeof(struct inet_diag_req));
+                       if (rta->rta_type != INET_DIAG_REQ_BYTECODE ||
+                           rta->rta_len < 8 ||
+                           rta->rta_len >
+                           (nlh->nlmsg_len -
+                            NLMSG_SPACE(sizeof(struct inet_diag_req))))
+                               goto err_inval;
+                       if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
+                               goto err_inval; /* bytecode is audited once here, before the dump runs it */
+               }
+               return netlink_dump_start(idiagnl, skb, nlh,
+                                         inet_diag_dump,
+                                         inet_diag_dump_done);
+       } else {
+               return inet_diag_get_exact(skb, nlh);
+       }
+
+err_inval:
+       return -EINVAL;
+}
+
+/* Handle the netlink message at the start of skb; ack on error or NLM_F_ACK. */
+static inline void inet_diag_rcv_skb(struct sk_buff *skb)
+{
+       int err;
+       struct nlmsghdr * nlh;
+
+       if (skb->len >= NLMSG_SPACE(0)) {       /* room for at least a netlink header */
+               nlh = (struct nlmsghdr *)skb->data;
+               if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+                       return; /* malformed length: dropped without an ack */
+               err = inet_diag_rcv_msg(skb, nlh);
+               if (err || nlh->nlmsg_flags & NLM_F_ACK) 
+                       netlink_ack(skb, nlh, err);
+       }
+}
+/* Input callback given to netlink_kernel_create(): drain the queued skbs. */
+static void inet_diag_rcv(struct sock *sk, int len)
+{
+       struct sk_buff *skb;
+       unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);       /* snapshot bounds the loop */
+
+       while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
+               inet_diag_rcv_skb(skb);
+               kfree_skb(skb);
+       }
+}
+
+static DEFINE_SPINLOCK(inet_diag_register_lock);
+/* Install h for h->idiag_type: 0, -EINVAL (bad type) or -EEXIST (slot taken). */
+int inet_diag_register(const struct inet_diag_handler *h)
+{
+       const __u16 type = h->idiag_type;
+       int err = -EINVAL;
+
+       if (type >= INET_DIAG_GETSOCK_MAX)
+               goto out;
+
+       spin_lock(&inet_diag_register_lock);
+       err = -EEXIST;
+       if (inet_diag_table[type] == NULL) {    /* only an empty slot may be claimed */
+               inet_diag_table[type] = h;
+               err = 0;
+       }
+       spin_unlock(&inet_diag_register_lock);
+out:
+       return err;
+}
+EXPORT_SYMBOL_GPL(inet_diag_register);
+/* Clear the slot for h->idiag_type; out-of-range types are ignored. */
+void inet_diag_unregister(const struct inet_diag_handler *h)
+{
+       const __u16 type = h->idiag_type;
+
+       if (type >= INET_DIAG_GETSOCK_MAX)
+               return;
+
+       spin_lock(&inet_diag_register_lock);
+       inet_diag_table[type] = NULL;   /* cleared without checking that the slot holds h */
+       spin_unlock(&inet_diag_register_lock);
+
+       synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(inet_diag_unregister);
+/* Allocate the zeroed handler table and create the NETLINK_INET_DIAG socket. */
+static int __init inet_diag_init(void)
+{
+       const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX *
+                                         sizeof(struct inet_diag_handler *));
+       int err = -ENOMEM;      /* returned for both failure paths below */
+
+       inet_diag_table = kmalloc(inet_diag_table_size, GFP_KERNEL);
+       if (!inet_diag_table)
+               goto out;
+
+       memset(inet_diag_table, 0, inet_diag_table_size);       /* all slots start unregistered */
+       idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
+                                       THIS_MODULE);
+       if (idiagnl == NULL)
+               goto out_free_table;
+       err = 0;
+out:
+       return err;
+out_free_table:
+       kfree(inet_diag_table);
+       goto out;
+}
+/* Module exit: release the netlink socket, then free the handler table. */
+static void __exit inet_diag_exit(void)
+{
+       sock_release(idiagnl->sk_socket);
+       kfree(inet_diag_table);
+}
+
+module_init(inet_diag_init);
+module_exit(inet_diag_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c

new file mode 100644 (file)

index 0000000..e8d29fe
--- /dev/null
+++ b/net/ipv4/inet_hashtables.c
@@ -0,0 +1,165 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Generic INET transport hashtables
+ *
+ * Authors:    Lotsa people, from code originally in tcp
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_hashtables.h>
+
+/*
+ * Create a bind bucket for local port @snum and link it at the front of
+ * @head's chain.  The bucket comes from @cachep with an atomic
+ * allocation and starts with fastreuse cleared and no owners.
+ *
+ * The bindhash mutex for snum's hash chain must be held by the caller.
+ * Returns the new bucket, or NULL if the allocation failed.
+ */
+struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep,
+                                                struct inet_bind_hashbucket *head,
+                                                const unsigned short snum)
+{
+       struct inet_bind_bucket *bucket;
+
+       bucket = kmem_cache_alloc(cachep, SLAB_ATOMIC);
+       if (bucket == NULL)
+               return NULL;
+
+       bucket->port      = snum;
+       bucket->fastreuse = 0;
+       INIT_HLIST_HEAD(&bucket->owners);
+       hlist_add_head(&bucket->node, &head->chain);
+
+       return bucket;
+}
+
+EXPORT_SYMBOL(inet_bind_bucket_create);
+
+/*
+ * Unlink @tb from its hash chain and return it to @cachep, but only if
+ * no socket owns it any more; a bucket that still has owners is left
+ * untouched.
+ *
+ * Caller must hold hashbucket lock for this tb with local BH disabled.
+ */
+void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb)
+{
+       if (!hlist_empty(&tb->owners))
+               return;
+
+       __hlist_del(&tb->node);
+       kmem_cache_free(cachep, tb);
+}
+
+/*
+ * Attach @sk to bind bucket @tb for local port @snum: store the port in
+ * the inet sock, pass the sock to sk_add_bind_node() together with the
+ * bucket's owners list, and record @tb as the sock's icsk_bind_hash.
+ * No locking is done here.
+ */
+void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
+                   const unsigned short snum)
+{
+       inet_sk(sk)->num = snum;
+       sk_add_bind_node(sk, &tb->owners);
+       inet_csk(sk)->icsk_bind_hash = tb;
+}
+
+EXPORT_SYMBOL(inet_bind_hash);
+
+/*
+ * Get rid of any references to a local port held by the given sock.
+ *
+ * The bind hash bucket is selected from the sock's current port number.
+ * With that bucket's lock held, the sock is removed from its bind
+ * bucket, its icsk_bind_hash and port number are cleared, and the bucket
+ * is passed to inet_bind_bucket_destroy(), which frees it only when no
+ * owners remain.
+ *
+ * The lock is taken with plain spin_lock(); BHs are not touched here.
+ */
+static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
+{
+       const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
+       struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
+       struct inet_bind_bucket *tb;
+
+       spin_lock(&head->lock);
+       tb = inet_csk(sk)->icsk_bind_hash;      /* remember it before it is cleared */
+       __sk_del_bind_node(sk);
+       inet_csk(sk)->icsk_bind_hash = NULL;
+       inet_sk(sk)->num = 0;
+       inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+       spin_unlock(&head->lock);
+}
+
+/*
+ * Release the local port held by @sk: runs __inet_put_port() with
+ * bottom halves disabled on the local CPU.
+ */
+void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
+{
+       local_bh_disable();
+       __inet_put_port(hashinfo, sk);
+       local_bh_enable();
+}
+
+EXPORT_SYMBOL(inet_put_port);
+
+/*
+ * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
+ * Look, when several writers sleep and reader wakes them up, all but one
+ * immediately hit write lock and grab all the cpus. Exclusive sleep solves
+ * this, _but_ remember, it adds useless work on UP machines (wake up each
+ * exclusive lock release). It should be ifdefed really.
+ *
+ * Takes hashinfo->lhash_lock for writing and returns with it held.  While
+ * lhash_users is non-zero the function queues itself as an exclusive
+ * waiter on lhash_wait, drops the lock, sleeps uninterruptibly, and
+ * retakes the lock before testing the counter again.
+ *
+ * NOTE(review): the first acquisition is write_lock() while the wait loop
+ * uses write_unlock_bh()/write_lock_bh().  Presumably callers enter with
+ * BHs already disabled, so the loop re-enables them only across the
+ * sleep -- confirm against the callers.
+ */
+void inet_listen_wlock(struct inet_hashinfo *hashinfo)
+{
+       write_lock(&hashinfo->lhash_lock);
+
+       if (atomic_read(&hashinfo->lhash_users)) {
+               DEFINE_WAIT(wait);
+
+               for (;;) {
+                       prepare_to_wait_exclusive(&hashinfo->lhash_wait,
+                                                 &wait, TASK_UNINTERRUPTIBLE);
+                       if (!atomic_read(&hashinfo->lhash_users))
+                               break;  /* still holding the write lock */
+                       write_unlock_bh(&hashinfo->lhash_lock);
+                       schedule();
+                       write_lock_bh(&hashinfo->lhash_lock);
+               }
+
+               finish_wait(&hashinfo->lhash_wait, &wait);
+       }
+}
+
+EXPORT_SYMBOL(inet_listen_wlock);
+
+/*
+ * Pick the best listening socket on @head for a packet addressed to
+ * @daddr, local port @hnum, arriving on interface @dif.  Deliberately
+ * kept out of line.
+ *
+ * The BSD API gives a listening sock no way to specify the remote
+ * address or port, so both are always treated as wildcards and only the
+ * local side is scored:
+ *   1 point  if the sock's family is PF_INET
+ *   2 points if the sock has a receive address and it equals @daddr
+ *   2 points if the sock is bound to a device and it equals @dif
+ * A receive address or bound device that does not match disqualifies
+ * the sock.  Socks on another port, and IPv6-only socks, are skipped.
+ *
+ * A sock reaching the maximum of 5 is returned immediately; otherwise
+ * the first sock seen with the highest score is returned, or NULL when
+ * nothing qualified.
+ */
+struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
+                                   const unsigned short hnum, const int dif)
+{
+       struct sock *best = NULL, *sk;
+       const struct hlist_node *node;
+       int best_score = -1;
+
+       sk_for_each(sk, node, head) {
+               const struct inet_sock *inet = inet_sk(sk);
+               __u32 bound_addr;
+               int score;
+
+               if (inet->num != hnum || ipv6_only_sock(sk))
+                       continue;
+
+               bound_addr = inet->rcv_saddr;
+               score = (sk->sk_family == PF_INET) ? 1 : 0;
+
+               if (bound_addr) {
+                       if (bound_addr != daddr)
+                               continue;
+                       score += 2;
+               }
+               if (sk->sk_bound_dev_if) {
+                       if (sk->sk_bound_dev_if != dif)
+                               continue;
+                       score += 2;
+               }
+
+               if (score == 5)
+                       return sk;
+               if (score > best_score) {
+                       best_score = score;
+                       best       = sk;
+               }
+       }
+
+       return best;
+}
+
+EXPORT_SYMBOL_GPL(__inet_lookup_listener);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c

new file mode 100644 (file)

index 0000000..4d1502a
--- /dev/null
+++ b/net/ipv4/inet_timewait_sock.c
@@ -0,0 +1,384 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Generic TIME_WAIT sockets functions
+ *
+ *             From code originally in TCP
+ */
+
+#include <linux/config.h>
+
+#include <net/inet_hashtables.h>
+#include <net/inet_timewait_sock.h>
+#include <net/ip.h>
+
+/*
+ * Must be called with locally disabled BHs.
+ *
+ * Remove @tw from the hash tables and drop one reference:
+ *  - under the ehash bucket write lock, unlink it from the established
+ *    hash; if it is already unhashed, return without doing anything
+ *    else (no reference is dropped in that case);
+ *  - under the bind hash bucket lock, unlink it from its bind bucket and
+ *    give the bucket to inet_bind_bucket_destroy();
+ *  - finally call inet_twsk_put().
+ */
+void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo)
+{
+       struct inet_bind_hashbucket *bhead;
+       struct inet_bind_bucket *tb;
+       /* Unlink from established hashes. */
+       struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent];
+
+       write_lock(&ehead->lock);
+       if (hlist_unhashed(&tw->tw_node)) {
+               write_unlock(&ehead->lock);
+               return;
+       }
+       __hlist_del(&tw->tw_node);
+       sk_node_init(&tw->tw_node);
+       write_unlock(&ehead->lock);
+
+       /* Disassociate with bind bucket. */
+       bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
+       spin_lock(&bhead->lock);
+       tb = tw->tw_tb;
+       __hlist_del(&tw->tw_bind_node);
+       tw->tw_tb = NULL;
+       inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+       spin_unlock(&bhead->lock);
+#ifdef SOCK_REFCNT_DEBUG
+       if (atomic_read(&tw->tw_refcnt) != 1) {
+               printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n",
+                      tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
+       }
+#endif
+       inet_twsk_put(tw);
+}
+
+EXPORT_SYMBOL_GPL(__inet_twsk_kill);
+
+/*
+ * Enter the time wait state. This is called with locally disabled BH.
+ * Essentially we whip up a timewait bucket, copy the relevant info into it
+ * from the SK, and mess with hash chains and list linkage.
+ *
+ * This function performs only the hash linkage part: @tw is added to the
+ * bind bucket that @sk already uses (under the bind hash bucket lock),
+ * then, under the ehash bucket write lock, @sk is removed from the
+ * established chain and @tw is added to the chain ehash_size buckets
+ * further on.  One reference is taken on @tw at the end.
+ */
+void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
+                          struct inet_hashinfo *hashinfo)
+{
+       const struct inet_sock *inet = inet_sk(sk);
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent];
+       struct inet_bind_hashbucket *bhead;
+       /* Step 1: Put TW into bind hash. Original socket stays there too.
+          Note, that any socket with inet->num != 0 MUST be bound in
+          binding cache, even if it is closed.
+        */
+       bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
+       spin_lock(&bhead->lock);
+       tw->tw_tb = icsk->icsk_bind_hash;
+       BUG_TRAP(icsk->icsk_bind_hash);
+       inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
+       spin_unlock(&bhead->lock);
+
+       write_lock(&ehead->lock);
+
+       /* Step 2: Remove SK from established hash. */
+       if (__sk_del_node_init(sk))
+               sock_prot_dec_use(sk->sk_prot);
+
+       /* Step 3: Hash TW into TIMEWAIT half of established hash table. */
+       inet_twsk_add_node(tw, &(ehead + hashinfo->ehash_size)->chain);
+       atomic_inc(&tw->tw_refcnt);     /* presumably the ref dropped by __inet_twsk_kill() -- confirm */
+
+       write_unlock(&ehead->lock);
+}
+
+EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
+
+/*
+ * Allocate a timewait sock for @sk from the twsk_slab of the protocol
+ * that created @sk (atomic allocation) and copy @sk's identity into it:
+ * addresses, ports, bound device, family, reuse flag and hash slot.
+ * tw_state is always TCP_TIME_WAIT; @state is stored as tw_substate.
+ *
+ * The new object starts with a reference count of 1 and an initialised
+ * death node.  Returns NULL if the allocation fails.
+ */
+struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
+{
+       const struct inet_sock *inet = inet_sk(sk);
+       struct inet_timewait_sock *tw;
+
+       tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, SLAB_ATOMIC);
+       if (tw == NULL)
+               return NULL;
+
+       /* Give us an identity. */
+       tw->tw_daddr        = inet->daddr;
+       tw->tw_rcv_saddr    = inet->rcv_saddr;
+       tw->tw_bound_dev_if = sk->sk_bound_dev_if;
+       tw->tw_num          = inet->num;
+       tw->tw_state        = TCP_TIME_WAIT;
+       tw->tw_substate     = state;
+       tw->tw_sport        = inet->sport;
+       tw->tw_dport        = inet->dport;
+       tw->tw_family       = sk->sk_family;
+       tw->tw_reuse        = sk->sk_reuse;
+       tw->tw_hashent      = sk->sk_hashent;
+       tw->tw_ipv6only     = 0;
+       tw->tw_prot         = sk->sk_prot_creator;
+
+       atomic_set(&tw->tw_refcnt, 1);
+       inet_twsk_dead_node_init(tw);
+
+       return tw;
+}
+
+EXPORT_SYMBOL_GPL(inet_twsk_alloc);
+
+/*
+ * Kill the timewait socks queued in twdr->cells[slot].
+ *
+ * Entered and left with twdr->death_lock held; the lock is dropped
+ * around each individual kill.  Entries are taken from the head of the
+ * list one at a time until the list is empty or more than
+ * INET_TWDR_TWKILL_QUOTA entries have been killed.  The number killed is
+ * subtracted from twdr->tw_count and added to LINUX_MIB_TIMEWAITED.
+ *
+ * Returns non-zero if quota exceeded.
+ */
+static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
+                                   const int slot)
+{
+       struct inet_timewait_sock *tw;
+       struct hlist_node *node;
+       unsigned int killed;
+       int ret;
+
+       /* NOTE: compare this to previous version where lock
+        * was released after detaching chain. It was racy,
+        * because tw buckets are scheduled in not serialized context
+        * in 2.3 (with netfilter), and with softnet it is common, because
+        * soft irqs are not sequenced.
+        */
+       killed = 0;
+       ret = 0;
+rescan:
+       inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
+               __inet_twsk_del_dead_node(tw);
+               spin_unlock(&twdr->death_lock);
+               __inet_twsk_kill(tw, twdr->hashinfo);
+               inet_twsk_put(tw);      /* second put; presumably the death-row reference -- confirm */
+               killed++;
+               spin_lock(&twdr->death_lock);
+               if (killed > INET_TWDR_TWKILL_QUOTA) {
+                       ret = 1;
+                       break;
+               }
+
+               /* While we dropped twdr->death_lock, another cpu may have
+                * killed off the next TW bucket in the list, therefore
+                * do a fresh re-read of the hlist head node with the
+                * lock reacquired.  We still use the hlist traversal
+                * macro in order to get the prefetches.
+                */
+               goto rescan;
+       }
+
+       twdr->tw_count -= killed;
+       NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
+
+       return ret;
+}
+
+/*
+ * Periodic reaper for the slow death row.  @data is the
+ * struct inet_timewait_death_row to work on, passed as an unsigned long.
+ *
+ * With death_lock held: if nothing is queued, do nothing.  Otherwise
+ * purge the current slot.  If the purge ran out of quota, flag the slot
+ * in thread_slots and schedule twkill_work to finish it, and keep the
+ * timer running; if the slot was fully purged, keep the timer running
+ * only while tw_count is still non-zero.  In both cases advance to the
+ * next slot (wrapping at INET_TWDR_TWKILL_SLOTS).
+ */
+void inet_twdr_hangman(unsigned long data)
+{
+       struct inet_timewait_death_row *twdr =
+                               (struct inet_timewait_death_row *)data;
+       unsigned int rearm = 0;
+
+       spin_lock(&twdr->death_lock);
+
+       if (twdr->tw_count != 0) {
+               if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
+                       /* Quota exceeded: hand the rest of the slot over. */
+                       twdr->thread_slots |= (1 << twdr->slot);
+                       mb();
+                       schedule_work(&twdr->twkill_work);
+                       rearm = 1;
+               } else if (twdr->tw_count) {
+                       /* We purged the entire slot, but others remain. */
+                       rearm = 1;
+               }
+
+               twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
+               if (rearm)
+                       mod_timer(&twdr->tw_timer, jiffies + twdr->period);
+       }
+
+       spin_unlock(&twdr->death_lock);
+}
+
+EXPORT_SYMBOL_GPL(inet_twdr_hangman);
+
+extern void twkill_slots_invalid(void);
+
+/*
+ * Deferred purge of death-row slots.  @data is the
+ * struct inet_timewait_death_row to work on.
+ *
+ * For as long as any bit is set in twdr->thread_slots, take death_lock
+ * (BHs disabled), and for every flagged slot keep calling
+ * inet_twdr_do_twkill_work() until it stops reporting an exceeded quota,
+ * then clear that slot's bit.  Between passes over a slot the lock is
+ * dropped for a schedule() whenever need_resched() says so.
+ *
+ * The bits are set by inet_twdr_hangman() in this file when it runs out
+ * of quota.
+ */
+void inet_twdr_twkill_work(void *data)
+{
+       struct inet_timewait_death_row *twdr = data;
+       int i;
+
+       /*
+        * Slot i is tracked as bit i of thread_slots, so the mask needs at
+        * least INET_TWDR_TWKILL_SLOTS bits.  The previous test compared
+        * (SLOTS - 1) against the bit count and so let SLOTS == bits + 1
+        * through.  twkill_slots_invalid() is only declared in this file;
+        * presumably an unresolved reference is meant to flag a bad
+        * configuration at link time -- confirm.
+        */
+       if (INET_TWDR_TWKILL_SLOTS > (sizeof(twdr->thread_slots) * 8))
+               twkill_slots_invalid();
+
+       while (twdr->thread_slots) {
+               spin_lock_bh(&twdr->death_lock);
+               for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
+                       if (!(twdr->thread_slots & (1 << i)))
+                               continue;
+
+                       while (inet_twdr_do_twkill_work(twdr, i) != 0) {
+                               if (need_resched()) {
+                                       spin_unlock_bh(&twdr->death_lock);
+                                       schedule();
+                                       spin_lock_bh(&twdr->death_lock);
+                               }
+                       }
+
+                       twdr->thread_slots &= ~(1 << i);
+               }
+               spin_unlock_bh(&twdr->death_lock);
+       }
+}
+
+EXPORT_SYMBOL_GPL(inet_twdr_twkill_work);
+
+/* These are always called from BH context.  See callers in
+ * tcp_input.c to verify this.
+ */
+
+/*
+ * This is for handling early-kills of TIME_WAIT sockets.
+ *
+ * Under twdr->death_lock: if @tw is still on a death list, unlink it,
+ * drop one reference, and stop tw_timer when tw_count reaches zero.
+ * After the lock is released, __inet_twsk_kill() is called in every
+ * case, whether or not @tw was found on a death list.
+ */
+void inet_twsk_deschedule(struct inet_timewait_sock *tw,
+                         struct inet_timewait_death_row *twdr)
+{
+       spin_lock(&twdr->death_lock);
+       if (inet_twsk_del_dead_node(tw)) {
+               inet_twsk_put(tw);
+               if (--twdr->tw_count == 0)
+                       del_timer(&twdr->tw_timer);
+       }
+       spin_unlock(&twdr->death_lock);
+       __inet_twsk_kill(tw, twdr->hashinfo);
+}
+
+EXPORT_SYMBOL(inet_twsk_deschedule);
+
+/*
+ * Queue @tw on one of @twdr's death lists so that it is killed after
+ * roughly @timeo jiffies.
+ *
+ * Under death_lock: if @tw is already queued it is unlinked first (and
+ * tw_count decremented); otherwise a reference is taken for the queue.
+ * Timeouts that need INET_TWDR_RECYCLE_SLOTS or more recycle ticks go to
+ * the slow wheel (twdr->cells[], served by tw_timer); a timeout of at
+ * least @timewait_len always lands in the furthest slot.  Shorter
+ * timeouts go to the recycle wheel (twdr->twcal_row[]), arming or
+ * pulling forward twcal_timer as needed.  tw_timer is started when
+ * tw_count goes from zero to one.
+ */
+void inet_twsk_schedule(struct inet_timewait_sock *tw,
+                      struct inet_timewait_death_row *twdr,
+                      const int timeo, const int timewait_len)
+{
+       struct hlist_head *list;
+       int slot;
+
+       /* timeout := RTO * 3.5
+        *
+        * 3.5 = 1+2+0.5 to wait for two retransmits.
+        *
+        * RATIONALE: if FIN arrived and we entered TIME-WAIT state,
+        * our ACK acking that FIN can be lost. If N subsequent retransmitted
+        * FINs (or previous segments) are lost (probability of such event
+        * is p^(N+1), where p is probability to lose single packet and
+        * time to detect the loss is about RTO*(2^N - 1) with exponential
+        * backoff). Normal timewait length is calculated so, that we
+        * waited at least for one retransmitted FIN (maximal RTO is 120sec).
+        * [ BTW Linux. following BSD, violates this requirement waiting
+        *   only for 60sec, we should wait at least for 240 secs.
+        *   Well, 240 consumes too much of resources 8)
+        * ]
+        * This interval is not reduced to catch old duplicate and
+        * responses to our wandering segments living for two MSLs.
+        * However, if we use PAWS to detect
+        * old duplicates, we can reduce the interval to bounds required
+        * by RTO, rather than MSL. So, if peer understands PAWS, we
+        * kill tw bucket after 3.5*RTO (it is important that this number
+        * is greater than TS tick!) and detect old duplicates with help
+        * of PAWS.
+        */
+       slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
+
+       spin_lock(&twdr->death_lock);
+
+       /* Unlink it, if it was scheduled */
+       if (inet_twsk_del_dead_node(tw))
+               twdr->tw_count--;
+       else
+               atomic_inc(&tw->tw_refcnt);
+
+       if (slot >= INET_TWDR_RECYCLE_SLOTS) {
+               /* Schedule to slow timer */
+               if (timeo >= timewait_len) {
+                       slot = INET_TWDR_TWKILL_SLOTS - 1;
+               } else {
+                       slot = (timeo + twdr->period - 1) / twdr->period;
+                       if (slot >= INET_TWDR_TWKILL_SLOTS)
+                               slot = INET_TWDR_TWKILL_SLOTS - 1;
+               }
+               tw->tw_ttd = jiffies + timeo;
+               slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
+               list = &twdr->cells[slot];
+       } else {
+               tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);
+
+               if (twdr->twcal_hand < 0) {
+                       /* Recycle wheel idle: restart it with the hand at 0. */
+                       twdr->twcal_hand = 0;
+                       twdr->twcal_jiffie = jiffies;
+                       twdr->twcal_timer.expires = twdr->twcal_jiffie +
+                                             (slot << INET_TWDR_RECYCLE_TICK);
+                       add_timer(&twdr->twcal_timer);
+               } else {
+                       /* Pull the timer forward if this entry is due sooner. */
+                       if (time_after(twdr->twcal_timer.expires,
+                                      jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
+                               mod_timer(&twdr->twcal_timer,
+                                         jiffies + (slot << INET_TWDR_RECYCLE_TICK));
+                       slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
+               }
+               list = &twdr->twcal_row[slot];
+       }
+
+       hlist_add_head(&tw->tw_death_node, list);
+
+       if (twdr->tw_count++ == 0)
+               mod_timer(&twdr->tw_timer, jiffies + twdr->period);
+       spin_unlock(&twdr->death_lock);
+}
+
+EXPORT_SYMBOL_GPL(inet_twsk_schedule);
+
+/*
+ * Reaper for the recycle wheel (twdr->twcal_row[]).  @data is the
+ * struct inet_timewait_death_row to work on, passed as an unsigned long.
+ * Presumably installed as the twcal_timer handler -- confirm at the
+ * timer's setup site.
+ *
+ * With death_lock held, and only if the wheel is active
+ * (twcal_hand >= 0), walk INET_TWDR_RECYCLE_SLOTS slots starting at the
+ * hand.  Every slot whose time is not after "now" has all of its entries
+ * killed.  The first slot that is not yet due becomes the new hand, and
+ * the first not-yet-due slot that still holds entries re-arms
+ * twcal_timer for that slot's time and ends the walk.  If the walk
+ * completes without re-arming, the wheel is marked idle
+ * (twcal_hand = -1).
+ *
+ * On exit the number killed is subtracted from tw_count (tw_timer is
+ * stopped when that reaches zero) and added to LINUX_MIB_TIMEWAITKILLED.
+ */
+void inet_twdr_twcal_tick(unsigned long data)
+{
+       struct inet_timewait_death_row *twdr;
+       int n, slot;
+       unsigned long j;
+       unsigned long now = jiffies;
+       int killed = 0;
+       int adv = 0;
+
+       twdr = (struct inet_timewait_death_row *)data;
+
+       spin_lock(&twdr->death_lock);
+       if (twdr->twcal_hand < 0)
+               goto out;
+
+       slot = twdr->twcal_hand;
+       j = twdr->twcal_jiffie;
+
+       for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
+               if (time_before_eq(j, now)) {
+                       struct hlist_node *node, *safe;
+                       struct inet_timewait_sock *tw;
+
+                       inet_twsk_for_each_inmate_safe(tw, node, safe,
+                                                      &twdr->twcal_row[slot]) {
+                               __inet_twsk_del_dead_node(tw);
+                               __inet_twsk_kill(tw, twdr->hashinfo);
+                               inet_twsk_put(tw);
+                               killed++;
+                       }
+               } else {
+                       if (!adv) {     /* first slot that is not due yet */
+                               adv = 1;
+                               twdr->twcal_jiffie = j;
+                               twdr->twcal_hand = slot;
+                       }
+
+                       if (!hlist_empty(&twdr->twcal_row[slot])) {
+                               mod_timer(&twdr->twcal_timer, j);
+                               goto out;
+                       }
+               }
+               j += 1 << INET_TWDR_RECYCLE_TICK;
+               slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
+       }
+       twdr->twcal_hand = -1;  /* nothing pending: wheel goes idle */
+
+out:
+       if ((twdr->tw_count -= killed) == 0)
+               del_timer(&twdr->tw_timer);
+       NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
+       spin_unlock(&twdr->death_lock);
+}
+
+EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c

index 95473953c406ebe2cc7217918381b5896f3ace66..f84ba9c96551e015882d9895ac197814c3b769f1 100644 (file)
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -20,6 +20,7 @@
  #include <linux/kernel.h>
  #include <linux/mm.h>
  #include <linux/net.h>
+#include <net/ip.h>
  #include <net/inetpeer.h>
  
  /*
@@ -72,7 +73,7 @@
  /* Exported for inet_getid inline function.  */
  DEFINE_SPINLOCK(inet_peer_idlock);
  
-static kmem_cache_t *peer_cachep;
+static kmem_cache_t *peer_cachep __read_mostly;
  
  #define node_height(x) x->avl_height
  static struct inet_peer peer_fake_node = {
@@ -450,11 +451,12 @@ static void peer_check_expire(unsigned long dummy)
         /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
          * interval depending on the total number of entries (more entries,
          * less interval). */
-       peer_periodic_timer.expires = jiffies
-               + inet_peer_gc_maxtime
-               - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
-                       peer_total / inet_peer_threshold * HZ;
+       if (peer_total >= inet_peer_threshold)
+               peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
+       else
+               peer_periodic_timer.expires = jiffies
+                       + inet_peer_gc_maxtime
+                       - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
+                               peer_total / inet_peer_threshold * HZ;
         add_timer(&peer_periodic_timer);
  }
-
-EXPORT_SYMBOL(inet_peer_idlock);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c

index 77094aac6c28c6164342066a7a72690934405f1c..0923add122b415b8000d46fbad28fcffe217e0cc 100644 (file)
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -76,16 +76,12 @@ int ip_forward(struct sk_buff *skb)
          *      that reaches zero, we must reply an ICMP control message telling
          *      that the packet's lifetime expired.
          */
-
-       iph = skb->nh.iph;
-
-       if (iph->ttl <= 1)
+       if (skb->nh.iph->ttl <= 1)
                  goto too_many_hops;
  
         if (!xfrm4_route_forward(skb))
                 goto drop;
  
-       iph = skb->nh.iph;
         rt = (struct rtable*)skb->dst;
  
         if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c

index 7f68e27eb4ea894c21f36e2d99f7f9423f1ba657..9e6e683cc34d4396560b19bfc9b1d4f2bca2f9df 100644 (file)
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -377,7 +377,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
         return ip_frag_intern(hash, qp);
  
  out_nomem:
-       NETDEBUG(if (net_ratelimit()) printk(KERN_ERR "ip_frag_create: no memory left !\n"));
+       LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
         return NULL;
  }
  
@@ -533,7 +533,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
         if (skb->dev)
                 qp->iif = skb->dev->ifindex;
         skb->dev = NULL;
-       qp->stamp = skb->stamp;
+       skb_get_timestamp(skb, &qp->stamp);
         qp->meat += skb->len;
         atomic_add(skb->truesize, &ip_frag_mem);
         if (offset == 0)
@@ -615,7 +615,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
  
         head->next = NULL;
         head->dev = dev;
-       head->stamp = qp->stamp;
+       skb_set_timestamp(head, &qp->stamp);
  
         iph = head->nh.iph;
         iph->frag_off = 0;
@@ -625,10 +625,8 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
         return head;
  
  out_nomem:
-       NETDEBUG(if (net_ratelimit())
-                printk(KERN_ERR 
-                       "IP: queue_glue: no memory for gluing queue %p\n",
-                       qp));
+       LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
+                             "queue %p\n", qp);
         goto out_fail;
  out_oversize:
         if (net_ratelimit())
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c

index 8848355222241cbd1763330a88183a2b45b34966..f0d5740d7e220f5675602ee43682c526f4dfbf8c 100644 (file)
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -290,7 +290,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
  
         dev_hold(dev);
         ipgre_tunnel_link(nt);
-       /* Do not decrement MOD_USE_COUNT here. */
         return nt;
  
  failed:
@@ -1277,12 +1276,28 @@ err1:
         goto out;
  }
  
-static void ipgre_fini(void)
+static void __exit ipgre_destroy_tunnels(void)
+{
+       int prio;
+
+       for (prio = 0; prio < 4; prio++) {
+               int h;
+               for (h = 0; h < HASH_SIZE; h++) {
+                       struct ip_tunnel *t;
+                       while ((t = tunnels[prio][h]) != NULL)
+                               unregister_netdevice(t->dev);
+               }
+       }
+}
+
+static void __exit ipgre_fini(void)
  {
         if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
                 printk(KERN_INFO "ipgre close: can't remove protocol\n");
  
-       unregister_netdev(ipgre_fb_tunnel_dev);
+       rtnl_lock();
+       ipgre_destroy_tunnels();
+       rtnl_unlock();
  }
  
  module_init(ipgre_init);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c

index c703528e0bcd524751b8b4e43415e2b8e8ccadaf..473d0f2b2e0d6b5a4252f7c825ea870a2afc2c91 100644 (file)
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -150,7 +150,7 @@
   *     SNMP management statistics
   */
  
-DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics);
+DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly;
  
  /*
   *     Process Router Attention IP option
@@ -225,8 +225,8 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
                 /* If there maybe a raw socket we must check - if not we
                  * don't care less
                  */
-               if (raw_sk)
-                       raw_v4_input(skb, skb->nh.iph, hash);
+               if (raw_sk && !raw_v4_input(skb, skb->nh.iph, hash))
+                       raw_sk = NULL;
  
                 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
                         int ret;
@@ -279,18 +279,70 @@ int ip_local_deliver(struct sk_buff *skb)
                        ip_local_deliver_finish);
  }
  
-static inline int ip_rcv_finish(struct sk_buff *skb)
+static inline int ip_rcv_options(struct sk_buff *skb)
  {
+       struct ip_options *opt;
+       struct iphdr *iph;
         struct net_device *dev = skb->dev;
+
+       /* It looks as overkill, because not all
+          IP options require packet mangling.
+          But it is the easiest for now, especially taking
+          into account that combination of IP options
+          and running sniffer is extremely rare condition.
+                                             --ANK (980813)
+       */
+       if (skb_cow(skb, skb_headroom(skb))) {
+               IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+               goto drop;
+       }
+
+       iph = skb->nh.iph;
+
+       if (ip_options_compile(NULL, skb)) {
+               IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+               goto drop;
+       }
+
+       opt = &(IPCB(skb)->opt);
+       if (unlikely(opt->srr)) {
+               struct in_device *in_dev = in_dev_get(dev);
+               if (in_dev) {
+                       if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
+                               if (IN_DEV_LOG_MARTIANS(in_dev) &&
+                                   net_ratelimit())
+                                       printk(KERN_INFO "source route option "
+                                              "%u.%u.%u.%u -> %u.%u.%u.%u\n",
+                                              NIPQUAD(iph->saddr),
+                                              NIPQUAD(iph->daddr));
+                               in_dev_put(in_dev);
+                               goto drop;
+                       }
+
+                       in_dev_put(in_dev);
+               }
+
+               if (ip_options_rcv_srr(skb))
+                       goto drop;
+       }
+
+       return 0;
+drop:
+       return -1;
+}
+
+static inline int ip_rcv_finish(struct sk_buff *skb)
+{
         struct iphdr *iph = skb->nh.iph;
-       int err;
  
         /*
          *      Initialise the virtual path cache for the packet. It describes
          *      how the packet travels inside Linux networking.
          */ 
-       if (skb->dst == NULL) {
-               if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
+       if (likely(skb->dst == NULL)) {
+               int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
+                                        skb->dev);
+               if (unlikely(err)) {
                         if (err == -EHOSTUNREACH)
                                 IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
                         goto drop; 
@@ -298,7 +350,7 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
         }
  
  #ifdef CONFIG_NET_CLS_ROUTE
-       if (skb->dst->tclassid) {
+       if (unlikely(skb->dst->tclassid)) {
                 struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id();
                 u32 idx = skb->dst->tclassid;
                 st[idx&0xFF].o_packets++;
@@ -308,48 +360,11 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
         }
  #endif
  
-       if (iph->ihl > 5) {
-               struct ip_options *opt;
-
-               /* It looks as overkill, because not all
-                  IP options require packet mangling.
-                  But it is the easiest for now, especially taking
-                  into account that combination of IP options
-                  and running sniffer is extremely rare condition.
-                                                     --ANK (980813)
-               */
-
-               if (skb_cow(skb, skb_headroom(skb))) {
-                       IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
-                       goto drop;
-               }
-               iph = skb->nh.iph;
-
-               if (ip_options_compile(NULL, skb))
-                       goto inhdr_error;
-
-               opt = &(IPCB(skb)->opt);
-               if (opt->srr) {
-                       struct in_device *in_dev = in_dev_get(dev);
-                       if (in_dev) {
-                               if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
-                                       if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
-                                               printk(KERN_INFO "source route option %u.%u.%u.%u -> %u.%u.%u.%u\n",
-                                                      NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
-                                       in_dev_put(in_dev);
-                                       goto drop;
-                               }
-                               in_dev_put(in_dev);
-                       }
-                       if (ip_options_rcv_srr(skb))
-                               goto drop;
-               }
-       }
+       if (iph->ihl > 5 && ip_rcv_options(skb))
+               goto drop;
  
         return dst_input(skb);
  
-inhdr_error:
-       IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
  drop:
          kfree_skb(skb);
          return NET_RX_DROP;
@@ -358,9 +373,10 @@ drop:
  /*
   *     Main IP Receive routine.
   */ 
-int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
  {
         struct iphdr *iph;
+       u32 len;
  
         /* When the interface is in promisc. mode, drop all the crap
          * that it receives, do not try to analyse it.
@@ -392,29 +408,27 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
          */
  
         if (iph->ihl < 5 || iph->version != 4)
-               goto inhdr_error; 
+               goto inhdr_error;
  
         if (!pskb_may_pull(skb, iph->ihl*4))
                 goto inhdr_error;
  
         iph = skb->nh.iph;
  
-       if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
-               goto inhdr_error; 
+       if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
+               goto inhdr_error;
  
-       {
-               __u32 len = ntohs(iph->tot_len); 
-               if (skb->len < len || len < (iph->ihl<<2))
-                       goto inhdr_error;
+       len = ntohs(iph->tot_len);
+       if (skb->len < len || len < (iph->ihl*4))
+               goto inhdr_error;
  
-               /* Our transport medium may have padded the buffer out. Now we know it
-                * is IP we can trim to the true length of the frame.
-                * Note this now means skb->len holds ntohs(iph->tot_len).
-                */
-               if (pskb_trim_rcsum(skb, len)) {
-                       IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
-                       goto drop;
-               }
+       /* Our transport medium may have padded the buffer out. Now we know it
+        * is IP we can trim to the true length of the frame.
+        * Note this now means skb->len holds ntohs(iph->tot_len).
+        */
+       if (pskb_trim_rcsum(skb, len)) {
+               IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+               goto drop;
         }
  
         return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
@@ -428,5 +442,4 @@ out:
          return NET_RX_DROP;
  }
  
-EXPORT_SYMBOL(ip_rcv);
  EXPORT_SYMBOL(ip_statistics);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c

index 6d89f3f3e70193042ee8873834e57875bc20abba..bce4e875193be1d596c4c607ab42e9e1f966513f 100644 (file)
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -489,23 +489,18 @@ void ip_options_undo(struct ip_options * opt)
         }
  }
  
-int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user)
+static struct ip_options *ip_options_get_alloc(const int optlen)
  {
-       struct ip_options *opt;
+       struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3),
+                                        GFP_KERNEL);
+       if (opt)
+               memset(opt, 0, sizeof(*opt));
+       return opt;
+}
  
-       opt = kmalloc(sizeof(struct ip_options)+((optlen+3)&~3), GFP_KERNEL);
-       if (!opt)
-               return -ENOMEM;
-       memset(opt, 0, sizeof(struct ip_options));
-       if (optlen) {
-               if (user) {
-                       if (copy_from_user(opt->__data, data, optlen)) {
-                               kfree(opt);
-                               return -EFAULT;
-                       }
-               } else
-                       memcpy(opt->__data, data, optlen);
-       }
+static int ip_options_get_finish(struct ip_options **optp,
+                                struct ip_options *opt, int optlen)
+{
         while (optlen & 3)
                 opt->__data[optlen++] = IPOPT_END;
         opt->optlen = optlen;
@@ -521,6 +516,30 @@ int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, in
         return 0;
  }
  
+int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen)
+{
+       struct ip_options *opt = ip_options_get_alloc(optlen);
+
+       if (!opt)
+               return -ENOMEM;
+       if (optlen && copy_from_user(opt->__data, data, optlen)) {
+               kfree(opt);
+               return -EFAULT;
+       }
+       return ip_options_get_finish(optp, opt, optlen);
+}
+
+int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen)
+{
+       struct ip_options *opt = ip_options_get_alloc(optlen);
+
+       if (!opt)
+               return -ENOMEM;
+       if (optlen)
+               memcpy(opt->__data, data, optlen);
+       return ip_options_get_finish(optp, opt, optlen);
+}
+
  void ip_forward_options(struct sk_buff *skb)
  {
         struct   ip_options * opt       = &(IPCB(skb)->opt);
@@ -620,6 +639,3 @@ int ip_options_rcv_srr(struct sk_buff *skb)
         }
         return 0;
  }
-
-EXPORT_SYMBOL(ip_options_compile);
-EXPORT_SYMBOL(ip_options_undo);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c

index 80d13103b2b018624b0b845b3c713cfe4eafcaa7..3f1a263e1249ebbaef7e3fb719252bb90d19ee6a 100644 (file)
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -69,13 +69,10 @@
  #include <net/ip.h>
  #include <net/protocol.h>
  #include <net/route.h>
-#include <net/tcp.h>
-#include <net/udp.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
  #include <net/arp.h>
  #include <net/icmp.h>
-#include <net/raw.h>
  #include <net/checksum.h>
  #include <net/inetpeer.h>
  #include <net/checksum.h>
@@ -84,12 +81,8 @@
  #include <linux/netfilter_bridge.h>
  #include <linux/mroute.h>
  #include <linux/netlink.h>
+#include <linux/tcp.h>
  
-/*
- *      Shall we try to damage output packets if routing dev changes?
- */
-
-int sysctl_ip_dynaddr;
  int sysctl_ip_default_ttl = IPDEFTTL;
  
  /* Generate a checksum for an outgoing IP datagram. */
@@ -165,6 +158,8 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
                        dst_output);
  }
  
+EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
+
  static inline int ip_finish_output2(struct sk_buff *skb)
  {
         struct dst_entry *dst = skb->dst;
@@ -205,7 +200,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
         return -EINVAL;
  }
  
-int ip_finish_output(struct sk_buff *skb)
+static inline int ip_finish_output(struct sk_buff *skb)
  {
         struct net_device *dev = skb->dst->dev;
  
@@ -329,8 +324,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
                         if (ip_route_output_flow(&rt, &fl, sk, 0))
                                 goto no_route;
                 }
-               __sk_dst_set(sk, &rt->u.dst);
-               tcp_v4_setup_caps(sk, &rt->u.dst);
+               sk_setup_caps(sk, &rt->u.dst);
         }
         skb->dst = dst_clone(&rt->u.dst);
  
@@ -392,7 +386,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
  #endif
  #ifdef CONFIG_NETFILTER
         to->nfmark = from->nfmark;
-       to->nfcache = from->nfcache;
         /* Connection association is same as pre-frag packet */
         nf_conntrack_put(to->nfct);
         to->nfct = from->nfct;
@@ -580,7 +573,7 @@ slow_path:
                  */
  
                 if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
-                       NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
+                       NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
                         err = -ENOMEM;
                         goto fail;
                 }
@@ -1329,12 +1322,7 @@ void __init ip_init(void)
  #endif
  }
  
-EXPORT_SYMBOL(ip_finish_output);
  EXPORT_SYMBOL(ip_fragment);
  EXPORT_SYMBOL(ip_generic_getfrag);
  EXPORT_SYMBOL(ip_queue_xmit);
  EXPORT_SYMBOL(ip_send_check);
-
-#ifdef CONFIG_SYSCTL
-EXPORT_SYMBOL(sysctl_ip_default_ttl);
-#endif
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c

index fc7c481d0d79378981841ff6989bcaebc0b14f23..2f0b47da5b37e3b3c0c44235355e704958cba8a3 100644 (file)
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -153,7 +153,7 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
                 switch (cmsg->cmsg_type) {
                 case IP_RETOPTS:
                         err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
-                       err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40, 0);
+                       err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40);
                         if (err)
                                 return err;
                         break;
@@ -425,7 +425,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
                         struct ip_options * opt = NULL;
                         if (optlen > 40 || optlen < 0)
                                 goto e_inval;
-                       err = ip_options_get(&opt, optval, optlen, 1);
+                       err = ip_options_get_from_user(&opt, optval, optlen);
                         if (err)
                                 break;
                         if (sk->sk_type == SOCK_STREAM) {
@@ -614,7 +614,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
                 }
                 case IP_MSFILTER:
                 {
-                       extern int sysctl_optmem_max;
                         extern int sysctl_igmp_max_msf;
                         struct ip_msfilter *msf;
  
@@ -769,7 +768,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
                 }
                 case MCAST_MSFILTER:
                 {
-                       extern int sysctl_optmem_max;
                         extern int sysctl_igmp_max_msf;
                         struct sockaddr_in *psin;
                         struct ip_msfilter *msf = NULL;
@@ -848,6 +846,9 @@ mc_msf_out:
   
                 case IP_IPSEC_POLICY:
                 case IP_XFRM_POLICY:
+                       err = -EPERM;
+                       if (!capable(CAP_NET_ADMIN))
+                               break;
                         err = xfrm_user_policy(sk, optname, optval, optlen);
                         break;
  
@@ -1087,7 +1088,5 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
  
  EXPORT_SYMBOL(ip_cmsg_recv);
  
-#ifdef CONFIG_IP_SCTP_MODULE
  EXPORT_SYMBOL(ip_getsockopt);
  EXPORT_SYMBOL(ip_setsockopt);
-#endif
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c

index 2065944fd9e5117739ed1f2aeed2c2c59cf50151..dcb7ee6c4858c4395444b7cb83c9f29a73823091 100644 (file)
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -214,8 +214,8 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
                               spi, IPPROTO_COMP, AF_INET);
         if (!x)
                 return;
-       NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n",
-              spi, NIPQUAD(iph->daddr)));
+       NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n",
+                spi, NIPQUAD(iph->daddr));
         xfrm_state_put(x);
  }
  
@@ -358,7 +358,7 @@ static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name)
         int cpu;
  
         /* This can be any valid CPU ID so we don't need locking. */
-       cpu = smp_processor_id();
+       cpu = raw_smp_processor_id();
  
         list_for_each_entry(pos, &ipcomp_tfms_list, list) {
                 struct crypto_tfm *tfm;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c

index d2bf8e1930a3709a41ce0154fabc50d0ff781b7d..63e106605f289a1503e63105f68ad8520e8bc36a 100644 (file)
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -393,7 +393,7 @@ static int __init ic_defaults(void)
  
  #ifdef IPCONFIG_RARP
  
-static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt);
+static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev);
  
  static struct packet_type rarp_packet_type __initdata = {
         .type = __constant_htons(ETH_P_RARP),
@@ -414,7 +414,7 @@ static inline void ic_rarp_cleanup(void)
   *  Process received RARP packet.
   */
  static int __init
-ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
  {
         struct arphdr *rarp;
         unsigned char *rarp_ptr;
@@ -555,7 +555,7 @@ struct bootp_pkt {          /* BOOTP packet format */
  #define DHCPRELEASE    7
  #define DHCPINFORM     8
  
-static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt);
+static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev);
  
  static struct packet_type bootp_packet_type __initdata = {
         .type = __constant_htons(ETH_P_IP),
@@ -823,7 +823,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
  /*
   *  Receive BOOTP reply.
   */
-static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
  {
         struct bootp_pkt *b;
         struct iphdr *h;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c

index c3947cd566b7a6c0e421dfab418ac09976221d30..c05c1df0bb045e17f92b9cbe5dcad23ed8df2c6e 100644 (file)
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -255,7 +255,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
  
         dev_hold(dev);
         ipip_tunnel_link(nt);
-       /* Do not decrement MOD_USE_COUNT here. */
         return nt;
  
  failed:
@@ -920,12 +919,29 @@ static int __init ipip_init(void)
         goto out;
  }
  
+static void __exit ipip_destroy_tunnels(void)
+{
+       int prio;
+
+       for (prio = 1; prio < 4; prio++) {
+               int h;
+               for (h = 0; h < HASH_SIZE; h++) {
+                       struct ip_tunnel *t;
+                       while ((t = tunnels[prio][h]) != NULL)
+                               unregister_netdevice(t->dev);
+               }
+       }
+}
+
  static void __exit ipip_fini(void)
  {
         if (ipip_unregister() < 0)
                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
  
-       unregister_netdev(ipip_fb_tunnel_dev);
+       rtnl_lock();
+       ipip_destroy_tunnels();
+       unregister_netdevice(ipip_fb_tunnel_dev);
+       rtnl_unlock();
  }
  
  module_init(ipip_init);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c

index 7833d920bdba02fad46b1853374b11f01d3396b2..9dbf5909f3a6a190fcef1961e3737100a4b12e57 100644 (file)
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -103,7 +103,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
     In this case data path is free of exclusive locks at all.
   */
  
-static kmem_cache_t *mrt_cachep;
+static kmem_cache_t *mrt_cachep __read_mostly;
  
  static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
  static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
@@ -362,7 +362,7 @@ out:
  
  /* Fill oifs list. It is called under write locked mrt_lock. */
  
-static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
+static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
  {
         int vifi;
  
@@ -727,7 +727,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
         if (c != NULL) {
                 write_lock_bh(&mrt_lock);
                 c->mfc_parent = mfc->mfcc_parent;
-               ipmr_update_threshoulds(c, mfc->mfcc_ttls);
+               ipmr_update_thresholds(c, mfc->mfcc_ttls);
                 if (!mrtsock)
                         c->mfc_flags |= MFC_STATIC;
                 write_unlock_bh(&mrt_lock);
@@ -744,7 +744,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
         c->mfc_origin=mfc->mfcc_origin.s_addr;
         c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
         c->mfc_parent=mfc->mfcc_parent;
-       ipmr_update_threshoulds(c, mfc->mfcc_ttls);
+       ipmr_update_thresholds(c, mfc->mfcc_ttls);
         if (!mrtsock)
                 c->mfc_flags |= MFC_STATIC;
  
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c

index d9212addd1933a8a47fe4ca28aaae15b05762bcc..6e092dadb3883964a33bdc657417331ebdbc2b25 100644 (file)
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -26,6 +26,7 @@
  #include <linux/in.h>
  #include <linux/ip.h>
  #include <net/protocol.h>
+#include <net/tcp.h>
  #include <asm/system.h>
  #include <linux/stat.h>
  #include <linux/proc_fs.h>
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c

index d0145a8b1551765b2cb31049d256e2546f1d91d1..e11952ea17afd89cfba231fee4e27754fbd9c849 100644 (file)
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -40,7 +40,7 @@
  static struct list_head *ip_vs_conn_tab;
  
  /*  SLAB cache for IPVS connections */
-static kmem_cache_t *ip_vs_conn_cachep;
+static kmem_cache_t *ip_vs_conn_cachep __read_mostly;
  
  /*  counter for current IPVS connections */
  static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c

index 5fb257dd07cb38187e0b01158f4a75e4d4d3cd8a..3ac7eeca04ac4ca4da81bdc1f6e75cce70316b79 100644 (file)
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -22,6 +22,7 @@
   *
   * Changes:
   *     Paul `Rusty' Russell            properly handle non-linear skbs
+ *     Harald Welte                    don't use nfcache
   *
   */
  
@@ -529,7 +530,7 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum,
                                        const struct net_device *out,
                                        int (*okfn)(struct sk_buff *))
  {
-       if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY))
+       if (!((*pskb)->ipvs_property))
                 return NF_ACCEPT;
  
         /* The packet was sent from IPVS, exit this chain */
@@ -701,7 +702,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
         /* do the statistics and put it back */
         ip_vs_out_stats(cp, skb);
  
-       skb->nfcache |= NFC_IPVS_PROPERTY;
+       skb->ipvs_property = 1;
         verdict = NF_ACCEPT;
  
    out:
@@ -739,7 +740,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
  
         EnterFunction(11);
  
-       if (skb->nfcache & NFC_IPVS_PROPERTY)
+       if (skb->ipvs_property)
                 return NF_ACCEPT;
  
         iph = skb->nh.iph;
@@ -821,7 +822,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
         ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
         ip_vs_conn_put(cp);
  
-       skb->nfcache |= NFC_IPVS_PROPERTY;
+       skb->ipvs_property = 1;
  
         LeaveFunction(11);
         return NF_ACCEPT;
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c

index 7d99ede2ef79316f4842e6b2b2eb7465cbcd0bcb..2d66848e7aa06bb75df50aad715d6c45837535e7 100644 (file)
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1598,7 +1598,7 @@ static ctl_table vs_table[] = {
         { .ctl_name = 0 }
  };
  
-static ctl_table ipv4_table[] = {
+static ctl_table ipvs_ipv4_table[] = {
         {
                 .ctl_name       = NET_IPV4,
                 .procname       = "ipv4",
@@ -1613,7 +1613,7 @@ static ctl_table vs_root_table[] = {
                 .ctl_name       = CTL_NET,
                 .procname       = "net",
                 .mode           = 0555,
-               .child          = ipv4_table,
+               .child          = ipvs_ipv4_table,
         },
         { .ctl_name = 0 }
  };
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c

index c035838b780a0fa56b5352d83e71e9ed7d8e3f38..561cda326fa8092cf2b27da87cac78d68cded80e 100644 (file)
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -131,7 +131,7 @@ static ctl_table vs_table[] = {
         { .ctl_name = 0 }
  };
  
-static ctl_table ipv4_table[] = {
+static ctl_table ipvs_ipv4_table[] = {
         {
                 .ctl_name       = NET_IPV4,
                 .procname       = "ipv4", 
@@ -146,7 +146,7 @@ static ctl_table lblc_root_table[] = {
                 .ctl_name       = CTL_NET,
                 .procname       = "net", 
                 .mode           = 0555, 
-               .child          = ipv4_table
+               .child          = ipvs_ipv4_table
         },
         { .ctl_name = 0 }
  };
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c

index 22b5dd55d2710a7b697e2ce538d68b9429bc2f45..ce456dbf09a54967c4edde5d648ffd3661e400a1 100644 (file)
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -320,7 +320,7 @@ static ctl_table vs_table[] = {
         { .ctl_name = 0 }
  };
  
-static ctl_table ipv4_table[] = {
+static ctl_table ipvs_ipv4_table[] = {
         {
                 .ctl_name       = NET_IPV4,
                 .procname       = "ipv4", 
@@ -335,7 +335,7 @@ static ctl_table lblcr_root_table[] = {
                 .ctl_name       = CTL_NET,
                 .procname       = "net", 
                 .mode           = 0555, 
-               .child          = ipv4_table
+               .child          = ipvs_ipv4_table
         },
         { .ctl_name = 0 }
  };
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c

index e65de675da74b57e1ab01332a8daa438ea8190a7..c19408973c091d12bb5d4e850c37af7aa3b64e37 100644 (file)
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -604,14 +604,14 @@ void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
  }
  
  
-static void tcp_init(struct ip_vs_protocol *pp)
+static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
  {
         IP_VS_INIT_HASH_TABLE(tcp_apps);
         pp->timeout_table = tcp_timeouts;
  }
  
  
-static void tcp_exit(struct ip_vs_protocol *pp)
+static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
  {
  }
  
@@ -621,8 +621,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
         .protocol =             IPPROTO_TCP,
         .dont_defrag =          0,
         .appcnt =               ATOMIC_INIT(0),
-       .init =                 tcp_init,
-       .exit =                 tcp_exit,
+       .init =                 ip_vs_tcp_init,
+       .exit =                 ip_vs_tcp_exit,
         .register_app =         tcp_register_app,
         .unregister_app =       tcp_unregister_app,
         .conn_schedule =        tcp_conn_schedule,
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c

index a8512a3fd08a02503aae30187f07f6b81ff9732c..3b87482049cf395804431a35c065ff6722bcc4ab 100644 (file)
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -127,7 +127,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
  
  #define IP_VS_XMIT(skb, rt)                            \
  do {                                                   \
-       (skb)->nfcache |= NFC_IPVS_PROPERTY;            \
+       (skb)->ipvs_property = 1;                       \
         (skb)->ip_summed = CHECKSUM_NONE;               \
         NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL,  \
                 (rt)->u.dst.dev, dst_output);           \
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c

index c9cf8726051d7cae07a3d1278b0bea36b1c2eee5..db67373f9b348669de5045ba39c558105c7b1219 100644 (file)
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -107,7 +107,7 @@ static int drr_dev_event(struct notifier_block *this,
         return NOTIFY_DONE;
  }
  
-struct notifier_block drr_dev_notifier = {
+static struct notifier_block drr_dev_notifier = {
         .notifier_call  = drr_dev_event,
  };
  
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c

new file mode 100644 (file)

index 0000000..ae0779d
--- /dev/null
+++ b/net/ipv4/netfilter.c
@@ -0,0 +1,139 @@
+/* IPv4 specific functions of netfilter core */
+
+#include <linux/config.h>
+#ifdef CONFIG_NETFILTER
+
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <net/route.h>
+#include <linux/ip.h>
+
+/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
+int ip_route_me_harder(struct sk_buff **pskb)
+{
+       struct iphdr *iph = (*pskb)->nh.iph;
+       struct rtable *rt;
+       struct flowi fl = {};
+       struct dst_entry *odst;
+       unsigned int hh_len;
+
+       /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
+        * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
+        */
+       if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
+               fl.nl_u.ip4_u.daddr = iph->daddr;
+               fl.nl_u.ip4_u.saddr = iph->saddr;
+               fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+               fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+               fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
+#endif
+               fl.proto = iph->protocol;
+               if (ip_route_output_key(&rt, &fl) != 0)
+                       return -1;
+
+               /* Drop old route. */
+               dst_release((*pskb)->dst);
+               (*pskb)->dst = &rt->u.dst;
+       } else {
+               /* non-local src, find valid iif to satisfy
+                * rp-filter when calling ip_route_input. */
+               fl.nl_u.ip4_u.daddr = iph->saddr;
+               if (ip_route_output_key(&rt, &fl) != 0)
+                       return -1;
+
+               odst = (*pskb)->dst;
+               if (ip_route_input(*pskb, iph->daddr, iph->saddr,
+                                  RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
+                       dst_release(&rt->u.dst);
+                       return -1;
+               }
+               dst_release(&rt->u.dst);
+               dst_release(odst);
+       }
+       
+       if ((*pskb)->dst->error)
+               return -1;
+
+       /* Change in oif may mean change in hh_len. */
+       hh_len = (*pskb)->dst->dev->hard_header_len;
+       if (skb_headroom(*pskb) < hh_len) {
+               struct sk_buff *nskb;
+
+               nskb = skb_realloc_headroom(*pskb, hh_len);
+               if (!nskb) 
+                       return -1;
+               if ((*pskb)->sk)
+                       skb_set_owner_w(nskb, (*pskb)->sk);
+               kfree_skb(*pskb);
+               *pskb = nskb;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(ip_route_me_harder);
+
+/*
+ * Extra routing may be needed on local out, as the QUEUE target never
+ * returns control to the table.
+ */
+
+struct ip_rt_info {
+       u_int32_t daddr;
+       u_int32_t saddr;
+       u_int8_t tos;
+};
+
+static void queue_save(const struct sk_buff *skb, struct nf_info *info)
+{
+       struct ip_rt_info *rt_info = nf_info_reroute(info);
+
+       if (info->hook == NF_IP_LOCAL_OUT) {
+               const struct iphdr *iph = skb->nh.iph;
+
+               rt_info->tos = iph->tos;
+               rt_info->daddr = iph->daddr;
+               rt_info->saddr = iph->saddr;
+       }
+}
+
+static int queue_reroute(struct sk_buff **pskb, const struct nf_info *info)
+{
+       const struct ip_rt_info *rt_info = nf_info_reroute(info);
+
+       if (info->hook == NF_IP_LOCAL_OUT) {
+               struct iphdr *iph = (*pskb)->nh.iph;
+
+               if (!(iph->tos == rt_info->tos
+                     && iph->daddr == rt_info->daddr
+                     && iph->saddr == rt_info->saddr))
+                       return ip_route_me_harder(pskb);
+       }
+       return 0;
+}
+
+static struct nf_queue_rerouter ip_reroute = {
+       .rer_size       = sizeof(struct ip_rt_info),
+       .save           = queue_save,
+       .reroute        = queue_reroute,
+};
+
+static int init(void)
+{
+       return nf_register_queue_rerouter(PF_INET, &ip_reroute);
+}
+
+static void fini(void)
+{
+       nf_unregister_queue_rerouter(PF_INET);
+}
+
+module_init(init);
+module_exit(fini);
+
+#endif /* CONFIG_NETFILTER */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig

index 46d4cb1c06f053b3d3e831d2649202d4282de513..e046f55218142d091ba71da87e6af0747bad7277 100644 (file)
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -40,6 +40,16 @@ config IP_NF_CONNTRACK_MARK
           of packets, but this mark value is kept in the conntrack session
           instead of the individual packets.
         
+config IP_NF_CONNTRACK_EVENTS
+       bool "Connection tracking events"
+       depends on IP_NF_CONNTRACK
+       help
+         If this option is enabled, the connection tracking code will
+         provide a notifier chain that can be used by other kernel code
+         to get notified about changes in the connection tracking state.
+         
+         If unsure, say `N'.
+
  config IP_NF_CT_PROTO_SCTP
         tristate  'SCTP protocol connection tracking support (EXPERIMENTAL)'
         depends on IP_NF_CONNTRACK && EXPERIMENTAL
@@ -100,11 +110,15 @@ config IP_NF_AMANDA
           To compile it as a module, choose M here.  If unsure, say Y.
  
  config IP_NF_QUEUE
-       tristate "Userspace queueing via NETLINK"
+       tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
         help
           Netfilter has the ability to queue packets to user space: the
           netlink device can be used to access them using this driver.
  
+         This option enables the old IPv4-only "ip_queue" implementation
+         which has been obsoleted by the new "nfnetlink_queue" code (see
+         CONFIG_NETFILTER_NETLINK_QUEUE).
+
           To compile it as a module, choose M here.  If unsure, say N.
  
  config IP_NF_IPTABLES
@@ -340,6 +354,17 @@ config IP_NF_MATCH_SCTP
           If you want to compile it as a module, say M here and read
           <file:Documentation/modules.txt>.  If unsure, say `N'.
  
+config IP_NF_MATCH_DCCP
+       tristate  'DCCP protocol match support'
+       depends on IP_NF_IPTABLES
+       help
+         With this option enabled, you will be able to use the iptables
+         `dccp' match in order to match on DCCP source/destination ports
+         and DCCP flags.
+
+         If you want to compile it as a module, say M here and read
+         <file:Documentation/modules.txt>.  If unsure, say `N'.
+
  config IP_NF_MATCH_COMMENT
         tristate  'comment match support'
         depends on IP_NF_IPTABLES
@@ -361,6 +386,16 @@ config IP_NF_MATCH_CONNMARK
           <file:Documentation/modules.txt>.  The module will be called
           ipt_connmark.o.  If unsure, say `N'.
  
+config IP_NF_MATCH_CONNBYTES
+       tristate  'Connection byte/packet counter match support'
+       depends on IP_NF_CT_ACCT && IP_NF_IPTABLES
+       help
+         This option adds a `connbytes' match, which allows you to match the
+         number of bytes and/or packets for each direction within a connection.
+
+         If you want to compile it as a module, say M here and read
+         <file:Documentation/modules.txt>.  If unsure, say `N'.
+
  config IP_NF_MATCH_HASHLIMIT
         tristate  'hashlimit match support'
         depends on IP_NF_IPTABLES
@@ -375,6 +410,19 @@ config IP_NF_MATCH_HASHLIMIT
           destination IP' or `500pps from any given source IP'  with a single
           IPtables rule.
  
+config IP_NF_MATCH_STRING
+       tristate  'string match support'
+       depends on IP_NF_IPTABLES 
+       select TEXTSEARCH
+       select TEXTSEARCH_KMP
+       select TEXTSEARCH_BM
+       select TEXTSEARCH_FSM
+       help
+         This option adds a `string' match, which allows you to look for
+         pattern matches in packets.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
  # `filter', generic and specific targets
  config IP_NF_FILTER
         tristate "Packet filtering"
@@ -616,6 +664,20 @@ config IP_NF_TARGET_CLASSIFY
  
           To compile it as a module, choose M here.  If unsure, say N.
  
+config IP_NF_TARGET_TTL
+       tristate  'TTL target support'
+       depends on IP_NF_MANGLE
+       help
+         This option adds a `TTL' target, which enables the user to modify
+         the TTL value of the IP header.
+
+         While it is safe to decrement/lower the TTL, this target also enables
+         functionality to increment and set the TTL value of the IP header to
+         arbitrary values.  This is EXTREMELY DANGEROUS since you can easily
+         create immortal packets that loop forever on the network.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
  config IP_NF_TARGET_CONNMARK
         tristate  'CONNMARK target support'
         depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE
@@ -692,5 +754,11 @@ config IP_NF_ARP_MANGLE
           Allows altering the ARP packet payload: source and destination
           hardware and network addresses.
  
+config IP_NF_CONNTRACK_NETLINK
+        tristate 'Connection tracking netlink interface'
+        depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
+        help
+          This option enables support for a netlink-based userspace interface.
+
  endmenu
  
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile

index 45796d5924dd9c23c791ea246cffb4395c442423..a7bd38f5052202c8230e572f27080b39c40db416 100644 (file)
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -9,6 +9,10 @@ iptable_nat-objs       := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helpe
  # connection tracking
  obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
  
+# conntrack netlink interface
+obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
+
+
  # SCTP protocol connection tracking
  obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
  
@@ -38,6 +42,7 @@ obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o
  obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
  obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o
  obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o
+obj-$(CONFIG_IP_NF_MATCH_DCCP) += ipt_dccp.o
  obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
  obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
  obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
@@ -54,11 +59,13 @@ obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
  obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
  obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o
  obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o
+obj-$(CONFIG_IP_NF_MATCH_CONNBYTES) += ipt_connbytes.o
  obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
  obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
  obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
  obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
  obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
+obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
  
  # targets
  obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
@@ -78,6 +85,7 @@ obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
  obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
  obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o
  obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
+obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
  
  # generic ARP tables
  obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
@@ -87,3 +95,4 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
  obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
  
  obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
+obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c

index 01e1b58322a9b07cba23e4d2b678debf873c42d1..be4c9eb3243f94698f35de5af6d11aad219137a7 100644 (file)
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -40,7 +40,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
  static char *conns[] = { "DATA ", "MESG ", "INDEX " };
  
  /* This is slow, but it's simple. --RR */
-static char amanda_buffer[65536];
+static char *amanda_buffer;
  static DEFINE_SPINLOCK(amanda_buffer_lock);
  
  unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
@@ -153,11 +153,25 @@ static struct ip_conntrack_helper amanda_helper = {
  static void __exit fini(void)
  {
         ip_conntrack_helper_unregister(&amanda_helper);
+       kfree(amanda_buffer);
  }
  
  static int __init init(void)
  {
-       return ip_conntrack_helper_register(&amanda_helper);
+       int ret;
+
+       amanda_buffer = kmalloc(65536, GFP_KERNEL);
+       if (!amanda_buffer)
+               return -ENOMEM;
+
+       ret = ip_conntrack_helper_register(&amanda_helper);
+       if (ret < 0) {
+               kfree(amanda_buffer);
+               return ret;
+       }
+       return 0;
+
+
  }
  
  module_init(init);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c

index 86f04e41dd8efb13bbca229cd83fe5f63aa87104..a0648600190e1b16827e0f363ea138b759f9f656 100644 (file)
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -37,6 +37,7 @@
  #include <linux/err.h>
  #include <linux/percpu.h>
  #include <linux/moduleparam.h>
+#include <linux/notifier.h>
  
  /* ip_conntrack_lock protects the main hash table, protocol/helper/expected
     registrations, conntrack timers*/
@@ -49,7 +50,7 @@
  #include <linux/netfilter_ipv4/ip_conntrack_core.h>
  #include <linux/netfilter_ipv4/listhelp.h>
  
-#define IP_CONNTRACK_VERSION   "2.1"
+#define IP_CONNTRACK_VERSION   "2.3"
  
  #if 0
  #define DEBUGP printk
@@ -69,22 +70,81 @@ static LIST_HEAD(helpers);
  unsigned int ip_conntrack_htable_size = 0;
  int ip_conntrack_max;
  struct list_head *ip_conntrack_hash;
-static kmem_cache_t *ip_conntrack_cachep;
-static kmem_cache_t *ip_conntrack_expect_cachep;
+static kmem_cache_t *ip_conntrack_cachep __read_mostly;
+static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
  struct ip_conntrack ip_conntrack_untracked;
  unsigned int ip_ct_log_invalid;
  static LIST_HEAD(unconfirmed);
  static int ip_conntrack_vmalloc;
  
-DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
+static unsigned int ip_conntrack_next_id = 1;
+static unsigned int ip_conntrack_expect_next_id = 1;
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+struct notifier_block *ip_conntrack_chain;
+struct notifier_block *ip_conntrack_expect_chain;
+
+DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
  
-void 
-ip_conntrack_put(struct ip_conntrack *ct)
+/* deliver cached events and clear cache entry - must be called with locally
+ * disabled softirqs */
+static inline void
+__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
  {
-       IP_NF_ASSERT(ct);
-       nf_conntrack_put(&ct->ct_general);
+       DEBUGP("ecache: delivering events for %p\n", ecache->ct);
+       if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
+               notifier_call_chain(&ip_conntrack_chain, ecache->events,
+                                   ecache->ct);
+       ecache->events = 0;
+       ip_conntrack_put(ecache->ct);
+       ecache->ct = NULL;
  }
  
+/* Deliver all cached events for a particular conntrack. This is called
+ * by code prior to async packet handling or freeing the skb.
+ *
+ * Only the current CPU's event cache is examined, and it is delivered only
+ * if it currently belongs to @ct; otherwise this is a no-op. */
+void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
+{
+       struct ip_conntrack_ecache *ecache;
+       
+       /* __ip_ct_deliver_cached_events() must run with softirqs disabled
+        * locally, so keep them off across lookup and delivery */
+       local_bh_disable();
+       ecache = &__get_cpu_var(ip_conntrack_ecache);
+       if (ecache->ct == ct)
+               __ip_ct_deliver_cached_events(ecache);
+       local_bh_enable();
+}
+
+/* Bind the current CPU's event cache to @ct.  Events still cached for a
+ * different conntrack are delivered first.  The cache takes its own
+ * reference on @ct; that reference is dropped again when the cached
+ * events are delivered.
+ *
+ * NOTE(review): softirqs are not disabled here, so the caller presumably
+ * has to run with them already disabled - confirm against the callers. */
+void __ip_ct_event_cache_init(struct ip_conntrack *ct)
+{
+       struct ip_conntrack_ecache *ecache;
+
+       /* take care of delivering potentially old events */
+       ecache = &__get_cpu_var(ip_conntrack_ecache);
+       /* the cache must not already be bound to this conntrack */
+       BUG_ON(ecache->ct == ct);
+       if (ecache->ct)
+               __ip_ct_deliver_cached_events(ecache);
+       /* initialize for this conntrack/packet */
+       ecache->ct = ct;
+       nf_conntrack_get(&ct->ct_general);
+}
+
+/* Flush the event cache: drop the conntrack reference held by every CPU's
+ * cache slot and reset the slot.  Events still pending in a slot are
+ * discarded, not delivered.
+ *
+ * This touches other CPU's data and must not be called while packets are
+ * still passing through the code. */
+static void ip_ct_event_cache_flush(void)
+{
+       struct ip_conntrack_ecache *ecache;
+       int cpu;
+
+       for_each_cpu(cpu) {
+               ecache = &per_cpu(ip_conntrack_ecache, cpu);
+               if (ecache->ct) {
+                       ip_conntrack_put(ecache->ct);
+                       /* Forget the pointer: our reference is gone, and a
+                        * stale entry would be delivered and put a second
+                        * time by the next __ip_ct_event_cache_init(). */
+                       ecache->ct = NULL;
+                       ecache->events = 0;
+               }
+       }
+}
+#else
+static inline void ip_ct_event_cache_flush(void) {}
+#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
+
+DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
+
  static int ip_conntrack_hash_rnd_initted;
  static unsigned int ip_conntrack_hash_rnd;
  
@@ -144,6 +204,13 @@ static void unlink_expect(struct ip_conntrack_expect *exp)
         list_del(&exp->list);
         CONNTRACK_STAT_INC(expect_delete);
         exp->master->expecting--;
+       ip_conntrack_expect_put(exp);
+}
+
+void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp)
+{
+       unlink_expect(exp);
+       ip_conntrack_expect_put(exp);
  }
  
  static void expectation_timed_out(unsigned long ul_expect)
@@ -156,6 +223,33 @@ static void expectation_timed_out(unsigned long ul_expect)
         ip_conntrack_expect_put(exp);
  }
  
+/* Find the first expectation on ip_conntrack_expect_list whose tuple/mask
+ * pair matches @tuple.  Returns it with its use count incremented, or NULL
+ * if nothing matches.
+ *
+ * No locking is done here; the ip_conntrack_expect_find_get() wrapper calls
+ * this under read_lock_bh(&ip_conntrack_lock). */
+struct ip_conntrack_expect *
+__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
+{
+       struct ip_conntrack_expect *i;
+       
+       list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+               if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
+                       /* hand out a counted reference to the caller */
+                       atomic_inc(&i->use);
+                       return i;
+               }
+       }
+       return NULL;
+}
+
+/* Just find an expectation corresponding to a tuple.  Locked wrapper
+ * around __ip_conntrack_expect_find(): the lookup runs with
+ * ip_conntrack_lock held for reading.  Returns the expectation with its
+ * use count incremented, or NULL if none matches. */
+struct ip_conntrack_expect *
+ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
+{
+       struct ip_conntrack_expect *i;
+       
+       read_lock_bh(&ip_conntrack_lock);
+       i = __ip_conntrack_expect_find(tuple);
+       read_unlock_bh(&ip_conntrack_lock);
+
+       return i;
+}
+
  /* If an expectation for this connection is found, it gets delete from
   * global list then returned. */
  static struct ip_conntrack_expect *
@@ -180,7 +274,7 @@ find_expectation(const struct ip_conntrack_tuple *tuple)
  }
  
  /* delete all expectations for this conntrack */
-static void remove_expectations(struct ip_conntrack *ct)
+void ip_ct_remove_expectations(struct ip_conntrack *ct)
  {
         struct ip_conntrack_expect *i, *tmp;
  
@@ -210,7 +304,7 @@ clean_from_lists(struct ip_conntrack *ct)
         LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
  
         /* Destroy all pending expectations */
-       remove_expectations(ct);
+       ip_ct_remove_expectations(ct);
  }
  
  static void
@@ -223,10 +317,13 @@ destroy_conntrack(struct nf_conntrack *nfct)
         IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
         IP_NF_ASSERT(!timer_pending(&ct->timeout));
  
+       ip_conntrack_event(IPCT_DESTROY, ct);
+       set_bit(IPS_DYING_BIT, &ct->status);
+
         /* To make sure we don't get any weird locking issues here:
          * destroy_conntrack() MUST NOT be called with a write lock
          * to ip_conntrack_lock!!! -HW */
-       proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+       proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
         if (proto && proto->destroy)
                 proto->destroy(ct);
  
@@ -238,7 +335,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
          * except TFTP can create an expectation on the first packet,
          * before connection is in the list, so we need to clean here,
          * too. */
-       remove_expectations(ct);
+       ip_ct_remove_expectations(ct);
  
         /* We overload first tuple to link into unconfirmed list. */
         if (!is_confirmed(ct)) {
@@ -253,8 +350,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
                 ip_conntrack_put(ct->master);
  
         DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
-       kmem_cache_free(ip_conntrack_cachep, ct);
-       atomic_dec(&ip_conntrack_count);
+       ip_conntrack_free(ct);
  }
  
  static void death_by_timeout(unsigned long ul_conntrack)
@@ -280,7 +376,7 @@ conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
                 && ip_ct_tuple_equal(tuple, &i->tuple);
  }
  
-static struct ip_conntrack_tuple_hash *
+struct ip_conntrack_tuple_hash *
  __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
                     const struct ip_conntrack *ignored_conntrack)
  {
@@ -315,6 +411,29 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
         return h;
  }
  
+/* Assign the next conntrack id to @ct and prepend both of its tuple hash
+ * entries (original and reply direction) to the hash chains selected by
+ * @hash and @repl_hash.  Takes no lock itself; ip_conntrack_hash_insert()
+ * wraps the call in write_lock_bh(&ip_conntrack_lock). */
+static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
+                                       unsigned int hash,
+                                       unsigned int repl_hash) 
+{
+       /* ids come from a global counter that is pre-incremented on use */
+       ct->id = ++ip_conntrack_next_id;
+       list_prepend(&ip_conntrack_hash[hash],
+                    &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+       list_prepend(&ip_conntrack_hash[repl_hash],
+                    &ct->tuplehash[IP_CT_DIR_REPLY].list);
+}
+
+/* Hash both tuples of @ct and insert the conntrack into the main hash
+ * table while holding ip_conntrack_lock for writing.  Only the insertion
+ * is done: this function does not touch the timeout timer or the status
+ * bits of @ct. */
+void ip_conntrack_hash_insert(struct ip_conntrack *ct)
+{
+       unsigned int hash, repl_hash;
+
+       /* bucket indexes are computed before the lock is taken */
+       hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+       repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+       write_lock_bh(&ip_conntrack_lock);
+       __ip_conntrack_hash_insert(ct, hash, repl_hash);
+       write_unlock_bh(&ip_conntrack_lock);
+}
+
  /* Confirm a connection given skb; places it in hash table */
  int
  __ip_conntrack_confirm(struct sk_buff **pskb)
@@ -361,10 +480,7 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
                 /* Remove from unconfirmed list */
                 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
  
-               list_prepend(&ip_conntrack_hash[hash],
-                            &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-               list_prepend(&ip_conntrack_hash[repl_hash],
-                            &ct->tuplehash[IP_CT_DIR_REPLY]);
+               __ip_conntrack_hash_insert(ct, hash, repl_hash);
                 /* Timer relative to confirmation time, not original
                    setting time, otherwise we'd get timer wrap in
                    weird delay cases. */
@@ -374,6 +490,16 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
                 set_bit(IPS_CONFIRMED_BIT, &ct->status);
                 CONNTRACK_STAT_INC(insert);
                 write_unlock_bh(&ip_conntrack_lock);
+               if (ct->helper)
+                       ip_conntrack_event_cache(IPCT_HELPER, *pskb);
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+               if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+                   test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+                       ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
+#endif
+               ip_conntrack_event_cache(master_ct(ct) ?
+                                        IPCT_RELATED : IPCT_NEW, *pskb);
+
                 return NF_ACCEPT;
         }
  
@@ -438,34 +564,84 @@ static inline int helper_cmp(const struct ip_conntrack_helper *i,
         return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
  }
  
-static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
+static struct ip_conntrack_helper *
+__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
  {
         return LIST_FIND(&helpers, helper_cmp,
                          struct ip_conntrack_helper *,
                          tuple);
  }
  
-/* Allocate a new conntrack: we return -ENOMEM if classification
-   failed due to stress.  Otherwise it really is unclassifiable. */
-static struct ip_conntrack_tuple_hash *
-init_conntrack(const struct ip_conntrack_tuple *tuple,
-              struct ip_conntrack_protocol *protocol,
-              struct sk_buff *skb)
+/* Look up the helper matching @tuple and pin its module.  Returns the
+ * helper with a module reference held (drop it with
+ * ip_conntrack_helper_put()), or NULL if no helper matches or the module
+ * reference could not be taken. */
+struct ip_conntrack_helper *
+ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple)
+{
+       struct ip_conntrack_helper *helper;
+
+       /* need ip_conntrack_lock to assure that helper exists until
+        * try_module_get() is called */
+       read_lock_bh(&ip_conntrack_lock);
+
+       helper = __ip_conntrack_helper_find(tuple);
+       if (helper) {
+               /* need to increase module usage count to assure helper will
+                * not go away while the caller is e.g. busy putting a
+                * conntrack in the hash that uses the helper */
+               if (!try_module_get(helper->me))
+                       helper = NULL;
+       }
+
+       read_unlock_bh(&ip_conntrack_lock);
+
+       return helper;
+}
+
+/* Drop the module reference taken by ip_conntrack_helper_find_get(). */
+void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
+{
+       module_put(helper->me);
+}
+
+/* Return the ip_ct_protos[] entry for IP protocol number @protocol.
+ * Takes no lock and no module reference. */
+struct ip_conntrack_protocol *
+__ip_conntrack_proto_find(u_int8_t protocol)
+{
+       return ip_ct_protos[protocol];
+}
+
+/* Look up the protocol tracker for @protocol and take a reference on its
+ * module (drop it with ip_conntrack_proto_put()).  If the module
+ * reference cannot be taken, ip_conntrack_generic_protocol is returned
+ * instead; no reference is taken on that fallback here.
+ *
+ * this is guaranteed to always return a valid protocol helper, since
+ * it falls back to generic_protocol
+ *
+ * NOTE(review): a NULL table entry would be returned as is, so the
+ * guarantee presumably relies on every ip_ct_protos[] slot being
+ * populated - confirm. */
+struct ip_conntrack_protocol *
+ip_conntrack_proto_find_get(u_int8_t protocol)
+{
+       struct ip_conntrack_protocol *p;
+
+       /* lookup and try_module_get() run with preemption disabled */
+       preempt_disable();
+       p = __ip_conntrack_proto_find(protocol);
+       if (p) {
+               if (!try_module_get(p->me))
+                       p = &ip_conntrack_generic_protocol;
+       }
+       preempt_enable();
+       
+       return p;
+}
+
+/* Drop the module reference taken by ip_conntrack_proto_find_get(). */
+void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
+{
+       module_put(p->me);
+}
+
+struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
+                                       struct ip_conntrack_tuple *repl)
  {
         struct ip_conntrack *conntrack;
-       struct ip_conntrack_tuple repl_tuple;
-       size_t hash;
-       struct ip_conntrack_expect *exp;
  
         if (!ip_conntrack_hash_rnd_initted) {
                 get_random_bytes(&ip_conntrack_hash_rnd, 4);
                 ip_conntrack_hash_rnd_initted = 1;
         }
  
-       hash = hash_conntrack(tuple);
-
         if (ip_conntrack_max
             && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+               unsigned int hash = hash_conntrack(orig);
                 /* Try dropping from this hash chain. */
                 if (!early_drop(&ip_conntrack_hash[hash])) {
                         if (net_ratelimit())
@@ -476,11 +652,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
                 }
         }
  
-       if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
-               DEBUGP("Can't invert tuple.\n");
-               return NULL;
-       }
-
         conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
         if (!conntrack) {
                 DEBUGP("Can't allocate conntrack.\n");
@@ -490,17 +661,50 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
         memset(conntrack, 0, sizeof(*conntrack));
         atomic_set(&conntrack->ct_general.use, 1);
         conntrack->ct_general.destroy = destroy_conntrack;
-       conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
-       conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
-       if (!protocol->new(conntrack, skb)) {
-               kmem_cache_free(ip_conntrack_cachep, conntrack);
-               return NULL;
-       }
+       conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
+       conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
         /* Don't set timer yet: wait for confirmation */
         init_timer(&conntrack->timeout);
         conntrack->timeout.data = (unsigned long)conntrack;
         conntrack->timeout.function = death_by_timeout;
  
+       atomic_inc(&ip_conntrack_count);
+
+       return conntrack;
+}
+
+/* Release a conntrack obtained from ip_conntrack_alloc(): decrement the
+ * global conntrack count and return the object to its slab cache.  No
+ * other teardown (timer, lists, expectations) is performed here. */
+void
+ip_conntrack_free(struct ip_conntrack *conntrack)
+{
+       atomic_dec(&ip_conntrack_count);
+       kmem_cache_free(ip_conntrack_cachep, conntrack);
+}
+
+/* Allocate a new conntrack: we return -ENOMEM if classification
+ * failed due to stress.   Otherwise it really is unclassifiable */
+static struct ip_conntrack_tuple_hash *
+init_conntrack(struct ip_conntrack_tuple *tuple,
+              struct ip_conntrack_protocol *protocol,
+              struct sk_buff *skb)
+{
+       struct ip_conntrack *conntrack;
+       struct ip_conntrack_tuple repl_tuple;
+       struct ip_conntrack_expect *exp;
+
+       if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
+               DEBUGP("Can't invert tuple.\n");
+               return NULL;
+       }
+
+       conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
+       if (conntrack == NULL || IS_ERR(conntrack))
+               return (struct ip_conntrack_tuple_hash *)conntrack;
+
+       if (!protocol->new(conntrack, skb)) {
+               ip_conntrack_free(conntrack);
+               return NULL;
+       }
+
         write_lock_bh(&ip_conntrack_lock);
         exp = find_expectation(tuple);
  
@@ -512,11 +716,16 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
                 conntrack->master = exp->master;
  #ifdef CONFIG_IP_NF_CONNTRACK_MARK
                 conntrack->mark = exp->master->mark;
+#endif
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+               /* this is ugly, but there is no other place where to put it */
+               conntrack->nat.masq_index = exp->master->nat.masq_index;
  #endif
                 nf_conntrack_get(&conntrack->master->ct_general);
                 CONNTRACK_STAT_INC(expect_new);
         } else {
-               conntrack->helper = ip_ct_find_helper(&repl_tuple);
+               conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);
  
                 CONNTRACK_STAT_INC(new);
         }
@@ -524,7 +733,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
         /* Overload tuple linked list to put us in unconfirmed list. */
         list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
  
-       atomic_inc(&ip_conntrack_count);
         write_unlock_bh(&ip_conntrack_lock);
  
         if (exp) {
@@ -602,7 +810,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
         struct ip_conntrack *ct;
         enum ip_conntrack_info ctinfo;
         struct ip_conntrack_protocol *proto;
-       int set_reply;
+       int set_reply = 0;
         int ret;
  
         /* Previously seen (loopback or untracked)?  Ignore. */
@@ -620,9 +828,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
                 return NF_DROP;
         }
  
-       /* FIXME: Do this right please. --RR */
-       (*pskb)->nfcache |= NFC_UNKNOWN;
-
  /* Doesn't cover locally-generated broadcast, so not worth it. */
  #if 0
         /* Ignore broadcast: no `connection'. */
@@ -638,7 +843,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
         }
  #endif
  
-       proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
+       proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
  
         /* It may be an special packet, error, unclean...
          * inverse of the return code tells to the netfilter
@@ -674,8 +879,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
                 return -ret;
         }
  
-       if (set_reply)
-               set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
+       if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+               ip_conntrack_event_cache(IPCT_STATUS, *pskb);
  
         return ret;
  }
@@ -684,7 +889,7 @@ int invert_tuplepr(struct ip_conntrack_tuple *inverse,
                    const struct ip_conntrack_tuple *orig)
  {
         return ip_ct_invert_tuple(inverse, orig, 
-                                 ip_ct_find_proto(orig->dst.protonum));
+                                 __ip_conntrack_proto_find(orig->dst.protonum));
  }
  
  /* Would two expected things clash? */
@@ -764,6 +969,8 @@ static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
         exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
         add_timer(&exp->timeout);
  
+       exp->id = ++ip_conntrack_expect_next_id;
+       atomic_inc(&exp->use);
         CONNTRACK_STAT_INC(expect_create);
  }
  
@@ -822,6 +1029,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
                 evict_oldest_expect(expect->master);
  
         ip_conntrack_expect_insert(expect);
+       ip_conntrack_expect_event(IPEXP_NEW, expect);
         ret = 0;
  out:
         write_unlock_bh(&ip_conntrack_lock);
@@ -842,7 +1050,7 @@ void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
  
         conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
         if (!conntrack->master && conntrack->expecting == 0)
-               conntrack->helper = ip_ct_find_helper(newreply);
+               conntrack->helper = __ip_conntrack_helper_find(newreply);
         write_unlock_bh(&ip_conntrack_lock);
  }
  
@@ -856,11 +1064,26 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
         return 0;
  }
  
+/* Find a registered helper by name.  Returns the first entry on the
+ * helpers list whose name equals @name, or NULL if there is none.
+ * Neither a lock nor a module reference is taken here.
+ *
+ * NOTE(review): callers presumably have to hold ip_conntrack_lock while
+ * walking the list - confirm. */
+struct ip_conntrack_helper *
+__ip_conntrack_helper_find_byname(const char *name)
+{
+       struct ip_conntrack_helper *h;
+
+       list_for_each_entry(h, &helpers, list) {
+               if (!strcmp(h->name, name))
+                       return h;
+       }
+
+       return NULL;
+}
+
  static inline int unhelp(struct ip_conntrack_tuple_hash *i,
                          const struct ip_conntrack_helper *me)
  {
-       if (tuplehash_to_ctrack(i)->helper == me)
+       if (tuplehash_to_ctrack(i)->helper == me) {
+               ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
                 tuplehash_to_ctrack(i)->helper = NULL;
+       }
         return 0;
  }
  
@@ -922,12 +1145,46 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
                 if (del_timer(&ct->timeout)) {
                         ct->timeout.expires = jiffies + extra_jiffies;
                         add_timer(&ct->timeout);
+                       ip_conntrack_event_cache(IPCT_REFRESH, skb);
                 }
                 ct_add_counters(ct, ctinfo, skb);
                 write_unlock_bh(&ip_conntrack_lock);
         }
  }
  
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
+ * in ip_conntrack_core, since we don't want the protocols to autoload
+ * or depend on ctnetlink.
+ *
+ * Adds the source and destination port of @tuple to @skb as
+ * CTA_PROTO_SRC_PORT / CTA_PROTO_DST_PORT attributes, copied exactly as
+ * stored in the tuple.  Returns 0 on success, -1 on failure. */
+int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
+                              const struct ip_conntrack_tuple *tuple)
+{
+       /* ports are read through the tcp member of the union, whatever the
+        * actual protocol is */
+       NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
+               &tuple->src.u.tcp.port);
+       NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
+               &tuple->dst.u.tcp.port);
+       return 0;
+
+/* NOTE(review): presumably the label NFA_PUT() jumps to when the attribute
+ * cannot be added to the skb - confirm against nfnetlink.h */
+nfattr_failure:
+       return -1;
+}
+
+/* Counterpart of ip_ct_port_tuple_to_nfattr(): fill the source and
+ * destination port of @t from the attribute table @tb, which is indexed
+ * by attribute type minus one.  Returns -EINVAL if either port attribute
+ * is missing, 0 otherwise.
+ *
+ * NOTE(review): the attribute payloads are read as u_int16_t without a
+ * length check in this function; presumably the size is validated by the
+ * caller - confirm. */
+int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
+                              struct ip_conntrack_tuple *t)
+{
+       if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
+               return -EINVAL;
+
+       /* values are stored as found in the attributes, without any byte
+        * order conversion */
+       t->src.u.tcp.port =
+               *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
+       t->dst.u.tcp.port =
+               *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
+
+       return 0;
+}
+#endif
+
  /* Returns new sk_buff, or NULL */
  struct sk_buff *
  ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
@@ -938,10 +1195,8 @@ ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
         skb = ip_defrag(skb, user);
         local_bh_enable();
  
-       if (skb) {
+       if (skb)
                 ip_send_check(skb->nh.iph);
-               skb->nfcache |= NFC_ALTERED;
-       }
         return skb;
  }
  
@@ -1091,16 +1346,14 @@ static void free_conntrack_hash(void)
                                      * ip_conntrack_htable_size));
  }
  
-/* Mishearing the voices in his head, our hero wonders how he's
-   supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
+void ip_conntrack_flush()
  {
-       ip_ct_attach = NULL;
         /* This makes sure all current packets have passed through
             netfilter framework.  Roll on, two-stage module
             delete... */
         synchronize_net();
- 
+
+       ip_ct_event_cache_flush();
   i_see_dead_people:
         ip_ct_iterate_cleanup(kill_all, NULL);
         if (atomic_read(&ip_conntrack_count) != 0) {
@@ -1110,7 +1363,14 @@ void ip_conntrack_cleanup(void)
         /* wait until all references to ip_conntrack_untracked are dropped */
         while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
                 schedule();
+}
  
+/* Mishearing the voices in his head, our hero wonders how he's
+   supposed to kill the mall. */
+void ip_conntrack_cleanup(void)
+{
+       ip_ct_attach = NULL;
+       ip_conntrack_flush();
         kmem_cache_destroy(ip_conntrack_cachep);
         kmem_cache_destroy(ip_conntrack_expect_cachep);
         free_conntrack_hash();
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c

index 7a3b773be3f93cf4e7a2d1d706fec6d63f3f252c..3a2627db1729b81ed9002e086a107e8bd0d50595 100644 (file)
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -25,8 +25,7 @@ MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
  MODULE_DESCRIPTION("ftp connection tracking helper");
  
  /* This is slow, but it's simple. --RR */
-static char ftp_buffer[65536];
-
+static char *ftp_buffer;
  static DEFINE_SPINLOCK(ip_ftp_lock);
  
  #define MAX_PORTS 8
@@ -262,7 +261,8 @@ static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
  }
  
  /* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir)
+static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
+                         struct sk_buff *skb)
  {
         unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
  
@@ -276,10 +276,13 @@ static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir)
                         oldest = i;
         }
  
-       if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER)
+       if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
                 info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
-       else if (oldest != NUM_SEQ_TO_REMEMBER)
+               ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+       } else if (oldest != NUM_SEQ_TO_REMEMBER) {
                 info->seq_aft_nl[dir][oldest] = nl_seq;
+               ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+       }
  }
  
  static int help(struct sk_buff **pskb,
@@ -439,7 +442,7 @@ out_update_nl:
         /* Now if this ends in \n, update ftp info.  Seq may have been
          * adjusted by NAT code. */
         if (ends_in_nl)
-               update_nl_seq(seq, ct_ftp_info,dir);
+               update_nl_seq(seq, ct_ftp_info,dir, *pskb);
   out:
         spin_unlock_bh(&ip_ftp_lock);
         return ret;
@@ -457,6 +460,8 @@ static void fini(void)
                                 ports[i]);
                 ip_conntrack_helper_unregister(&ftp[i]);
         }
+
+       kfree(ftp_buffer);
  }
  
  static int __init init(void)
@@ -464,6 +469,10 @@ static int __init init(void)
         int i, ret;
         char *tmpname;
  
+       ftp_buffer = kmalloc(65536, GFP_KERNEL);
+       if (!ftp_buffer)
+               return -ENOMEM;
+
         if (ports_c == 0)
                 ports[ports_c++] = FTP_PORT;
  
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c

index 4a28f297d502a38d4c10b780306b5bfca127f97f..25438eec21a11008c48d607f1c375f34c8ed81f9 100644 (file)
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -39,7 +39,7 @@ static int ports_c;
  static int max_dcc_channels = 8;
  static unsigned int dcc_timeout = 300;
  /* This is slow, but it's simple. --RR */
-static char irc_buffer[65536];
+static char *irc_buffer;
  static DEFINE_SPINLOCK(irc_buffer_lock);
  
  unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
@@ -257,6 +257,10 @@ static int __init init(void)
                 printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n");
                 return -EBUSY;
         }
+
+       irc_buffer = kmalloc(65536, GFP_KERNEL);
+       if (!irc_buffer)
+               return -ENOMEM;
         
         /* If no port given, default to standard irc port */
         if (ports_c == 0)
@@ -304,6 +308,7 @@ static void fini(void)
                        ports[i]);
                 ip_conntrack_helper_unregister(&irc_helpers[i]);
         }
+       kfree(irc_buffer);
  }
  
  module_init(init);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c

new file mode 100644 (file)

index 0000000..a4e9278
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -0,0 +1,1579 @@
+/* Connection tracking via netlink socket. Allows for user space
+ * protocol helpers and general trouble making from userspace.
+ *
+ * (C) 2001 by Jay Schulist <jschlst@samba.org>
+ * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2003 by Patrick Mchardy <kaber@trash.net>
+ * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net>
+ *
+ * I've reworked this stuff to use attributes instead of conntrack 
+ * structures. 5.44 am. I need more tea. --pablo 05/07/11.
+ *
+ * Initial connection tracking via netlink development funded and 
+ * generally made possible by Network Robots, Inc. (www.networkrobots.com)
+ *
+ * Further development of this code funded by Astaro AG (http://www.astaro.com)
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License, incorporated herein by reference.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/rtnetlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+MODULE_LICENSE("GPL");
+
+/* Module version string, placed in __initdata. */
+static char __initdata version[] = "0.90";
+
+/* Debug tracing switch: with "#if 0" DEBUGP() expands to nothing; change it
+ * to "#if 1" to make DEBUGP() an alias for printk(). */
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+
+/* Emit the layer-4 part of @tuple into @skb: the protocol number as an
+ * 8-bit CTA_PROTO_NUM attribute, followed by whatever the protocol's
+ * optional tuple_to_nfattr() hook adds.
+ *
+ * Returns the hook's return value (0 if there is no hook), or -1 when
+ * NFA_PUT() bails out to the nfattr_failure label. */
+static inline int
+ctnetlink_dump_tuples_proto(struct sk_buff *skb, 
+                           const struct ip_conntrack_tuple *tuple)
+{
+       struct ip_conntrack_protocol *proto;
+       int ret = 0;
+
+       NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
+
+       proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
+       if (!proto)
+               return 0;
+
+       if (proto->tuple_to_nfattr)
+               ret = proto->tuple_to_nfattr(skb, tuple);
+
+       /* Drop the reference taken by ip_conntrack_proto_find_get(); the
+        * other users of that lookup in this file pair it with a put. */
+       ip_conntrack_proto_put(proto);
+
+       return ret;
+
+nfattr_failure:
+       return -1;
+}
+
+/* Emit @tuple into @skb as two nested attributes: CTA_TUPLE_IP (IPv4
+ * source and destination address) and CTA_TUPLE_PROTO (layer-4 part,
+ * produced by ctnetlink_dump_tuples_proto()).
+ *
+ * Returns 0 on success and a negative value on failure, either from the
+ * protocol dump or via the nfattr_failure label. */
+static inline int
+ctnetlink_dump_tuples(struct sk_buff *skb, 
+                     const struct ip_conntrack_tuple *tuple)
+{
+       struct nfattr *nest_parms;
+       int ret;
+
+       nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
+       NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip);
+       NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip);
+       NFA_NEST_END(skb, nest_parms);
+
+       nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
+       /* Do not swallow a failure from the protocol part: callers test
+        * our result with "< 0" and must see it. */
+       ret = ctnetlink_dump_tuples_proto(skb, tuple);
+       NFA_NEST_END(skb, nest_parms);
+
+       return ret < 0 ? ret : 0;
+
+nfattr_failure:
+       return -1;
+}
+
+/* Append the status word of @ct to @skb as a 32-bit CTA_STATUS attribute,
+ * converted with htonl().  Returns 0, or -1 via the nfattr_failure label. */
+static inline int
+ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+       u_int32_t wire_status;
+
+       wire_status = htonl((u_int32_t) ct->status);
+       NFA_PUT(skb, CTA_STATUS, sizeof(wire_status), &wire_status);
+
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+
+/* Append the remaining lifetime of @ct to @skb as a CTA_TIMEOUT attribute:
+ * (expires - jiffies) / HZ passed through htonl(), or 0 when the
+ * difference is negative.  Returns 0, or -1 via the nfattr_failure label. */
+static inline int
+ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+       long remaining = ct->timeout.expires - jiffies;
+       u_int32_t wire_timeout = 0;
+
+       if (remaining >= 0)
+               wire_timeout = htonl(remaining / HZ);
+
+       NFA_PUT(skb, CTA_TIMEOUT, sizeof(wire_timeout), &wire_timeout);
+
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+
+/* Emit the protocol-private state of @ct into @skb as a nested
+ * CTA_PROTOINFO attribute, filled in by the protocol's optional
+ * to_nfattr() hook.  Nothing is emitted when the protocol has no hook.
+ *
+ * Returns the hook's return value, 0 when there is nothing to dump, or -1
+ * via the nfattr_failure label.  The protocol reference obtained from
+ * ip_conntrack_proto_find_get() is dropped on every path. */
+static inline int
+ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+       struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
+       struct nfattr *nest_proto;
+       int ret;
+
+       if (!proto)
+               return 0;
+
+       if (!proto->to_nfattr) {
+               /* Nothing to dump, but we still hold a reference. */
+               ip_conntrack_proto_put(proto);
+               return 0;
+       }
+
+       nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
+
+       ret = proto->to_nfattr(skb, nest_proto, ct);
+
+       NFA_NEST_END(skb, nest_proto);
+
+       ip_conntrack_proto_put(proto);
+
+       return ret;
+
+nfattr_failure:
+       /* Reached while the reference is still held. */
+       ip_conntrack_proto_put(proto);
+       return -1;
+}
+
+/* Emit a nested CTA_HELP attribute describing the helper attached to @ct:
+ * a CTA_HELP_NAME entry followed by whatever the helper's optional
+ * to_nfattr() hook adds.  Nothing is emitted (and 0 is returned) when the
+ * conntrack has no helper.  Returns 0, or -1 via the nfattr_failure label. */
+static inline int
+ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+       struct nfattr *nest_helper;
+
+       if (!ct->helper)
+               return 0;
+               
+       nest_helper = NFA_NEST(skb, CTA_HELP);
+       /* NOTE(review): this copies CTA_HELP_MAXNAMESIZE bytes starting at
+        * &ct->helper->name.  That is only right if 'name' is an embedded
+        * array of at least that size; if it is a pointer, this emits the
+        * pointer value rather than the string -- confirm against the
+        * helper structure definition. */
+       NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name);
+
+       /* NOTE(review): the hook's return value is not checked. */
+       if (ct->helper->to_nfattr)
+               ct->helper->to_nfattr(skb, ct);
+
+       NFA_NEST_END(skb, nest_helper);
+
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+
+#ifdef CONFIG_IP_NF_CT_ACCT
+/* Emit the packet and byte counters of @ct for direction @dir into @skb.
+ * They are nested under CTA_COUNTERS_REPLY when @dir is non-zero and under
+ * CTA_COUNTERS_ORIG otherwise; each counter is written as a 64-bit value
+ * converted with cpu_to_be64().  Returns 0, or -1 via the nfattr_failure
+ * label. */
+static inline int
+ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
+                       enum ip_conntrack_dir dir)
+{
+       enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
+       struct nfattr *nest_count = NFA_NEST(skb, type);
+       u_int64_t tmp;
+
+       tmp = cpu_to_be64(ct->counters[dir].packets);
+       NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp);
+
+       tmp = cpu_to_be64(ct->counters[dir].bytes);
+       NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp);
+
+       NFA_NEST_END(skb, nest_count);
+
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+#else
+/* Accounting compiled out: dumping counters is a successful no-op. */
+#define ctnetlink_dump_counters(a, b, c) (0)
+#endif
+
+#ifdef CONFIG_IP_NF_CONNTRACK_MARK
+/* Append the mark of @ct to @skb as a 32-bit CTA_MARK attribute, converted
+ * with htonl().  Returns 0, or -1 via the nfattr_failure label. */
+static inline int
+ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+       u_int32_t wire_mark;
+
+       wire_mark = htonl(ct->mark);
+       NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &wire_mark);
+
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+#else
+/* Connection marks compiled out: dumping the mark is a successful no-op. */
+#define ctnetlink_dump_mark(a, b) (0)
+#endif
+
+/* Append the id of @ct to @skb as a 32-bit CTA_ID attribute, converted
+ * with htonl().  Returns 0, or -1 via the nfattr_failure label. */
+static inline int
+ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+       u_int32_t wire_id;
+
+       wire_id = htonl(ct->id);
+       NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &wire_id);
+
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+
+/* Append the current value of the ct_general.use counter of @ct to @skb
+ * as a 32-bit CTA_USE attribute, converted with htonl().  Returns 0, or -1
+ * via the nfattr_failure label. */
+static inline int
+ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+       unsigned int wire_use;
+
+       wire_use = htonl(atomic_read(&ct->ct_general.use));
+       NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &wire_use);
+
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+
+/* Shorthand: pointer to the tuple of conntrack @ct for direction @dir. */
+#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
+
+/* Append one complete ctnetlink message describing @ct to @skb.
+ *
+ * The message carries a struct nfgenmsg (AF_INET, NFNETLINK_V0) followed
+ * by the nested original and reply tuples and then status, timeout,
+ * counters for both directions, protocol info, helper info, mark, id and
+ * use count.
+ *
+ * @pid, @seq: copied into the netlink header by NLMSG_PUT().
+ * @event:     message type; the ctnetlink subsystem id is OR-ed in here.
+ * @nowait:    when non-zero together with @pid, the message is flagged
+ *             NLM_F_MULTI.
+ *
+ * Returns skb->len on success.  On failure the skb is trimmed back to the
+ * length it had on entry and -1 is returned. */
+static int
+ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+                   int event, int nowait, 
+                   const struct ip_conntrack *ct)
+{
+       struct nlmsghdr *nlh;
+       struct nfgenmsg *nfmsg;
+       struct nfattr *nest_parms;
+       unsigned char *b;
+
+       /* Remember where this message starts so it can be undone. */
+       b = skb->tail;
+
+       event |= NFNL_SUBSYS_CTNETLINK << 8;
+       nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
+       nfmsg  = NLMSG_DATA(nlh);
+
+       nlh->nlmsg_flags    = (nowait && pid) ? NLM_F_MULTI : 0;
+       nfmsg->nfgen_family = AF_INET;
+       nfmsg->version      = NFNETLINK_V0;
+       nfmsg->res_id       = 0;
+
+       nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
+       if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+               goto nfattr_failure;
+       NFA_NEST_END(skb, nest_parms);
+       
+       nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
+       if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+               goto nfattr_failure;
+       NFA_NEST_END(skb, nest_parms);
+
+       if (ctnetlink_dump_status(skb, ct) < 0 ||
+           ctnetlink_dump_timeout(skb, ct) < 0 ||
+           ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+           ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+           ctnetlink_dump_protoinfo(skb, ct) < 0 ||
+           ctnetlink_dump_helpinfo(skb, ct) < 0 ||
+           ctnetlink_dump_mark(skb, ct) < 0 ||
+           ctnetlink_dump_id(skb, ct) < 0 ||
+           ctnetlink_dump_use(skb, ct) < 0)
+               goto nfattr_failure;
+
+       /* Final message length: everything appended since 'b'. */
+       nlh->nlmsg_len = skb->tail - b;
+       return skb->len;
+
+nlmsg_failure:
+nfattr_failure:
+       /* Drop the partially built message. */
+       skb_trim(skb, b - skb->data);
+       return -1;
+}
+
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+/* Notifier callback: turn a conntrack event into a netlink message and
+ * hand it to nfnetlink_send() for the matching group.
+ *
+ * @events: bitmask of IPCT_* events.
+ * @ptr:    the struct ip_conntrack the events refer to.
+ *
+ * Event to message mapping, first match wins:
+ *   IPCT_DESTROY                -> IPCTNL_MSG_CT_DELETE, DESTROY group
+ *   IPCT_NEW or IPCT_RELATED    -> IPCTNL_MSG_CT_NEW with CREATE|EXCL,
+ *                                  NEW group, all optional parts dumped
+ *   STATUS/PROTOINFO/HELPER/
+ *   HELPINFO/NATINFO            -> IPCTNL_MSG_CT_NEW, UPDATE group
+ * Any other event mask is ignored.
+ *
+ * Always returns NOTIFY_DONE; allocation or fill failures silently drop
+ * the notification. */
+static int ctnetlink_conntrack_event(struct notifier_block *this,
+                                     unsigned long events, void *ptr)
+{
+       struct nlmsghdr *nlh;
+       struct nfgenmsg *nfmsg;
+       struct nfattr *nest_parms;
+       struct ip_conntrack *ct = (struct ip_conntrack *)ptr;
+       struct sk_buff *skb;
+       unsigned int type;
+       unsigned char *b;
+       unsigned int flags = 0, group;
+
+       /* ignore our fake conntrack entry */
+       if (ct == &ip_conntrack_untracked)
+               return NOTIFY_DONE;
+
+       if (events & IPCT_DESTROY) {
+               type = IPCTNL_MSG_CT_DELETE;
+               group = NFNLGRP_CONNTRACK_DESTROY;
+               goto alloc_skb;
+       }
+       if (events & (IPCT_NEW | IPCT_RELATED)) {
+               type = IPCTNL_MSG_CT_NEW;
+               flags = NLM_F_CREATE|NLM_F_EXCL;
+               /* dump everything */
+               events = ~0UL;
+               group = NFNLGRP_CONNTRACK_NEW;
+               goto alloc_skb;
+       }
+       if (events & (IPCT_STATUS |
+                     IPCT_PROTOINFO |
+                     IPCT_HELPER |
+                     IPCT_HELPINFO |
+                     IPCT_NATINFO)) {
+               type = IPCTNL_MSG_CT_NEW;
+               group = NFNLGRP_CONNTRACK_UPDATE;
+               goto alloc_skb;
+       } 
+       
+       return NOTIFY_DONE;
+
+alloc_skb:
+  /* FIXME: Check if there are any listeners before, don't hurt performance */
+       
+       skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+       if (!skb)
+               return NOTIFY_DONE;
+
+       /* Start of the message, used to compute nlmsg_len below. */
+       b = skb->tail;
+
+       type |= NFNL_SUBSYS_CTNETLINK << 8;
+       nlh   = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
+       nfmsg = NLMSG_DATA(nlh);
+
+       nlh->nlmsg_flags    = flags;
+       nfmsg->nfgen_family = AF_INET;
+       nfmsg->version  = NFNETLINK_V0;
+       nfmsg->res_id   = 0;
+
+       /* Both tuples are always included, whatever the event. */
+       nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
+       if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+               goto nfattr_failure;
+       NFA_NEST_END(skb, nest_parms);
+       
+       nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
+       if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+               goto nfattr_failure;
+       NFA_NEST_END(skb, nest_parms);
+       
+       /* NAT stuff is now a status flag */
+       if ((events & IPCT_STATUS || events & IPCT_NATINFO)
+           && ctnetlink_dump_status(skb, ct) < 0)
+               goto nfattr_failure;
+       if (events & IPCT_REFRESH
+           && ctnetlink_dump_timeout(skb, ct) < 0)
+               goto nfattr_failure;
+       if (events & IPCT_PROTOINFO
+           && ctnetlink_dump_protoinfo(skb, ct) < 0)
+               goto nfattr_failure;
+       if (events & IPCT_HELPINFO
+           && ctnetlink_dump_helpinfo(skb, ct) < 0)
+               goto nfattr_failure;
+
+       /* Counters are included regardless of the event mask. */
+       if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+           ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+               goto nfattr_failure;
+
+       nlh->nlmsg_len = skb->tail - b;
+       nfnetlink_send(skb, 0, group, 0);
+       return NOTIFY_DONE;
+
+nlmsg_failure:
+nfattr_failure:
+       /* The message could not be built: discard it. */
+       kfree_skb(skb);
+       return NOTIFY_DONE;
+}
+#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
+
+/* Completion callback passed to netlink_dump_start() together with the
+ * table dump functions.  It only emits a debug trace and returns 0. */
+static int ctnetlink_done(struct netlink_callback *cb)
+{
+       DEBUGP("entered %s\n", __FUNCTION__);
+       return 0;
+}
+
+/* Dump callback: append as many conntrack entries as fit into @skb.
+ *
+ * Resume state lives in @cb: cb->args[0] is the hash bucket to continue
+ * with and cb->args[1] (accessed through 'id') is the id of the last entry
+ * written from that bucket.  When ctnetlink_fill_info() fails the loop
+ * stops with that state intact.
+ *
+ * The table is walked under read_lock_bh(&ip_conntrack_lock).
+ * Returns skb->len. */
+static int
+ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct ip_conntrack *ct = NULL;
+       struct ip_conntrack_tuple_hash *h;
+       struct list_head *i;
+       u_int32_t *id = (u_int32_t *) &cb->args[1];
+
+       DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__, 
+                       cb->args[0], *id);
+
+       read_lock_bh(&ip_conntrack_lock);
+       /* Moving on to the next bucket resets the per-bucket id cursor. */
+       for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
+               list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
+                       h = (struct ip_conntrack_tuple_hash *) i;
+                       /* Only entries hashed by their original-direction
+                        * tuple are reported. */
+                       if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+                               continue;
+                       ct = tuplehash_to_ctrack(h);
+                       /* Skip what an earlier call already wrote. */
+                       if (ct->id <= *id)
+                               continue;
+                       if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
+                                               cb->nlh->nlmsg_seq,
+                                               IPCTNL_MSG_CT_NEW,
+                                               1, ct) < 0)
+                               goto out;
+                       *id = ct->id;
+               }
+       }
+out:   
+       read_unlock_bh(&ip_conntrack_lock);
+
+       DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
+
+       return skb->len;
+}
+
+#ifdef CONFIG_IP_NF_CT_ACCT
+/* Dump callback used for IPCTNL_MSG_CT_GET_CTRZERO: same walk and resume
+ * logic as ctnetlink_dump_table(), but every entry that was successfully
+ * written has its counters zeroed afterwards.  Because entries are
+ * modified, the table is walked under write_lock_bh(&ip_conntrack_lock).
+ *
+ * Resume state: cb->args[0] is the bucket to continue with, cb->args[1]
+ * (through 'id') the id of the last entry written from that bucket.
+ * Returns skb->len. */
+static int
+ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct ip_conntrack *ct = NULL;
+       struct ip_conntrack_tuple_hash *h;
+       struct list_head *i;
+       u_int32_t *id = (u_int32_t *) &cb->args[1];
+
+       /* cb->args[0] is printed with %lu everywhere else in this file;
+        * keep the format consistent so enabling DEBUGP stays warning-free. */
+       DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__, 
+                       cb->args[0], *id);
+
+       write_lock_bh(&ip_conntrack_lock);
+       /* Moving on to the next bucket resets the per-bucket id cursor. */
+       for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
+               list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
+                       h = (struct ip_conntrack_tuple_hash *) i;
+                       if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+                               continue;
+                       ct = tuplehash_to_ctrack(h);
+                       /* Skip what an earlier call already wrote. */
+                       if (ct->id <= *id)
+                               continue;
+                       if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
+                                               cb->nlh->nlmsg_seq,
+                                               IPCTNL_MSG_CT_NEW,
+                                               1, ct) < 0)
+                               goto out;
+                       *id = ct->id;
+
+                       /* Reset only after the entry made it into the skb. */
+                       memset(&ct->counters, 0, sizeof(ct->counters));
+               }
+       }
+out:   
+       write_unlock_bh(&ip_conntrack_lock);
+
+       DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
+
+       return skb->len;
+}
+#endif
+
+/* Minimum payload size of each CTA_IP_* attribute, indexed by type - 1. */
+static const int cta_min_ip[CTA_IP_MAX] = {
+       [CTA_IP_V4_SRC-1]       = sizeof(u_int32_t),
+       [CTA_IP_V4_DST-1]       = sizeof(u_int32_t),
+};
+
+/* Parse the nested CTA_TUPLE_IP attribute @attr into @tuple.
+ *
+ * Both CTA_IP_V4_SRC and CTA_IP_V4_DST are required; their 32-bit payloads
+ * are copied verbatim into tuple->src.ip and tuple->dst.ip.
+ *
+ * Returns 0 on success, -EINVAL for an undersized or missing attribute,
+ * and -1 when nfattr_parse_nested() fails. */
+static inline int
+ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
+{
+       struct nfattr *tb[CTA_IP_MAX];
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       
+       if (nfattr_parse_nested(tb, CTA_IP_MAX, attr) < 0)
+               goto nfattr_failure;
+
+       if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
+               return -EINVAL;
+
+       if (!tb[CTA_IP_V4_SRC-1])
+               return -EINVAL;
+       tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
+
+       if (!tb[CTA_IP_V4_DST-1])
+               return -EINVAL;
+       tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
+
+       DEBUGP("leaving\n");
+
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+
+/* Minimum payload size of each CTA_PROTO_* attribute, indexed by type - 1.
+ * CTA_PROTO_NUM is a single byte, matching what
+ * ctnetlink_dump_tuples_proto() emits. */
+static const int cta_min_proto[CTA_PROTO_MAX] = {
+       [CTA_PROTO_NUM-1]       = sizeof(u_int8_t),
+       [CTA_PROTO_SRC_PORT-1]  = sizeof(u_int16_t),
+       [CTA_PROTO_DST_PORT-1]  = sizeof(u_int16_t),
+       [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
+       [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
+       [CTA_PROTO_ICMP_ID-1]   = sizeof(u_int16_t),
+};
+
+/* Parse the nested CTA_TUPLE_PROTO attribute @attr into @tuple.
+ *
+ * CTA_PROTO_NUM is required and stored in tuple->dst.protonum; the rest of
+ * the layer-4 tuple is filled in by the protocol's optional
+ * nfattr_to_tuple() hook.
+ *
+ * Returns the hook's return value (0 if there is no hook), -EINVAL for an
+ * undersized or missing attribute, and -1 when nfattr_parse_nested()
+ * fails. */
+static inline int
+ctnetlink_parse_tuple_proto(struct nfattr *attr, 
+                           struct ip_conntrack_tuple *tuple)
+{
+       struct nfattr *tb[CTA_PROTO_MAX];
+       struct ip_conntrack_protocol *proto;
+       int ret = 0;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       if (nfattr_parse_nested(tb, CTA_PROTO_MAX, attr) < 0)
+               goto nfattr_failure;
+
+       if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
+               return -EINVAL;
+
+       if (!tb[CTA_PROTO_NUM-1])
+               return -EINVAL;
+       /* The protocol number travels as one byte (see the dump side);
+        * reading it as 16 bits would pick up an adjacent byte and give
+        * endian-dependent results. */
+       tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
+
+       proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
+
+       if (likely(proto)) {
+               if (likely(proto->nfattr_to_tuple))
+                       ret = proto->nfattr_to_tuple(tb, tuple);
+               /* Drop the lookup reference whether or not a hook exists. */
+               ip_conntrack_proto_put(proto);
+       }
+
+       return ret;
+
+nfattr_failure:
+       return -1;
+}
+
+/* Parse the nested tuple attribute cda[@type - 1] into @tuple.
+ *
+ * @tuple is zeroed first.  Both CTA_TUPLE_IP and CTA_TUPLE_PROTO must be
+ * present inside the nest.  tuple->dst.dir is set to IP_CT_DIR_REPLY for
+ * CTA_TUPLE_REPLY and to IP_CT_DIR_ORIGINAL for every other @type.
+ *
+ * Returns 0 on success, -EINVAL when a required sub-attribute is missing,
+ * the error of the IP/proto sub-parser, or -1 when nfattr_parse_nested()
+ * fails.  The caller is expected to have checked that cda[@type - 1] is
+ * set; it is not checked here. */
+static inline int
+ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
+                     enum ctattr_tuple type)
+{
+       struct nfattr *tb[CTA_TUPLE_MAX];
+       int err;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       memset(tuple, 0, sizeof(*tuple));
+
+       if (nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]) < 0)
+               goto nfattr_failure;
+
+       if (!tb[CTA_TUPLE_IP-1])
+               return -EINVAL;
+
+       err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple);
+       if (err < 0)
+               return err;
+
+       if (!tb[CTA_TUPLE_PROTO-1])
+               return -EINVAL;
+
+       err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple);
+       if (err < 0)
+               return err;
+
+       /* orig and expect tuples get DIR_ORIGINAL */
+       if (type == CTA_TUPLE_REPLY)
+               tuple->dst.dir = IP_CT_DIR_REPLY;
+       else
+               tuple->dst.dir = IP_CT_DIR_ORIGINAL;
+
+       DUMP_TUPLE(tuple);
+
+       DEBUGP("leaving\n");
+
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+/* Minimum payload size of each CTA_PROTONAT_* attribute, indexed by
+ * type - 1. */
+static const int cta_min_protonat[CTA_PROTONAT_MAX] = {
+       [CTA_PROTONAT_PORT_MIN-1]       = sizeof(u_int16_t),
+       [CTA_PROTONAT_PORT_MAX-1]       = sizeof(u_int16_t),
+};
+
+/* Parse the nested CTA_NAT_PROTO attribute @attr into @range, using the
+ * NAT protocol handler that matches the original-direction protocol of
+ * @ct.  When the handler's nfattr_to_range() hook reports that it parsed
+ * something, IP_NAT_RANGE_PROTO_SPECIFIED is set in range->flags.
+ *
+ * Returns 0 on success (including "no handler" and "no hook"), -1 when
+ * the attribute cannot be parsed or fails the size check. */
+static int ctnetlink_parse_nat_proto(struct nfattr *attr,
+                                    const struct ip_conntrack *ct,
+                                    struct ip_nat_range *range)
+{
+       struct nfattr *tb[CTA_PROTONAT_MAX];
+       struct ip_nat_protocol *npt;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       if (nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr) < 0)
+               goto nfattr_failure;
+
+       if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
+               goto nfattr_failure;
+
+       npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
+       if (!npt)
+               return 0;
+
+       if (!npt->nfattr_to_range) {
+               ip_nat_proto_put(npt);
+               return 0;
+       }
+
+       /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
+       /* NOTE(review): a negative (error) result is treated like "not
+        * parsed" here and is not propagated to the caller. */
+       if (npt->nfattr_to_range(tb, range) > 0)
+               range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+
+       ip_nat_proto_put(npt);
+
+       DEBUGP("leaving\n");
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+
+/* Parse the nested CTA_NAT attribute from @cda into @range.
+ *
+ * @range is zeroed first.  CTA_NAT_MINIP is optional; when CTA_NAT_MAXIP
+ * is absent, max_ip defaults to min_ip.  IP_NAT_RANGE_MAP_IPS is set when
+ * min_ip ends up non-zero.  An optional CTA_NAT_PROTO nest is handed to
+ * ctnetlink_parse_nat_proto().
+ *
+ * Returns 0 on success, the error of the proto parser, or -1 when
+ * nfattr_parse_nested() fails. */
+static inline int
+ctnetlink_parse_nat(struct nfattr *cda[],
+                   const struct ip_conntrack *ct, struct ip_nat_range *range)
+{
+       struct nfattr *tb[CTA_NAT_MAX];
+       int err;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       memset(range, 0, sizeof(*range));
+       
+       if (nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]) < 0)
+               goto nfattr_failure;
+
+       /* NOTE(review): unlike the tuple parsers, no nfattr_bad_size()
+        * check guards the two 32-bit reads below. */
+       if (tb[CTA_NAT_MINIP-1])
+               range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
+
+       if (!tb[CTA_NAT_MAXIP-1])
+               range->max_ip = range->min_ip;
+       else
+               range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
+
+       if (range->min_ip)
+               range->flags |= IP_NAT_RANGE_MAP_IPS;
+
+       if (!tb[CTA_NAT_PROTO-1])
+               return 0;
+
+       err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
+       if (err < 0)
+               return err;
+
+       DEBUGP("leaving\n");
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+#endif
+
+/* Extract the helper name from the nested CTA_HELP attribute @attr.
+ *
+ * On success *@helper_name points at the payload of CTA_HELP_NAME inside
+ * the message (nothing is copied) and 0 is returned.  Returns -EINVAL
+ * when CTA_HELP_NAME is missing and -1 when nfattr_parse_nested() fails. */
+static inline int
+ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
+{
+       struct nfattr *nested[CTA_HELP_MAX];
+       struct nfattr *name_attr;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       if (nfattr_parse_nested(nested, CTA_HELP_MAX, attr) < 0)
+               goto nfattr_failure;
+
+       name_attr = nested[CTA_HELP_NAME-1];
+       if (name_attr == NULL)
+               return -EINVAL;
+
+       *helper_name = NFA_DATA(name_attr);
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+
+/* Handle a conntrack delete request.
+ *
+ * With neither CTA_TUPLE_ORIG nor CTA_TUPLE_REPLY present the whole table
+ * is flushed.  Otherwise the entry matching the given tuple is looked up
+ * (the original tuple wins when both are supplied); when CTA_ID is also
+ * present it must match the entry's id.  The entry is removed by stopping
+ * its timeout timer and calling the timer's handler directly.
+ *
+ * Returns 0 on success, -ENOENT when no matching entry exists, or the
+ * tuple parser's error. */
+static int
+ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, 
+                       struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+       struct ip_conntrack_tuple_hash *h;
+       struct ip_conntrack_tuple tuple;
+       struct ip_conntrack *ct;
+       int err = 0;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       if (cda[CTA_TUPLE_ORIG-1])
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
+       else if (cda[CTA_TUPLE_REPLY-1])
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
+       else {
+               /* Flush the whole table */
+               ip_conntrack_flush();
+               return 0;
+       }
+
+       if (err < 0)
+               return err;
+
+       h = ip_conntrack_find_get(&tuple, NULL);
+       if (!h) {
+               DEBUGP("tuple not found in conntrack hash\n");
+               return -ENOENT;
+       }
+
+       ct = tuplehash_to_ctrack(h);
+
+       if (cda[CTA_ID-1]) {
+               u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1]));
+               if (ct->id != id) {
+                       ip_conntrack_put(ct);
+                       return -ENOENT;
+               }
+       }
+
+       /* If we stopped a pending timer, run its handler ourselves.  This
+        * must happen while we still hold the reference obtained from
+        * ip_conntrack_find_get(): 'ct' may not be touched once that
+        * reference has been dropped. */
+       if (del_timer(&ct->timeout))
+               ct->timeout.function((unsigned long)ct);
+
+       ip_conntrack_put(ct);
+       DEBUGP("leaving\n");
+
+       return 0;
+}
+
+static int
+ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, 
+                       struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+       struct ip_conntrack_tuple_hash *h;
+       struct ip_conntrack_tuple tuple;
+       struct ip_conntrack *ct;
+       struct sk_buff *skb2 = NULL;
+       int err = 0;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       if (nlh->nlmsg_flags & NLM_F_DUMP) {
+               struct nfgenmsg *msg = NLMSG_DATA(nlh);
+               u32 rlen;
+
+               if (msg->nfgen_family != AF_INET)
+                       return -EAFNOSUPPORT;
+
+               if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
+                                       IPCTNL_MSG_CT_GET_CTRZERO) {
+#ifdef CONFIG_IP_NF_CT_ACCT
+                       if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+                                               ctnetlink_dump_table_w,
+                                               ctnetlink_done)) != 0)
+                               return -EINVAL;
+#else
+                       return -ENOTSUPP;
+#endif
+               } else {
+                       if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+                                                       ctnetlink_dump_table,
+                                                       ctnetlink_done)) != 0)
+                       return -EINVAL;
+               }
+
+               rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+               if (rlen > skb->len)
+                       rlen = skb->len;
+               skb_pull(skb, rlen);
+               return 0;
+       }
+
+       if (cda[CTA_TUPLE_ORIG-1])
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
+       else if (cda[CTA_TUPLE_REPLY-1])
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
+       else
+               return -EINVAL;
+
+       if (err < 0)
+               return err;
+
+       h = ip_conntrack_find_get(&tuple, NULL);
+       if (!h) {
+               DEBUGP("tuple not found in conntrack hash");
+               return -ENOENT;
+       }
+       DEBUGP("tuple found\n");
+       ct = tuplehash_to_ctrack(h);
+
+       err = -ENOMEM;
+       skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+       if (!skb2) {
+               ip_conntrack_put(ct);
+               return -ENOMEM;
+       }
+       NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
+
+       err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 
+                                 IPCTNL_MSG_CT_NEW, 1, ct);
+       ip_conntrack_put(ct);
+       if (err <= 0)
+               goto out;
+
+       err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+       if (err < 0)
+               goto out;
+
+       DEBUGP("leaving\n");
+       return 0;
+
+out:
+       if (skb2)
+               kfree_skb(skb2);
+       return -1;
+}
+
+static inline int
+ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
+{
+       unsigned long d, status = *(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]);
+       d = ct->status ^ status;
+
+       if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
+               /* unchangeable */
+               return -EINVAL;
+       
+       if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
+               /* SEEN_REPLY bit can only be set */
+               return -EINVAL;
+
+       
+       if (d & IPS_ASSURED && !(status & IPS_ASSURED))
+               /* ASSURED bit can only be set */
+               return -EINVAL;
+
+       if (cda[CTA_NAT-1]) {
+#ifndef CONFIG_IP_NF_NAT_NEEDED
+               return -EINVAL;
+#else
+               unsigned int hooknum;
+               struct ip_nat_range range;
+
+               if (ctnetlink_parse_nat(cda, ct, &range) < 0)
+                       return -EINVAL;
+
+               DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n", 
+                      NIPQUAD(range.min_ip), NIPQUAD(range.max_ip),
+                      htons(range.min.all), htons(range.max.all));
+               
+               /* This is tricky but it works. ip_nat_setup_info needs the
+                * hook number as parameter, so let's do the correct 
+                * conversion and run away */
+               if (status & IPS_SRC_NAT_DONE)
+                       hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */
+               else if (status & IPS_DST_NAT_DONE)
+                       hooknum = NF_IP_PRE_ROUTING;  /* IP_NAT_MANIP_DST */
+               else 
+                       return -EINVAL; /* Missing NAT flags */
+
+               DEBUGP("NAT status: %lu\n", 
+                      status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
+               
+               if (ip_nat_initialized(ct, hooknum))
+                       return -EEXIST;
+               ip_nat_setup_info(ct, &range, hooknum);
+
+                DEBUGP("NAT status after setup_info: %lu\n",
+                       ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
+#endif
+       }
+
+       /* Be careful here, modifying NAT bits can screw up things,
+        * so don't let users modify them directly if they don't pass
+        * ip_nat_range. */
+       ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
+       return 0;
+}
+
+
+/* Replace the helper of @ct with the one named in the CTA_HELP attribute.
+ *
+ * An empty name detaches the current helper (after removing its
+ * expectations); any other name must resolve through
+ * __ip_conntrack_helper_find_byname().  When one helper replaces another,
+ * the per-helper data in ct->help is zeroed.  Conntracks that have a
+ * master are refused.
+ *
+ * Returns 0 on success, -EINVAL for a connection with a master or an
+ * unknown helper name, or the error of ctnetlink_parse_help(). */
+static inline int
+ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[])
+{
+       struct ip_conntrack_helper *helper;
+       char *helpname;
+       int err;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       /* don't change helper of sibling connections */
+       if (ct->master)
+               return -EINVAL;
+
+       err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname);
+       if (err < 0)
+               return err;
+
+       /* NOTE(review): helpname points straight into the netlink message;
+        * nothing in this function verifies that it is NUL-terminated
+        * before it is used as a C string below. */
+       helper = __ip_conntrack_helper_find_byname(helpname);
+       if (!helper) {
+               if (!strcmp(helpname, ""))
+                       helper = NULL;
+               else
+                       return -EINVAL;
+       }
+
+       if (ct->helper) {
+               if (!helper) {
+                       /* we had a helper before ... */
+                       ip_ct_remove_expectations(ct);
+                       ct->helper = NULL;
+               } else {
+                       /* need to zero data of old helper */
+                       memset(&ct->help, 0, sizeof(ct->help));
+               }
+       }
+       
+       ct->helper = helper;
+
+       return 0;
+}
+
+/* Re-arm the timeout of @ct to the number of seconds carried (in network
+ * byte order) by the CTA_TIMEOUT attribute.
+ *
+ * Returns 0 on success, or -ETIME when del_timer() reports that the timer
+ * was no longer pending; the entry is left untouched in that case. */
+static inline int
+ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
+{
+       u_int32_t secs = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
+
+       if (del_timer(&ct->timeout)) {
+               ct->timeout.expires = jiffies + secs * HZ;
+               add_timer(&ct->timeout);
+               return 0;
+       }
+
+       return -ETIME;
+}
+
+/* Apply the optional CTA_HELP, CTA_TIMEOUT and CTA_STATUS attributes to an
+ * existing conntrack, in that order, stopping at the first failure.
+ *
+ * Returns 0 on success or the negative error of the step that failed. */
+static int
+ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
+{
+       int err = 0;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       if (cda[CTA_HELP-1])
+               err = ctnetlink_change_helper(ct, cda);
+
+       if (err >= 0 && cda[CTA_TIMEOUT-1])
+               err = ctnetlink_change_timeout(ct, cda);
+
+       if (err >= 0 && cda[CTA_STATUS-1])
+               err = ctnetlink_change_status(ct, cda);
+
+       if (err < 0)
+               return err;
+
+       DEBUGP("all done\n");
+       return 0;
+}
+
+/* Create a new conntrack for the tuple pair @otuple/@rtuple and insert it
+ * into the hash table.
+ *
+ * CTA_TIMEOUT is mandatory (seconds, network byte order).  The new entry
+ * is marked IPS_CONFIRMED; an optional CTA_STATUS attribute is then
+ * applied through ctnetlink_change_status().  A helper is looked up for
+ * the reply tuple.
+ *
+ * Returns 0 on success, -ENOMEM when allocation fails, -EINVAL when
+ * CTA_TIMEOUT is missing, or the error of ctnetlink_change_status().  On
+ * failure the allocated conntrack is freed. */
+static int
+ctnetlink_create_conntrack(struct nfattr *cda[], 
+                          struct ip_conntrack_tuple *otuple,
+                          struct ip_conntrack_tuple *rtuple)
+{
+       struct ip_conntrack *ct;
+       int err = -EINVAL;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       ct = ip_conntrack_alloc(otuple, rtuple);
+       if (ct == NULL || IS_ERR(ct))
+               return -ENOMEM; 
+
+       if (!cda[CTA_TIMEOUT-1])
+               goto err;
+       ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
+
+       ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
+       ct->status |= IPS_CONFIRMED;
+
+       /* CTA_STATUS is optional, and ctnetlink_change_status()
+        * dereferences it unconditionally: only call it when present. */
+       if (cda[CTA_STATUS-1]) {
+               err = ctnetlink_change_status(ct, cda);
+               if (err < 0)
+                       goto err;
+       }
+
+       ct->helper = ip_conntrack_helper_find_get(rtuple);
+
+       add_timer(&ct->timeout);
+       ip_conntrack_hash_insert(ct);
+
+       if (ct->helper)
+               ip_conntrack_helper_put(ct->helper);
+
+       DEBUGP("conntrack with id %u inserted\n", ct->id);
+       return 0;
+
+err:   
+       ip_conntrack_free(ct);
+       return err;
+}
+
+/*
+ * Handle an IPCTNL_MSG_CT_NEW request: update an existing conntrack or,
+ * when none matches and NLM_F_CREATE is set, create a new one.
+ *
+ * The lookup uses the original tuple when present, otherwise the reply
+ * tuple.  Returns 0 on success, -ENOENT when nothing matched and creation
+ * was not requested, -EEXIST when the entry exists and NLM_F_EXCL is set,
+ * -EINVAL for a NAT attribute on an existing entry or for a create request
+ * that lacks one of the two tuples, or the negative result of the
+ * parse/create/change helpers.
+ */
+static int 
+ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, 
+                       struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+       struct ip_conntrack_tuple otuple, rtuple;
+       struct ip_conntrack_tuple_hash *h = NULL;
+       int err = 0;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       if (cda[CTA_TUPLE_ORIG-1]) {
+               err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG);
+               if (err < 0)
+                       return err;
+       }
+
+       if (cda[CTA_TUPLE_REPLY-1]) {
+               err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY);
+               if (err < 0)
+                       return err;
+       }
+
+       write_lock_bh(&ip_conntrack_lock);
+       if (cda[CTA_TUPLE_ORIG-1])
+               h = __ip_conntrack_find(&otuple, NULL);
+       else if (cda[CTA_TUPLE_REPLY-1])
+               h = __ip_conntrack_find(&rtuple, NULL);
+
+       if (h == NULL) {
+               write_unlock_bh(&ip_conntrack_lock);
+               DEBUGP("no such conntrack, create new\n");
+               if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+                       return -ENOENT;
+               /* Creation consumes both tuples; a tuple whose attribute
+                * is missing was never parsed and is still uninitialised
+                * stack memory, so refuse rather than insert garbage. */
+               if (!cda[CTA_TUPLE_ORIG-1] || !cda[CTA_TUPLE_REPLY-1])
+                       return -EINVAL;
+               return ctnetlink_create_conntrack(cda, &otuple, &rtuple);
+       }
+       /* implicit 'else' */
+
+       /* we only allow nat config for new conntracks */
+       if (cda[CTA_NAT-1]) {
+               err = -EINVAL;
+               goto out_unlock;
+       }
+
+       /* We manipulate the conntrack inside the global conntrack table lock,
+        * so there's no need to increase the refcount */
+       DEBUGP("conntrack found\n");
+       err = -EEXIST;
+       if (!(nlh->nlmsg_flags & NLM_F_EXCL))
+               err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda);
+
+out_unlock:
+       write_unlock_bh(&ip_conntrack_lock);
+       return err;
+}
+
+/*********************************************************************** 
+ * EXPECT 
+ ***********************************************************************/ 
+
+/*
+ * Write @tuple into @skb wrapped in a nested attribute of the given @type.
+ * Returns 0 on success and -1 on failure.
+ *
+ * NOTE(review): the nfattr_failure label has no visible goto besides the
+ * one below; presumably NFA_NEST also jumps there when the skb has no room
+ * left -- confirm against the macro definition.
+ */
+static inline int
+ctnetlink_exp_dump_tuple(struct sk_buff *skb,
+                        const struct ip_conntrack_tuple *tuple,
+                        enum ctattr_expect type)
+{
+       struct nfattr *nest_parms = NFA_NEST(skb, type);
+       
+       if (ctnetlink_dump_tuples(skb, tuple) < 0)
+               goto nfattr_failure;
+
+       NFA_NEST_END(skb, nest_parms);
+
+       return 0;
+
+nfattr_failure:
+       return -1;
+}                      
+
+/*
+ * Append the attributes describing expectation @exp to @skb: its tuple,
+ * its mask, the original-direction tuple of its master conntrack, the
+ * remaining timeout in seconds and its id (both 32-bit, network byte
+ * order).  Returns 0 on success and -1 on failure.
+ */
+static inline int
+ctnetlink_exp_dump_expect(struct sk_buff *skb,
+                          const struct ip_conntrack_expect *exp)
+{
+       struct ip_conntrack *master = exp->master;
+       /* unsigned arithmetic: NOTE(review) an already expired timer would
+        * wrap to a large value here -- confirm whether that can happen */
+       u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ);
+       u_int32_t id = htonl(exp->id);
+
+       if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
+               goto nfattr_failure;
+       if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0)
+               goto nfattr_failure;
+       if (ctnetlink_exp_dump_tuple(skb,
+                                &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                                CTA_EXPECT_MASTER) < 0)
+               goto nfattr_failure;
+       
+       /* presumably NFA_PUT jumps to nfattr_failure when the skb is full */
+       NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout);
+       NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id);
+
+       return 0;
+       
+nfattr_failure:
+       return -1;
+}
+
+/*
+ * Build one complete expectation message at the tail of @skb: a netlink
+ * header addressed to @pid/@seq, an nfgenmsg (AF_INET, NFNETLINK_V0) and
+ * the attributes of @exp.  @event is tagged with the expectation subsystem
+ * id in bits 8 and up.  NLM_F_MULTI is set only when both @nowait and @pid
+ * are non-zero.
+ *
+ * Returns skb->len on success.  On failure the skb is trimmed back to the
+ * length it had on entry and -1 is returned.
+ */
+static int
+ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+                   int event, 
+                   int nowait, 
+                   const struct ip_conntrack_expect *exp)
+{
+       struct nlmsghdr *nlh;
+       struct nfgenmsg *nfmsg;
+       unsigned char *b;
+
+       /* remember where this message starts so it can be undone */
+       b = skb->tail;
+
+       event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
+       /* presumably NLMSG_PUT jumps to nlmsg_failure when out of room */
+       nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
+       nfmsg  = NLMSG_DATA(nlh);
+
+       nlh->nlmsg_flags    = (nowait && pid) ? NLM_F_MULTI : 0;
+       nfmsg->nfgen_family = AF_INET;
+       nfmsg->version      = NFNETLINK_V0;
+       nfmsg->res_id       = 0;
+
+       if (ctnetlink_exp_dump_expect(skb, exp) < 0)
+               goto nfattr_failure;
+
+       /* final length covers header plus everything appended since 'b' */
+       nlh->nlmsg_len = skb->tail - b;
+       return skb->len;
+
+nlmsg_failure:
+nfattr_failure:
+       skb_trim(skb, b - skb->data);
+       return -1;
+}
+
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+/*
+ * Notifier callback for expectation events.  Only IPEXP_NEW is reported:
+ * a message describing the expectation passed in @ptr is built in a fresh
+ * skb and sent to the NFNLGRP_CONNTRACK_EXP_NEW group.  Every other event,
+ * and every failure, is silently ignored.  Always returns NOTIFY_DONE.
+ */
+static int ctnetlink_expect_event(struct notifier_block *this,
+                                 unsigned long events, void *ptr)
+{
+       struct nlmsghdr *nlh;
+       struct nfgenmsg *nfmsg;
+       struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr;
+       struct sk_buff *skb;
+       unsigned int type;
+       unsigned char *b;
+       int flags = 0;
+
+       if (events & IPEXP_NEW) {
+               type = IPCTNL_MSG_EXP_NEW;
+               flags = NLM_F_CREATE|NLM_F_EXCL;
+       } else
+               return NOTIFY_DONE;
+
+       skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+       if (!skb)
+               return NOTIFY_DONE;
+
+       b = skb->tail;
+
+       /* Expectation messages belong to the expectation subsystem, the
+        * same id ctnetlink_exp_fill_info() uses for dumps and replies. */
+       type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
+       nlh   = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
+       nfmsg = NLMSG_DATA(nlh);
+
+       nlh->nlmsg_flags    = flags;
+       nfmsg->nfgen_family = AF_INET;
+       nfmsg->version      = NFNETLINK_V0;
+       nfmsg->res_id       = 0;
+
+       if (ctnetlink_exp_dump_expect(skb, exp) < 0)
+               goto nfattr_failure;
+
+       nlh->nlmsg_len = skb->tail - b;
+       nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
+       return NOTIFY_DONE;
+
+nlmsg_failure:
+nfattr_failure:
+       kfree_skb(skb);
+       return NOTIFY_DONE;
+}
+#endif
+
+/*
+ * Netlink dump callback for the expectation list.  The list is walked
+ * backwards under ip_conntrack_lock and one message per expectation is
+ * appended to @skb until it is full.  The id of the last expectation that
+ * was dumped is kept in cb->args[0]; entries whose id is not greater than
+ * that value are skipped.  Returns skb->len.
+ */
+static int
+ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct ip_conntrack_expect *exp;
+       struct list_head *i;
+       u_int32_t *id = (u_int32_t *) &cb->args[0];
+
+       /* *id is 32 bits wide, so it must be printed with %u, not %llu */
+       DEBUGP("entered %s, last id=%u\n", __FUNCTION__, *id);
+
+       read_lock_bh(&ip_conntrack_lock);
+       list_for_each_prev(i, &ip_conntrack_expect_list) {
+               /* go through the 'list' member rather than assuming it is
+                * the first field of the structure */
+               exp = list_entry(i, struct ip_conntrack_expect, list);
+               if (exp->id <= *id)
+                       continue;
+               if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
+                                           cb->nlh->nlmsg_seq,
+                                           IPCTNL_MSG_EXP_NEW,
+                                           1, exp) < 0)
+                       goto out;
+               *id = exp->id;
+       }
+out:   
+       read_unlock_bh(&ip_conntrack_lock);
+
+       DEBUGP("leaving, last id=%u\n", *id);
+
+       return skb->len;
+}
+
+/*
+ * Handle an IPCTNL_MSG_EXP_GET request.
+ *
+ * With NLM_F_DUMP a dump of the whole expectation list is started (AF_INET
+ * only).  Otherwise CTA_EXPECT_MASTER is mandatory: the expectation found
+ * for that tuple is serialised into a new skb and unicast back to the
+ * requesting pid.
+ *
+ * Returns the result of netlink_unicast() on the single-entry path, 0 when
+ * a dump was started, -EAFNOSUPPORT / -EINVAL / -ENOENT / -ENOMEM for the
+ * corresponding failures, or the result of the tuple parser / message
+ * builder when they fail.
+ */
+static int
+ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, 
+                    struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+       struct ip_conntrack_tuple tuple;
+       struct ip_conntrack_expect *exp;
+       struct sk_buff *skb2;
+       int err = 0;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       if (nlh->nlmsg_flags & NLM_F_DUMP) {
+               struct nfgenmsg *msg = NLMSG_DATA(nlh);
+               u32 rlen;
+
+               if (msg->nfgen_family != AF_INET)
+                       return -EAFNOSUPPORT;
+
+               if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+                                               ctnetlink_exp_dump_table,
+                                               ctnetlink_done)) != 0)
+                       return -EINVAL;
+               /* consume the request, clamped to what the skb holds */
+               rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+               if (rlen > skb->len)
+                       rlen = skb->len;
+               skb_pull(skb, rlen);
+               return 0;
+       }
+
+       if (cda[CTA_EXPECT_MASTER-1])
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER);
+       else
+               return -EINVAL;
+
+       if (err < 0)
+               return err;
+
+       exp = ip_conntrack_expect_find_get(&tuple);
+       if (!exp)
+               return -ENOENT;
+
+       err = -ENOMEM;
+       skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (!skb2)
+               goto out;
+       NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
+       
+       err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, 
+                                     nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
+                                     1, exp);
+       if (err <= 0)
+               goto out;
+
+       ip_conntrack_expect_put(exp);
+
+       /* netlink_unicast() takes ownership of skb2 and releases it itself
+        * when delivery fails, so it must not be freed again here. */
+       return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+
+out:
+       ip_conntrack_expect_put(exp);
+       if (skb2)
+               kfree_skb(skb2);
+       return err;
+}
+
+/*
+ * Handle an IPCTNL_MSG_EXP_DELETE request.  Three modes, chosen by the
+ * attributes present in @cda:
+ *   - CTA_EXPECT_TUPLE: delete the single expectation matching the tuple
+ *     (and, when CTA_EXPECT_ID is also given, only if the id matches);
+ *   - CTA_EXPECT_HELP_NAME: delete every expectation whose master uses the
+ *     named helper;
+ *   - neither: flush all expectations.
+ *
+ * Returns 0 on success, -ENOENT when the tuple/id does not match, -EINVAL
+ * when the helper name is unknown, or the tuple parser's negative result.
+ */
+static int
+ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, 
+                    struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+       struct ip_conntrack_expect *exp, *tmp;
+       struct ip_conntrack_tuple tuple;
+       struct ip_conntrack_helper *h;
+       int err;
+
+       if (cda[CTA_EXPECT_TUPLE-1]) {
+               /* delete a single expect by tuple */
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
+               if (err < 0)
+                       return err;
+
+               /* bump usage count to 2 */
+               exp = ip_conntrack_expect_find_get(&tuple);
+               if (!exp)
+                       return -ENOENT;
+
+               if (cda[CTA_EXPECT_ID-1]) {
+                       u_int32_t id = 
+                               *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
+                       if (exp->id != ntohl(id)) {
+                               ip_conntrack_expect_put(exp);
+                               return -ENOENT;
+                       }
+               }
+
+               /* after list removal, usage count == 1 */
+               ip_conntrack_unexpect_related(exp);
+               /* have to put what we 'get' above. 
+                * after this line usage count == 0 */
+               ip_conntrack_expect_put(exp);
+       } else if (cda[CTA_EXPECT_HELP_NAME-1]) {
+               char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);
+
+               /* delete all expectations for this helper */
+               write_lock_bh(&ip_conntrack_lock);
+               h = __ip_conntrack_helper_find_byname(name);
+               if (!h) {
+                       write_unlock_bh(&ip_conntrack_lock);
+                       return -EINVAL;
+               }
+               list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
+                                        list) {
+                       /* only unlink entries whose timer we stopped */
+                       if (exp->master->helper == h 
+                           && del_timer(&exp->timeout))
+                               __ip_ct_expect_unlink_destroy(exp);
+               }
+               /* must pair with write_lock_bh() above, otherwise bottom
+                * halves stay disabled after we return */
+               write_unlock_bh(&ip_conntrack_lock);
+       } else {
+               /* This basically means we have to flush everything*/
+               write_lock_bh(&ip_conntrack_lock);
+               list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
+                                        list) {
+                       if (del_timer(&exp->timeout))
+                               __ip_ct_expect_unlink_destroy(exp);
+               }
+               write_unlock_bh(&ip_conntrack_lock);
+       }
+
+       return 0;
+}
+/*
+ * Modifying an existing expectation is not implemented: both arguments
+ * are ignored and -EOPNOTSUPP is always returned.
+ */
+static int
+ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[])
+{
+       return -EOPNOTSUPP;
+}
+
+/*
+ * Create a new expectation from the CTA_EXPECT_TUPLE, CTA_EXPECT_MASK and
+ * CTA_EXPECT_MASTER attributes in @cda (the caller has already checked
+ * that all three are present).
+ *
+ * The master conntrack is looked up by its tuple and must have a helper.
+ * Returns the result of ip_conntrack_expect_related() on success paths,
+ * -ENOENT when no master matches, -EINVAL when the master has no helper,
+ * -ENOMEM when the expectation cannot be allocated, or the tuple parser's
+ * negative result.
+ */
+static int
+ctnetlink_create_expect(struct nfattr *cda[])
+{
+       struct ip_conntrack_tuple tuple, mask, master_tuple;
+       struct ip_conntrack_tuple_hash *master_hash;
+       struct ip_conntrack_expect *new_exp;
+       struct ip_conntrack *master;
+       int ret;
+
+       DEBUGP("entered %s\n", __FUNCTION__);
+
+       /* caller guarantees that those three CTA_EXPECT_* exist */
+       ret = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
+       if (ret < 0)
+               return ret;
+
+       ret = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK);
+       if (ret < 0)
+               return ret;
+
+       ret = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER);
+       if (ret < 0)
+               return ret;
+
+       /* Look for master conntrack of this expectation */
+       master_hash = ip_conntrack_find_get(&master_tuple, NULL);
+       if (master_hash == NULL)
+               return -ENOENT;
+       master = tuplehash_to_ctrack(master_hash);
+
+       /* such conntrack hasn't got any helper, abort */
+       if (master->helper == NULL) {
+               ret = -EINVAL;
+               goto put_master;
+       }
+
+       new_exp = ip_conntrack_expect_alloc(master);
+       if (new_exp == NULL) {
+               ret = -ENOMEM;
+               goto put_master;
+       }
+
+       new_exp->master = master;
+       new_exp->expectfn = NULL;
+       memcpy(&new_exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple));
+       memcpy(&new_exp->mask, &mask, sizeof(struct ip_conntrack_tuple));
+
+       ret = ip_conntrack_expect_related(new_exp);
+       /* release our own reference whatever the outcome was */
+       ip_conntrack_expect_put(new_exp);
+
+put_master:
+       ip_conntrack_put(tuplehash_to_ctrack(master_hash));
+       return ret;
+}
+
+/*
+ * Handle an IPCTNL_MSG_EXP_NEW request.  The tuple, mask and master
+ * attributes are all mandatory.  If no expectation matches the tuple, one
+ * is created when NLM_F_CREATE is set (-ENOENT otherwise).  If one already
+ * exists, -EEXIST is returned for NLM_F_EXCL, otherwise the request is
+ * handed to ctnetlink_change_expect().
+ */
+static int
+ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
+                    struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+       struct ip_conntrack_tuple tuple;
+       struct ip_conntrack_expect *found;
+       int ret;
+
+       DEBUGP("entered %s\n", __FUNCTION__);   
+
+       if (!(cda[CTA_EXPECT_TUPLE-1]
+             && cda[CTA_EXPECT_MASK-1]
+             && cda[CTA_EXPECT_MASTER-1]))
+               return -EINVAL;
+
+       ret = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
+       if (ret < 0)
+               return ret;
+
+       write_lock_bh(&ip_conntrack_lock);
+       found = __ip_conntrack_expect_find(&tuple);
+
+       if (found == NULL) {
+               /* nothing there yet: create it, but only on request */
+               write_unlock_bh(&ip_conntrack_lock);
+               if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+                       return -ENOENT;
+               return ctnetlink_create_expect(cda);
+       }
+
+       if (nlh->nlmsg_flags & NLM_F_EXCL)
+               ret = -EEXIST;
+       else
+               ret = ctnetlink_change_expect(found, cda);
+       write_unlock_bh(&ip_conntrack_lock);
+
+       DEBUGP("leaving\n");
+
+       return ret;
+}
+
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+/* Notifier block whose callback is ctnetlink_conntrack_event(). */
+static struct notifier_block ctnl_notifier = {
+       .notifier_call  = ctnetlink_conntrack_event,
+};
+
+/* Notifier block whose callback is ctnetlink_expect_event(). */
+static struct notifier_block ctnl_notifier_exp = {
+       .notifier_call  = ctnetlink_expect_event,
+};
+#endif
+
+/*
+ * Handlers for conntrack messages, indexed by IPCTNL_MSG_CT_* type.
+ * Every entry sets attr_count to CTA_MAX and requires CAP_NET_ADMIN.
+ * IPCTNL_MSG_CT_GET and IPCTNL_MSG_CT_GET_CTRZERO share one handler.
+ */
+static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
+       [IPCTNL_MSG_CT_NEW]             = { .call = ctnetlink_new_conntrack,
+                                           .attr_count = CTA_MAX,
+                                           .cap_required = CAP_NET_ADMIN },
+       [IPCTNL_MSG_CT_GET]             = { .call = ctnetlink_get_conntrack,
+                                           .attr_count = CTA_MAX,
+                                           .cap_required = CAP_NET_ADMIN },
+       [IPCTNL_MSG_CT_DELETE]          = { .call = ctnetlink_del_conntrack,
+                                           .attr_count = CTA_MAX,
+                                           .cap_required = CAP_NET_ADMIN },
+       [IPCTNL_MSG_CT_GET_CTRZERO]     = { .call = ctnetlink_get_conntrack,
+                                           .attr_count = CTA_MAX,
+                                           .cap_required = CAP_NET_ADMIN },
+};
+
+/*
+ * Handlers for expectation messages, indexed by IPCTNL_MSG_EXP_* type.
+ * Every entry sets attr_count to CTA_EXPECT_MAX and requires CAP_NET_ADMIN.
+ */
+static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
+       [IPCTNL_MSG_EXP_GET]            = { .call = ctnetlink_get_expect,
+                                           .attr_count = CTA_EXPECT_MAX,
+                                           .cap_required = CAP_NET_ADMIN },
+       [IPCTNL_MSG_EXP_NEW]            = { .call = ctnetlink_new_expect,
+                                           .attr_count = CTA_EXPECT_MAX,
+                                           .cap_required = CAP_NET_ADMIN },
+       [IPCTNL_MSG_EXP_DELETE]         = { .call = ctnetlink_del_expect,
+                                           .attr_count = CTA_EXPECT_MAX,
+                                           .cap_required = CAP_NET_ADMIN },
+};
+
+/* nfnetlink subsystem descriptor for conntrack messages (ctnl_cb). */
+static struct nfnetlink_subsystem ctnl_subsys = {
+       .name                           = "conntrack",
+       .subsys_id                      = NFNL_SUBSYS_CTNETLINK,
+       .cb_count                       = IPCTNL_MSG_MAX,
+       .cb                             = ctnl_cb,
+};
+
+/* nfnetlink subsystem descriptor for expectation messages (ctnl_exp_cb). */
+static struct nfnetlink_subsystem ctnl_exp_subsys = {
+       .name                           = "conntrack_expect",
+       .subsys_id                      = NFNL_SUBSYS_CTNETLINK_EXP,
+       .cb_count                       = IPCTNL_MSG_EXP_MAX,
+       .cb                             = ctnl_exp_cb,
+};
+
+/*
+ * Module initialisation: register the conntrack and expectation
+ * subsystems with nfnetlink and, when CONFIG_IP_NF_CONNTRACK_EVENTS is
+ * set, the two event notifiers.  On any failure everything registered so
+ * far is unregistered in reverse order and the negative result of the
+ * failing call is returned.  Returns 0 on success.
+ */
+static int __init ctnetlink_init(void)
+{
+       int ret;
+
+       printk("ctnetlink v%s: registering with nfnetlink.\n", version);
+       ret = nfnetlink_subsys_register(&ctnl_subsys);
+       if (ret < 0) {
+               printk("ctnetlink_init: cannot register with nfnetlink.\n");
+               goto err_out;
+       }
+
+       ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
+       if (ret < 0) {
+               printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
+               goto err_unreg_subsys;
+       }
+
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+       ret = ip_conntrack_register_notifier(&ctnl_notifier);
+       if (ret < 0) {
+               printk("ctnetlink_init: cannot register notifier.\n");
+               goto err_unreg_exp_subsys;
+       }
+
+       ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp);
+       if (ret < 0) {
+               printk("ctnetlink_init: cannot expect register notifier.\n");
+               goto err_unreg_notifier;
+       }
+#endif
+
+       return 0;
+
+       /* unwind ladder: each label undoes the step registered before it */
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+err_unreg_notifier:
+       ip_conntrack_unregister_notifier(&ctnl_notifier);
+err_unreg_exp_subsys:
+       nfnetlink_subsys_unregister(&ctnl_exp_subsys);
+#endif
+err_unreg_subsys:
+       nfnetlink_subsys_unregister(&ctnl_subsys);
+err_out:
+       return ret;
+}
+
+/*
+ * Module teardown: undo ctnetlink_init() in reverse order -- first the
+ * event notifiers (when CONFIG_IP_NF_CONNTRACK_EVENTS is set), then the
+ * two nfnetlink subsystems.
+ */
+static void __exit ctnetlink_exit(void)
+{
+       printk("ctnetlink: unregistering from nfnetlink.\n");
+
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+       /* ctnl_notifier_exp was registered on the expectation chain with
+        * ip_conntrack_expect_register_notifier(), so it has to be removed
+        * from that same chain, not from the conntrack one. */
+       ip_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
+       ip_conntrack_unregister_notifier(&ctnl_notifier);
+#endif
+
+       nfnetlink_subsys_unregister(&ctnl_exp_subsys);
+       nfnetlink_subsys_unregister(&ctnl_subsys);
+       return;
+}
+
+/* Module entry and exit points. */
+module_init(ctnetlink_init);
+module_exit(ctnetlink_exit);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c

index 602c74db3252de8a35b37d438a222ffdfb197df5..838d1d69b36e0acd868038124c39a81d3a92722b 100644 (file)
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -102,22 +102,24 @@ static int icmp_packet(struct ip_conntrack *ct,
                         ct->timeout.function((unsigned long)ct);
         } else {
                 atomic_inc(&ct->proto.icmp.count);
+               ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
                 ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
         }
  
         return NF_ACCEPT;
  }
  
+/*
+ * Lookup table indexed by ICMP type: a non-zero entry marks a type that
+ * is accepted by icmp_new() as the start of a new ICMP "connection".
+ */
+static u_int8_t valid_new[] = { 
+       [ICMP_ECHO] = 1,
+       [ICMP_TIMESTAMP] = 1,
+       [ICMP_INFO_REQUEST] = 1,
+       [ICMP_ADDRESS] = 1 
+};
+
  /* Called when a new connection for this protocol found. */
  static int icmp_new(struct ip_conntrack *conntrack,
                     const struct sk_buff *skb)
  {
-       static u_int8_t valid_new[]
-               = { [ICMP_ECHO] = 1,
-                   [ICMP_TIMESTAMP] = 1,
-                   [ICMP_INFO_REQUEST] = 1,
-                   [ICMP_ADDRESS] = 1 };
-
         if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
             || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
                 /* Can't create a new ICMP `conn' with this. */
@@ -158,11 +160,12 @@ icmp_error_message(struct sk_buff *skb,
                 return NF_ACCEPT;
         }
  
-       innerproto = ip_ct_find_proto(inside->ip.protocol);
+       innerproto = ip_conntrack_proto_find_get(inside->ip.protocol);
         dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4;
         /* Are they talking about one of our connections? */
         if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
                 DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
+               ip_conntrack_proto_put(innerproto);
                 return NF_ACCEPT;
         }
  
@@ -170,8 +173,10 @@ icmp_error_message(struct sk_buff *skb,
            been preserved inside the ICMP. */
         if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
                 DEBUGP("icmp_error_track: Can't invert tuple\n");
+               ip_conntrack_proto_put(innerproto);
                 return NF_ACCEPT;
         }
+       ip_conntrack_proto_put(innerproto);
  
         *ctinfo = IP_CT_RELATED;
  
@@ -212,7 +217,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
         icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
         if (icmph == NULL) {
                 if (LOG_INVALID(IPPROTO_ICMP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                       "ip_ct_icmp: short packet ");
                 return -NF_ACCEPT;
         }
@@ -226,13 +231,13 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
                 if (!(u16)csum_fold(skb->csum)) 
                         break;
                 if (LOG_INVALID(IPPROTO_ICMP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                       "ip_ct_icmp: bad HW ICMP checksum ");
                 return -NF_ACCEPT;
         case CHECKSUM_NONE:
                 if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
                         if (LOG_INVALID(IPPROTO_ICMP))
-                               nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                               nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                               "ip_ct_icmp: bad ICMP checksum ");
                         return -NF_ACCEPT;
                 }
@@ -249,7 +254,7 @@ checksum_skipped:
          */
         if (icmph->type > NR_ICMP_TYPES) {
                 if (LOG_INVALID(IPPROTO_ICMP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                       "ip_ct_icmp: invalid ICMP type ");
                 return -NF_ACCEPT;
         }
@@ -265,6 +270,47 @@ checksum_skipped:
         return icmp_error_message(skb, ctinfo, hooknum);
  }
  
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+/*
+ * Append the ICMP id, type and code of tuple @t to @skb as
+ * CTA_PROTO_ICMP_* attributes.  Returns 0 on success, -1 through the
+ * nfattr_failure label (presumably reached from NFA_PUT when the skb is
+ * full), or -EINVAL when the type is not marked in valid_new[].
+ *
+ * NOTE(review): the valid_new[] check runs only after all three attributes
+ * have already been appended, and it rejects every type that cannot start
+ * a new connection -- presumably including reply-direction tuples.
+ * Confirm against the callers that failing a dump for those is intended.
+ */
+static int icmp_tuple_to_nfattr(struct sk_buff *skb,
+                               const struct ip_conntrack_tuple *t)
+{
+       NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
+               &t->src.u.icmp.id);
+       NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
+               &t->dst.u.icmp.type);
+       NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
+               &t->dst.u.icmp.code);
+
+       if (t->dst.u.icmp.type >= sizeof(valid_new) 
+           || !valid_new[t->dst.u.icmp.type])
+               return -EINVAL;
+
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+
+/*
+ * Fill the ICMP part of @tuple (type, code, id) from the attribute table
+ * @tb.  All three CTA_PROTO_ICMP_* attributes are mandatory.
+ * Returns 0 on success and -1 when any attribute is missing.
+ */
+static int icmp_nfattr_to_tuple(struct nfattr *tb[],
+                               struct ip_conntrack_tuple *tuple)
+{
+       if (!tb[CTA_PROTO_ICMP_TYPE-1]
+           || !tb[CTA_PROTO_ICMP_CODE-1]
+           || !tb[CTA_PROTO_ICMP_ID-1])
+               return -1;
+
+       tuple->dst.u.icmp.type = 
+                       *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
+       tuple->dst.u.icmp.code =
+                       *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
+       /* the id is emitted as a 16-bit attribute by icmp_tuple_to_nfattr(),
+        * so read all 16 bits back instead of truncating to one byte */
+       tuple->src.u.icmp.id =
+                       *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+
+       return 0;
+}
+#endif
+
  struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
  {
         .proto                  = IPPROTO_ICMP,
@@ -276,4 +322,9 @@ struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
         .packet                 = icmp_packet,
         .new                    = icmp_new,
         .error                  = icmp_error,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+       .tuple_to_nfattr        = icmp_tuple_to_nfattr,
+       .nfattr_to_tuple        = icmp_nfattr_to_tuple,
+#endif
  };
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c

index 31d75390bf12b5e648b1b4cd73f701e75764a48d..a875f35e576ddfd117a2ac4224f1a5e9feff3016 100644 (file)
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -404,6 +404,8 @@ static int sctp_packet(struct ip_conntrack *conntrack,
                 }
  
                 conntrack->proto.sctp.state = newconntrack;
+               if (oldsctpstate != newconntrack)
+                       ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
                 write_unlock_bh(&sctp_lock);
         }
  
@@ -503,7 +505,12 @@ static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
         .packet          = sctp_packet, 
         .new             = sctp_new, 
         .destroy         = NULL, 
-       .me              = THIS_MODULE 
+       .me              = THIS_MODULE,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+       .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
+       .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
+#endif
  };
  
  #ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c

index 809dfed766d4274962fcd949c17814a1011ccace..f23ef1f88c46b40701f956b74f49f3c9d7ef9ceb 100644 (file)
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -336,6 +336,23 @@ static int tcp_print_conntrack(struct seq_file *s,
         return seq_printf(s, "%s ", tcp_conntrack_names[state]);
  }
  
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+/*
+ * Append the TCP state of conntrack @ct to @skb as a one-byte
+ * CTA_PROTOINFO_TCP_STATE attribute.  The state is read under tcp_lock.
+ * Returns 0 on success and -1 when the attribute could not be added.
+ */
+static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
+                        const struct ip_conntrack *ct)
+{
+       read_lock_bh(&tcp_lock);
+       NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
+               &ct->proto.tcp.state);
+       read_unlock_bh(&tcp_lock);
+
+       return 0;
+
+nfattr_failure:
+       /* NFA_PUT bailed out while tcp_lock was still held: release it */
+       read_unlock_bh(&tcp_lock);
+       return -1;
+}
+#endif
+
  static unsigned int get_conntrack_index(const struct tcphdr *tcph)
  {
         if (tcph->rst) return TCP_RST_SET;
@@ -699,7 +716,7 @@ static int tcp_in_window(struct ip_ct_tcp *state,
                 res = 1;
         } else {
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                         "ip_ct_tcp: %s ",
                         before(seq, sender->td_maxend + 1) ?
                         after(end, sender->td_end - receiver->td_maxwin - 1) ?
@@ -798,7 +815,7 @@ static int tcp_error(struct sk_buff *skb,
                                 sizeof(_tcph), &_tcph);
         if (th == NULL) {
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                 "ip_ct_tcp: short packet ");
                 return -NF_ACCEPT;
         }
@@ -806,7 +823,7 @@ static int tcp_error(struct sk_buff *skb,
         /* Not whole TCP header or malformed packet */
         if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                 "ip_ct_tcp: truncated/malformed packet ");
                 return -NF_ACCEPT;
         }
@@ -823,7 +840,7 @@ static int tcp_error(struct sk_buff *skb,
                                  skb->ip_summed == CHECKSUM_HW ? skb->csum
                                  : skb_checksum(skb, iph->ihl*4, tcplen, 0))) {
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                   "ip_ct_tcp: bad TCP checksum ");
                 return -NF_ACCEPT;
         }
@@ -832,7 +849,7 @@ static int tcp_error(struct sk_buff *skb,
         tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
         if (!tcp_valid_flags[tcpflags]) {
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                   "ip_ct_tcp: invalid TCP flag combination ");
                 return -NF_ACCEPT;
         }
@@ -880,8 +897,9 @@ static int tcp_packet(struct ip_conntrack *conntrack,
                          */
                         write_unlock_bh(&tcp_lock);
                         if (LOG_INVALID(IPPROTO_TCP))
-                               nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
-                                         "ip_ct_tcp: killing out of sync session ");
+                               nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                                             NULL, "ip_ct_tcp: "
+                                             "killing out of sync session ");
                         if (del_timer(&conntrack->timeout))
                                 conntrack->timeout.function((unsigned long)
                                                             conntrack);
@@ -895,7 +913,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
                 
                 write_unlock_bh(&tcp_lock);
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                   "ip_ct_tcp: invalid packet ignored ");
                 return NF_ACCEPT;
         case TCP_CONNTRACK_MAX:
@@ -905,7 +923,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
                        old_state);
                 write_unlock_bh(&tcp_lock);
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                   "ip_ct_tcp: invalid state ");
                 return -NF_ACCEPT;
         case TCP_CONNTRACK_SYN_SENT:
@@ -926,7 +944,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
                         write_unlock_bh(&tcp_lock);
                         if (LOG_INVALID(IPPROTO_TCP))
                                 nf_log_packet(PF_INET, 0, skb, NULL, NULL,
-                                             "ip_ct_tcp: invalid SYN");
+                                             NULL, "ip_ct_tcp: invalid SYN");
                         return -NF_ACCEPT;
                 }
         case TCP_CONNTRACK_CLOSE:
@@ -973,6 +991,10 @@ static int tcp_packet(struct ip_conntrack *conntrack,
                   ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
         write_unlock_bh(&tcp_lock);
  
+       ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+       if (new_state != old_state)
+               ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
+
         if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
                 /* If only reply is a RST, we can consider ourselves not to
                    have an established connection: this is a fairly common
@@ -1096,4 +1118,10 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
         .packet                 = tcp_packet,
         .new                    = tcp_new,
         .error                  = tcp_error,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+       .to_nfattr              = tcp_to_nfattr,
+       .tuple_to_nfattr        = ip_ct_port_tuple_to_nfattr,
+       .nfattr_to_tuple        = ip_ct_port_nfattr_to_tuple,
+#endif
  };
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c

index 8c1eaba098d4cbf6829c8bf79eb3a0ef29a01b14..f2dcac7c76607830e28c092494238b9ea548e2ae 100644 (file)
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -73,7 +73,8 @@ static int udp_packet(struct ip_conntrack *conntrack,
                 ip_ct_refresh_acct(conntrack, ctinfo, skb, 
                                    ip_ct_udp_timeout_stream);
                 /* Also, more likely to be important, and not a probe */
-               set_bit(IPS_ASSURED_BIT, &conntrack->status);
+               if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
+                       ip_conntrack_event_cache(IPCT_STATUS, skb);
         } else
                 ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
  
@@ -97,7 +98,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
         hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr);
         if (hdr == NULL) {
                 if (LOG_INVALID(IPPROTO_UDP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                   "ip_ct_udp: short packet ");
                 return -NF_ACCEPT;
         }
@@ -105,7 +106,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
         /* Truncated/malformed packets */
         if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
                 if (LOG_INVALID(IPPROTO_UDP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                   "ip_ct_udp: truncated/malformed packet ");
                 return -NF_ACCEPT;
         }
@@ -125,7 +126,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
                                  skb->ip_summed == CHECKSUM_HW ? skb->csum
                                  : skb_checksum(skb, iph->ihl*4, udplen, 0))) {
                 if (LOG_INVALID(IPPROTO_UDP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                   "ip_ct_udp: bad UDP checksum ");
                 return -NF_ACCEPT;
         }
@@ -144,4 +145,9 @@ struct ip_conntrack_protocol ip_conntrack_protocol_udp =
         .packet                 = udp_packet,
         .new                    = udp_new,
         .error                  = udp_error,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+       .tuple_to_nfattr        = ip_ct_port_tuple_to_nfattr,
+       .nfattr_to_tuple        = ip_ct_port_nfattr_to_tuple,
+#endif
  };
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c

index 61798c46e91d06e051947db664c301a45b5b6b74..ee5895afd0c3e8a0bfb932c56bc623f16de0cd05 100644 (file)
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -5,7 +5,7 @@
  */
  
  /* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
@@ -147,8 +147,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
         if (DIRECTION(hash))
                 return 0;
  
-       proto = ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-                              .tuple.dst.protonum);
+       proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
         IP_NF_ASSERT(proto);
  
         if (seq_printf(s, "%-8s %u %ld ",
@@ -185,7 +184,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
                         return -ENOSPC;
  
  #if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-       if (seq_printf(s, "mark=%lu ", conntrack->mark))
+       if (seq_printf(s, "mark=%u ", conntrack->mark))
                 return -ENOSPC;
  #endif
  
@@ -283,7 +282,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
         seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
  
         print_tuple(s, &expect->tuple,
-                   ip_ct_find_proto(expect->tuple.dst.protonum));
+                   __ip_conntrack_proto_find(expect->tuple.dst.protonum));
         return seq_putc(s, '\n');
  }
  
@@ -889,6 +888,7 @@ static int init_or_cleanup(int init)
         return ret;
  
   cleanup:
+       synchronize_net();
  #ifdef CONFIG_SYSCTL
         unregister_sysctl_table(ip_ct_sysctl_header);
   cleanup_localinops:
@@ -971,6 +971,14 @@ void need_ip_conntrack(void)
  {
  }
  
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+EXPORT_SYMBOL_GPL(ip_conntrack_chain);
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain);
+EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
+EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
+EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
+EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
+#endif
  EXPORT_SYMBOL(ip_conntrack_protocol_register);
  EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
  EXPORT_SYMBOL(ip_ct_get_tuple);
@@ -982,12 +990,16 @@ EXPORT_SYMBOL(ip_conntrack_helper_register);
  EXPORT_SYMBOL(ip_conntrack_helper_unregister);
  EXPORT_SYMBOL(ip_ct_iterate_cleanup);
  EXPORT_SYMBOL(ip_ct_refresh_acct);
-EXPORT_SYMBOL(ip_ct_protos);
-EXPORT_SYMBOL(ip_ct_find_proto);
+
  EXPORT_SYMBOL(ip_conntrack_expect_alloc);
  EXPORT_SYMBOL(ip_conntrack_expect_put);
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
  EXPORT_SYMBOL(ip_conntrack_expect_related);
  EXPORT_SYMBOL(ip_conntrack_unexpect_related);
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
+EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
+EXPORT_SYMBOL_GPL(__ip_ct_expect_unlink_destroy);
+
  EXPORT_SYMBOL(ip_conntrack_tuple_taken);
  EXPORT_SYMBOL(ip_ct_gather_frags);
  EXPORT_SYMBOL(ip_conntrack_htable_size);
@@ -995,7 +1007,28 @@ EXPORT_SYMBOL(ip_conntrack_lock);
  EXPORT_SYMBOL(ip_conntrack_hash);
  EXPORT_SYMBOL(ip_conntrack_untracked);
  EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_put);
  #ifdef CONFIG_IP_NF_NAT_NEEDED
  EXPORT_SYMBOL(ip_conntrack_tcp_update);
  #endif
+
+EXPORT_SYMBOL_GPL(ip_conntrack_flush);
+EXPORT_SYMBOL_GPL(__ip_conntrack_find);
+
+EXPORT_SYMBOL_GPL(ip_conntrack_alloc);
+EXPORT_SYMBOL_GPL(ip_conntrack_free);
+EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert);
+
+EXPORT_SYMBOL_GPL(ip_ct_remove_expectations);
+
+EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get);
+EXPORT_SYMBOL_GPL(ip_conntrack_helper_put);
+EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname);
+
+EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get);
+EXPORT_SYMBOL_GPL(ip_conntrack_proto_put);
+EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find);
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr);
+EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple);
+#endif
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c

index 739b6dde1c826e3e77ef246ba240234a3b31c3f6..1adedb743f609f8b349a545184040d54512f639d 100644 (file)
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -47,8 +47,39 @@ DEFINE_RWLOCK(ip_nat_lock);
  static unsigned int ip_nat_htable_size;
  
  static struct list_head *bysource;
+
+#define MAX_IP_NAT_PROTO 256
  struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
  
+static inline struct ip_nat_protocol *
+__ip_nat_proto_find(u_int8_t protonum)
+{
+       return ip_nat_protos[protonum];
+}
+
+struct ip_nat_protocol *
+ip_nat_proto_find_get(u_int8_t protonum)
+{
+       struct ip_nat_protocol *p;
+
+       /* we need to disable preemption to make sure 'p' doesn't get
+        * removed until we've grabbed the reference */
+       preempt_disable();
+       p = __ip_nat_proto_find(protonum);
+       if (p) {
+               if (!try_module_get(p->me))
+                       p = &ip_nat_unknown_protocol;
+       }
+       preempt_enable();
+
+       return p;
+}
+
+void
+ip_nat_proto_put(struct ip_nat_protocol *p)
+{
+       module_put(p->me);
+}
  
  /* We keep an extra hash for each conntrack, for fast searching. */
  static inline unsigned int
@@ -103,7 +134,8 @@ static int
  in_range(const struct ip_conntrack_tuple *tuple,
          const struct ip_nat_range *range)
  {
-       struct ip_nat_protocol *proto = ip_nat_find_proto(tuple->dst.protonum);
+       struct ip_nat_protocol *proto = 
+                               __ip_nat_proto_find(tuple->dst.protonum);
  
         /* If we are supposed to map IPs, then we must be in the
            range specified, otherwise let this drag us onto a new src IP. */
@@ -216,8 +248,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
                  struct ip_conntrack *conntrack,
                  enum ip_nat_manip_type maniptype)
  {
-       struct ip_nat_protocol *proto
-               = ip_nat_find_proto(orig_tuple->dst.protonum);
+       struct ip_nat_protocol *proto;
  
         /* 1) If this srcip/proto/src-proto-part is currently mapped,
            and that same mapping gives a unique tuple within the given
@@ -242,14 +273,20 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
         /* 3) The per-protocol part of the manip is made to map into
            the range to make a unique tuple. */
  
+       proto = ip_nat_proto_find_get(orig_tuple->dst.protonum);
+
         /* Only bother mapping if it's not already in range and unique */
         if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
              || proto->in_range(tuple, maniptype, &range->min, &range->max))
-           && !ip_nat_used_tuple(tuple, conntrack))
+           && !ip_nat_used_tuple(tuple, conntrack)) {
+               ip_nat_proto_put(proto);
                 return;
+       }
  
         /* Last change: get protocol to try to obtain unique tuple. */
         proto->unique_tuple(tuple, range, maniptype, conntrack);
+
+       ip_nat_proto_put(proto);
  }
  
  unsigned int
@@ -320,17 +357,20 @@ manip_pkt(u_int16_t proto,
           enum ip_nat_manip_type maniptype)
  {
         struct iphdr *iph;
+       struct ip_nat_protocol *p;
  
-       (*pskb)->nfcache |= NFC_ALTERED;
-       if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph)))
+       if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
                 return 0;
  
         iph = (void *)(*pskb)->data + iphdroff;
  
         /* Manipulate protcol part. */
-       if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff,
-                                                target, maniptype))
+       p = ip_nat_proto_find_get(proto);
+       if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) {
+               ip_nat_proto_put(p);
                 return 0;
+       }
+       ip_nat_proto_put(p);
  
         iph = (void *)(*pskb)->data + iphdroff;
  
@@ -391,7 +431,7 @@ int icmp_reply_translation(struct sk_buff **pskb,
         struct ip_conntrack_tuple inner, target;
         int hdrlen = (*pskb)->nh.iph->ihl * 4;
  
-       if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside)))
+       if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
                 return 0;
  
         inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
@@ -426,7 +466,8 @@ int icmp_reply_translation(struct sk_buff **pskb,
  
         if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
                              sizeof(struct icmphdr) + inside->ip.ihl*4,
-                            &inner, ip_ct_find_proto(inside->ip.protocol)))
+                            &inner,
+                            __ip_conntrack_proto_find(inside->ip.protocol)))
                 return 0;
  
         /* Change inner back to look like incoming packet.  We do the
@@ -496,6 +537,49 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
         synchronize_net();
  }
  
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+int
+ip_nat_port_range_to_nfattr(struct sk_buff *skb, 
+                           const struct ip_nat_range *range)
+{
+       NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t),
+               &range->min.tcp.port);
+       NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t),
+               &range->max.tcp.port);
+
+       return 0;
+
+nfattr_failure:
+       return -1;
+}
+
+int
+ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
+{
+       int ret = 0;
+       
+       /* we have to return whether we actually parsed something or not */
+
+       if (tb[CTA_PROTONAT_PORT_MIN-1]) {
+               ret = 1;
+               range->min.tcp.port = 
+                       *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
+       }
+       
+       if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
+               if (ret) 
+                       range->max.tcp.port = range->min.tcp.port;
+       } else {
+               ret = 1;
+               range->max.tcp.port = 
+                       *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
+       }
+
+       return ret;
+}
+#endif
+
  int __init ip_nat_init(void)
  {
         size_t i;
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c

index 158f34f32c043e789bb263d74bdd1a01299c8b03..d2dd5d3135563f9e17631f6ab72e8bd42bcad387 100644 (file)
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -168,7 +168,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
         struct tcphdr *tcph;
         int datalen;
  
-       if (!skb_ip_make_writable(pskb, (*pskb)->len))
+       if (!skb_make_writable(pskb, (*pskb)->len))
                 return 0;
  
         if (rep_len > match_len
@@ -228,7 +228,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
                                match_offset + match_len)
                 return 0;
  
-       if (!skb_ip_make_writable(pskb, (*pskb)->len))
+       if (!skb_make_writable(pskb, (*pskb)->len))
                 return 0;
  
         if (rep_len > match_len
@@ -315,7 +315,7 @@ ip_nat_sack_adjust(struct sk_buff **pskb,
         optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
         optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
  
-       if (!skb_ip_make_writable(pskb, optend))
+       if (!skb_make_writable(pskb, optend))
                 return 0;
  
         dir = CTINFO2DIR(ctinfo);
@@ -363,7 +363,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
         this_way = &ct->nat.info.seq[dir];
         other_way = &ct->nat.info.seq[!dir];
  
-       if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+       if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
                 return 0;
  
         tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c

index 6596c9ee1655914db4789691416d60aee56ea86c..93871904399916cc1cde540b1cd3baba5111d98d 100644 (file)
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -62,7 +62,7 @@ icmp_manip_pkt(struct sk_buff **pskb,
         struct icmphdr *hdr;
         unsigned int hdroff = iphdroff + iph->ihl*4;
  
-       if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr)))
+       if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
                 return 0;
  
         hdr = (struct icmphdr *)((*pskb)->data + hdroff);
@@ -106,11 +106,18 @@ icmp_print_range(char *buffer, const struct ip_nat_range *range)
         else return 0;
  }
  
-struct ip_nat_protocol ip_nat_protocol_icmp
-= { "ICMP", IPPROTO_ICMP,
-    icmp_manip_pkt,
-    icmp_in_range,
-    icmp_unique_tuple,
-    icmp_print,
-    icmp_print_range
+struct ip_nat_protocol ip_nat_protocol_icmp = {
+       .name                   = "ICMP",
+       .protonum               = IPPROTO_ICMP,
+       .me                     = THIS_MODULE,
+       .manip_pkt              = icmp_manip_pkt,
+       .in_range               = icmp_in_range,
+       .unique_tuple           = icmp_unique_tuple,
+       .print                  = icmp_print,
+       .print_range            = icmp_print_range,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+       .range_to_nfattr        = ip_nat_port_range_to_nfattr,
+       .nfattr_to_range        = ip_nat_port_nfattr_to_range,
+#endif
  };
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c

index a98e36d2b3c627d66cec58ece9d2b173c98de71d..1d381bf68574ade3451930faa7c82ee9c42020f4 100644 (file)
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -12,6 +12,7 @@
  #include <linux/ip.h>
  #include <linux/tcp.h>
  #include <linux/if.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
  #include <linux/netfilter_ipv4/ip_nat.h>
  #include <linux/netfilter_ipv4/ip_nat_rule.h>
  #include <linux/netfilter_ipv4/ip_nat_protocol.h>
@@ -102,7 +103,7 @@ tcp_manip_pkt(struct sk_buff **pskb,
         if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
                 hdrsize = sizeof(struct tcphdr);
  
-       if (!skb_ip_make_writable(pskb, hdroff + hdrsize))
+       if (!skb_make_writable(pskb, hdroff + hdrsize))
                 return 0;
  
         iph = (struct iphdr *)((*pskb)->data + iphdroff);
@@ -169,11 +170,18 @@ tcp_print_range(char *buffer, const struct ip_nat_range *range)
         else return 0;
  }
  
-struct ip_nat_protocol ip_nat_protocol_tcp
-= { "TCP", IPPROTO_TCP,
-    tcp_manip_pkt,
-    tcp_in_range,
-    tcp_unique_tuple,
-    tcp_print,
-    tcp_print_range
+struct ip_nat_protocol ip_nat_protocol_tcp = {
+       .name                   = "TCP",
+       .protonum               = IPPROTO_TCP,
+       .me                     = THIS_MODULE,
+       .manip_pkt              = tcp_manip_pkt,
+       .in_range               = tcp_in_range,
+       .unique_tuple           = tcp_unique_tuple,
+       .print                  = tcp_print,
+       .print_range            = tcp_print_range,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+       .range_to_nfattr        = ip_nat_port_range_to_nfattr,
+       .nfattr_to_range        = ip_nat_port_nfattr_to_range,
+#endif
  };
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c

index 9f66e56256644e0756e82f73e70a412883de23b1..c4906e1aa24a01027db952b38bccc2bea57f202e 100644 (file)
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -94,7 +94,7 @@ udp_manip_pkt(struct sk_buff **pskb,
         u32 oldip, newip;
         u16 *portptr, newport;
  
-       if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr)))
+       if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
                 return 0;
  
         iph = (struct iphdr *)((*pskb)->data + iphdroff);
@@ -156,11 +156,18 @@ udp_print_range(char *buffer, const struct ip_nat_range *range)
         else return 0;
  }
  
-struct ip_nat_protocol ip_nat_protocol_udp
-= { "UDP", IPPROTO_UDP,
-    udp_manip_pkt,
-    udp_in_range,
-    udp_unique_tuple,
-    udp_print,
-    udp_print_range
+struct ip_nat_protocol ip_nat_protocol_udp = {
+       .name                   = "UDP",
+       .protonum               = IPPROTO_UDP,
+       .me                     = THIS_MODULE,
+       .manip_pkt              = udp_manip_pkt,
+       .in_range               = udp_in_range,
+       .unique_tuple           = udp_unique_tuple,
+       .print                  = udp_print,
+       .print_range            = udp_print_range,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+       .range_to_nfattr        = ip_nat_port_range_to_nfattr,
+       .nfattr_to_range        = ip_nat_port_nfattr_to_range,
+#endif
  };
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c

index f5525bd58d16f2e8acb6dd7c167a130ad8f57dfe..99bbef56f84e9ff346b856760e4db18319d3d78f 100644 (file)
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -61,10 +61,11 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range)
  }
  
  struct ip_nat_protocol ip_nat_unknown_protocol = {
-       "unknown", 0,
-       unknown_manip_pkt,
-       unknown_in_range,
-       unknown_unique_tuple,
-       unknown_print,
-       unknown_print_range
+       .name                   = "unknown",
+       .me                     = THIS_MODULE,
+       .manip_pkt              = unknown_manip_pkt,
+       .in_range               = unknown_in_range,
+       .unique_tuple           = unknown_unique_tuple,
+       .print                  = unknown_print,
+       .print_range            = unknown_print_range
  };
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c

index 2a48b6e635aef35193fb3b70babe78d17c03519d..93b2c5111bb2338b15f765ed604866d4a860310b 100644 (file)
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -1275,7 +1275,7 @@ static int help(struct sk_buff **pskb,
                  return NF_DROP;
         }
  
-       if (!skb_ip_make_writable(pskb, (*pskb)->len))
+       if (!skb_make_writable(pskb, (*pskb)->len))
                 return NF_DROP;
  
         spin_lock_bh(&snmp_lock);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c

index bc59d0d6e89ef5bf16512ea6cc0a5d246d6a4f2d..89db052add81e4a61def654f00cc4bcc9d92f889 100644 (file)
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -73,8 +73,6 @@ ip_nat_fn(unsigned int hooknum,
         IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
                        & htons(IP_MF|IP_OFFSET)));
  
-       (*pskb)->nfcache |= NFC_UNKNOWN;
-
         /* If we had a hardware checksum before, it's now invalid */
         if ((*pskb)->ip_summed == CHECKSUM_HW)
                 if (skb_checksum_help(*pskb, (out == NULL)))
@@ -102,6 +100,10 @@ ip_nat_fn(unsigned int hooknum,
                 return NF_ACCEPT;
         }
  
+       /* Don't try to NAT if this packet is not conntracked */
+       if (ct == &ip_conntrack_untracked)
+               return NF_ACCEPT;
+
         switch (ctinfo) {
         case IP_CT_RELATED:
         case IP_CT_RELATED+IP_CT_IS_REPLY:
@@ -392,6 +394,8 @@ module_exit(fini);
  EXPORT_SYMBOL(ip_nat_setup_info);
  EXPORT_SYMBOL(ip_nat_protocol_register);
  EXPORT_SYMBOL(ip_nat_protocol_unregister);
+EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
+EXPORT_SYMBOL_GPL(ip_nat_proto_put);
  EXPORT_SYMBOL(ip_nat_cheat_check);
  EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
  EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c

index eda1fba431a415cef7f9533c8fdbcaca03f4f4a1..d54f14d926f6e0caafc80a68cc8e84d60d1b86d7 100644 (file)
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -43,17 +43,10 @@
  #define NET_IPQ_QMAX 2088
  #define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
  
-struct ipq_rt_info {
-       __u8 tos;
-       __u32 daddr;
-       __u32 saddr;
-};
-
  struct ipq_queue_entry {
         struct list_head list;
         struct nf_info *info;
         struct sk_buff *skb;
-       struct ipq_rt_info rt_info;
  };
  
  typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
@@ -214,6 +207,12 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
                 break;
         
         case IPQ_COPY_PACKET:
+               if (entry->skb->ip_summed == CHECKSUM_HW &&
+                   (*errp = skb_checksum_help(entry->skb,
+                                              entry->info->outdev == NULL))) {
+                       read_unlock_bh(&queue_lock);
+                       return NULL;
+               }
                 if (copy_range == 0 || copy_range > entry->skb->len)
                         data_len = entry->skb->len;
                 else
@@ -241,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
  
         pmsg->packet_id       = (unsigned long )entry;
         pmsg->data_len        = data_len;
-       pmsg->timestamp_sec   = entry->skb->stamp.tv_sec;
-       pmsg->timestamp_usec  = entry->skb->stamp.tv_usec;
+       pmsg->timestamp_sec   = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec;
+       pmsg->timestamp_usec  = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec;
         pmsg->mark            = entry->skb->nfmark;
         pmsg->hook            = entry->info->hook;
         pmsg->hw_protocol     = entry->skb->protocol;
@@ -281,7 +280,8 @@ nlmsg_failure:
  }
  
  static int
-ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
+ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
+                  unsigned int queuenum, void *data)
  {
         int status = -EINVAL;
         struct sk_buff *nskb;
@@ -299,14 +299,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
         entry->info = info;
         entry->skb = skb;
  
-       if (entry->info->hook == NF_IP_LOCAL_OUT) {
-               struct iphdr *iph = skb->nh.iph;
-
-               entry->rt_info.tos = iph->tos;
-               entry->rt_info.daddr = iph->daddr;
-               entry->rt_info.saddr = iph->saddr;
-       }
-
         nskb = ipq_build_packet_message(entry, &status);
         if (nskb == NULL)
                 goto err_out_free;
@@ -382,23 +374,11 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
                 }
                 skb_put(e->skb, diff);
         }
-       if (!skb_ip_make_writable(&e->skb, v->data_len))
+       if (!skb_make_writable(&e->skb, v->data_len))
                 return -ENOMEM;
         memcpy(e->skb->data, v->payload, v->data_len);
-       e->skb->nfcache |= NFC_ALTERED;
-
-       /*
-        * Extra routing may needed on local out, as the QUEUE target never
-        * returns control to the table.
-        */
-       if (e->info->hook == NF_IP_LOCAL_OUT) {
-               struct iphdr *iph = e->skb->nh.iph;
-
-               if (!(iph->tos == e->rt_info.tos
-                     && iph->daddr == e->rt_info.daddr
-                     && iph->saddr == e->rt_info.saddr))
-                       return ip_route_me_harder(&e->skb);
-       }
+       e->skb->ip_summed = CHECKSUM_NONE;
+
         return 0;
  }
  
@@ -676,6 +656,11 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
  }
  #endif /* CONFIG_PROC_FS */
  
+static struct nf_queue_handler nfqh = {
+       .name   = "ip_queue",
+       .outfn  = &ipq_enqueue_packet,
+};
+
  static int
  init_or_cleanup(int init)
  {
@@ -686,7 +671,8 @@ init_or_cleanup(int init)
                 goto cleanup;
  
         netlink_register_notifier(&ipq_nl_notifier);
-       ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk);
+       ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
+                                     THIS_MODULE);
         if (ipqnl == NULL) {
                 printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
                 goto cleanup_netlink_notifier;
@@ -703,7 +689,7 @@ init_or_cleanup(int init)
         register_netdevice_notifier(&ipq_dev_notifier);
         ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
         
-       status = nf_register_queue_handler(PF_INET, ipq_enqueue_packet, NULL);
+       status = nf_register_queue_handler(PF_INET, &nfqh);
         if (status < 0) {
                 printk(KERN_ERR "ip_queue: failed to register queue handler\n");
                 goto cleanup_sysctl;
@@ -711,7 +697,7 @@ init_or_cleanup(int init)
         return status;
  
  cleanup:
-       nf_unregister_queue_handler(PF_INET);
+       nf_unregister_queue_handlers(&nfqh);
         synchronize_net();
         ipq_flush(NF_DROP);
         
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c

index c88dfcd38c5623792e9876810129be204e010915..eef99a1b5de6e5fd35103497e8caa13458f11820 100644 (file)
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -312,7 +312,6 @@ ipt_do_table(struct sk_buff **pskb,
         do {
                 IP_NF_ASSERT(e);
                 IP_NF_ASSERT(back);
-               (*pskb)->nfcache |= e->nfcache;
                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
                         struct ipt_entry_target *t;
  
@@ -341,8 +340,8 @@ ipt_do_table(struct sk_buff **pskb,
                                                          back->comefrom);
                                         continue;
                                 }
-                               if (table_base + v
-                                   != (void *)e + e->next_offset) {
+                               if (table_base + v != (void *)e + e->next_offset
+                                   && !(e->ip.flags & IPT_F_GOTO)) {
                                         /* Save old back ptr in next entry */
                                         struct ipt_entry *next
                                                 = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c

index 9842e6e231845c7aec4e42dafe4585f03fca4def..dab78d8bd494fd3d00fd92a1126b2ea56e0d389d 100644 (file)
--- a/net/ipv4/netfilter/ipt_CLASSIFY.c
+++ b/net/ipv4/netfilter/ipt_CLASSIFY.c
@@ -32,10 +32,8 @@ target(struct sk_buff **pskb,
  {
         const struct ipt_classify_target_info *clinfo = targinfo;
  
-       if((*pskb)->priority != clinfo->priority) {
+       if((*pskb)->priority != clinfo->priority) 
                 (*pskb)->priority = clinfo->priority;
-               (*pskb)->nfcache |= NFC_ALTERED;
-       }
  
         return IPT_CONTINUE;
  }
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c

index 6706d3a1bc4fbe548746412597497ddccf895dc4..2d05cafec22120ecae24351e31f4a5f17431433a 100644 (file)
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -367,7 +367,7 @@ target(struct sk_buff **pskb,
  #ifdef DEBUG_CLUSTERP
         DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
  #endif
-       DEBUGP("hash=%u ct_hash=%lu ", hash, ct->mark);
+       DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
         if (!clusterip_responsible(cipinfo->config, hash)) {
                 DEBUGP("not responsible\n");
                 return NF_DROP;
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c

index 30ddd3e18eb747184b80eea693778058a85c14ec..134638021339d8126885231fa7abc896ecfb2266 100644 (file)
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -40,9 +40,9 @@ target(struct sk_buff **pskb,
         void *userinfo)
  {
         const struct ipt_connmark_target_info *markinfo = targinfo;
-       unsigned long diff;
-       unsigned long nfmark;
-       unsigned long newmark;
+       u_int32_t diff;
+       u_int32_t nfmark;
+       u_int32_t newmark;
  
         enum ip_conntrack_info ctinfo;
         struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
@@ -61,10 +61,8 @@ target(struct sk_buff **pskb,
             case IPT_CONNMARK_RESTORE:
                 nfmark = (*pskb)->nfmark;
                 diff = (ct->mark ^ nfmark) & markinfo->mask;
-               if (diff != 0) {
+               if (diff != 0)
                     (*pskb)->nfmark = nfmark ^ diff;
-                   (*pskb)->nfcache |= NFC_ALTERED;
-               }
                 break;
             }
         }
@@ -94,6 +92,11 @@ checkentry(const char *tablename,
             }
         }
  
+       if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) {
+               printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
+               return 0;
+       }
+
         return 1;
  }
  
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c

index 3ea4509099f907ec15fe0975e3dc9d8def7f78eb..6e319570a28caf01107dff4e825f6844aa52d992 100644 (file)
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ b/net/ipv4/netfilter/ipt_DSCP.c
@@ -39,7 +39,7 @@ target(struct sk_buff **pskb,
         if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) {
                 u_int16_t diffs[2];
  
-               if (!skb_ip_make_writable(pskb, sizeof(struct iphdr)))
+               if (!skb_make_writable(pskb, sizeof(struct iphdr)))
                         return NF_DROP;
  
                 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
@@ -51,7 +51,6 @@ target(struct sk_buff **pskb,
                                                  sizeof(diffs),
                                                  (*pskb)->nh.iph->check
                                                  ^ 0xFFFF));
-               (*pskb)->nfcache |= NFC_ALTERED;
         }
         return IPT_CONTINUE;
  }
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c

index ada9911118e9a7ec2ee841db8b3f250e48bfb71c..a1319693f648c2beba63fea33070c8c79fe04fef 100644 (file)
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -31,7 +31,7 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
             != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
                 u_int16_t diffs[2];
  
-               if (!skb_ip_make_writable(pskb, sizeof(struct iphdr)))
+               if (!skb_make_writable(pskb, sizeof(struct iphdr)))
                         return 0;
  
                 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
@@ -43,7 +43,6 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
                                                  sizeof(diffs),
                                                  (*pskb)->nh.iph->check
                                                  ^0xFFFF));
-               (*pskb)->nfcache |= NFC_ALTERED;
         } 
         return 1;
  }
@@ -61,16 +60,20 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
         if (!tcph)
                 return 0;
  
-       if (!(einfo->operation & IPT_ECN_OP_SET_ECE
-             || tcph->ece == einfo->proto.tcp.ece)
-           && (!(einfo->operation & IPT_ECN_OP_SET_CWR
-                 || tcph->cwr == einfo->proto.tcp.cwr)))
+       if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) ||
+            tcph->ece == einfo->proto.tcp.ece) &&
+           ((!(einfo->operation & IPT_ECN_OP_SET_CWR) ||
+            tcph->cwr == einfo->proto.tcp.cwr)))
                 return 1;
  
-       if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+       if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
                 return 0;
         tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
  
+       if ((*pskb)->ip_summed == CHECKSUM_HW &&
+           skb_checksum_help(*pskb, inward))
+               return 0;
+
         diffs[0] = ((u_int16_t *)tcph)[6];
         if (einfo->operation & IPT_ECN_OP_SET_ECE)
                 tcph->ece = einfo->proto.tcp.ece;
@@ -79,14 +82,10 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
         diffs[1] = ((u_int16_t *)tcph)[6];
         diffs[0] = diffs[0] ^ 0xFFFF;
  
-       if ((*pskb)->ip_summed != CHECKSUM_HW)
+       if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY)
                 tcph->check = csum_fold(csum_partial((char *)diffs,
                                                      sizeof(diffs),
                                                      tcph->check^0xFFFF));
-       else
-               if (skb_checksum_help(*pskb, inward))
-                       return 0;
-       (*pskb)->nfcache |= NFC_ALTERED;
         return 1;
  }
  
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c

index ef08733d26da277a2fc473454e4f7b6832e6e4c3..92ed050fac69bcce35424122c8a69c5d4ac5c48f 100644 (file)
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -27,10 +27,6 @@ MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
  MODULE_DESCRIPTION("iptables syslog logging module");
  
-static unsigned int nflog = 1;
-module_param(nflog, int, 0400);
-MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
- 
  #if 0
  #define DEBUGP printk
  #else
@@ -41,11 +37,17 @@ MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
  static DEFINE_SPINLOCK(log_lock);
  
  /* One level of recursion won't kill us */
-static void dump_packet(const struct ipt_log_info *info,
+static void dump_packet(const struct nf_loginfo *info,
                         const struct sk_buff *skb,
                         unsigned int iphoff)
  {
         struct iphdr _iph, *ih;
+       unsigned int logflags;
+
+       if (info->type == NF_LOG_TYPE_LOG)
+               logflags = info->u.log.logflags;
+       else
+               logflags = NF_LOG_MASK;
  
         ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
         if (ih == NULL) {
@@ -76,7 +78,7 @@ static void dump_packet(const struct ipt_log_info *info,
         if (ntohs(ih->frag_off) & IP_OFFSET)
                 printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
  
-       if ((info->logflags & IPT_LOG_IPOPT)
+       if ((logflags & IPT_LOG_IPOPT)
             && ih->ihl * 4 > sizeof(struct iphdr)) {
                 unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op;
                 unsigned int i, optsize;
@@ -119,7 +121,7 @@ static void dump_packet(const struct ipt_log_info *info,
                 printk("SPT=%u DPT=%u ",
                        ntohs(th->source), ntohs(th->dest));
                 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
-               if (info->logflags & IPT_LOG_TCPSEQ)
+               if (logflags & IPT_LOG_TCPSEQ)
                         printk("SEQ=%u ACK=%u ",
                                ntohl(th->seq), ntohl(th->ack_seq));
                 /* Max length: 13 "WINDOW=65535 " */
@@ -146,7 +148,7 @@ static void dump_packet(const struct ipt_log_info *info,
                 /* Max length: 11 "URGP=65535 " */
                 printk("URGP=%u ", ntohs(th->urg_ptr));
  
-               if ((info->logflags & IPT_LOG_TCPOPT)
+               if ((logflags & IPT_LOG_TCPOPT)
                     && th->doff * 4 > sizeof(struct tcphdr)) {
                         unsigned char _opt[4 * 15 - sizeof(struct tcphdr)];
                         unsigned char *op;
@@ -328,7 +330,7 @@ static void dump_packet(const struct ipt_log_info *info,
         }
  
         /* Max length: 15 "UID=4294967295 " */
-       if ((info->logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
+       if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
                 read_lock_bh(&skb->sk->sk_callback_lock);
                 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
                         printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
@@ -349,19 +351,31 @@ static void dump_packet(const struct ipt_log_info *info,
         /* maxlen = 230+   91  + 230 + 252 = 803 */
  }
  
+struct nf_loginfo default_loginfo = {
+       .type   = NF_LOG_TYPE_LOG,
+       .u = {
+               .log = {
+                       .level    = 0,
+                       .logflags = NF_LOG_MASK,
+               },
+       },
+};
+
  static void
-ipt_log_packet(unsigned int hooknum,
+ipt_log_packet(unsigned int pf,
+              unsigned int hooknum,
                const struct sk_buff *skb,
                const struct net_device *in,
                const struct net_device *out,
-              const struct ipt_log_info *loginfo,
-              const char *level_string,
+              const struct nf_loginfo *loginfo,
                const char *prefix)
  {
+       if (!loginfo)
+               loginfo = &default_loginfo;
+
         spin_lock_bh(&log_lock);
-       printk(level_string);
-       printk("%sIN=%s OUT=%s ",
-              prefix == NULL ? loginfo->prefix : prefix,
+       printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
+              prefix,
                in ? in->name : "",
                out ? out->name : "");
  #ifdef CONFIG_BRIDGE_NETFILTER
@@ -405,28 +419,15 @@ ipt_log_target(struct sk_buff **pskb,
                void *userinfo)
  {
         const struct ipt_log_info *loginfo = targinfo;
-       char level_string[4] = "< >";
+       struct nf_loginfo li;
  
-       level_string[1] = '0' + (loginfo->level % 8);
-       ipt_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL);
+       li.type = NF_LOG_TYPE_LOG;
+       li.u.log.level = loginfo->level;
+       li.u.log.logflags = loginfo->logflags;
  
-       return IPT_CONTINUE;
-}
+       nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix);
  
-static void
-ipt_logfn(unsigned int hooknum,
-         const struct sk_buff *skb,
-         const struct net_device *in,
-         const struct net_device *out,
-         const char *prefix)
-{
-       struct ipt_log_info loginfo = { 
-               .level = 0, 
-               .logflags = IPT_LOG_MASK, 
-               .prefix = "" 
-       };
-
-       ipt_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix);
+       return IPT_CONTINUE;
  }
  
  static int ipt_log_checkentry(const char *tablename,
@@ -464,20 +465,29 @@ static struct ipt_target ipt_log_reg = {
         .me             = THIS_MODULE,
  };
  
+static struct nf_logger ipt_log_logger ={
+       .name           = "ipt_LOG",
+       .logfn          = &ipt_log_packet,
+       .me             = THIS_MODULE,
+};
+
  static int __init init(void)
  {
         if (ipt_register_target(&ipt_log_reg))
                 return -EINVAL;
-       if (nflog)
-               nf_log_register(PF_INET, &ipt_logfn);
+       if (nf_log_register(PF_INET, &ipt_log_logger) < 0) {
+               printk(KERN_WARNING "ipt_LOG: not logging via system console "
+                      "since somebody else already registered for PF_INET\n");
+               /* we cannot make module load fail here, since otherwise
+                * iptables userspace would abort */
+       }
         
         return 0;
  }
  
  static void __exit fini(void)
  {
-       if (nflog)
-               nf_log_unregister(PF_INET, &ipt_logfn);
+       nf_log_unregister_logger(&ipt_log_logger);
         ipt_unregister_target(&ipt_log_reg);
  }
  
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c

index 33c6f9b63b8d7af0d0fa36c74158b0fc633f2e37..52b4f2c296bf81a3772c25a122c8ca7a12f49bb0 100644 (file)
--- a/net/ipv4/netfilter/ipt_MARK.c
+++ b/net/ipv4/netfilter/ipt_MARK.c
@@ -29,10 +29,9 @@ target_v0(struct sk_buff **pskb,
  {
         const struct ipt_mark_target_info *markinfo = targinfo;
  
-       if((*pskb)->nfmark != markinfo->mark) {
+       if((*pskb)->nfmark != markinfo->mark)
                 (*pskb)->nfmark = markinfo->mark;
-               (*pskb)->nfcache |= NFC_ALTERED;
-       }
+
         return IPT_CONTINUE;
  }
  
@@ -61,10 +60,9 @@ target_v1(struct sk_buff **pskb,
                 break;
         }
  
-       if((*pskb)->nfmark != mark) {
+       if((*pskb)->nfmark != mark)
                 (*pskb)->nfmark = mark;
-               (*pskb)->nfcache |= NFC_ALTERED;
-       }
+
         return IPT_CONTINUE;
  }
  
@@ -76,6 +74,8 @@ checkentry_v0(const char *tablename,
               unsigned int targinfosize,
               unsigned int hook_mask)
  {
+       struct ipt_mark_target_info *markinfo = targinfo;
+
         if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) {
                 printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
                        targinfosize,
@@ -88,6 +88,11 @@ checkentry_v0(const char *tablename,
                 return 0;
         }
  
+       if (markinfo->mark > 0xffffffff) {
+               printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
+               return 0;
+       }
+
         return 1;
  }
  
@@ -120,6 +125,11 @@ checkentry_v1(const char *tablename,
                 return 0;
         }
  
+       if (markinfo->mark > 0xffffffff) {
+               printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
+               return 0;
+       }
+
         return 1;
  }
  
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c

index 91e74502c3d36ae652eca8041cc8dc66690eaf44..2f3e181c8e97ad2455b0d488d8388b007c1b0a95 100644 (file)
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -86,11 +86,6 @@ masquerade_target(struct sk_buff **pskb,
  
         IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
  
-       /* FIXME: For the moment, don't do local packets, breaks
-          testsuite for 2.3.49 --RR */
-       if ((*pskb)->sk)
-               return NF_ACCEPT;
-
         ct = ip_conntrack_get(*pskb, &ctinfo);
         IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
                             || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c

index 06254b29d034fab98e9b16ff6444ec71a0e8640a..e6e7b6095363db60b760b93196c5d289a3796da0 100644 (file)
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -46,7 +46,8 @@ check(const char *tablename,
                 DEBUGP(MODULENAME":check: size %u.\n", targinfosize);
                 return 0;
         }
-       if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING))) {
+       if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) |
+                         (1 << NF_IP_LOCAL_OUT))) {
                 DEBUGP(MODULENAME":check: bad hooks %x.\n", hook_mask);
                 return 0;
         }
@@ -76,12 +77,13 @@ target(struct sk_buff **pskb,
         struct ip_nat_range newrange;
  
         IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
-                    || hooknum == NF_IP_POST_ROUTING);
+                    || hooknum == NF_IP_POST_ROUTING
+                    || hooknum == NF_IP_LOCAL_OUT);
         ct = ip_conntrack_get(*pskb, &ctinfo);
  
         netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
  
-       if (hooknum == NF_IP_PRE_ROUTING)
+       if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
                 new_ip = (*pskb)->nh.iph->daddr & ~netmask;
         else
                 new_ip = (*pskb)->nh.iph->saddr & ~netmask;
diff --git a/net/ipv4/netfilter/ipt_NFQUEUE.c b/net/ipv4/netfilter/ipt_NFQUEUE.c

new file mode 100644 (file)

index 0000000..3cedc9b
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_NFQUEUE.c
@@ -0,0 +1,70 @@
+/* iptables module for using new netfilter netlink queue
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as 
+ * published by the Free Software Foundation.
+ * 
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_NFQUEUE.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables NFQUEUE target");
+MODULE_LICENSE("GPL");
+
+static unsigned int
+target(struct sk_buff **pskb,
+       const struct net_device *in,
+       const struct net_device *out,
+       unsigned int hooknum,
+       const void *targinfo,
+       void *userinfo)
+{
+       const struct ipt_NFQ_info *tinfo = targinfo;
+
+       return NF_QUEUE_NR(tinfo->queuenum);
+}
+
+static int
+checkentry(const char *tablename,
+          const struct ipt_entry *e,
+           void *targinfo,
+           unsigned int targinfosize,
+           unsigned int hook_mask)
+{
+       if (targinfosize != IPT_ALIGN(sizeof(struct ipt_NFQ_info))) {
+               printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n",
+                      targinfosize,
+                      IPT_ALIGN(sizeof(struct ipt_NFQ_info)));
+               return 0;
+       }
+
+       return 1;
+}
+
+static struct ipt_target ipt_NFQ_reg = {
+       .name           = "NFQUEUE",
+       .target         = target,
+       .checkentry     = checkentry,
+       .me             = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+       return ipt_register_target(&ipt_NFQ_reg);
+}
+
+static void __exit fini(void)
+{
+       ipt_unregister_target(&ipt_NFQ_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c

index 91569644602008d5b6d04d943de37379c4430f20..f115a84a4ac628ba5a41883eab33c127ce797f40 100644 (file)
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -156,7 +156,6 @@ static void send_reset(struct sk_buff *oldskb, int hook)
  
         /* This packet will not be the same as the other: clear nf fields */
         nf_reset(nskb);
-       nskb->nfcache = 0;
         nskb->nfmark = 0;
  #ifdef CONFIG_BRIDGE_NETFILTER
         nf_bridge_put(nskb->nf_bridge);
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c

index 1049050b2bfbc0a5123662548b37ac382e1cb11f..8db70d6908c33947917babc2c6cf2df11ce871a3 100644 (file)
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -58,7 +58,11 @@ ipt_tcpmss_target(struct sk_buff **pskb,
         unsigned int i;
         u_int8_t *opt;
  
-       if (!skb_ip_make_writable(pskb, (*pskb)->len))
+       if (!skb_make_writable(pskb, (*pskb)->len))
+               return NF_DROP;
+
+       if ((*pskb)->ip_summed == CHECKSUM_HW &&
+           skb_checksum_help(*pskb, out == NULL))
                 return NF_DROP;
  
         iph = (*pskb)->nh.iph;
@@ -186,10 +190,6 @@ ipt_tcpmss_target(struct sk_buff **pskb,
                newmss);
  
   retmodified:
-       /* We never hw checksum SYN packets.  */
-       BUG_ON((*pskb)->ip_summed == CHECKSUM_HW);
-
-       (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED;
         return IPT_CONTINUE;
  }
  
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c

index 85c70d240f8bf433e6e7b59d7cc9712371a5769b..deadb36d442805aefba5d96465f5a478aa016fa3 100644 (file)
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -33,7 +33,7 @@ target(struct sk_buff **pskb,
         if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
                 u_int16_t diffs[2];
  
-               if (!skb_ip_make_writable(pskb, sizeof(struct iphdr)))
+               if (!skb_make_writable(pskb, sizeof(struct iphdr)))
                         return NF_DROP;
  
                 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
@@ -46,7 +46,6 @@ target(struct sk_buff **pskb,
                                                  sizeof(diffs),
                                                  (*pskb)->nh.iph->check
                                                  ^0xFFFF));
-               (*pskb)->nfcache |= NFC_ALTERED;
         }
         return IPT_CONTINUE;
  }
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c

new file mode 100644 (file)

index 0000000..b9ae6a9
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -0,0 +1,119 @@
+/* TTL modification target for IP tables
+ * (C) 2000,2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_TTL.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("IP tables TTL modification module");
+MODULE_LICENSE("GPL");
+
+static unsigned int 
+ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in, 
+               const struct net_device *out, unsigned int hooknum, 
+               const void *targinfo, void *userinfo)
+{
+       struct iphdr *iph;
+       const struct ipt_TTL_info *info = targinfo;
+       u_int16_t diffs[2];
+       int new_ttl;
+
+       if (!skb_make_writable(pskb, (*pskb)->len))
+               return NF_DROP;
+
+       iph = (*pskb)->nh.iph;
+
+       switch (info->mode) {
+               case IPT_TTL_SET:
+                       new_ttl = info->ttl;
+                       break;
+               case IPT_TTL_INC:
+                       new_ttl = iph->ttl + info->ttl;
+                       if (new_ttl > 255)
+                               new_ttl = 255;
+                       break;
+               case IPT_TTL_DEC:
+                       new_ttl = iph->ttl - info->ttl;
+                       if (new_ttl < 0)
+                               new_ttl = 0;
+                       break;
+               default:
+                       new_ttl = iph->ttl;
+                       break;
+       }
+
+       if (new_ttl != iph->ttl) {
+               diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF;
+               iph->ttl = new_ttl;
+               diffs[1] = htons(((unsigned)iph->ttl) << 8);
+               iph->check = csum_fold(csum_partial((char *)diffs,
+                                                   sizeof(diffs),
+                                                   iph->check^0xFFFF));
+       }
+
+       return IPT_CONTINUE;
+}
+
+static int ipt_ttl_checkentry(const char *tablename,
+               const struct ipt_entry *e,
+               void *targinfo,
+               unsigned int targinfosize,
+               unsigned int hook_mask)
+{
+       struct ipt_TTL_info *info = targinfo;
+
+       if (targinfosize != IPT_ALIGN(sizeof(struct ipt_TTL_info))) {
+               printk(KERN_WARNING "ipt_TTL: targinfosize %u != %Zu\n",
+                               targinfosize,
+                               IPT_ALIGN(sizeof(struct ipt_TTL_info)));
+               return 0;
+       }
+
+       if (strcmp(tablename, "mangle")) {
+               printk(KERN_WARNING "ipt_TTL: can only be called from "
+                       "\"mangle\" table, not \"%s\"\n", tablename);
+               return 0;
+       }
+
+       if (info->mode > IPT_TTL_MAXMODE) {
+               printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", 
+                       info->mode);
+               return 0;
+       }
+
+       if ((info->mode != IPT_TTL_SET) && (info->ttl == 0))
+               return 0;
+
+       return 1;
+}
+
+static struct ipt_target ipt_TTL = { 
+       .name           = "TTL",
+       .target         = ipt_ttl_target, 
+       .checkentry     = ipt_ttl_checkentry, 
+       .me             = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+       return ipt_register_target(&ipt_TTL);
+}
+
+static void __exit fini(void)
+{
+       ipt_unregister_target(&ipt_TTL);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c

index 52a0076302a7668a4e627f86c4b4186f964cd9e2..e2c14f3cb2fc6a91e32d81bcdda5777cbd248c06 100644 (file)
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -62,6 +62,7 @@
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
  MODULE_DESCRIPTION("iptables userspace logging module");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
  
  #define ULOG_NL_EVENT          111             /* Harald's favorite number */
  #define ULOG_MAXNLGROUPS       32              /* numer of nlgroups */
@@ -115,10 +116,10 @@ static void ulog_send(unsigned int nlgroupnum)
         if (ub->qlen > 1)
                 ub->lastnlh->nlmsg_type = NLMSG_DONE;
  
-       NETLINK_CB(ub->skb).dst_groups = (1 << nlgroupnum);
-       DEBUGP("ipt_ULOG: throwing %d packets to netlink mask %u\n",
-               ub->qlen, nlgroupnum);
-       netlink_broadcast(nflognl, ub->skb, 0, (1 << nlgroupnum), GFP_ATOMIC);
+       NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
+       DEBUGP("ipt_ULOG: throwing %d packets to netlink group %u\n",
+               ub->qlen, nlgroupnum + 1);
+       netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC);
  
         ub->qlen = 0;
         ub->skb = NULL;
@@ -219,13 +220,13 @@ static void ipt_ulog_packet(unsigned int hooknum,
         pm = NLMSG_DATA(nlh);
  
         /* We might not have a timestamp, get one */
-       if (skb->stamp.tv_sec == 0)
-               do_gettimeofday((struct timeval *)&skb->stamp);
+       if (skb->tstamp.off_sec == 0)
+               __net_timestamp((struct sk_buff *)skb);
  
         /* copy hook, prefix, timestamp, payload, etc. */
         pm->data_len = copy_len;
-       pm->timestamp_sec = skb->stamp.tv_sec;
-       pm->timestamp_usec = skb->stamp.tv_usec;
+       pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec;
+       pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec;
         pm->mark = skb->nfmark;
         pm->hook = hooknum;
         if (prefix != NULL)
@@ -303,18 +304,27 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb,
         return IPT_CONTINUE;
  }
   
-static void ipt_logfn(unsigned int hooknum,
+static void ipt_logfn(unsigned int pf,
+                     unsigned int hooknum,
                       const struct sk_buff *skb,
                       const struct net_device *in,
                       const struct net_device *out,
+                     const struct nf_loginfo *li,
                       const char *prefix)
  {
-       struct ipt_ulog_info loginfo = { 
-               .nl_group = ULOG_DEFAULT_NLGROUP,
-               .copy_range = 0,
-               .qthreshold = ULOG_DEFAULT_QTHRESHOLD,
-               .prefix = ""
-       };
+       struct ipt_ulog_info loginfo;
+
+       if (!li || li->type != NF_LOG_TYPE_ULOG) {
+               loginfo.nl_group = ULOG_DEFAULT_NLGROUP;
+               loginfo.copy_range = 0;
+               loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD;
+               loginfo.prefix[0] = '\0';
+       } else {
+               loginfo.nl_group = li->u.ulog.group;
+               loginfo.copy_range = li->u.ulog.copy_len;
+               loginfo.qthreshold = li->u.ulog.qthreshold;
+               strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
+       }
  
         ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
  }
@@ -354,6 +364,12 @@ static struct ipt_target ipt_ulog_reg = {
         .me             = THIS_MODULE,
  };
  
+static struct nf_logger ipt_ulog_logger = {
+       .name           = "ipt_ULOG",
+       .logfn          = &ipt_logfn,
+       .me             = THIS_MODULE,
+};
+
  static int __init init(void)
  {
         int i;
@@ -372,7 +388,8 @@ static int __init init(void)
                 ulog_buffers[i].timer.data = i;
         }
  
-       nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL);
+       nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
+                                       THIS_MODULE);
         if (!nflognl)
                 return -ENOMEM;
  
@@ -381,7 +398,7 @@ static int __init init(void)
                 return -EINVAL;
         }
         if (nflog)
-               nf_log_register(PF_INET, &ipt_logfn);
+               nf_log_register(PF_INET, &ipt_ulog_logger);
         
         return 0;
  }
@@ -394,7 +411,7 @@ static void __exit fini(void)
         DEBUGP("ipt_ULOG: cleanup_module\n");
  
         if (nflog)
-               nf_log_unregister(PF_INET, &ipt_logfn);
+               nf_log_unregister_logger(&ipt_ulog_logger);
         ipt_unregister_target(&ipt_ulog_reg);
         sock_release(nflognl->sk_socket);
  
diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c

new file mode 100644 (file)

index 0000000..df4a42c
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_connbytes.c
@@ -0,0 +1,162 @@
+/* Kernel module to match connection tracking byte counter.
+ * GPL (C) 2002 Martin Devera (devik@cdi.cz).
+ *
+ * 2004-07-20 Harald Welte <laforge@netfilter.org>
+ *     - reimplemented to use per-connection accounting counters
+ *     - add functionality to match number of packets
+ *     - add functionality to match average packet size
+ *     - add support to match directions separately
+ *
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_connbytes.h>
+
+#include <asm/div64.h>
+#include <asm/bitops.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
+
+/*
+ * Divide a 64-bit dividend by a 64-bit divisor and return the quotient.
+ *
+ * do_div() is called with a 32-bit divisor, so when the divisor does not
+ * fit in 32 bits both operands are shifted right by the same amount first;
+ * the quotient is then approximate ("dynamic precision").
+ *
+ * A zero divisor yields 0 instead of a divide fault: the callers divide by
+ * per-direction packet counters, which are not guaranteed to be non-zero.
+ */
+static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
+{
+       u_int32_t d = divisor;
+
+       if (divisor == 0)
+               return 0;
+
+       if (divisor > 0xffffffffULL) {
+               /* shift out as many bits as the divisor has above bit 31 */
+               unsigned int shift = fls(divisor >> 32);
+
+               d = divisor >> shift;
+               dividend >>= shift;
+       }
+
+       do_div(dividend, d);
+       return dividend;
+}
+
+/*
+ * Match callback: compare a per-connection counter against the configured
+ * [count.from, count.to] range.
+ *
+ * sinfo->what selects packets, bytes or average packet size (bytes divided
+ * by packets); sinfo->direction selects the original direction, the reply
+ * direction, or the sum of both.  A count.to of zero means "no upper bound".
+ *
+ * Returns non-zero on match, 0 otherwise (including when the skb has no
+ * conntrack entry).  in, out, offset and hotdrop are not used.
+ */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+       const struct ipt_connbytes_info *sinfo = matchinfo;
+       enum ip_conntrack_info ctinfo;
+       struct ip_conntrack *ct;
+       u_int64_t what = 0;     /* initialize to make gcc happy */
+
+       if (!(ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo)))
+               return 0; /* no match */
+
+       /* Pick the value to compare; it stays 0 if what/direction is not
+        * one of the cases handled below. */
+       switch (sinfo->what) {
+       case IPT_CONNBYTES_PKTS:
+               switch (sinfo->direction) {
+               case IPT_CONNBYTES_DIR_ORIGINAL:
+                       what = ct->counters[IP_CT_DIR_ORIGINAL].packets;
+                       break;
+               case IPT_CONNBYTES_DIR_REPLY:
+                       what = ct->counters[IP_CT_DIR_REPLY].packets;
+                       break;
+               case IPT_CONNBYTES_DIR_BOTH:
+                       what = ct->counters[IP_CT_DIR_ORIGINAL].packets;
+                       what += ct->counters[IP_CT_DIR_REPLY].packets;
+                       break;
+               }
+               break;
+       case IPT_CONNBYTES_BYTES:
+               switch (sinfo->direction) {
+               case IPT_CONNBYTES_DIR_ORIGINAL:
+                       what = ct->counters[IP_CT_DIR_ORIGINAL].bytes;
+                       break;
+               case IPT_CONNBYTES_DIR_REPLY:
+                       what = ct->counters[IP_CT_DIR_REPLY].bytes;
+                       break;
+               case IPT_CONNBYTES_DIR_BOTH:
+                       what = ct->counters[IP_CT_DIR_ORIGINAL].bytes;
+                       what += ct->counters[IP_CT_DIR_REPLY].bytes;
+                       break;
+               }
+               break;
+       case IPT_CONNBYTES_AVGPKT:
+               /* NOTE(review): the packet counter used as divisor below may
+                * be zero; confirm div64_64() tolerates a zero divisor. */
+               switch (sinfo->direction) {
+               case IPT_CONNBYTES_DIR_ORIGINAL:
+                       what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes,
+                                       ct->counters[IP_CT_DIR_ORIGINAL].packets);
+                       break;
+               case IPT_CONNBYTES_DIR_REPLY:
+                       what = div64_64(ct->counters[IP_CT_DIR_REPLY].bytes,
+                                       ct->counters[IP_CT_DIR_REPLY].packets);
+                       break;
+               case IPT_CONNBYTES_DIR_BOTH:
+                       {
+                               u_int64_t bytes;
+                               u_int64_t pkts;
+                               bytes = ct->counters[IP_CT_DIR_ORIGINAL].bytes +
+                                       ct->counters[IP_CT_DIR_REPLY].bytes;
+                               pkts = ct->counters[IP_CT_DIR_ORIGINAL].packets+
+                                       ct->counters[IP_CT_DIR_REPLY].packets;
+
+                               /* FIXME_THEORETICAL: what to do if sum
+                                * overflows ? */
+
+                               what = div64_64(bytes, pkts);
+                       }
+                       break;
+               }
+               break;
+       }
+
+       /* count.to == 0 disables the upper bound. */
+       if (sinfo->count.to)
+               return (what <= sinfo->count.to && what >= sinfo->count.from);
+       else
+               return (what >= sinfo->count.from);
+}
+
+/*
+ * checkentry hook: accept a rule only if the match data has the expected
+ * size and names a known counter type and a known direction.
+ * Returns 1 when the rule is acceptable, 0 to reject it.
+ */
+static int check(const char *tablename,
+                const struct ipt_ip *ip,
+                void *matchinfo,
+                unsigned int matchsize,
+                unsigned int hook_mask)
+{
+       const struct ipt_connbytes_info *sinfo = matchinfo;
+       int known_what;
+       int known_dir;
+
+       if (matchsize != IPT_ALIGN(sizeof(struct ipt_connbytes_info)))
+               return 0;
+
+       known_what = sinfo->what == IPT_CONNBYTES_PKTS ||
+                    sinfo->what == IPT_CONNBYTES_BYTES ||
+                    sinfo->what == IPT_CONNBYTES_AVGPKT;
+       if (!known_what)
+               return 0;
+
+       known_dir = sinfo->direction == IPT_CONNBYTES_DIR_ORIGINAL ||
+                   sinfo->direction == IPT_CONNBYTES_DIR_REPLY ||
+                   sinfo->direction == IPT_CONNBYTES_DIR_BOTH;
+
+       return known_dir;
+}
+
+/* Match descriptor registered by init() under the name "connbytes".
+ * NOTE(review): the identifier says "state", but the callbacks are this
+ * file's connbytes match() and check(). */
+static struct ipt_match state_match = {
+       .name           = "connbytes",
+       .match          = &match,
+       .checkentry     = &check,
+       .me             = THIS_MODULE
+};
+
+/* Module entry point: register the match; returns whatever
+ * ipt_register_match() returns. */
+static int __init init(void)
+{
+       return ipt_register_match(&state_match);
+}
+
+/* Module exit point: unregister the match registered by init(). */
+static void __exit fini(void)
+{
+       ipt_unregister_match(&state_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c

index 2706f96cea55dba1078baeca1e788a2fa116be94..bf8de47ce0041487822543d275523541e4023dc1 100644 (file)
--- a/net/ipv4/netfilter/ipt_connmark.c
+++ b/net/ipv4/netfilter/ipt_connmark.c
@@ -54,9 +54,16 @@ checkentry(const char *tablename,
            unsigned int matchsize,
            unsigned int hook_mask)
  {
+       struct ipt_connmark_info *cm = 
+                               (struct ipt_connmark_info *)matchinfo;
         if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info)))
                 return 0;
  
+       if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) {
+               printk(KERN_WARNING "connmark: only support 32bit mark\n");
+               return 0;
+       }
+
         return 1;
  }
  
diff --git a/net/ipv4/netfilter/ipt_dccp.c b/net/ipv4/netfilter/ipt_dccp.c

new file mode 100644 (file)

index 0000000..ad3278b
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_dccp.c
@@ -0,0 +1,176 @@
+/*
+ * iptables module for DCCP protocol header matching
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <net/ip.h>
+#include <linux/dccp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_dccp.h>
+
+/*
+ * Evaluate one match criterion.  True when `option` is not set in `flag`
+ * (criterion not requested); otherwise the result is `cond` XORed with
+ * whether `option` is set in `invflag`.  `cond` is only evaluated when the
+ * criterion is requested, and must be 0 or 1 for the XOR to act as a
+ * logical inversion.
+ */
+#define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
+                                 || (!!((invflag) & (option)) ^ (cond)))
+
+static unsigned char *dccp_optbuf;
+static DEFINE_SPINLOCK(dccp_buflock);
+
+/*
+ * Scan the DCCP option area for an option of type `option`.
+ *
+ * The option area lies between the header length reported by
+ * __dccp_hdr_len() and the data offset (dccph_doff, counted in 32-bit
+ * words).  Returns 1 if the option is present, 0 otherwise.  *hotdrop is
+ * set when the data offset is smaller than the header length or when the
+ * option bytes cannot be obtained from the skb.
+ *
+ * dccp_optbuf is one shared scratch buffer, so it is only handed to
+ * skb_header_pointer() and read with dccp_buflock held.
+ */
+static inline int
+dccp_find_option(u_int8_t option,
+                const struct sk_buff *skb,
+                const struct dccp_hdr *dh,
+                int *hotdrop)
+{
+       unsigned char *op;
+       unsigned int optoff = __dccp_hdr_len(dh);
+       unsigned int optlen;
+       unsigned int i;
+       int found = 0;
+
+       /* Data offset pointing inside the header: malformed packet. */
+       if (dh->dccph_doff * 4 < optoff) {
+               *hotdrop = 1;
+               return 0;
+       }
+
+       /* Only computed once the subtraction is known not to wrap. */
+       optlen = dh->dccph_doff * 4 - optoff;
+       if (!optlen)
+               return 0;
+
+       spin_lock_bh(&dccp_buflock);
+       op = skb_header_pointer(skb,
+                               skb->nh.iph->ihl*4 + optoff,
+                               optlen, dccp_optbuf);
+       if (op == NULL) {
+               /* If we don't have the whole header, drop packet. */
+               *hotdrop = 1;
+               goto out;
+       }
+
+       for (i = 0; i < optlen; ) {
+               if (op[i] == option) {
+                       found = 1;
+                       break;
+               }
+
+               /* NOTE(review): types below 2 are treated as single-byte
+                * options, as the TCP option parser does; confirm against
+                * the DCCP option encoding. */
+               if (op[i] < 2) {
+                       i++;
+                       continue;
+               }
+
+               /* The length byte must itself lie inside the option area;
+                * a truncated trailing option ends the scan. */
+               if (i + 1 >= optlen)
+                       break;
+
+               /* A zero length would never advance; step by one instead. */
+               i += op[i + 1] ? op[i + 1] : 1;
+       }
+
+out:
+       spin_unlock_bh(&dccp_buflock);
+       return found;
+}
+
+
+/*
+ * Test whether the packet type of `dh` is selected in `typemask` (bit N
+ * set means packet type N is selected).
+ *
+ * Returns exactly 0 or 1.  The result is fed to DCCHECK(), which XORs it
+ * with a 0/1 inversion flag; returning the raw masked bit would leave the
+ * XOR non-zero for every type other than 0, so an inverted rule would
+ * match the very types it is meant to exclude.
+ */
+static inline int
+match_types(const struct dccp_hdr *dh, u_int16_t typemask)
+{
+       return (typemask & (1 << dh->dccph_type)) != 0;
+}
+
+/* Thin wrapper: forwards to dccp_find_option() and returns its result. */
+static inline int
+match_option(u_int8_t option, const struct sk_buff *skb,
+            const struct dccp_hdr *dh, int *hotdrop)
+{
+       return dccp_find_option(option, skb, dh, hotdrop);
+}
+
+/*
+ * Match callback for DCCP packets.
+ *
+ * Non-first fragments (offset != 0) never match.  If the DCCP header cannot
+ * be read from the skb, *hotdrop is set and the packet does not match.
+ * Otherwise the source port range, destination port range, packet type
+ * mask and option criteria are checked in that order through DCCHECK();
+ * the first failing criterion ends the evaluation.
+ *
+ * Returns 1 on match, 0 otherwise.
+ */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+       const struct ipt_dccp_info *info = matchinfo;
+       struct dccp_hdr _dh, *dh;
+
+       if (offset)
+               return 0;
+
+       dh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_dh), &_dh);
+       if (!dh) {
+               *hotdrop = 1;
+               return 0;
+       }
+
+       /* source port range */
+       if (!DCCHECK(ntohs(dh->dccph_sport) >= info->spts[0] &&
+                    ntohs(dh->dccph_sport) <= info->spts[1],
+                    IPT_DCCP_SRC_PORTS, info->flags, info->invflags))
+               return 0;
+
+       /* destination port range */
+       if (!DCCHECK(ntohs(dh->dccph_dport) >= info->dpts[0] &&
+                    ntohs(dh->dccph_dport) <= info->dpts[1],
+                    IPT_DCCP_DEST_PORTS, info->flags, info->invflags))
+               return 0;
+
+       /* packet type mask */
+       if (!DCCHECK(match_types(dh, info->typemask),
+                    IPT_DCCP_TYPE, info->flags, info->invflags))
+               return 0;
+
+       /* option presence; may set *hotdrop */
+       return DCCHECK(match_option(info->option, skb, dh, hotdrop),
+                      IPT_DCCP_OPTION, info->flags, info->invflags);
+}
+
+/*
+ * checkentry hook: decide whether a rule using this match is acceptable.
+ *
+ * The rule must select protocol DCCP without inverting the protocol test,
+ * the match data must have the expected size, flags and invflags may only
+ * contain bits from IPT_DCCP_VALID_FLAGS, and every bit set in invflags
+ * must also be set in flags.
+ *
+ * Returns 1 to accept the rule, 0 to reject it.
+ */
+static int
+checkentry(const char *tablename,
+          const struct ipt_ip *ip,
+          void *matchinfo,
+          unsigned int matchsize,
+          unsigned int hook_mask)
+{
+       const struct ipt_dccp_info *info = matchinfo;
+
+       if (ip->proto != IPPROTO_DCCP || (ip->invflags & IPT_INV_PROTO))
+               return 0;
+
+       /* Size is verified before any field of info is read. */
+       if (matchsize != IPT_ALIGN(sizeof(struct ipt_dccp_info)))
+               return 0;
+
+       if ((info->flags & ~IPT_DCCP_VALID_FLAGS) ||
+           (info->invflags & ~IPT_DCCP_VALID_FLAGS))
+               return 0;
+
+       /* An inversion bit without its criterion bit is rejected. */
+       if (info->invflags & ~info->flags)
+               return 0;
+
+       return 1;
+}
+
+/* Match descriptor registered by init() under the name "dccp"; wires up
+ * this file's match() and checkentry() callbacks. */
+static struct ipt_match dccp_match = 
+{ 
+       .name           = "dccp",
+       .match          = &match,
+       .checkentry     = &checkentry,
+       .me             = THIS_MODULE,
+};
+
+/*
+ * Module entry point: allocate the shared option scratch buffer, then
+ * register the match.  Returns 0 on success, -ENOMEM if the buffer cannot
+ * be allocated, or the error from ipt_register_match() (in which case the
+ * buffer is freed again).
+ */
+static int __init init(void)
+{
+       int err;
+
+       /* doff is 8 bits, so the maximum option size is (4*256).  Don't put
+        * this in BSS since DaveM is worried about locked TLB's for kernel
+        * BSS. */
+       dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
+       if (dccp_optbuf == NULL)
+               return -ENOMEM;
+
+       err = ipt_register_match(&dccp_match);
+       if (err == 0)
+               return 0;
+
+       /* registration failed: release the buffer before reporting it */
+       kfree(dccp_optbuf);
+       return err;
+}
+
+/* Module exit point: unregister the match, then free the option scratch
+ * buffer allocated by init(). */
+static void __exit fini(void)
+{
+       ipt_unregister_match(&dccp_match);
+       kfree(dccp_optbuf);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Match for DCCP protocol packets");
+
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c

index 564b49bfebcf6feda673d223713d911da17145a1..2dd1cccbdab9e938f20920c9355ab1a834bdc0ff 100644 (file)
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -94,7 +94,7 @@ struct ipt_hashlimit_htable {
  static DEFINE_SPINLOCK(hashlimit_lock);        /* protects htables list */
  static DECLARE_MUTEX(hlimit_mutex);    /* additional checkentry protection */
  static HLIST_HEAD(hashlimit_htables);
-static kmem_cache_t *hashlimit_cachep;
+static kmem_cache_t *hashlimit_cachep __read_mostly;
  
  static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b)
  {
diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c

index 8955728127b9528e720fdba0259da1f9d216f261..00bef6cdd3f8e3c6b164bea7142b8dba81448be9 100644 (file)
--- a/net/ipv4/netfilter/ipt_mark.c
+++ b/net/ipv4/netfilter/ipt_mark.c
@@ -37,9 +37,16 @@ checkentry(const char *tablename,
             unsigned int matchsize,
             unsigned int hook_mask)
  {
+       struct ipt_mark_info *minfo = (struct ipt_mark_info *) matchinfo;
+
         if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info)))
                 return 0;
  
+       if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) {
+               printk(KERN_WARNING "mark: only supports 32bit mark\n");
+               return 0;
+       }
+
         return 1;
  }
  
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c

index 3b9065e06381c1a2473f3a028299d94fe380471e..c1889f88262b4867bc844322351e4909f14ed434 100644 (file)
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -20,106 +20,6 @@ MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
  MODULE_DESCRIPTION("iptables owner match");
  
-static int
-match_comm(const struct sk_buff *skb, const char *comm)
-{
-       struct task_struct *g, *p;
-       struct files_struct *files;
-       int i;
-
-       read_lock(&tasklist_lock);
-       do_each_thread(g, p) {
-               if(strncmp(p->comm, comm, sizeof(p->comm)))
-                       continue;
-
-               task_lock(p);
-               files = p->files;
-               if(files) {
-                       spin_lock(&files->file_lock);
-                       for (i=0; i < files->max_fds; i++) {
-                               if (fcheck_files(files, i) ==
-                                   skb->sk->sk_socket->file) {
-                                       spin_unlock(&files->file_lock);
-                                       task_unlock(p);
-                                       read_unlock(&tasklist_lock);
-                                       return 1;
-                               }
-                       }
-                       spin_unlock(&files->file_lock);
-               }
-               task_unlock(p);
-       } while_each_thread(g, p);
-       read_unlock(&tasklist_lock);
-       return 0;
-}
-
-static int
-match_pid(const struct sk_buff *skb, pid_t pid)
-{
-       struct task_struct *p;
-       struct files_struct *files;
-       int i;
-
-       read_lock(&tasklist_lock);
-       p = find_task_by_pid(pid);
-       if (!p)
-               goto out;
-       task_lock(p);
-       files = p->files;
-       if(files) {
-               spin_lock(&files->file_lock);
-               for (i=0; i < files->max_fds; i++) {
-                       if (fcheck_files(files, i) ==
-                           skb->sk->sk_socket->file) {
-                               spin_unlock(&files->file_lock);
-                               task_unlock(p);
-                               read_unlock(&tasklist_lock);
-                               return 1;
-                       }
-               }
-               spin_unlock(&files->file_lock);
-       }
-       task_unlock(p);
-out:
-       read_unlock(&tasklist_lock);
-       return 0;
-}
-
-static int
-match_sid(const struct sk_buff *skb, pid_t sid)
-{
-       struct task_struct *g, *p;
-       struct file *file = skb->sk->sk_socket->file;
-       int i, found=0;
-
-       read_lock(&tasklist_lock);
-       do_each_thread(g, p) {
-               struct files_struct *files;
-               if (p->signal->session != sid)
-                       continue;
-
-               task_lock(p);
-               files = p->files;
-               if (files) {
-                       spin_lock(&files->file_lock);
-                       for (i=0; i < files->max_fds; i++) {
-                               if (fcheck_files(files, i) == file) {
-                                       found = 1;
-                                       break;
-                               }
-                       }
-                       spin_unlock(&files->file_lock);
-               }
-               task_unlock(p);
-               if (found)
-                       goto out;
-       } while_each_thread(g, p);
-out:
-       read_unlock(&tasklist_lock);
-
-       return found;
-}
-
  static int
  match(const struct sk_buff *skb,
        const struct net_device *in,
@@ -145,24 +45,6 @@ match(const struct sk_buff *skb,
                         return 0;
         }
  
-       if(info->match & IPT_OWNER_PID) {
-               if (!match_pid(skb, info->pid) ^
-                   !!(info->invert & IPT_OWNER_PID))
-                       return 0;
-       }
-
-       if(info->match & IPT_OWNER_SID) {
-               if (!match_sid(skb, info->sid) ^
-                   !!(info->invert & IPT_OWNER_SID))
-                       return 0;
-       }
-
-       if(info->match & IPT_OWNER_COMM) {
-               if (!match_comm(skb, info->comm) ^
-                   !!(info->invert & IPT_OWNER_COMM))
-                       return 0;
-       }
-
         return 1;
  }
  
@@ -173,6 +55,8 @@ checkentry(const char *tablename,
             unsigned int matchsize,
             unsigned int hook_mask)
  {
+       const struct ipt_owner_info *info = matchinfo;
+
          if (hook_mask
              & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) {
                  printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n");
@@ -184,15 +68,13 @@ checkentry(const char *tablename,
                        IPT_ALIGN(sizeof(struct ipt_owner_info)));
                 return 0;
         }
-#ifdef CONFIG_SMP
-       /* files->file_lock can not be used in a BH */
-       if (((struct ipt_owner_info *)matchinfo)->match
-           & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) {
-               printk("ipt_owner: pid, sid and command matching is broken "
-                      "on SMP.\n");
+
+       if (info->match & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) {
+               printk("ipt_owner: pid, sid and command matching "
+                      "not supported anymore\n");
                 return 0;
         }
-#endif
+
         return 1;
  }
  
diff --git a/net/ipv4/netfilter/ipt_string.c b/net/ipv4/netfilter/ipt_string.c

new file mode 100644 (file)

index 0000000..b5def20
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_string.c
@@ -0,0 +1,91 @@
+/* String matching match for iptables
+ * 
+ * (C) 2005 Pablo Neira Ayuso <pablo@eurodev.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_string.h>
+#include <linux/textsearch.h>
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
+MODULE_DESCRIPTION("IP tables string match module");
+MODULE_LICENSE("GPL");
+
+/*
+ * Match callback: search the packet for the configured pattern between
+ * conf->from_offset and conf->to_offset, using the textsearch
+ * configuration stored in conf->config.
+ *
+ * Returns 1 when the pattern is found and the rule is not inverted, or
+ * when it is not found and the rule is inverted; 0 otherwise.
+ * in, out, offset and hotdrop are not used.
+ */
+static int match(const struct sk_buff *skb,
+                const struct net_device *in,
+                const struct net_device *out,
+                const void *matchinfo,
+                int offset,
+                int *hotdrop)
+{
+       struct ts_state state;
+       struct ipt_string_info *conf = (struct ipt_string_info *) matchinfo;
+       int found;
+
+       memset(&state, 0, sizeof(struct ts_state));
+
+       /* A result of UINT_MAX is treated as "pattern not found". */
+       found = skb_find_text((struct sk_buff *)skb, conf->from_offset,
+                             conf->to_offset, conf->config, &state)
+               != UINT_MAX;
+
+       /* Inversion flips the result; it must not merely veto a hit, or an
+        * inverted rule could never match anything. */
+       return conf->invert ? !found : found;
+}
+
+/* View opaque match data as struct ipt_string_info.  The argument is
+ * parenthesized so the cast applies to the whole expression passed in. */
+#define STRING_TEXT_PRIV(m) ((struct ipt_string_info *) (m))
+
+/*
+ * checkentry hook: validate the rule data and build the textsearch
+ * configuration that match() uses.
+ *
+ * Rejects the rule (returns 0) when the match data has the wrong size,
+ * from_offset lies beyond to_offset, the algorithm name is not
+ * NUL-terminated, the pattern length exceeds the pattern buffer, or
+ * textsearch_prepare() fails.  On success the prepared configuration is
+ * stored in conf->config and 1 is returned.
+ */
+static int checkentry(const char *tablename,
+                     const struct ipt_ip *ip,
+                     void *matchinfo,
+                     unsigned int matchsize,
+                     unsigned int hook_mask)
+{
+       struct ipt_string_info *conf = matchinfo;
+       struct ts_config *ts_conf;
+
+       if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info)))
+               return 0;
+
+       /* Damn, can't handle this case properly with iptables... */
+       if (conf->from_offset > conf->to_offset)
+               return 0;
+
+       /* Nothing else bounds these before textsearch_prepare() reads
+        * them: an unterminated name or an oversized length would make it
+        * read past the rule data.
+        * NOTE(review): relies on algo and pattern being fixed-size arrays
+        * inside struct ipt_string_info - confirm against the header. */
+       if (conf->algo[sizeof(conf->algo) - 1] != '\0')
+               return 0;
+       if (conf->patlen > sizeof(conf->pattern))
+               return 0;
+
+       ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
+                                    GFP_KERNEL, TS_AUTOLOAD);
+       if (IS_ERR(ts_conf))
+               return 0;
+
+       conf->config = ts_conf;
+
+       return 1;
+}
+
+/* destroy hook: release the textsearch configuration that checkentry()
+ * stored in the rule's config field.  matchsize is not used. */
+static void destroy(void *matchinfo, unsigned int matchsize)
+{
+       textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
+}
+
+/* Match descriptor registered by init() under the name "string"; wires up
+ * this file's match(), checkentry() and destroy() callbacks. */
+static struct ipt_match string_match = {
+       .name           = "string",
+       .match          = match,
+       .checkentry     = checkentry,
+       .destroy        = destroy,
+       .me             = THIS_MODULE
+};
+
+/* Module entry point: register the match; returns whatever
+ * ipt_register_match() returns. */
+static int __init init(void)
+{
+       return ipt_register_match(&string_match);
+}
+
+/* Module exit point: unregister the match registered by init(). */
+static void __exit fini(void)
+{
+       ipt_unregister_match(&string_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c

index 912bbcc7f4152efb998f20328d433d0535dc0f85..f7943ba1f43c42d3d8c6546730c283ae134f53e6 100644 (file)
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,13 +59,10 @@ static int fold_prot_inuse(struct proto *proto)
   */
  static int sockstat_seq_show(struct seq_file *seq, void *v)
  {
-       /* From net/socket.c */
-       extern void socket_seq_show(struct seq_file *seq);
-
         socket_seq_show(seq);
         seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
                    fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
-                  tcp_tw_count, atomic_read(&tcp_sockets_allocated),
+                  tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
                    atomic_read(&tcp_memory_allocated));
         seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
         seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c

index 0db405a869f2524c4768884580590a0a42c07a37..291831e792aff067c46e8a3959c9a7058580ddbc 100644 (file)
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -40,7 +40,6 @@
  #include <linux/timer.h>
  #include <net/ip.h>
  #include <net/protocol.h>
-#include <net/tcp.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
  #include <net/icmp.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c

index d1835b1bc8c469d8394e0ae1a0bb8c9b9a6f975c..304bb0a1d4f0f1b936e0500f08282bf137ae4505 100644 (file)
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -59,7 +59,6 @@
  #include <linux/netdevice.h>
  #include <linux/in_route.h>
  #include <linux/route.h>
-#include <linux/tcp.h>
  #include <linux/skbuff.h>
  #include <net/dst.h>
  #include <net/sock.h>
@@ -71,6 +70,7 @@
  #include <net/udp.h>
  #include <net/raw.h>
  #include <net/snmp.h>
+#include <net/tcp_states.h>
  #include <net/inet_common.h>
  #include <net/checksum.h>
  #include <net/xfrm.h>
@@ -150,10 +150,11 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
   * RFC 1122: SHOULD pass TOS value up to the transport layer.
   * -> It does. And not only TOS, but all IP header.
   */
-void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
+int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
  {
         struct sock *sk;
         struct hlist_head *head;
+       int delivered = 0;
  
         read_lock(&raw_v4_lock);
         head = &raw_v4_htable[hash];
@@ -164,6 +165,7 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
                              skb->dev->ifindex);
  
         while (sk) {
+               delivered = 1;
                 if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) {
                         struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
  
@@ -177,6 +179,7 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
         }
  out:
         read_unlock(&raw_v4_lock);
+       return delivered;
  }
  
  void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c

index d675ff80b04d98910d083cd9e1dea8c754d550e2..8c0b14e3beecc761e08c684cea964720fd2b2b97 100644 (file)
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -240,7 +240,9 @@ static unsigned                     rt_hash_mask;
  static int                     rt_hash_log;
  static unsigned int            rt_hash_rnd;
  
-struct rt_cache_stat *rt_cache_stat;
+static struct rt_cache_stat *rt_cache_stat;
+#define RT_CACHE_STAT_INC(field)                                         \
+               (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++)
  
  static int rt_intern_hash(unsigned hash, struct rtable *rth,
                                 struct rtable **res);
@@ -2600,6 +2602,8 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
         return ip_route_output_slow(rp, flp);
  }
  
+EXPORT_SYMBOL_GPL(__ip_route_output_key);
+
  int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
  {
         int err;
@@ -2618,6 +2622,8 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
         return 0;
  }
  
+EXPORT_SYMBOL_GPL(ip_route_output_flow);
+
  int ip_route_output_key(struct rtable **rp, struct flowi *flp)
  {
         return ip_route_output_flow(rp, flp, NULL, 0);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c

index 72d014442185fcb1d29ee855b17da26a5bb82988..a34e60ea48a15f3b33a0531a4d5cc260a9e62f17 100644 (file)
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -169,8 +169,6 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
         return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
  }
  
-extern struct request_sock_ops tcp_request_sock_ops;
-
  static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
                                            struct request_sock *req,
                                            struct dst_entry *dst)
@@ -180,7 +178,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  
         child = tp->af_specific->syn_recv_sock(sk, skb, req, dst);
         if (child)
-               tcp_acceptq_queue(sk, req, child);
+               inet_csk_reqsk_queue_add(sk, req, child);
         else
                 reqsk_free(req);
  
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c

index e32894532416ed0381368fb6b09b99318356ec3f..652685623519d74cca40682afbc7ade2785a8eab 100644 (file)
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -11,7 +11,9 @@
  #include <linux/module.h>
  #include <linux/sysctl.h>
  #include <linux/config.h>
+#include <linux/igmp.h>
  #include <net/snmp.h>
+#include <net/icmp.h>
  #include <net/ip.h>
  #include <net/route.h>
  #include <net/tcp.h>
@@ -19,36 +21,6 @@
  /* From af_inet.c */
  extern int sysctl_ip_nonlocal_bind;
  
-/* From icmp.c */
-extern int sysctl_icmp_echo_ignore_all;
-extern int sysctl_icmp_echo_ignore_broadcasts;
-extern int sysctl_icmp_ignore_bogus_error_responses;
-extern int sysctl_icmp_errors_use_inbound_ifaddr;
-
-/* From ip_fragment.c */
-extern int sysctl_ipfrag_low_thresh;
-extern int sysctl_ipfrag_high_thresh; 
-extern int sysctl_ipfrag_time;
-extern int sysctl_ipfrag_secret_interval;
-
-/* From ip_output.c */
-extern int sysctl_ip_dynaddr;
-
-/* From icmp.c */
-extern int sysctl_icmp_ratelimit;
-extern int sysctl_icmp_ratemask;
-
-/* From igmp.c */
-extern int sysctl_igmp_max_memberships;
-extern int sysctl_igmp_max_msf;
-
-/* From inetpeer.c */
-extern int inet_peer_threshold;
-extern int inet_peer_minttl;
-extern int inet_peer_maxttl;
-extern int inet_peer_gc_mintime;
-extern int inet_peer_gc_maxtime;
-
  #ifdef CONFIG_SYSCTL
  static int tcp_retr1_max = 255; 
  static int ip_local_port_range_min[] = { 1, 1 };
@@ -57,8 +29,6 @@ static int ip_local_port_range_max[] = { 65535, 65535 };
  
  struct ipv4_config ipv4_config;
  
-extern ctl_table ipv4_route_table[];
-
  #ifdef CONFIG_SYSCTL
  
  static
@@ -136,10 +106,11 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file *
         return ret;
  }
  
-int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int nlen,
-                                 void __user *oldval, size_t __user *oldlenp,
-                                 void __user *newval, size_t newlen,
-                                 void **context)
+static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
+                                        int nlen, void __user *oldval,
+                                        size_t __user *oldlenp,
+                                        void __user *newval, size_t newlen,
+                                        void **context)
  {
         char val[TCP_CA_NAME_MAX];
         ctl_table tbl = {
@@ -259,7 +230,7 @@ ctl_table ipv4_table[] = {
         {
                 .ctl_name       = NET_TCP_MAX_TW_BUCKETS,
                 .procname       = "tcp_max_tw_buckets",
-               .data           = &sysctl_tcp_max_tw_buckets,
+               .data           = &tcp_death_row.sysctl_max_tw_buckets,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec
@@ -363,7 +334,7 @@ ctl_table ipv4_table[] = {
         {
                 .ctl_name       = NET_TCP_TW_RECYCLE,
                 .procname       = "tcp_tw_recycle",
-               .data           = &sysctl_tcp_tw_recycle,
+               .data           = &tcp_death_row.sysctl_tw_recycle,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c

index ddb6ce4ecff291e9ecec53e86a2781eefe61a3bb..02fdda68718d0c98d60d1d8de42fc87689e9991c 100644 (file)
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -269,13 +269,12 @@
  
  int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
  
-DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
-
-kmem_cache_t *tcp_bucket_cachep;
-kmem_cache_t *tcp_timewait_cachep;
+DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
  
  atomic_t tcp_orphan_count = ATOMIC_INIT(0);
  
+EXPORT_SYMBOL_GPL(tcp_orphan_count);
+
  int sysctl_tcp_mem[3];
  int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
  int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
@@ -310,15 +309,6 @@ void tcp_enter_memory_pressure(void)
  
  EXPORT_SYMBOL(tcp_enter_memory_pressure);
  
-/*
- * LISTEN is a special case for poll..
- */
-static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
-                                              poll_table *wait)
-{
-       return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0;
-}
-
  /*
   *     Wait for a TCP event.
   *
@@ -334,7 +324,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
  
         poll_wait(file, sk->sk_sleep, wait);
         if (sk->sk_state == TCP_LISTEN)
-               return tcp_listen_poll(sk, wait);
+               return inet_csk_listen_poll(sk);
  
         /* Socket is not locked. We are protected from async events
            by poll logic and correct handling of state changes
@@ -457,109 +447,6 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
         return put_user(answ, (int __user *)arg);
  }
  
-
-int tcp_listen_start(struct sock *sk)
-{
-       struct inet_sock *inet = inet_sk(sk);
-       struct tcp_sock *tp = tcp_sk(sk);
-       int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE);
-
-       if (rc != 0)
-               return rc;
-
-       sk->sk_max_ack_backlog = 0;
-       sk->sk_ack_backlog = 0;
-       tcp_delack_init(tp);
-
-       /* There is race window here: we announce ourselves listening,
-        * but this transition is still not validated by get_port().
-        * It is OK, because this socket enters to hash table only
-        * after validation is complete.
-        */
-       sk->sk_state = TCP_LISTEN;
-       if (!sk->sk_prot->get_port(sk, inet->num)) {
-               inet->sport = htons(inet->num);
-
-               sk_dst_reset(sk);
-               sk->sk_prot->hash(sk);
-
-               return 0;
-       }
-
-       sk->sk_state = TCP_CLOSE;
-       reqsk_queue_destroy(&tp->accept_queue);
-       return -EADDRINUSE;
-}
-
-/*
- *     This routine closes sockets which have been at least partially
- *     opened, but not yet accepted.
- */
-
-static void tcp_listen_stop (struct sock *sk)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-       struct listen_sock *lopt;
-       struct request_sock *acc_req;
-       struct request_sock *req;
-       int i;
-
-       tcp_delete_keepalive_timer(sk);
-
-       /* make all the listen_opt local to us */
-       lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue);
-       acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue);
-
-       if (lopt->qlen) {
-               for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
-                       while ((req = lopt->syn_table[i]) != NULL) {
-                               lopt->syn_table[i] = req->dl_next;
-                               lopt->qlen--;
-                               reqsk_free(req);
-
-               /* Following specs, it would be better either to send FIN
-                * (and enter FIN-WAIT-1, it is normal close)
-                * or to send active reset (abort).
-                * Certainly, it is pretty dangerous while synflood, but it is
-                * bad justification for our negligence 8)
-                * To be honest, we are not able to make either
-                * of the variants now.                 --ANK
-                */
-                       }
-               }
-       }
-       BUG_TRAP(!lopt->qlen);
-
-       kfree(lopt);
-
-       while ((req = acc_req) != NULL) {
-               struct sock *child = req->sk;
-
-               acc_req = req->dl_next;
-
-               local_bh_disable();
-               bh_lock_sock(child);
-               BUG_TRAP(!sock_owned_by_user(child));
-               sock_hold(child);
-
-               tcp_disconnect(child, O_NONBLOCK);
-
-               sock_orphan(child);
-
-               atomic_inc(&tcp_orphan_count);
-
-               tcp_destroy_sock(child);
-
-               bh_unlock_sock(child);
-               local_bh_enable();
-               sock_put(child);
-
-               sk_acceptq_removed(sk);
-               __reqsk_free(req);
-       }
-       BUG_TRAP(!sk->sk_ack_backlog);
-}
-
  static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
  {
         TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
@@ -584,7 +471,7 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
         sk_charge_skb(sk, skb);
         if (!sk->sk_send_head)
                 sk->sk_send_head = skb;
-       else if (tp->nonagle&TCP_NAGLE_PUSH)
+       if (tp->nonagle & TCP_NAGLE_PUSH)
                 tp->nonagle &= ~TCP_NAGLE_PUSH; 
  }
  
@@ -975,7 +862,7 @@ do_fault:
         if (!skb->len) {
                 if (sk->sk_send_head == skb)
                         sk->sk_send_head = NULL;
-               __skb_unlink(skb, skb->list);
+               __skb_unlink(skb, &sk->sk_write_queue);
                 sk_stream_free_skb(sk, skb);
         }
  
@@ -1057,20 +944,21 @@ static void cleanup_rbuf(struct sock *sk, int copied)
         BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
  #endif
  
-       if (tcp_ack_scheduled(tp)) {
+       if (inet_csk_ack_scheduled(sk)) {
+               const struct inet_connection_sock *icsk = inet_csk(sk);
                    /* Delayed ACKs frequently hit locked sockets during bulk
                     * receive. */
-               if (tp->ack.blocked ||
+               if (icsk->icsk_ack.blocked ||
                     /* Once-per-two-segments ACK was not sent by tcp_input.c */
-                   tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss ||
+                   tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
                     /*
                      * If this read emptied read buffer, we send ACK, if
                      * connection is not bidirectional, user drained
                      * receive buffer and there was a small segment
                      * in queue.
                      */
-                   (copied > 0 && (tp->ack.pending & TCP_ACK_PUSHED) &&
-                    !tp->ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
+                   (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
+                    !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
                         time_to_ack = 1;
         }
  
@@ -1572,40 +1460,6 @@ void tcp_shutdown(struct sock *sk, int how)
         }
  }
  
-/*
- * At this point, there should be no process reference to this
- * socket, and thus no user references at all.  Therefore we
- * can assume the socket waitqueue is inactive and nobody will
- * try to jump onto it.
- */
-void tcp_destroy_sock(struct sock *sk)
-{
-       BUG_TRAP(sk->sk_state == TCP_CLOSE);
-       BUG_TRAP(sock_flag(sk, SOCK_DEAD));
-
-       /* It cannot be in hash table! */
-       BUG_TRAP(sk_unhashed(sk));
-
-       /* If it has not 0 inet_sk(sk)->num, it must be bound */
-       BUG_TRAP(!inet_sk(sk)->num || tcp_sk(sk)->bind_hash);
-
-       sk->sk_prot->destroy(sk);
-
-       sk_stream_kill_queues(sk);
-
-       xfrm_sk_free_policy(sk);
-
-#ifdef INET_REFCNT_DEBUG
-       if (atomic_read(&sk->sk_refcnt) != 1) {
-               printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n",
-                      sk, atomic_read(&sk->sk_refcnt));
-       }
-#endif
-
-       atomic_dec(&tcp_orphan_count);
-       sock_put(sk);
-}
-
  void tcp_close(struct sock *sk, long timeout)
  {
         struct sk_buff *skb;
@@ -1618,7 +1472,7 @@ void tcp_close(struct sock *sk, long timeout)
                 tcp_set_state(sk, TCP_CLOSE);
  
                 /* Special case. */
-               tcp_listen_stop(sk);
+               inet_csk_listen_stop(sk);
  
                 goto adjudge_to_death;
         }
@@ -1721,12 +1575,12 @@ adjudge_to_death:
                         tcp_send_active_reset(sk, GFP_ATOMIC);
                         NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
                 } else {
-                       int tmo = tcp_fin_time(tp);
+                       const int tmo = tcp_fin_time(sk);
  
                         if (tmo > TCP_TIMEWAIT_LEN) {
-                               tcp_reset_keepalive_timer(sk, tcp_fin_time(tp));
+                               inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
                         } else {
-                               atomic_inc(&tcp_orphan_count);
+                               atomic_inc(sk->sk_prot->orphan_count);
                                 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
                                 goto out;
                         }
@@ -1734,7 +1588,7 @@ adjudge_to_death:
         }
         if (sk->sk_state != TCP_CLOSE) {
                 sk_stream_mem_reclaim(sk);
-               if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
+               if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans ||
                     (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
                      atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
                         if (net_ratelimit())
@@ -1745,10 +1599,10 @@ adjudge_to_death:
                         NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
                 }
         }
-       atomic_inc(&tcp_orphan_count);
+       atomic_inc(sk->sk_prot->orphan_count);
  
         if (sk->sk_state == TCP_CLOSE)
-               tcp_destroy_sock(sk);
+               inet_csk_destroy_sock(sk);
         /* Otherwise, socket is reprieved until protocol close. */
  
  out:
@@ -1769,6 +1623,7 @@ static inline int tcp_need_reset(int state)
  int tcp_disconnect(struct sock *sk, int flags)
  {
         struct inet_sock *inet = inet_sk(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         int err = 0;
         int old_state = sk->sk_state;
@@ -1778,7 +1633,7 @@ int tcp_disconnect(struct sock *sk, int flags)
  
         /* ABORT function of RFC793 */
         if (old_state == TCP_LISTEN) {
-               tcp_listen_stop(sk);
+               inet_csk_listen_stop(sk);
         } else if (tcp_need_reset(old_state) ||
                    (tp->snd_nxt != tp->write_seq &&
                     (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -1805,118 +1660,26 @@ int tcp_disconnect(struct sock *sk, int flags)
         tp->srtt = 0;
         if ((tp->write_seq += tp->max_window + 2) == 0)
                 tp->write_seq = 1;
-       tp->backoff = 0;
+       icsk->icsk_backoff = 0;
         tp->snd_cwnd = 2;
-       tp->probes_out = 0;
+       icsk->icsk_probes_out = 0;
         tp->packets_out = 0;
         tp->snd_ssthresh = 0x7fffffff;
         tp->snd_cwnd_cnt = 0;
-       tcp_set_ca_state(tp, TCP_CA_Open);
+       tcp_set_ca_state(sk, TCP_CA_Open);
         tcp_clear_retrans(tp);
-       tcp_delack_init(tp);
+       inet_csk_delack_init(sk);
         sk->sk_send_head = NULL;
         tp->rx_opt.saw_tstamp = 0;
         tcp_sack_reset(&tp->rx_opt);
         __sk_dst_reset(sk);
  
-       BUG_TRAP(!inet->num || tp->bind_hash);
+       BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
  
         sk->sk_error_report(sk);
         return err;
  }
  
-/*
- *     Wait for an incoming connection, avoid race
- *     conditions. This must be called with the socket locked.
- */
-static int wait_for_connect(struct sock *sk, long timeo)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-       DEFINE_WAIT(wait);
-       int err;
-
-       /*
-        * True wake-one mechanism for incoming connections: only
-        * one process gets woken up, not the 'whole herd'.
-        * Since we do not 'race & poll' for established sockets
-        * anymore, the common case will execute the loop only once.
-        *
-        * Subtle issue: "add_wait_queue_exclusive()" will be added
-        * after any current non-exclusive waiters, and we know that
-        * it will always _stay_ after any new non-exclusive waiters
-        * because all non-exclusive waiters are added at the
-        * beginning of the wait-queue. As such, it's ok to "drop"
-        * our exclusiveness temporarily when we get woken up without
-        * having to remove and re-insert us on the wait queue.
-        */
-       for (;;) {
-               prepare_to_wait_exclusive(sk->sk_sleep, &wait,
-                                         TASK_INTERRUPTIBLE);
-               release_sock(sk);
-               if (reqsk_queue_empty(&tp->accept_queue))
-                       timeo = schedule_timeout(timeo);
-               lock_sock(sk);
-               err = 0;
-               if (!reqsk_queue_empty(&tp->accept_queue))
-                       break;
-               err = -EINVAL;
-               if (sk->sk_state != TCP_LISTEN)
-                       break;
-               err = sock_intr_errno(timeo);
-               if (signal_pending(current))
-                       break;
-               err = -EAGAIN;
-               if (!timeo)
-                       break;
-       }
-       finish_wait(sk->sk_sleep, &wait);
-       return err;
-}
-
-/*
- *     This will accept the next outstanding connection.
- */
-
-struct sock *tcp_accept(struct sock *sk, int flags, int *err)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-       struct sock *newsk;
-       int error;
-
-       lock_sock(sk);
-
-       /* We need to make sure that this socket is listening,
-        * and that it has something pending.
-        */
-       error = -EINVAL;
-       if (sk->sk_state != TCP_LISTEN)
-               goto out_err;
-
-       /* Find already established connection */
-       if (reqsk_queue_empty(&tp->accept_queue)) {
-               long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
-
-               /* If this is a non blocking socket don't sleep */
-               error = -EAGAIN;
-               if (!timeo)
-                       goto out_err;
-
-               error = wait_for_connect(sk, timeo);
-               if (error)
-                       goto out_err;
-       }
-
-       newsk = reqsk_queue_get_child(&tp->accept_queue, sk);
-       BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
-out:
-       release_sock(sk);
-       return newsk;
-out_err:
-       newsk = NULL;
-       *err = error;
-       goto out;
-}
-
  /*
   *     Socket option code for TCP.
   */
@@ -1924,6 +1687,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
                    int optlen)
  {
         struct tcp_sock *tp = tcp_sk(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
         int val;
         int err = 0;
  
@@ -1945,7 +1709,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
                 name[val] = 0;
  
                 lock_sock(sk);
-               err = tcp_set_congestion_control(tp, name);
+               err = tcp_set_congestion_control(sk, name);
                 release_sock(sk);
                 return err;
         }
@@ -2022,7 +1786,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
                                         elapsed = tp->keepalive_time - elapsed;
                                 else
                                         elapsed = 0;
-                               tcp_reset_keepalive_timer(sk, elapsed);
+                               inet_csk_reset_keepalive_timer(sk, elapsed);
                         }
                 }
                 break;
@@ -2042,7 +1806,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
                 if (val < 1 || val > MAX_TCP_SYNCNT)
                         err = -EINVAL;
                 else
-                       tp->syn_retries = val;
+                       icsk->icsk_syn_retries = val;
                 break;
  
         case TCP_LINGER2:
@@ -2055,15 +1819,15 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
                 break;
  
         case TCP_DEFER_ACCEPT:
-               tp->defer_accept = 0;
+               icsk->icsk_accept_queue.rskq_defer_accept = 0;
                 if (val > 0) {
                         /* Translate value in seconds to number of
                          * retransmits */
-                       while (tp->defer_accept < 32 &&
+                       while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
                                val > ((TCP_TIMEOUT_INIT / HZ) <<
-                                      tp->defer_accept))
-                               tp->defer_accept++;
-                       tp->defer_accept++;
+                                      icsk->icsk_accept_queue.rskq_defer_accept))
+                               icsk->icsk_accept_queue.rskq_defer_accept++;
+                       icsk->icsk_accept_queue.rskq_defer_accept++;
                 }
                 break;
  
@@ -2081,16 +1845,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
  
         case TCP_QUICKACK:
                 if (!val) {
-                       tp->ack.pingpong = 1;
+                       icsk->icsk_ack.pingpong = 1;
                 } else {
-                       tp->ack.pingpong = 0;
+                       icsk->icsk_ack.pingpong = 0;
                         if ((1 << sk->sk_state) &
                             (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
-                           tcp_ack_scheduled(tp)) {
-                               tp->ack.pending |= TCP_ACK_PUSHED;
+                           inet_csk_ack_scheduled(sk)) {
+                               icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
                                 cleanup_rbuf(sk, 1);
                                 if (!(val & 1))
-                                       tp->ack.pingpong = 1;
+                                       icsk->icsk_ack.pingpong = 1;
                         }
                 }
                 break;
@@ -2107,15 +1871,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
  void tcp_get_info(struct sock *sk, struct tcp_info *info)
  {
         struct tcp_sock *tp = tcp_sk(sk);
+       const struct inet_connection_sock *icsk = inet_csk(sk);
         u32 now = tcp_time_stamp;
  
         memset(info, 0, sizeof(*info));
  
         info->tcpi_state = sk->sk_state;
-       info->tcpi_ca_state = tp->ca_state;
-       info->tcpi_retransmits = tp->retransmits;
-       info->tcpi_probes = tp->probes_out;
-       info->tcpi_backoff = tp->backoff;
+       info->tcpi_ca_state = icsk->icsk_ca_state;
+       info->tcpi_retransmits = icsk->icsk_retransmits;
+       info->tcpi_probes = icsk->icsk_probes_out;
+       info->tcpi_backoff = icsk->icsk_backoff;
  
         if (tp->rx_opt.tstamp_ok)
                 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
@@ -2130,10 +1895,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
         if (tp->ecn_flags&TCP_ECN_OK)
                 info->tcpi_options |= TCPI_OPT_ECN;
  
-       info->tcpi_rto = jiffies_to_usecs(tp->rto);
-       info->tcpi_ato = jiffies_to_usecs(tp->ack.ato);
+       info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
+       info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
         info->tcpi_snd_mss = tp->mss_cache;
-       info->tcpi_rcv_mss = tp->ack.rcv_mss;
+       info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
  
         info->tcpi_unacked = tp->packets_out;
         info->tcpi_sacked = tp->sacked_out;
@@ -2142,7 +1907,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
         info->tcpi_fackets = tp->fackets_out;
  
         info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
-       info->tcpi_last_data_recv = jiffies_to_msecs(now - tp->ack.lrcvtime);
+       info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
         info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
  
         info->tcpi_pmtu = tp->pmtu_cookie;
@@ -2165,6 +1930,7 @@ EXPORT_SYMBOL_GPL(tcp_get_info);
  int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
                    int __user *optlen)
  {
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         int val, len;
  
@@ -2202,7 +1968,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
                 val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
                 break;
         case TCP_SYNCNT:
-               val = tp->syn_retries ? : sysctl_tcp_syn_retries;
+               val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
                 break;
         case TCP_LINGER2:
                 val = tp->linger2;
@@ -2210,8 +1976,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
                         val = (val ? : sysctl_tcp_fin_timeout) / HZ;
                 break;
         case TCP_DEFER_ACCEPT:
-               val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) <<
-                                              (tp->defer_accept - 1));
+               val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
+                       ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
                 break;
         case TCP_WINDOW_CLAMP:
                 val = tp->window_clamp;
@@ -2232,7 +1998,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
                 return 0;
         }
         case TCP_QUICKACK:
-               val = !tp->ack.pingpong;
+               val = !icsk->icsk_ack.pingpong;
                 break;
  
         case TCP_CONGESTION:
@@ -2241,7 +2007,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
                 len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
                 if (put_user(len, optlen))
                         return -EFAULT;
-               if (copy_to_user(optval, tp->ca_ops->name, len))
+               if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
                         return -EFAULT;
                 return 0;
         default:
@@ -2278,79 +2044,72 @@ void __init tcp_init(void)
                 __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
                                            sizeof(skb->cb));
  
-       tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
-                                             sizeof(struct tcp_bind_bucket),
-                                             0, SLAB_HWCACHE_ALIGN,
-                                             NULL, NULL);
-       if (!tcp_bucket_cachep)
+       tcp_hashinfo.bind_bucket_cachep =
+               kmem_cache_create("tcp_bind_bucket",
+                                 sizeof(struct inet_bind_bucket), 0,
+                                 SLAB_HWCACHE_ALIGN, NULL, NULL);
+       if (!tcp_hashinfo.bind_bucket_cachep)
                 panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
  
-       tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket",
-                                               sizeof(struct tcp_tw_bucket),
-                                               0, SLAB_HWCACHE_ALIGN,
-                                               NULL, NULL);
-       if (!tcp_timewait_cachep)
-               panic("tcp_init: Cannot alloc tcp_tw_bucket cache.");
-
         /* Size and allocate the main established and bind bucket
          * hash tables.
          *
          * The methodology is similar to that of the buffer cache.
          */
-       tcp_ehash = (struct tcp_ehash_bucket *)
+       tcp_hashinfo.ehash =
                 alloc_large_system_hash("TCP established",
-                                       sizeof(struct tcp_ehash_bucket),
+                                       sizeof(struct inet_ehash_bucket),
                                         thash_entries,
                                         (num_physpages >= 128 * 1024) ?
                                                 (25 - PAGE_SHIFT) :
                                                 (27 - PAGE_SHIFT),
                                         HASH_HIGHMEM,
-                                       &tcp_ehash_size,
+                                       &tcp_hashinfo.ehash_size,
                                         NULL,
                                         0);
-       tcp_ehash_size = (1 << tcp_ehash_size) >> 1;
-       for (i = 0; i < (tcp_ehash_size << 1); i++) {
-               rwlock_init(&tcp_ehash[i].lock);
-               INIT_HLIST_HEAD(&tcp_ehash[i].chain);
+       tcp_hashinfo.ehash_size = (1 << tcp_hashinfo.ehash_size) >> 1;
+       for (i = 0; i < (tcp_hashinfo.ehash_size << 1); i++) {
+               rwlock_init(&tcp_hashinfo.ehash[i].lock);
+               INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain);
         }
  
-       tcp_bhash = (struct tcp_bind_hashbucket *)
+       tcp_hashinfo.bhash =
                 alloc_large_system_hash("TCP bind",
-                                       sizeof(struct tcp_bind_hashbucket),
-                                       tcp_ehash_size,
+                                       sizeof(struct inet_bind_hashbucket),
+                                       tcp_hashinfo.ehash_size,
                                         (num_physpages >= 128 * 1024) ?
                                                 (25 - PAGE_SHIFT) :
                                                 (27 - PAGE_SHIFT),
                                         HASH_HIGHMEM,
-                                       &tcp_bhash_size,
+                                       &tcp_hashinfo.bhash_size,
                                         NULL,
                                         64 * 1024);
-       tcp_bhash_size = 1 << tcp_bhash_size;
-       for (i = 0; i < tcp_bhash_size; i++) {
-               spin_lock_init(&tcp_bhash[i].lock);
-               INIT_HLIST_HEAD(&tcp_bhash[i].chain);
+       tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size;
+       for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
+               spin_lock_init(&tcp_hashinfo.bhash[i].lock);
+               INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
         }
  
         /* Try to be a bit smarter and adjust defaults depending
          * on available memory.
          */
         for (order = 0; ((1 << order) << PAGE_SHIFT) <
-                       (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket));
+                       (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket));
                         order++)
                 ;
         if (order >= 4) {
                 sysctl_local_port_range[0] = 32768;
                 sysctl_local_port_range[1] = 61000;
-               sysctl_tcp_max_tw_buckets = 180000;
+               tcp_death_row.sysctl_max_tw_buckets = 180000;
                 sysctl_tcp_max_orphans = 4096 << (order - 4);
                 sysctl_max_syn_backlog = 1024;
         } else if (order < 3) {
                 sysctl_local_port_range[0] = 1024 * (3 - order);
-               sysctl_tcp_max_tw_buckets >>= (3 - order);
+               tcp_death_row.sysctl_max_tw_buckets >>= (3 - order);
                 sysctl_tcp_max_orphans >>= (3 - order);
                 sysctl_max_syn_backlog = 128;
         }
-       tcp_port_rover = sysctl_local_port_range[0] - 1;
+       tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1;
  
         sysctl_tcp_mem[0] =  768 << order;
         sysctl_tcp_mem[1] = 1024 << order;
@@ -2365,14 +2124,12 @@ void __init tcp_init(void)
  
         printk(KERN_INFO "TCP: Hash tables configured "
                "(established %d bind %d)\n",
-              tcp_ehash_size << 1, tcp_bhash_size);
+              tcp_hashinfo.ehash_size << 1, tcp_hashinfo.bhash_size);
  
         tcp_register_congestion_control(&tcp_reno);
  }
  
-EXPORT_SYMBOL(tcp_accept);
  EXPORT_SYMBOL(tcp_close);
-EXPORT_SYMBOL(tcp_destroy_sock);
  EXPORT_SYMBOL(tcp_disconnect);
  EXPORT_SYMBOL(tcp_getsockopt);
  EXPORT_SYMBOL(tcp_ioctl);
@@ -2384,4 +2141,3 @@ EXPORT_SYMBOL(tcp_sendpage);
  EXPORT_SYMBOL(tcp_setsockopt);
  EXPORT_SYMBOL(tcp_shutdown);
  EXPORT_SYMBOL(tcp_statistics);
-EXPORT_SYMBOL(tcp_timewait_cachep);
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c

index ec38d45d6649633178c8d541a34499349443c8a4..b940346de4e7cbc5bc724245992c07f6c0e41120 100644 (file)
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -86,11 +86,11 @@ static inline void bictcp_reset(struct bictcp *ca)
         ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
  }
  
-static void bictcp_init(struct tcp_sock *tp)
+static void bictcp_init(struct sock *sk)
  {
-       bictcp_reset(tcp_ca(tp));
+       bictcp_reset(inet_csk_ca(sk));
         if (initial_ssthresh)
-               tp->snd_ssthresh = initial_ssthresh;
+               tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
  }
  
  /*
@@ -156,9 +156,10 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
  
  
  /* Detect low utilization in congestion avoidance */
-static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag)
+static inline void bictcp_low_utilization(struct sock *sk, int flag)
  {
-       struct bictcp *ca = tcp_ca(tp);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct bictcp *ca = inet_csk_ca(sk);
         u32 dist, delay;
  
         /* No time stamp */
@@ -208,12 +209,13 @@ static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag)
  
  }
  
-static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack,
+static void bictcp_cong_avoid(struct sock *sk, u32 ack,
                               u32 seq_rtt, u32 in_flight, int data_acked)
  {
-       struct bictcp *ca = tcp_ca(tp);
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct bictcp *ca = inet_csk_ca(sk);
  
-       bictcp_low_utilization(tp, data_acked);
+       bictcp_low_utilization(sk, data_acked);
  
         if (in_flight < tp->snd_cwnd)
                 return;
@@ -242,9 +244,10 @@ static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack,
   *     behave like Reno until low_window is reached,
   *     then increase congestion window slowly
   */
-static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp)
+static u32 bictcp_recalc_ssthresh(struct sock *sk)
  {
-       struct bictcp *ca = tcp_ca(tp);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct bictcp *ca = inet_csk_ca(sk);
  
         ca->epoch_start = 0;    /* end of epoch */
  
@@ -269,31 +272,34 @@ static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp)
                 return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
  }
  
-static u32 bictcp_undo_cwnd(struct tcp_sock *tp)
+static u32 bictcp_undo_cwnd(struct sock *sk)
  {
-       struct bictcp *ca = tcp_ca(tp);
-
+       const struct tcp_sock *tp = tcp_sk(sk);
+       const struct bictcp *ca = inet_csk_ca(sk);
         return max(tp->snd_cwnd, ca->last_max_cwnd);
  }
  
-static u32 bictcp_min_cwnd(struct tcp_sock *tp)
+static u32 bictcp_min_cwnd(struct sock *sk)
  {
+       const struct tcp_sock *tp = tcp_sk(sk);
         return tp->snd_ssthresh;
  }
  
-static void bictcp_state(struct tcp_sock *tp, u8 new_state)
+static void bictcp_state(struct sock *sk, u8 new_state)
  {
         if (new_state == TCP_CA_Loss)
-               bictcp_reset(tcp_ca(tp));
+               bictcp_reset(inet_csk_ca(sk));
  }
  
  /* Track delayed acknowledgement ratio using sliding window
   * ratio = (15*ratio + sample) / 16
   */
-static void bictcp_acked(struct tcp_sock *tp, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt)
  {
-       if (cnt > 0 &&  tp->ca_state == TCP_CA_Open) {
-               struct bictcp *ca = tcp_ca(tp);
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+
+       if (cnt > 0 &&  icsk->icsk_ca_state == TCP_CA_Open) {
+               struct bictcp *ca = inet_csk_ca(sk);
                 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
                 ca->delayed_ack += cnt;
         }
@@ -314,7 +320,7 @@ static struct tcp_congestion_ops bictcp = {
  
  static int __init bictcp_register(void)
  {
-       BUG_ON(sizeof(struct bictcp) > TCP_CA_PRIV_SIZE);
+       BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
         return tcp_register_congestion_control(&bictcp);
  }
  
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c

index 4970d10a7785af03276c22a29f03e8f7c90c14e9..bbf2d6624e894b927a169d92ae2be882c851b91a 100644 (file)
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -73,33 +73,36 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
  EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
  
  /* Assign choice of congestion control. */
-void tcp_init_congestion_control(struct tcp_sock *tp)
+void tcp_init_congestion_control(struct sock *sk)
  {
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_congestion_ops *ca;
  
-       if (tp->ca_ops != &tcp_init_congestion_ops)
+       if (icsk->icsk_ca_ops != &tcp_init_congestion_ops)
                 return;
  
         rcu_read_lock();
         list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
                 if (try_module_get(ca->owner)) {
-                       tp->ca_ops = ca;
+                       icsk->icsk_ca_ops = ca;
                         break;
                 }
  
         }
         rcu_read_unlock();
  
-       if (tp->ca_ops->init)
-               tp->ca_ops->init(tp);
+       if (icsk->icsk_ca_ops->init)
+               icsk->icsk_ca_ops->init(sk);
  }
  
  /* Manage refcounts on socket close. */
-void tcp_cleanup_congestion_control(struct tcp_sock *tp)
+void tcp_cleanup_congestion_control(struct sock *sk)
  {
-       if (tp->ca_ops->release)
-               tp->ca_ops->release(tp);
-       module_put(tp->ca_ops->owner);
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       if (icsk->icsk_ca_ops->release)
+               icsk->icsk_ca_ops->release(sk);
+       module_put(icsk->icsk_ca_ops->owner);
  }
  
  /* Used by sysctl to change default congestion control */
@@ -143,14 +146,15 @@ void tcp_get_default_congestion_control(char *name)
  }
  
  /* Change congestion control for socket */
-int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
+int tcp_set_congestion_control(struct sock *sk, const char *name)
  {
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_congestion_ops *ca;
         int err = 0;
  
         rcu_read_lock();
         ca = tcp_ca_find(name);
-       if (ca == tp->ca_ops)
+       if (ca == icsk->icsk_ca_ops)
                 goto out;
  
         if (!ca)
@@ -160,10 +164,10 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
                 err = -EBUSY;
  
         else {
-               tcp_cleanup_congestion_control(tp);
-               tp->ca_ops = ca;
-               if (tp->ca_ops->init)
-                       tp->ca_ops->init(tp);
+               tcp_cleanup_congestion_control(sk);
+               icsk->icsk_ca_ops = ca;
+               if (icsk->icsk_ca_ops->init)
+                       icsk->icsk_ca_ops->init(sk);
         }
   out:
         rcu_read_unlock();
@@ -177,9 +181,11 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
  /* This is Jacobson's slow start and congestion avoidance.
   * SIGCOMM '88, p. 328.
   */
-void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight,
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
                          int flag)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
+
         if (in_flight < tp->snd_cwnd)
                 return;
  
@@ -202,15 +208,17 @@ void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight,
  EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
  
  /* Slow start threshold is half the congestion window (min 2) */
-u32 tcp_reno_ssthresh(struct tcp_sock *tp)
+u32 tcp_reno_ssthresh(struct sock *sk)
  {
+       const struct tcp_sock *tp = tcp_sk(sk);
         return max(tp->snd_cwnd >> 1U, 2U);
  }
  EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
  
  /* Lower bound on congestion window. */
-u32 tcp_reno_min_cwnd(struct tcp_sock *tp)
+u32 tcp_reno_min_cwnd(struct sock *sk)
  {
+       const struct tcp_sock *tp = tcp_sk(sk);
         return tp->snd_ssthresh/2;
  }
  EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c

index f66945cb158fd346c0b2f87d0dec068620b7a838..c148c1081880a03aa1a7f24a692cb439f7ee3d70 100644 (file)
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -1,5 +1,5 @@
  /*
- * tcp_diag.c  Module for monitoring TCP sockets.
+ * tcp_diag.c  Module for monitoring TCP transport protocols sockets.
   *
   * Version:    $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
   *
@@ -12,779 +12,43 @@
   */
  
  #include <linux/config.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/fcntl.h>
-#include <linux/random.h>
-#include <linux/cache.h>
-#include <linux/init.h>
-#include <linux/time.h>
-
-#include <net/icmp.h>
-#include <net/tcp.h>
-#include <net/ipv6.h>
-#include <net/inet_common.h>
-
-#include <linux/inet.h>
-#include <linux/stddef.h>
-
-#include <linux/tcp_diag.h>
  
-struct tcpdiag_entry
-{
-       u32 *saddr;
-       u32 *daddr;
-       u16 sport;
-       u16 dport;
-       u16 family;
-       u16 userlocks;
-};
+#include <linux/module.h>
+#include <linux/inet_diag.h>
  
-static struct sock *tcpnl;
+#include <linux/tcp.h>
  
-#define TCPDIAG_PUT(skb, attrtype, attrlen) \
-       RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
+#include <net/tcp.h>
  
-static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
-                       int ext, u32 pid, u32 seq, u16 nlmsg_flags)
+static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+                             void *_info)
  {
-       struct inet_sock *inet = inet_sk(sk);
-       struct tcp_sock *tp = tcp_sk(sk);
-       struct tcpdiagmsg *r;
-       struct nlmsghdr  *nlh;
-       struct tcp_info  *info = NULL;
-       struct tcpdiag_meminfo  *minfo = NULL;
-       unsigned char    *b = skb->tail;
-
-       nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r));
-       nlh->nlmsg_flags = nlmsg_flags;
-       r = NLMSG_DATA(nlh);
-       if (sk->sk_state != TCP_TIME_WAIT) {
-               if (ext & (1<<(TCPDIAG_MEMINFO-1)))
-                       minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo));
-               if (ext & (1<<(TCPDIAG_INFO-1)))
-                       info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info));
-               
-               if (ext & (1<<(TCPDIAG_CONG-1))) {
-                       size_t len = strlen(tp->ca_ops->name);
-                       strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1),
-                              tp->ca_ops->name);
-               }
-       }
-       r->tcpdiag_family = sk->sk_family;
-       r->tcpdiag_state = sk->sk_state;
-       r->tcpdiag_timer = 0;
-       r->tcpdiag_retrans = 0;
-
-       r->id.tcpdiag_if = sk->sk_bound_dev_if;
-       r->id.tcpdiag_cookie[0] = (u32)(unsigned long)sk;
-       r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
-
-       if (r->tcpdiag_state == TCP_TIME_WAIT) {
-               struct tcp_tw_bucket *tw = (struct tcp_tw_bucket*)sk;
-               long tmo = tw->tw_ttd - jiffies;
-               if (tmo < 0)
-                       tmo = 0;
-
-               r->id.tcpdiag_sport = tw->tw_sport;
-               r->id.tcpdiag_dport = tw->tw_dport;
-               r->id.tcpdiag_src[0] = tw->tw_rcv_saddr;
-               r->id.tcpdiag_dst[0] = tw->tw_daddr;
-               r->tcpdiag_state = tw->tw_substate;
-               r->tcpdiag_timer = 3;
-               r->tcpdiag_expires = (tmo*1000+HZ-1)/HZ;
-               r->tcpdiag_rqueue = 0;
-               r->tcpdiag_wqueue = 0;
-               r->tcpdiag_uid = 0;
-               r->tcpdiag_inode = 0;
-#ifdef CONFIG_IP_TCPDIAG_IPV6
-               if (r->tcpdiag_family == AF_INET6) {
-                       ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
-                                      &tw->tw_v6_rcv_saddr);
-                       ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
-                                      &tw->tw_v6_daddr);
-               }
-#endif
-               nlh->nlmsg_len = skb->tail - b;
-               return skb->len;
-       }
-
-       r->id.tcpdiag_sport = inet->sport;
-       r->id.tcpdiag_dport = inet->dport;
-       r->id.tcpdiag_src[0] = inet->rcv_saddr;
-       r->id.tcpdiag_dst[0] = inet->daddr;
-
-#ifdef CONFIG_IP_TCPDIAG_IPV6
-       if (r->tcpdiag_family == AF_INET6) {
-               struct ipv6_pinfo *np = inet6_sk(sk);
-
-               ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
-                              &np->rcv_saddr);
-               ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
-                              &np->daddr);
-       }
-#endif
-
-#define EXPIRES_IN_MS(tmo)  ((tmo-jiffies)*1000+HZ-1)/HZ
-
-       if (tp->pending == TCP_TIME_RETRANS) {
-               r->tcpdiag_timer = 1;
-               r->tcpdiag_retrans = tp->retransmits;
-               r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout);
-       } else if (tp->pending == TCP_TIME_PROBE0) {
-               r->tcpdiag_timer = 4;
-               r->tcpdiag_retrans = tp->probes_out;
-               r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout);
-       } else if (timer_pending(&sk->sk_timer)) {
-               r->tcpdiag_timer = 2;
-               r->tcpdiag_retrans = tp->probes_out;
-               r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires);
-       } else {
-               r->tcpdiag_timer = 0;
-               r->tcpdiag_expires = 0;
-       }
-#undef EXPIRES_IN_MS
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct tcp_info *info = _info;
  
-       r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq;
-       r->tcpdiag_wqueue = tp->write_seq - tp->snd_una;
-       r->tcpdiag_uid = sock_i_uid(sk);
-       r->tcpdiag_inode = sock_i_ino(sk);
-
-       if (minfo) {
-               minfo->tcpdiag_rmem = atomic_read(&sk->sk_rmem_alloc);
-               minfo->tcpdiag_wmem = sk->sk_wmem_queued;
-               minfo->tcpdiag_fmem = sk->sk_forward_alloc;
-               minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc);
-       }
-
-       if (info) 
+       r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq;
+       r->idiag_wqueue = tp->write_seq - tp->snd_una;
+       if (info != NULL)
                 tcp_get_info(sk, info);
-
-       if (sk->sk_state < TCP_TIME_WAIT && tp->ca_ops->get_info)
-               tp->ca_ops->get_info(tp, ext, skb);
-
-       nlh->nlmsg_len = skb->tail - b;
-       return skb->len;
-
-rtattr_failure:
-nlmsg_failure:
-       skb_trim(skb, b - skb->data);
-       return -1;
-}
-
-extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport,
-                                 int dif);
-#ifdef CONFIG_IP_TCPDIAG_IPV6
-extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
-                                 struct in6_addr *daddr, u16 dport,
-                                 int dif);
-#else
-static inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
-                                        struct in6_addr *daddr, u16 dport,
-                                        int dif)
-{
-       return NULL;
-}
-#endif
-
-static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
-{
-       int err;
-       struct sock *sk;
-       struct tcpdiagreq *req = NLMSG_DATA(nlh);
-       struct sk_buff *rep;
-
-       if (req->tcpdiag_family == AF_INET) {
-               sk = tcp_v4_lookup(req->id.tcpdiag_dst[0], req->id.tcpdiag_dport,
-                                  req->id.tcpdiag_src[0], req->id.tcpdiag_sport,
-                                  req->id.tcpdiag_if);
-       }
-#ifdef CONFIG_IP_TCPDIAG_IPV6
-       else if (req->tcpdiag_family == AF_INET6) {
-               sk = tcp_v6_lookup((struct in6_addr*)req->id.tcpdiag_dst, req->id.tcpdiag_dport,
-                                  (struct in6_addr*)req->id.tcpdiag_src, req->id.tcpdiag_sport,
-                                  req->id.tcpdiag_if);
-       }
-#endif
-       else {
-               return -EINVAL;
-       }
-
-       if (sk == NULL)
-               return -ENOENT;
-
-       err = -ESTALE;
-       if ((req->id.tcpdiag_cookie[0] != TCPDIAG_NOCOOKIE ||
-            req->id.tcpdiag_cookie[1] != TCPDIAG_NOCOOKIE) &&
-           ((u32)(unsigned long)sk != req->id.tcpdiag_cookie[0] ||
-            (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.tcpdiag_cookie[1]))
-               goto out;
-
-       err = -ENOMEM;
-       rep = alloc_skb(NLMSG_SPACE(sizeof(struct tcpdiagmsg)+
-                                   sizeof(struct tcpdiag_meminfo)+
-                                   sizeof(struct tcp_info)+64), GFP_KERNEL);
-       if (!rep)
-               goto out;
-
-       if (tcpdiag_fill(rep, sk, req->tcpdiag_ext,
-                        NETLINK_CB(in_skb).pid,
-                        nlh->nlmsg_seq, 0) <= 0)
-               BUG();
-
-       err = netlink_unicast(tcpnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-       if (err > 0)
-               err = 0;
-
-out:
-       if (sk) {
-               if (sk->sk_state == TCP_TIME_WAIT)
-                       tcp_tw_put((struct tcp_tw_bucket*)sk);
-               else
-                       sock_put(sk);
-       }
-       return err;
-}
-
-static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
-{
-       int words = bits >> 5;
-
-       bits &= 0x1f;
-
-       if (words) {
-               if (memcmp(a1, a2, words << 2))
-                       return 0;
-       }
-       if (bits) {
-               __u32 w1, w2;
-               __u32 mask;
-
-               w1 = a1[words];
-               w2 = a2[words];
-
-               mask = htonl((0xffffffff) << (32 - bits));
-
-               if ((w1 ^ w2) & mask)
-                       return 0;
-       }
-
-       return 1;
-}
-
-
-static int tcpdiag_bc_run(const void *bc, int len,
-                         const struct tcpdiag_entry *entry)
-{
-       while (len > 0) {
-               int yes = 1;
-               const struct tcpdiag_bc_op *op = bc;
-
-               switch (op->code) {
-               case TCPDIAG_BC_NOP:
-                       break;
-               case TCPDIAG_BC_JMP:
-                       yes = 0;
-                       break;
-               case TCPDIAG_BC_S_GE:
-                       yes = entry->sport >= op[1].no;
-                       break;
-               case TCPDIAG_BC_S_LE:
-                       yes = entry->dport <= op[1].no;
-                       break;
-               case TCPDIAG_BC_D_GE:
-                       yes = entry->dport >= op[1].no;
-                       break;
-               case TCPDIAG_BC_D_LE:
-                       yes = entry->dport <= op[1].no;
-                       break;
-               case TCPDIAG_BC_AUTO:
-                       yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
-                       break;
-               case TCPDIAG_BC_S_COND:
-               case TCPDIAG_BC_D_COND:
-               {
-                       struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(op+1);
-                       u32 *addr;
-
-                       if (cond->port != -1 &&
-                           cond->port != (op->code == TCPDIAG_BC_S_COND ?
-                                            entry->sport : entry->dport)) {
-                               yes = 0;
-                               break;
-                       }
-                       
-                       if (cond->prefix_len == 0)
-                               break;
-
-                       if (op->code == TCPDIAG_BC_S_COND)
-                               addr = entry->saddr;
-                       else
-                               addr = entry->daddr;
-
-                       if (bitstring_match(addr, cond->addr, cond->prefix_len))
-                               break;
-                       if (entry->family == AF_INET6 &&
-                           cond->family == AF_INET) {
-                               if (addr[0] == 0 && addr[1] == 0 &&
-                                   addr[2] == htonl(0xffff) &&
-                                   bitstring_match(addr+3, cond->addr, cond->prefix_len))
-                                       break;
-                       }
-                       yes = 0;
-                       break;
-               }
-               }
-
-               if (yes) { 
-                       len -= op->yes;
-                       bc += op->yes;
-               } else {
-                       len -= op->no;
-                       bc += op->no;
-               }
-       }
-       return (len == 0);
-}
-
-static int valid_cc(const void *bc, int len, int cc)
-{
-       while (len >= 0) {
-               const struct tcpdiag_bc_op *op = bc;
-
-               if (cc > len)
-                       return 0;
-               if (cc == len)
-                       return 1;
-               if (op->yes < 4)
-                       return 0;
-               len -= op->yes;
-               bc  += op->yes;
-       }
-       return 0;
-}
-
-static int tcpdiag_bc_audit(const void *bytecode, int bytecode_len)
-{
-       const unsigned char *bc = bytecode;
-       int  len = bytecode_len;
-
-       while (len > 0) {
-               struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc;
-
-//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
-               switch (op->code) {
-               case TCPDIAG_BC_AUTO:
-               case TCPDIAG_BC_S_COND:
-               case TCPDIAG_BC_D_COND:
-               case TCPDIAG_BC_S_GE:
-               case TCPDIAG_BC_S_LE:
-               case TCPDIAG_BC_D_GE:
-               case TCPDIAG_BC_D_LE:
-                       if (op->yes < 4 || op->yes > len+4)
-                               return -EINVAL;
-               case TCPDIAG_BC_JMP:
-                       if (op->no < 4 || op->no > len+4)
-                               return -EINVAL;
-                       if (op->no < len &&
-                           !valid_cc(bytecode, bytecode_len, len-op->no))
-                               return -EINVAL;
-                       break;
-               case TCPDIAG_BC_NOP:
-                       if (op->yes < 4 || op->yes > len+4)
-                               return -EINVAL;
-                       break;
-               default:
-                       return -EINVAL;
-               }
-               bc += op->yes;
-               len -= op->yes;
-       }
-       return len == 0 ? 0 : -EINVAL;
-}
-
-static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk,
-                            struct netlink_callback *cb)
-{
-       struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
-
-       if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
-               struct tcpdiag_entry entry;
-               struct rtattr *bc = (struct rtattr *)(r + 1);
-               struct inet_sock *inet = inet_sk(sk);
-
-               entry.family = sk->sk_family;
-#ifdef CONFIG_IP_TCPDIAG_IPV6
-               if (entry.family == AF_INET6) {
-                       struct ipv6_pinfo *np = inet6_sk(sk);
-
-                       entry.saddr = np->rcv_saddr.s6_addr32;
-                       entry.daddr = np->daddr.s6_addr32;
-               } else
-#endif
-               {
-                       entry.saddr = &inet->rcv_saddr;
-                       entry.daddr = &inet->daddr;
-               }
-               entry.sport = inet->num;
-               entry.dport = ntohs(inet->dport);
-               entry.userlocks = sk->sk_userlocks;
-
-               if (!tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
-                       return 0;
-       }
-
-       return tcpdiag_fill(skb, sk, r->tcpdiag_ext, NETLINK_CB(cb->skb).pid,
-                           cb->nlh->nlmsg_seq, NLM_F_MULTI);
  }
  
-static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
-                           struct request_sock *req,
-                           u32 pid, u32 seq)
-{
-       const struct inet_request_sock *ireq = inet_rsk(req);
-       struct inet_sock *inet = inet_sk(sk);
-       unsigned char *b = skb->tail;
-       struct tcpdiagmsg *r;
-       struct nlmsghdr *nlh;
-       long tmo;
-
-       nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r));
-       nlh->nlmsg_flags = NLM_F_MULTI;
-       r = NLMSG_DATA(nlh);
-
-       r->tcpdiag_family = sk->sk_family;
-       r->tcpdiag_state = TCP_SYN_RECV;
-       r->tcpdiag_timer = 1;
-       r->tcpdiag_retrans = req->retrans;
-
-       r->id.tcpdiag_if = sk->sk_bound_dev_if;
-       r->id.tcpdiag_cookie[0] = (u32)(unsigned long)req;
-       r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);
-
-       tmo = req->expires - jiffies;
-       if (tmo < 0)
-               tmo = 0;
-
-       r->id.tcpdiag_sport = inet->sport;
-       r->id.tcpdiag_dport = ireq->rmt_port;
-       r->id.tcpdiag_src[0] = ireq->loc_addr;
-       r->id.tcpdiag_dst[0] = ireq->rmt_addr;
-       r->tcpdiag_expires = jiffies_to_msecs(tmo),
-       r->tcpdiag_rqueue = 0;
-       r->tcpdiag_wqueue = 0;
-       r->tcpdiag_uid = sock_i_uid(sk);
-       r->tcpdiag_inode = 0;
-#ifdef CONFIG_IP_TCPDIAG_IPV6
-       if (r->tcpdiag_family == AF_INET6) {
-               ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
-                              &tcp6_rsk(req)->loc_addr);
-               ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
-                              &tcp6_rsk(req)->rmt_addr);
-       }
-#endif
-       nlh->nlmsg_len = skb->tail - b;
-
-       return skb->len;
-
-nlmsg_failure:
-       skb_trim(skb, b - skb->data);
-       return -1;
-}
-
-static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
-                            struct netlink_callback *cb)
-{
-       struct tcpdiag_entry entry;
-       struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
-       struct tcp_sock *tp = tcp_sk(sk);
-       struct listen_sock *lopt;
-       struct rtattr *bc = NULL;
-       struct inet_sock *inet = inet_sk(sk);
-       int j, s_j;
-       int reqnum, s_reqnum;
-       int err = 0;
-
-       s_j = cb->args[3];
-       s_reqnum = cb->args[4];
-
-       if (s_j > 0)
-               s_j--;
-
-       entry.family = sk->sk_family;
-
-       read_lock_bh(&tp->accept_queue.syn_wait_lock);
-
-       lopt = tp->accept_queue.listen_opt;
-       if (!lopt || !lopt->qlen)
-               goto out;
-
-       if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
-               bc = (struct rtattr *)(r + 1);
-               entry.sport = inet->num;
-               entry.userlocks = sk->sk_userlocks;
-       }
-
-       for (j = s_j; j < TCP_SYNQ_HSIZE; j++) {
-               struct request_sock *req, *head = lopt->syn_table[j];
-
-               reqnum = 0;
-               for (req = head; req; reqnum++, req = req->dl_next) {
-                       struct inet_request_sock *ireq = inet_rsk(req);
-
-                       if (reqnum < s_reqnum)
-                               continue;
-                       if (r->id.tcpdiag_dport != ireq->rmt_port &&
-                           r->id.tcpdiag_dport)
-                               continue;
-
-                       if (bc) {
-                               entry.saddr =
-#ifdef CONFIG_IP_TCPDIAG_IPV6
-                                       (entry.family == AF_INET6) ?
-                                       tcp6_rsk(req)->loc_addr.s6_addr32 :
-#endif
-                                       &ireq->loc_addr;
-                               entry.daddr = 
-#ifdef CONFIG_IP_TCPDIAG_IPV6
-                                       (entry.family == AF_INET6) ?
-                                       tcp6_rsk(req)->rmt_addr.s6_addr32 :
-#endif
-                                       &ireq->rmt_addr;
-                               entry.dport = ntohs(ireq->rmt_port);
-
-                               if (!tcpdiag_bc_run(RTA_DATA(bc),
-                                                   RTA_PAYLOAD(bc), &entry))
-                                       continue;
-                       }
-
-                       err = tcpdiag_fill_req(skb, sk, req,
-                                              NETLINK_CB(cb->skb).pid,
-                                              cb->nlh->nlmsg_seq);
-                       if (err < 0) {
-                               cb->args[3] = j + 1;
-                               cb->args[4] = reqnum;
-                               goto out;
-                       }
-               }
-
-               s_reqnum = 0;
-       }
-
-out:
-       read_unlock_bh(&tp->accept_queue.syn_wait_lock);
-
-       return err;
-}
-
-static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
-       int i, num;
-       int s_i, s_num;
-       struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
-
-       s_i = cb->args[1];
-       s_num = num = cb->args[2];
-
-       if (cb->args[0] == 0) {
-               if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
-                       goto skip_listen_ht;
-               tcp_listen_lock();
-               for (i = s_i; i < TCP_LHTABLE_SIZE; i++) {
-                       struct sock *sk;
-                       struct hlist_node *node;
-
-                       num = 0;
-                       sk_for_each(sk, node, &tcp_listening_hash[i]) {
-                               struct inet_sock *inet = inet_sk(sk);
-
-                               if (num < s_num) {
-                                       num++;
-                                       continue;
-                               }
-
-                               if (r->id.tcpdiag_sport != inet->sport &&
-                                   r->id.tcpdiag_sport)
-                                       goto next_listen;
-
-                               if (!(r->tcpdiag_states&TCPF_LISTEN) ||
-                                   r->id.tcpdiag_dport ||
-                                   cb->args[3] > 0)
-                                       goto syn_recv;
-
-                               if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
-                                       tcp_listen_unlock();
-                                       goto done;
-                               }
-
-syn_recv:
-                               if (!(r->tcpdiag_states&TCPF_SYN_RECV))
-                                       goto next_listen;
-
-                               if (tcpdiag_dump_reqs(skb, sk, cb) < 0) {
-                                       tcp_listen_unlock();
-                                       goto done;
-                               }
-
-next_listen:
-                               cb->args[3] = 0;
-                               cb->args[4] = 0;
-                               ++num;
-                       }
-
-                       s_num = 0;
-                       cb->args[3] = 0;
-                       cb->args[4] = 0;
-               }
-               tcp_listen_unlock();
-skip_listen_ht:
-               cb->args[0] = 1;
-               s_i = num = s_num = 0;
-       }
-
-       if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV)))
-               return skb->len;
-
-       for (i = s_i; i < tcp_ehash_size; i++) {
-               struct tcp_ehash_bucket *head = &tcp_ehash[i];
-               struct sock *sk;
-               struct hlist_node *node;
-
-               if (i > s_i)
-                       s_num = 0;
-
-               read_lock_bh(&head->lock);
-
-               num = 0;
-               sk_for_each(sk, node, &head->chain) {
-                       struct inet_sock *inet = inet_sk(sk);
-
-                       if (num < s_num)
-                               goto next_normal;
-                       if (!(r->tcpdiag_states & (1 << sk->sk_state)))
-                               goto next_normal;
-                       if (r->id.tcpdiag_sport != inet->sport &&
-                           r->id.tcpdiag_sport)
-                               goto next_normal;
-                       if (r->id.tcpdiag_dport != inet->dport && r->id.tcpdiag_dport)
-                               goto next_normal;
-                       if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
-                               read_unlock_bh(&head->lock);
-                               goto done;
-                       }
-next_normal:
-                       ++num;
-               }
-
-               if (r->tcpdiag_states&TCPF_TIME_WAIT) {
-                       sk_for_each(sk, node,
-                                   &tcp_ehash[i + tcp_ehash_size].chain) {
-                               struct inet_sock *inet = inet_sk(sk);
-
-                               if (num < s_num)
-                                       goto next_dying;
-                               if (r->id.tcpdiag_sport != inet->sport &&
-                                   r->id.tcpdiag_sport)
-                                       goto next_dying;
-                               if (r->id.tcpdiag_dport != inet->dport &&
-                                   r->id.tcpdiag_dport)
-                                       goto next_dying;
-                               if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
-                                       read_unlock_bh(&head->lock);
-                                       goto done;
-                               }
-next_dying:
-                               ++num;
-                       }
-               }
-               read_unlock_bh(&head->lock);
-       }
-
-done:
-       cb->args[1] = i;
-       cb->args[2] = num;
-       return skb->len;
-}
-
-static int tcpdiag_dump_done(struct netlink_callback *cb)
-{
-       return 0;
-}
-
-
-static __inline__ int
-tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
-{
-       if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
-               return 0;
-
-       if (nlh->nlmsg_type != TCPDIAG_GETSOCK)
-               goto err_inval;
-
-       if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len)
-               goto err_inval;
-
-       if (nlh->nlmsg_flags&NLM_F_DUMP) {
-               if (nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(struct tcpdiagreq))) {
-                       struct rtattr *rta = (struct rtattr*)(NLMSG_DATA(nlh) + sizeof(struct tcpdiagreq));
-                       if (rta->rta_type != TCPDIAG_REQ_BYTECODE ||
-                           rta->rta_len < 8 ||
-                           rta->rta_len > nlh->nlmsg_len - NLMSG_SPACE(sizeof(struct tcpdiagreq)))
-                               goto err_inval;
-                       if (tcpdiag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
-                               goto err_inval;
-               }
-               return netlink_dump_start(tcpnl, skb, nlh,
-                                         tcpdiag_dump,
-                                         tcpdiag_dump_done);
-       } else {
-               return tcpdiag_get_exact(skb, nlh);
-       }
-
-err_inval:
-       return -EINVAL;
-}
-
-
-static inline void tcpdiag_rcv_skb(struct sk_buff *skb)
-{
-       int err;
-       struct nlmsghdr * nlh;
-
-       if (skb->len >= NLMSG_SPACE(0)) {
-               nlh = (struct nlmsghdr *)skb->data;
-               if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
-                       return;
-               err = tcpdiag_rcv_msg(skb, nlh);
-               if (err || nlh->nlmsg_flags & NLM_F_ACK) 
-                       netlink_ack(skb, nlh, err);
-       }
-}
-
-static void tcpdiag_rcv(struct sock *sk, int len)
-{
-       struct sk_buff *skb;
-       unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
-
-       while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
-               tcpdiag_rcv_skb(skb);
-               kfree_skb(skb);
-       }
-}
+static struct inet_diag_handler tcp_diag_handler = {
+       .idiag_hashinfo  = &tcp_hashinfo,
+       .idiag_get_info  = tcp_diag_get_info,
+       .idiag_type      = TCPDIAG_GETSOCK,
+       .idiag_info_size = sizeof(struct tcp_info),
+};
  
-static int __init tcpdiag_init(void)
+static int __init tcp_diag_init(void)
  {
-       tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv);
-       if (tcpnl == NULL)
-               return -ENOMEM;
-       return 0;
+       return inet_diag_register(&tcp_diag_handler);
  }
  
-static void __exit tcpdiag_exit(void)
+static void __exit tcp_diag_exit(void)
  {
-       sock_release(tcpnl->sk_socket);
+       inet_diag_unregister(&tcp_diag_handler);
  }
  
-module_init(tcpdiag_init);
-module_exit(tcpdiag_exit);
+module_init(tcp_diag_init);
+module_exit(tcp_diag_exit);
  MODULE_LICENSE("GPL");
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c

index 36c51f8136bfa6aae3bb1f3f3abf12889673c799..6acc04bde08099c598e633a21518ed51e5b75bd2 100644 (file)
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -98,9 +98,10 @@ struct hstcp {
         u32     ai;
  };
  
-static void hstcp_init(struct tcp_sock *tp)
+static void hstcp_init(struct sock *sk)
  {
-       struct hstcp *ca = tcp_ca(tp);
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct hstcp *ca = inet_csk_ca(sk);
  
         ca->ai = 0;
  
@@ -109,10 +110,11 @@ static void hstcp_init(struct tcp_sock *tp)
         tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
  }
  
-static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt,
+static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
                              u32 in_flight, int good)
  {
-       struct hstcp *ca = tcp_ca(tp);
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct hstcp *ca = inet_csk_ca(sk);
  
         if (in_flight < tp->snd_cwnd)
                 return;
@@ -143,9 +145,10 @@ static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt,
         }
  }
  
-static u32 hstcp_ssthresh(struct tcp_sock *tp)
+static u32 hstcp_ssthresh(struct sock *sk)
  {
-       struct hstcp *ca = tcp_ca(tp);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       const struct hstcp *ca = inet_csk_ca(sk);
  
         /* Do multiplicative decrease */
         return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
@@ -164,7 +167,7 @@ static struct tcp_congestion_ops tcp_highspeed = {
  
  static int __init hstcp_register(void)
  {
-       BUG_ON(sizeof(struct hstcp) > TCP_CA_PRIV_SIZE);
+       BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
         return tcp_register_congestion_control(&tcp_highspeed);
  }
  
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c

index 40168275acf9d278c6a6bedb768a2c06640a4a41..e47b37984e951e087cc2185b97336a5e3dadc21f 100644 (file)
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -55,18 +55,21 @@ static inline void htcp_reset(struct htcp *ca)
         ca->snd_cwnd_cnt2 = 0;
  }
  
-static u32 htcp_cwnd_undo(struct tcp_sock *tp)
+static u32 htcp_cwnd_undo(struct sock *sk)
  {
-       struct htcp *ca = tcp_ca(tp);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct htcp *ca = inet_csk_ca(sk);
         ca->ccount = ca->undo_ccount;
         ca->maxRTT = ca->undo_maxRTT;
         ca->old_maxB = ca->undo_old_maxB;
         return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta);
  }
  
-static inline void measure_rtt(struct tcp_sock *tp)
+static inline void measure_rtt(struct sock *sk)
  {
-       struct htcp *ca = tcp_ca(tp);
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct htcp *ca = inet_csk_ca(sk);
         u32 srtt = tp->srtt>>3;
  
         /* keep track of minimum RTT seen so far, minRTT is zero at first */
@@ -74,7 +77,7 @@ static inline void measure_rtt(struct tcp_sock *tp)
                 ca->minRTT = srtt;
  
         /* max RTT */
-       if (tp->ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) {
+       if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) {
                 if (ca->maxRTT < ca->minRTT)
                         ca->maxRTT = ca->minRTT;
                 if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50)
@@ -82,13 +85,16 @@ static inline void measure_rtt(struct tcp_sock *tp)
         }
  }
  
-static void measure_achieved_throughput(struct tcp_sock *tp, u32 pkts_acked)
+static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
  {
-       struct htcp *ca = tcp_ca(tp);
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct htcp *ca = inet_csk_ca(sk);
         u32 now = tcp_time_stamp;
  
         /* achieved throughput calculations */
-       if (tp->ca_state != TCP_CA_Open && tp->ca_state != TCP_CA_Disorder) {
+       if (icsk->icsk_ca_state != TCP_CA_Open &&
+           icsk->icsk_ca_state != TCP_CA_Disorder) {
                 ca->packetcount = 0;
                 ca->lasttime = now;
                 return;
@@ -173,9 +179,9 @@ static inline void htcp_alpha_update(struct htcp *ca)
   * that point do we really have a real sense of maxRTT (the queues en route
   * were getting just too full now).
   */
-static void htcp_param_update(struct tcp_sock *tp)
+static void htcp_param_update(struct sock *sk)
  {
-       struct htcp *ca = tcp_ca(tp);
+       struct htcp *ca = inet_csk_ca(sk);
         u32 minRTT = ca->minRTT;
         u32 maxRTT = ca->maxRTT;
  
@@ -187,17 +193,19 @@ static void htcp_param_update(struct tcp_sock *tp)
                 ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100;
  }
  
-static u32 htcp_recalc_ssthresh(struct tcp_sock *tp)
+static u32 htcp_recalc_ssthresh(struct sock *sk)
  {
-       struct htcp *ca = tcp_ca(tp);
-       htcp_param_update(tp);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       const struct htcp *ca = inet_csk_ca(sk);
+       htcp_param_update(sk);
         return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
  }
  
-static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
+static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
                             u32 in_flight, int data_acked)
  {
-       struct htcp *ca = tcp_ca(tp);
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct htcp *ca = inet_csk_ca(sk);
  
         if (in_flight < tp->snd_cwnd)
                 return;
@@ -207,7 +215,7 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
                 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                         tp->snd_cwnd++;
         } else {
-               measure_rtt(tp);
+               measure_rtt(sk);
  
                 /* keep track of number of round-trip times since last backoff event */
                 if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) {
@@ -229,28 +237,29 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
  }
  
  /* Lower bound on congestion window. */
-static u32 htcp_min_cwnd(struct tcp_sock *tp)
+static u32 htcp_min_cwnd(struct sock *sk)
  {
+       const struct tcp_sock *tp = tcp_sk(sk);
         return tp->snd_ssthresh;
  }
  
  
-static void htcp_init(struct tcp_sock *tp)
+static void htcp_init(struct sock *sk)
  {
-       struct htcp *ca = tcp_ca(tp);
+       struct htcp *ca = inet_csk_ca(sk);
  
         memset(ca, 0, sizeof(struct htcp));
         ca->alpha = ALPHA_BASE;
         ca->beta = BETA_MIN;
  }
  
-static void htcp_state(struct tcp_sock *tp, u8 new_state)
+static void htcp_state(struct sock *sk, u8 new_state)
  {
         switch (new_state) {
         case TCP_CA_CWR:
         case TCP_CA_Recovery:
         case TCP_CA_Loss:
-               htcp_reset(tcp_ca(tp));
+               htcp_reset(inet_csk_ca(sk));
                 break;
         }
  }
@@ -269,7 +278,7 @@ static struct tcp_congestion_ops htcp = {
  
  static int __init htcp_register(void)
  {
-       BUG_ON(sizeof(struct htcp) > TCP_CA_PRIV_SIZE);
+       BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
         BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
         if (!use_bandwidth_switch)
                 htcp.pkts_acked = NULL;
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c

index 13a66342c304d57db6f2b74fa6dbf04882a8eb2f..77add63623df2a1034a21bfdb6fbaddf82385a71 100644 (file)
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -33,19 +33,20 @@ MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)");
  
  
  /* This is called to refresh values for hybla parameters */
-static inline void hybla_recalc_param (struct tcp_sock *tp)
+static inline void hybla_recalc_param (struct sock *sk)
  {
-       struct hybla *ca = tcp_ca(tp);
+       struct hybla *ca = inet_csk_ca(sk);
  
-       ca->rho_3ls = max_t(u32, tp->srtt / msecs_to_jiffies(rtt0), 8);
+       ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8);
         ca->rho = ca->rho_3ls >> 3;
         ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1;
         ca->rho2 = ca->rho2_7ls >>7;
  }
  
-static void hybla_init(struct tcp_sock *tp)
+static void hybla_init(struct sock *sk)
  {
-       struct hybla *ca = tcp_ca(tp);
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct hybla *ca = inet_csk_ca(sk);
  
         ca->rho = 0;
         ca->rho2 = 0;
@@ -57,17 +58,16 @@ static void hybla_init(struct tcp_sock *tp)
         tp->snd_cwnd_clamp = 65535;
  
         /* 1st Rho measurement based on initial srtt */
-       hybla_recalc_param(tp);
+       hybla_recalc_param(sk);
  
         /* set minimum rtt as this is the 1st ever seen */
         ca->minrtt = tp->srtt;
         tp->snd_cwnd = ca->rho;
  }
  
-static void hybla_state(struct tcp_sock *tp, u8 ca_state)
+static void hybla_state(struct sock *sk, u8 ca_state)
  {
-       struct hybla *ca = tcp_ca(tp);
-
+       struct hybla *ca = inet_csk_ca(sk);
         ca->hybla_en = (ca_state == TCP_CA_Open);
  }
  
@@ -86,27 +86,28 @@ static inline u32 hybla_fraction(u32 odds)
   *     o Give cwnd a new value based on the model proposed
   *     o remember increments <1
   */
-static void hybla_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
+static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
                             u32 in_flight, int flag)
  {
-       struct hybla *ca = tcp_ca(tp);
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct hybla *ca = inet_csk_ca(sk);
         u32 increment, odd, rho_fractions;
         int is_slowstart = 0;
  
         /*  Recalculate rho only if this srtt is the lowest */
         if (tp->srtt < ca->minrtt){
-               hybla_recalc_param(tp);
+               hybla_recalc_param(sk);
                 ca->minrtt = tp->srtt;
         }
  
         if (!ca->hybla_en)
-               return tcp_reno_cong_avoid(tp, ack, rtt, in_flight, flag);
+               return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
  
         if (in_flight < tp->snd_cwnd)
                 return;
  
         if (ca->rho == 0)
-               hybla_recalc_param(tp);
+               hybla_recalc_param(sk);
  
         rho_fractions = ca->rho_3ls - (ca->rho << 3);
  
@@ -170,7 +171,7 @@ static struct tcp_congestion_ops tcp_hybla = {
  
  static int __init hybla_register(void)
  {
-       BUG_ON(sizeof(struct hybla) > TCP_CA_PRIV_SIZE);
+       BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
         return tcp_register_congestion_control(&tcp_hybla);
  }
  
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index 53a8a5399f1e6e6de3b13a05d691612c533f433c..1afb080bdf0cca2956808c26830aaa0b006af0ac 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -114,20 +114,21 @@ int sysctl_tcp_moderate_rcvbuf = 1;
  /* Adapt the MSS value used to make delayed ack decision to the 
   * real world.
   */ 
-static inline void tcp_measure_rcv_mss(struct tcp_sock *tp,
-                                      struct sk_buff *skb)
+static inline void tcp_measure_rcv_mss(struct sock *sk,
+                                      const struct sk_buff *skb)
  {
-       unsigned int len, lss;
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       const unsigned int lss = icsk->icsk_ack.last_seg_size; 
+       unsigned int len;
  
-       lss = tp->ack.last_seg_size; 
-       tp->ack.last_seg_size = 0; 
+       icsk->icsk_ack.last_seg_size = 0; 
  
         /* skb->len may jitter because of SACKs, even if peer
          * sends good full-sized frames.
          */
         len = skb->len;
-       if (len >= tp->ack.rcv_mss) {
-               tp->ack.rcv_mss = len;
+       if (len >= icsk->icsk_ack.rcv_mss) {
+               icsk->icsk_ack.rcv_mss = len;
         } else {
                 /* Otherwise, we make more careful check taking into account,
                  * that SACKs block is variable.
@@ -147,41 +148,44 @@ static inline void tcp_measure_rcv_mss(struct tcp_sock *tp,
                          * tcp header plus fixed timestamp option length.
                          * Resulting "len" is MSS free of SACK jitter.
                          */
-                       len -= tp->tcp_header_len;
-                       tp->ack.last_seg_size = len;
+                       len -= tcp_sk(sk)->tcp_header_len;
+                       icsk->icsk_ack.last_seg_size = len;
                         if (len == lss) {
-                               tp->ack.rcv_mss = len;
+                               icsk->icsk_ack.rcv_mss = len;
                                 return;
                         }
                 }
-               tp->ack.pending |= TCP_ACK_PUSHED;
+               icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
         }
  }
  
-static void tcp_incr_quickack(struct tcp_sock *tp)
+static void tcp_incr_quickack(struct sock *sk)
  {
-       unsigned quickacks = tp->rcv_wnd/(2*tp->ack.rcv_mss);
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
  
         if (quickacks==0)
                 quickacks=2;
-       if (quickacks > tp->ack.quick)
-               tp->ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
+       if (quickacks > icsk->icsk_ack.quick)
+               icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
  }
  
-void tcp_enter_quickack_mode(struct tcp_sock *tp)
+void tcp_enter_quickack_mode(struct sock *sk)
  {
-       tcp_incr_quickack(tp);
-       tp->ack.pingpong = 0;
-       tp->ack.ato = TCP_ATO_MIN;
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       tcp_incr_quickack(sk);
+       icsk->icsk_ack.pingpong = 0;
+       icsk->icsk_ack.ato = TCP_ATO_MIN;
  }
  
  /* Send ACKs quickly, if "quick" count is not exhausted
   * and the session is not interactive.
   */
  
-static __inline__ int tcp_in_quickack_mode(struct tcp_sock *tp)
+static inline int tcp_in_quickack_mode(const struct sock *sk)
  {
-       return (tp->ack.quick && !tp->ack.pingpong);
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
  }
  
  /* Buffer size and advertised window tuning.
@@ -224,8 +228,8 @@ static void tcp_fixup_sndbuf(struct sock *sk)
   */
  
  /* Slow part of check#2. */
-static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
-                            struct sk_buff *skb)
+static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
+                            const struct sk_buff *skb)
  {
         /* Optimize this! */
         int truesize = tcp_win_from_space(skb->truesize)/2;
@@ -233,7 +237,7 @@ static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
  
         while (tp->rcv_ssthresh <= window) {
                 if (truesize <= skb->len)
-                       return 2*tp->ack.rcv_mss;
+                       return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
  
                 truesize >>= 1;
                 window >>= 1;
@@ -260,7 +264,7 @@ static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
  
                 if (incr) {
                         tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
-                       tp->ack.quick |= 1;
+                       inet_csk(sk)->icsk_ack.quick |= 1;
                 }
         }
  }
@@ -321,11 +325,12 @@ static void tcp_init_buffer_space(struct sock *sk)
  /* 5. Recalculate window clamp after socket hit its memory bounds. */
  static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
  {
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct sk_buff *skb;
         unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
         int ofo_win = 0;
  
-       tp->ack.quick = 0;
+       icsk->icsk_ack.quick = 0;
  
         skb_queue_walk(&tp->out_of_order_queue, skb) {
                 ofo_win += skb->len;
@@ -346,8 +351,8 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
                 app_win += ofo_win;
                 if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
                         app_win >>= 1;
-               if (app_win > tp->ack.rcv_mss)
-                       app_win -= tp->ack.rcv_mss;
+               if (app_win > icsk->icsk_ack.rcv_mss)
+                       app_win -= icsk->icsk_ack.rcv_mss;
                 app_win = max(app_win, 2U*tp->advmss);
  
                 if (!ofo_win)
@@ -415,11 +420,12 @@ new_measure:
         tp->rcv_rtt_est.time = tcp_time_stamp;
  }
  
-static inline void tcp_rcv_rtt_measure_ts(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
         if (tp->rx_opt.rcv_tsecr &&
             (TCP_SKB_CB(skb)->end_seq -
-            TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss))
+            TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
                 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
  }
  
@@ -492,41 +498,42 @@ new_measure:
   */
  static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
  {
+       struct inet_connection_sock *icsk = inet_csk(sk);
         u32 now;
  
-       tcp_schedule_ack(tp);
+       inet_csk_schedule_ack(sk);
  
-       tcp_measure_rcv_mss(tp, skb);
+       tcp_measure_rcv_mss(sk, skb);
  
         tcp_rcv_rtt_measure(tp);
         
         now = tcp_time_stamp;
  
-       if (!tp->ack.ato) {
+       if (!icsk->icsk_ack.ato) {
                 /* The _first_ data packet received, initialize
                  * delayed ACK engine.
                  */
-               tcp_incr_quickack(tp);
-               tp->ack.ato = TCP_ATO_MIN;
+               tcp_incr_quickack(sk);
+               icsk->icsk_ack.ato = TCP_ATO_MIN;
         } else {
-               int m = now - tp->ack.lrcvtime;
+               int m = now - icsk->icsk_ack.lrcvtime;
  
                 if (m <= TCP_ATO_MIN/2) {
                         /* The fastest case is the first. */
-                       tp->ack.ato = (tp->ack.ato>>1) + TCP_ATO_MIN/2;
-               } else if (m < tp->ack.ato) {
-                       tp->ack.ato = (tp->ack.ato>>1) + m;
-                       if (tp->ack.ato > tp->rto)
-                               tp->ack.ato = tp->rto;
-               } else if (m > tp->rto) {
+                       icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
+               } else if (m < icsk->icsk_ack.ato) {
+                       icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
+                       if (icsk->icsk_ack.ato > icsk->icsk_rto)
+                               icsk->icsk_ack.ato = icsk->icsk_rto;
+               } else if (m > icsk->icsk_rto) {
                         /* Too long gap. Apparently sender falled to
                          * restart window, so that we send ACKs quickly.
                          */
-                       tcp_incr_quickack(tp);
+                       tcp_incr_quickack(sk);
                         sk_stream_mem_reclaim(sk);
                 }
         }
-       tp->ack.lrcvtime = now;
+       icsk->icsk_ack.lrcvtime = now;
  
         TCP_ECN_check_ce(tp, skb);
  
@@ -543,8 +550,10 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
   * To save cycles in the RFC 1323 implementation it was better to break
   * it up into three procedures. -- erics
   */
-static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt)
+static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
+       const struct inet_connection_sock *icsk = inet_csk(sk);
         long m = mrtt; /* RTT */
  
         /*      The following amusing code comes from Jacobson's
@@ -604,15 +613,16 @@ static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt)
                 tp->rtt_seq = tp->snd_nxt;
         }
  
-       if (tp->ca_ops->rtt_sample)
-               tp->ca_ops->rtt_sample(tp, *usrtt);
+       if (icsk->icsk_ca_ops->rtt_sample)
+               icsk->icsk_ca_ops->rtt_sample(sk, *usrtt);
  }
  
  /* Calculate rto without backoff.  This is the second half of Van Jacobson's
   * routine referred to above.
   */
-static inline void tcp_set_rto(struct tcp_sock *tp)
+static inline void tcp_set_rto(struct sock *sk)
  {
+       const struct tcp_sock *tp = tcp_sk(sk);
         /* Old crap is replaced with new one. 8)
          *
          * More seriously:
@@ -623,7 +633,7 @@ static inline void tcp_set_rto(struct tcp_sock *tp)
          *    is invisible. Actually, Linux-2.4 also generates erratic
          *    ACKs in some curcumstances.
          */
-       tp->rto = (tp->srtt >> 3) + tp->rttvar;
+       inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
  
         /* 2. Fixups made earlier cannot be right.
          *    If we do not estimate RTO correctly without them,
@@ -635,10 +645,10 @@ static inline void tcp_set_rto(struct tcp_sock *tp)
  /* NOTE: clamping at TCP_RTO_MIN is not required, current algo
   * guarantees that rto is higher.
   */
-static inline void tcp_bound_rto(struct tcp_sock *tp)
+static inline void tcp_bound_rto(struct sock *sk)
  {
-       if (tp->rto > TCP_RTO_MAX)
-               tp->rto = TCP_RTO_MAX;
+       if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
+               inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
  }
  
  /* Save metrics learned by this TCP session.
@@ -656,9 +666,10 @@ void tcp_update_metrics(struct sock *sk)
         dst_confirm(dst);
  
         if (dst && (dst->flags&DST_HOST)) {
+               const struct inet_connection_sock *icsk = inet_csk(sk);
                 int m;
  
-               if (tp->backoff || !tp->srtt) {
+               if (icsk->icsk_backoff || !tp->srtt) {
                         /* This session failed to estimate rtt. Why?
                          * Probably, no packets returned in time.
                          * Reset our results.
@@ -707,7 +718,7 @@ void tcp_update_metrics(struct sock *sk)
                             tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
                                 dst->metrics[RTAX_CWND-1] = tp->snd_cwnd;
                 } else if (tp->snd_cwnd > tp->snd_ssthresh &&
-                          tp->ca_state == TCP_CA_Open) {
+                          icsk->icsk_ca_state == TCP_CA_Open) {
                         /* Cong. avoidance phase, cwnd is reliable. */
                         if (!dst_metric_locked(dst, RTAX_SSTHRESH))
                                 dst->metrics[RTAX_SSTHRESH-1] =
@@ -801,9 +812,9 @@ static void tcp_init_metrics(struct sock *sk)
                 tp->mdev = dst_metric(dst, RTAX_RTTVAR);
                 tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
         }
-       tcp_set_rto(tp);
-       tcp_bound_rto(tp);
-       if (tp->rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
+       tcp_set_rto(sk);
+       tcp_bound_rto(sk);
+       if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
                 goto reset;
         tp->snd_cwnd = tcp_init_cwnd(tp, dst);
         tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -817,12 +828,14 @@ reset:
         if (!tp->rx_opt.saw_tstamp && tp->srtt) {
                 tp->srtt = 0;
                 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
-               tp->rto = TCP_TIMEOUT_INIT;
+               inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
         }
  }
  
-static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts)
+static void tcp_update_reordering(struct sock *sk, const int metric,
+                                 const int ts)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
         if (metric > tp->reordering) {
                 tp->reordering = min(TCP_MAX_REORDERING, metric);
  
@@ -837,7 +850,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts)
                         NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER);
  #if FASTRETRANS_DEBUG > 1
                 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
-                      tp->rx_opt.sack_ok, tp->ca_state,
+                      tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
                        tp->reordering,
                        tp->fackets_out,
                        tp->sacked_out,
@@ -899,6 +912,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts)
  static int
  tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
  {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
         struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
@@ -1064,7 +1078,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
          * we have to account for reordering! Ugly,
          * but should help.
          */
-       if (lost_retrans && tp->ca_state == TCP_CA_Recovery) {
+       if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) {
                 struct sk_buff *skb;
  
                 sk_stream_for_retrans_queue(skb, sk) {
@@ -1093,8 +1107,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
  
         tp->left_out = tp->sacked_out + tp->lost_out;
  
-       if ((reord < tp->fackets_out) && tp->ca_state != TCP_CA_Loss)
-               tcp_update_reordering(tp, ((tp->fackets_out + 1) - reord), 0);
+       if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss)
+               tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
  
  #if FASTRETRANS_DEBUG > 0
         BUG_TRAP((int)tp->sacked_out >= 0);
@@ -1111,17 +1125,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
   */
  void tcp_enter_frto(struct sock *sk)
  {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb;
  
         tp->frto_counter = 1;
  
-       if (tp->ca_state <= TCP_CA_Disorder ||
+       if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
              tp->snd_una == tp->high_seq ||
-            (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
-               tp->prior_ssthresh = tcp_current_ssthresh(tp);
-               tp->snd_ssthresh = tp->ca_ops->ssthresh(tp);
-               tcp_ca_event(tp, CA_EVENT_FRTO);
+            (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+               tp->prior_ssthresh = tcp_current_ssthresh(sk);
+               tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+               tcp_ca_event(sk, CA_EVENT_FRTO);
         }
  
         /* Have to clear retransmission markers here to keep the bookkeeping
@@ -1138,7 +1153,7 @@ void tcp_enter_frto(struct sock *sk)
         }
         tcp_sync_left_out(tp);
  
-       tcp_set_ca_state(tp, TCP_CA_Open);
+       tcp_set_ca_state(sk, TCP_CA_Open);
         tp->frto_highmark = tp->snd_nxt;
  }
  
@@ -1184,7 +1199,7 @@ static void tcp_enter_frto_loss(struct sock *sk)
  
         tp->reordering = min_t(unsigned int, tp->reordering,
                                              sysctl_tcp_reordering);
-       tcp_set_ca_state(tp, TCP_CA_Loss);
+       tcp_set_ca_state(sk, TCP_CA_Loss);
         tp->high_seq = tp->frto_highmark;
         TCP_ECN_queue_cwr(tp);
  }
@@ -1208,16 +1223,17 @@ void tcp_clear_retrans(struct tcp_sock *tp)
   */
  void tcp_enter_loss(struct sock *sk, int how)
  {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb;
         int cnt = 0;
  
         /* Reduce ssthresh if it has not yet been made inside this window. */
-       if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
-           (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
-               tp->prior_ssthresh = tcp_current_ssthresh(tp);
-               tp->snd_ssthresh = tp->ca_ops->ssthresh(tp);
-               tcp_ca_event(tp, CA_EVENT_LOSS);
+       if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
+           (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+               tp->prior_ssthresh = tcp_current_ssthresh(sk);
+               tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+               tcp_ca_event(sk, CA_EVENT_LOSS);
         }
         tp->snd_cwnd       = 1;
         tp->snd_cwnd_cnt   = 0;
@@ -1248,12 +1264,12 @@ void tcp_enter_loss(struct sock *sk, int how)
  
         tp->reordering = min_t(unsigned int, tp->reordering,
                                              sysctl_tcp_reordering);
-       tcp_set_ca_state(tp, TCP_CA_Loss);
+       tcp_set_ca_state(sk, TCP_CA_Loss);
         tp->high_seq = tp->snd_nxt;
         TCP_ECN_queue_cwr(tp);
  }
  
-static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp)
+static int tcp_check_sack_reneging(struct sock *sk)
  {
         struct sk_buff *skb;
  
@@ -1265,12 +1281,14 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp)
          */
         if ((skb = skb_peek(&sk->sk_write_queue)) != NULL &&
             (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+               struct inet_connection_sock *icsk = inet_csk(sk);
                 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
  
                 tcp_enter_loss(sk, 1);
-               tp->retransmits++;
+               icsk->icsk_retransmits++;
                 tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
-               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                         icsk->icsk_rto, TCP_RTO_MAX);
                 return 1;
         }
         return 0;
@@ -1281,15 +1299,15 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
         return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out;
  }
  
-static inline int tcp_skb_timedout(struct tcp_sock *tp, struct sk_buff *skb)
+static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
  {
-       return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto);
+       return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
  }
  
  static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
  {
         return tp->packets_out &&
-              tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue));
+              tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue));
  }
  
  /* Linux NewReno/SACK/FACK/ECN state machine.
@@ -1423,8 +1441,9 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
   * in assumption of absent reordering, interpret this as reordering.
   * The only another reason could be bug in receiver TCP.
   */
-static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend)
+static void tcp_check_reno_reordering(struct sock *sk, const int addend)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
         u32 holes;
  
         holes = max(tp->lost_out, 1U);
@@ -1432,16 +1451,17 @@ static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend)
  
         if ((tp->sacked_out + holes) > tp->packets_out) {
                 tp->sacked_out = tp->packets_out - holes;
-               tcp_update_reordering(tp, tp->packets_out+addend, 0);
+               tcp_update_reordering(sk, tp->packets_out + addend, 0);
         }
  }
  
  /* Emulate SACKs for SACKless connection: account for a new dupack. */
  
-static void tcp_add_reno_sack(struct tcp_sock *tp)
+static void tcp_add_reno_sack(struct sock *sk)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
         tp->sacked_out++;
-       tcp_check_reno_reordering(tp, 0);
+       tcp_check_reno_reordering(sk, 0);
         tcp_sync_left_out(tp);
  }
  
@@ -1456,7 +1476,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acke
                 else
                         tp->sacked_out -= acked-1;
         }
-       tcp_check_reno_reordering(tp, acked);
+       tcp_check_reno_reordering(sk, acked);
         tcp_sync_left_out(tp);
  }
  
@@ -1509,7 +1529,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
                 struct sk_buff *skb;
  
                 sk_stream_for_retrans_queue(skb, sk) {
-                       if (tcp_skb_timedout(tp, skb) &&
+                       if (tcp_skb_timedout(sk, skb) &&
                             !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
                                 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                                 tp->lost_out += tcp_skb_pcount(skb);
@@ -1530,14 +1550,16 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
  }
  
  /* Decrease cwnd each second ack. */
-static void tcp_cwnd_down(struct tcp_sock *tp)
+static void tcp_cwnd_down(struct sock *sk)
  {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
         int decr = tp->snd_cwnd_cnt + 1;
  
         tp->snd_cwnd_cnt = decr&1;
         decr >>= 1;
  
-       if (decr && tp->snd_cwnd > tp->ca_ops->min_cwnd(tp))
+       if (decr && tp->snd_cwnd > icsk->icsk_ca_ops->min_cwnd(sk))
                 tp->snd_cwnd -= decr;
  
         tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
@@ -1571,11 +1593,15 @@ static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg)
  #define DBGUNDO(x...) do { } while (0)
  #endif
  
-static void tcp_undo_cwr(struct tcp_sock *tp, int undo)
+static void tcp_undo_cwr(struct sock *sk, const int undo)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
+
         if (tp->prior_ssthresh) {
-               if (tp->ca_ops->undo_cwnd)
-                       tp->snd_cwnd = tp->ca_ops->undo_cwnd(tp);
+               const struct inet_connection_sock *icsk = inet_csk(sk);
+
+               if (icsk->icsk_ca_ops->undo_cwnd)
+                       tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
                 else
                         tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
  
@@ -1603,9 +1629,9 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
                 /* Happy end! We did not retransmit anything
                  * or our original transmission succeeded.
                  */
-               DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans");
-               tcp_undo_cwr(tp, 1);
-               if (tp->ca_state == TCP_CA_Loss)
+               DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
+               tcp_undo_cwr(sk, 1);
+               if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
                         NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
                 else
                         NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO);
@@ -1618,7 +1644,7 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
                 tcp_moderate_cwnd(tp);
                 return 1;
         }
-       tcp_set_ca_state(tp, TCP_CA_Open);
+       tcp_set_ca_state(sk, TCP_CA_Open);
         return 0;
  }
  
@@ -1627,7 +1653,7 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
  {
         if (tp->undo_marker && !tp->undo_retrans) {
                 DBGUNDO(sk, tp, "D-SACK");
-               tcp_undo_cwr(tp, 1);
+               tcp_undo_cwr(sk, 1);
                 tp->undo_marker = 0;
                 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
         }
@@ -1648,10 +1674,10 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
                 if (tp->retrans_out == 0)
                         tp->retrans_stamp = 0;
  
-               tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1);
+               tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
  
                 DBGUNDO(sk, tp, "Hoe");
-               tcp_undo_cwr(tp, 0);
+               tcp_undo_cwr(sk, 0);
                 NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
  
                 /* So... Do not make Hoe's retransmit yet.
@@ -1674,22 +1700,23 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
                 DBGUNDO(sk, tp, "partial loss");
                 tp->lost_out = 0;
                 tp->left_out = tp->sacked_out;
-               tcp_undo_cwr(tp, 1);
+               tcp_undo_cwr(sk, 1);
                 NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
-               tp->retransmits = 0;
+               inet_csk(sk)->icsk_retransmits = 0;
                 tp->undo_marker = 0;
                 if (!IsReno(tp))
-                       tcp_set_ca_state(tp, TCP_CA_Open);
+                       tcp_set_ca_state(sk, TCP_CA_Open);
                 return 1;
         }
         return 0;
  }
  
-static inline void tcp_complete_cwr(struct tcp_sock *tp)
+static inline void tcp_complete_cwr(struct sock *sk)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
         tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
         tp->snd_cwnd_stamp = tcp_time_stamp;
-       tcp_ca_event(tp, CA_EVENT_COMPLETE_CWR);
+       tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
  }
  
  static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
@@ -1700,21 +1727,21 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
                 tp->retrans_stamp = 0;
  
         if (flag&FLAG_ECE)
-               tcp_enter_cwr(tp);
+               tcp_enter_cwr(sk);
  
-       if (tp->ca_state != TCP_CA_CWR) {
+       if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
                 int state = TCP_CA_Open;
  
                 if (tp->left_out || tp->retrans_out || tp->undo_marker)
                         state = TCP_CA_Disorder;
  
-               if (tp->ca_state != state) {
-                       tcp_set_ca_state(tp, state);
+               if (inet_csk(sk)->icsk_ca_state != state) {
+                       tcp_set_ca_state(sk, state);
                         tp->high_seq = tp->snd_nxt;
                 }
                 tcp_moderate_cwnd(tp);
         } else {
-               tcp_cwnd_down(tp);
+               tcp_cwnd_down(sk);
         }
  }
  
@@ -1733,6 +1760,7 @@ static void
  tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                       int prior_packets, int flag)
  {
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP));
  
@@ -1750,13 +1778,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                 tp->prior_ssthresh = 0;
  
         /* B. In all the states check for reneging SACKs. */
-       if (tp->sacked_out && tcp_check_sack_reneging(sk, tp))
+       if (tp->sacked_out && tcp_check_sack_reneging(sk))
                 return;
  
         /* C. Process data loss notification, provided it is valid. */
         if ((flag&FLAG_DATA_LOST) &&
             before(tp->snd_una, tp->high_seq) &&
-           tp->ca_state != TCP_CA_Open &&
+           icsk->icsk_ca_state != TCP_CA_Open &&
             tp->fackets_out > tp->reordering) {
                 tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
                 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
@@ -1767,14 +1795,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
  
         /* E. Check state exit conditions. State can be terminated
          *    when high_seq is ACKed. */
-       if (tp->ca_state == TCP_CA_Open) {
+       if (icsk->icsk_ca_state == TCP_CA_Open) {
                 if (!sysctl_tcp_frto)
                         BUG_TRAP(tp->retrans_out == 0);
                 tp->retrans_stamp = 0;
         } else if (!before(tp->snd_una, tp->high_seq)) {
-               switch (tp->ca_state) {
+               switch (icsk->icsk_ca_state) {
                 case TCP_CA_Loss:
-                       tp->retransmits = 0;
+                       icsk->icsk_retransmits = 0;
                         if (tcp_try_undo_recovery(sk, tp))
                                 return;
                         break;
@@ -1783,8 +1811,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                         /* CWR is to be held something *above* high_seq
                          * is ACKed for CWR bit to reach receiver. */
                         if (tp->snd_una != tp->high_seq) {
-                               tcp_complete_cwr(tp);
-                               tcp_set_ca_state(tp, TCP_CA_Open);
+                               tcp_complete_cwr(sk);
+                               tcp_set_ca_state(sk, TCP_CA_Open);
                         }
                         break;
  
@@ -1795,7 +1823,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                              * catching for all duplicate ACKs. */
                             IsReno(tp) || tp->snd_una != tp->high_seq) {
                                 tp->undo_marker = 0;
-                               tcp_set_ca_state(tp, TCP_CA_Open);
+                               tcp_set_ca_state(sk, TCP_CA_Open);
                         }
                         break;
  
@@ -1804,17 +1832,17 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                                 tcp_reset_reno_sack(tp);
                         if (tcp_try_undo_recovery(sk, tp))
                                 return;
-                       tcp_complete_cwr(tp);
+                       tcp_complete_cwr(sk);
                         break;
                 }
         }
  
         /* F. Process state. */
-       switch (tp->ca_state) {
+       switch (icsk->icsk_ca_state) {
         case TCP_CA_Recovery:
                 if (prior_snd_una == tp->snd_una) {
                         if (IsReno(tp) && is_dupack)
-                               tcp_add_reno_sack(tp);
+                               tcp_add_reno_sack(sk);
                 } else {
                         int acked = prior_packets - tp->packets_out;
                         if (IsReno(tp))
@@ -1824,13 +1852,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                 break;
         case TCP_CA_Loss:
                 if (flag&FLAG_DATA_ACKED)
-                       tp->retransmits = 0;
+                       icsk->icsk_retransmits = 0;
                 if (!tcp_try_undo_loss(sk, tp)) {
                         tcp_moderate_cwnd(tp);
                         tcp_xmit_retransmit_queue(sk);
                         return;
                 }
-               if (tp->ca_state != TCP_CA_Open)
+               if (icsk->icsk_ca_state != TCP_CA_Open)
                         return;
                 /* Loss is undone; fall through to processing in Open state. */
         default:
@@ -1838,10 +1866,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                         if (tp->snd_una != prior_snd_una)
                                 tcp_reset_reno_sack(tp);
                         if (is_dupack)
-                               tcp_add_reno_sack(tp);
+                               tcp_add_reno_sack(sk);
                 }
  
-               if (tp->ca_state == TCP_CA_Disorder)
+               if (icsk->icsk_ca_state == TCP_CA_Disorder)
                         tcp_try_undo_dsack(sk, tp);
  
                 if (!tcp_time_to_recover(sk, tp)) {
@@ -1861,30 +1889,28 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                 tp->undo_marker = tp->snd_una;
                 tp->undo_retrans = tp->retrans_out;
  
-               if (tp->ca_state < TCP_CA_CWR) {
+               if (icsk->icsk_ca_state < TCP_CA_CWR) {
                         if (!(flag&FLAG_ECE))
-                               tp->prior_ssthresh = tcp_current_ssthresh(tp);
-                       tp->snd_ssthresh = tp->ca_ops->ssthresh(tp);
+                               tp->prior_ssthresh = tcp_current_ssthresh(sk);
+                       tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
                         TCP_ECN_queue_cwr(tp);
                 }
  
                 tp->snd_cwnd_cnt = 0;
-               tcp_set_ca_state(tp, TCP_CA_Recovery);
+               tcp_set_ca_state(sk, TCP_CA_Recovery);
         }
  
         if (is_dupack || tcp_head_timedout(sk, tp))
                 tcp_update_scoreboard(sk, tp);
-       tcp_cwnd_down(tp);
+       tcp_cwnd_down(sk);
         tcp_xmit_retransmit_queue(sk);
  }
  
  /* Read draft-ietf-tcplw-high-performance before mucking
   * with this code. (Superceeds RFC1323)
   */
-static void tcp_ack_saw_tstamp(struct tcp_sock *tp, u32 *usrtt, int flag)
+static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
  {
-       __u32 seq_rtt;
-
         /* RTTM Rule: A TSecr value received in a segment is used to
          * update the averaged RTT measurement only if the segment
          * acknowledges some new data, i.e., only if it advances the
@@ -1900,14 +1926,15 @@ static void tcp_ack_saw_tstamp(struct tcp_sock *tp, u32 *usrtt, int flag)
          * answer arrives rto becomes 120 seconds! If at least one of segments
          * in window is lost... Voila.                          --ANK (010210)
          */
-       seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
-       tcp_rtt_estimator(tp, seq_rtt, usrtt);
-       tcp_set_rto(tp);
-       tp->backoff = 0;
-       tcp_bound_rto(tp);
+       struct tcp_sock *tp = tcp_sk(sk);
+       const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+       tcp_rtt_estimator(sk, seq_rtt, usrtt);
+       tcp_set_rto(sk);
+       inet_csk(sk)->icsk_backoff = 0;
+       tcp_bound_rto(sk);
  }
  
-static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, u32 *usrtt, int flag)
+static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag)
  {
         /* We don't have a timestamp. Can only use
          * packets that are not retransmitted to determine
@@ -1921,27 +1948,29 @@ static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, u32 *usrtt, int
         if (flag & FLAG_RETRANS_DATA_ACKED)
                 return;
  
-       tcp_rtt_estimator(tp, seq_rtt, usrtt);
-       tcp_set_rto(tp);
-       tp->backoff = 0;
-       tcp_bound_rto(tp);
+       tcp_rtt_estimator(sk, seq_rtt, usrtt);
+       tcp_set_rto(sk);
+       inet_csk(sk)->icsk_backoff = 0;
+       tcp_bound_rto(sk);
  }
  
-static inline void tcp_ack_update_rtt(struct tcp_sock *tp,
-                                     int flag, s32 seq_rtt, u32 *usrtt)
+static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
+                                     const s32 seq_rtt, u32 *usrtt)
  {
+       const struct tcp_sock *tp = tcp_sk(sk);
         /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
         if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
-               tcp_ack_saw_tstamp(tp, usrtt, flag);
+               tcp_ack_saw_tstamp(sk, usrtt, flag);
         else if (seq_rtt >= 0)
-               tcp_ack_no_tstamp(tp, seq_rtt, usrtt, flag);
+               tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag);
  }
  
-static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
+static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
                                   u32 in_flight, int good)
  {
-       tp->ca_ops->cong_avoid(tp, ack, rtt, in_flight, good);
-       tp->snd_cwnd_stamp = tcp_time_stamp;
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
+       tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
  }
  
  /* Restart timer after forward progress on connection.
@@ -1951,9 +1980,9 @@ static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
  static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
  {
         if (!tp->packets_out) {
-               tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
+               inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
         } else {
-               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
         }
  }
  
@@ -2068,9 +2097,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
                                 seq_rtt = -1;
                         } else if (seq_rtt < 0)
                                 seq_rtt = now - scb->when;
-                       if (seq_usrtt)
-                               *seq_usrtt = (usnow.tv_sec - skb->stamp.tv_sec) * 1000000
-                                       + (usnow.tv_usec - skb->stamp.tv_usec);
+                       if (seq_usrtt) {
+                               struct timeval tv;
+                       
+                               skb_get_timestamp(skb, &tv);
+                               *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000
+                                       + (usnow.tv_usec - tv.tv_usec);
+                       }
  
                         if (sacked & TCPCB_SACKED_ACKED)
                                 tp->sacked_out -= tcp_skb_pcount(skb);
@@ -2085,16 +2118,17 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
                         seq_rtt = now - scb->when;
                 tcp_dec_pcount_approx(&tp->fackets_out, skb);
                 tcp_packets_out_dec(tp, skb);
-               __skb_unlink(skb, skb->list);
+               __skb_unlink(skb, &sk->sk_write_queue);
                 sk_stream_free_skb(sk, skb);
         }
  
         if (acked&FLAG_ACKED) {
-               tcp_ack_update_rtt(tp, acked, seq_rtt, seq_usrtt);
+               const struct inet_connection_sock *icsk = inet_csk(sk);
+               tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt);
                 tcp_ack_packets_out(sk, tp);
  
-               if (tp->ca_ops->pkts_acked)
-                       tp->ca_ops->pkts_acked(tp, pkts_acked);
+               if (icsk->icsk_ca_ops->pkts_acked)
+                       icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
         }
  
  #if FASTRETRANS_DEBUG > 0
@@ -2102,19 +2136,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
         BUG_TRAP((int)tp->lost_out >= 0);
         BUG_TRAP((int)tp->retrans_out >= 0);
         if (!tp->packets_out && tp->rx_opt.sack_ok) {
+               const struct inet_connection_sock *icsk = inet_csk(sk);
                 if (tp->lost_out) {
                         printk(KERN_DEBUG "Leak l=%u %d\n",
-                              tp->lost_out, tp->ca_state);
+                              tp->lost_out, icsk->icsk_ca_state);
                         tp->lost_out = 0;
                 }
                 if (tp->sacked_out) {
                         printk(KERN_DEBUG "Leak s=%u %d\n",
-                              tp->sacked_out, tp->ca_state);
+                              tp->sacked_out, icsk->icsk_ca_state);
                         tp->sacked_out = 0;
                 }
                 if (tp->retrans_out) {
                         printk(KERN_DEBUG "Leak r=%u %d\n",
-                              tp->retrans_out, tp->ca_state);
+                              tp->retrans_out, icsk->icsk_ca_state);
                         tp->retrans_out = 0;
                 }
         }
@@ -2125,40 +2160,43 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
  
  static void tcp_ack_probe(struct sock *sk)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
  
         /* Was it a usable window open? */
  
         if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
                    tp->snd_una + tp->snd_wnd)) {
-               tp->backoff = 0;
-               tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0);
+               icsk->icsk_backoff = 0;
+               inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
                 /* Socket must be waked up by subsequent tcp_data_snd_check().
                  * This function is not for random using!
                  */
         } else {
-               tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0,
-                                    min(tp->rto << tp->backoff, TCP_RTO_MAX));
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+                                         min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
+                                         TCP_RTO_MAX);
         }
  }
  
-static inline int tcp_ack_is_dubious(struct tcp_sock *tp, int flag)
+static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
  {
         return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
-               tp->ca_state != TCP_CA_Open);
+               inet_csk(sk)->icsk_ca_state != TCP_CA_Open);
  }
  
-static inline int tcp_may_raise_cwnd(struct tcp_sock *tp, int flag)
+static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
  {
+       const struct tcp_sock *tp = tcp_sk(sk);
         return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-               !((1<<tp->ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR));
+               !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
  }
  
  /* Check that window update is acceptable.
   * The function assumes that snd_una<=ack<=snd_next.
   */
-static inline int tcp_may_update_window(struct tcp_sock *tp, u32 ack,
-                                       u32 ack_seq, u32 nwin)
+static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
+                                       const u32 ack_seq, const u32 nwin)
  {
         return (after(ack, tp->snd_una) ||
                 after(ack_seq, tp->snd_wl1) ||
@@ -2241,6 +2279,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
  /* This routine deals with incoming acks, but not outgoing ones. */
  static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
  {
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         u32 prior_snd_una = tp->snd_una;
         u32 ack_seq = TCP_SKB_CB(skb)->seq;
@@ -2268,7 +2307,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
                 tp->snd_una = ack;
                 flag |= FLAG_WIN_UPDATE;
  
-               tcp_ca_event(tp, CA_EVENT_FAST_ACK);
+               tcp_ca_event(sk, CA_EVENT_FAST_ACK);
  
                 NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS);
         } else {
@@ -2285,7 +2324,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
                 if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
                         flag |= FLAG_ECE;
  
-               tcp_ca_event(tp, CA_EVENT_SLOW_ACK);
+               tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
         }
  
         /* We passed data and got it acked, remove any soft error
@@ -2301,19 +2340,19 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
  
         /* See if we can take anything off of the retransmit queue. */
         flag |= tcp_clean_rtx_queue(sk, &seq_rtt,
-                                   tp->ca_ops->rtt_sample ? &seq_usrtt : NULL);
+                                   icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL);
  
         if (tp->frto_counter)
                 tcp_process_frto(sk, prior_snd_una);
  
-       if (tcp_ack_is_dubious(tp, flag)) {
+       if (tcp_ack_is_dubious(sk, flag)) {
                 /* Advanve CWND, if state allows this. */
-               if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(tp, flag))
-                       tcp_cong_avoid(tp, ack,  seq_rtt, prior_in_flight, 0);
+               if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
+                       tcp_cong_avoid(sk, ack,  seq_rtt, prior_in_flight, 0);
                 tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
         } else {
                 if ((flag & FLAG_DATA_ACKED))
-                       tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 1);
+                       tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
         }
  
         if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
@@ -2322,7 +2361,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
         return 1;
  
  no_queue:
-       tp->probes_out = 0;
+       icsk->icsk_probes_out = 0;
  
         /* If this ack opens up a zero window, clear backoff.  It was
          * being used to time the probes, and is probably far higher than
@@ -2500,8 +2539,9 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
   * up to bandwidth of 18Gigabit/sec. 8) ]
   */
  
-static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb)
+static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
         struct tcphdr *th = skb->h.th;
         u32 seq = TCP_SKB_CB(skb)->seq;
         u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -2516,14 +2556,15 @@ static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb)
                 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
  
                 /* 4. ... and sits in replay window. */
-               (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (tp->rto*1024)/HZ);
+               (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
  }
  
-static inline int tcp_paws_discard(struct tcp_sock *tp, struct sk_buff *skb)
+static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb)
  {
+       const struct tcp_sock *tp = tcp_sk(sk);
         return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
                 xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
-               !tcp_disordered_ack(tp, skb));
+               !tcp_disordered_ack(sk, skb));
  }
  
  /* Check segment sequence number for validity.
@@ -2586,7 +2627,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
-       tcp_schedule_ack(tp);
+       inet_csk_schedule_ack(sk);
  
         sk->sk_shutdown |= RCV_SHUTDOWN;
         sock_set_flag(sk, SOCK_DONE);
@@ -2596,7 +2637,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
                 case TCP_ESTABLISHED:
                         /* Move to CLOSE_WAIT */
                         tcp_set_state(sk, TCP_CLOSE_WAIT);
-                       tp->ack.pingpong = 1;
+                       inet_csk(sk)->icsk_ack.pingpong = 1;
                         break;
  
                 case TCP_CLOSE_WAIT:
@@ -2694,7 +2735,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
         if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
             before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
                 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
-               tcp_enter_quickack_mode(tp);
+               tcp_enter_quickack_mode(sk);
  
                 if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
                         u32 end_seq = TCP_SKB_CB(skb)->end_seq;
@@ -2853,7 +2894,7 @@ static void tcp_ofo_queue(struct sock *sk)
  
                 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
                         SOCK_DEBUG(sk, "ofo packet was already received \n");
-                       __skb_unlink(skb, skb->list);
+                       __skb_unlink(skb, &tp->out_of_order_queue);
                         __kfree_skb(skb);
                         continue;
                 }
@@ -2861,7 +2902,7 @@ static void tcp_ofo_queue(struct sock *sk)
                            tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
                            TCP_SKB_CB(skb)->end_seq);
  
-               __skb_unlink(skb, skb->list);
+               __skb_unlink(skb, &tp->out_of_order_queue);
                 __skb_queue_tail(&sk->sk_receive_queue, skb);
                 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
                 if(skb->h.th->fin)
@@ -2942,7 +2983,7 @@ queue_and_out:
                          * gap in queue is filled.
                          */
                         if (skb_queue_empty(&tp->out_of_order_queue))
-                               tp->ack.pingpong = 0;
+                               inet_csk(sk)->icsk_ack.pingpong = 0;
                 }
  
                 if (tp->rx_opt.num_sacks)
@@ -2963,8 +3004,8 @@ queue_and_out:
                 tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
  
  out_of_window:
-               tcp_enter_quickack_mode(tp);
-               tcp_schedule_ack(tp);
+               tcp_enter_quickack_mode(sk);
+               inet_csk_schedule_ack(sk);
  drop:
                 __kfree_skb(skb);
                 return;
@@ -2974,7 +3015,7 @@ drop:
         if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
                 goto out_of_window;
  
-       tcp_enter_quickack_mode(tp);
+       tcp_enter_quickack_mode(sk);
  
         if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
                 /* Partial packet, seq < rcv_next < end_seq */
@@ -3003,7 +3044,7 @@ drop:
  
         /* Disable header prediction. */
         tp->pred_flags = 0;
-       tcp_schedule_ack(tp);
+       inet_csk_schedule_ack(sk);
  
         SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
                    tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
@@ -3027,7 +3068,7 @@ drop:
                 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
  
                 if (seq == TCP_SKB_CB(skb1)->end_seq) {
-                       __skb_append(skb1, skb);
+                       __skb_append(skb1, skb, &tp->out_of_order_queue);
  
                         if (!tp->rx_opt.num_sacks ||
                             tp->selective_acks[0].end_seq != seq)
@@ -3071,7 +3112,7 @@ drop:
                                tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq);
                                break;
                        }
-                      __skb_unlink(skb1, skb1->list);
+                      __skb_unlink(skb1, &tp->out_of_order_queue);
                        tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq);
                        __kfree_skb(skb1);
                 }
@@ -3088,8 +3129,9 @@ add_sack:
   * simplifies code)
   */
  static void
-tcp_collapse(struct sock *sk, struct sk_buff *head,
-            struct sk_buff *tail, u32 start, u32 end)
+tcp_collapse(struct sock *sk, struct sk_buff_head *list,
+            struct sk_buff *head, struct sk_buff *tail,
+            u32 start, u32 end)
  {
         struct sk_buff *skb;
  
@@ -3099,7 +3141,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head,
                 /* No new bits? It is possible on ofo queue. */
                 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
                         struct sk_buff *next = skb->next;
-                       __skb_unlink(skb, skb->list);
+                       __skb_unlink(skb, list);
                         __kfree_skb(skb);
                         NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
                         skb = next;
@@ -3145,7 +3187,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head,
                 nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head);
                 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
                 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
-               __skb_insert(nskb, skb->prev, skb, skb->list);
+               __skb_insert(nskb, skb->prev, skb, list);
                 sk_stream_set_owner_r(nskb, sk);
  
                 /* Copy data, releasing collapsed skbs. */
@@ -3164,7 +3206,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head,
                         }
                         if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
                                 struct sk_buff *next = skb->next;
-                               __skb_unlink(skb, skb->list);
+                               __skb_unlink(skb, list);
                                 __kfree_skb(skb);
                                 NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
                                 skb = next;
@@ -3200,7 +3242,8 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
                 if (skb == (struct sk_buff *)&tp->out_of_order_queue ||
                     after(TCP_SKB_CB(skb)->seq, end) ||
                     before(TCP_SKB_CB(skb)->end_seq, start)) {
-                       tcp_collapse(sk, head, skb, start, end);
+                       tcp_collapse(sk, &tp->out_of_order_queue,
+                                    head, skb, start, end);
                         head = skb;
                         if (skb == (struct sk_buff *)&tp->out_of_order_queue)
                                 break;
@@ -3237,7 +3280,8 @@ static int tcp_prune_queue(struct sock *sk)
                 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
  
         tcp_collapse_ofo_queue(sk);
-       tcp_collapse(sk, sk->sk_receive_queue.next,
+       tcp_collapse(sk, &sk->sk_receive_queue,
+                    sk->sk_receive_queue.next,
                      (struct sk_buff*)&sk->sk_receive_queue,
                      tp->copied_seq, tp->rcv_nxt);
         sk_stream_mem_reclaim(sk);
@@ -3286,12 +3330,12 @@ void tcp_cwnd_application_limited(struct sock *sk)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
-       if (tp->ca_state == TCP_CA_Open &&
+       if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
             sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
                 /* Limited by application or receiver window. */
                 u32 win_used = max(tp->snd_cwnd_used, 2U);
                 if (win_used < tp->snd_cwnd) {
-                       tp->snd_ssthresh = tcp_current_ssthresh(tp);
+                       tp->snd_ssthresh = tcp_current_ssthresh(sk);
                         tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
                 }
                 tp->snd_cwnd_used = 0;
@@ -3370,13 +3414,13 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
         struct tcp_sock *tp = tcp_sk(sk);
  
             /* More than one full frame received... */
-       if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss
+       if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss
              /* ... and right edge of window advances far enough.
               * (tcp_recvmsg() will send ACK otherwise). Or...
               */
              && __tcp_select_window(sk) >= tp->rcv_wnd) ||
             /* We ACK each frame or... */
-           tcp_in_quickack_mode(tp) ||
+           tcp_in_quickack_mode(sk) ||
             /* We have out of order data. */
             (ofo_possible &&
              skb_peek(&tp->out_of_order_queue))) {
@@ -3390,8 +3434,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
  
  static __inline__ void tcp_ack_snd_check(struct sock *sk)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
-       if (!tcp_ack_scheduled(tp)) {
+       if (!inet_csk_ack_scheduled(sk)) {
                 /* We sent a data segment already. */
                 return;
         }
@@ -3462,7 +3505,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
                 tp->copied_seq++;
                 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
-                       __skb_unlink(skb, skb->list);
+                       __skb_unlink(skb, &sk->sk_receive_queue);
                         __kfree_skb(skb);
                 }
         }
@@ -3645,7 +3688,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                     tp->rcv_nxt == tp->rcv_wup)
                                         tcp_store_ts_recent(tp);
  
-                               tcp_rcv_rtt_measure_ts(tp, skb);
+                               tcp_rcv_rtt_measure_ts(sk, skb);
  
                                 /* We know that such packets are checksummed
                                  * on entry.
@@ -3678,7 +3721,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                             tp->rcv_nxt == tp->rcv_wup)
                                                 tcp_store_ts_recent(tp);
  
-                                       tcp_rcv_rtt_measure_ts(tp, skb);
+                                       tcp_rcv_rtt_measure_ts(sk, skb);
  
                                         __skb_pull(skb, tcp_header_len);
                                         tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
@@ -3699,7 +3742,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                     tp->rcv_nxt == tp->rcv_wup)
                                         tcp_store_ts_recent(tp);
  
-                               tcp_rcv_rtt_measure_ts(tp, skb);
+                               tcp_rcv_rtt_measure_ts(sk, skb);
  
                                 if ((int)skb->truesize > sk->sk_forward_alloc)
                                         goto step5;
@@ -3719,7 +3762,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                 /* Well, only one small jumplet in fast path... */
                                 tcp_ack(sk, skb, FLAG_DATA);
                                 tcp_data_snd_check(sk, tp);
-                               if (!tcp_ack_scheduled(tp))
+                               if (!inet_csk_ack_scheduled(sk))
                                         goto no_ack;
                         }
  
@@ -3741,7 +3784,7 @@ slow_path:
          * RFC1323: H1. Apply PAWS check first.
          */
         if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
-           tcp_paws_discard(tp, skb)) {
+           tcp_paws_discard(sk, skb)) {
                 if (!th->rst) {
                         NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
                         tcp_send_dupack(sk, skb);
@@ -3788,7 +3831,7 @@ step5:
         if(th->ack)
                 tcp_ack(sk, skb, FLAG_SLOWPATH);
  
-       tcp_rcv_rtt_measure_ts(tp, skb);
+       tcp_rcv_rtt_measure_ts(sk, skb);
  
         /* Process urgent data. */
         tcp_urg(sk, skb, th);
@@ -3817,6 +3860,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
         tcp_parse_options(skb, &tp->rx_opt, 0);
  
         if (th->ack) {
+               struct inet_connection_sock *icsk;
                 /* rfc793:
                  * "If the state is SYN-SENT then
                  *    first check the ACK bit
@@ -3920,7 +3964,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
  
                 tcp_init_metrics(sk);
  
-               tcp_init_congestion_control(tp);
+               tcp_init_congestion_control(sk);
  
                 /* Prevent spurious tcp_cwnd_restart() on first data
                  * packet.
@@ -3930,7 +3974,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                 tcp_init_buffer_space(sk);
  
                 if (sock_flag(sk, SOCK_KEEPOPEN))
-                       tcp_reset_keepalive_timer(sk, keepalive_time_when(tp));
+                       inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
  
                 if (!tp->rx_opt.snd_wscale)
                         __tcp_fast_path_on(tp, tp->snd_wnd);
@@ -3942,7 +3986,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                         sk_wake_async(sk, 0, POLL_OUT);
                 }
  
-               if (sk->sk_write_pending || tp->defer_accept || tp->ack.pingpong) {
+               icsk = inet_csk(sk);
+
+               if (sk->sk_write_pending ||
+                   icsk->icsk_accept_queue.rskq_defer_accept ||
+                   icsk->icsk_ack.pingpong) {
                         /* Save one ACK. Data will be ready after
                          * several ticks, if write_pending is set.
                          *
@@ -3950,12 +3998,13 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                          * look so _wonderfully_ clever, that I was not able
                          * to stand against the temptation 8)     --ANK
                          */
-                       tcp_schedule_ack(tp);
-                       tp->ack.lrcvtime = tcp_time_stamp;
-                       tp->ack.ato      = TCP_ATO_MIN;
-                       tcp_incr_quickack(tp);
-                       tcp_enter_quickack_mode(tp);
-                       tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
+                       inet_csk_schedule_ack(sk);
+                       icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+                       icsk->icsk_ack.ato       = TCP_ATO_MIN;
+                       tcp_incr_quickack(sk);
+                       tcp_enter_quickack_mode(sk);
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                                 TCP_DELACK_MAX, TCP_RTO_MAX);
  
  discard:
                         __kfree_skb(skb);
@@ -4111,7 +4160,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
         }
  
         if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
-           tcp_paws_discard(tp, skb)) {
+           tcp_paws_discard(sk, skb)) {
                 if (!th->rst) {
                         NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
                         tcp_send_dupack(sk, skb);
@@ -4180,7 +4229,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                                  */
                                 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
                                     !tp->srtt)
-                                       tcp_ack_saw_tstamp(tp, 0, 0);
+                                       tcp_ack_saw_tstamp(sk, NULL, 0);
  
                                 if (tp->rx_opt.tstamp_ok)
                                         tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -4192,7 +4241,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
  
                                 tcp_init_metrics(sk);
  
-                               tcp_init_congestion_control(tp);
+                               tcp_init_congestion_control(sk);
  
                                 /* Prevent spurious tcp_cwnd_restart() on
                                  * first data packet.
@@ -4227,9 +4276,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                                                 return 1;
                                         }
  
-                                       tmo = tcp_fin_time(tp);
+                                       tmo = tcp_fin_time(sk);
                                         if (tmo > TCP_TIMEWAIT_LEN) {
-                                               tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
+                                               inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
                                         } else if (th->fin || sock_owned_by_user(sk)) {
                                                 /* Bad case. We could lose such FIN otherwise.
                                                  * It is not a big problem, but it looks confusing
@@ -4237,7 +4286,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                                                  * if it spins in bh_lock_sock(), but it is really
                                                  * marginal case.
                                                  */
-                                               tcp_reset_keepalive_timer(sk, tmo);
+                                               inet_csk_reset_keepalive_timer(sk, tmo);
                                         } else {
                                                 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
                                                 goto discard;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c

index 62f62bb05c2ae479eae4c03ee2986fd43e20b9a4..13dfb391cdf17a376c301c9f56973db93696c1d6 100644 (file)
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -64,7 +64,9 @@
  #include <linux/times.h>
  
  #include <net/icmp.h>
+#include <net/inet_hashtables.h>
  #include <net/tcp.h>
+#include <net/transp_v6.h>
  #include <net/ipv6.h>
  #include <net/inet_common.h>
  #include <net/xfrm.h>
@@ -75,7 +77,6 @@
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
  
-extern int sysctl_ip_dynaddr;
  int sysctl_tcp_tw_reuse;
  int sysctl_tcp_low_latency;
  
@@ -88,458 +89,29 @@ static struct socket *tcp_socket;
  void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
                        struct sk_buff *skb);
  
-struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
-       .__tcp_lhash_lock       =       RW_LOCK_UNLOCKED,
-       .__tcp_lhash_users      =       ATOMIC_INIT(0),
-       .__tcp_lhash_wait
-         = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait),
-       .__tcp_portalloc_lock   =       SPIN_LOCK_UNLOCKED
+struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
+       .lhash_lock     = RW_LOCK_UNLOCKED,
+       .lhash_users    = ATOMIC_INIT(0),
+       .lhash_wait     = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
+       .portalloc_lock = SPIN_LOCK_UNLOCKED,
+       .port_rover     = 1024 - 1,
  };
  
-/*
- * This array holds the first and last local port number.
- * For high-usage systems, use sysctl to change this to
- * 32768-61000
- */
-int sysctl_local_port_range[2] = { 1024, 4999 };
-int tcp_port_rover = 1024 - 1;
-
-static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
-                                __u32 faddr, __u16 fport)
-{
-       int h = (laddr ^ lport) ^ (faddr ^ fport);
-       h ^= h >> 16;
-       h ^= h >> 8;
-       return h & (tcp_ehash_size - 1);
-}
-
-static __inline__ int tcp_sk_hashfn(struct sock *sk)
-{
-       struct inet_sock *inet = inet_sk(sk);
-       __u32 laddr = inet->rcv_saddr;
-       __u16 lport = inet->num;
-       __u32 faddr = inet->daddr;
-       __u16 fport = inet->dport;
-
-       return tcp_hashfn(laddr, lport, faddr, fport);
-}
-
-/* Allocate and initialize a new TCP local port bind bucket.
- * The bindhash mutex for snum's hash chain must be held here.
- */
-struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
-                                         unsigned short snum)
-{
-       struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep,
-                                                     SLAB_ATOMIC);
-       if (tb) {
-               tb->port = snum;
-               tb->fastreuse = 0;
-               INIT_HLIST_HEAD(&tb->owners);
-               hlist_add_head(&tb->node, &head->chain);
-       }
-       return tb;
-}
-
-/* Caller must hold hashbucket lock for this tb with local BH disabled */
-void tcp_bucket_destroy(struct tcp_bind_bucket *tb)
-{
-       if (hlist_empty(&tb->owners)) {
-               __hlist_del(&tb->node);
-               kmem_cache_free(tcp_bucket_cachep, tb);
-       }
-}
-
-/* Caller must disable local BH processing. */
-static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
-{
-       struct tcp_bind_hashbucket *head =
-                               &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)];
-       struct tcp_bind_bucket *tb;
-
-       spin_lock(&head->lock);
-       tb = tcp_sk(sk)->bind_hash;
-       sk_add_bind_node(child, &tb->owners);
-       tcp_sk(child)->bind_hash = tb;
-       spin_unlock(&head->lock);
-}
-
-inline void tcp_inherit_port(struct sock *sk, struct sock *child)
-{
-       local_bh_disable();
-       __tcp_inherit_port(sk, child);
-       local_bh_enable();
-}
-
-void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
-                  unsigned short snum)
-{
-       inet_sk(sk)->num = snum;
-       sk_add_bind_node(sk, &tb->owners);
-       tcp_sk(sk)->bind_hash = tb;
-}
-
-static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
-{
-       const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
-       struct sock *sk2;
-       struct hlist_node *node;
-       int reuse = sk->sk_reuse;
-
-       sk_for_each_bound(sk2, node, &tb->owners) {
-               if (sk != sk2 &&
-                   !tcp_v6_ipv6only(sk2) &&
-                   (!sk->sk_bound_dev_if ||
-                    !sk2->sk_bound_dev_if ||
-                    sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
-                       if (!reuse || !sk2->sk_reuse ||
-                           sk2->sk_state == TCP_LISTEN) {
-                               const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
-                               if (!sk2_rcv_saddr || !sk_rcv_saddr ||
-                                   sk2_rcv_saddr == sk_rcv_saddr)
-                                       break;
-                       }
-               }
-       }
-       return node != NULL;
-}
-
-/* Obtain a reference to a local port for the given sock,
- * if snum is zero it means select any available local port.
- */
  static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
  {
-       struct tcp_bind_hashbucket *head;
-       struct hlist_node *node;
-       struct tcp_bind_bucket *tb;
-       int ret;
-
-       local_bh_disable();
-       if (!snum) {
-               int low = sysctl_local_port_range[0];
-               int high = sysctl_local_port_range[1];
-               int remaining = (high - low) + 1;
-               int rover;
-
-               spin_lock(&tcp_portalloc_lock);
-               if (tcp_port_rover < low)
-                       rover = low;
-               else
-                       rover = tcp_port_rover;
-               do {
-                       rover++;
-                       if (rover > high)
-                               rover = low;
-                       head = &tcp_bhash[tcp_bhashfn(rover)];
-                       spin_lock(&head->lock);
-                       tb_for_each(tb, node, &head->chain)
-                               if (tb->port == rover)
-                                       goto next;
-                       break;
-               next:
-                       spin_unlock(&head->lock);
-               } while (--remaining > 0);
-               tcp_port_rover = rover;
-               spin_unlock(&tcp_portalloc_lock);
-
-               /* Exhausted local port range during search? */
-               ret = 1;
-               if (remaining <= 0)
-                       goto fail;
-
-               /* OK, here is the one we will use.  HEAD is
-                * non-NULL and we hold it's mutex.
-                */
-               snum = rover;
-       } else {
-               head = &tcp_bhash[tcp_bhashfn(snum)];
-               spin_lock(&head->lock);
-               tb_for_each(tb, node, &head->chain)
-                       if (tb->port == snum)
-                               goto tb_found;
-       }
-       tb = NULL;
-       goto tb_not_found;
-tb_found:
-       if (!hlist_empty(&tb->owners)) {
-               if (sk->sk_reuse > 1)
-                       goto success;
-               if (tb->fastreuse > 0 &&
-                   sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
-                       goto success;
-               } else {
-                       ret = 1;
-                       if (tcp_bind_conflict(sk, tb))
-                               goto fail_unlock;
-               }
-       }
-tb_not_found:
-       ret = 1;
-       if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
-               goto fail_unlock;
-       if (hlist_empty(&tb->owners)) {
-               if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
-                       tb->fastreuse = 1;
-               else
-                       tb->fastreuse = 0;
-       } else if (tb->fastreuse &&
-                  (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
-               tb->fastreuse = 0;
-success:
-       if (!tcp_sk(sk)->bind_hash)
-               tcp_bind_hash(sk, tb, snum);
-       BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
-       ret = 0;
-
-fail_unlock:
-       spin_unlock(&head->lock);
-fail:
-       local_bh_enable();
-       return ret;
-}
-
-/* Get rid of any references to a local port held by the
- * given sock.
- */
-static void __tcp_put_port(struct sock *sk)
-{
-       struct inet_sock *inet = inet_sk(sk);
-       struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
-       struct tcp_bind_bucket *tb;
-
-       spin_lock(&head->lock);
-       tb = tcp_sk(sk)->bind_hash;
-       __sk_del_bind_node(sk);
-       tcp_sk(sk)->bind_hash = NULL;
-       inet->num = 0;
-       tcp_bucket_destroy(tb);
-       spin_unlock(&head->lock);
-}
-
-void tcp_put_port(struct sock *sk)
-{
-       local_bh_disable();
-       __tcp_put_port(sk);
-       local_bh_enable();
-}
-
-/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
- * Look, when several writers sleep and reader wakes them up, all but one
- * immediately hit write lock and grab all the cpus. Exclusive sleep solves
- * this, _but_ remember, it adds useless work on UP machines (wake up each
- * exclusive lock release). It should be ifdefed really.
- */
-
-void tcp_listen_wlock(void)
-{
-       write_lock(&tcp_lhash_lock);
-
-       if (atomic_read(&tcp_lhash_users)) {
-               DEFINE_WAIT(wait);
-
-               for (;;) {
-                       prepare_to_wait_exclusive(&tcp_lhash_wait,
-                                               &wait, TASK_UNINTERRUPTIBLE);
-                       if (!atomic_read(&tcp_lhash_users))
-                               break;
-                       write_unlock_bh(&tcp_lhash_lock);
-                       schedule();
-                       write_lock_bh(&tcp_lhash_lock);
-               }
-
-               finish_wait(&tcp_lhash_wait, &wait);
-       }
-}
-
-static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
-{
-       struct hlist_head *list;
-       rwlock_t *lock;
-
-       BUG_TRAP(sk_unhashed(sk));
-       if (listen_possible && sk->sk_state == TCP_LISTEN) {
-               list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
-               lock = &tcp_lhash_lock;
-               tcp_listen_wlock();
-       } else {
-               list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain;
-               lock = &tcp_ehash[sk->sk_hashent].lock;
-               write_lock(lock);
-       }
-       __sk_add_node(sk, list);
-       sock_prot_inc_use(sk->sk_prot);
-       write_unlock(lock);
-       if (listen_possible && sk->sk_state == TCP_LISTEN)
-               wake_up(&tcp_lhash_wait);
+       return inet_csk_get_port(&tcp_hashinfo, sk, snum);
  }
  
  static void tcp_v4_hash(struct sock *sk)
  {
-       if (sk->sk_state != TCP_CLOSE) {
-               local_bh_disable();
-               __tcp_v4_hash(sk, 1);
-               local_bh_enable();
-       }
+       inet_hash(&tcp_hashinfo, sk);
  }
  
  void tcp_unhash(struct sock *sk)
  {
-       rwlock_t *lock;
-
-       if (sk_unhashed(sk))
-               goto ende;
-
-       if (sk->sk_state == TCP_LISTEN) {
-               local_bh_disable();
-               tcp_listen_wlock();
-               lock = &tcp_lhash_lock;
-       } else {
-               struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent];
-               lock = &head->lock;
-               write_lock_bh(&head->lock);
-       }
-
-       if (__sk_del_node_init(sk))
-               sock_prot_dec_use(sk->sk_prot);
-       write_unlock_bh(lock);
-
- ende:
-       if (sk->sk_state == TCP_LISTEN)
-               wake_up(&tcp_lhash_wait);
-}
-
-/* Don't inline this cruft.  Here are some nice properties to
- * exploit here.  The BSD API does not allow a listening TCP
- * to specify the remote port nor the remote address for the
- * connection.  So always assume those are both wildcarded
- * during the search since they can never be otherwise.
- */
-static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
-                                            unsigned short hnum, int dif)
-{
-       struct sock *result = NULL, *sk;
-       struct hlist_node *node;
-       int score, hiscore;
-
-       hiscore=-1;
-       sk_for_each(sk, node, head) {
-               struct inet_sock *inet = inet_sk(sk);
-
-               if (inet->num == hnum && !ipv6_only_sock(sk)) {
-                       __u32 rcv_saddr = inet->rcv_saddr;
-
-                       score = (sk->sk_family == PF_INET ? 1 : 0);
-                       if (rcv_saddr) {
-                               if (rcv_saddr != daddr)
-                                       continue;
-                               score+=2;
-                       }
-                       if (sk->sk_bound_dev_if) {
-                               if (sk->sk_bound_dev_if != dif)
-                                       continue;
-                               score+=2;
-                       }
-                       if (score == 5)
-                               return sk;
-                       if (score > hiscore) {
-                               hiscore = score;
-                               result = sk;
-                       }
-               }
-       }
-       return result;
-}
-
-/* Optimize the common listener case. */
-static inline struct sock *tcp_v4_lookup_listener(u32 daddr,
-               unsigned short hnum, int dif)
-{
-       struct sock *sk = NULL;
-       struct hlist_head *head;
-
-       read_lock(&tcp_lhash_lock);
-       head = &tcp_listening_hash[tcp_lhashfn(hnum)];
-       if (!hlist_empty(head)) {
-               struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
-
-               if (inet->num == hnum && !sk->sk_node.next &&
-                   (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
-                   (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
-                   !sk->sk_bound_dev_if)
-                       goto sherry_cache;
-               sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
-       }
-       if (sk) {
-sherry_cache:
-               sock_hold(sk);
-       }
-       read_unlock(&tcp_lhash_lock);
-       return sk;
-}
-
-/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
- * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
- *
- * Local BH must be disabled here.
- */
-
-static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
-                                                      u32 daddr, u16 hnum,
-                                                      int dif)
-{
-       struct tcp_ehash_bucket *head;
-       TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
-       __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
-       struct sock *sk;
-       struct hlist_node *node;
-       /* Optimize here for direct hit, only listening connections can
-        * have wildcards anyways.
-        */
-       int hash = tcp_hashfn(daddr, hnum, saddr, sport);
-       head = &tcp_ehash[hash];
-       read_lock(&head->lock);
-       sk_for_each(sk, node, &head->chain) {
-               if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
-                       goto hit; /* You sunk my battleship! */
-       }
-
-       /* Must check for a TIME_WAIT'er before going to listener hash. */
-       sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
-               if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
-                       goto hit;
-       }
-       sk = NULL;
-out:
-       read_unlock(&head->lock);
-       return sk;
-hit:
-       sock_hold(sk);
-       goto out;
-}
-
-static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport,
-                                          u32 daddr, u16 hnum, int dif)
-{
-       struct sock *sk = __tcp_v4_lookup_established(saddr, sport,
-                                                     daddr, hnum, dif);
-
-       return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif);
-}
-
-inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr,
-                                 u16 dport, int dif)
-{
-       struct sock *sk;
-
-       local_bh_disable();
-       sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif);
-       local_bh_enable();
-
-       return sk;
+       inet_unhash(&tcp_hashinfo, sk);
  }
  
-EXPORT_SYMBOL_GPL(tcp_v4_lookup);
-
  static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
  {
         return secure_tcp_sequence_number(skb->nh.iph->daddr,
@@ -550,27 +122,28 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
  
  /* called with local bh disabled */
  static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
-                                     struct tcp_tw_bucket **twp)
+                                     struct inet_timewait_sock **twp)
  {
         struct inet_sock *inet = inet_sk(sk);
         u32 daddr = inet->rcv_saddr;
         u32 saddr = inet->daddr;
         int dif = sk->sk_bound_dev_if;
-       TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
-       __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
-       int hash = tcp_hashfn(daddr, lport, saddr, inet->dport);
-       struct tcp_ehash_bucket *head = &tcp_ehash[hash];
+       INET_ADDR_COOKIE(acookie, saddr, daddr)
+       const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+       const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size);
+       struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
         struct sock *sk2;
-       struct hlist_node *node;
-       struct tcp_tw_bucket *tw;
+       const struct hlist_node *node;
+       struct inet_timewait_sock *tw;
  
         write_lock(&head->lock);
  
         /* Check TIME-WAIT sockets first. */
-       sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
-               tw = (struct tcp_tw_bucket *)sk2;
+       sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
+               tw = inet_twsk(sk2);
  
-               if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
+               if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
+                       const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
                         struct tcp_sock *tp = tcp_sk(sk);
  
                         /* With PAWS, it is safe from the viewpoint
@@ -587,15 +160,15 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
                            fall back to VJ's scheme and use initial
                            timestamp retrieved from peer table.
                          */
-                       if (tw->tw_ts_recent_stamp &&
+                       if (tcptw->tw_ts_recent_stamp &&
                             (!twp || (sysctl_tcp_tw_reuse &&
                                       xtime.tv_sec -
-                                     tw->tw_ts_recent_stamp > 1))) {
-                               if ((tp->write_seq =
-                                               tw->tw_snd_nxt + 65535 + 2) == 0)
+                                     tcptw->tw_ts_recent_stamp > 1))) {
+                               tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
+                               if (tp->write_seq == 0)
                                         tp->write_seq = 1;
-                               tp->rx_opt.ts_recent       = tw->tw_ts_recent;
-                               tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
+                               tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
+                               tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
                                 sock_hold(sk2);
                                 goto unique;
                         } else
@@ -606,7 +179,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
  
         /* And established part... */
         sk_for_each(sk2, node, &head->chain) {
-               if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+               if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
                         goto not_unique;
         }
  
@@ -626,10 +199,10 @@ unique:
                 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
         } else if (tw) {
                 /* Silly. Should hash-dance instead... */
-               tcp_tw_deschedule(tw);
+               inet_twsk_deschedule(tw, &tcp_death_row);
                 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
  
-               tcp_tw_put(tw);
+               inet_twsk_put(tw);
         }
  
         return 0;
@@ -652,9 +225,9 @@ static inline u32 connect_port_offset(const struct sock *sk)
   */
  static inline int tcp_v4_hash_connect(struct sock *sk)
  {
-       unsigned short snum = inet_sk(sk)->num;
-       struct tcp_bind_hashbucket *head;
-       struct tcp_bind_bucket *tb;
+       const unsigned short snum = inet_sk(sk)->num;
+       struct inet_bind_hashbucket *head;
+       struct inet_bind_bucket *tb;
         int ret;
  
         if (!snum) {
@@ -666,19 +239,19 @@ static inline int tcp_v4_hash_connect(struct sock *sk)
                 static u32 hint;
                 u32 offset = hint + connect_port_offset(sk);
                 struct hlist_node *node;
-               struct tcp_tw_bucket *tw = NULL;
+               struct inet_timewait_sock *tw = NULL;
  
                 local_bh_disable();
                 for (i = 1; i <= range; i++) {
                         port = low + (i + offset) % range;
-                       head = &tcp_bhash[tcp_bhashfn(port)];
+                       head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
                         spin_lock(&head->lock);
  
                         /* Does not bother with rcv_saddr checks,
                          * because the established check is already
                          * unique enough.
                          */
-                       tb_for_each(tb, node, &head->chain) {
+                       inet_bind_bucket_for_each(tb, node, &head->chain) {
                                 if (tb->port == port) {
                                         BUG_TRAP(!hlist_empty(&tb->owners));
                                         if (tb->fastreuse >= 0)
@@ -691,7 +264,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk)
                                 }
                         }
  
-                       tb = tcp_bucket_create(head, port);
+                       tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
                         if (!tb) {
                                 spin_unlock(&head->lock);
                                 break;
@@ -710,27 +283,27 @@ ok:
                 hint += i;
  
                 /* Head lock still held and bh's disabled */
-               tcp_bind_hash(sk, tb, port);
+               inet_bind_hash(sk, tb, port);
                 if (sk_unhashed(sk)) {
                         inet_sk(sk)->sport = htons(port);
-                       __tcp_v4_hash(sk, 0);
+                       __inet_hash(&tcp_hashinfo, sk, 0);
                 }
                 spin_unlock(&head->lock);
  
                 if (tw) {
-                       tcp_tw_deschedule(tw);
-                       tcp_tw_put(tw);
+                       inet_twsk_deschedule(tw, &tcp_death_row);;
+                       inet_twsk_put(tw);
                 }
  
                 ret = 0;
                 goto out;
         }
  
-       head  = &tcp_bhash[tcp_bhashfn(snum)];
-       tb  = tcp_sk(sk)->bind_hash;
+       head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
+       tb  = inet_csk(sk)->icsk_bind_hash;
         spin_lock_bh(&head->lock);
         if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-               __tcp_v4_hash(sk, 0);
+               __inet_hash(&tcp_hashinfo, sk, 0);
                 spin_unlock_bh(&head->lock);
                 return 0;
         } else {
@@ -793,7 +366,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                 tp->write_seq              = 0;
         }
  
-       if (sysctl_tcp_tw_recycle &&
+       if (tcp_death_row.sysctl_tw_recycle &&
             !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
                 struct inet_peer *peer = rt_get_peer(rt);
  
@@ -832,8 +405,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                 goto failure;
  
         /* OK, now commit destination to socket.  */
-       __sk_dst_set(sk, &rt->u.dst);
-       tcp_v4_setup_caps(sk, &rt->u.dst);
+       sk_setup_caps(sk, &rt->u.dst);
  
         if (!tp->write_seq)
                 tp->write_seq = secure_tcp_sequence_number(inet->saddr,
@@ -859,53 +431,6 @@ failure:
         return err;
  }
  
-static __inline__ int tcp_v4_iif(struct sk_buff *skb)
-{
-       return ((struct rtable *)skb->dst)->rt_iif;
-}
-
-static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
-{
-       return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
-}
-
-static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
-                                             struct request_sock ***prevp,
-                                             __u16 rport,
-                                             __u32 raddr, __u32 laddr)
-{
-       struct listen_sock *lopt = tp->accept_queue.listen_opt;
-       struct request_sock *req, **prev;
-
-       for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
-            (req = *prev) != NULL;
-            prev = &req->dl_next) {
-               const struct inet_request_sock *ireq = inet_rsk(req);
-
-               if (ireq->rmt_port == rport &&
-                   ireq->rmt_addr == raddr &&
-                   ireq->loc_addr == laddr &&
-                   TCP_INET_FAMILY(req->rsk_ops->family)) {
-                       BUG_TRAP(!req->sk);
-                       *prevp = prev;
-                       break;
-               }
-       }
-
-       return req;
-}
-
-static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-       struct listen_sock *lopt = tp->accept_queue.listen_opt;
-       u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
-
-       reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
-       tcp_synq_added(sk);
-}
-
-
  /*
   * This routine does path mtu discovery as defined in RFC1191.
   */
@@ -988,14 +513,14 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
                 return;
         }
  
-       sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr,
-                          th->source, tcp_v4_iif(skb));
+       sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
+                        th->source, inet_iif(skb));
         if (!sk) {
                 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
                 return;
         }
         if (sk->sk_state == TCP_TIME_WAIT) {
-               tcp_tw_put((struct tcp_tw_bucket *)sk);
+               inet_twsk_put((struct inet_timewait_sock *)sk);
                 return;
         }
  
@@ -1049,8 +574,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
                 if (sock_owned_by_user(sk))
                         goto out;
  
-               req = tcp_v4_search_req(tp, &prev, th->dest,
-                                       iph->daddr, iph->saddr);
+               req = inet_csk_search_req(sk, &prev, th->dest,
+                                         iph->daddr, iph->saddr);
                 if (!req)
                         goto out;
  
@@ -1070,7 +595,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
                  * created socket, and POSIX does not want network
                  * errors returned from accept().
                  */
-               tcp_synq_drop(sk, req, prev);
+               inet_csk_reqsk_queue_drop(sk, req, prev);
                 goto out;
  
         case TCP_SYN_SENT:
@@ -1240,12 +765,13 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
  
  static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
  {
-       struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
+       struct inet_timewait_sock *tw = inet_twsk(sk);
+       const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
  
-       tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
-                       tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
+       tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+                       tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
  
-       tcp_tw_put(tw);
+       inet_twsk_put(tw);
  }
  
  static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
@@ -1254,36 +780,6 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
                         req->ts_recent);
  }
  
-static struct dst_entry* tcp_v4_route_req(struct sock *sk,
-                                         struct request_sock *req)
-{
-       struct rtable *rt;
-       const struct inet_request_sock *ireq = inet_rsk(req);
-       struct ip_options *opt = inet_rsk(req)->opt;
-       struct flowi fl = { .oif = sk->sk_bound_dev_if,
-                           .nl_u = { .ip4_u =
-                                     { .daddr = ((opt && opt->srr) ?
-                                                 opt->faddr :
-                                                 ireq->rmt_addr),
-                                       .saddr = ireq->loc_addr,
-                                       .tos = RT_CONN_FLAGS(sk) } },
-                           .proto = IPPROTO_TCP,
-                           .uli_u = { .ports =
-                                      { .sport = inet_sk(sk)->sport,
-                                        .dport = ireq->rmt_port } } };
-
-       if (ip_route_output_flow(&rt, &fl, sk, 0)) {
-               IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
-               return NULL;
-       }
-       if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
-               ip_rt_put(rt);
-               IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
-               return NULL;
-       }
-       return &rt->u.dst;
-}
-
  /*
   *     Send a SYN-ACK after having received an ACK.
   *     This still operates on a request_sock only, not on a big
@@ -1297,7 +793,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
         struct sk_buff * skb;
  
         /* First, grab a route. */
-       if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
+       if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
                 goto out;
  
         skb = tcp_make_synack(sk, dst, req);
@@ -1399,7 +895,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
          * limitations, they conserve resources and peer is
          * evidently real one.
          */
-       if (tcp_synq_is_full(sk) && !isn) {
+       if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
  #ifdef CONFIG_SYN_COOKIES
                 if (sysctl_tcp_syncookies) {
                         want_cookie = 1;
@@ -1413,7 +909,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
          * clogging syn queue with openreqs with exponentially increasing
          * timeout.
          */
-       if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
+       if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
                 goto drop;
  
         req = reqsk_alloc(&tcp_request_sock_ops);
@@ -1469,8 +965,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
                  * are made in the function processing timewait state.
                  */
                 if (tmp_opt.saw_tstamp &&
-                   sysctl_tcp_tw_recycle &&
-                   (dst = tcp_v4_route_req(sk, req)) != NULL &&
+                   tcp_death_row.sysctl_tw_recycle &&
+                   (dst = inet_csk_route_req(sk, req)) != NULL &&
                     (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
                     peer->v4daddr == saddr) {
                         if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
@@ -1483,7 +979,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
                 }
                 /* Kill the following clause, if you dislike this way. */
                 else if (!sysctl_tcp_syncookies &&
-                        (sysctl_max_syn_backlog - tcp_synq_len(sk) <
+                        (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
                           (sysctl_max_syn_backlog >> 2)) &&
                          (!peer || !peer->tcp_ts_stamp) &&
                          (!dst || !dst_metric(dst, RTAX_RTT))) {
@@ -1494,12 +990,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
                          * to destinations, already remembered
                          * to the moment of synflood.
                          */
-                       NETDEBUG(if (net_ratelimit()) \
-                                       printk(KERN_DEBUG "TCP: drop open "
-                                                         "request from %u.%u."
-                                                         "%u.%u/%u\n", \
-                                              NIPQUAD(saddr),
-                                              ntohs(skb->h.th->source)));
+                       LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
+                                      "request from %u.%u.%u.%u/%u\n",
+                                      NIPQUAD(saddr),
+                                      ntohs(skb->h.th->source));
                         dst_release(dst);
                         goto drop_and_free;
                 }
@@ -1514,7 +1008,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
         if (want_cookie) {
                 reqsk_free(req);
         } else {
-               tcp_v4_synq_add(sk, req);
+               inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
         }
         return 0;
  
@@ -1542,15 +1036,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
         if (sk_acceptq_is_full(sk))
                 goto exit_overflow;
  
-       if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
+       if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
                 goto exit;
  
         newsk = tcp_create_openreq_child(sk, req, skb);
         if (!newsk)
                 goto exit;
  
-       newsk->sk_dst_cache = dst;
-       tcp_v4_setup_caps(newsk, dst);
+       sk_setup_caps(newsk, dst);
  
         newtp                 = tcp_sk(newsk);
         newinet               = inet_sk(newsk);
@@ -1560,7 +1053,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
         newinet->saddr        = ireq->loc_addr;
         newinet->opt          = ireq->opt;
         ireq->opt             = NULL;
-       newinet->mc_index     = tcp_v4_iif(skb);
+       newinet->mc_index     = inet_iif(skb);
         newinet->mc_ttl       = skb->nh.iph->ttl;
         newtp->ext_header_len = 0;
         if (newinet->opt)
@@ -1571,8 +1064,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
         newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
         tcp_initialize_rcv_mss(newsk);
  
-       __tcp_v4_hash(newsk, 0);
-       __tcp_inherit_port(sk, newsk);
+       __inet_hash(&tcp_hashinfo, newsk, 0);
+       __inet_inherit_port(&tcp_hashinfo, sk, newsk);
  
         return newsk;
  
@@ -1588,27 +1081,24 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
  {
         struct tcphdr *th = skb->h.th;
         struct iphdr *iph = skb->nh.iph;
-       struct tcp_sock *tp = tcp_sk(sk);
         struct sock *nsk;
         struct request_sock **prev;
         /* Find possible connection requests. */
-       struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source,
-                                                    iph->saddr, iph->daddr);
+       struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
+                                                      iph->saddr, iph->daddr);
         if (req)
                 return tcp_check_req(sk, skb, req, prev);
  
-       nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
-                                         th->source,
-                                         skb->nh.iph->daddr,
-                                         ntohs(th->dest),
-                                         tcp_v4_iif(skb));
+       nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
+                                       th->source, skb->nh.iph->daddr,
+                                       ntohs(th->dest), inet_iif(skb));
  
         if (nsk) {
                 if (nsk->sk_state != TCP_TIME_WAIT) {
                         bh_lock_sock(nsk);
                         return nsk;
                 }
-               tcp_tw_put((struct tcp_tw_bucket *)nsk);
+               inet_twsk_put((struct inet_timewait_sock *)nsk);
                 return NULL;
         }
  
@@ -1627,8 +1117,7 @@ static int tcp_v4_checksum_init(struct sk_buff *skb)
                                   skb->nh.iph->daddr, skb->csum))
                         return 0;
  
-               NETDEBUG(if (net_ratelimit())
-                               printk(KERN_DEBUG "hw tcp v4 csum failed\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n");
                 skb->ip_summed = CHECKSUM_NONE;
         }
         if (skb->len <= 76) {
@@ -1744,9 +1233,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
         TCP_SKB_CB(skb)->flags   = skb->nh.iph->tos;
         TCP_SKB_CB(skb)->sacked  = 0;
  
-       sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source,
-                            skb->nh.iph->daddr, ntohs(th->dest),
-                            tcp_v4_iif(skb));
+       sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
+                          skb->nh.iph->daddr, ntohs(th->dest),
+                          inet_iif(skb));
  
         if (!sk)
                 goto no_tcp_socket;
@@ -1798,24 +1287,26 @@ discard_and_relse:
  
  do_time_wait:
         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-               tcp_tw_put((struct tcp_tw_bucket *) sk);
+               inet_twsk_put((struct inet_timewait_sock *) sk);
                 goto discard_it;
         }
  
         if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
-               tcp_tw_put((struct tcp_tw_bucket *) sk);
+               inet_twsk_put((struct inet_timewait_sock *) sk);
                 goto discard_it;
         }
-       switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
-                                          skb, th, skb->len)) {
+       switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
+                                          skb, th)) {
         case TCP_TW_SYN: {
-               struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
-                                                         ntohs(th->dest),
-                                                         tcp_v4_iif(skb));
+               struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
+                                                       skb->nh.iph->daddr,
+                                                       ntohs(th->dest),
+                                                       inet_iif(skb));
                 if (sk2) {
-                       tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
-                       tcp_tw_put((struct tcp_tw_bucket *)sk);
+                       inet_twsk_deschedule((struct inet_timewait_sock *)sk,
+                                            &tcp_death_row);
+                       inet_twsk_put((struct inet_timewait_sock *)sk);
                         sk = sk2;
                         goto process;
                 }
@@ -1831,112 +1322,6 @@ do_time_wait:
         goto discard_it;
  }
  
-/* With per-bucket locks this operation is not-atomic, so that
- * this version is not worse.
- */
-static void __tcp_v4_rehash(struct sock *sk)
-{
-       sk->sk_prot->unhash(sk);
-       sk->sk_prot->hash(sk);
-}
-
-static int tcp_v4_reselect_saddr(struct sock *sk)
-{
-       struct inet_sock *inet = inet_sk(sk);
-       int err;
-       struct rtable *rt;
-       __u32 old_saddr = inet->saddr;
-       __u32 new_saddr;
-       __u32 daddr = inet->daddr;
-
-       if (inet->opt && inet->opt->srr)
-               daddr = inet->opt->faddr;
-
-       /* Query new route. */
-       err = ip_route_connect(&rt, daddr, 0,
-                              RT_CONN_FLAGS(sk),
-                              sk->sk_bound_dev_if,
-                              IPPROTO_TCP,
-                              inet->sport, inet->dport, sk);
-       if (err)
-               return err;
-
-       __sk_dst_set(sk, &rt->u.dst);
-       tcp_v4_setup_caps(sk, &rt->u.dst);
-
-       new_saddr = rt->rt_src;
-
-       if (new_saddr == old_saddr)
-               return 0;
-
-       if (sysctl_ip_dynaddr > 1) {
-               printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->"
-                                "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
-                      NIPQUAD(old_saddr),
-                      NIPQUAD(new_saddr));
-       }
-
-       inet->saddr = new_saddr;
-       inet->rcv_saddr = new_saddr;
-
-       /* XXX The only one ugly spot where we need to
-        * XXX really change the sockets identity after
-        * XXX it has entered the hashes. -DaveM
-        *
-        * Besides that, it does not check for connection
-        * uniqueness. Wait for troubles.
-        */
-       __tcp_v4_rehash(sk);
-       return 0;
-}
-
-int tcp_v4_rebuild_header(struct sock *sk)
-{
-       struct inet_sock *inet = inet_sk(sk);
-       struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
-       u32 daddr;
-       int err;
-
-       /* Route is OK, nothing to do. */
-       if (rt)
-               return 0;
-
-       /* Reroute. */
-       daddr = inet->daddr;
-       if (inet->opt && inet->opt->srr)
-               daddr = inet->opt->faddr;
-
-       {
-               struct flowi fl = { .oif = sk->sk_bound_dev_if,
-                                   .nl_u = { .ip4_u =
-                                             { .daddr = daddr,
-                                               .saddr = inet->saddr,
-                                               .tos = RT_CONN_FLAGS(sk) } },
-                                   .proto = IPPROTO_TCP,
-                                   .uli_u = { .ports =
-                                              { .sport = inet->sport,
-                                                .dport = inet->dport } } };
-                                               
-               err = ip_route_output_flow(&rt, &fl, sk, 0);
-       }
-       if (!err) {
-               __sk_dst_set(sk, &rt->u.dst);
-               tcp_v4_setup_caps(sk, &rt->u.dst);
-               return 0;
-       }
-
-       /* Routing failed... */
-       sk->sk_route_caps = 0;
-
-       if (!sysctl_ip_dynaddr ||
-           sk->sk_state != TCP_SYN_SENT ||
-           (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
-           (err = tcp_v4_reselect_saddr(sk)) != 0)
-               sk->sk_err_soft = -err;
-
-       return err;
-}
-
  static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
  {
         struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
@@ -1985,18 +1370,18 @@ int tcp_v4_remember_stamp(struct sock *sk)
         return 0;
  }
  
-int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw)
+int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
  {
-       struct inet_peer *peer = NULL;
-
-       peer = inet_getpeer(tw->tw_daddr, 1);
+       struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
  
         if (peer) {
-               if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 ||
+               const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
+
+               if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
                     (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
-                    peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) {
-                       peer->tcp_ts_stamp = tw->tw_ts_recent_stamp;
-                       peer->tcp_ts = tw->tw_ts_recent;
+                    peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
+                       peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
+                       peer->tcp_ts       = tcptw->tw_ts_recent;
                 }
                 inet_putpeer(peer);
                 return 1;
@@ -2008,7 +1393,7 @@ int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw)
  struct tcp_func ipv4_specific = {
         .queue_xmit     =       ip_queue_xmit,
         .send_check     =       tcp_v4_send_check,
-       .rebuild_header =       tcp_v4_rebuild_header,
+       .rebuild_header =       inet_sk_rebuild_header,
         .conn_request   =       tcp_v4_conn_request,
         .syn_recv_sock  =       tcp_v4_syn_recv_sock,
         .remember_stamp =       tcp_v4_remember_stamp,
@@ -2024,13 +1409,14 @@ struct tcp_func ipv4_specific = {
   */
  static int tcp_v4_init_sock(struct sock *sk)
  {
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
  
         skb_queue_head_init(&tp->out_of_order_queue);
         tcp_init_xmit_timers(sk);
         tcp_prequeue_init(tp);
  
-       tp->rto  = TCP_TIMEOUT_INIT;
+       icsk->icsk_rto = TCP_TIMEOUT_INIT;
         tp->mdev = TCP_TIMEOUT_INIT;
  
         /* So many TCP implementations out there (incorrectly) count the
@@ -2048,7 +1434,7 @@ static int tcp_v4_init_sock(struct sock *sk)
         tp->mss_cache = 536;
  
         tp->reordering = sysctl_tcp_reordering;
-       tp->ca_ops = &tcp_init_congestion_ops;
+       icsk->icsk_ca_ops = &tcp_init_congestion_ops;
  
         sk->sk_state = TCP_CLOSE;
  
@@ -2071,7 +1457,7 @@ int tcp_v4_destroy_sock(struct sock *sk)
  
         tcp_clear_xmit_timers(sk);
  
-       tcp_cleanup_congestion_control(tp);
+       tcp_cleanup_congestion_control(sk);
  
         /* Cleanup up the write buffer. */
         sk_stream_writequeue_purge(sk);
@@ -2083,8 +1469,8 @@ int tcp_v4_destroy_sock(struct sock *sk)
         __skb_queue_purge(&tp->ucopy.prequeue);
  
         /* Clean up a referenced TCP bind bucket. */
-       if (tp->bind_hash)
-               tcp_put_port(sk);
+       if (inet_csk(sk)->icsk_bind_hash)
+               inet_put_port(&tcp_hashinfo, sk);
  
         /*
          * If sendmsg cached page exists, toss it.
@@ -2104,13 +1490,13 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
  #ifdef CONFIG_PROC_FS
  /* Proc filesystem TCP sock list dumping. */
  
-static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head)
+static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
  {
         return hlist_empty(head) ? NULL :
-               list_entry(head->first, struct tcp_tw_bucket, tw_node);
+               list_entry(head->first, struct inet_timewait_sock, tw_node);
  }
  
-static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw)
+static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
  {
         return tw->tw_node.next ?
                 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
@@ -2118,14 +1504,14 @@ static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw)
  
  static void *listening_get_next(struct seq_file *seq, void *cur)
  {
-       struct tcp_sock *tp;
+       struct inet_connection_sock *icsk;
         struct hlist_node *node;
         struct sock *sk = cur;
         struct tcp_iter_state* st = seq->private;
  
         if (!sk) {
                 st->bucket = 0;
-               sk = sk_head(&tcp_listening_hash[0]);
+               sk = sk_head(&tcp_hashinfo.listening_hash[0]);
                 goto get_sk;
         }
  
@@ -2134,7 +1520,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
         if (st->state == TCP_SEQ_STATE_OPENREQ) {
                 struct request_sock *req = cur;
  
-               tp = tcp_sk(st->syn_wait_sk);
+               icsk = inet_csk(st->syn_wait_sk);
                 req = req->dl_next;
                 while (1) {
                         while (req) {
@@ -2147,17 +1533,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
                         if (++st->sbucket >= TCP_SYNQ_HSIZE)
                                 break;
  get_req:
-                       req = tp->accept_queue.listen_opt->syn_table[st->sbucket];
+                       req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
                 }
                 sk        = sk_next(st->syn_wait_sk);
                 st->state = TCP_SEQ_STATE_LISTENING;
-               read_unlock_bh(&tp->accept_queue.syn_wait_lock);
+               read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
         } else {
-               tp = tcp_sk(sk);
-               read_lock_bh(&tp->accept_queue.syn_wait_lock);
-               if (reqsk_queue_len(&tp->accept_queue))
+               icsk = inet_csk(sk);
+               read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+               if (reqsk_queue_len(&icsk->icsk_accept_queue))
                         goto start_req;
-               read_unlock_bh(&tp->accept_queue.syn_wait_lock);
+               read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
                 sk = sk_next(sk);
         }
  get_sk:
@@ -2166,9 +1552,9 @@ get_sk:
                         cur = sk;
                         goto out;
                 }
-               tp = tcp_sk(sk);
-               read_lock_bh(&tp->accept_queue.syn_wait_lock);
-               if (reqsk_queue_len(&tp->accept_queue)) {
+               icsk = inet_csk(sk);
+               read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+               if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
  start_req:
                         st->uid         = sock_i_uid(sk);
                         st->syn_wait_sk = sk;
@@ -2176,10 +1562,10 @@ start_req:
                         st->sbucket     = 0;
                         goto get_req;
                 }
-               read_unlock_bh(&tp->accept_queue.syn_wait_lock);
+               read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
         }
-       if (++st->bucket < TCP_LHTABLE_SIZE) {
-               sk = sk_head(&tcp_listening_hash[st->bucket]);
+       if (++st->bucket < INET_LHTABLE_SIZE) {
+               sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
                 goto get_sk;
         }
         cur = NULL;
@@ -2203,16 +1589,16 @@ static void *established_get_first(struct seq_file *seq)
         struct tcp_iter_state* st = seq->private;
         void *rc = NULL;
  
-       for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) {
+       for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
                 struct sock *sk;
                 struct hlist_node *node;
-               struct tcp_tw_bucket *tw;
+               struct inet_timewait_sock *tw;
  
                 /* We can reschedule _before_ having picked the target: */
                 cond_resched_softirq();
  
-               read_lock(&tcp_ehash[st->bucket].lock);
-               sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
+               read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
+               sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
                         if (sk->sk_family != st->family) {
                                 continue;
                         }
@@ -2220,15 +1606,15 @@ static void *established_get_first(struct seq_file *seq)
                         goto out;
                 }
                 st->state = TCP_SEQ_STATE_TIME_WAIT;
-               tw_for_each(tw, node,
-                           &tcp_ehash[st->bucket + tcp_ehash_size].chain) {
+               inet_twsk_for_each(tw, node,
+                                  &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
                         if (tw->tw_family != st->family) {
                                 continue;
                         }
                         rc = tw;
                         goto out;
                 }
-               read_unlock(&tcp_ehash[st->bucket].lock);
+               read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
                 st->state = TCP_SEQ_STATE_ESTABLISHED;
         }
  out:
@@ -2238,7 +1624,7 @@ out:
  static void *established_get_next(struct seq_file *seq, void *cur)
  {
         struct sock *sk = cur;
-       struct tcp_tw_bucket *tw;
+       struct inet_timewait_sock *tw;
         struct hlist_node *node;
         struct tcp_iter_state* st = seq->private;
  
@@ -2255,15 +1641,15 @@ get_tw:
                         cur = tw;
                         goto out;
                 }
-               read_unlock(&tcp_ehash[st->bucket].lock);
+               read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
                 st->state = TCP_SEQ_STATE_ESTABLISHED;
  
                 /* We can reschedule between buckets: */
                 cond_resched_softirq();
  
-               if (++st->bucket < tcp_ehash_size) {
-                       read_lock(&tcp_ehash[st->bucket].lock);
-                       sk = sk_head(&tcp_ehash[st->bucket].chain);
+               if (++st->bucket < tcp_hashinfo.ehash_size) {
+                       read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
+                       sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
                 } else {
                         cur = NULL;
                         goto out;
@@ -2277,7 +1663,7 @@ get_tw:
         }
  
         st->state = TCP_SEQ_STATE_TIME_WAIT;
-       tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain);
+       tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
         goto get_tw;
  found:
         cur = sk;
@@ -2301,12 +1687,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
         void *rc;
         struct tcp_iter_state* st = seq->private;
  
-       tcp_listen_lock();
+       inet_listen_lock(&tcp_hashinfo);
         st->state = TCP_SEQ_STATE_LISTENING;
         rc        = listening_get_idx(seq, &pos);
  
         if (!rc) {
-               tcp_listen_unlock();
+               inet_listen_unlock(&tcp_hashinfo);
                 local_bh_disable();
                 st->state = TCP_SEQ_STATE_ESTABLISHED;
                 rc        = established_get_idx(seq, pos);
@@ -2339,7 +1725,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
         case TCP_SEQ_STATE_LISTENING:
                 rc = listening_get_next(seq, v);
                 if (!rc) {
-                       tcp_listen_unlock();
+                       inet_listen_unlock(&tcp_hashinfo);
                         local_bh_disable();
                         st->state = TCP_SEQ_STATE_ESTABLISHED;
                         rc        = established_get_first(seq);
@@ -2362,17 +1748,17 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
         switch (st->state) {
         case TCP_SEQ_STATE_OPENREQ:
                 if (v) {
-                       struct tcp_sock *tp = tcp_sk(st->syn_wait_sk);
-                       read_unlock_bh(&tp->accept_queue.syn_wait_lock);
+                       struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
+                       read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
                 }
         case TCP_SEQ_STATE_LISTENING:
                 if (v != SEQ_START_TOKEN)
-                       tcp_listen_unlock();
+                       inet_listen_unlock(&tcp_hashinfo);
                 break;
         case TCP_SEQ_STATE_TIME_WAIT:
         case TCP_SEQ_STATE_ESTABLISHED:
                 if (v)
-                       read_unlock(&tcp_ehash[st->bucket].lock);
+                       read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
                 local_bh_enable();
                 break;
         }
@@ -2469,18 +1855,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
         int timer_active;
         unsigned long timer_expires;
         struct tcp_sock *tp = tcp_sk(sp);
+       const struct inet_connection_sock *icsk = inet_csk(sp);
         struct inet_sock *inet = inet_sk(sp);
         unsigned int dest = inet->daddr;
         unsigned int src = inet->rcv_saddr;
         __u16 destp = ntohs(inet->dport);
         __u16 srcp = ntohs(inet->sport);
  
-       if (tp->pending == TCP_TIME_RETRANS) {
+       if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
                 timer_active    = 1;
-               timer_expires   = tp->timeout;
-       } else if (tp->pending == TCP_TIME_PROBE0) {
+               timer_expires   = icsk->icsk_timeout;
+       } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
                 timer_active    = 4;
-               timer_expires   = tp->timeout;
+               timer_expires   = icsk->icsk_timeout;
         } else if (timer_pending(&sp->sk_timer)) {
                 timer_active    = 2;
                 timer_expires   = sp->sk_timer.expires;
@@ -2495,17 +1882,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
                 tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
                 timer_active,
                 jiffies_to_clock_t(timer_expires - jiffies),
-               tp->retransmits,
+               icsk->icsk_retransmits,
                 sock_i_uid(sp),
-               tp->probes_out,
+               icsk->icsk_probes_out,
                 sock_i_ino(sp),
                 atomic_read(&sp->sk_refcnt), sp,
-               tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong,
+               icsk->icsk_rto,
+               icsk->icsk_ack.ato,
+               (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
                 tp->snd_cwnd,
                 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
  }
  
-static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
+static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
  {
         unsigned int dest, src;
         __u16 destp, srcp;
@@ -2585,7 +1974,7 @@ struct proto tcp_prot = {
         .close                  = tcp_close,
         .connect                = tcp_v4_connect,
         .disconnect             = tcp_disconnect,
-       .accept                 = tcp_accept,
+       .accept                 = inet_csk_accept,
         .ioctl                  = tcp_ioctl,
         .init                   = tcp_v4_init_sock,
         .destroy                = tcp_v4_destroy_sock,
@@ -2600,6 +1989,7 @@ struct proto tcp_prot = {
         .get_port               = tcp_v4_get_port,
         .enter_memory_pressure  = tcp_enter_memory_pressure,
         .sockets_allocated      = &tcp_sockets_allocated,
+       .orphan_count           = &tcp_orphan_count,
         .memory_allocated       = &tcp_memory_allocated,
         .memory_pressure        = &tcp_memory_pressure,
         .sysctl_mem             = sysctl_tcp_mem,
@@ -2607,6 +1997,7 @@ struct proto tcp_prot = {
         .sysctl_rmem            = sysctl_tcp_rmem,
         .max_header             = MAX_TCP_HEADER,
         .obj_size               = sizeof(struct tcp_sock),
+       .twsk_obj_size          = sizeof(struct tcp_timewait_sock),
         .rsk_prot               = &tcp_request_sock_ops,
  };
  
@@ -2628,19 +2019,13 @@ void __init tcp_v4_init(struct net_proto_family *ops)
  }
  
  EXPORT_SYMBOL(ipv4_specific);
-EXPORT_SYMBOL(tcp_bind_hash);
-EXPORT_SYMBOL(tcp_bucket_create);
+EXPORT_SYMBOL(inet_bind_bucket_create);
  EXPORT_SYMBOL(tcp_hashinfo);
-EXPORT_SYMBOL(tcp_inherit_port);
-EXPORT_SYMBOL(tcp_listen_wlock);
-EXPORT_SYMBOL(tcp_port_rover);
  EXPORT_SYMBOL(tcp_prot);
-EXPORT_SYMBOL(tcp_put_port);
  EXPORT_SYMBOL(tcp_unhash);
  EXPORT_SYMBOL(tcp_v4_conn_request);
  EXPORT_SYMBOL(tcp_v4_connect);
  EXPORT_SYMBOL(tcp_v4_do_rcv);
-EXPORT_SYMBOL(tcp_v4_rebuild_header);
  EXPORT_SYMBOL(tcp_v4_remember_stamp);
  EXPORT_SYMBOL(tcp_v4_send_check);
  EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c

index f42a284164b794aefec97bb296a115bc5dbc231a..a88db28b0af7d83db35c516cabcc337169a88aa3 100644 (file)
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -35,13 +35,27 @@
  #define SYNC_INIT 1
  #endif
  
-int sysctl_tcp_tw_recycle;
-int sysctl_tcp_max_tw_buckets = NR_FILE*2;
-
  int sysctl_tcp_syncookies = SYNC_INIT; 
  int sysctl_tcp_abort_on_overflow;
  
-static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo);
+struct inet_timewait_death_row tcp_death_row = {
+       .sysctl_max_tw_buckets = NR_FILE * 2,
+       .period         = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
+       .death_lock     = SPIN_LOCK_UNLOCKED,
+       .hashinfo       = &tcp_hashinfo,
+       .tw_timer       = TIMER_INITIALIZER(inet_twdr_hangman, 0,
+                                           (unsigned long)&tcp_death_row),
+       .twkill_work    = __WORK_INITIALIZER(tcp_death_row.twkill_work,
+                                            inet_twdr_twkill_work,
+                                            &tcp_death_row),
+/* Short-time timewait calendar */
+
+       .twcal_hand     = -1,
+       .twcal_timer    = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
+                                           (unsigned long)&tcp_death_row),
+};
+
+EXPORT_SYMBOL_GPL(tcp_death_row);
  
  static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
  {
@@ -52,47 +66,6 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
         return (seq == e_win && seq == end_seq);
  }
  
-/* New-style handling of TIME_WAIT sockets. */
-
-int tcp_tw_count;
-
-
-/* Must be called with locally disabled BHs. */
-static void tcp_timewait_kill(struct tcp_tw_bucket *tw)
-{
-       struct tcp_ehash_bucket *ehead;
-       struct tcp_bind_hashbucket *bhead;
-       struct tcp_bind_bucket *tb;
-
-       /* Unlink from established hashes. */
-       ehead = &tcp_ehash[tw->tw_hashent];
-       write_lock(&ehead->lock);
-       if (hlist_unhashed(&tw->tw_node)) {
-               write_unlock(&ehead->lock);
-               return;
-       }
-       __hlist_del(&tw->tw_node);
-       sk_node_init(&tw->tw_node);
-       write_unlock(&ehead->lock);
-
-       /* Disassociate with bind bucket. */
-       bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num)];
-       spin_lock(&bhead->lock);
-       tb = tw->tw_tb;
-       __hlist_del(&tw->tw_bind_node);
-       tw->tw_tb = NULL;
-       tcp_bucket_destroy(tb);
-       spin_unlock(&bhead->lock);
-
-#ifdef INET_REFCNT_DEBUG
-       if (atomic_read(&tw->tw_refcnt) != 1) {
-               printk(KERN_DEBUG "tw_bucket %p refcnt=%d\n", tw,
-                      atomic_read(&tw->tw_refcnt));
-       }
-#endif
-       tcp_tw_put(tw);
-}
-
  /* 
   * * Main purpose of TIME-WAIT state is to close connection gracefully,
   *   when one of ends sits in LAST-ACK or CLOSING retransmitting FIN
@@ -122,19 +95,20 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw)
   * to avoid misread sequence numbers, states etc.  --ANK
   */
  enum tcp_tw_status
-tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
-                          struct tcphdr *th, unsigned len)
+tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
+                          const struct tcphdr *th)
  {
+       struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
         struct tcp_options_received tmp_opt;
         int paws_reject = 0;
  
         tmp_opt.saw_tstamp = 0;
-       if (th->doff > (sizeof(struct tcphdr) >> 2) && tw->tw_ts_recent_stamp) {
+       if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
                 tcp_parse_options(skb, &tmp_opt, 0);
  
                 if (tmp_opt.saw_tstamp) {
-                       tmp_opt.ts_recent          = tw->tw_ts_recent;
-                       tmp_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
+                       tmp_opt.ts_recent       = tcptw->tw_ts_recent;
+                       tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
                         paws_reject = tcp_paws_check(&tmp_opt, th->rst);
                 }
         }
@@ -145,20 +119,20 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
                 /* Out of window, send ACK */
                 if (paws_reject ||
                     !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-                                  tw->tw_rcv_nxt,
-                                  tw->tw_rcv_nxt + tw->tw_rcv_wnd))
+                                  tcptw->tw_rcv_nxt,
+                                  tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))
                         return TCP_TW_ACK;
  
                 if (th->rst)
                         goto kill;
  
-               if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt))
+               if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
                         goto kill_with_rst;
  
                 /* Dup ACK? */
-               if (!after(TCP_SKB_CB(skb)->end_seq, tw->tw_rcv_nxt) ||
+               if (!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
                     TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
-                       tcp_tw_put(tw);
+                       inet_twsk_put(tw);
                         return TCP_TW_SUCCESS;
                 }
  
@@ -166,19 +140,19 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
                  * reset.
                  */
                 if (!th->fin ||
-                   TCP_SKB_CB(skb)->end_seq != tw->tw_rcv_nxt + 1) {
+                   TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
  kill_with_rst:
-                       tcp_tw_deschedule(tw);
-                       tcp_tw_put(tw);
+                       inet_twsk_deschedule(tw, &tcp_death_row);
+                       inet_twsk_put(tw);
                         return TCP_TW_RST;
                 }
  
                 /* FIN arrived, enter true time-wait state. */
-               tw->tw_substate = TCP_TIME_WAIT;
-               tw->tw_rcv_nxt  = TCP_SKB_CB(skb)->end_seq;
+               tw->tw_substate   = TCP_TIME_WAIT;
+               tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
                 if (tmp_opt.saw_tstamp) {
-                       tw->tw_ts_recent_stamp  = xtime.tv_sec;
-                       tw->tw_ts_recent        = tmp_opt.rcv_tsval;
+                       tcptw->tw_ts_recent_stamp = xtime.tv_sec;
+                       tcptw->tw_ts_recent       = tmp_opt.rcv_tsval;
                 }
  
                 /* I am shamed, but failed to make it more elegant.
@@ -187,11 +161,13 @@ kill_with_rst:
                  * do not undertsnad recycling in any case, it not
                  * a big problem in practice. --ANK */
                 if (tw->tw_family == AF_INET &&
-                   sysctl_tcp_tw_recycle && tw->tw_ts_recent_stamp &&
+                   tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
                     tcp_v4_tw_remember_stamp(tw))
-                       tcp_tw_schedule(tw, tw->tw_timeout);
+                       inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
+                                          TCP_TIMEWAIT_LEN);
                 else
-                       tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+                       inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
+                                          TCP_TIMEWAIT_LEN);
                 return TCP_TW_ACK;
         }
  
@@ -213,7 +189,7 @@ kill_with_rst:
          */
  
         if (!paws_reject &&
-           (TCP_SKB_CB(skb)->seq == tw->tw_rcv_nxt &&
+           (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
              (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
                 /* In window segment, it may be only reset or bare ack. */
  
@@ -224,19 +200,20 @@ kill_with_rst:
                          */
                         if (sysctl_tcp_rfc1337 == 0) {
  kill:
-                               tcp_tw_deschedule(tw);
-                               tcp_tw_put(tw);
+                               inet_twsk_deschedule(tw, &tcp_death_row);
+                               inet_twsk_put(tw);
                                 return TCP_TW_SUCCESS;
                         }
                 }
-               tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+               inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
+                                  TCP_TIMEWAIT_LEN);
  
                 if (tmp_opt.saw_tstamp) {
-                       tw->tw_ts_recent        = tmp_opt.rcv_tsval;
-                       tw->tw_ts_recent_stamp  = xtime.tv_sec;
+                       tcptw->tw_ts_recent       = tmp_opt.rcv_tsval;
+                       tcptw->tw_ts_recent_stamp = xtime.tv_sec;
                 }
  
-               tcp_tw_put(tw);
+               inet_twsk_put(tw);
                 return TCP_TW_SUCCESS;
         }
  
@@ -258,9 +235,10 @@ kill:
          */
  
         if (th->syn && !th->rst && !th->ack && !paws_reject &&
-           (after(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt) ||
-            (tmp_opt.saw_tstamp && (s32)(tw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
-               u32 isn = tw->tw_snd_nxt + 65535 + 2;
+           (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) ||
+            (tmp_opt.saw_tstamp &&
+             (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
+               u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
                 if (isn == 0)
                         isn++;
                 TCP_SKB_CB(skb)->when = isn;
@@ -278,107 +256,57 @@ kill:
                  * Do not reschedule in the last case.
                  */
                 if (paws_reject || th->ack)
-                       tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+                       inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
+                                          TCP_TIMEWAIT_LEN);
  
                 /* Send ACK. Note, we do not put the bucket,
                  * it will be released by caller.
                  */
                 return TCP_TW_ACK;
         }
-       tcp_tw_put(tw);
+       inet_twsk_put(tw);
         return TCP_TW_SUCCESS;
  }
  
-/* Enter the time wait state.  This is called with locally disabled BH.
- * Essentially we whip up a timewait bucket, copy the
- * relevant info into it from the SK, and mess with hash chains
- * and list linkage.
- */
-static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw)
-{
-       struct tcp_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent];
-       struct tcp_bind_hashbucket *bhead;
-
-       /* Step 1: Put TW into bind hash. Original socket stays there too.
-          Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in
-          binding cache, even if it is closed.
-        */
-       bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
-       spin_lock(&bhead->lock);
-       tw->tw_tb = tcp_sk(sk)->bind_hash;
-       BUG_TRAP(tcp_sk(sk)->bind_hash);
-       tw_add_bind_node(tw, &tw->tw_tb->owners);
-       spin_unlock(&bhead->lock);
-
-       write_lock(&ehead->lock);
-
-       /* Step 2: Remove SK from established hash. */
-       if (__sk_del_node_init(sk))
-               sock_prot_dec_use(sk->sk_prot);
-
-       /* Step 3: Hash TW into TIMEWAIT half of established hash table. */
-       tw_add_node(tw, &(ehead + tcp_ehash_size)->chain);
-       atomic_inc(&tw->tw_refcnt);
-
-       write_unlock(&ehead->lock);
-}
-
  /* 
   * Move a socket to time-wait or dead fin-wait-2 state.
   */ 
  void tcp_time_wait(struct sock *sk, int state, int timeo)
  {
-       struct tcp_tw_bucket *tw = NULL;
-       struct tcp_sock *tp = tcp_sk(sk);
+       struct inet_timewait_sock *tw = NULL;
+       const struct tcp_sock *tp = tcp_sk(sk);
         int recycle_ok = 0;
  
-       if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp)
+       if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
                 recycle_ok = tp->af_specific->remember_stamp(sk);
  
-       if (tcp_tw_count < sysctl_tcp_max_tw_buckets)
-               tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC);
-
-       if(tw != NULL) {
-               struct inet_sock *inet = inet_sk(sk);
-               int rto = (tp->rto<<2) - (tp->rto>>1);
-
-               /* Give us an identity. */
-               tw->tw_daddr            = inet->daddr;
-               tw->tw_rcv_saddr        = inet->rcv_saddr;
-               tw->tw_bound_dev_if     = sk->sk_bound_dev_if;
-               tw->tw_num              = inet->num;
-               tw->tw_state            = TCP_TIME_WAIT;
-               tw->tw_substate         = state;
-               tw->tw_sport            = inet->sport;
-               tw->tw_dport            = inet->dport;
-               tw->tw_family           = sk->sk_family;
-               tw->tw_reuse            = sk->sk_reuse;
-               tw->tw_rcv_wscale       = tp->rx_opt.rcv_wscale;
-               atomic_set(&tw->tw_refcnt, 1);
+       if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
+               tw = inet_twsk_alloc(sk, state);
  
-               tw->tw_hashent          = sk->sk_hashent;
-               tw->tw_rcv_nxt          = tp->rcv_nxt;
-               tw->tw_snd_nxt          = tp->snd_nxt;
-               tw->tw_rcv_wnd          = tcp_receive_window(tp);
-               tw->tw_ts_recent        = tp->rx_opt.ts_recent;
-               tw->tw_ts_recent_stamp  = tp->rx_opt.ts_recent_stamp;
-               tw_dead_node_init(tw);
+       if (tw != NULL) {
+               struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
+               const struct inet_connection_sock *icsk = inet_csk(sk);
+               const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
+
+               tw->tw_rcv_wscale       = tp->rx_opt.rcv_wscale;
+               tcptw->tw_rcv_nxt       = tp->rcv_nxt;
+               tcptw->tw_snd_nxt       = tp->snd_nxt;
+               tcptw->tw_rcv_wnd       = tcp_receive_window(tp);
+               tcptw->tw_ts_recent     = tp->rx_opt.ts_recent;
+               tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
  
  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
                 if (tw->tw_family == PF_INET6) {
                         struct ipv6_pinfo *np = inet6_sk(sk);
+                       struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
  
-                       ipv6_addr_copy(&tw->tw_v6_daddr, &np->daddr);
-                       ipv6_addr_copy(&tw->tw_v6_rcv_saddr, &np->rcv_saddr);
-                       tw->tw_v6_ipv6only = np->ipv6only;
-               } else {
-                       memset(&tw->tw_v6_daddr, 0, sizeof(tw->tw_v6_daddr));
-                       memset(&tw->tw_v6_rcv_saddr, 0, sizeof(tw->tw_v6_rcv_saddr));
-                       tw->tw_v6_ipv6only = 0;
+                       ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr);
+                       ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr);
+                       tw->tw_ipv6only = np->ipv6only;
                 }
  #endif
                 /* Linkage updates. */
-               __tcp_tw_hashdance(sk, tw);
+               __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
  
                 /* Get the TIME_WAIT timeout firing. */
                 if (timeo < rto)
@@ -392,8 +320,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
                                 timeo = TCP_TIMEWAIT_LEN;
                 }
  
-               tcp_tw_schedule(tw, timeo);
-               tcp_tw_put(tw);
+               inet_twsk_schedule(tw, &tcp_death_row, timeo,
+                                  TCP_TIMEWAIT_LEN);
+               inet_twsk_put(tw);
         } else {
                 /* Sorry, if we're out of memory, just CLOSE this
                  * socket up.  We've got bigger problems than
@@ -407,277 +336,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
         tcp_done(sk);
  }
  
-/* Kill off TIME_WAIT sockets once their lifetime has expired. */
-static int tcp_tw_death_row_slot;
-
-static void tcp_twkill(unsigned long);
-
-/* TIME_WAIT reaping mechanism. */
-#define TCP_TWKILL_SLOTS       8       /* Please keep this a power of 2. */
-#define TCP_TWKILL_PERIOD      (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS)
-
-#define TCP_TWKILL_QUOTA       100
-
-static struct hlist_head tcp_tw_death_row[TCP_TWKILL_SLOTS];
-static DEFINE_SPINLOCK(tw_death_lock);
-static struct timer_list tcp_tw_timer = TIMER_INITIALIZER(tcp_twkill, 0, 0);
-static void twkill_work(void *);
-static DECLARE_WORK(tcp_twkill_work, twkill_work, NULL);
-static u32 twkill_thread_slots;
-
-/* Returns non-zero if quota exceeded.  */
-static int tcp_do_twkill_work(int slot, unsigned int quota)
-{
-       struct tcp_tw_bucket *tw;
-       struct hlist_node *node;
-       unsigned int killed;
-       int ret;
-
-       /* NOTE: compare this to previous version where lock
-        * was released after detaching chain. It was racy,
-        * because tw buckets are scheduled in not serialized context
-        * in 2.3 (with netfilter), and with softnet it is common, because
-        * soft irqs are not sequenced.
-        */
-       killed = 0;
-       ret = 0;
-rescan:
-       tw_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) {
-               __tw_del_dead_node(tw);
-               spin_unlock(&tw_death_lock);
-               tcp_timewait_kill(tw);
-               tcp_tw_put(tw);
-               killed++;
-               spin_lock(&tw_death_lock);
-               if (killed > quota) {
-                       ret = 1;
-                       break;
-               }
-
-               /* While we dropped tw_death_lock, another cpu may have
-                * killed off the next TW bucket in the list, therefore
-                * do a fresh re-read of the hlist head node with the
-                * lock reacquired.  We still use the hlist traversal
-                * macro in order to get the prefetches.
-                */
-               goto rescan;
-       }
-
-       tcp_tw_count -= killed;
-       NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
-
-       return ret;
-}
-
-static void tcp_twkill(unsigned long dummy)
-{
-       int need_timer, ret;
-
-       spin_lock(&tw_death_lock);
-
-       if (tcp_tw_count == 0)
-               goto out;
-
-       need_timer = 0;
-       ret = tcp_do_twkill_work(tcp_tw_death_row_slot, TCP_TWKILL_QUOTA);
-       if (ret) {
-               twkill_thread_slots |= (1 << tcp_tw_death_row_slot);
-               mb();
-               schedule_work(&tcp_twkill_work);
-               need_timer = 1;
-       } else {
-               /* We purged the entire slot, anything left?  */
-               if (tcp_tw_count)
-                       need_timer = 1;
-       }
-       tcp_tw_death_row_slot =
-               ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
-       if (need_timer)
-               mod_timer(&tcp_tw_timer, jiffies + TCP_TWKILL_PERIOD);
-out:
-       spin_unlock(&tw_death_lock);
-}
-
-extern void twkill_slots_invalid(void);
-
-static void twkill_work(void *dummy)
-{
-       int i;
-
-       if ((TCP_TWKILL_SLOTS - 1) > (sizeof(twkill_thread_slots) * 8))
-               twkill_slots_invalid();
-
-       while (twkill_thread_slots) {
-               spin_lock_bh(&tw_death_lock);
-               for (i = 0; i < TCP_TWKILL_SLOTS; i++) {
-                       if (!(twkill_thread_slots & (1 << i)))
-                               continue;
-
-                       while (tcp_do_twkill_work(i, TCP_TWKILL_QUOTA) != 0) {
-                               if (need_resched()) {
-                                       spin_unlock_bh(&tw_death_lock);
-                                       schedule();
-                                       spin_lock_bh(&tw_death_lock);
-                               }
-                       }
-
-                       twkill_thread_slots &= ~(1 << i);
-               }
-               spin_unlock_bh(&tw_death_lock);
-       }
-}
-
-/* These are always called from BH context.  See callers in
- * tcp_input.c to verify this.
- */
-
-/* This is for handling early-kills of TIME_WAIT sockets. */
-void tcp_tw_deschedule(struct tcp_tw_bucket *tw)
-{
-       spin_lock(&tw_death_lock);
-       if (tw_del_dead_node(tw)) {
-               tcp_tw_put(tw);
-               if (--tcp_tw_count == 0)
-                       del_timer(&tcp_tw_timer);
-       }
-       spin_unlock(&tw_death_lock);
-       tcp_timewait_kill(tw);
-}
-
-/* Short-time timewait calendar */
-
-static int tcp_twcal_hand = -1;
-static int tcp_twcal_jiffie;
-static void tcp_twcal_tick(unsigned long);
-static struct timer_list tcp_twcal_timer =
-               TIMER_INITIALIZER(tcp_twcal_tick, 0, 0);
-static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS];
-
-static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo)
-{
-       struct hlist_head *list;
-       int slot;
-
-       /* timeout := RTO * 3.5
-        *
-        * 3.5 = 1+2+0.5 to wait for two retransmits.
-        *
-        * RATIONALE: if FIN arrived and we entered TIME-WAIT state,
-        * our ACK acking that FIN can be lost. If N subsequent retransmitted
-        * FINs (or previous seqments) are lost (probability of such event
-        * is p^(N+1), where p is probability to lose single packet and
-        * time to detect the loss is about RTO*(2^N - 1) with exponential
-        * backoff). Normal timewait length is calculated so, that we
-        * waited at least for one retransmitted FIN (maximal RTO is 120sec).
-        * [ BTW Linux. following BSD, violates this requirement waiting
-        *   only for 60sec, we should wait at least for 240 secs.
-        *   Well, 240 consumes too much of resources 8)
-        * ]
-        * This interval is not reduced to catch old duplicate and
-        * responces to our wandering segments living for two MSLs.
-        * However, if we use PAWS to detect
-        * old duplicates, we can reduce the interval to bounds required
-        * by RTO, rather than MSL. So, if peer understands PAWS, we
-        * kill tw bucket after 3.5*RTO (it is important that this number
-        * is greater than TS tick!) and detect old duplicates with help
-        * of PAWS.
-        */
-       slot = (timeo + (1<<TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK;
-
-       spin_lock(&tw_death_lock);
-
-       /* Unlink it, if it was scheduled */
-       if (tw_del_dead_node(tw))
-               tcp_tw_count--;
-       else
-               atomic_inc(&tw->tw_refcnt);
-
-       if (slot >= TCP_TW_RECYCLE_SLOTS) {
-               /* Schedule to slow timer */
-               if (timeo >= TCP_TIMEWAIT_LEN) {
-                       slot = TCP_TWKILL_SLOTS-1;
-               } else {
-                       slot = (timeo + TCP_TWKILL_PERIOD-1) / TCP_TWKILL_PERIOD;
-                       if (slot >= TCP_TWKILL_SLOTS)
-                               slot = TCP_TWKILL_SLOTS-1;
-               }
-               tw->tw_ttd = jiffies + timeo;
-               slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1);
-               list = &tcp_tw_death_row[slot];
-       } else {
-               tw->tw_ttd = jiffies + (slot << TCP_TW_RECYCLE_TICK);
-
-               if (tcp_twcal_hand < 0) {
-                       tcp_twcal_hand = 0;
-                       tcp_twcal_jiffie = jiffies;
-                       tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<<TCP_TW_RECYCLE_TICK);
-                       add_timer(&tcp_twcal_timer);
-               } else {
-                       if (time_after(tcp_twcal_timer.expires, jiffies + (slot<<TCP_TW_RECYCLE_TICK)))
-                               mod_timer(&tcp_twcal_timer, jiffies + (slot<<TCP_TW_RECYCLE_TICK));
-                       slot = (tcp_twcal_hand + slot)&(TCP_TW_RECYCLE_SLOTS-1);
-               }
-               list = &tcp_twcal_row[slot];
-       }
-
-       hlist_add_head(&tw->tw_death_node, list);
-
-       if (tcp_tw_count++ == 0)
-               mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
-       spin_unlock(&tw_death_lock);
-}
-
-void tcp_twcal_tick(unsigned long dummy)
-{
-       int n, slot;
-       unsigned long j;
-       unsigned long now = jiffies;
-       int killed = 0;
-       int adv = 0;
-
-       spin_lock(&tw_death_lock);
-       if (tcp_twcal_hand < 0)
-               goto out;
-
-       slot = tcp_twcal_hand;
-       j = tcp_twcal_jiffie;
-
-       for (n=0; n<TCP_TW_RECYCLE_SLOTS; n++) {
-               if (time_before_eq(j, now)) {
-                       struct hlist_node *node, *safe;
-                       struct tcp_tw_bucket *tw;
-
-                       tw_for_each_inmate_safe(tw, node, safe,
-                                          &tcp_twcal_row[slot]) {
-                               __tw_del_dead_node(tw);
-                               tcp_timewait_kill(tw);
-                               tcp_tw_put(tw);
-                               killed++;
-                       }
-               } else {
-                       if (!adv) {
-                               adv = 1;
-                               tcp_twcal_jiffie = j;
-                               tcp_twcal_hand = slot;
-                       }
-
-                       if (!hlist_empty(&tcp_twcal_row[slot])) {
-                               mod_timer(&tcp_twcal_timer, j);
-                               goto out;
-                       }
-               }
-               j += (1<<TCP_TW_RECYCLE_TICK);
-               slot = (slot+1)&(TCP_TW_RECYCLE_SLOTS-1);
-       }
-       tcp_twcal_hand = -1;
-
-out:
-       if ((tcp_tw_count -= killed) == 0)
-               del_timer(&tcp_tw_timer);
-       NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
-       spin_unlock(&tw_death_lock);
-}
-
  /* This is not only more efficient than what we used to do, it eliminates
   * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
   *
@@ -686,75 +344,27 @@ out:
   */
  struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
  {
-       /* allocate the newsk from the same slab of the master sock,
-        * if not, at sk_free time we'll try to free it from the wrong
-        * slabcache (i.e. is it TCPv4 or v6?), this is handled thru sk->sk_prot -acme */
-       struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0);
+       struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
  
-       if(newsk != NULL) {
-               struct inet_request_sock *ireq = inet_rsk(req);
+       if (newsk != NULL) {
+               const struct inet_request_sock *ireq = inet_rsk(req);
                 struct tcp_request_sock *treq = tcp_rsk(req);
+               struct inet_connection_sock *newicsk = inet_csk(newsk);
                 struct tcp_sock *newtp;
-               struct sk_filter *filter;
-
-               memcpy(newsk, sk, sizeof(struct tcp_sock));
-               newsk->sk_state = TCP_SYN_RECV;
-
-               /* SANITY */
-               sk_node_init(&newsk->sk_node);
-               tcp_sk(newsk)->bind_hash = NULL;
-
-               /* Clone the TCP header template */
-               inet_sk(newsk)->dport = ireq->rmt_port;
-
-               sock_lock_init(newsk);
-               bh_lock_sock(newsk);
-
-               rwlock_init(&newsk->sk_dst_lock);
-               atomic_set(&newsk->sk_rmem_alloc, 0);
-               skb_queue_head_init(&newsk->sk_receive_queue);
-               atomic_set(&newsk->sk_wmem_alloc, 0);
-               skb_queue_head_init(&newsk->sk_write_queue);
-               atomic_set(&newsk->sk_omem_alloc, 0);
-               newsk->sk_wmem_queued = 0;
-               newsk->sk_forward_alloc = 0;
-
-               sock_reset_flag(newsk, SOCK_DONE);
-               newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
-               newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
-               newsk->sk_send_head = NULL;
-               rwlock_init(&newsk->sk_callback_lock);
-               skb_queue_head_init(&newsk->sk_error_queue);
-               newsk->sk_write_space = sk_stream_write_space;
-
-               if ((filter = newsk->sk_filter) != NULL)
-                       sk_filter_charge(newsk, filter);
-
-               if (unlikely(xfrm_sk_clone_policy(newsk))) {
-                       /* It is still raw copy of parent, so invalidate
-                        * destructor and make plain sk_free() */
-                       newsk->sk_destruct = NULL;
-                       sk_free(newsk);
-                       return NULL;
-               }
  
                 /* Now setup tcp_sock */
                 newtp = tcp_sk(newsk);
                 newtp->pred_flags = 0;
                 newtp->rcv_nxt = treq->rcv_isn + 1;
-               newtp->snd_nxt = treq->snt_isn + 1;
-               newtp->snd_una = treq->snt_isn + 1;
-               newtp->snd_sml = treq->snt_isn + 1;
+               newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1;
  
                 tcp_prequeue_init(newtp);
  
                 tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);
  
-               newtp->retransmits = 0;
-               newtp->backoff = 0;
                 newtp->srtt = 0;
                 newtp->mdev = TCP_TIMEOUT_INIT;
-               newtp->rto = TCP_TIMEOUT_INIT;
+               newicsk->icsk_rto = TCP_TIMEOUT_INIT;
  
                 newtp->packets_out = 0;
                 newtp->left_out = 0;
@@ -774,9 +384,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
                 newtp->frto_counter = 0;
                 newtp->frto_highmark = 0;
  
-               newtp->ca_ops = &tcp_reno;
+               newicsk->icsk_ca_ops = &tcp_reno;
  
-               tcp_set_ca_state(newtp, TCP_CA_Open);
+               tcp_set_ca_state(newsk, TCP_CA_Open);
                 tcp_init_xmit_timers(newsk);
                 skb_queue_head_init(&newtp->out_of_order_queue);
                 newtp->rcv_wup = treq->rcv_isn + 1;
@@ -789,26 +399,12 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
                 newtp->rx_opt.dsack = 0;
                 newtp->rx_opt.eff_sacks = 0;
  
-               newtp->probes_out = 0;
                 newtp->rx_opt.num_sacks = 0;
                 newtp->urg_data = 0;
-               /* Deinitialize accept_queue to trap illegal accesses. */
-               memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue));
-
-               /* Back to base struct sock members. */
-               newsk->sk_err = 0;
-               newsk->sk_priority = 0;
-               atomic_set(&newsk->sk_refcnt, 2);
-#ifdef INET_REFCNT_DEBUG
-               atomic_inc(&inet_sock_nr);
-#endif
-               atomic_inc(&tcp_sockets_allocated);
  
                 if (sock_flag(newsk, SOCK_KEEPOPEN))
-                       tcp_reset_keepalive_timer(newsk,
-                                                 keepalive_time_when(newtp));
-               newsk->sk_socket = NULL;
-               newsk->sk_sleep = NULL;
+                       inet_csk_reset_keepalive_timer(newsk,
+                                                      keepalive_time_when(newtp));
  
                 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
                 if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
@@ -838,7 +434,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
                         newtp->tcp_header_len = sizeof(struct tcphdr);
                 }
                 if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
-                       newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len;
+                       newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
                 newtp->rx_opt.mss_clamp = req->mss;
                 TCP_ECN_openreq_child(newtp, req);
                 if (newtp->ecn_flags&TCP_ECN_OK)
@@ -934,9 +530,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
            does sequence test, SYN is truncated, and thus we consider
            it a bare ACK.
  
-          If tp->defer_accept, we silently drop this bare ACK.  Otherwise,
-          we create an established connection.  Both ends (listening sockets)
-          accept the new incoming connection and try to talk to each other. 8-)
+          If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
+          bare ACK.  Otherwise, we create an established connection.  Both
+          ends (listening sockets) accept the new incoming connection and try
+          to talk to each other. 8-)
  
            Note: This case is both harmless, and rare.  Possibility is about the
            same as us discovering intelligent life on another plant tomorrow.
@@ -1003,7 +600,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
                         return NULL;
  
                 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
-               if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
+               if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+                   TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
                         inet_rsk(req)->acked = 1;
                         return NULL;
                 }
@@ -1018,10 +616,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
                 if (child == NULL)
                         goto listen_overflow;
  
-               tcp_synq_unlink(tp, req, prev);
-               tcp_synq_removed(sk, req);
+               inet_csk_reqsk_queue_unlink(sk, req, prev);
+               inet_csk_reqsk_queue_removed(sk, req);
  
-               tcp_acceptq_queue(sk, req, child);
+               inet_csk_reqsk_queue_add(sk, req, child);
                 return child;
  
         listen_overflow:
@@ -1035,7 +633,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
                 if (!(flg & TCP_FLAG_RST))
                         req->rsk_ops->send_reset(skb);
  
-               tcp_synq_drop(sk, req, prev);
+               inet_csk_reqsk_queue_drop(sk, req, prev);
                 return NULL;
  }
  
@@ -1074,4 +672,3 @@ EXPORT_SYMBOL(tcp_check_req);
  EXPORT_SYMBOL(tcp_child_process);
  EXPORT_SYMBOL(tcp_create_openreq_child);
  EXPORT_SYMBOL(tcp_timewait_state_process);
-EXPORT_SYMBOL(tcp_tw_deschedule);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index e3f8ea1bfa9c01179f93094c534c2ea3f07bcd9f..75b68116682ae2912ca8e0dd4faf84eb4993e6bd 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -105,18 +105,19 @@ static __u16 tcp_advertise_mss(struct sock *sk)
  
  /* RFC2861. Reset CWND after idle period longer RTO to "restart window".
   * This is the first part of cwnd validation mechanism. */
-static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst)
+static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
         s32 delta = tcp_time_stamp - tp->lsndtime;
         u32 restart_cwnd = tcp_init_cwnd(tp, dst);
         u32 cwnd = tp->snd_cwnd;
  
-       tcp_ca_event(tp, CA_EVENT_CWND_RESTART);
+       tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
  
-       tp->snd_ssthresh = tcp_current_ssthresh(tp);
+       tp->snd_ssthresh = tcp_current_ssthresh(sk);
         restart_cwnd = min(restart_cwnd, cwnd);
  
-       while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd)
+       while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
                 cwnd >>= 1;
         tp->snd_cwnd = max(cwnd, restart_cwnd);
         tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -126,26 +127,25 @@ static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst)
  static inline void tcp_event_data_sent(struct tcp_sock *tp,
                                        struct sk_buff *skb, struct sock *sk)
  {
-       u32 now = tcp_time_stamp;
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       const u32 now = tcp_time_stamp;
  
-       if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
-               tcp_cwnd_restart(tp, __sk_dst_get(sk));
+       if (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto)
+               tcp_cwnd_restart(sk, __sk_dst_get(sk));
  
         tp->lsndtime = now;
  
         /* If it is a reply for ato after last received
          * packet, enter pingpong mode.
          */
-       if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato)
-               tp->ack.pingpong = 1;
+       if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+               icsk->icsk_ack.pingpong = 1;
  }
  
  static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
-
-       tcp_dec_quickack_mode(tp, pkts);
-       tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
+       tcp_dec_quickack_mode(sk, pkts);
+       inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
  }
  
  /* Determine a window scaling and initial window to offer.
@@ -265,6 +265,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
  static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
  {
         if (skb != NULL) {
+               const struct inet_connection_sock *icsk = inet_csk(sk);
                 struct inet_sock *inet = inet_sk(sk);
                 struct tcp_sock *tp = tcp_sk(sk);
                 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -280,8 +281,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
  #define SYSCTL_FLAG_SACK       0x4
  
                 /* If congestion control is doing timestamping */
-               if (tp->ca_ops->rtt_sample)
-                       do_gettimeofday(&skb->stamp);
+               if (icsk->icsk_ca_ops->rtt_sample)
+                       __net_timestamp(skb);
  
                 sysctl_flags = 0;
                 if (tcb->flags & TCPCB_FLAG_SYN) {
@@ -308,7 +309,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
                 }
                 
                 if (tcp_packets_in_flight(tp) == 0)
-                       tcp_ca_event(tp, CA_EVENT_TX_START);
+                       tcp_ca_event(sk, CA_EVENT_TX_START);
  
                 th = (struct tcphdr *) skb_push(skb, tcp_header_size);
                 skb->h.th = th;
@@ -366,7 +367,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
                 if (err <= 0)
                         return err;
  
-               tcp_enter_cwr(tp);
+               tcp_enter_cwr(sk);
  
                 /* NET_XMIT_CN is special. It does not guarantee,
                  * that this packet is lost. It tells that device
@@ -403,11 +404,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
                 sk->sk_send_head = skb;
  }
  
-static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
+static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
-
-       if (skb->len <= tp->mss_cache ||
+       if (skb->len <= mss_now ||
             !(sk->sk_route_caps & NETIF_F_TSO)) {
                 /* Avoid the costly divide in the normal
                  * non-TSO case.
@@ -417,10 +416,10 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
         } else {
                 unsigned int factor;
  
-               factor = skb->len + (tp->mss_cache - 1);
-               factor /= tp->mss_cache;
+               factor = skb->len + (mss_now - 1);
+               factor /= mss_now;
                 skb_shinfo(skb)->tso_segs = factor;
-               skb_shinfo(skb)->tso_size = tp->mss_cache;
+               skb_shinfo(skb)->tso_size = mss_now;
         }
  }
  
@@ -429,7 +428,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
   * packet to the list.  This won't be called frequently, I hope. 
   * Remember, these are still headerless SKBs at this point.
   */
-static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
+static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
  {
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *buff;
@@ -484,7 +483,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
          * skbs, which it never sent before. --ANK
          */
         TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
-       buff->stamp = skb->stamp;
+       buff->tstamp = skb->tstamp;
  
         if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
                 tp->lost_out -= tcp_skb_pcount(skb);
@@ -492,8 +491,8 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
         }
  
         /* Fix up tso_factor for both original and new SKB.  */
-       tcp_set_skb_tso_segs(sk, skb);
-       tcp_set_skb_tso_segs(sk, buff);
+       tcp_set_skb_tso_segs(sk, skb, mss_now);
+       tcp_set_skb_tso_segs(sk, buff, mss_now);
  
         if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
                 tp->lost_out += tcp_skb_pcount(skb);
@@ -507,7 +506,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
  
         /* Link BUFF into the send queue. */
         skb_header_release(buff);
-       __skb_append(skb, buff);
+       __skb_append(skb, buff, &sk->sk_write_queue);
  
         return 0;
  }
@@ -569,7 +568,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
          * factor and mss.
          */
         if (tcp_skb_pcount(skb) > 1)
-               tcp_set_skb_tso_segs(sk, skb);
+               tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));
  
         return 0;
  }
@@ -698,7 +697,7 @@ static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
                 if (tp->packets_out > tp->snd_cwnd_used)
                         tp->snd_cwnd_used = tp->packets_out;
  
-               if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
+               if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
                         tcp_cwnd_application_limited(sk);
         }
  }
@@ -734,12 +733,14 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
  /* This must be invoked the first time we consider transmitting
   * SKB onto the wire.
   */
-static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
  {
         int tso_segs = tcp_skb_pcount(skb);
  
-       if (!tso_segs) {
-               tcp_set_skb_tso_segs(sk, skb);
+       if (!tso_segs ||
+           (tso_segs > 1 &&
+            skb_shinfo(skb)->tso_size != mss_now)) {
+               tcp_set_skb_tso_segs(sk, skb, mss_now);
                 tso_segs = tcp_skb_pcount(skb);
         }
         return tso_segs;
@@ -817,7 +818,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
         struct tcp_sock *tp = tcp_sk(sk);
         unsigned int cwnd_quota;
  
-       tcp_init_tso_segs(sk, skb);
+       tcp_init_tso_segs(sk, skb, cur_mss);
  
         if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
                 return 0;
@@ -854,14 +855,15 @@ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
   * know that all the data is in scatter-gather pages, and that the
   * packet has never been sent out before (and thus is not cloned).
   */
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now)
  {
         struct sk_buff *buff;
         int nlen = skb->len - len;
         u16 flags;
  
         /* All of a TSO frame must be composed of paged data.  */
-       BUG_ON(skb->len != skb->data_len);
+       if (skb->len != skb->data_len)
+               return tcp_fragment(sk, skb, len, mss_now);
  
         buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
         if (unlikely(buff == NULL))
@@ -887,12 +889,12 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
         skb_split(skb, buff, len);
  
         /* Fix up tso_factor for both original and new SKB.  */
-       tcp_set_skb_tso_segs(sk, skb);
-       tcp_set_skb_tso_segs(sk, buff);
+       tcp_set_skb_tso_segs(sk, skb, mss_now);
+       tcp_set_skb_tso_segs(sk, buff, mss_now);
  
         /* Link BUFF into the send queue. */
         skb_header_release(buff);
-       __skb_append(skb, buff);
+       __skb_append(skb, buff, &sk->sk_write_queue);
  
         return 0;
  }
@@ -904,12 +906,13 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
   */
  static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
  {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
         u32 send_win, cong_win, limit, in_flight;
  
         if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
                 return 0;
  
-       if (tp->ca_state != TCP_CA_Open)
+       if (icsk->icsk_ca_state != TCP_CA_Open)
                 return 0;
  
         in_flight = tcp_packets_in_flight(tp);
@@ -924,10 +927,6 @@ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_
  
         limit = min(send_win, cong_win);
  
-       /* If sk_send_head can be sent fully now, just do it.  */
-       if (skb->len <= limit)
-               return 0;
-
         if (sysctl_tcp_tso_win_divisor) {
                 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
  
@@ -972,19 +971,20 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
         if (unlikely(sk->sk_state == TCP_CLOSE))
                 return 0;
  
-       skb = sk->sk_send_head;
-       if (unlikely(!skb))
-               return 0;
-
-       tso_segs = tcp_init_tso_segs(sk, skb);
-       cwnd_quota = tcp_cwnd_test(tp, skb);
-       if (unlikely(!cwnd_quota))
-               goto out;
-
         sent_pkts = 0;
-       while (likely(tcp_snd_wnd_test(tp, skb, mss_now))) {
+       while ((skb = sk->sk_send_head)) {
+               unsigned int limit;
+
+               tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
                 BUG_ON(!tso_segs);
  
+               cwnd_quota = tcp_cwnd_test(tp, skb);
+               if (!cwnd_quota)
+                       break;
+
+               if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
+                       break;
+
                 if (tso_segs == 1) {
                         if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
                                                      (tcp_skb_is_last(sk, skb) ?
@@ -995,9 +995,10 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
                                 break;
                 }
  
+               limit = mss_now;
                 if (tso_segs > 1) {
-                       u32 limit = tcp_window_allows(tp, skb,
-                                                     mss_now, cwnd_quota);
+                       limit = tcp_window_allows(tp, skb,
+                                                 mss_now, cwnd_quota);
  
                         if (skb->len < limit) {
                                 unsigned int trim = skb->len % mss_now;
@@ -1005,15 +1006,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
                                 if (trim)
                                         limit = skb->len - trim;
                         }
-                       if (skb->len > limit) {
-                               if (tso_fragment(sk, skb, limit))
-                                       break;
-                       }
-               } else if (unlikely(skb->len > mss_now)) {
-                       if (unlikely(tcp_fragment(sk, skb,  mss_now)))
-                               break;
                 }
  
+               if (skb->len > limit &&
+                   unlikely(tso_fragment(sk, skb, limit, mss_now)))
+                       break;
+
                 TCP_SKB_CB(skb)->when = tcp_time_stamp;
  
                 if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
@@ -1026,27 +1024,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
  
                 tcp_minshall_update(tp, mss_now, skb);
                 sent_pkts++;
-
-               /* Do not optimize this to use tso_segs. If we chopped up
-                * the packet above, tso_segs will no longer be valid.
-                */
-               cwnd_quota -= tcp_skb_pcount(skb);
-
-               BUG_ON(cwnd_quota < 0);
-               if (!cwnd_quota)
-                       break;
-
-               skb = sk->sk_send_head;
-               if (!skb)
-                       break;
-               tso_segs = tcp_init_tso_segs(sk, skb);
         }
  
         if (likely(sent_pkts)) {
                 tcp_cwnd_validate(sk, tp);
                 return 0;
         }
-out:
         return !tp->packets_out && sk->sk_send_head;
  }
  
@@ -1076,15 +1059,18 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
  
         BUG_ON(!skb || skb->len < mss_now);
  
-       tso_segs = tcp_init_tso_segs(sk, skb);
+       tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
         cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
  
         if (likely(cwnd_quota)) {
+               unsigned int limit;
+
                 BUG_ON(!tso_segs);
  
+               limit = mss_now;
                 if (tso_segs > 1) {
-                       u32 limit = tcp_window_allows(tp, skb,
-                                                     mss_now, cwnd_quota);
+                       limit = tcp_window_allows(tp, skb,
+                                                 mss_now, cwnd_quota);
  
                         if (skb->len < limit) {
                                 unsigned int trim = skb->len % mss_now;
@@ -1092,15 +1078,12 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
                                 if (trim)
                                         limit = skb->len - trim;
                         }
-                       if (skb->len > limit) {
-                               if (unlikely(tso_fragment(sk, skb, limit)))
-                                       return;
-                       }
-               } else if (unlikely(skb->len > mss_now)) {
-                       if (unlikely(tcp_fragment(sk, skb, mss_now)))
-                               return;
                 }
  
+               if (skb->len > limit &&
+                   unlikely(tso_fragment(sk, skb, limit, mss_now)))
+                       return;
+
                 /* Send it out now. */
                 TCP_SKB_CB(skb)->when = tcp_time_stamp;
  
@@ -1166,6 +1149,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
   */
  u32 __tcp_select_window(struct sock *sk)
  {
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         /* MSS for the peer's data.  Previous verions used mss_clamp
          * here.  I don't know if the value based on our guesses
@@ -1173,7 +1157,7 @@ u32 __tcp_select_window(struct sock *sk)
          * but may be worse for the performance because of rcv_mss
          * fluctuations.  --SAW  1998/11/1
          */
-       int mss = tp->ack.rcv_mss;
+       int mss = icsk->icsk_ack.rcv_mss;
         int free_space = tcp_space(sk);
         int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
         int window;
@@ -1182,7 +1166,7 @@ u32 __tcp_select_window(struct sock *sk)
                 mss = full_space; 
  
         if (free_space < full_space/2) {
-               tp->ack.quick = 0;
+               icsk->icsk_ack.quick = 0;
  
                 if (tcp_memory_pressure)
                         tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
@@ -1257,7 +1241,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
                        tcp_skb_pcount(next_skb) != 1);
  
                 /* Ok.  We will be able to collapse the packet. */
-               __skb_unlink(next_skb, next_skb->list);
+               __skb_unlink(next_skb, &sk->sk_write_queue);
  
                 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
  
@@ -1305,6 +1289,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
   */ 
  void tcp_simple_retransmit(struct sock *sk)
  {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb;
         unsigned int mss = tcp_current_mss(sk, 0);
@@ -1335,12 +1320,12 @@ void tcp_simple_retransmit(struct sock *sk)
          * in network, but units changed and effective
          * cwnd/ssthresh really reduced now.
          */
-       if (tp->ca_state != TCP_CA_Loss) {
+       if (icsk->icsk_ca_state != TCP_CA_Loss) {
                 tp->high_seq = tp->snd_nxt;
-               tp->snd_ssthresh = tcp_current_ssthresh(tp);
+               tp->snd_ssthresh = tcp_current_ssthresh(sk);
                 tp->prior_ssthresh = 0;
                 tp->undo_marker = 0;
-               tcp_set_ca_state(tp, TCP_CA_Loss);
+               tcp_set_ca_state(sk, TCP_CA_Loss);
         }
         tcp_xmit_retransmit_queue(sk);
  }
@@ -1386,15 +1371,21 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
  
         if (skb->len > cur_mss) {
                 int old_factor = tcp_skb_pcount(skb);
-               int new_factor;
+               int diff;
  
-               if (tcp_fragment(sk, skb, cur_mss))
+               if (tcp_fragment(sk, skb, cur_mss, cur_mss))
                         return -ENOMEM; /* We'll try again later. */
  
                 /* New SKB created, account for it. */
-               new_factor = tcp_skb_pcount(skb);
-               tp->packets_out -= old_factor - new_factor;
-               tp->packets_out += tcp_skb_pcount(skb->next);
+               diff = old_factor - tcp_skb_pcount(skb) -
+                      tcp_skb_pcount(skb->next);
+               tp->packets_out -= diff;
+
+               if (diff > 0) {
+                       tp->fackets_out -= diff;
+                       if ((int)tp->fackets_out < 0)
+                               tp->fackets_out = 0;
+               }
         }
  
         /* Collapse two adjacent packets if worthwhile and we can. */
@@ -1474,6 +1465,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
   */
  void tcp_xmit_retransmit_queue(struct sock *sk)
  {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb;
         int packet_cnt = tp->lost_out;
@@ -1497,14 +1489,16 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
                                 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
                                         if (tcp_retransmit_skb(sk, skb))
                                                 return;
-                                       if (tp->ca_state != TCP_CA_Loss)
+                                       if (icsk->icsk_ca_state != TCP_CA_Loss)
                                                 NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
                                         else
                                                 NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
  
                                         if (skb ==
                                             skb_peek(&sk->sk_write_queue))
-                                               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+                                               inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                                                         inet_csk(sk)->icsk_rto,
+                                                                         TCP_RTO_MAX);
                                 }
  
                                 packet_cnt -= tcp_skb_pcount(skb);
@@ -1517,7 +1511,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
         /* OK, demanded retransmission is finished. */
  
         /* Forward retransmissions are possible only during Recovery. */
-       if (tp->ca_state != TCP_CA_Recovery)
+       if (icsk->icsk_ca_state != TCP_CA_Recovery)
                 return;
  
         /* No forward retransmissions in Reno are possible. */
@@ -1557,7 +1551,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
                         break;
  
                 if (skb == skb_peek(&sk->sk_write_queue))
-                       tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                                 inet_csk(sk)->icsk_rto,
+                                                 TCP_RTO_MAX);
  
                 NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
         }
@@ -1586,7 +1582,7 @@ void tcp_send_fin(struct sock *sk)
         } else {
                 /* Socket is locked, keep trying until memory is available. */
                 for (;;) {
-                       skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
+                       skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
                         if (skb)
                                 break;
                         yield();
@@ -1793,8 +1789,8 @@ static inline void tcp_connect_init(struct sock *sk)
         tp->rcv_wup = 0;
         tp->copied_seq = 0;
  
-       tp->rto = TCP_TIMEOUT_INIT;
-       tp->retransmits = 0;
+       inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+       inet_csk(sk)->icsk_retransmits = 0;
         tcp_clear_retrans(tp);
  }
  
@@ -1808,7 +1804,7 @@ int tcp_connect(struct sock *sk)
  
         tcp_connect_init(sk);
  
-       buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
+       buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
         if (unlikely(buff == NULL))
                 return -ENOBUFS;
  
@@ -1837,7 +1833,8 @@ int tcp_connect(struct sock *sk)
         TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
  
         /* Timer for repeating the SYN until an answer. */
-       tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+       inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
         return 0;
  }
  
@@ -1847,20 +1844,21 @@ int tcp_connect(struct sock *sk)
   */
  void tcp_send_delayed_ack(struct sock *sk)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
-       int ato = tp->ack.ato;
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       int ato = icsk->icsk_ack.ato;
         unsigned long timeout;
  
         if (ato > TCP_DELACK_MIN) {
+               const struct tcp_sock *tp = tcp_sk(sk);
                 int max_ato = HZ/2;
  
-               if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED))
+               if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
                         max_ato = TCP_DELACK_MAX;
  
                 /* Slow path, intersegment interval is "high". */
  
                 /* If some rtt estimate is known, use it to bound delayed ack.
-                * Do not use tp->rto here, use results of rtt measurements
+                * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
                  * directly.
                  */
                 if (tp->srtt) {
@@ -1877,21 +1875,22 @@ void tcp_send_delayed_ack(struct sock *sk)
         timeout = jiffies + ato;
  
         /* Use new timeout only if there wasn't a older one earlier. */
-       if (tp->ack.pending&TCP_ACK_TIMER) {
+       if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
                 /* If delack timer was blocked or is about to expire,
                  * send ACK now.
                  */
-               if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) {
+               if (icsk->icsk_ack.blocked ||
+                   time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
                         tcp_send_ack(sk);
                         return;
                 }
  
-               if (!time_before(timeout, tp->ack.timeout))
-                       timeout = tp->ack.timeout;
+               if (!time_before(timeout, icsk->icsk_ack.timeout))
+                       timeout = icsk->icsk_ack.timeout;
         }
-       tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER;
-       tp->ack.timeout = timeout;
-       sk_reset_timer(sk, &tp->delack_timer, timeout);
+       icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+       icsk->icsk_ack.timeout = timeout;
+       sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
  }
  
  /* This routine sends an ack and also updates the window. */
@@ -1908,9 +1907,10 @@ void tcp_send_ack(struct sock *sk)
                  */
                 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
                 if (buff == NULL) {
-                       tcp_schedule_ack(tp);
-                       tp->ack.ato = TCP_ATO_MIN;
-                       tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
+                       inet_csk_schedule_ack(sk);
+                       inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                                 TCP_DELACK_MAX, TCP_RTO_MAX);
                         return;
                 }
  
@@ -1991,7 +1991,7 @@ int tcp_write_wakeup(struct sock *sk)
                             skb->len > mss) {
                                 seg_size = min(seg_size, mss);
                                 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
-                               if (tcp_fragment(sk, skb, seg_size))
+                               if (tcp_fragment(sk, skb, seg_size, mss))
                                         return -1;
                                 /* SWS override triggered forced fragmentation.
                                  * Disable TSO, the connection is too sick. */
@@ -2000,7 +2000,7 @@ int tcp_write_wakeup(struct sock *sk)
                                         sk->sk_route_caps &= ~NETIF_F_TSO;
                                 }
                         } else if (!tcp_skb_pcount(skb))
-                               tcp_set_skb_tso_segs(sk, skb);
+                               tcp_set_skb_tso_segs(sk, skb, mss);
  
                         TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
                         TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2024,6 +2024,7 @@ int tcp_write_wakeup(struct sock *sk)
   */
  void tcp_send_probe0(struct sock *sk)
  {
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         int err;
  
@@ -2031,28 +2032,31 @@ void tcp_send_probe0(struct sock *sk)
  
         if (tp->packets_out || !sk->sk_send_head) {
                 /* Cancel probe timer, if it is not required. */
-               tp->probes_out = 0;
-               tp->backoff = 0;
+               icsk->icsk_probes_out = 0;
+               icsk->icsk_backoff = 0;
                 return;
         }
  
         if (err <= 0) {
-               if (tp->backoff < sysctl_tcp_retries2)
-                       tp->backoff++;
-               tp->probes_out++;
-               tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, 
-                                     min(tp->rto << tp->backoff, TCP_RTO_MAX));
+               if (icsk->icsk_backoff < sysctl_tcp_retries2)
+                       icsk->icsk_backoff++;
+               icsk->icsk_probes_out++;
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 
+                                         min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
+                                         TCP_RTO_MAX);
         } else {
                 /* If packet was not sent due to local congestion,
-                * do not backoff and do not remember probes_out.
+                * do not backoff and do not remember icsk_probes_out.
                  * Let local senders to fight for local resources.
                  *
                  * Use accumulated backoff yet.
                  */
-               if (!tp->probes_out)
-                       tp->probes_out=1;
-               tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, 
-                                     min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL));
+               if (!icsk->icsk_probes_out)
+                       icsk->icsk_probes_out = 1;
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 
+                                         min(icsk->icsk_rto << icsk->icsk_backoff,
+                                             TCP_RESOURCE_PROBE_INTERVAL),
+                                         TCP_RTO_MAX);
         }
  }
  
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c

index 70e108e15c71e42b5992f9ba01964d99ca70c490..327770bf552230211f2db5d799183fa08dd7e440 100644 (file)
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -16,9 +16,10 @@
  #define TCP_SCALABLE_AI_CNT    50U
  #define TCP_SCALABLE_MD_SCALE  3
  
-static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
+static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
                                     u32 in_flight, int flag)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
         if (in_flight < tp->snd_cwnd)
                 return;
  
@@ -35,8 +36,9 @@ static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
         tp->snd_cwnd_stamp = tcp_time_stamp;
  }
  
-static u32 tcp_scalable_ssthresh(struct tcp_sock *tp)
+static u32 tcp_scalable_ssthresh(struct sock *sk)
  {
+       const struct tcp_sock *tp = tcp_sk(sk);
         return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
  }
  
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c

index 0084227438c28d26bc2d089b1facc4675310f741..415ee47ac1c55f24c7d58a8327b089c6925c34a3 100644 (file)
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -36,49 +36,13 @@ static void tcp_write_timer(unsigned long);
  static void tcp_delack_timer(unsigned long);
  static void tcp_keepalive_timer (unsigned long data);
  
-#ifdef TCP_DEBUG
-const char tcp_timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
-EXPORT_SYMBOL(tcp_timer_bug_msg);
-#endif
-
-/*
- * Using different timers for retransmit, delayed acks and probes
- * We may wish use just one timer maintaining a list of expire jiffies 
- * to optimize.
- */
-
  void tcp_init_xmit_timers(struct sock *sk)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
-
-       init_timer(&tp->retransmit_timer);
-       tp->retransmit_timer.function=&tcp_write_timer;
-       tp->retransmit_timer.data = (unsigned long) sk;
-       tp->pending = 0;
-
-       init_timer(&tp->delack_timer);
-       tp->delack_timer.function=&tcp_delack_timer;
-       tp->delack_timer.data = (unsigned long) sk;
-       tp->ack.pending = 0;
-
-       init_timer(&sk->sk_timer);
-       sk->sk_timer.function   = &tcp_keepalive_timer;
-       sk->sk_timer.data       = (unsigned long)sk;
+       inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
+                                 &tcp_keepalive_timer);
  }
  
-void tcp_clear_xmit_timers(struct sock *sk)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-
-       tp->pending = 0;
-       sk_stop_timer(sk, &tp->retransmit_timer);
-
-       tp->ack.pending = 0;
-       tp->ack.blocked = 0;
-       sk_stop_timer(sk, &tp->delack_timer);
-
-       sk_stop_timer(sk, &sk->sk_timer);
-}
+EXPORT_SYMBOL(tcp_init_xmit_timers);
  
  static void tcp_write_err(struct sock *sk)
  {
@@ -155,15 +119,15 @@ static int tcp_orphan_retries(struct sock *sk, int alive)
  /* A write timeout has occurred. Process the after effects. */
  static int tcp_write_timeout(struct sock *sk)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
+       const struct inet_connection_sock *icsk = inet_csk(sk);
         int retry_until;
  
         if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
-               if (tp->retransmits)
+               if (icsk->icsk_retransmits)
                         dst_negative_advice(&sk->sk_dst_cache);
-               retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;
+               retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
         } else {
-               if (tp->retransmits >= sysctl_tcp_retries1) {
+               if (icsk->icsk_retransmits >= sysctl_tcp_retries1) {
                         /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
                            hole detection. :-(
  
@@ -189,16 +153,16 @@ static int tcp_write_timeout(struct sock *sk)
  
                 retry_until = sysctl_tcp_retries2;
                 if (sock_flag(sk, SOCK_DEAD)) {
-                       int alive = (tp->rto < TCP_RTO_MAX);
+                       const int alive = (icsk->icsk_rto < TCP_RTO_MAX);
   
                         retry_until = tcp_orphan_retries(sk, alive);
  
-                       if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
+                       if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until))
                                 return 1;
                 }
         }
  
-       if (tp->retransmits >= retry_until) {
+       if (icsk->icsk_retransmits >= retry_until) {
                 /* Has it gone just too far? */
                 tcp_write_err(sk);
                 return 1;
@@ -210,26 +174,27 @@ static void tcp_delack_timer(unsigned long data)
  {
         struct sock *sk = (struct sock*)data;
         struct tcp_sock *tp = tcp_sk(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
  
         bh_lock_sock(sk);
         if (sock_owned_by_user(sk)) {
                 /* Try again later. */
-               tp->ack.blocked = 1;
+               icsk->icsk_ack.blocked = 1;
                 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
-               sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN);
+               sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
                 goto out_unlock;
         }
  
         sk_stream_mem_reclaim(sk);
  
-       if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))
+       if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
                 goto out;
  
-       if (time_after(tp->ack.timeout, jiffies)) {
-               sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
+       if (time_after(icsk->icsk_ack.timeout, jiffies)) {
+               sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
                 goto out;
         }
-       tp->ack.pending &= ~TCP_ACK_TIMER;
+       icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
  
         if (!skb_queue_empty(&tp->ucopy.prequeue)) {
                 struct sk_buff *skb;
@@ -242,16 +207,16 @@ static void tcp_delack_timer(unsigned long data)
                 tp->ucopy.memory = 0;
         }
  
-       if (tcp_ack_scheduled(tp)) {
-               if (!tp->ack.pingpong) {
+       if (inet_csk_ack_scheduled(sk)) {
+               if (!icsk->icsk_ack.pingpong) {
                         /* Delayed ACK missed: inflate ATO. */
-                       tp->ack.ato = min(tp->ack.ato << 1, tp->rto);
+                       icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
                 } else {
                         /* Delayed ACK missed: leave pingpong mode and
                          * deflate ATO.
                          */
-                       tp->ack.pingpong = 0;
-                       tp->ack.ato = TCP_ATO_MIN;
+                       icsk->icsk_ack.pingpong = 0;
+                       icsk->icsk_ack.ato      = TCP_ATO_MIN;
                 }
                 tcp_send_ack(sk);
                 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
@@ -268,11 +233,12 @@ out_unlock:
  
  static void tcp_probe_timer(struct sock *sk)
  {
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         int max_probes;
  
         if (tp->packets_out || !sk->sk_send_head) {
-               tp->probes_out = 0;
+               icsk->icsk_probes_out = 0;
                 return;
         }
  
@@ -283,7 +249,7 @@ static void tcp_probe_timer(struct sock *sk)
          * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
          * this behaviour in Solaris down as a bug fix. [AC]
          *
-        * Let me to explain. probes_out is zeroed by incoming ACKs
+        * Let me to explain. icsk_probes_out is zeroed by incoming ACKs
          * even if they advertise zero window. Hence, connection is killed only
          * if we received no ACKs for normal connection timeout. It is not killed
          * only because window stays zero for some time, window may be zero
@@ -294,15 +260,15 @@ static void tcp_probe_timer(struct sock *sk)
         max_probes = sysctl_tcp_retries2;
  
         if (sock_flag(sk, SOCK_DEAD)) {
-               int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);
+               const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX);
   
                 max_probes = tcp_orphan_retries(sk, alive);
  
-               if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
+               if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes))
                         return;
         }
  
-       if (tp->probes_out > max_probes) {
+       if (icsk->icsk_probes_out > max_probes) {
                 tcp_write_err(sk);
         } else {
                 /* Only send another probe if we didn't close things up. */
@@ -317,6 +283,7 @@ static void tcp_probe_timer(struct sock *sk)
  static void tcp_retransmit_timer(struct sock *sk)
  {
         struct tcp_sock *tp = tcp_sk(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
  
         if (!tp->packets_out)
                 goto out;
@@ -351,20 +318,21 @@ static void tcp_retransmit_timer(struct sock *sk)
         if (tcp_write_timeout(sk))
                 goto out;
  
-       if (tp->retransmits == 0) {
-               if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
+       if (icsk->icsk_retransmits == 0) {
+               if (icsk->icsk_ca_state == TCP_CA_Disorder ||
+                   icsk->icsk_ca_state == TCP_CA_Recovery) {
                         if (tp->rx_opt.sack_ok) {
-                               if (tp->ca_state == TCP_CA_Recovery)
+                               if (icsk->icsk_ca_state == TCP_CA_Recovery)
                                         NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
                                 else
                                         NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
                         } else {
-                               if (tp->ca_state == TCP_CA_Recovery)
+                               if (icsk->icsk_ca_state == TCP_CA_Recovery)
                                         NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
                                 else
                                         NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
                         }
-               } else if (tp->ca_state == TCP_CA_Loss) {
+               } else if (icsk->icsk_ca_state == TCP_CA_Loss) {
                         NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
                 } else {
                         NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
@@ -381,10 +349,11 @@ static void tcp_retransmit_timer(struct sock *sk)
                 /* Retransmission failed because of local congestion,
                  * do not backoff.
                  */
-               if (!tp->retransmits)
-                       tp->retransmits=1;
-               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
-                                    min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
+               if (!icsk->icsk_retransmits)
+                       icsk->icsk_retransmits = 1;
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                         min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
+                                         TCP_RTO_MAX);
                 goto out;
         }
  
@@ -403,13 +372,13 @@ static void tcp_retransmit_timer(struct sock *sk)
          * implemented ftp to mars will work nicely. We will have to fix
          * the 120 second clamps though!
          */
-       tp->backoff++;
-       tp->retransmits++;
+       icsk->icsk_backoff++;
+       icsk->icsk_retransmits++;
  
  out_reset_timer:
-       tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
-       tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
-       if (tp->retransmits > sysctl_tcp_retries1)
+       icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+       inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
+       if (icsk->icsk_retransmits > sysctl_tcp_retries1)
                 __sk_dst_reset(sk);
  
  out:;
@@ -418,32 +387,32 @@ out:;
  static void tcp_write_timer(unsigned long data)
  {
         struct sock *sk = (struct sock*)data;
-       struct tcp_sock *tp = tcp_sk(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
         int event;
  
         bh_lock_sock(sk);
         if (sock_owned_by_user(sk)) {
                 /* Try again later */
-               sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20));
+               sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20));
                 goto out_unlock;
         }
  
-       if (sk->sk_state == TCP_CLOSE || !tp->pending)
+       if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
                 goto out;
  
-       if (time_after(tp->timeout, jiffies)) {
-               sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
+       if (time_after(icsk->icsk_timeout, jiffies)) {
+               sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
                 goto out;
         }
  
-       event = tp->pending;
-       tp->pending = 0;
+       event = icsk->icsk_pending;
+       icsk->icsk_pending = 0;
  
         switch (event) {
-       case TCP_TIME_RETRANS:
+       case ICSK_TIME_RETRANS:
                 tcp_retransmit_timer(sk);
                 break;
-       case TCP_TIME_PROBE0:
+       case ICSK_TIME_PROBE0:
                 tcp_probe_timer(sk);
                 break;
         }
@@ -462,96 +431,8 @@ out_unlock:
  
  static void tcp_synack_timer(struct sock *sk)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
-       struct listen_sock *lopt = tp->accept_queue.listen_opt;
-       int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
-       int thresh = max_retries;
-       unsigned long now = jiffies;
-       struct request_sock **reqp, *req;
-       int i, budget;
-
-       if (lopt == NULL || lopt->qlen == 0)
-               return;
-
-       /* Normally all the openreqs are young and become mature
-        * (i.e. converted to established socket) for first timeout.
-        * If synack was not acknowledged for 3 seconds, it means
-        * one of the following things: synack was lost, ack was lost,
-        * rtt is high or nobody planned to ack (i.e. synflood).
-        * When server is a bit loaded, queue is populated with old
-        * open requests, reducing effective size of queue.
-        * When server is well loaded, queue size reduces to zero
-        * after several minutes of work. It is not synflood,
-        * it is normal operation. The solution is pruning
-        * too old entries overriding normal timeout, when
-        * situation becomes dangerous.
-        *
-        * Essentially, we reserve half of room for young
-        * embrions; and abort old ones without pity, if old
-        * ones are about to clog our table.
-        */
-       if (lopt->qlen>>(lopt->max_qlen_log-1)) {
-               int young = (lopt->qlen_young<<1);
-
-               while (thresh > 2) {
-                       if (lopt->qlen < young)
-                               break;
-                       thresh--;
-                       young <<= 1;
-               }
-       }
-
-       if (tp->defer_accept)
-               max_retries = tp->defer_accept;
-
-       budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
-       i = lopt->clock_hand;
-
-       do {
-               reqp=&lopt->syn_table[i];
-               while ((req = *reqp) != NULL) {
-                       if (time_after_eq(now, req->expires)) {
-                               if ((req->retrans < thresh ||
-                                    (inet_rsk(req)->acked && req->retrans < max_retries))
-                                   && !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) {
-                                       unsigned long timeo;
-
-                                       if (req->retrans++ == 0)
-                                               lopt->qlen_young--;
-                                       timeo = min((TCP_TIMEOUT_INIT << req->retrans),
-                                                   TCP_RTO_MAX);
-                                       req->expires = now + timeo;
-                                       reqp = &req->dl_next;
-                                       continue;
-                               }
-
-                               /* Drop this request */
-                               tcp_synq_unlink(tp, req, reqp);
-                               reqsk_queue_removed(&tp->accept_queue, req);
-                               reqsk_free(req);
-                               continue;
-                       }
-                       reqp = &req->dl_next;
-               }
-
-               i = (i+1)&(TCP_SYNQ_HSIZE-1);
-
-       } while (--budget > 0);
-
-       lopt->clock_hand = i;
-
-       if (lopt->qlen)
-               tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
-}
-
-void tcp_delete_keepalive_timer (struct sock *sk)
-{
-       sk_stop_timer(sk, &sk->sk_timer);
-}
-
-void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len)
-{
-       sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
+       inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
+                                  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
  }
  
  void tcp_set_keepalive(struct sock *sk, int val)
@@ -560,15 +441,16 @@ void tcp_set_keepalive(struct sock *sk, int val)
                 return;
  
         if (val && !sock_flag(sk, SOCK_KEEPOPEN))
-               tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
+               inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
         else if (!val)
-               tcp_delete_keepalive_timer(sk);
+               inet_csk_delete_keepalive_timer(sk);
  }
  
  
  static void tcp_keepalive_timer (unsigned long data)
  {
         struct sock *sk = (struct sock *) data;
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         __u32 elapsed;
  
@@ -576,7 +458,7 @@ static void tcp_keepalive_timer (unsigned long data)
         bh_lock_sock(sk);
         if (sock_owned_by_user(sk)) {
                 /* Try again later. */ 
-               tcp_reset_keepalive_timer (sk, HZ/20);
+               inet_csk_reset_keepalive_timer (sk, HZ/20);
                 goto out;
         }
  
@@ -587,7 +469,7 @@ static void tcp_keepalive_timer (unsigned long data)
  
         if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
                 if (tp->linger2 >= 0) {
-                       int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;
+                       const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
  
                         if (tmo > 0) {
                                 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
@@ -610,14 +492,14 @@ static void tcp_keepalive_timer (unsigned long data)
         elapsed = tcp_time_stamp - tp->rcv_tstamp;
  
         if (elapsed >= keepalive_time_when(tp)) {
-               if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
-                    (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
+               if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) ||
+                    (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) {
                         tcp_send_active_reset(sk, GFP_ATOMIC);
                         tcp_write_err(sk);
                         goto out;
                 }
                 if (tcp_write_wakeup(sk) <= 0) {
-                       tp->probes_out++;
+                       icsk->icsk_probes_out++;
                         elapsed = keepalive_intvl_when(tp);
                 } else {
                         /* If keepalive was lost due to local congestion,
@@ -634,7 +516,7 @@ static void tcp_keepalive_timer (unsigned long data)
         sk_stream_mem_reclaim(sk);
  
  resched:
-       tcp_reset_keepalive_timer (sk, elapsed);
+       inet_csk_reset_keepalive_timer (sk, elapsed);
         goto out;
  
  death: 
@@ -644,8 +526,3 @@ out:
         bh_unlock_sock(sk);
         sock_put(sk);
  }
-
-EXPORT_SYMBOL(tcp_clear_xmit_timers);
-EXPORT_SYMBOL(tcp_delete_keepalive_timer);
-EXPORT_SYMBOL(tcp_init_xmit_timers);
-EXPORT_SYMBOL(tcp_reset_keepalive_timer);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c

index 9bd443db5193a82c69fee4796e386106f959b705..93c5f92070f9129bfd55667f9945fbaeb7cee194 100644 (file)
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -35,7 +35,7 @@
  #include <linux/mm.h>
  #include <linux/module.h>
  #include <linux/skbuff.h>
-#include <linux/tcp_diag.h>
+#include <linux/inet_diag.h>
  
  #include <net/tcp.h>
  
@@ -82,9 +82,10 @@ struct vegas {
   * Instead we must wait until the completion of an RTT during
   * which we actually receive ACKs.
   */
-static inline void vegas_enable(struct tcp_sock *tp)
+static inline void vegas_enable(struct sock *sk)
  {
-       struct vegas *vegas = tcp_ca(tp);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct vegas *vegas = inet_csk_ca(sk);
  
         /* Begin taking Vegas samples next time we send something. */
         vegas->doing_vegas_now = 1;
@@ -97,19 +98,19 @@ static inline void vegas_enable(struct tcp_sock *tp)
  }
  
  /* Stop taking Vegas samples for now. */
-static inline void vegas_disable(struct tcp_sock *tp)
+static inline void vegas_disable(struct sock *sk)
  {
-       struct vegas *vegas = tcp_ca(tp);
+       struct vegas *vegas = inet_csk_ca(sk);
  
         vegas->doing_vegas_now = 0;
  }
  
-static void tcp_vegas_init(struct tcp_sock *tp)
+static void tcp_vegas_init(struct sock *sk)
  {
-       struct vegas *vegas = tcp_ca(tp);
+       struct vegas *vegas = inet_csk_ca(sk);
  
         vegas->baseRTT = 0x7fffffff;
-       vegas_enable(tp);
+       vegas_enable(sk);
  }
  
  /* Do RTT sampling needed for Vegas.
@@ -120,9 +121,9 @@ static void tcp_vegas_init(struct tcp_sock *tp)
   *   o min-filter RTT samples from a much longer window (forever for now)
   *     to find the propagation delay (baseRTT)
   */
-static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt)
+static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
  {
-       struct vegas *vegas = tcp_ca(tp);
+       struct vegas *vegas = inet_csk_ca(sk);
         u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
  
         /* Filter to find propagation delay: */
@@ -136,13 +137,13 @@ static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt)
         vegas->cntRTT++;
  }
  
-static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state)
+static void tcp_vegas_state(struct sock *sk, u8 ca_state)
  {
  
         if (ca_state == TCP_CA_Open)
-               vegas_enable(tp);
+               vegas_enable(sk);
         else
-               vegas_disable(tp);
+               vegas_disable(sk);
  }
  
  /*
@@ -154,20 +155,21 @@ static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state)
   * packets, _then_ we can make Vegas calculations
   * again.
   */
-static void tcp_vegas_cwnd_event(struct tcp_sock *tp, enum tcp_ca_event event)
+static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
  {
         if (event == CA_EVENT_CWND_RESTART ||
             event == CA_EVENT_TX_START)
-               tcp_vegas_init(tp);
+               tcp_vegas_init(sk);
  }
  
-static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack,
+static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
                                  u32 seq_rtt, u32 in_flight, int flag)
  {
-       struct vegas *vegas = tcp_ca(tp);
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct vegas *vegas = inet_csk_ca(sk);
  
         if (!vegas->doing_vegas_now)
-               return tcp_reno_cong_avoid(tp, ack, seq_rtt, in_flight, flag);
+               return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag);
  
         /* The key players are v_beg_snd_una and v_beg_snd_nxt.
          *
@@ -219,7 +221,7 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack,
                  * but that's not too awful, since we're taking the min,
                  * rather than averaging.
                  */
-               tcp_vegas_rtt_calc(tp, seq_rtt*1000);
+               tcp_vegas_rtt_calc(sk, seq_rtt * 1000);
  
                 /* We do the Vegas calculations only if we got enough RTT
                  * samples that we can be reasonably sure that we got
@@ -359,14 +361,14 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack,
  }
  
  /* Extract info for Tcp socket info provided via netlink. */
-static void tcp_vegas_get_info(struct tcp_sock *tp, u32 ext,
+static void tcp_vegas_get_info(struct sock *sk, u32 ext,
                                struct sk_buff *skb)
  {
-       const struct vegas *ca = tcp_ca(tp);
-       if (ext & (1<<(TCPDIAG_VEGASINFO-1))) {
+       const struct vegas *ca = inet_csk_ca(sk);
+       if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
                 struct tcpvegas_info *info;
  
-               info = RTA_DATA(__RTA_PUT(skb, TCPDIAG_VEGASINFO,
+               info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
                                           sizeof(*info)));
  
                 info->tcpv_enabled = ca->doing_vegas_now;
@@ -393,7 +395,7 @@ static struct tcp_congestion_ops tcp_vegas = {
  
  static int __init tcp_vegas_register(void)
  {
-       BUG_ON(sizeof(struct vegas) > TCP_CA_PRIV_SIZE);
+       BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
         tcp_register_congestion_control(&tcp_vegas);
         return 0;
  }
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c

index ef827242c940ec4271746111159bf8fb34441968..0c340c3756c2e06082f9cd400a53498614239fae 100644 (file)
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -8,7 +8,7 @@
  #include <linux/mm.h>
  #include <linux/module.h>
  #include <linux/skbuff.h>
-#include <linux/tcp_diag.h>
+#include <linux/inet_diag.h>
  #include <net/tcp.h>
  
  /* TCP Westwood structure */
@@ -40,9 +40,9 @@ struct westwood {
   * way as soon as possible. It will reasonably happen within the first
   * RTT period of the connection lifetime.
   */
-static void tcp_westwood_init(struct tcp_sock *tp)
+static void tcp_westwood_init(struct sock *sk)
  {
-       struct westwood *w = tcp_ca(tp);
+       struct westwood *w = inet_csk_ca(sk);
  
         w->bk = 0;
          w->bw_ns_est = 0;
@@ -51,7 +51,7 @@ static void tcp_westwood_init(struct tcp_sock *tp)
          w->cumul_ack = 0;
         w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT;
         w->rtt_win_sx = tcp_time_stamp;
-       w->snd_una = tp->snd_una;
+       w->snd_una = tcp_sk(sk)->snd_una;
  }
  
  /*
@@ -74,11 +74,11 @@ static inline void westwood_filter(struct westwood *w, u32 delta)
   * Called after processing group of packets.
   * but all westwood needs is the last sample of srtt.
   */
-static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt)
+static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
  {
-       struct westwood *w = tcp_ca(tp);
+       struct westwood *w = inet_csk_ca(sk);
         if (cnt > 0)
-               w->rtt = tp->srtt >> 3;
+               w->rtt = tcp_sk(sk)->srtt >> 3;
  }
  
  /*
@@ -86,9 +86,9 @@ static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt)
   * It updates RTT evaluation window if it is the right moment to do
   * it. If so it calls filter for evaluating bandwidth.
   */
-static void westwood_update_window(struct tcp_sock *tp)
+static void westwood_update_window(struct sock *sk)
  {
-       struct westwood *w = tcp_ca(tp);
+       struct westwood *w = inet_csk_ca(sk);
         s32 delta = tcp_time_stamp - w->rtt_win_sx;
  
         /*
@@ -114,11 +114,12 @@ static void westwood_update_window(struct tcp_sock *tp)
   * header prediction is successful. In such case in fact update is
   * straight forward and doesn't need any particular care.
   */
-static inline void westwood_fast_bw(struct tcp_sock *tp)
+static inline void westwood_fast_bw(struct sock *sk)
  {
-       struct westwood *w = tcp_ca(tp);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct westwood *w = inet_csk_ca(sk);
  
-       westwood_update_window(tp);
+       westwood_update_window(sk);
  
         w->bk += tp->snd_una - w->snd_una;
         w->snd_una = tp->snd_una;
@@ -130,9 +131,10 @@ static inline void westwood_fast_bw(struct tcp_sock *tp)
   * This function evaluates cumul_ack for evaluating bk in case of
   * delayed or partial acks.
   */
-static inline u32 westwood_acked_count(struct tcp_sock *tp)
+static inline u32 westwood_acked_count(struct sock *sk)
  {
-       struct westwood *w = tcp_ca(tp);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct westwood *w = inet_csk_ca(sk);
  
         w->cumul_ack = tp->snd_una - w->snd_una;
  
@@ -160,9 +162,10 @@ static inline u32 westwood_acked_count(struct tcp_sock *tp)
         return w->cumul_ack;
  }
  
-static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp)
+static inline u32 westwood_bw_rttmin(const struct sock *sk)
  {
-       struct westwood *w = tcp_ca(tp);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       const struct westwood *w = inet_csk_ca(sk);
         return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2);
  }
  
@@ -172,31 +175,32 @@ static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp)
   * in packets we use mss_cache). Rttmin is guaranteed to be >= 2
   * so avoids ever returning 0.
   */
-static u32 tcp_westwood_cwnd_min(struct tcp_sock *tp)
+static u32 tcp_westwood_cwnd_min(struct sock *sk)
  {
-       return westwood_bw_rttmin(tp);
+       return westwood_bw_rttmin(sk);
  }
  
-static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event)
+static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
  {
-       struct westwood *w = tcp_ca(tp);
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct westwood *w = inet_csk_ca(sk);
  
         switch(event) {
         case CA_EVENT_FAST_ACK:
-               westwood_fast_bw(tp);
+               westwood_fast_bw(sk);
                 break;
  
         case CA_EVENT_COMPLETE_CWR:
-               tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(tp);
+               tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(sk);
                 break;
  
         case CA_EVENT_FRTO:
-               tp->snd_ssthresh = westwood_bw_rttmin(tp);
+               tp->snd_ssthresh = westwood_bw_rttmin(sk);
                 break;
  
         case CA_EVENT_SLOW_ACK:
-               westwood_update_window(tp);
-               w->bk += westwood_acked_count(tp);
+               westwood_update_window(sk);
+               w->bk += westwood_acked_count(sk);
                 w->rtt_min = min(w->rtt, w->rtt_min);
                 break;
  
@@ -208,15 +212,15 @@ static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event)
  
  
  /* Extract info for Tcp socket info provided via netlink. */
-static void tcp_westwood_info(struct tcp_sock *tp, u32 ext,
+static void tcp_westwood_info(struct sock *sk, u32 ext,
                               struct sk_buff *skb)
  {
-       const struct westwood *ca = tcp_ca(tp);
-       if (ext & (1<<(TCPDIAG_VEGASINFO-1))) {
+       const struct westwood *ca = inet_csk_ca(sk);
+       if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
                 struct rtattr *rta;
                 struct tcpvegas_info *info;
  
-               rta = __RTA_PUT(skb, TCPDIAG_VEGASINFO, sizeof(*info));
+               rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info));
                 info = RTA_DATA(rta);
                 info->tcpv_enabled = 1;
                 info->tcpv_rttcnt = 0;
@@ -242,7 +246,7 @@ static struct tcp_congestion_ops tcp_westwood = {
  
  static int __init tcp_westwood_register(void)
  {
-       BUG_ON(sizeof(struct westwood) > TCP_CA_PRIV_SIZE);
+       BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
         return tcp_register_congestion_control(&tcp_westwood);
  }
  
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c

index 7c24e64b443f80eae0c5a3663caff46f37011f69..e5beca7de86c4b67e7455172ac66f6a7ad54ad49 100644 (file)
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -95,7 +95,8 @@
  #include <linux/ipv6.h>
  #include <linux/netdevice.h>
  #include <net/snmp.h>
-#include <net/tcp.h>
+#include <net/ip.h>
+#include <net/tcp_states.h>
  #include <net/protocol.h>
  #include <linux/skbuff.h>
  #include <linux/proc_fs.h>
@@ -112,7 +113,7 @@
   *     Snmp MIB for the UDP layer
   */
  
-DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
+DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
  
  struct hlist_head udp_hash[UDP_HTABLE_SIZE];
  DEFINE_RWLOCK(udp_hash_lock);
@@ -628,7 +629,7 @@ back_from_confirm:
                 /* ... which is an evident application bug. --ANK */
                 release_sock(sk);
  
-               NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
                 err = -EINVAL;
                 goto out;
         }
@@ -693,7 +694,7 @@ static int udp_sendpage(struct sock *sk, struct page *page, int offset,
         if (unlikely(!up->pending)) {
                 release_sock(sk);
  
-               NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 3\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
                 return -EINVAL;
         }
  
@@ -1102,7 +1103,7 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
                 skb->ip_summed = CHECKSUM_UNNECESSARY;
                 if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
                         return 0;
-               NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "udp v4 hw csum failure.\n");
                 skb->ip_summed = CHECKSUM_NONE;
         }
         if (skb->ip_summed != CHECKSUM_UNNECESSARY)
@@ -1181,14 +1182,13 @@ int udp_rcv(struct sk_buff *skb)
         return(0);
  
  short_packet:
-       NETDEBUG(if (net_ratelimit())
-               printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
-                       NIPQUAD(saddr),
-                       ntohs(uh->source),
-                       ulen,
-                       len,
-                       NIPQUAD(daddr),
-                       ntohs(uh->dest)));
+       LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
+                      NIPQUAD(saddr),
+                      ntohs(uh->source),
+                      ulen,
+                      len,
+                      NIPQUAD(daddr),
+                      ntohs(uh->dest));
  no_header:
         UDP_INC_STATS_BH(UDP_MIB_INERRORS);
         kfree_skb(skb);
@@ -1199,13 +1199,12 @@ csum_error:
          * RFC1122: OK.  Discards the bad packet silently (as far as 
          * the network is concerned, anyway) as per 4.1.3.4 (MUST). 
          */
-       NETDEBUG(if (net_ratelimit())
-                printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
-                       NIPQUAD(saddr),
-                       ntohs(uh->source),
-                       NIPQUAD(daddr),
-                       ntohs(uh->dest),
-                       ulen));
+       LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
+                      NIPQUAD(saddr),
+                      ntohs(uh->source),
+                      NIPQUAD(daddr),
+                      ntohs(uh->dest),
+                      ulen);
  drop:
         UDP_INC_STATS_BH(UDP_MIB_INERRORS);
         kfree_skb(skb);
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c

index 050611d7a9670d178c37cc6a835fd7503f0117b9..d23e07fc81facafe1c7a34e10c9a0d5fc75a4168 100644 (file)
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -128,8 +128,10 @@ void __init xfrm4_state_init(void)
         xfrm_state_register_afinfo(&xfrm4_state_afinfo);
  }
  
+#if 0
  void __exit xfrm4_state_fini(void)
  {
         xfrm_state_unregister_afinfo(&xfrm4_state_afinfo);
  }
+#endif  /*  0  */
  
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile

index b39e04940590bd07ad7f311412505ff8201834bc..6460eec834b7b9f9ca354f0b91622698fc2d4cb8 100644 (file)
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -8,7 +8,7 @@ ipv6-objs :=    af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
                 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
                 protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
                 exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
-               ip6_flowlabel.o ipv6_syms.o
+               ip6_flowlabel.o ipv6_syms.o netfilter.o
  
  ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
         xfrm6_output.o
@@ -23,3 +23,5 @@ obj-$(CONFIG_NETFILTER)       += netfilter/
  obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
  
  obj-y += exthdrs_core.o
+
+obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c

index 77004b9456c049f05cadbc52de47a4880c62fdd7..937ad32db77c182f35f5e4b4c7cd0530e0750fb8 100644 (file)
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1041,9 +1041,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
         const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
         const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2);
         u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
-       u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
+       u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
         int sk_ipv6only = ipv6_only_sock(sk);
-       int sk2_ipv6only = tcp_v6_ipv6only(sk2);
+       int sk2_ipv6only = inet_v6_ipv6only(sk2);
         int addr_type = ipv6_addr_type(sk_rcv_saddr6);
         int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
  
@@ -1126,7 +1126,7 @@ void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr)
         __ipv6_dev_mc_dec(idev, &maddr);
  }
  
-void addrconf_join_anycast(struct inet6_ifaddr *ifp)
+static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
  {
         struct in6_addr addr;
         ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -1135,7 +1135,7 @@ void addrconf_join_anycast(struct inet6_ifaddr *ifp)
         ipv6_dev_ac_inc(ifp->idev->dev, &addr);
  }
  
-void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
+static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
  {
         struct in6_addr addr;
         ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -2858,16 +2858,16 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
  
         skb = alloc_skb(size, GFP_ATOMIC);
         if (!skb) {
-               netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS);
+               netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS);
                 return;
         }
         if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
                 kfree_skb(skb);
-               netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL);
+               netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL);
                 return;
         }
-       NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFADDR;
-       netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFADDR, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR;
+       netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC);
  }
  
  static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
@@ -2994,16 +2994,16 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
         
         skb = alloc_skb(size, GFP_ATOMIC);
         if (!skb) {
-               netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS);
+               netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS);
                 return;
         }
         if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) {
                 kfree_skb(skb);
-               netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL);
+               netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL);
                 return;
         }
-       NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFINFO;
-       netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFINFO, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO;
+       netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC);
  }
  
  static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
@@ -3054,16 +3054,16 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
  
         skb = alloc_skb(size, GFP_ATOMIC);
         if (!skb) {
-               netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS);
+               netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS);
                 return;
         }
         if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) {
                 kfree_skb(skb);
-               netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL);
+               netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL);
                 return;
         }
-       NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_PREFIX;
-       netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX;
+       netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC);
  }
  
  static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c

index 28d9bcab0970ce63fbe6a224a0488b23db6847f9..4f8795af2edb68d950d909952f583ff0e3be8005 100644 (file)
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -44,6 +44,7 @@
  #include <linux/netdevice.h>
  #include <linux/icmpv6.h>
  #include <linux/smp_lock.h>
+#include <linux/netfilter_ipv6.h>
  
  #include <net/ip.h>
  #include <net/ipv6.h>
@@ -66,45 +67,14 @@ MODULE_AUTHOR("Cast of dozens");
  MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
  MODULE_LICENSE("GPL");
  
-/* IPv6 procfs goodies... */
-
-#ifdef CONFIG_PROC_FS
-extern int raw6_proc_init(void);
-extern void raw6_proc_exit(void);
-extern int tcp6_proc_init(void);
-extern void tcp6_proc_exit(void);
-extern int udp6_proc_init(void);
-extern void udp6_proc_exit(void);
-extern int ipv6_misc_proc_init(void);
-extern void ipv6_misc_proc_exit(void);
-extern int ac6_proc_init(void);
-extern void ac6_proc_exit(void);
-extern int if6_proc_init(void);
-extern void if6_proc_exit(void);
-#endif
-
  int sysctl_ipv6_bindv6only;
  
-#ifdef INET_REFCNT_DEBUG
-atomic_t inet6_sock_nr;
-EXPORT_SYMBOL(inet6_sock_nr);
-#endif
-
  /* The inetsw table contains everything that inet_create needs to
   * build a new socket.
   */
  static struct list_head inetsw6[SOCK_MAX];
  static DEFINE_SPINLOCK(inetsw6_lock);
  
-static void inet6_sock_destruct(struct sock *sk)
-{
-       inet_sock_destruct(sk);
-
-#ifdef INET_REFCNT_DEBUG
-       atomic_dec(&inet6_sock_nr);
-#endif
-}
-
  static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
  {
         const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
@@ -185,7 +155,7 @@ static int inet6_create(struct socket *sock, int protocol)
                         inet->hdrincl = 1;
         }
  
-       sk->sk_destruct         = inet6_sock_destruct;
+       sk->sk_destruct         = inet_sock_destruct;
         sk->sk_family           = PF_INET6;
         sk->sk_protocol         = protocol;
  
@@ -212,12 +182,17 @@ static int inet6_create(struct socket *sock, int protocol)
                 inet->pmtudisc = IP_PMTUDISC_DONT;
         else
                 inet->pmtudisc = IP_PMTUDISC_WANT;
+       /* 
+        * Increment only the relevant sk_prot->socks debug field, this changes
+        * the previous behaviour of incrementing both the equivalent to
+        * answer->prot->socks (inet6_sock_nr) and inet_sock_nr.
+        *
+        * This allows better debug granularity as we'll know exactly how many
+        * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6
+        * transport protocol socks. -acme
+        */
+       sk_refcnt_debug_inc(sk);
  
-
-#ifdef INET_REFCNT_DEBUG
-       atomic_inc(&inet6_sock_nr);
-       atomic_inc(&inet_sock_nr);
-#endif
         if (inet->num) {
                 /* It assumes that any protocol which allows
                  * the user to assign a number at socket
@@ -513,11 +488,6 @@ static struct net_proto_family inet6_family_ops = {
         .owner  = THIS_MODULE,
  };
  
-#ifdef CONFIG_SYSCTL
-extern void ipv6_sysctl_register(void);
-extern void ipv6_sysctl_unregister(void);
-#endif
-
  /* Same as inet6_dgram_ops, sans udp_poll.  */
  static struct proto_ops inet6_sockraw_ops = {
         .family =       PF_INET6,
@@ -684,8 +654,6 @@ static void cleanup_ipv6_mibs(void)
         snmp6_mib_free((void **)udp_stats_in6);
  }
  
-extern int ipv6_misc_proc_init(void);
-
  static int __init inet6_init(void)
  {
         struct sk_buff *dummy_skb;
@@ -757,6 +725,9 @@ static int __init inet6_init(void)
         err = igmp6_init(&inet6_family_ops);
         if (err)
                 goto igmp_fail;
+       err = ipv6_netfilter_init();
+       if (err)
+               goto netfilter_fail;
         /* Create /proc/foo6 entries. */
  #ifdef CONFIG_PROC_FS
         err = -ENOMEM;
@@ -813,6 +784,8 @@ proc_tcp6_fail:
         raw6_proc_exit();
  proc_raw6_fail:
  #endif
+       ipv6_netfilter_fini();
+netfilter_fail:
         igmp6_cleanup();
  igmp_fail:
         ndisc_cleanup();
@@ -852,6 +825,7 @@ static void __exit inet6_exit(void)
         ip6_route_cleanup();
         ipv6_packet_cleanup();
         igmp6_cleanup();
+       ipv6_netfilter_fini();
         ndisc_cleanup();
         icmpv6_cleanup();
  #ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c

index 986fdfdccbcdee96a9f2b7d014fa7575ae4bd386..0ebfad907a039824deb6d9974fba131a4365034e 100644 (file)
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -131,10 +131,10 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
                 case NEXTHDR_HOP:
                 case NEXTHDR_DEST:
                         if (!zero_out_mutable_opts(exthdr.opth)) {
-                               LIMIT_NETDEBUG(printk(
+                               LIMIT_NETDEBUG(
                                         KERN_WARNING "overrun %sopts\n",
                                         nexthdr == NEXTHDR_HOP ?
-                                               "hop" : "dest"));
+                                               "hop" : "dest");
                                 return -EINVAL;
                         }
                         break;
@@ -293,8 +293,7 @@ static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc
                 skb_push(skb, skb->data - skb->nh.raw);
                 ahp->icv(ahp, skb, ah->auth_data);
                 if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
-                       LIMIT_NETDEBUG(
-                               printk(KERN_WARNING "ipsec ah authentication error\n"));
+                       LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n");
                         x->stats.integrity_failed++;
                         goto free_out;
                 }
@@ -332,9 +331,9 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
         if (!x)
                 return;
  
-       NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/"
-                       "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-              ntohl(ah->spi), NIP6(iph->daddr)));
+       NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/"
+                "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+                ntohl(ah->spi), NIP6(iph->daddr));
  
         xfrm_state_put(x);
  }
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c

index 5229365cd8b4849371e498489fe0fe65427c6aa8..01468fab3d3db103388f17d960d46e2f44f4e2f6 100644 (file)
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -29,6 +29,7 @@
  #include <net/addrconf.h>
  #include <net/transp_v6.h>
  #include <net/ip6_route.h>
+#include <net/tcp_states.h>
  
  #include <linux/errqueue.h>
  #include <asm/uaccess.h>
@@ -588,8 +589,8 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
                         break;
  
                 default:
-                       LIMIT_NETDEBUG(
-                               printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type));
+                       LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n",
+                                      cmsg->cmsg_type);
                         err = -EINVAL;
                         break;
                 };
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c

index 324db62515a29e88c51c3fff9fe48e2960ac51da..e8bff9d3d96c8bd6fcef20eae749964713bf6461 100644 (file)
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -212,8 +212,7 @@ static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, stru
  
                 padlen = nexthdr[0];
                 if (padlen+2 >= elen) {
-                       LIMIT_NETDEBUG(
-                               printk(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen));
+                       LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen);
                         ret = -EINVAL;
                         goto out;
                 }
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c

index e0839eafc3a90e36e085ef50f895382d5d441f9a..5be6da2584eec540d38d554b068b0d6ad5ebd910 100644 (file)
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -424,8 +424,8 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
                 IP6CB(skb)->ra = optoff;
                 return 1;
         }
-       LIMIT_NETDEBUG(
-                printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1]));
+       LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
+                      skb->nh.raw[optoff+1]);
         kfree_skb(skb);
         return 0;
  }
@@ -437,8 +437,8 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
         u32 pkt_len;
  
         if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
-               LIMIT_NETDEBUG(
-                        printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1]));
+               LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
+                              skb->nh.raw[optoff+1]);
                 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
                 goto drop;
         }
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c

index ff3ec9822e36bb5e8c0100014c20bd8e3b14af50..5176fc655ea907084f814c17590eccccadfabdf4 100644 (file)
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -67,7 +67,7 @@
  #include <asm/uaccess.h>
  #include <asm/system.h>
  
-DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
+DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
  
  /*
   *     The ICMP socket(s). This is the most convenient way to flow control
@@ -332,8 +332,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
          *      for now we don't know that.
          */
         if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
-               LIMIT_NETDEBUG(
-                       printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
                 return;
         }
  
@@ -341,8 +340,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
          *      Never answer to a ICMP packet.
          */
         if (is_ineligible(skb)) {
-               LIMIT_NETDEBUG(
-                       printk(KERN_DEBUG "icmpv6_send: no reply to icmp error\n")); 
+               LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
                 return;
         }
  
@@ -393,8 +391,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
         len = skb->len - msg.offset;
         len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
         if (len < 0) {
-               LIMIT_NETDEBUG(
-                       printk(KERN_DEBUG "icmp: len problem\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
                 goto out_dst_release;
         }
  
@@ -551,7 +548,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
  
         read_lock(&raw_v6_lock);
         if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
-               while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr))) {
+               while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr,
+                                           skb->dev->ifindex))) {
                         rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
                         sk = sk_next(sk);
                 }
@@ -583,17 +581,15 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
                 skb->ip_summed = CHECKSUM_UNNECESSARY;
                 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
                                     skb->csum)) {
-                       LIMIT_NETDEBUG(
-                               printk(KERN_DEBUG "ICMPv6 hw checksum failed\n"));
+                       LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 hw checksum failed\n");
                         skb->ip_summed = CHECKSUM_NONE;
                 }
         }
         if (skb->ip_summed == CHECKSUM_NONE) {
                 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
                                     skb_checksum(skb, 0, skb->len, 0))) {
-                       LIMIT_NETDEBUG(
-                               printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
-                                      NIP6(*saddr), NIP6(*daddr)));
+                       LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
+                                      NIP6(*saddr), NIP6(*daddr));
                         goto discard_it;
                 }
         }
@@ -669,8 +665,7 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
                 break;
  
         default:
-               LIMIT_NETDEBUG(
-                       printk(KERN_DEBUG "icmpv6: msg of unknown type\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
  
                 /* informational */
                 if (type & ICMPV6_INFOMSG_MASK)
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c

new file mode 100644 (file)

index 0000000..01d5f46
--- /dev/null
+++ b/net/ipv6/inet6_hashtables.c
@@ -0,0 +1,81 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Generic INET6 transport hashtables
+ *
+ * Authors:    Lotsa people, from code originally in tcp
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+
+#include <linux/module.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/inet6_hashtables.h>
+
+struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
+                                  const struct in6_addr *daddr,
+                                  const unsigned short hnum, const int dif)
+{
+       struct sock *sk;
+       const struct hlist_node *node;
+       struct sock *result = NULL;
+       int score, hiscore = 0;
+       /* Pick the best-scoring PF_INET6 socket on hnum from the listening hash. */
+       read_lock(&hashinfo->lhash_lock);
+       sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
+               if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
+                       const struct ipv6_pinfo *np = inet6_sk(sk);
+                       /* 1 to start, +1 if rcv_saddr is set and equals daddr, +1 if bound to dif */
+                       score = 1;
+                       if (!ipv6_addr_any(&np->rcv_saddr)) {
+                               if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+                                       continue;       /* rcv_saddr set but != daddr */
+                               score++;
+                       }
+                       if (sk->sk_bound_dev_if) {
+                               if (sk->sk_bound_dev_if != dif)
+                                       continue;       /* bound to a different ifindex */
+                               score++;
+                       }
+                       if (score == 3) {       /* highest possible score: stop searching */
+                               result = sk;
+                               break;
+                       }
+                       if (score > hiscore) {
+                               hiscore = score;
+                               result = sk;
+                       }
+               }
+       }
+       if (result)
+               sock_hold(result);      /* done before lhash_lock is released */
+       read_unlock(&hashinfo->lhash_lock);
+       return result;  /* NULL when no socket matched */
+}
+
+EXPORT_SYMBOL_GPL(inet6_lookup_listener);
+
+struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
+                         const struct in6_addr *saddr, const u16 sport,
+                         const struct in6_addr *daddr, const u16 dport,
+                         const int dif)
+{
+       struct sock *sk;
+       /* Call __inet6_lookup() with BHs disabled; dport goes through ntohs() first. */
+       local_bh_disable();
+       sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
+       local_bh_enable();
+
+       return sk;      /* whatever __inet6_lookup() returned */
+}
+
+EXPORT_SYMBOL_GPL(inet6_lookup);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c

index 1b354aa979340f41c0bc5d413f87dbdf2ff69117..16af874c9e8f15e39242b034ae59d8e623ed3e37 100644 (file)
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -49,7 +49,7 @@
  
  struct rt6_statistics  rt6_stats;
  
-static kmem_cache_t * fib6_node_kmem;
+static kmem_cache_t * fib6_node_kmem __read_mostly;
  
  enum fib_walk_state_t
  {
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c

index 866f10726c5832732769fcb9250da70a2ae02fb9..6e3480426939150b7c997dc01c41d7a1805951d1 100644 (file)
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -56,7 +56,7 @@ static inline int ip6_rcv_finish( struct sk_buff *skb)
         return dst_input(skb);
  }
  
-int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
  {
         struct ipv6hdr *hdr;
         u32             pkt_len;
@@ -166,8 +166,8 @@ resubmit:
         nexthdr = skb->nh.raw[nhoff];
  
         raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
-       if (raw_sk)
-               ipv6_raw_deliver(skb, nexthdr);
+       if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
+               raw_sk = NULL;
  
         hash = nexthdr & (MAX_INET_PROTOS - 1);
         if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
@@ -198,12 +198,13 @@ resubmit:
                 if (!raw_sk) {
                         if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
                                 IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
-                               icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff);
+                               icmpv6_send(skb, ICMPV6_PARAMPROB,
+                                           ICMPV6_UNK_NEXTHDR, nhoff,
+                                           skb->dev);
                         }
-               } else {
+               } else
                         IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
-                       kfree_skb(skb);
-               }
+               kfree_skb(skb);
         }
         rcu_read_unlock();
         return 0;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c

index ae652ca14bc9ef013431324276dc241ab2631ecc..01ef94f7c7f1ce5dfd899703d287be308376a83c 100644 (file)
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -153,51 +153,6 @@ int ip6_output(struct sk_buff *skb)
                 return ip6_output2(skb);
  }
  
-#ifdef CONFIG_NETFILTER
-int ip6_route_me_harder(struct sk_buff *skb)
-{
-       struct ipv6hdr *iph = skb->nh.ipv6h;
-       struct dst_entry *dst;
-       struct flowi fl = {
-               .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
-               .nl_u =
-               { .ip6_u =
-                 { .daddr = iph->daddr,
-                   .saddr = iph->saddr, } },
-               .proto = iph->nexthdr,
-       };
-
-       dst = ip6_route_output(skb->sk, &fl);
-
-       if (dst->error) {
-               IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
-               LIMIT_NETDEBUG(
-                       printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
-               dst_release(dst);
-               return -EINVAL;
-       }
-
-       /* Drop old route. */
-       dst_release(skb->dst);
-
-       skb->dst = dst;
-       return 0;
-}
-#endif
-
-static inline int ip6_maybe_reroute(struct sk_buff *skb)
-{
-#ifdef CONFIG_NETFILTER
-       if (skb->nfcache & NFC_ALTERED){
-               if (ip6_route_me_harder(skb) != 0){
-                       kfree_skb(skb);
-                       return -EINVAL;
-               }
-       }
-#endif /* CONFIG_NETFILTER */
-       return dst_output(skb);
-}
-
  /*
   *     xmit an sk_buff (used by TCP)
   */
@@ -266,7 +221,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
         mtu = dst_mtu(dst);
         if ((skb->len <= mtu) || ipfragok) {
                 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
-               return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
+               return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
+                               dst_output);
         }
  
         if (net_ratelimit())
@@ -321,7 +277,9 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
         read_lock(&ip6_ra_lock);
         for (ra = ip6_ra_chain; ra; ra = ra->next) {
                 struct sock *sk = ra->sk;
-               if (sk && ra->sel == sel) {
+               if (sk && ra->sel == sel &&
+                   (!sk->sk_bound_dev_if ||
+                    sk->sk_bound_dev_if == skb->dev->ifindex)) {
                         if (last) {
                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                 if (skb2)
@@ -667,7 +625,7 @@ slow_path:
                  */
  
                 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
-                       NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
+                       NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                         IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
                         err = -ENOMEM;
                         goto fail;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c

index 423feb46ccc026841ea52ef771d8f85842944dc1..135383ef538f5a33573d745d17962237af3ae082 100644 (file)
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -354,7 +354,7 @@ static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name)
         int cpu;
  
         /* This can be any valid CPU ID so we don't need locking. */
-       cpu = smp_processor_id();
+       cpu = raw_smp_processor_id();
  
         list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
                 struct crypto_tfm *tfm;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c

index f3ef4c38d315fbdff7786fd04b03d435cff9270c..76466af8331e8c117b12f75b0a6740a5f9a590c5 100644 (file)
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -55,7 +55,7 @@
  
  #include <asm/uaccess.h>
  
-DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
+DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
  
  static struct packet_type ipv6_packet_type = {
         .type = __constant_htons(ETH_P_IPV6), 
@@ -109,13 +109,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
         return 0;
  }
  
-extern int ip6_mc_source(int add, int omode, struct sock *sk,
-       struct group_source_req *pgsr);
-extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
-extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
-       struct group_filter __user *optval, int __user *optlen);
-
-
  int ipv6_setsockopt(struct sock *sk, int level, int optname,
                     char __user *optval, int optlen)
  {
@@ -163,6 +156,13 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
                         fl6_free_socklist(sk);
                         ipv6_sock_mc_close(sk);
  
+                       /*
+                        * Sock is moving from IPv6 to IPv4 (sk_prot), so
+                        * remove it from the refcnt debug socks count in the
+                        * original family...
+                        */
+                       sk_refcnt_debug_dec(sk);
+
                         if (sk->sk_protocol == IPPROTO_TCP) {
                                 struct tcp_sock *tp = tcp_sk(sk);
  
@@ -192,9 +192,11 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
                                 kfree_skb(pktopt);
  
                         sk->sk_destruct = inet_sock_destruct;
-#ifdef INET_REFCNT_DEBUG
-                       atomic_dec(&inet6_sock_nr);
-#endif
+                       /*
+                        * ... and add it to the refcnt debug socks count
+                        * in the new family. -acme
+                        */
+                       sk_refcnt_debug_inc(sk);
                         module_put(THIS_MODULE);
                         retv = 0;
                         break;
@@ -437,7 +439,6 @@ done:
         }
         case MCAST_MSFILTER:
         {
-               extern int sysctl_optmem_max;
                 extern int sysctl_mld_max_msf;
                 struct group_filter *gsf;
  
@@ -504,6 +505,9 @@ done:
                 break;
         case IPV6_IPSEC_POLICY:
         case IPV6_XFRM_POLICY:
+               retv = -EPERM;
+               if (!capable(CAP_NET_ADMIN))
+                       break;
                 retv = xfrm_user_policy(sk, optname, optval, optlen);
                 break;
  
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c

index 5ade5a5d199053dc20d349a8c8f313263a240260..37a4a99c9fe9e1837e5cd82f686ff3736fd94daa 100644 (file)
--- a/net/ipv6/ipv6_syms.c
+++ b/net/ipv6/ipv6_syms.c
@@ -15,9 +15,6 @@ EXPORT_SYMBOL(ndisc_mc_map);
  EXPORT_SYMBOL(register_inet6addr_notifier);
  EXPORT_SYMBOL(unregister_inet6addr_notifier);
  EXPORT_SYMBOL(ip6_route_output);
-#ifdef CONFIG_NETFILTER
-EXPORT_SYMBOL(ip6_route_me_harder);
-#endif
  EXPORT_SYMBOL(addrconf_lock);
  EXPORT_SYMBOL(ipv6_setsockopt);
  EXPORT_SYMBOL(ipv6_getsockopt);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c

index 7ae72d4c9bd2cbc31cb23f8ad553109bd5fd657a..a7eae30f4554b86ed9d035f0fa1863fab073a115 100644 (file)
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -812,7 +812,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
                 if (ipv6_chk_acast_addr(dev, &msg->target) ||
                     (idev->cnf.forwarding && 
                      pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) {
-                       if (skb->stamp.tv_sec != LOCALLY_ENQUEUED &&
+                       if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
                             skb->pkt_type != PACKET_HOST &&
                             inc != 0 &&
                             idev->nd_parms->proxy_delay != 0) {
@@ -1487,6 +1487,8 @@ int ndisc_rcv(struct sk_buff *skb)
                 return 0;
         }
  
+       memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
+
         switch (msg->icmph.icmp6_type) {
         case NDISC_NEIGHBOUR_SOLICITATION:
                 ndisc_recv_ns(skb);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c

new file mode 100644 (file)

index 0000000..f8626eb
--- /dev/null
+++ b/net/ipv6/netfilter.c
@@ -0,0 +1,104 @@
+#include <linux/config.h>
+#include <linux/init.h>
+
+#ifdef CONFIG_NETFILTER
+
+#include <linux/kernel.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/dst.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+
+int ip6_route_me_harder(struct sk_buff *skb)
+{
+       struct ipv6hdr *iph = skb->nh.ipv6h;
+       struct dst_entry *dst;
+       struct flowi fl = {
+               .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, /* 0 when skb has no socket */
+               .nl_u =
+               { .ip6_u =
+                 { .daddr = iph->daddr,
+                   .saddr = iph->saddr, } },
+               .proto = iph->nexthdr,
+       };
+       /* Redo the route lookup from the header's current addresses and next header. */
+       dst = ip6_route_output(skb->sk, &fl);
+       /* Lookup failed: count it, log, release the returned dst and report -EINVAL. */
+       if (dst->error) {
+               IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+               LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
+               dst_release(dst);
+               return -EINVAL;
+       }
+
+       /* Drop the old route and attach the new one. */
+       dst_release(skb->dst);
+
+       skb->dst = dst;
+       return 0;       /* skb->dst now holds the result of the new lookup */
+}
+EXPORT_SYMBOL(ip6_route_me_harder);
+
+/*
+ * Extra routing may be needed on local out, as the QUEUE target never
+ * returns control to the table.
+ */
+
+struct ip6_rt_info {
+       struct in6_addr daddr;  /* iph->daddr as recorded by save() */
+       struct in6_addr saddr;  /* iph->saddr as recorded by save() */
+};
+
+static void save(const struct sk_buff *skb, struct nf_info *info)
+{
+       struct ip6_rt_info *rt_info = nf_info_reroute(info);
+       /* For NF_IP6_LOCAL_OUT, record both header addresses; reroute() compares them. */
+       if (info->hook == NF_IP6_LOCAL_OUT) {
+               struct ipv6hdr *iph = skb->nh.ipv6h;
+
+               rt_info->daddr = iph->daddr;
+               rt_info->saddr = iph->saddr;
+       }
+}
+
+static int reroute(struct sk_buff **pskb, const struct nf_info *info)
+{
+       struct ip6_rt_info *rt_info = nf_info_reroute(info);
+       /* Re-route a NF_IP6_LOCAL_OUT packet only if an address differs from the saved one. */
+       if (info->hook == NF_IP6_LOCAL_OUT) {
+               struct ipv6hdr *iph = (*pskb)->nh.ipv6h;
+               if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
+                   !ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
+                       return ip6_route_me_harder(*pskb);      /* 0 or -EINVAL */
+       }
+       return 0;       /* other hook, or addresses unchanged */
+}
+
+static struct nf_queue_rerouter ip6_reroute = {        /* registered for PF_INET6 by ipv6_netfilter_init() */
+       .rer_size       = sizeof(struct ip6_rt_info),   /* save()/reroute() use a struct ip6_rt_info */
+       .save           = &save,
+       .reroute        = &reroute,
+};
+
+int __init ipv6_netfilter_init(void)
+{      /* Register ip6_reroute for PF_INET6 and pass the result back to the caller. */
+       return nf_register_queue_rerouter(PF_INET6, &ip6_reroute);
+}
+
+void ipv6_netfilter_fini(void)
+{      /* Undo ipv6_netfilter_init(): unregister the PF_INET6 queue rerouter. */
+       nf_unregister_queue_rerouter(PF_INET6);
+}
+
+#else /* CONFIG_NETFILTER */
+int __init ipv6_netfilter_init(void)
+{      /* Built without CONFIG_NETFILTER: nothing to register. */
+       return 0;
+}
+
+void ipv6_netfilter_fini(void)
+{      /* Built without CONFIG_NETFILTER: nothing to unregister. */
+}
+#endif /* CONFIG_NETFILTER */
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig

index 77ec704c9ee34c6315c07f917bc5b17da5e5203e..216fbe1ac65c71c86c4d7545ee2c3025049670c2 100644 (file)
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -10,13 +10,16 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)"
  #  dep_tristate '  FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK
  #fi
  config IP6_NF_QUEUE
-       tristate "Userspace queueing via NETLINK"
+       tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
         ---help---
  
           This option adds a queue handler to the kernel for IPv6
-         packets which lets us to receive the filtered packets
-         with QUEUE target using libiptc as we can do with
-         the IPv4 now.
+         packets which enables users to receive the filtered packets
+         with QUEUE target using libipq.
+
+         This option enables the old IPv6-only "ip6_queue" implementation
+         which has been obsoleted by the new "nfnetlink_queue" code (see
+         CONFIG_NETFILTER_NETLINK_QUEUE).
  
           (C) Fernando Anton 2001
           IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
@@ -196,6 +199,16 @@ config IP6_NF_TARGET_LOG
  
           To compile it as a module, choose M here.  If unsure, say N.
  
+config IP6_NF_TARGET_REJECT
+       tristate "REJECT target support"
+       depends on IP6_NF_FILTER
+       help
+         The REJECT target allows a filtering rule to specify that an ICMPv6
+         error should be issued in response to an incoming packet, rather
+         than silently being dropped.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
  #  if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then
  #    dep_tristate '    REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER
  #    if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
@@ -226,6 +239,22 @@ config IP6_NF_TARGET_MARK
  
           To compile it as a module, choose M here.  If unsure, say N.
  
+config IP6_NF_TARGET_HL
+       tristate  'HL (hoplimit) target support'
+       depends on IP6_NF_MANGLE
+       help
+         This option adds a `HL' target, which enables the user to decrement
+         the hoplimit value of the IPv6 header or set it to a given (lower)
+         value.
+       
+         While it is safe to decrement the hoplimit value, this option also
+         enables functionality to increment and set the hoplimit value of the
+         IPv6 header to arbitrary values.  This is EXTREMELY DANGEROUS since
+         you can easily create immortal packets that loop forever on the
+         network.  
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
  #dep_tristate '  LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES
  config IP6_NF_RAW
         tristate  'raw table support (required for TRACE)'
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile

index 2e51714953b6576d74f328dc9c7c397131692f1f..bd9a16a5cbba3b48e9d9199eb68a16cbb60c1246 100644 (file)
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -20,7 +20,10 @@ obj-$(CONFIG_IP6_NF_MATCH_PHYSDEV) += ip6t_physdev.o
  obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
  obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
  obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o
+obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o
  obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
  obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
  obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
  obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
+obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
+obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ip6t_NFQUEUE.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c

index 5493180f0d441200675a5ecd0406617e5a7d7cf5..aa11cf366efab29a428434e014c08034b0eb54ee 100644 (file)
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -47,16 +47,10 @@
  #define NET_IPQ_QMAX 2088
  #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
  
-struct ipq_rt_info {
-       struct in6_addr daddr;
-       struct in6_addr saddr;
-};
-
  struct ipq_queue_entry {
         struct list_head list;
         struct nf_info *info;
         struct sk_buff *skb;
-       struct ipq_rt_info rt_info;
  };
  
  typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
@@ -211,6 +205,12 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
                 break;
         
         case IPQ_COPY_PACKET:
+               if (entry->skb->ip_summed == CHECKSUM_HW &&
+                   (*errp = skb_checksum_help(entry->skb,
+                                              entry->info->outdev == NULL))) {
+                       read_unlock_bh(&queue_lock);
+                       return NULL;
+               }
                 if (copy_range == 0 || copy_range > entry->skb->len)
                         data_len = entry->skb->len;
                 else
@@ -238,8 +238,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
  
         pmsg->packet_id       = (unsigned long )entry;
         pmsg->data_len        = data_len;
-       pmsg->timestamp_sec   = entry->skb->stamp.tv_sec;
-       pmsg->timestamp_usec  = entry->skb->stamp.tv_usec;
+       pmsg->timestamp_sec   = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec;
+       pmsg->timestamp_usec  = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec;
         pmsg->mark            = entry->skb->nfmark;
         pmsg->hook            = entry->info->hook;
         pmsg->hw_protocol     = entry->skb->protocol;
@@ -278,7 +278,8 @@ nlmsg_failure:
  }
  
  static int
-ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
+ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, 
+                  unsigned int queuenum, void *data)
  {
         int status = -EINVAL;
         struct sk_buff *nskb;
@@ -296,13 +297,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
         entry->info = info;
         entry->skb = skb;
  
-       if (entry->info->hook == NF_IP_LOCAL_OUT) {
-               struct ipv6hdr *iph = skb->nh.ipv6h;
-
-               entry->rt_info.daddr = iph->daddr;
-               entry->rt_info.saddr = iph->saddr;
-       }
-
         nskb = ipq_build_packet_message(entry, &status);
         if (nskb == NULL)
                 goto err_out_free;
@@ -378,22 +372,11 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
                 }
                 skb_put(e->skb, diff);
         }
-       if (!skb_ip_make_writable(&e->skb, v->data_len))
+       if (!skb_make_writable(&e->skb, v->data_len))
                 return -ENOMEM;
         memcpy(e->skb->data, v->payload, v->data_len);
-       e->skb->nfcache |= NFC_ALTERED;
-
-       /*
-        * Extra routing may needed on local out, as the QUEUE target never
-        * returns control to the table.
-         * Not a nice way to cmp, but works
-        */
-       if (e->info->hook == NF_IP_LOCAL_OUT) {
-               struct ipv6hdr *iph = e->skb->nh.ipv6h;
-               if (!ipv6_addr_equal(&iph->daddr, &e->rt_info.daddr) ||
-                   !ipv6_addr_equal(&iph->saddr, &e->rt_info.saddr))
-                       return ip6_route_me_harder(e->skb);
-       }
+       e->skb->ip_summed = CHECKSUM_NONE;
+
         return 0;
  }
  
@@ -669,6 +652,11 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
         return len;
  }
  
+static struct nf_queue_handler nfqh = {
+       .name   = "ip6_queue",
+       .outfn  = &ipq_enqueue_packet,
+};
+
  static int
  init_or_cleanup(int init)
  {
@@ -679,7 +667,8 @@ init_or_cleanup(int init)
                 goto cleanup;
  
         netlink_register_notifier(&ipq_nl_notifier);
-       ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk);
+       ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk,
+                                     THIS_MODULE);
         if (ipqnl == NULL) {
                 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
                 goto cleanup_netlink_notifier;
@@ -696,7 +685,7 @@ init_or_cleanup(int init)
         register_netdevice_notifier(&ipq_dev_notifier);
         ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
         
-       status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL);
+       status = nf_register_queue_handler(PF_INET6, &nfqh);
         if (status < 0) {
                 printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
                 goto cleanup_sysctl;
@@ -704,7 +693,7 @@ init_or_cleanup(int init)
         return status;
  
  cleanup:
-       nf_unregister_queue_handler(PF_INET6);
+       nf_unregister_queue_handlers(&nfqh);
         synchronize_net();
         ipq_flush(NF_DROP);
         
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c

index 73034511c8db49d8b03ec2f441899a2118523af1..1cb8adb2787fc59f44c15dbf0f06ca533955e6f3 100644 (file)
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -401,7 +401,6 @@ ip6t_do_table(struct sk_buff **pskb,
         do {
                 IP_NF_ASSERT(e);
                 IP_NF_ASSERT(back);
-               (*pskb)->nfcache |= e->nfcache;
                 if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
                         &protoff, &offset)) {
                         struct ip6t_entry_target *t;
@@ -434,8 +433,8 @@ ip6t_do_table(struct sk_buff **pskb,
                                                          back->comefrom);
                                         continue;
                                 }
-                               if (table_base + v
-                                   != (void *)e + e->next_offset) {
+                               if (table_base + v != (void *)e + e->next_offset
+                                   && !(e->ipv6.flags & IP6T_F_GOTO)) {
                                         /* Save old back ptr in next entry */
                                         struct ip6t_entry *next
                                                 = (void *)e + e->next_offset;
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c

new file mode 100644 (file)

index 0000000..8f5549b
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -0,0 +1,118 @@
+/* 
+ * Hop Limit modification target for ip6tables
+ * Maciej Soltysiak <solt@dns.toxicfilms.tv>
+ * Based on HW's TTL module
+ *
+ * This software is distributed under the terms of GNU GPL
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_HL.h>
+
+MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
+MODULE_DESCRIPTION("IP tables Hop Limit modification module");
+MODULE_LICENSE("GPL");
+
+/*
+ * HL target: set, raise (capped at 255) or lower (floored at 0) the hop limit.
+ * Returns NF_DROP if the skb cannot be made writable, else IP6T_CONTINUE.
+ */
+static unsigned int ip6t_hl_target(struct sk_buff **pskb,
+                                  const struct net_device *in,
+                                  const struct net_device *out,
+                                  unsigned int hooknum,
+                                  const void *targinfo, void *userinfo)
+{
+       struct ipv6hdr *ip6h;
+       const struct ip6t_HL_info *info = targinfo;
+       int new_hl;
+
+       if (!skb_make_writable(pskb, (*pskb)->len))
+               return NF_DROP;
+
+       ip6h = (*pskb)->nh.ipv6h;
+
+       switch (info->mode) {
+               case IP6T_HL_SET:
+                       new_hl = info->hop_limit;
+                       break;
+               case IP6T_HL_INC:
+                       new_hl = ip6h->hop_limit + info->hop_limit;
+                       if (new_hl > 255)
+                               new_hl = 255;
+                       break;
+               case IP6T_HL_DEC:
+                       new_hl = ip6h->hop_limit - info->hop_limit;
+                       if (new_hl < 0)
+                               new_hl = 0;
+                       break;
+               default:
+                       new_hl = ip6h->hop_limit;
+                       break;
+       }
+       /* IPv6 has no header checksum, so the field is simply overwritten. */
+       if (new_hl != ip6h->hop_limit)
+               ip6h->hop_limit = new_hl;
+
+       return IP6T_CONTINUE;
+}
+
+static int ip6t_hl_checkentry(const char *tablename,
+               const struct ip6t_entry *e,     /* not used in this function */
+               void *targinfo,
+               unsigned int targinfosize,
+               unsigned int hook_mask)         /* not used in this function */
+{
+       struct ip6t_HL_info *info = targinfo;
+       /* Returns 1 when every check below passes, 0 (after a warning) otherwise. */
+       if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_HL_info))) {
+               printk(KERN_WARNING "ip6t_HL: targinfosize %u != %Zu\n",
+                               targinfosize,
+                               IP6T_ALIGN(sizeof(struct ip6t_HL_info)));
+               return 0;       
+       }       
+       /* Only the "mangle" table may use this target. */
+       if (strcmp(tablename, "mangle")) {
+               printk(KERN_WARNING "ip6t_HL: can only be called from "
+                       "\"mangle\" table, not \"%s\"\n", tablename);
+               return 0;
+       }
+       /* mode must not exceed IP6T_HL_MAXMODE. */
+       if (info->mode > IP6T_HL_MAXMODE) {
+               printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", 
+                       info->mode);
+               return 0;
+       }
+       /* A hop_limit of 0 is accepted only for IP6T_HL_SET. */
+       if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) {
+               printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't "
+                       "make sense with value 0\n");
+               return 0;
+       }
+       
+       return 1;
+}
+
+static struct ip6t_target ip6t_HL = {  /* registered by init(), unregistered by fini() */
+       .name           = "HL", 
+       .target         = ip6t_hl_target, /* rewrites the hop limit */
+       .checkentry     = ip6t_hl_checkentry, /* validates targinfo, table name, mode and value */
+       .me             = THIS_MODULE
+};
+
+static int __init init(void)
+{      /* module_init() hook: register the HL target and return the result. */
+       return ip6t_register_target(&ip6t_HL);
+}
+
+static void __exit fini(void)
+{      /* module_exit() hook: unregister the HL target. */
+       ip6t_unregister_target(&ip6t_HL);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c

index a692e26a4fa37715eecdc87bbb647efcc7f796e7..0cd1d1bd9033ced7e87f26ef5193983e24951159 100644 (file)
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -26,10 +26,6 @@ MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
  MODULE_DESCRIPTION("IP6 tables LOG target module");
  MODULE_LICENSE("GPL");
  
-static unsigned int nflog = 1;
-module_param(nflog, int, 0400);
-MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
- 
  struct in_device;
  #include <net/route.h>
  #include <linux/netfilter_ipv6/ip6t_LOG.h>
@@ -44,7 +40,7 @@ struct in_device;
  static DEFINE_SPINLOCK(log_lock);
  
  /* One level of recursion won't kill us */
-static void dump_packet(const struct ip6t_log_info *info,
+static void dump_packet(const struct nf_loginfo *info,
                         const struct sk_buff *skb, unsigned int ip6hoff,
                         int recurse)
  {
@@ -53,6 +49,12 @@ static void dump_packet(const struct ip6t_log_info *info,
         struct ipv6hdr _ip6h, *ih;
         unsigned int ptr;
         unsigned int hdrlen = 0;
+       unsigned int logflags;
+
+       if (info->type == NF_LOG_TYPE_LOG)
+               logflags = info->u.log.logflags;
+       else
+               logflags = NF_LOG_MASK;
  
         ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
         if (ih == NULL) {
@@ -84,7 +86,7 @@ static void dump_packet(const struct ip6t_log_info *info,
                 }
  
                 /* Max length: 48 "OPT (...) " */
-               if (info->logflags & IP6T_LOG_IPOPT)
+               if (logflags & IP6T_LOG_IPOPT)
                         printk("OPT ( ");
  
                 switch (currenthdr) {
@@ -119,7 +121,7 @@ static void dump_packet(const struct ip6t_log_info *info,
                 case IPPROTO_ROUTING:
                 case IPPROTO_HOPOPTS:
                         if (fragment) {
-                               if (info->logflags & IP6T_LOG_IPOPT)
+                               if (logflags & IP6T_LOG_IPOPT)
                                         printk(")");
                                 return;
                         }
@@ -127,7 +129,7 @@ static void dump_packet(const struct ip6t_log_info *info,
                         break;
                 /* Max Length */
                 case IPPROTO_AH:
-                       if (info->logflags & IP6T_LOG_IPOPT) {
+                       if (logflags & IP6T_LOG_IPOPT) {
                                 struct ip_auth_hdr _ahdr, *ah;
  
                                 /* Max length: 3 "AH " */
@@ -158,7 +160,7 @@ static void dump_packet(const struct ip6t_log_info *info,
                         hdrlen = (hp->hdrlen+2)<<2;
                         break;
                 case IPPROTO_ESP:
-                       if (info->logflags & IP6T_LOG_IPOPT) {
+                       if (logflags & IP6T_LOG_IPOPT) {
                                 struct ip_esp_hdr _esph, *eh;
  
                                 /* Max length: 4 "ESP " */
@@ -190,7 +192,7 @@ static void dump_packet(const struct ip6t_log_info *info,
                         printk("Unknown Ext Hdr %u", currenthdr);
                         return;
                 }
-               if (info->logflags & IP6T_LOG_IPOPT)
+               if (logflags & IP6T_LOG_IPOPT)
                         printk(") ");
  
                 currenthdr = hp->nexthdr;
@@ -218,7 +220,7 @@ static void dump_packet(const struct ip6t_log_info *info,
                 printk("SPT=%u DPT=%u ",
                        ntohs(th->source), ntohs(th->dest));
                 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
-               if (info->logflags & IP6T_LOG_TCPSEQ)
+               if (logflags & IP6T_LOG_TCPSEQ)
                         printk("SEQ=%u ACK=%u ",
                                ntohl(th->seq), ntohl(th->ack_seq));
                 /* Max length: 13 "WINDOW=65535 " */
@@ -245,7 +247,7 @@ static void dump_packet(const struct ip6t_log_info *info,
                 /* Max length: 11 "URGP=65535 " */
                 printk("URGP=%u ", ntohs(th->urg_ptr));
  
-               if ((info->logflags & IP6T_LOG_TCPOPT)
+               if ((logflags & IP6T_LOG_TCPOPT)
                     && th->doff * 4 > sizeof(struct tcphdr)) {
                         u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
                         unsigned int i;
@@ -349,7 +351,7 @@ static void dump_packet(const struct ip6t_log_info *info,
         }
  
         /* Max length: 15 "UID=4294967295 " */
-       if ((info->logflags & IP6T_LOG_UID) && recurse && skb->sk) {
+       if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
                 read_lock_bh(&skb->sk->sk_callback_lock);
                 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
                         printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
@@ -357,19 +359,31 @@ static void dump_packet(const struct ip6t_log_info *info,
         }
  }
  
+/*
+ * Fallback parameters used by ip6t_log_packet() when it is called without
+ * an nf_loginfo: log with every flag in NF_LOG_MASK set.
+ *
+ * The level is printed verbatim as the "<%d>" prefix of the log line, so
+ * it has to be 4 (the number behind KERN_WARNING, which the ip6t_logfn()
+ * this replaces passed explicitly).  A level of 0 would tag every
+ * default-logged packet as "<0>", i.e. KERN_EMERG.
+ */
+static struct nf_loginfo default_loginfo = {
+       .type   = NF_LOG_TYPE_LOG,
+       .u = {
+               .log = {
+                       .level    = 4,
+                       .logflags = NF_LOG_MASK,
+               },
+       },
+};
+
  static void
-ip6t_log_packet(unsigned int hooknum,
+ip6t_log_packet(unsigned int pf,
+               unsigned int hooknum,
                 const struct sk_buff *skb,
                 const struct net_device *in,
                 const struct net_device *out,
-               const struct ip6t_log_info *loginfo,
-               const char *level_string,
+               const struct nf_loginfo *loginfo,
                 const char *prefix)
  {
+       if (!loginfo)
+               loginfo = &default_loginfo;
+
         spin_lock_bh(&log_lock);
-       printk(level_string);
-       printk("%sIN=%s OUT=%s ",
-               prefix == NULL ? loginfo->prefix : prefix,
+       printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, 
+               prefix,
                 in ? in->name : "",
                 out ? out->name : "");
         if (in && !out) {
@@ -416,29 +430,17 @@ ip6t_log_target(struct sk_buff **pskb,
                 void *userinfo)
  {
         const struct ip6t_log_info *loginfo = targinfo;
-       char level_string[4] = "< >";
+       struct nf_loginfo li;
+
+       li.type = NF_LOG_TYPE_LOG;
+       li.u.log.level = loginfo->level;
+       li.u.log.logflags = loginfo->logflags;
  
-       level_string[1] = '0' + (loginfo->level % 8);
-       ip6t_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL);
+       nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, loginfo->prefix);
  
         return IP6T_CONTINUE;
  }
  
-static void
-ip6t_logfn(unsigned int hooknum,
-          const struct sk_buff *skb,
-          const struct net_device *in,
-          const struct net_device *out,
-          const char *prefix)
-{
-       struct ip6t_log_info loginfo = {
-               .level = 0,
-               .logflags = IP6T_LOG_MASK,
-               .prefix = ""
-       };
-
-       ip6t_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix);
-}
  
  static int ip6t_log_checkentry(const char *tablename,
                                const struct ip6t_entry *e,
@@ -475,20 +477,29 @@ static struct ip6t_target ip6t_log_reg = {
         .me             = THIS_MODULE,
  };
  
+/* Logger descriptor passed to nf_log_register() for PF_INET6 at module
+ * load and to nf_log_unregister_logger() at unload; its log callback is
+ * ip6t_log_packet(). */
+static struct nf_logger ip6t_logger = {
+       .name           = "ip6t_LOG",
+       .logfn          = &ip6t_log_packet,
+       .me             = THIS_MODULE,
+};
+
  static int __init init(void)
  {
         if (ip6t_register_target(&ip6t_log_reg))
                 return -EINVAL;
-       if (nflog)
-               nf_log_register(PF_INET6, &ip6t_logfn);
+       if (nf_log_register(PF_INET6, &ip6t_logger) < 0) {
+               printk(KERN_WARNING "ip6t_LOG: not logging via system console "
+                      "since somebody else already registered for PF_INET6\n");
+               /* we cannot make module load fail here, since otherwise
+                * ip6tables userspace would abort */
+       }
  
         return 0;
  }
  
  static void __exit fini(void)
  {
-       if (nflog)
-               nf_log_unregister(PF_INET6, &ip6t_logfn);
+       nf_log_unregister_logger(&ip6t_logger);
         ip6t_unregister_target(&ip6t_log_reg);
  }
  
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c

index d09ceb05013a90346f916a44a5c77830164a4250..81924fcc5857afd71f571cdda023448ef2d7931e 100644 (file)
--- a/net/ipv6/netfilter/ip6t_MARK.c
+++ b/net/ipv6/netfilter/ip6t_MARK.c
@@ -28,10 +28,9 @@ target(struct sk_buff **pskb,
  {
         const struct ip6t_mark_target_info *markinfo = targinfo;
  
-       if((*pskb)->nfmark != markinfo->mark) {
+       if((*pskb)->nfmark != markinfo->mark)
                 (*pskb)->nfmark = markinfo->mark;
-               (*pskb)->nfcache |= NFC_ALTERED;
-       }
+
         return IP6T_CONTINUE;
  }
  
diff --git a/net/ipv6/netfilter/ip6t_NFQUEUE.c b/net/ipv6/netfilter/ip6t_NFQUEUE.c

new file mode 100644 (file)

index 0000000..c6e3730
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_NFQUEUE.c
@@ -0,0 +1,70 @@
+/* ip6tables module for using new netfilter netlink queue
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as 
+ * published by the Free Software Foundation.
+ * 
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv4/ipt_NFQUEUE.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("ip6tables NFQUEUE target");
+MODULE_LICENSE("GPL");
+
+/* Target hook: build the verdict with NF_QUEUE_NR() from the queue number
+ * stored in the rule's target data.  The packet and the devices are not
+ * looked at. */
+static unsigned int
+target(struct sk_buff **pskb,
+       const struct net_device *in,
+       const struct net_device *out,
+       unsigned int hooknum,
+       const void *targinfo,
+       void *userinfo)
+{
+       const struct ipt_NFQ_info *info = targinfo;
+       unsigned int verdict = NF_QUEUE_NR(info->queuenum);
+
+       return verdict;
+}
+
+/* Rule validation hook: the rule is accepted (1) only when the target data
+ * has exactly the aligned size of struct ipt_NFQ_info; otherwise the
+ * mismatch is logged and the rule is refused (0). */
+static int
+checkentry(const char *tablename,
+           const struct ip6t_entry *e,
+           void *targinfo,
+           unsigned int targinfosize,
+           unsigned int hook_mask)
+{
+       if (targinfosize == IP6T_ALIGN(sizeof(struct ipt_NFQ_info)))
+               return 1;
+
+       printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n",
+              targinfosize,
+              IP6T_ALIGN(sizeof(struct ipt_NFQ_info)));
+       return 0;
+}
+
+/* ip6tables target descriptor for "NFQUEUE": ties the name to the target()
+ * and checkentry() hooks defined in this file. */
+static struct ip6t_target ipt_NFQ_reg = {
+       .name           = "NFQUEUE",
+       .target         = target,
+       .checkentry     = checkentry,
+       .me             = THIS_MODULE,
+};
+
+/* Module load hook: register the NFQUEUE target and return the result of
+ * the registration call unchanged. */
+static int __init init(void)
+{
+       int err = ip6t_register_target(&ipt_NFQ_reg);
+
+       return err;
+}
+
+/* Module unload hook: unregister the NFQUEUE target. */
+static void __exit fini(void)
+{
+       ip6t_unregister_target(&ipt_NFQ_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c

new file mode 100644 (file)

index 0000000..14316c3
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -0,0 +1,284 @@
+/*
+ * IP6 tables REJECT target module
+ * Linux INET6 implementation
+ *
+ * Copyright (C)2003 USAGI/WIDE Project
+ *
+ * Authors:
+ *     Yasuyuki Kozakai        <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * Based on net/ipv4/netfilter/ipt_REJECT.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmpv6.h>
+#include <linux/netdevice.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+#include <net/icmp.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/flow.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_REJECT.h>
+
+MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
+MODULE_DESCRIPTION("IP6 tables REJECT target module");
+MODULE_LICENSE("GPL");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/*
+ * Send a TCP RST in reply to oldskb.
+ *
+ * Nothing is sent when either address of oldskb is not unicast, when no
+ * complete TCP header can be located, when the segment is itself a RST,
+ * when its TCP checksum does not verify, or when no route or buffer can
+ * be obtained for the reply.  The reply swaps the addresses and ports of
+ * oldskb, carries no payload and is passed through the LOCAL_OUT hook.
+ */
+static void send_reset(struct sk_buff *oldskb)
+{
+       struct sk_buff *nskb;
+       struct tcphdr otcph, *tcph;
+       unsigned int otcplen, hh_len;
+       int tcphoff, needs_ack;
+       struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h;
+       struct dst_entry *dst = NULL;
+       u8 proto;
+       struct flowi fl;
+
+       if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
+           (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
+               DEBUGP("ip6t_REJECT: addr is not unicast.\n");
+               return;
+       }
+
+       /* Skip the extension headers to find the transport header. */
+       proto = oip6h->nexthdr;
+       tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto);
+
+       if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
+               DEBUGP("ip6t_REJECT: Can't get TCP header.\n");
+               return;
+       }
+
+       otcplen = oldskb->len - tcphoff;
+
+       /* IP header checks: fragment, too short. */
+       if ((proto != IPPROTO_TCP) || (otcplen < sizeof(struct tcphdr))) {
+               DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n",
+                       proto, otcplen);
+               return;
+       }
+
+       /* Cannot fail: the length was checked just above. */
+       if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
+               BUG();
+
+       /* No RST for RST. */
+       if (otcph.rst) {
+               DEBUGP("ip6t_REJECT: RST is set\n");
+               return;
+       }
+
+       /* Check checksum. */
+       if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP,
+                           skb_checksum(oldskb, tcphoff, otcplen, 0))) {
+               DEBUGP("ip6t_REJECT: TCP checksum is invalid\n");
+               return;
+       }
+
+       /* Route the reply: addresses and ports of the original, swapped. */
+       memset(&fl, 0, sizeof(fl));
+       fl.proto = IPPROTO_TCP;
+       ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr);
+       ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
+       fl.fl_ip_sport = otcph.dest;
+       fl.fl_ip_dport = otcph.source;
+       dst = ip6_route_output(NULL, &fl);
+       if (dst == NULL)
+               return;
+       if (dst->error ||
+           xfrm_lookup(&dst, &fl, NULL, 0)) {
+               dst_release(dst);
+               return;
+       }
+
+       hh_len = (dst->dev->hard_header_len + 15)&~15;
+       nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
+                        + sizeof(struct tcphdr) + dst->trailer_len,
+                        GFP_ATOMIC);
+
+       if (!nskb) {
+               if (net_ratelimit())
+                       printk("ip6t_REJECT: Can't alloc skb\n");
+               dst_release(dst);
+               return;
+       }
+
+       nskb->dst = dst;
+
+       skb_reserve(nskb, hh_len + dst->header_len);
+
+       ip6h = nskb->nh.ipv6h = (struct ipv6hdr *)
+                                       skb_put(nskb, sizeof(struct ipv6hdr));
+       /* Start from an all-zero header: only some fields are assigned
+        * below, and the priority bits and the flow label would otherwise
+        * go out with whatever the fresh buffer happened to contain. */
+       memset(ip6h, 0, sizeof(struct ipv6hdr));
+       ip6h->version = 6;
+       ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
+       ip6h->nexthdr = IPPROTO_TCP;
+       ip6h->payload_len = htons(sizeof(struct tcphdr));
+       ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
+       ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
+
+       tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
+       /* Likewise clear the TCP header, so that the reserved bits sharing
+        * a byte with doff are zero. */
+       memset(tcph, 0, sizeof(struct tcphdr));
+       /* Truncate to length (no data) */
+       tcph->doff = sizeof(struct tcphdr)/4;
+       tcph->source = otcph.dest;
+       tcph->dest = otcph.source;
+
+       if (otcph.ack) {
+               /* Original carried an ACK: reply from that sequence number. */
+               needs_ack = 0;
+               tcph->seq = otcph.ack_seq;
+               tcph->ack_seq = 0;
+       } else {
+               /* Otherwise acknowledge everything the original occupied in
+                * sequence space: SYN, FIN and payload bytes. */
+               needs_ack = 1;
+               tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
+                                     + otcplen - (otcph.doff<<2));
+               tcph->seq = 0;
+       }
+
+       /* Reset flags */
+       ((u_int8_t *)tcph)[13] = 0;
+       tcph->rst = 1;
+       tcph->ack = needs_ack;
+       tcph->window = 0;
+       tcph->urg_ptr = 0;
+       tcph->check = 0;
+
+       /* Adjust TCP checksum */
+       tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr,
+                                     &nskb->nh.ipv6h->daddr,
+                                     sizeof(struct tcphdr), IPPROTO_TCP,
+                                     csum_partial((char *)tcph,
+                                                  sizeof(struct tcphdr), 0));
+
+       NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
+               dst_output);
+}
+
+/*
+ * Send an ICMPv6 destination-unreachable error with the given code in
+ * reply to skb_in.
+ *
+ * A packet seen on the LOCAL_OUT hook without a device gets the loopback
+ * device assigned first.
+ * NOTE(review): presumably icmpv6_send() needs skb_in->dev to be set --
+ * confirm.
+ */
+static inline void
+send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
+{
+       if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL)
+               skb_in->dev = &loopback_dev;
+
+       icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
+}
+
+/*
+ * REJECT target hook: send the reply selected by the rule (an ICMPv6
+ * destination-unreachable error or a TCP RST) and always return NF_DROP.
+ */
+static unsigned int reject6_target(struct sk_buff **pskb,
+                          const struct net_device *in,
+                          const struct net_device *out,
+                          unsigned int hooknum,
+                          const void *targinfo,
+                          void *userinfo)
+{
+       const struct ip6t_reject_info *reject = targinfo;
+       int unreach_code = -1;  /* negative: no ICMPv6 error to send */
+
+       DEBUGP(KERN_DEBUG "%s: medium point\n", __FUNCTION__);
+       /* WARNING: This code causes reentry within ip6tables.
+          This means that the ip6tables jump stack is now crap.  We
+          must return an absolute verdict. --RR */
+       switch (reject->with) {
+       case IP6T_ICMP6_NO_ROUTE:
+               unreach_code = ICMPV6_NOROUTE;
+               break;
+       case IP6T_ICMP6_ADM_PROHIBITED:
+               unreach_code = ICMPV6_ADM_PROHIBITED;
+               break;
+       case IP6T_ICMP6_NOT_NEIGHBOUR:
+               unreach_code = ICMPV6_NOT_NEIGHBOUR;
+               break;
+       case IP6T_ICMP6_ADDR_UNREACH:
+               unreach_code = ICMPV6_ADDR_UNREACH;
+               break;
+       case IP6T_ICMP6_PORT_UNREACH:
+               unreach_code = ICMPV6_PORT_UNREACH;
+               break;
+       case IP6T_TCP_RESET:
+               send_reset(*pskb);
+               break;
+       case IP6T_ICMP6_ECHOREPLY:
+               /* Do nothing */
+               break;
+       default:
+               if (net_ratelimit())
+                       printk(KERN_WARNING "ip6t_REJECT: case %u not handled yet\n", reject->with);
+               break;
+       }
+
+       if (unreach_code >= 0)
+               send_unreach(*pskb, unreach_code, hooknum);
+
+       return NF_DROP;
+}
+
+/*
+ * Validate a REJECT rule.  Returns 1 to accept it and 0 to refuse it.
+ *
+ * A rule is accepted only if its target data has the aligned size of
+ * struct ip6t_reject_info, it sits in the "filter" table, it can be reached
+ * from no hook other than LOCAL_IN, FORWARD and LOCAL_OUT, it does not ask
+ * for an echo reply, and -- when it asks for a TCP reset -- the rule
+ * itself matches TCP only (not inverted).
+ */
+static int check(const char *tablename,
+                const struct ip6t_entry *e,
+                void *targinfo,
+                unsigned int targinfosize,
+                unsigned int hook_mask)
+{
+       const struct ip6t_reject_info *rejinfo = targinfo;
+
+       if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_reject_info))) {
+               /* Report the size that was actually expected. */
+               DEBUGP("ip6t_REJECT: targinfosize %u != %Zu\n",
+                      targinfosize,
+                      IP6T_ALIGN(sizeof(struct ip6t_reject_info)));
+               return 0;
+       }
+
+       /* Only allow these for packet filtering. */
+       if (strcmp(tablename, "filter") != 0) {
+               DEBUGP("ip6t_REJECT: bad table `%s'.\n", tablename);
+               return 0;
+       }
+
+       if ((hook_mask & ~((1 << NF_IP6_LOCAL_IN)
+                          | (1 << NF_IP6_FORWARD)
+                          | (1 << NF_IP6_LOCAL_OUT))) != 0) {
+               DEBUGP("ip6t_REJECT: bad hook mask %X\n", hook_mask);
+               return 0;
+       }
+
+       if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) {
+               /* Give the message an explicit log level. */
+               printk(KERN_WARNING "ip6t_REJECT: ECHOREPLY is not supported.\n");
+               return 0;
+       } else if (rejinfo->with == IP6T_TCP_RESET) {
+               /* Must specify that it's a TCP packet */
+               if (e->ipv6.proto != IPPROTO_TCP
+                   || (e->ipv6.invflags & IP6T_INV_PROTO)) {
+                       DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n");
+                       return 0;
+               }
+       }
+
+       return 1;
+}
+
+/* ip6tables target descriptor for "REJECT": ties the name to the
+ * reject6_target() and check() hooks defined in this file. */
+static struct ip6t_target ip6t_reject_reg = {
+       .name           = "REJECT",
+       .target         = reject6_target,
+       .checkentry     = check,
+       .me             = THIS_MODULE
+};
+
+/* Module load hook: register the REJECT target.  The result of
+ * ip6t_register_target() is returned as is, so a failure reports its real
+ * error code instead of a blanket -EINVAL. */
+static int __init init(void)
+{
+       return ip6t_register_target(&ip6t_reject_reg);
+}
+
+/* Module unload hook: unregister the REJECT target. */
+static void __exit fini(void)
+{
+       ip6t_unregister_target(&ip6t_reject_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c

index ab0e32d3de462fea928017ae728374b0c1e1a200..9b91decbfddba43f44f05ab1fe104e9c4c016000 100644 (file)
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -20,71 +20,6 @@ MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
  MODULE_DESCRIPTION("IP6 tables owner matching module");
  MODULE_LICENSE("GPL");
  
-static int
-match_pid(const struct sk_buff *skb, pid_t pid)
-{
-       struct task_struct *p;
-       struct files_struct *files;
-       int i;
-
-       read_lock(&tasklist_lock);
-       p = find_task_by_pid(pid);
-       if (!p)
-               goto out;
-       task_lock(p);
-       files = p->files;
-       if(files) {
-               spin_lock(&files->file_lock);
-               for (i=0; i < files->max_fds; i++) {
-                       if (fcheck_files(files, i) == skb->sk->sk_socket->file) {
-                               spin_unlock(&files->file_lock);
-                               task_unlock(p);
-                               read_unlock(&tasklist_lock);
-                               return 1;
-                       }
-               }
-               spin_unlock(&files->file_lock);
-       }
-       task_unlock(p);
-out:
-       read_unlock(&tasklist_lock);
-       return 0;
-}
-
-static int
-match_sid(const struct sk_buff *skb, pid_t sid)
-{
-       struct task_struct *g, *p;
-       struct file *file = skb->sk->sk_socket->file;
-       int i, found=0;
-
-       read_lock(&tasklist_lock);
-       do_each_thread(g, p) {
-               struct files_struct *files;
-               if (p->signal->session != sid)
-                       continue;
-
-               task_lock(p);
-               files = p->files;
-               if (files) {
-                       spin_lock(&files->file_lock);
-                       for (i=0; i < files->max_fds; i++) {
-                               if (fcheck_files(files, i) == file) {
-                                       found = 1;
-                                       break;
-                               }
-                       }
-                       spin_unlock(&files->file_lock);
-               }
-               task_unlock(p);
-               if (found)
-                       goto out;
-       } while_each_thread(g, p);
-out:
-       read_unlock(&tasklist_lock);
-
-       return found;
-}
  
  static int
  match(const struct sk_buff *skb,
@@ -112,18 +47,6 @@ match(const struct sk_buff *skb,
                         return 0;
         }
  
-       if(info->match & IP6T_OWNER_PID) {
-               if (!match_pid(skb, info->pid) ^
-                   !!(info->invert & IP6T_OWNER_PID))
-                       return 0;
-       }
-
-       if(info->match & IP6T_OWNER_SID) {
-               if (!match_sid(skb, info->sid) ^
-                   !!(info->invert & IP6T_OWNER_SID))
-                       return 0;
-       }
-
         return 1;
  }
  
@@ -134,6 +57,8 @@ checkentry(const char *tablename,
             unsigned int matchsize,
             unsigned int hook_mask)
  {
+       const struct ip6t_owner_info *info = matchinfo;
+
          if (hook_mask
              & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) {
                  printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n");
@@ -142,14 +67,13 @@ checkentry(const char *tablename,
  
         if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_owner_info)))
                 return 0;
-#ifdef CONFIG_SMP
-       /* files->file_lock can not be used in a BH */
-       if (((struct ip6t_owner_info *)matchinfo)->match
-           & (IP6T_OWNER_PID|IP6T_OWNER_SID)) {
-               printk("ip6t_owner: pid and sid matching is broken on SMP.\n");
+
+       if (info->match & (IP6T_OWNER_PID|IP6T_OWNER_SID)) {
+               printk("ipt_owner: pid and sid matching "
+                      "not supported anymore\n");
                 return 0;
         }
-#endif
+
         return 1;
  }
  
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c

index e2b848ec98513ac9ababcc9f3d588c0fd93efc84..7a5863298f3f8efe49977a9a7d39edf5fba71442 100644 (file)
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -49,6 +49,7 @@
  #include <net/transp_v6.h>
  #include <net/udp.h>
  #include <net/inet_common.h>
+#include <net/tcp_states.h>
  
  #include <net/rawv6.h>
  #include <net/xfrm.h>
@@ -81,7 +82,8 @@ static void raw_v6_unhash(struct sock *sk)
  
  /* Grumble... icmp and ip_input want to get at this... */
  struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
-                            struct in6_addr *loc_addr, struct in6_addr *rmt_addr)
+                            struct in6_addr *loc_addr, struct in6_addr *rmt_addr,
+                            int dif)
  {
         struct hlist_node *node;
         int is_multicast = ipv6_addr_is_multicast(loc_addr);
@@ -94,6 +96,9 @@ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
                             !ipv6_addr_equal(&np->daddr, rmt_addr))
                                 continue;
  
+                       if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+                               continue;
+
                         if (!ipv6_addr_any(&np->rcv_saddr)) {
                                 if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
                                         goto found;
@@ -137,11 +142,12 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
   *
   *     Caller owns SKB so we must make clones.
   */
-void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
+int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
  {
         struct in6_addr *saddr;
         struct in6_addr *daddr;
         struct sock *sk;
+       int delivered = 0;
         __u8 hash;
  
         saddr = &skb->nh.ipv6h->saddr;
@@ -160,9 +166,10 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
         if (sk == NULL)
                 goto out;
  
-       sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr);
+       sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, skb->dev->ifindex);
  
         while (sk) {
+               delivered = 1;
                 if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) {
                         struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
  
@@ -170,10 +177,12 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
                         if (clone)
                                 rawv6_rcv(sk, clone);
                 }
-               sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr);
+               sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr,
+                                    skb->dev->ifindex);
         }
  out:
         read_unlock(&raw_v6_lock);
+       return delivered;
  }
  
  /* This cleans up af_inet6 a bit. -DaveM */
@@ -328,12 +337,13 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
  
         if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
                 if (skb->ip_summed == CHECKSUM_HW) {
+                       skb_postpull_rcsum(skb, skb->nh.raw,
+                                          skb->h.raw - skb->nh.raw);
                         skb->ip_summed = CHECKSUM_UNNECESSARY;
                         if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
                                             &skb->nh.ipv6h->daddr,
                                             skb->len, inet->num, skb->csum)) {
-                               LIMIT_NETDEBUG(
-                               printk(KERN_DEBUG "raw v6 hw csum failure.\n"));
+                               LIMIT_NETDEBUG(KERN_DEBUG "raw v6 hw csum failure.\n");
                                 skb->ip_summed = CHECKSUM_NONE;
                         }
                 }
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c

index 59e7c631787279bef822efccb9ea4e51813bcc6e..9d9e04344c777c8bd331b7b1f5e566fae38351e4 100644 (file)
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -562,7 +562,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
         if (skb->dev)
                 fq->iif = skb->dev->ifindex;
         skb->dev = NULL;
-       fq->stamp = skb->stamp;
+       skb_get_timestamp(skb, &fq->stamp);
         fq->meat += skb->len;
         atomic_add(skb->truesize, &ip6_frag_mem);
  
@@ -664,7 +664,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
  
         head->next = NULL;
         head->dev = dev;
-       head->stamp = fq->stamp;
+       skb_set_timestamp(head, &fq->stamp);
         head->nh.ipv6h->payload_len = htons(payload_len);
  
         *skb_in = head;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c

index 878789b3122de74a29ffe4b9cf59963d35bb7b11..5d5bbb49ec7893811743477e45ed1e6a651b2115 100644 (file)
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1372,7 +1372,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
   *     Drop the packet on the floor
   */
  
-int ip6_pkt_discard(struct sk_buff *skb)
+static int ip6_pkt_discard(struct sk_buff *skb)
  {
         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
@@ -1380,7 +1380,7 @@ int ip6_pkt_discard(struct sk_buff *skb)
         return 0;
  }
  
-int ip6_pkt_discard_out(struct sk_buff *skb)
+static int ip6_pkt_discard_out(struct sk_buff *skb)
  {
         skb->dev = skb->dst->dev;
         return ip6_pkt_discard(skb);
@@ -1850,16 +1850,16 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
         
         skb = alloc_skb(size, gfp_any());
         if (!skb) {
-               netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
+               netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
                 return;
         }
         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
                 kfree_skb(skb);
-               netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
+               netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
                 return;
         }
-       NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
-       netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
+       NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
+       netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
  }
  
  /*
@@ -1960,8 +1960,6 @@ static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
         return arg.len;
  }
  
-extern struct rt6_statistics rt6_stats;
-
  static int rt6_stats_seq_show(struct seq_file *seq, void *v)
  {
         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c

index b788f55e139b80756e23ef024335c3d2024d9f63..c3123c9e1a8e9510511d14ca4ac58f5f63d09c46 100644 (file)
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -195,7 +195,6 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
         dev_hold(dev);
  
         ipip6_tunnel_link(nt);
-       /* Do not decrement MOD_USE_COUNT here. */
         return nt;
  
  failed:
@@ -771,7 +770,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
         return 0;
  }
  
-int __init ipip6_fb_tunnel_init(struct net_device *dev)
+static int __init ipip6_fb_tunnel_init(struct net_device *dev)
  {
         struct ip_tunnel *tunnel = dev->priv;
         struct iphdr *iph = &tunnel->parms.iph;
@@ -794,10 +793,28 @@ static struct net_protocol sit_protocol = {
         .err_handler    =       ipip6_err,
  };
  
+/*
+ * Unregister the net_device of every tunnel still linked in the hash
+ * tables tunnels[1] .. tunnels[3]; tunnels[0] is not walked here.
+ *
+ * NOTE(review): each bucket is re-read until it is empty, so this relies
+ * on unregister_netdevice() getting the tunnel unlinked from its bucket --
+ * confirm against the device teardown path.
+ */
+static void __exit sit_destroy_tunnels(void)
+{
+       int prio, h;
+
+       for (prio = 1; prio < 4; prio++) {
+               for (h = 0; h < HASH_SIZE; h++) {
+                       struct ip_tunnel *t = tunnels[prio][h];
+
+                       while (t != NULL) {
+                               unregister_netdevice(t->dev);
+                               t = tunnels[prio][h];
+                       }
+               }
+       }
+}
+
  void __exit sit_cleanup(void)
  {
         inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
-       unregister_netdev(ipip6_fb_tunnel_dev);
+
+       rtnl_lock();
+       sit_destroy_tunnels();
+       unregister_netdevice(ipip6_fb_tunnel_dev);
+       rtnl_unlock();
  }
  
  int __init sit_init(void)
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c

index 3a18e0e6ffed8793e953ed231b0e73abff972661..8eff9fa1e983312b38926e1d5e3286d7474f84b6 100644 (file)
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -14,9 +14,6 @@
  #include <net/ipv6.h>
  #include <net/addrconf.h>
  
-extern ctl_table ipv6_route_table[];
-extern ctl_table ipv6_icmp_table[];
-
  #ifdef CONFIG_SYSCTL
  
  static ctl_table ipv6_table[] = {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c

index f6e288dc116ede93c2f755075c641303ca4bca47..794734f1d230b3b5a5449eb013f4d0545bcb7871 100644 (file)
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -47,6 +47,7 @@
  
  #include <net/tcp.h>
  #include <net/ndisc.h>
+#include <net/inet6_hashtables.h>
  #include <net/ipv6.h>
  #include <net/transp_v6.h>
  #include <net/addrconf.h>
@@ -75,34 +76,11 @@ static int  tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
  static struct tcp_func ipv6_mapped;
  static struct tcp_func ipv6_specific;
  
-/* I have no idea if this is a good hash for v6 or not. -DaveM */
-static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
-                                   struct in6_addr *faddr, u16 fport)
+static inline int tcp_v6_bind_conflict(const struct sock *sk,
+                                      const struct inet_bind_bucket *tb)
  {
-       int hashent = (lport ^ fport);
-
-       hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
-       hashent ^= hashent>>16;
-       hashent ^= hashent>>8;
-       return (hashent & (tcp_ehash_size - 1));
-}
-
-static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
-{
-       struct inet_sock *inet = inet_sk(sk);
-       struct ipv6_pinfo *np = inet6_sk(sk);
-       struct in6_addr *laddr = &np->rcv_saddr;
-       struct in6_addr *faddr = &np->daddr;
-       __u16 lport = inet->num;
-       __u16 fport = inet->dport;
-       return tcp_v6_hashfn(laddr, lport, faddr, fport);
-}
-
-static inline int tcp_v6_bind_conflict(struct sock *sk,
-                                      struct tcp_bind_bucket *tb)
-{
-       struct sock *sk2;
-       struct hlist_node *node;
+       const struct sock *sk2;
+       const struct hlist_node *node;
  
         /* We must walk the whole port owner list in this case. -DaveM */
         sk_for_each_bound(sk2, node, &tb->owners) {
@@ -126,8 +104,8 @@ static inline int tcp_v6_bind_conflict(struct sock *sk,
   */
  static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
  {
-       struct tcp_bind_hashbucket *head;
-       struct tcp_bind_bucket *tb;
+       struct inet_bind_hashbucket *head;
+       struct inet_bind_bucket *tb;
         struct hlist_node *node;
         int ret;
  
@@ -138,37 +116,42 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
                 int remaining = (high - low) + 1;
                 int rover;
  
-               spin_lock(&tcp_portalloc_lock);
-               if (tcp_port_rover < low)
+               spin_lock(&tcp_hashinfo.portalloc_lock);
+               if (tcp_hashinfo.port_rover < low)
                         rover = low;
                 else
-                       rover = tcp_port_rover;
+                       rover = tcp_hashinfo.port_rover;
                 do {    rover++;
                         if (rover > high)
                                 rover = low;
-                       head = &tcp_bhash[tcp_bhashfn(rover)];
+                       head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
                         spin_lock(&head->lock);
-                       tb_for_each(tb, node, &head->chain)
+                       inet_bind_bucket_for_each(tb, node, &head->chain)
                                 if (tb->port == rover)
                                         goto next;
                         break;
                 next:
                         spin_unlock(&head->lock);
                 } while (--remaining > 0);
-               tcp_port_rover = rover;
-               spin_unlock(&tcp_portalloc_lock);
-
-               /* Exhausted local port range during search? */
+               tcp_hashinfo.port_rover = rover;
+               spin_unlock(&tcp_hashinfo.portalloc_lock);
+
+               /* Exhausted local port range during search?  It is not
+                * possible for us to be holding one of the bind hash
+                * locks if this test triggers, because if 'remaining'
+                * drops to zero, we broke out of the do/while loop at
+                * the top level, not from the 'break;' statement.
+                */
                 ret = 1;
-               if (remaining <= 0)
+               if (unlikely(remaining <= 0))
                         goto fail;
  
                 /* OK, here is the one we will use. */
                 snum = rover;
         } else {
-               head = &tcp_bhash[tcp_bhashfn(snum)];
+               head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
                 spin_lock(&head->lock);
-               tb_for_each(tb, node, &head->chain)
+               inet_bind_bucket_for_each(tb, node, &head->chain)
                         if (tb->port == snum)
                                 goto tb_found;
         }
@@ -187,8 +170,11 @@ tb_found:
         }
  tb_not_found:
         ret = 1;
-       if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
-               goto fail_unlock;
+       if (tb == NULL) {
+               tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
+               if (tb == NULL)
+                       goto fail_unlock;
+       }
         if (hlist_empty(&tb->owners)) {
                 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
                         tb->fastreuse = 1;
@@ -199,9 +185,9 @@ tb_not_found:
                 tb->fastreuse = 0;
  
  success:
-       if (!tcp_sk(sk)->bind_hash)
-               tcp_bind_hash(sk, tb, snum);
-       BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
+       if (!inet_csk(sk)->icsk_bind_hash)
+               inet_bind_hash(sk, tb, snum);
+       BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
         ret = 0;
  
  fail_unlock:
@@ -219,13 +205,13 @@ static __inline__ void __tcp_v6_hash(struct sock *sk)
         BUG_TRAP(sk_unhashed(sk));
  
         if (sk->sk_state == TCP_LISTEN) {
-               list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
-               lock = &tcp_lhash_lock;
-               tcp_listen_wlock();
+               list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
+               lock = &tcp_hashinfo.lhash_lock;
+               inet_listen_wlock(&tcp_hashinfo);
         } else {
-               sk->sk_hashent = tcp_v6_sk_hashfn(sk);
-               list = &tcp_ehash[sk->sk_hashent].chain;
-               lock = &tcp_ehash[sk->sk_hashent].lock;
+               sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
+               list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
+               lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
                 write_lock(lock);
         }
  
@@ -250,131 +236,11 @@ static void tcp_v6_hash(struct sock *sk)
         }
  }
  
-static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
-{
-       struct sock *sk;
-       struct hlist_node *node;
-       struct sock *result = NULL;
-       int score, hiscore;
-
-       hiscore=0;
-       read_lock(&tcp_lhash_lock);
-       sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
-               if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
-                       struct ipv6_pinfo *np = inet6_sk(sk);
-                       
-                       score = 1;
-                       if (!ipv6_addr_any(&np->rcv_saddr)) {
-                               if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
-                                       continue;
-                               score++;
-                       }
-                       if (sk->sk_bound_dev_if) {
-                               if (sk->sk_bound_dev_if != dif)
-                                       continue;
-                               score++;
-                       }
-                       if (score == 3) {
-                               result = sk;
-                               break;
-                       }
-                       if (score > hiscore) {
-                               hiscore = score;
-                               result = sk;
-                       }
-               }
-       }
-       if (result)
-               sock_hold(result);
-       read_unlock(&tcp_lhash_lock);
-       return result;
-}
-
-/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
- * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
- *
- * The sockhash lock must be held as a reader here.
- */
-
-static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
-                                                      struct in6_addr *daddr, u16 hnum,
-                                                      int dif)
-{
-       struct tcp_ehash_bucket *head;
-       struct sock *sk;
-       struct hlist_node *node;
-       __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
-       int hash;
-
-       /* Optimize here for direct hit, only listening connections can
-        * have wildcards anyways.
-        */
-       hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
-       head = &tcp_ehash[hash];
-       read_lock(&head->lock);
-       sk_for_each(sk, node, &head->chain) {
-               /* For IPV6 do the cheaper port and family tests first. */
-               if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
-                       goto hit; /* You sunk my battleship! */
-       }
-       /* Must check for a TIME_WAIT'er before going to listener hash. */
-       sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
-               /* FIXME: acme: check this... */
-               struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
-
-               if(*((__u32 *)&(tw->tw_dport))  == ports        &&
-                  sk->sk_family                == PF_INET6) {
-                       if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr)     &&
-                          ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
-                          (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
-                               goto hit;
-               }
-       }
-       read_unlock(&head->lock);
-       return NULL;
-
-hit:
-       sock_hold(sk);
-       read_unlock(&head->lock);
-       return sk;
-}
-
-
-static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
-                                          struct in6_addr *daddr, u16 hnum,
-                                          int dif)
-{
-       struct sock *sk;
-
-       sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
-
-       if (sk)
-               return sk;
-
-       return tcp_v6_lookup_listener(daddr, hnum, dif);
-}
-
-inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
-                                 struct in6_addr *daddr, u16 dport,
-                                 int dif)
-{
-       struct sock *sk;
-
-       local_bh_disable();
-       sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
-       local_bh_enable();
-
-       return sk;
-}
-
-EXPORT_SYMBOL_GPL(tcp_v6_lookup);
-
-
  /*
   * Open request hash tables.
   */
  
-static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
+static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
  {
         u32 a, b, c;
  
@@ -394,14 +260,15 @@ static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
         return c & (TCP_SYNQ_HSIZE - 1);
  }
  
-static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
+static struct request_sock *tcp_v6_search_req(const struct sock *sk,
                                               struct request_sock ***prevp,
                                               __u16 rport,
                                               struct in6_addr *raddr,
                                               struct in6_addr *laddr,
                                               int iif)
  {
-       struct listen_sock *lopt = tp->accept_queue.listen_opt;
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
         struct request_sock *req, **prev;  
  
         for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
@@ -446,44 +313,48 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
         }
  }
  
-static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
-                                     struct tcp_tw_bucket **twp)
+static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
+                                     struct inet_timewait_sock **twp)
  {
         struct inet_sock *inet = inet_sk(sk);
-       struct ipv6_pinfo *np = inet6_sk(sk);
-       struct in6_addr *daddr = &np->rcv_saddr;
-       struct in6_addr *saddr = &np->daddr;
-       int dif = sk->sk_bound_dev_if;
-       u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
-       int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
-       struct tcp_ehash_bucket *head = &tcp_ehash[hash];
+       const struct ipv6_pinfo *np = inet6_sk(sk);
+       const struct in6_addr *daddr = &np->rcv_saddr;
+       const struct in6_addr *saddr = &np->daddr;
+       const int dif = sk->sk_bound_dev_if;
+       const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+       const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport,
+                                      tcp_hashinfo.ehash_size);
+       struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
         struct sock *sk2;
-       struct hlist_node *node;
-       struct tcp_tw_bucket *tw;
+       const struct hlist_node *node;
+       struct inet_timewait_sock *tw;
  
         write_lock(&head->lock);
  
         /* Check TIME-WAIT sockets first. */
-       sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
-               tw = (struct tcp_tw_bucket*)sk2;
+       sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
+               const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
+
+               tw = inet_twsk(sk2);
  
                 if(*((__u32 *)&(tw->tw_dport))  == ports        &&
                    sk2->sk_family               == PF_INET6     &&
-                  ipv6_addr_equal(&tw->tw_v6_daddr, saddr)     &&
-                  ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
+                  ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
+                  ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)     &&
                    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
+                       const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
                         struct tcp_sock *tp = tcp_sk(sk);
  
-                       if (tw->tw_ts_recent_stamp &&
-                           (!twp || (sysctl_tcp_tw_reuse &&
-                                     xtime.tv_sec - 
-                                     tw->tw_ts_recent_stamp > 1))) {
+                       if (tcptw->tw_ts_recent_stamp &&
+                           (!twp ||
+                            (sysctl_tcp_tw_reuse &&
+                             xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
                                 /* See comment in tcp_ipv4.c */
-                               tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
+                               tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
                                 if (!tp->write_seq)
                                         tp->write_seq = 1;
-                               tp->rx_opt.ts_recent = tw->tw_ts_recent;
-                               tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
+                               tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
+                               tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
                                 sock_hold(sk2);
                                 goto unique;
                         } else
@@ -494,7 +365,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
  
         /* And established part... */
         sk_for_each(sk2, node, &head->chain) {
-               if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
+               if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
                         goto not_unique;
         }
  
@@ -510,10 +381,10 @@ unique:
                 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
         } else if (tw) {
                 /* Silly. Should hash-dance instead... */
-               tcp_tw_deschedule(tw);
+               inet_twsk_deschedule(tw, &tcp_death_row);
                 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
  
-               tcp_tw_put(tw);
+               inet_twsk_put(tw);
         }
         return 0;
  
@@ -535,8 +406,8 @@ static inline u32 tcpv6_port_offset(const struct sock *sk)
  static int tcp_v6_hash_connect(struct sock *sk)
  {
         unsigned short snum = inet_sk(sk)->num;
-       struct tcp_bind_hashbucket *head;
-       struct tcp_bind_bucket *tb;
+       struct inet_bind_hashbucket *head;
+       struct inet_bind_bucket *tb;
         int ret;
  
         if (!snum) {
@@ -548,19 +419,19 @@ static int tcp_v6_hash_connect(struct sock *sk)
                 static u32 hint;
                 u32 offset = hint + tcpv6_port_offset(sk);
                 struct hlist_node *node;
-               struct tcp_tw_bucket *tw = NULL;
+               struct inet_timewait_sock *tw = NULL;
  
                 local_bh_disable();
                 for (i = 1; i <= range; i++) {
                         port = low + (i + offset) % range;
-                       head = &tcp_bhash[tcp_bhashfn(port)];
+                       head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
                         spin_lock(&head->lock);
  
                         /* Does not bother with rcv_saddr checks,
                          * because the established check is already
                          * unique enough.
                          */
-                       tb_for_each(tb, node, &head->chain) {
+                       inet_bind_bucket_for_each(tb, node, &head->chain) {
                                 if (tb->port == port) {
                                         BUG_TRAP(!hlist_empty(&tb->owners));
                                         if (tb->fastreuse >= 0)
@@ -573,7 +444,7 @@ static int tcp_v6_hash_connect(struct sock *sk)
                                 }
                         }
  
-                       tb = tcp_bucket_create(head, port);
+                       tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
                         if (!tb) {
                                 spin_unlock(&head->lock);
                                 break;
@@ -592,7 +463,7 @@ ok:
                 hint += i;
  
                 /* Head lock still held and bh's disabled */
-               tcp_bind_hash(sk, tb, port);
+               inet_bind_hash(sk, tb, port);
                 if (sk_unhashed(sk)) {
                         inet_sk(sk)->sport = htons(port);
                         __tcp_v6_hash(sk);
@@ -600,16 +471,16 @@ ok:
                 spin_unlock(&head->lock);
  
                 if (tw) {
-                       tcp_tw_deschedule(tw);
-                       tcp_tw_put(tw);
+                       inet_twsk_deschedule(tw, &tcp_death_row);
+                       inet_twsk_put(tw);
                 }
  
                 ret = 0;
                 goto out;
         }
  
-       head  = &tcp_bhash[tcp_bhashfn(snum)];
-       tb  = tcp_sk(sk)->bind_hash;
+       head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
+       tb   = inet_csk(sk)->icsk_bind_hash;
         spin_lock_bh(&head->lock);
  
         if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
@@ -626,11 +497,6 @@ out:
         }
  }
  
-static __inline__ int tcp_v6_iif(struct sk_buff *skb)
-{
-       return IP6CB(skb)->iif;
-}
-
  static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
                           int addr_len)
  {
@@ -822,14 +688,15 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                 int type, int code, int offset, __u32 info)
  {
         struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
-       struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
+       const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
         struct ipv6_pinfo *np;
         struct sock *sk;
         int err;
         struct tcp_sock *tp; 
         __u32 seq;
  
-       sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
+       sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
+                         th->source, skb->dev->ifindex);
  
         if (sk == NULL) {
                 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
@@ -837,7 +704,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
         }
  
         if (sk->sk_state == TCP_TIME_WAIT) {
-               tcp_tw_put((struct tcp_tw_bucket*)sk);
+               inet_twsk_put((struct inet_timewait_sock *)sk);
                 return;
         }
  
@@ -915,8 +782,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                 if (sock_owned_by_user(sk))
                         goto out;
  
-               req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
-                                       &hdr->saddr, tcp_v6_iif(skb));
+               req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
+                                       &hdr->saddr, inet6_iif(skb));
                 if (!req)
                         goto out;
  
@@ -930,7 +797,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                         goto out;
                 }
  
-               tcp_synq_drop(sk, req, prev);
+               inet_csk_reqsk_queue_drop(sk, req, prev);
                 goto out;
  
         case TCP_SYN_SENT:
@@ -1127,7 +994,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
                                     buff->csum);
  
         fl.proto = IPPROTO_TCP;
-       fl.oif = tcp_v6_iif(skb);
+       fl.oif = inet6_iif(skb);
         fl.fl_ip_dport = t1->dest;
         fl.fl_ip_sport = t1->source;
  
@@ -1196,7 +1063,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
                                     buff->csum);
  
         fl.proto = IPPROTO_TCP;
-       fl.oif = tcp_v6_iif(skb);
+       fl.oif = inet6_iif(skb);
         fl.fl_ip_dport = t1->dest;
         fl.fl_ip_sport = t1->source;
  
@@ -1215,12 +1082,14 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
  
  static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
  {
-       struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
+       struct inet_timewait_sock *tw = inet_twsk(sk);
+       const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
  
-       tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
-                       tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
+       tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+                       tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+                       tcptw->tw_ts_recent);
  
-       tcp_tw_put(tw);
+       inet_twsk_put(tw);
  }
  
  static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
@@ -1232,28 +1101,25 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
  static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
  {
         struct request_sock *req, **prev;
-       struct tcphdr *th = skb->h.th;
-       struct tcp_sock *tp = tcp_sk(sk);
+       const struct tcphdr *th = skb->h.th;
         struct sock *nsk;
  
         /* Find possible connection requests. */
-       req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
-                               &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
+       req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
+                               &skb->nh.ipv6h->daddr, inet6_iif(skb));
         if (req)
                 return tcp_check_req(sk, skb, req, prev);
  
-       nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
-                                         th->source,
-                                         &skb->nh.ipv6h->daddr,
-                                         ntohs(th->dest),
-                                         tcp_v6_iif(skb));
+       nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
+                                        th->source, &skb->nh.ipv6h->daddr,
+                                        ntohs(th->dest), inet6_iif(skb));
  
         if (nsk) {
                 if (nsk->sk_state != TCP_TIME_WAIT) {
                         bh_lock_sock(nsk);
                         return nsk;
                 }
-               tcp_tw_put((struct tcp_tw_bucket*)nsk);
+               inet_twsk_put((struct inet_timewait_sock *)nsk);
                 return NULL;
         }
  
@@ -1266,12 +1132,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
  
  static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
-       struct listen_sock *lopt = tp->accept_queue.listen_opt;
-       u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
+       const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
  
-       reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
-       tcp_synq_added(sk);
+       reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
+       inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
  }
  
  
@@ -1296,13 +1162,13 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
         /*
          *      There are no SYN attacks on IPv6, yet...        
          */
-       if (tcp_synq_is_full(sk) && !isn) {
+       if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
                 if (net_ratelimit())
                         printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
                 goto drop;              
         }
  
-       if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
+       if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
                 goto drop;
  
         req = reqsk_alloc(&tcp6_request_sock_ops);
@@ -1334,7 +1200,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
         /* So that link locals have meaning */
         if (!sk->sk_bound_dev_if &&
             ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
-               treq->iif = tcp_v6_iif(skb);
+               treq->iif = inet6_iif(skb);
  
         if (isn == 0) 
                 isn = tcp_v6_init_sequence(sk,skb);
@@ -1399,15 +1265,14 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
                 newnp->pktoptions  = NULL;
                 newnp->opt         = NULL;
-               newnp->mcast_oif   = tcp_v6_iif(skb);
+               newnp->mcast_oif   = inet6_iif(skb);
                 newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
  
-               /* Charge newly allocated IPv6 socket. Though it is mapped,
-                * it is IPv6 yet.
+               /*
+                * No need to charge this sock to the relevant IPv6 refcnt debug socks count
+                * here, tcp_create_openreq_child now does this for us, see the comment in
+                * that function for the gory details. -acme
                  */
-#ifdef INET_REFCNT_DEBUG
-               atomic_inc(&inet6_sock_nr);
-#endif
  
                 /* It is tricky place. Until this moment IPv4 tcp
                    worked with IPv6 af_tcp.af_specific.
@@ -1462,10 +1327,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
         if (newsk == NULL)
                 goto out;
  
-       /* Charge newly allocated IPv6 socket */
-#ifdef INET_REFCNT_DEBUG
-       atomic_inc(&inet6_sock_nr);
-#endif
+       /*
+        * No need to charge this sock to the relevant IPv6 refcnt debug socks
+        * count here, tcp_create_openreq_child now does this for us, see the
+        * comment in that function for the gory details. -acme
+        */
  
         ip6_dst_store(newsk, dst, NULL);
         newsk->sk_route_caps = dst->dev->features &
@@ -1504,7 +1370,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                         skb_set_owner_r(newnp->pktoptions, newsk);
         }
         newnp->opt        = NULL;
-       newnp->mcast_oif  = tcp_v6_iif(skb);
+       newnp->mcast_oif  = inet6_iif(skb);
         newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
  
         /* Clone native IPv6 options from listening socket (if any)
@@ -1531,7 +1397,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
         newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
  
         __tcp_v6_hash(newsk);
-       tcp_inherit_port(sk, newsk);
+       inet_inherit_port(&tcp_hashinfo, sk, newsk);
  
         return newsk;
  
@@ -1552,7 +1418,7 @@ static int tcp_v6_checksum_init(struct sk_buff *skb)
                 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
                                   &skb->nh.ipv6h->daddr,skb->csum))
                         return 0;
-               LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
         }
         if (skb->len <= 76) {
                 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
@@ -1679,7 +1545,7 @@ ipv6_pktoptions:
         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
                 if (np->rxopt.bits.rxinfo)
-                       np->mcast_oif = tcp_v6_iif(opt_skb);
+                       np->mcast_oif = inet6_iif(opt_skb);
                 if (np->rxopt.bits.rxhlim)
                         np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
                 if (ipv6_opt_accepted(sk, opt_skb)) {
@@ -1734,8 +1600,9 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
         TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
         TCP_SKB_CB(skb)->sacked = 0;
  
-       sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
-                            &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
+       sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
+                           &skb->nh.ipv6h->daddr, ntohs(th->dest),
+                           inet6_iif(skb));
  
         if (!sk)
                 goto no_tcp_socket;
@@ -1790,26 +1657,29 @@ discard_and_relse:
  
  do_time_wait:
         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-               tcp_tw_put((struct tcp_tw_bucket *) sk);
+               inet_twsk_put((struct inet_timewait_sock *)sk);
                 goto discard_it;
         }
  
         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
-               tcp_tw_put((struct tcp_tw_bucket *) sk);
+               inet_twsk_put((struct inet_timewait_sock *)sk);
                 goto discard_it;
         }
  
-       switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
-                                         skb, th, skb->len)) {
+       switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
+                                          skb, th)) {
         case TCP_TW_SYN:
         {
                 struct sock *sk2;
  
-               sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
+               sk2 = inet6_lookup_listener(&tcp_hashinfo,
+                                           &skb->nh.ipv6h->daddr,
+                                           ntohs(th->dest), inet6_iif(skb));
                 if (sk2 != NULL) {
-                       tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
-                       tcp_tw_put((struct tcp_tw_bucket *)sk);
+                       struct inet_timewait_sock *tw = inet_twsk(sk);
+                       inet_twsk_deschedule(tw, &tcp_death_row);
+                       inet_twsk_put(tw);
                         sk = sk2;
                         goto process;
                 }
@@ -1978,7 +1848,7 @@ static struct tcp_func ipv6_specific = {
  static struct tcp_func ipv6_mapped = {
         .queue_xmit     =       ip_queue_xmit,
         .send_check     =       tcp_v4_send_check,
-       .rebuild_header =       tcp_v4_rebuild_header,
+       .rebuild_header =       inet_sk_rebuild_header,
         .conn_request   =       tcp_v6_conn_request,
         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
         .remember_stamp =       tcp_v4_remember_stamp,
@@ -1997,13 +1867,14 @@ static struct tcp_func ipv6_mapped = {
   */
  static int tcp_v6_init_sock(struct sock *sk)
  {
+       struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
  
         skb_queue_head_init(&tp->out_of_order_queue);
         tcp_init_xmit_timers(sk);
         tcp_prequeue_init(tp);
  
-       tp->rto  = TCP_TIMEOUT_INIT;
+       icsk->icsk_rto = TCP_TIMEOUT_INIT;
         tp->mdev = TCP_TIMEOUT_INIT;
  
         /* So many TCP implementations out there (incorrectly) count the
@@ -2025,7 +1896,7 @@ static int tcp_v6_init_sock(struct sock *sk)
         sk->sk_state = TCP_CLOSE;
  
         tp->af_specific = &ipv6_specific;
-       tp->ca_ops = &tcp_init_congestion_ops;
+       icsk->icsk_ca_ops = &tcp_init_congestion_ops;
         sk->sk_write_space = sk_stream_write_space;
         sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
  
@@ -2039,8 +1910,6 @@ static int tcp_v6_init_sock(struct sock *sk)
  
  static int tcp_v6_destroy_sock(struct sock *sk)
  {
-       extern int tcp_v4_destroy_sock(struct sock *sk);
-
         tcp_v4_destroy_sock(sk);
         return inet6_destroy_sock(sk);
  }
@@ -2086,18 +1955,20 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
         unsigned long timer_expires;
         struct inet_sock *inet = inet_sk(sp);
         struct tcp_sock *tp = tcp_sk(sp);
+       const struct inet_connection_sock *icsk = inet_csk(sp);
         struct ipv6_pinfo *np = inet6_sk(sp);
  
         dest  = &np->daddr;
         src   = &np->rcv_saddr;
         destp = ntohs(inet->dport);
         srcp  = ntohs(inet->sport);
-       if (tp->pending == TCP_TIME_RETRANS) {
+
+       if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
                 timer_active    = 1;
-               timer_expires   = tp->timeout;
-       } else if (tp->pending == TCP_TIME_PROBE0) {
+               timer_expires   = icsk->icsk_timeout;
+       } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
                 timer_active    = 4;
-               timer_expires   = tp->timeout;
+               timer_expires   = icsk->icsk_timeout;
         } else if (timer_pending(&sp->sk_timer)) {
                 timer_active    = 2;
                 timer_expires   = sp->sk_timer.expires;
@@ -2118,28 +1989,31 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
                    tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
                    timer_active,
                    jiffies_to_clock_t(timer_expires - jiffies),
-                  tp->retransmits,
+                  icsk->icsk_retransmits,
                    sock_i_uid(sp),
-                  tp->probes_out,
+                  icsk->icsk_probes_out,
                    sock_i_ino(sp),
                    atomic_read(&sp->sk_refcnt), sp,
-                  tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
+                  icsk->icsk_rto,
+                  icsk->icsk_ack.ato,
+                  (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
                    tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
                    );
  }
  
  static void get_timewait6_sock(struct seq_file *seq, 
-                              struct tcp_tw_bucket *tw, int i)
+                              struct inet_timewait_sock *tw, int i)
  {
         struct in6_addr *dest, *src;
         __u16 destp, srcp;
+       struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
         int ttd = tw->tw_ttd - jiffies;
  
         if (ttd < 0)
                 ttd = 0;
  
-       dest  = &tw->tw_v6_daddr;
-       src   = &tw->tw_v6_rcv_saddr;
+       dest = &tcp6tw->tw_v6_daddr;
+       src  = &tcp6tw->tw_v6_rcv_saddr;
         destp = ntohs(tw->tw_dport);
         srcp  = ntohs(tw->tw_sport);
  
@@ -2214,7 +2088,7 @@ struct proto tcpv6_prot = {
         .close                  = tcp_close,
         .connect                = tcp_v6_connect,
         .disconnect             = tcp_disconnect,
-       .accept                 = tcp_accept,
+       .accept                 = inet_csk_accept,
         .ioctl                  = tcp_ioctl,
         .init                   = tcp_v6_init_sock,
         .destroy                = tcp_v6_destroy_sock,
@@ -2231,11 +2105,13 @@ struct proto tcpv6_prot = {
         .sockets_allocated      = &tcp_sockets_allocated,
         .memory_allocated       = &tcp_memory_allocated,
         .memory_pressure        = &tcp_memory_pressure,
+       .orphan_count           = &tcp_orphan_count,
         .sysctl_mem             = sysctl_tcp_mem,
         .sysctl_wmem            = sysctl_tcp_wmem,
         .sysctl_rmem            = sysctl_tcp_rmem,
         .max_header             = MAX_TCP_HEADER,
         .obj_size               = sizeof(struct tcp6_sock),
+       .twsk_obj_size          = sizeof(struct tcp6_timewait_sock),
         .rsk_prot               = &tcp6_request_sock_ops,
  };
  
@@ -2245,8 +2121,6 @@ static struct inet6_protocol tcpv6_protocol = {
         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
  };
  
-extern struct proto_ops inet6_stream_ops;
-
  static struct inet_protosw tcpv6_protosw = {
         .type           =       SOCK_STREAM,
         .protocol       =       IPPROTO_TCP,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c

index eff050ac7049601e6cc040c14210ac678b16e788..390d750449ce6f801dd3ed906dd68fa7bcc20126 100644 (file)
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -51,6 +51,7 @@
  #include <net/udp.h>
  #include <net/raw.h>
  #include <net/inet_common.h>
+#include <net/tcp_states.h>
  
  #include <net/ip6_checksum.h>
  #include <net/xfrm.h>
@@ -58,7 +59,7 @@
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
  
-DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6);
+DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
  
  /* Grrr, addr_type already calculated by caller, but I don't want
   * to add some silly "cookie" argument to this method just for that.
@@ -477,8 +478,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
                 /* RFC 2460 section 8.1 says that we SHOULD log
                    this error. Well, it is reasonable.
                  */
-               LIMIT_NETDEBUG(
-                       printk(KERN_INFO "IPv6: udp checksum is 0\n"));
+               LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
                 goto discard;
         }
  
@@ -493,7 +493,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
         if (skb->ip_summed==CHECKSUM_HW) {
                 skb->ip_summed = CHECKSUM_UNNECESSARY;
                 if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) {
-                       LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v6 hw csum failure.\n"));
+                       LIMIT_NETDEBUG(KERN_DEBUG "udp v6 hw csum failure.\n");
                         skb->ip_summed = CHECKSUM_NONE;
                 }
         }
@@ -825,7 +825,7 @@ back_from_confirm:
                 /* ... which is an evident application bug. --ANK */
                 release_sock(sk);
  
-               LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
                 err = -EINVAL;
                 goto out;
         }
@@ -1054,8 +1054,6 @@ struct proto udpv6_prot = {
         .obj_size =     sizeof(struct udp6_sock),
  };
  
-extern struct proto_ops inet6_dgram_ops;
-
  static struct inet_protosw udpv6_protosw = {
         .type =      SOCK_DGRAM,
         .protocol =  IPPROTO_UDP,
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c

index 60c26c87277e7fa63a34f9b8671cbdd79efb1fd4..fbef7826a74f610556d02b11b9332f11a823b980 100644 (file)
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -79,7 +79,7 @@ static u32 xfrm6_tunnel_spi;
  #define XFRM6_TUNNEL_SPI_MIN   1
  #define XFRM6_TUNNEL_SPI_MAX   0xffffffff
  
-static kmem_cache_t *xfrm6_tunnel_spi_kmem;
+static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly;
  
  #define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256
  #define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c

index 5a27e5df5886e91e4acd3448c995c2b07c79d275..34b3bb86840912da024646f070170f9c933c650e 100644 (file)
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -44,7 +44,6 @@
  #include <linux/socket.h>
  #include <linux/sockios.h>
  #include <linux/string.h>
-#include <linux/tcp.h>
  #include <linux/types.h>
  #include <linux/termios.h>
  
@@ -52,6 +51,7 @@
  #include <net/p8022.h>
  #include <net/psnap.h>
  #include <net/sock.h>
+#include <net/tcp_states.h>
  
  #include <asm/uaccess.h>
  
@@ -1627,7 +1627,7 @@ out:
         return rc;
  }
  
-static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
  {
         /* NULL here for pt means the packet was looped back */
         struct ipx_interface *intrfc;
@@ -1796,8 +1796,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
                                      copied);
         if (rc)
                 goto out_free;
-       if (skb->stamp.tv_sec)
-               sk->sk_stamp = skb->stamp;
+       if (skb->tstamp.off_sec)
+               skb_get_timestamp(skb, &sk->sk_stamp);
  
         msg->msg_namelen = sizeof(*sipx);
  
@@ -1940,9 +1940,7 @@ static struct notifier_block ipx_dev_notifier = {
  };
  
  extern struct datalink_proto *make_EII_client(void);
-extern struct datalink_proto *make_8023_client(void);
  extern void destroy_EII_client(struct datalink_proto *);
-extern void destroy_8023_client(struct datalink_proto *);
  
  static unsigned char ipx_8022_type = 0xE0;
  static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 };
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c

index b6761913445ad478026c31f749c102d3ff855529..1f73d9ea434da9938b7750725cad19f561647aa7 100644 (file)
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -10,7 +10,7 @@
  #include <linux/proc_fs.h>
  #include <linux/spinlock.h>
  #include <linux/seq_file.h>
-#include <linux/tcp.h>
+#include <net/tcp_states.h>
  #include <net/ipx.h>
  
  static __inline__ struct ipx_interface *ipx_get_interface_idx(loff_t pos)
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c

index 92c6e8d4e731b196a58af56ab5803723db63f2e1..6f92f9c62990ab4854178cc67e72c2a71b2f64f7 100644 (file)
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -56,7 +56,7 @@
  #include <asm/uaccess.h>
  
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  
  #include <net/irda/af_irda.h>
  
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c

index 6dafbb43b5296922f62cacf8c596d9987d835c0f..3e9a06abbdd08d8e223169bb4f0155b6dd409138 100644 (file)
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -988,9 +988,6 @@ void irlap_resend_rejected_frames(struct irlap_cb *self, int command)
                         IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__);
                         return;
                 }
-               /* Unlink tx_skb from list */
-               tx_skb->next = tx_skb->prev = NULL;
-               tx_skb->list = NULL;
  
                 /* Clear old Nr field + poll bit */
                 tx_skb->data[1] &= 0x0f;
@@ -1063,9 +1060,6 @@ void irlap_resend_rejected_frame(struct irlap_cb *self, int command)
                         IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__);
                         return;
                 }
-               /* Unlink tx_skb from list */
-               tx_skb->next = tx_skb->prev = NULL;
-               tx_skb->list = NULL;
  
                 /* Clear old Nr field + poll bit */
                 tx_skb->data[1] &= 0x0f;
@@ -1309,7 +1303,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
   * Jean II
   */
  int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
-                    struct packet_type *ptype)
+                    struct packet_type *ptype, struct net_device *orig_dev)
  {
         struct irlap_info info;
         struct irlap_cb *self;
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c

index 7a4a4d7fbe66bf7774d91e9e04c1a42c2bf5b495..c19e9ce05a3a117d9a404140c735ea8a5c766d47 100644 (file)
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -53,7 +53,6 @@ struct irlmp_cb *irlmp = NULL;
  /* These can be altered by the sysctl interface */
  int  sysctl_discovery         = 0;
  int  sysctl_discovery_timeout = 3; /* 3 seconds by default */
-EXPORT_SYMBOL(sysctl_discovery_timeout);
  int  sysctl_discovery_slots   = 6; /* 6 slots by default */
  int  sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ;
  char sysctl_devname[65];
@@ -67,7 +66,6 @@ const char *irlmp_reasons[] = {
         "LM_INIT_DISCONNECT",
         "ERROR, NOT USED",
  };
-EXPORT_SYMBOL(irlmp_reasons);
  
  /*
   * Function irlmp_init (void)
@@ -675,7 +673,6 @@ struct lsap_cb *irlmp_dup(struct lsap_cb *orig, void *instance)
  
         return new;
  }
-EXPORT_SYMBOL(irlmp_dup);
  
  /*
   * Function irlmp_disconnect_request (handle, userdata)
diff --git a/net/irda/irmod.c b/net/irda/irmod.c

index 6ffaed4544e963bbd010b515977b3d2586cc5cf8..634901dd156fc7b2154234f423417324032515a0 100644 (file)
--- a/net/irda/irmod.c
+++ b/net/irda/irmod.c
@@ -54,7 +54,7 @@ extern int  irsock_init(void);
  extern void irsock_cleanup(void);
  /* irlap_frame.c */
  extern int  irlap_driver_rcv(struct sk_buff *, struct net_device *, 
-                            struct packet_type *);
+                            struct packet_type *, struct net_device *);
  
  /*
   * Module parameters
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h

index 9004f7349a7604d9043cb6322a10f4d5c53ade51..b391cb3893d4852c57374eef08b95bd79daac3b1 100644 (file)
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -517,9 +517,6 @@ extern int
         irda_irnet_init(void);          /* Initialise IrDA part of IrNET */
  extern void
         irda_irnet_cleanup(void);       /* Teardown IrDA part of IrNET */
-/* ---------------------------- MODULE ---------------------------- */
-extern int
-       irnet_init(void);               /* Initialise IrNET module */
  
  /**************************** VARIABLES ****************************/
  
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c

index f8f984bb99225631db2ca2c1edef438ec6bb4cb8..e53bf9e0053ee831d46639a4e70917c0436af585 100644 (file)
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -1107,7 +1107,7 @@ ppp_irnet_cleanup(void)
  /*
   * Module main entry point
   */
-int __init
+static int __init
  irnet_init(void)
  {
    int err;
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c

index b0dd3ea35999698d2d577de56d1bef0ff35b248c..1ba8c7106639604f9ed91f8acff0afdc2ed5f170 100644 (file)
--- a/net/irda/irqueue.c
+++ b/net/irda/irqueue.c
@@ -822,7 +822,6 @@ void* hashbin_find_next( hashbin_t* hashbin, long hashv, const char* name,
  
         return entry;
  }
-EXPORT_SYMBOL(hashbin_find_next);
  
  /*
   * Function hashbin_get_first (hashbin)
diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c

index 5de05a0bc0ffe11ab5e59e2edc93302d06df866f..8b5eefd70f03c3f686f14fc84947ac5e7ce90ab3 100644 (file)
--- a/net/lapb/lapb_subr.c
+++ b/net/lapb/lapb_subr.c
@@ -78,7 +78,7 @@ void lapb_requeue_frames(struct lapb_cb *lapb)
                 if (!skb_prev)
                         skb_queue_head(&lapb->write_queue, skb);
                 else
-                       skb_append(skb_prev, skb);
+                       skb_append(skb_prev, skb, &lapb->write_queue);
                 skb_prev = skb;
         }
  }
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c

index 20b4cfebd74ca543fc894b075802226d912011b3..66f55e514b568d2682f0c859e0db1e90cd252cf2 100644 (file)
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -23,13 +23,13 @@
  #include <linux/config.h>
  #include <linux/kernel.h>
  #include <linux/module.h>
-#include <linux/tcp.h>
  #include <linux/rtnetlink.h>
  #include <linux/init.h>
  #include <net/llc.h>
  #include <net/llc_sap.h>
  #include <net/llc_pdu.h>
  #include <net/llc_conn.h>
+#include <net/tcp_states.h>
  
  /* remember: uninitialized global data is zeroed because its in .bss */
  static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START;
@@ -714,7 +714,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
         if (uaddr)
                 memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr));
         msg->msg_namelen = sizeof(*uaddr);
-       if (!skb->list) {
+       if (!skb->next) {
  dgram_free:
                 kfree_skb(skb);
         }
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c

index eba812a9c69c6e0e0de271b5ec275d696da8e52d..4c644bc70eaec7b4a9f3f11d55f603afd72922b0 100644 (file)
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -16,7 +16,7 @@
  #include <net/llc_sap.h>
  #include <net/llc_conn.h>
  #include <net/sock.h>
-#include <linux/tcp.h>
+#include <net/tcp_states.h>
  #include <net/llc_c_ev.h>
  #include <net/llc_c_ac.h>
  #include <net/llc_c_st.h>
@@ -71,7 +71,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
  
         if (!ev->ind_prim && !ev->cfm_prim) {
                 /* indicate or confirm not required */
-               if (!skb->list)
+               /* XXX this is not very pretty, perhaps we should store
+                * XXX indicate/confirm-needed state in the llc_conn_state_ev
+                * XXX control block of the SKB instead? -DaveM
+                */
+               if (!skb->next)
                         goto out_kfree_skb;
                 goto out_skb_put;
         }
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c

index 5ff02c080a0bfdbfee1bbacda96843c51843fc89..9727455bf0e7907d4285c42e2744bb1e4d52aed9 100644 (file)
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -103,7 +103,8 @@ out:
  struct llc_sap *llc_sap_open(unsigned char lsap,
                              int (*func)(struct sk_buff *skb,
                                          struct net_device *dev,
-                                        struct packet_type *pt))
+                                        struct packet_type *pt,
+                                        struct net_device *orig_dev))
  {
         struct llc_sap *sap = llc_sap_find(lsap);
  
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c

index 0f9fc48aeaf916318a2198283ab007b08e96df27..0f84f66018e4ca2f63c09ee7b0a8ecdbfb429c19 100644 (file)
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -15,7 +15,6 @@
  #include <linux/module.h>
  #include <linux/kernel.h>
  #include <linux/netdevice.h>
-#include <linux/tcp.h>
  #include <asm/errno.h>
  #include <net/llc_if.h>
  #include <net/llc_sap.h>
@@ -25,6 +24,7 @@
  #include <net/llc_c_ev.h>
  #include <net/llc_c_ac.h>
  #include <net/llc_c_st.h>
+#include <net/tcp_states.h>
  
  u8 llc_mac_null_var[IFHWADDRLEN];
  
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c

index 4da6976efc9c716b1460af2549a3a36f98849f71..13b46240b7a108148657aa736a03a52b7e540b41 100644 (file)
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -132,7 +132,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
   *     data now), it queues this frame in the connection's backlog.
   */
  int llc_rcv(struct sk_buff *skb, struct net_device *dev,
-           struct packet_type *pt)
+           struct packet_type *pt, struct net_device *orig_dev)
  {
         struct llc_sap *sap;
         struct llc_pdu_sn *pdu;
@@ -165,7 +165,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
          * LLC functionality
          */
         if (sap->rcv_func) {
-               sap->rcv_func(skb, dev, pt);
+               sap->rcv_func(skb, dev, pt, orig_dev);
                 goto out;
         }
         dest = llc_pdu_type(skb);
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c

index 965c94eb4bbc7b93867f4f4bf707adef0a613552..34228ef149854175c7ecc65a6bc7d008c05421b5 100644 (file)
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -21,7 +21,7 @@
  #include <net/llc_s_ev.h>
  #include <net/llc_s_st.h>
  #include <net/sock.h>
-#include <linux/tcp.h>
+#include <net/tcp_states.h>
  #include <linux/llc.h>
  
  /**
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig

new file mode 100644 (file)

index 0000000..8296b38
--- /dev/null
+++ b/net/netfilter/Kconfig
@@ -0,0 +1,24 @@
+config NETFILTER_NETLINK
+       tristate "Netfilter netlink interface"
+       help
+         If this option is enabled, the kernel will include support
+         for the new netfilter netlink interface.
+
+config NETFILTER_NETLINK_QUEUE
+       tristate "Netfilter NFQUEUE over NFNETLINK interface"
+       depends on NETFILTER_NETLINK
+       help
+         If this option is enabled, the kernel will include support
+         for queueing packets via NFNETLINK.
+         
+config NETFILTER_NETLINK_LOG
+       tristate "Netfilter LOG over NFNETLINK interface"
+       depends on NETFILTER_NETLINK
+       help
+         If this option is enabled, the kernel will include support
+         for logging packets via NFNETLINK.
+
+         This obsoletes the existing ipt_ULOG and ebt_ulog mechanisms,
+         and is also scheduled to replace the old syslog-based ipt_LOG
+         and ip6t_LOG modules.
+
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile

new file mode 100644 (file)

index 0000000..b3b44f8
--- /dev/null
+++ b/net/netfilter/Makefile
@@ -0,0 +1,7 @@
+netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
+
+obj-$(CONFIG_NETFILTER) = netfilter.o
+
+obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
+obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
+obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c

new file mode 100644 (file)

index 0000000..1ceb1a6
--- /dev/null
+++ b/net/netfilter/core.c
@@ -0,0 +1,216 @@
+/* netfilter.c: look after the filters for various protocols. 
+ * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
+ *
+ * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
+ * way.
+ *
+ * Rusty Russell (C)2000 -- This code is GPL.
+ *
+ * February 2000: Modified by James Morris to have 1 queue per protocol.
+ * 15-Mar-2000:   Added NF_REPEAT --RR.
+ * 08-May-2003:          Internal logging interface added by Jozsef Kadlecsik.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+
+#include "nf_internals.h"
+
+/* In this code, we can be waiting indefinitely for userspace to
+ * service a packet if a hook returns NF_QUEUE.  We could keep a count
+ * of skbuffs queued for userspace, and not deregister a hook unless
+ * this is zero, but that sucks.  Now, we simply check when the
+ * packets come back: if the hook is gone, the packet is discarded. */
+struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
+EXPORT_SYMBOL(nf_hooks);
+static DEFINE_SPINLOCK(nf_hook_lock);
+
+int nf_register_hook(struct nf_hook_ops *reg)
+{
+       struct list_head *i;
+
+       spin_lock_bh(&nf_hook_lock);
+       list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
+               if (reg->priority < ((struct nf_hook_ops *)i)->priority)
+                       break;
+       }
+       list_add_rcu(&reg->list, i->prev);
+       spin_unlock_bh(&nf_hook_lock);
+
+       synchronize_net();
+       return 0;
+}
+EXPORT_SYMBOL(nf_register_hook);
+
+void nf_unregister_hook(struct nf_hook_ops *reg)
+{
+       spin_lock_bh(&nf_hook_lock);
+       list_del_rcu(&reg->list);
+       spin_unlock_bh(&nf_hook_lock);
+
+       synchronize_net();
+}
+EXPORT_SYMBOL(nf_unregister_hook);
+
+unsigned int nf_iterate(struct list_head *head,
+                       struct sk_buff **skb,
+                       int hook,
+                       const struct net_device *indev,
+                       const struct net_device *outdev,
+                       struct list_head **i,
+                       int (*okfn)(struct sk_buff *),
+                       int hook_thresh)
+{
+       unsigned int verdict;
+
+       /*
+        * The caller must not block between calls to this
+        * function because of risk of continuing from deleted element.
+        */
+       list_for_each_continue_rcu(*i, head) {
+               struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
+
+               if (hook_thresh > elem->priority)
+                       continue;
+
+               /* Optimization: we don't need to hold module
+                   reference here, since function can't sleep. --RR */
+               verdict = elem->hook(hook, skb, indev, outdev, okfn);
+               if (verdict != NF_ACCEPT) {
+#ifdef CONFIG_NETFILTER_DEBUG
+                       if (unlikely((verdict & NF_VERDICT_MASK)
+                                                       > NF_MAX_VERDICT)) {
+                               NFDEBUG("Evil return from %p(%u).\n",
+                                       elem->hook, hook);
+                               continue;
+                       }
+#endif
+                       if (verdict != NF_REPEAT)
+                               return verdict;
+                       *i = (*i)->prev;
+               }
+       }
+       return NF_ACCEPT;
+}
+
+
+/* Returns 1 if okfn() needs to be executed by the caller,
+ * -EPERM for NF_DROP, 0 otherwise. */
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
+                struct net_device *indev,
+                struct net_device *outdev,
+                int (*okfn)(struct sk_buff *),
+                int hook_thresh)
+{
+       struct list_head *elem;
+       unsigned int verdict;
+       int ret = 0;
+
+       /* We may already have this, but read-locks nest anyway */
+       rcu_read_lock();
+
+       elem = &nf_hooks[pf][hook];
+next_hook:
+       verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
+                            outdev, &elem, okfn, hook_thresh);
+       if (verdict == NF_ACCEPT || verdict == NF_STOP) {
+               ret = 1;
+               goto unlock;
+       } else if (verdict == NF_DROP) {
+               kfree_skb(*pskb);
+               ret = -EPERM;
+       } else if ((verdict & NF_VERDICT_MASK)  == NF_QUEUE) {
+               NFDEBUG("nf_hook: Verdict = QUEUE.\n");
+               if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn,
+                             verdict >> NF_VERDICT_BITS))
+                       goto next_hook;
+       }
+unlock:
+       rcu_read_unlock();
+       return ret;
+}
+EXPORT_SYMBOL(nf_hook_slow);
+
+
+int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
+{
+       struct sk_buff *nskb;
+
+       if (writable_len > (*pskb)->len)
+               return 0;
+
+       /* Not exclusive use of packet?  Must copy. */
+       if (skb_shared(*pskb) || skb_cloned(*pskb))
+               goto copy_skb;
+
+       return pskb_may_pull(*pskb, writable_len);
+
+copy_skb:
+       nskb = skb_copy(*pskb, GFP_ATOMIC);
+       if (!nskb)
+               return 0;
+       BUG_ON(skb_is_nonlinear(nskb));
+
+       /* Rest of kernel will get very unhappy if we pass it a
+          suddenly-orphaned skbuff */
+       if ((*pskb)->sk)
+               skb_set_owner_w(nskb, (*pskb)->sk);
+       kfree_skb(*pskb);
+       *pskb = nskb;
+       return 1;
+}
+EXPORT_SYMBOL(skb_make_writable);
+
+
+/* This does not belong here, but locally generated errors need it if connection
+   tracking is in use: without this, connection may not be in hash table, and hence
+   manufactured ICMP or RST packets will not be associated with it. */
+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+EXPORT_SYMBOL(ip_ct_attach);
+
+void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
+{
+       void (*attach)(struct sk_buff *, struct sk_buff *);
+
+       if (skb->nfct && (attach = ip_ct_attach) != NULL) {
+               mb(); /* Just to be sure: must be read before executing this */
+               attach(new, skb);
+       }
+}
+EXPORT_SYMBOL(nf_ct_attach);
+
+#ifdef CONFIG_PROC_FS
+struct proc_dir_entry *proc_net_netfilter;
+EXPORT_SYMBOL(proc_net_netfilter);
+#endif
+
+void __init netfilter_init(void)
+{
+       int i, h;
+       for (i = 0; i < NPROTO; i++) {
+               for (h = 0; h < NF_MAX_HOOKS; h++)
+                       INIT_LIST_HEAD(&nf_hooks[i][h]);
+       }
+
+#ifdef CONFIG_PROC_FS
+       proc_net_netfilter = proc_mkdir("netfilter", proc_net);
+       if (!proc_net_netfilter)
+               panic("cannot create netfilter proc entry");
+#endif
+
+       if (netfilter_queue_init() < 0)
+               panic("cannot initialize nf_queue");
+       if (netfilter_log_init() < 0)
+               panic("cannot initialize nf_log");
+}
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h

new file mode 100644 (file)

index 0000000..6bdee29
--- /dev/null
+++ b/net/netfilter/nf_internals.h
@@ -0,0 +1,39 @@
+#ifndef _NF_INTERNALS_H
+#define _NF_INTERNALS_H
+
+#include <linux/config.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define NFDEBUG(format, args...)  printk(format , ## args)
+#else
+#define NFDEBUG(format, args...)
+#endif
+
+
+/* core.c */
+extern unsigned int nf_iterate(struct list_head *head,
+                               struct sk_buff **skb,
+                               int hook,
+                               const struct net_device *indev,
+                               const struct net_device *outdev,
+                               struct list_head **i,
+                               int (*okfn)(struct sk_buff *),
+                               int hook_thresh);
+
+/* nf_queue.c */
+extern int nf_queue(struct sk_buff **skb, 
+                   struct list_head *elem, 
+                   int pf, unsigned int hook,
+                   struct net_device *indev,
+                   struct net_device *outdev,
+                   int (*okfn)(struct sk_buff *),
+                   unsigned int queuenum);
+extern int __init netfilter_queue_init(void);
+
+/* nf_log.c */
+extern int __init netfilter_log_init(void);
+
+#endif
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c

new file mode 100644 (file)

index 0000000..3e76bd0
--- /dev/null
+++ b/net/netfilter/nf_log.c
@@ -0,0 +1,178 @@
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/seq_file.h>
+#include <net/protocol.h>
+
+#include "nf_internals.h"
+
+/* Internal logging interface, which relies on the real 
+   LOG target modules */
+
+#define NF_LOG_PREFIXLEN               128
+
+static struct nf_logger *nf_logging[NPROTO]; /* = NULL */
+static DEFINE_SPINLOCK(nf_log_lock);
+
+/* Returns -EBUSY if somebody else is registered, -EEXIST if the same logger
+ * is already registered, -EINVAL if pf is out of range, 0 on success. */
+int nf_log_register(int pf, struct nf_logger *logger)
+{
+       int ret = -EBUSY;
+
+       if (pf >= NPROTO)
+               return -EINVAL;
+
+       /* Any setup of logging members must be done before
+        * substituting pointer. */
+       spin_lock(&nf_log_lock);
+       if (!nf_logging[pf]) {
+               rcu_assign_pointer(nf_logging[pf], logger);
+               ret = 0;
+       } else if (nf_logging[pf] == logger)
+               ret = -EEXIST;
+
+       spin_unlock(&nf_log_lock);
+       return ret;
+}              
+EXPORT_SYMBOL(nf_log_register);
+
+int nf_log_unregister_pf(int pf)
+{
+       if (pf >= NPROTO)
+               return -EINVAL;
+
+       spin_lock(&nf_log_lock);
+       nf_logging[pf] = NULL;
+       spin_unlock(&nf_log_lock);
+
+       /* Give time to concurrent readers. */
+       synchronize_net();
+
+       return 0;
+}
+EXPORT_SYMBOL(nf_log_unregister_pf);
+
+void nf_log_unregister_logger(struct nf_logger *logger)
+{
+       int i;
+
+       spin_lock(&nf_log_lock);
+       for (i = 0; i < NPROTO; i++) {
+               if (nf_logging[i] == logger)
+                       nf_logging[i] = NULL;
+       }
+       spin_unlock(&nf_log_lock);
+
+       synchronize_net();
+}
+EXPORT_SYMBOL(nf_log_unregister_logger);
+
+void nf_log_packet(int pf,
+                  unsigned int hooknum,
+                  const struct sk_buff *skb,
+                  const struct net_device *in,
+                  const struct net_device *out,
+                  struct nf_loginfo *loginfo,
+                  const char *fmt, ...)
+{
+       va_list args;
+       char prefix[NF_LOG_PREFIXLEN];
+       struct nf_logger *logger;
+       
+       rcu_read_lock();
+       logger = rcu_dereference(nf_logging[pf]);
+       if (logger) {
+               va_start(args, fmt);
+               vsnprintf(prefix, sizeof(prefix), fmt, args);
+               va_end(args);
+               /* We must read logging before nf_logfn[pf] */
+               logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix);
+       } else if (net_ratelimit()) {
+               printk(KERN_WARNING "nf_log_packet: can\'t log since "
+                      "no backend logging module loaded in! Please either "
+                      "load one, or disable logging explicitly\n");
+       }
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_log_packet);
+
+#ifdef CONFIG_PROC_FS
+static void *seq_start(struct seq_file *seq, loff_t *pos)
+{
+       rcu_read_lock();
+
+       if (*pos >= NPROTO)
+               return NULL;
+
+       return pos;
+}
+
+static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       (*pos)++;
+
+       if (*pos >= NPROTO)
+               return NULL;
+
+       return pos;
+}
+
+static void seq_stop(struct seq_file *s, void *v)
+{
+       rcu_read_unlock();
+}
+
+static int seq_show(struct seq_file *s, void *v)
+{
+       loff_t *pos = v;
+       const struct nf_logger *logger;
+
+       logger = rcu_dereference(nf_logging[*pos]);
+
+       if (!logger)
+               return seq_printf(s, "%2lld NONE\n", *pos);
+       
+       return seq_printf(s, "%2lld %s\n", *pos, logger->name);
+}
+
+static struct seq_operations nflog_seq_ops = {
+       .start  = seq_start,
+       .next   = seq_next,
+       .stop   = seq_stop,
+       .show   = seq_show,
+};
+
+static int nflog_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &nflog_seq_ops);
+}
+
+static struct file_operations nflog_file_ops = {
+       .owner   = THIS_MODULE,
+       .open    = nflog_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release,
+};
+
+#endif /* PROC_FS */
+
+
+int __init netfilter_log_init(void)
+{
+#ifdef CONFIG_PROC_FS
+       struct proc_dir_entry *pde;
+
+       pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter);
+       if (!pde)
+               return -1;
+
+       pde->proc_fops = &nflog_file_ops;
+#endif
+       return 0;
+}
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c

new file mode 100644 (file)

index 0000000..d10d552
--- /dev/null
+++ b/net/netfilter/nf_queue.c
@@ -0,0 +1,343 @@
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/seq_file.h>
+#include <net/protocol.h>
+
+#include "nf_internals.h"
+
+/* 
+ * A queue handler may be registered for each protocol.  Each is protected by
+ * long term mutex.  The handler must provide an outfn() to accept packets
+ * for queueing and must reinject all packets it receives, no matter what.
+ */
+static struct nf_queue_handler *queue_handler[NPROTO];
+static struct nf_queue_rerouter *queue_rerouter;
+
+static DEFINE_RWLOCK(queue_handler_lock);
+
+/* return EBUSY when somebody else is registered, return EEXIST if the
+ * same handler is registered, return 0 in case of success. */
+int nf_register_queue_handler(int pf, struct nf_queue_handler *qh)
+{      
+       int ret;
+
+       if (pf >= NPROTO)
+               return -EINVAL;
+
+       write_lock_bh(&queue_handler_lock);
+       if (queue_handler[pf] == qh)
+               ret = -EEXIST;
+       else if (queue_handler[pf])
+               ret = -EBUSY;
+       else {
+               queue_handler[pf] = qh;
+               ret = 0;
+       }
+       write_unlock_bh(&queue_handler_lock);
+
+       return ret;
+}
+EXPORT_SYMBOL(nf_register_queue_handler);
+
+/* The caller must flush their queue before this */
+int nf_unregister_queue_handler(int pf)
+{
+       if (pf >= NPROTO)
+               return -EINVAL;
+
+       write_lock_bh(&queue_handler_lock);
+       queue_handler[pf] = NULL;
+       write_unlock_bh(&queue_handler_lock);
+       
+       return 0;
+}
+EXPORT_SYMBOL(nf_unregister_queue_handler);
+
+int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer)
+{
+       if (pf >= NPROTO)
+               return -EINVAL;
+
+       write_lock_bh(&queue_handler_lock);
+       memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf]));
+       write_unlock_bh(&queue_handler_lock);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nf_register_queue_rerouter);
+
+int nf_unregister_queue_rerouter(int pf)
+{
+       if (pf >= NPROTO)
+               return -EINVAL;
+
+       write_lock_bh(&queue_handler_lock);
+       memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf]));
+       write_unlock_bh(&queue_handler_lock);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter);
+
+void nf_unregister_queue_handlers(struct nf_queue_handler *qh)
+{
+       int pf;
+
+       write_lock_bh(&queue_handler_lock);
+       for (pf = 0; pf < NPROTO; pf++)  {
+               if (queue_handler[pf] == qh)
+                       queue_handler[pf] = NULL;
+       }
+       write_unlock_bh(&queue_handler_lock);
+}
+EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
+
+/*
+ * Any packet that leaves via this function must come back through
+ * nf_reinject().  Returns 1 if skb was queued or freed, 0 to skip the hook.
+ */
+int nf_queue(struct sk_buff **skb,
+            struct list_head *elem,
+            int pf, unsigned int hook,
+            struct net_device *indev,
+            struct net_device *outdev,
+            int (*okfn)(struct sk_buff *),
+            unsigned int queuenum)
+{
+       int status;
+       struct nf_info *info;
+#ifdef CONFIG_BRIDGE_NETFILTER
+       struct net_device *physindev = NULL;
+       struct net_device *physoutdev = NULL;
+#endif
+
+       /* QUEUE == DROP if no handler is registered for pf, to be safe. */
+       read_lock(&queue_handler_lock);
+       if (!queue_handler[pf] || !queue_handler[pf]->outfn) {
+               read_unlock(&queue_handler_lock);
+               kfree_skb(*skb);
+               return 1;
+       }
+
+       info = kmalloc(sizeof(*info)+queue_rerouter[pf].rer_size, GFP_ATOMIC);
+       if (!info) {
+               if (net_ratelimit())
+                       printk(KERN_ERR "OOM queueing packet %p\n",
+                              *skb);
+               read_unlock(&queue_handler_lock);
+               kfree_skb(*skb);
+               return 1;
+       }
+
+       *info = (struct nf_info) {
+               (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
+
+       /* If it's going away, ignore hook. */
+       if (!try_module_get(info->elem->owner)) {
+               read_unlock(&queue_handler_lock);
+               kfree(info);
+               return 0;
+       }
+
+       /* Bump dev refs so they don't vanish while packet is out */
+       if (indev) dev_hold(indev);
+       if (outdev) dev_hold(outdev);
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+       if ((*skb)->nf_bridge) {
+               physindev = (*skb)->nf_bridge->physindev;
+               if (physindev) dev_hold(physindev);
+               physoutdev = (*skb)->nf_bridge->physoutdev;
+               if (physoutdev) dev_hold(physoutdev);
+       }
+#endif
+       if (queue_rerouter[pf].save)
+               queue_rerouter[pf].save(*skb, info);
+
+       status = queue_handler[pf]->outfn(*skb, info, queuenum,
+                                         queue_handler[pf]->data);
+
+       if (status >= 0 && queue_rerouter[pf].reroute)
+               status = queue_rerouter[pf].reroute(skb, info);
+
+       read_unlock(&queue_handler_lock);
+
+       if (status < 0) {
+               /* Queueing failed: drop every reference taken above. */
+               if (indev) dev_put(indev);
+               if (outdev) dev_put(outdev);
+#ifdef CONFIG_BRIDGE_NETFILTER
+               if (physindev) dev_put(physindev);
+               if (physoutdev) dev_put(physoutdev);
+#endif
+               module_put(info->elem->owner);
+               kfree(info);
+               kfree_skb(*skb);
+
+               return 1;
+       }
+
+       return 1;
+}
+
+/* Re-enter hook traversal for a previously queued skb, applying verdict. */
+void nf_reinject(struct sk_buff *skb, struct nf_info *info,
+                unsigned int verdict)
+{
+       struct list_head *elem = &info->elem->list;
+       struct list_head *i;
+
+       rcu_read_lock();
+
+       /* Release those devices we held, or Alexey will kill me. */
+       if (info->indev) dev_put(info->indev);
+       if (info->outdev) dev_put(info->outdev);
+#ifdef CONFIG_BRIDGE_NETFILTER
+       if (skb->nf_bridge) {
+               if (skb->nf_bridge->physindev)
+                       dev_put(skb->nf_bridge->physindev);
+               if (skb->nf_bridge->physoutdev)
+                       dev_put(skb->nf_bridge->physoutdev);
+       }
+#endif
+
+       /* Drop reference to owner of hook which queued us. */
+       module_put(info->elem->owner);
+
+       /* Look for the queueing hook; i ends on the list head if it is gone. */
+       list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
+               if (i == elem)
+                       break;
+       }
+
+       if (i == &nf_hooks[info->pf][info->hook]) {
+               /* The module which sent it to userspace is gone. */
+               NFDEBUG("%s: module disappeared, dropping packet.\n",
+                       __FUNCTION__);
+               verdict = NF_DROP;
+       }
+
+       /* Continue traversal iff userspace said ok... */
+       if (verdict == NF_REPEAT) {
+               elem = elem->prev;
+               verdict = NF_ACCEPT;
+       }
+
+       if (verdict == NF_ACCEPT) {
+       next_hook:
+               verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
+                                    &skb, info->hook,
+                                    info->indev, info->outdev, &elem,
+                                    info->okfn, INT_MIN);
+       }
+
+       switch (verdict & NF_VERDICT_MASK) {
+       case NF_ACCEPT:
+               info->okfn(skb);
+               break;
+
+       case NF_QUEUE:
+               if (!nf_queue(&skb, elem, info->pf, info->hook,
+                             info->indev, info->outdev, info->okfn,
+                             verdict >> NF_VERDICT_BITS))
+                       goto next_hook;
+               break;
+       }
+       rcu_read_unlock();
+
+       if (verdict == NF_DROP)
+               kfree_skb(skb);
+
+       kfree(info);
+}
+EXPORT_SYMBOL(nf_reinject);
+
+#ifdef CONFIG_PROC_FS
+static void *seq_start(struct seq_file *seq, loff_t *pos)
+{
+       if (*pos >= NPROTO)
+               return NULL;
+
+       return pos;
+}
+
+static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       (*pos)++;
+
+       if (*pos >= NPROTO)
+               return NULL;
+
+       return pos;
+}
+
+static void seq_stop(struct seq_file *s, void *v)
+{
+
+}
+
+static int seq_show(struct seq_file *s, void *v)
+{
+       int ret;
+       loff_t *pos = v;
+       struct nf_queue_handler *qh;
+
+       read_lock_bh(&queue_handler_lock);
+       qh = queue_handler[*pos];
+       if (!qh)
+               ret = seq_printf(s, "%2lld NONE\n", *pos);
+       else
+               ret = seq_printf(s, "%2lld %s\n", *pos, qh->name);
+       read_unlock_bh(&queue_handler_lock);
+
+       return ret;
+}
+
+static struct seq_operations nfqueue_seq_ops = {
+       .start  = seq_start,
+       .next   = seq_next,
+       .stop   = seq_stop,
+       .show   = seq_show,
+};
+
+static int nfqueue_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &nfqueue_seq_ops);
+}
+
+static struct file_operations nfqueue_file_ops = {
+       .owner   = THIS_MODULE,
+       .open    = nfqueue_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release,
+};
+#endif /* PROC_FS */
+
+
+int __init netfilter_queue_init(void)
+{
+#ifdef CONFIG_PROC_FS
+       struct proc_dir_entry *pde;
+#endif
+       queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter),
+                                GFP_KERNEL);
+       if (!queue_rerouter)
+               return -ENOMEM;
+
+#ifdef CONFIG_PROC_FS
+       pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter);
+       if (!pde) {
+               kfree(queue_rerouter);
+               return -1;
+       }
+       pde->proc_fops = &nfqueue_file_ops;
+#endif
+       memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter));
+
+       return 0;
+}
+
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c

new file mode 100644 (file)

index 0000000..61a833a
--- /dev/null
+++ b/net/netfilter/nf_sockopt.c
@@ -0,0 +1,132 @@
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <net/sock.h>
+
+#include "nf_internals.h"
+
+/* Sockopts only registered and called from user context, so
+   net locking would be overkill.  Also, [gs]etsockopt calls may
+   sleep. */
+static DECLARE_MUTEX(nf_sockopt_mutex);
+static LIST_HEAD(nf_sockopts);
+
+/* Do exclusive ranges overlap? */
+static inline int overlap(int min1, int max1, int min2, int max2)
+{
+       return max1 > min2 && min1 < max2;
+}
+
+/* Functions to register sockopt ranges (exclusive). */
+int nf_register_sockopt(struct nf_sockopt_ops *reg)
+{
+       struct list_head *i;
+       int ret = 0;
+
+       if (down_interruptible(&nf_sockopt_mutex) != 0)
+               return -EINTR;
+
+       list_for_each(i, &nf_sockopts) {
+               struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
+               if (ops->pf == reg->pf
+                   && (overlap(ops->set_optmin, ops->set_optmax, 
+                               reg->set_optmin, reg->set_optmax)
+                       || overlap(ops->get_optmin, ops->get_optmax, 
+                                  reg->get_optmin, reg->get_optmax))) {
+                       NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+                               ops->set_optmin, ops->set_optmax, 
+                               ops->get_optmin, ops->get_optmax, 
+                               reg->set_optmin, reg->set_optmax,
+                               reg->get_optmin, reg->get_optmax);
+                       ret = -EBUSY;
+                       goto out;
+               }
+       }
+
+       list_add(&reg->list, &nf_sockopts);
+out:
+       up(&nf_sockopt_mutex);
+       return ret;
+}
+EXPORT_SYMBOL(nf_register_sockopt);
+
+void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
+{
+       /* No point being interruptible: we're probably in cleanup_module() */
+ restart:
+       down(&nf_sockopt_mutex);
+       if (reg->use != 0) {
+               /* To be woken by nf_sockopt call... */
+               /* FIXME: Stuart Young's name appears gratuitously. */
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               reg->cleanup_task = current;
+               up(&nf_sockopt_mutex);
+               schedule();
+               goto restart;
+       }
+       list_del(&reg->list);
+       up(&nf_sockopt_mutex);
+}
+EXPORT_SYMBOL(nf_unregister_sockopt);
+
+/* Call get/setsockopt() */
+static int nf_sockopt(struct sock *sk, int pf, int val, 
+                     char __user *opt, int *len, int get)
+{
+       struct list_head *i;
+       struct nf_sockopt_ops *ops;
+       int ret;
+
+       if (down_interruptible(&nf_sockopt_mutex) != 0)
+               return -EINTR;
+
+       list_for_each(i, &nf_sockopts) {
+               ops = (struct nf_sockopt_ops *)i;
+               if (ops->pf == pf) {
+                       if (get) {
+                               if (val >= ops->get_optmin
+                                   && val < ops->get_optmax) {
+                                       ops->use++;
+                                       up(&nf_sockopt_mutex);
+                                       ret = ops->get(sk, val, opt, len);
+                                       goto out;
+                               }
+                       } else {
+                               if (val >= ops->set_optmin
+                                   && val < ops->set_optmax) {
+                                       ops->use++;
+                                       up(&nf_sockopt_mutex);
+                                       ret = ops->set(sk, val, opt, *len);
+                                       goto out;
+                               }
+                       }
+               }
+       }
+       up(&nf_sockopt_mutex);
+       return -ENOPROTOOPT;
+       
+ out:
+       down(&nf_sockopt_mutex);
+       ops->use--;
+       if (ops->cleanup_task)
+               wake_up_process(ops->cleanup_task);
+       up(&nf_sockopt_mutex);
+       return ret;
+}
+
+int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
+                 int len)
+{
+       return nf_sockopt(sk, pf, val, opt, &len, 0);
+}
+EXPORT_SYMBOL(nf_setsockopt);
+
+int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
+{
+       return nf_sockopt(sk, pf, val, opt, len, 1);
+}
+EXPORT_SYMBOL(nf_getsockopt);
+
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c

new file mode 100644 (file)

index 0000000..e089f17
--- /dev/null
+++ b/net/netfilter/nfnetlink.c
@@ -0,0 +1,376 @@
+/* Netfilter messages via netlink socket. Allows for user space
+ * protocol helpers and general trouble making from userspace.
+ *
+ * (C) 2001 by Jay Schulist <jschlst@samba.org>,
+ * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net>
+ *
+ * Initial netfilter messages via netlink development funded and
+ * generally made possible by Network Robots, Inc. (www.networkrobots.com)
+ *
+ * Further development of this code funded by Astaro AG (http://www.astaro.com)
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License, incorporated herein by reference.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/skbuff.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <net/sock.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+
+#include <linux/netfilter.h>
+#include <linux/netlink.h>
+#include <linux/netfilter/nfnetlink.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
+
+static char __initdata nfversion[] = "0.30";
+
+#if 0
+#define DEBUGP(format, args...)        \
+               printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \
+                       __LINE__, __FUNCTION__, ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static struct sock *nfnl = NULL;
+static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
+DECLARE_MUTEX(nfnl_sem);
+
+void nfnl_lock(void)
+{
+       nfnl_shlock();
+}
+
+void nfnl_unlock(void)
+{
+       nfnl_shunlock();
+}
+
+int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
+{
+       DEBUGP("registering subsystem ID %u\n", n->subsys_id);
+
+       nfnl_lock();
+       if (subsys_table[n->subsys_id]) {
+               nfnl_unlock();
+               return -EBUSY;
+       }
+       subsys_table[n->subsys_id] = n;
+       nfnl_unlock();
+
+       return 0;
+}
+
+int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n)
+{
+       DEBUGP("unregistering subsystem ID %u\n", n->subsys_id);
+
+       nfnl_lock();
+       subsys_table[n->subsys_id] = NULL;
+       nfnl_unlock();
+
+       return 0;
+}
+
+static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type)
+{
+       u_int8_t subsys_id = NFNL_SUBSYS_ID(type);
+
+       if (subsys_id >= NFNL_SUBSYS_COUNT
+           || subsys_table[subsys_id] == NULL)
+               return NULL;
+
+       return subsys_table[subsys_id];
+}
+
+static inline struct nfnl_callback *
+nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss)
+{
+       u_int8_t cb_id = NFNL_MSG_TYPE(type);
+       
+       if (cb_id >= ss->cb_count) {
+               DEBUGP("msgtype %u >= %u, returning\n", type, ss->cb_count);
+               return NULL;
+       }
+
+       return &ss->cb[cb_id];
+}
+
+void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen,
+               const void *data)
+{
+       struct nfattr *nfa;
+       int size = NFA_LENGTH(attrlen);
+
+       nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size));
+       nfa->nfa_type = attrtype;
+       nfa->nfa_len  = size;
+       memcpy(NFA_DATA(nfa), data, attrlen);
+       memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size);
+}
+
+int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len)
+{
+       memset(tb, 0, sizeof(struct nfattr *) * maxattr);
+
+       while (NFA_OK(nfa, len)) {
+               unsigned flavor = nfa->nfa_type;
+               if (flavor && flavor <= maxattr)
+                       tb[flavor-1] = nfa;
+               nfa = NFA_NEXT(nfa, len);
+       }
+
+       return 0;
+}
+
+/**
+ * nfnetlink_check_attributes - check and parse nfnetlink attributes
+ *
+ * @subsys: nfnl subsystem for which this message is to be parsed
+ * @nlh: netlink message to be checked/parsed
+ * @cda: array of pointers, at least subsys->cb[cb_id].attr_count entries
+ * Returns 0 on success, -EINVAL on bad msg type, length or attribute type.
+ */
+static int
+nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
+                          struct nlmsghdr *nlh, struct nfattr *cda[])
+{
+       int min_len;
+       u_int16_t attr_count;
+       u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
+
+       if (unlikely(cb_id >= subsys->cb_count)) {
+               DEBUGP("msgtype %u >= %u, returning\n",
+                       cb_id, subsys->cb_count);
+               return -EINVAL;
+       }
+       /* nlmsg_len counts the netlink header too, so min_len must as well. */
+       min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
+       if (unlikely(nlh->nlmsg_len < min_len))
+               return -EINVAL;
+
+       attr_count = subsys->cb[cb_id].attr_count;
+       memset(cda, 0, sizeof(struct nfattr *) * attr_count);
+
+       /* check attribute lengths. */
+       if (likely(nlh->nlmsg_len > min_len)) {
+               struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh));
+               int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+
+               while (NFA_OK(attr, attrlen)) {
+                       unsigned flavor = attr->nfa_type;
+                       if (flavor) {
+                               if (flavor > attr_count)
+                                       return -EINVAL;
+                               cda[flavor - 1] = attr;
+                       }
+                       attr = NFA_NEXT(attr, attrlen);
+               }
+       }
+
+       /* implicit: if nlmsg_len == min_len, we return 0, and an empty
+        * (zeroed) cda[] array. The message is valid, but empty. */
+
+       return 0;
+}
+
+int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+{
+       int allocation = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
+       int err = 0;
+
+       NETLINK_CB(skb).dst_group = group;
+       if (echo)
+               atomic_inc(&skb->users);
+       netlink_broadcast(nfnl, skb, pid, group, allocation);
+       if (echo)
+               err = netlink_unicast(nfnl, skb, pid, MSG_DONTWAIT);
+
+       return err;
+}
+
+int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
+{
+       return netlink_unicast(nfnl, skb, pid, flags);
+}
+
+/* Process one complete nfnetlink message. */
+static inline int nfnetlink_rcv_msg(struct sk_buff *skb,
+                                   struct nlmsghdr *nlh, int *errp)
+{
+       struct nfnl_callback *nc;
+       struct nfnetlink_subsystem *ss;
+       int type, err = 0;
+
+       DEBUGP("entered; subsys=%u, msgtype=%u\n",
+                NFNL_SUBSYS_ID(nlh->nlmsg_type),
+                NFNL_MSG_TYPE(nlh->nlmsg_type));
+
+       /* Only requests are handled by kernel now. */
+       if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
+               DEBUGP("received non-request message\n");
+               return 0;
+       }
+
+       /* All the messages must at least contain nfgenmsg */
+       if (nlh->nlmsg_len < 
+                       NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg)))) {
+               DEBUGP("received message was too short\n");
+               return 0;
+       }
+
+       type = nlh->nlmsg_type;
+       ss = nfnetlink_get_subsys(type);
+       if (!ss) {
+#ifdef CONFIG_KMOD
+               /* don't call nfnl_shunlock, since it would reenter
+                * with further packet processing */
+               up(&nfnl_sem);
+               request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
+               nfnl_shlock();
+               ss = nfnetlink_get_subsys(type);
+               if (!ss)
+#endif
+               goto err_inval;
+       }
+
+       nc = nfnetlink_find_client(type, ss);
+       if (!nc) {
+               DEBUGP("unable to find client for type %d\n", type);
+               goto err_inval;
+       }
+
+       if (nc->cap_required && 
+           !cap_raised(NETLINK_CB(skb).eff_cap, nc->cap_required)) {
+               DEBUGP("permission denied for type %d\n", type);
+               *errp = -EPERM;
+               return -1;
+       }
+
+       {
+               u_int16_t attr_count = 
+                       ss->cb[NFNL_MSG_TYPE(nlh->nlmsg_type)].attr_count;
+               struct nfattr *cda[attr_count];
+
+               memset(cda, 0, sizeof(struct nfattr *) * attr_count);
+               
+               err = nfnetlink_check_attributes(ss, nlh, cda);
+               if (err < 0)
+                       goto err_inval;
+
+               DEBUGP("calling handler\n");
+               err = nc->call(nfnl, skb, nlh, cda, errp);
+               *errp = err;
+               return err;
+       }
+
+err_inval:
+       DEBUGP("returning -EINVAL\n");
+       *errp = -EINVAL;
+       return -1;
+}
+
+/* Process one packet of messages. */
+static inline int nfnetlink_rcv_skb(struct sk_buff *skb)
+{
+       int err;
+       struct nlmsghdr *nlh;
+
+       while (skb->len >= NLMSG_SPACE(0)) {
+               u32 rlen;
+
+               nlh = (struct nlmsghdr *)skb->data;
+               if (nlh->nlmsg_len < sizeof(struct nlmsghdr)
+                   || skb->len < nlh->nlmsg_len)
+                       return 0;
+               rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+               if (rlen > skb->len)
+                       rlen = skb->len;
+               if (nfnetlink_rcv_msg(skb, nlh, &err)) {
+                       if (!err)
+                               return -1;
+                       netlink_ack(skb, nlh, err);
+               } else
+                       if (nlh->nlmsg_flags & NLM_F_ACK)
+                               netlink_ack(skb, nlh, 0);
+               skb_pull(skb, rlen);
+       }
+
+       return 0;
+}
+
+static void nfnetlink_rcv(struct sock *sk, int len)
+{
+       do {
+               struct sk_buff *skb;
+
+               if (nfnl_shlock_nowait())
+                       return;
+
+               while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+                       if (nfnetlink_rcv_skb(skb)) {
+                               if (skb->len)
+                                       skb_queue_head(&sk->sk_receive_queue,
+                                                      skb);
+                               else
+                                       kfree_skb(skb);
+                               break;
+                       }
+                       kfree_skb(skb);
+               }
+
+               /* don't call nfnl_shunlock, since it would reenter
+                * with further packet processing */
+               up(&nfnl_sem);
+       } while(nfnl && nfnl->sk_receive_queue.qlen);
+}
+
+void __exit nfnetlink_exit(void)
+{
+       printk("Removing netfilter NETLINK layer.\n");
+       sock_release(nfnl->sk_socket);
+       return;
+}
+
+int __init nfnetlink_init(void)
+{
+       printk("Netfilter messages via NETLINK v%s.\n", nfversion);
+
+       nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX,
+                                    nfnetlink_rcv, THIS_MODULE);
+       if (!nfnl) {
+               printk(KERN_ERR "cannot initialize nfnetlink!\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+module_init(nfnetlink_init);
+module_exit(nfnetlink_exit);
+
+EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
+EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
+EXPORT_SYMBOL_GPL(nfnetlink_send);
+EXPORT_SYMBOL_GPL(nfnetlink_unicast);
+EXPORT_SYMBOL_GPL(nfattr_parse);
+EXPORT_SYMBOL_GPL(__nfa_fill);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c

new file mode 100644 (file)

index 0000000..ff5601c
--- /dev/null
+++ b/net/netfilter/nfnetlink_log.c
@@ -0,0 +1,1055 @@
+/*
+ * This is a module which is used for logging packets to userspace via
+ * nfnetlink.
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * Based on the old ipv4-only ipt_ULOG.c:
+ * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netlink.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_log.h>
+#include <linux/spinlock.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/security.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+#include <net/sock.h>
+
+#include <asm/atomic.h>
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+#include "../bridge/br_private.h"
+#endif
+
+#define NFULNL_NLBUFSIZ_DEFAULT        4096
+#define NFULNL_TIMEOUT_DEFAULT         100     /* every second */
+#define NFULNL_QTHRESH_DEFAULT         100     /* 100 packets */
+
+#define PRINTR(x, args...)     do { if (net_ratelimit()) \
+                                    printk(x, ## args); } while (0);
+
+#if 0
+#define UDEBUG(x, args ...)    printk(KERN_DEBUG "%s(%d):%s(): " x,       \
+                                       __FILE__, __LINE__, __FUNCTION__,  \
+                                       ## args)
+#else
+#define UDEBUG(x, ...)
+#endif
+
+/*
+ * One logging instance per bound group number.  Instances live in
+ * instance_table (protected by instances_lock) and are reference counted
+ * through `use`; the queueing state and the configurable parameters are
+ * modified under `lock` by the functions in this file.
+ */
+struct nfulnl_instance {
+       struct hlist_node hlist;        /* global list of instances */
+       spinlock_t lock;
+       atomic_t use;                   /* use count */
+
+       unsigned int qlen;              /* number of nlmsgs in skb */
+       struct sk_buff *skb;            /* pre-allocated skb */
+       struct nlmsghdr *lastnlh;       /* netlink header of last msg in skb */
+       struct timer_list timer;
+       int peer_pid;                   /* PID of the peer process */
+
+       /* configurable parameters */
+       unsigned int flushtimeout;      /* timeout until queue flush */
+       unsigned int nlbufsiz;          /* netlink buffer allocation size */
+       unsigned int qthreshold;        /* threshold of the queue */
+       u_int32_t copy_range;
+       u_int16_t group_num;            /* number of this queue */
+       u_int8_t copy_mode;     
+};
+
+static DEFINE_RWLOCK(instances_lock);
+
+#define INSTANCE_BUCKETS       16
+static struct hlist_head instance_table[INSTANCE_BUCKETS];
+static unsigned int hash_init;
+
+/* Map a group number onto a bucket index of instance_table. */
+static inline u_int8_t instance_hashfn(u_int16_t group_num)
+{
+       u_int8_t low_byte = group_num & 0xff;
+
+       return low_byte % INSTANCE_BUCKETS;
+}
+
+/*
+ * Return the instance bound to group_num, or NULL if there is none.
+ * Takes no lock and no reference itself; the callers in this file hold
+ * instances_lock around it.
+ */
+static struct nfulnl_instance *
+__instance_lookup(u_int16_t group_num)
+{
+       struct hlist_head *bucket = &instance_table[instance_hashfn(group_num)];
+       struct hlist_node *node;
+       struct nfulnl_instance *cur;
+
+       UDEBUG("entering (group_num=%u)\n", group_num);
+
+       hlist_for_each_entry(cur, node, bucket, hlist)
+               if (cur->group_num == group_num)
+                       return cur;
+
+       return NULL;
+}
+
+/* Take one additional use-count reference on inst. */
+static inline void instance_get(struct nfulnl_instance *inst)
+{
+       atomic_inc(&inst->use);
+}
+
+/*
+ * Look up group_num under the read side of instances_lock and, if an
+ * instance exists, take a use-count reference on it before unlocking.
+ * Returns the instance or NULL.
+ */
+static struct nfulnl_instance *
+instance_lookup_get(u_int16_t group_num)
+{
+       struct nfulnl_instance *found;
+
+       read_lock_bh(&instances_lock);
+       found = __instance_lookup(group_num);
+       if (found != NULL)
+               atomic_inc(&found->use);
+       read_unlock_bh(&instances_lock);
+
+       return found;
+}
+
+/*
+ * Drop one use-count reference; the instance is freed when the count
+ * reaches zero.  A NULL inst is accepted and ignored.
+ */
+static void
+instance_put(struct nfulnl_instance *inst)
+{
+       if (!inst)
+               return;
+       if (!atomic_dec_and_test(&inst->use))
+               return;
+
+       UDEBUG("kfree(inst=%p)\n", inst);
+       kfree(inst);
+}
+
+static void nfulnl_timer(unsigned long data);
+
+/*
+ * Allocate a new instance for group_num owned by netlink peer `pid`,
+ * initialise it with the default parameters and add it to instance_table.
+ * A module reference is taken for every instance created.
+ *
+ * Returns the new instance with a use count of 2, or NULL if an instance
+ * for group_num already exists, the allocation failed or the module
+ * reference could not be obtained.
+ */
+static struct nfulnl_instance *
+instance_create(u_int16_t group_num, int pid)
+{
+       struct nfulnl_instance *inst;
+
+       UDEBUG("entering (group_num=%u, pid=%d)\n", group_num,
+               pid);
+
+       write_lock_bh(&instances_lock);
+       if (__instance_lookup(group_num)) {
+               UDEBUG("aborting, instance already exists\n");
+               goto out_unlock;
+       }
+
+       /* GFP_ATOMIC: we are holding instances_lock with BHs disabled */
+       inst = kmalloc(sizeof(*inst), GFP_ATOMIC);
+       if (!inst)
+               goto out_unlock;
+
+       memset(inst, 0, sizeof(*inst));
+       INIT_HLIST_NODE(&inst->hlist);
+       spin_lock_init(&inst->lock);
+       /* needs to be two, since we _put() after creation */
+       atomic_set(&inst->use, 2);
+
+       init_timer(&inst->timer);
+       inst->timer.function = nfulnl_timer;
+       inst->timer.data = (unsigned long)inst;
+       /* don't start timer yet. (re)start it  with every packet */
+
+       inst->peer_pid = pid;
+       inst->group_num = group_num;
+
+       inst->qthreshold        = NFULNL_QTHRESH_DEFAULT;
+       inst->flushtimeout      = NFULNL_TIMEOUT_DEFAULT;
+       inst->nlbufsiz          = NFULNL_NLBUFSIZ_DEFAULT;
+       inst->copy_mode         = NFULNL_COPY_PACKET;
+       inst->copy_range        = 0xffff;
+
+       if (!try_module_get(THIS_MODULE))
+               goto out_free;
+
+       hlist_add_head(&inst->hlist,
+                      &instance_table[instance_hashfn(group_num)]);
+
+       UDEBUG("newly added node: %p, next=%p\n", &inst->hlist,
+               inst->hlist.next);
+
+       write_unlock_bh(&instances_lock);
+
+       return inst;
+
+out_free:
+       /* The instance was never published, so free it directly.
+        * instance_put() would only drop the count from 2 to 1 and
+        * leak the allocation. */
+       kfree(inst);
+out_unlock:
+       write_unlock_bh(&instances_lock);
+       return NULL;
+}
+
+static int __nfulnl_send(struct nfulnl_instance *inst);
+
+/*
+ * Tear down an instance: unhash it, flush or discard its pending skb, and
+ * drop one use-count reference plus the module reference taken in
+ * instance_create().
+ *
+ * @lock: non-zero if this function should take instances_lock itself;
+ *        zero when the caller already holds it for writing.
+ */
+static void
+_instance_destroy2(struct nfulnl_instance *inst, int lock)
+{
+       /* first pull it out of the global list */
+       if (lock)
+               write_lock_bh(&instances_lock);
+
+       UDEBUG("removing instance %p (queuenum=%u) from hash\n",
+               inst, inst->group_num);
+
+       hlist_del(&inst->hlist);
+
+       if (lock)
+               write_unlock_bh(&instances_lock);
+
+       /* then flush all pending packets from skb */
+
+       spin_lock_bh(&inst->lock);
+       if (inst->skb) {
+               if (inst->qlen)
+                       __nfulnl_send(inst);
+               /* __nfulnl_send() clears inst->skb, so this only triggers
+                * for an skb that held no messages (qlen == 0) */
+               if (inst->skb) {
+                       kfree_skb(inst->skb);
+                       inst->skb = NULL;
+               }
+       }
+       spin_unlock_bh(&inst->lock);
+
+       /* and finally put the refcount */
+       instance_put(inst);
+
+       module_put(THIS_MODULE);
+}
+
+/* Destroy inst; for callers that already hold instances_lock for writing. */
+static inline void __instance_destroy(struct nfulnl_instance *inst)
+{
+       _instance_destroy2(inst, 0);
+}
+
+/* Destroy inst, taking instances_lock internally while unhashing it. */
+static inline void instance_destroy(struct nfulnl_instance *inst)
+{
+       _instance_destroy2(inst, 1);
+}
+
+/*
+ * Set the copy mode and copy range of an instance, under inst->lock.
+ * COPY_NONE and COPY_META force the range to 0; COPY_PACKET clamps it to
+ * 0xffff.  Returns 0 on success or -EINVAL for an unknown mode.
+ */
+static int
+nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode,
+                 unsigned int range)
+{
+       int status = 0;
+
+       spin_lock_bh(&inst->lock);
+
+       if (mode == NFULNL_COPY_NONE || mode == NFULNL_COPY_META) {
+               inst->copy_mode = mode;
+               inst->copy_range = 0;
+       } else if (mode == NFULNL_COPY_PACKET) {
+               inst->copy_mode = mode;
+               /* we're using struct nfattr which has 16bit nfa_len */
+               inst->copy_range = (range > 0xffff) ? 0xffff : range;
+       } else {
+               status = -EINVAL;
+       }
+
+       spin_unlock_bh(&inst->lock);
+
+       return status;
+}
+
+/*
+ * Set the netlink buffer size of an instance, under inst->lock.
+ * Accepts values from NFULNL_NLBUFSIZ_DEFAULT up to 131072 inclusive;
+ * anything else leaves the setting untouched and returns -ERANGE.
+ */
+static int
+nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz)
+{
+       int status = -ERANGE;
+
+       spin_lock_bh(&inst->lock);
+       if (nlbufsiz >= NFULNL_NLBUFSIZ_DEFAULT && nlbufsiz <= 131072) {
+               inst->nlbufsiz = nlbufsiz;
+               status = 0;
+       }
+       spin_unlock_bh(&inst->lock);
+
+       return status;
+}
+
+/* Store a new flush timeout in the instance, under inst->lock.
+ * No range check is done; always returns 0. */
+static int nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout)
+{
+       spin_lock_bh(&inst->lock);
+       inst->flushtimeout = timeout;
+       spin_unlock_bh(&inst->lock);
+       return 0;
+}
+
+/* Store a new queue threshold in the instance, under inst->lock.
+ * No range check is done; always returns 0. */
+static int nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh)
+{
+       spin_lock_bh(&inst->lock);
+       inst->qthreshold = qthresh;
+       spin_unlock_bh(&inst->lock);
+       return 0;
+}
+
+/*
+ * Allocate the skb that collects log messages.  First tries inst_size
+ * bytes (room for a whole multipart message); if that fails, falls back
+ * to pkt_size bytes, just enough for the current packet.
+ * Returns the skb, or NULL if both attempts failed.
+ */
+static struct sk_buff *nfulnl_alloc_skb(unsigned int inst_size,
+                                       unsigned int pkt_size)
+{
+       struct sk_buff *skb;
+
+       UDEBUG("entered (%u, %u)\n", inst_size, pkt_size);
+
+       /* WARNING: has to be <= 128k due to slab restrictions */
+       skb = alloc_skb(inst_size, GFP_ATOMIC);
+       if (skb)
+               return skb;
+
+       PRINTR("nfnetlink_log: can't alloc whole buffer (%u bytes)\n",
+               inst_size);
+
+       /* second chance: only what the current packet needs */
+       skb = alloc_skb(pkt_size, GFP_ATOMIC);
+       if (!skb)
+               PRINTR("nfnetlink_log: can't even alloc %u bytes\n",
+                       pkt_size);
+
+       return skb;
+}
+
+/*
+ * Hand the accumulated skb to the peer via nfnetlink_unicast() and reset
+ * the queueing state of the instance (qlen, skb, lastnlh).
+ *
+ * The callers in this file invoke it with inst->lock held and inst->skb
+ * set, and hold a reference of their own on inst.
+ *
+ * Returns the status of nfnetlink_unicast().
+ */
+static int
+__nfulnl_send(struct nfulnl_instance *inst)
+{
+       int status;
+
+       /* A reference is taken whenever the flush timer is armed.
+        * del_timer() returns non-zero only if it deactivated a pending
+        * timer; nfulnl_timer() will then not run for it, so that
+        * reference has to be dropped here or it is leaked. */
+       if (del_timer(&inst->timer))
+               instance_put(inst);
+
+       /* lastnlh may still be NULL if no message was built successfully */
+       if (inst->qlen > 1 && inst->lastnlh)
+               inst->lastnlh->nlmsg_type = NLMSG_DONE;
+
+       status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT);
+       if (status < 0) {
+               UDEBUG("netlink_unicast() failed\n");
+               /* FIXME: statistics */
+       }
+
+       inst->qlen = 0;
+       inst->skb = NULL;
+       inst->lastnlh = NULL;
+
+       return status;
+}
+
+/*
+ * Flush timer callback; `data` is the instance pointer stored in
+ * instance_create().  Sends whatever is queued and drops the reference
+ * that was taken when the timer was armed.
+ */
+static void nfulnl_timer(unsigned long data)
+{
+       struct nfulnl_instance *inst = (struct nfulnl_instance *)data;
+
+       UDEBUG("timer function called, flushing buffer\n");
+
+       spin_lock_bh(&inst->lock);
+       /* the skb may already have been flushed by somebody else */
+       if (inst->skb)
+               __nfulnl_send(inst);
+       spin_unlock_bh(&inst->lock);
+
+       /* Put only after unlocking: this may be the last reference, and
+        * instance_put() would then free the memory holding the lock. */
+       instance_put(inst);
+}
+
+static inline int 
+__build_packet_message(struct nfulnl_instance *inst,
+                       const struct sk_buff *skb, 
+                       unsigned int data_len,
+                       unsigned int pf,
+                       unsigned int hooknum,
+                       const struct net_device *indev,
+                       const struct net_device *outdev,
+                       const struct nf_loginfo *li,
+                       const char *prefix)
+{
+       unsigned char *old_tail;
+       struct nfulnl_msg_packet_hdr pmsg;
+       struct nlmsghdr *nlh;
+       struct nfgenmsg *nfmsg;
+       u_int32_t tmp_uint;
+
+       UDEBUG("entered\n");
+               
+       old_tail = inst->skb->tail;
+       nlh = NLMSG_PUT(inst->skb, 0, 0, 
+                       NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
+                       sizeof(struct nfgenmsg));
+       nfmsg = NLMSG_DATA(nlh);
+       nfmsg->nfgen_family = pf;
+       nfmsg->version = NFNETLINK_V0;
+       nfmsg->res_id = htons(inst->group_num);
+
+       pmsg.hw_protocol        = htons(skb->protocol);
+       pmsg.hook               = hooknum;
+
+       NFA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg);
+
+       if (prefix) {
+               int slen = strlen(prefix);
+               if (slen > NFULNL_PREFIXLEN)
+                       slen = NFULNL_PREFIXLEN;
+               NFA_PUT(inst->skb, NFULA_PREFIX, slen, prefix);
+       }
+
+       if (indev) {
+               tmp_uint = htonl(indev->ifindex);
+#ifndef CONFIG_BRIDGE_NETFILTER
+               NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint),
+                       &tmp_uint);
+#else
+               if (pf == PF_BRIDGE) {
+                       /* Case 1: outdev is physical input device, we need to
+                        * look for bridge group (when called from
+                        * netfilter_bridge) */
+                       NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV,
+                               sizeof(tmp_uint), &tmp_uint);
+                       /* this is the bridge group "brX" */
+                       tmp_uint = htonl(indev->br_port->br->dev->ifindex);
+                       NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV,
+                               sizeof(tmp_uint), &tmp_uint);
+               } else {
+                       /* Case 2: indev is bridge group, we need to look for
+                        * physical device (when called from ipv4) */
+                       NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV,
+                               sizeof(tmp_uint), &tmp_uint);
+                       if (skb->nf_bridge && skb->nf_bridge->physindev) {
+                               tmp_uint = 
+                                   htonl(skb->nf_bridge->physindev->ifindex);
+                               NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV,
+                                       sizeof(tmp_uint), &tmp_uint);
+                       }
+               }
+#endif
+       }
+
+       if (outdev) {
+               tmp_uint = htonl(outdev->ifindex);
+#ifndef CONFIG_BRIDGE_NETFILTER
+               NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint),
+                       &tmp_uint);
+#else
+               if (pf == PF_BRIDGE) {
+                       /* Case 1: outdev is physical output device, we need to
+                        * look for bridge group (when called from
+                        * netfilter_bridge) */
+                       NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
+                               sizeof(tmp_uint), &tmp_uint);
+                       /* this is the bridge group "brX" */
+                       tmp_uint = htonl(outdev->br_port->br->dev->ifindex);
+                       NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV,
+                               sizeof(tmp_uint), &tmp_uint);
+               } else {
+                       /* Case 2: indev is a bridge group, we need to look
+                        * for physical device (when called from ipv4) */
+                       NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV,
+                               sizeof(tmp_uint), &tmp_uint);
+                       if (skb->nf_bridge) {
+                               tmp_uint = 
+                                   htonl(skb->nf_bridge->physoutdev->ifindex);
+                               NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
+                                       sizeof(tmp_uint), &tmp_uint);
+                       }
+               }
+#endif
+       }
+
+       if (skb->nfmark) {
+               tmp_uint = htonl(skb->nfmark);
+               NFA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint);
+       }
+
+       if (indev && skb->dev && skb->dev->hard_header_parse) {
+               struct nfulnl_msg_packet_hw phw;
+
+               phw.hw_addrlen = 
+                       skb->dev->hard_header_parse((struct sk_buff *)skb, 
+                                                   phw.hw_addr);
+               phw.hw_addrlen = htons(phw.hw_addrlen);
+               NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw);
+       }
+
+       if (skb->tstamp.off_sec) {
+               struct nfulnl_msg_packet_timestamp ts;
+
+               ts.sec = cpu_to_be64(skb_tv_base.tv_sec + skb->tstamp.off_sec);
+               ts.usec = cpu_to_be64(skb_tv_base.tv_usec + skb->tstamp.off_usec);
+
+               NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts);
+       }
+
+       /* UID */
+       if (skb->sk) {
+               read_lock_bh(&skb->sk->sk_callback_lock);
+               if (skb->sk->sk_socket && skb->sk->sk_socket->file) {
+                       u_int32_t uid = htonl(skb->sk->sk_socket->file->f_uid);
+                       /* need to unlock here since NFA_PUT may goto */
+                       read_unlock_bh(&skb->sk->sk_callback_lock);
+                       NFA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid);
+               } else
+                       read_unlock_bh(&skb->sk->sk_callback_lock);
+       }
+
+       if (data_len) {
+               struct nfattr *nfa;
+               int size = NFA_LENGTH(data_len);
+
+               if (skb_tailroom(inst->skb) < (int)NFA_SPACE(data_len)) {
+                       printk(KERN_WARNING "nfnetlink_log: no tailroom!\n");
+                       goto nlmsg_failure;
+               }
+
+               nfa = (struct nfattr *)skb_put(inst->skb, NFA_ALIGN(size));
+               nfa->nfa_type = NFULA_PAYLOAD;
+               nfa->nfa_len = size;
+
+               if (skb_copy_bits(skb, 0, NFA_DATA(nfa), data_len))
+                       BUG();
+       }
+               
+       nlh->nlmsg_len = inst->skb->tail - old_tail;
+       return 0;
+
+nlmsg_failure:
+       UDEBUG("nlmsg_failure\n");
+nfattr_failure:
+       PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n");
+       return -1;
+}
+
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+
+/* Fallback log parameters, used by nfulnl_log_packet() when the caller
+ * passes no loginfo or one that is not of type NF_LOG_TYPE_ULOG. */
+static struct nf_loginfo default_loginfo = {
+       .type =         NF_LOG_TYPE_ULOG,
+       .u = {
+               .ulog = {
+                       .copy_len       = 0xffff,
+                       .group          = 0,
+                       .qthreshold     = 1,
+               },
+       },
+};
+
+/* log handler for internal netfilter logging api */
+static void
+nfulnl_log_packet(unsigned int pf,
+                 unsigned int hooknum,
+                 const struct sk_buff *skb,
+                 const struct net_device *in,
+                 const struct net_device *out,
+                 const struct nf_loginfo *li_user,
+                 const char *prefix)
+{
+       unsigned int size, data_len;
+       struct nfulnl_instance *inst;
+       const struct nf_loginfo *li;
+       unsigned int qthreshold;
+       unsigned int nlbufsiz;
+
+       if (li_user && li_user->type == NF_LOG_TYPE_ULOG) 
+               li = li_user;
+       else
+               li = &default_loginfo;
+
+       inst = instance_lookup_get(li->u.ulog.group);
+       if (!inst)
+               inst = instance_lookup_get(0);
+       if (!inst) {
+               PRINTR("nfnetlink_log: trying to log packet, "
+                       "but no instance for group %u\n", li->u.ulog.group);
+               return;
+       }
+
+       /* all macros expand to constant values at compile time */
+       /* FIXME: do we want to make the size calculation conditional based on
+        * what is actually present?  way more branches and checks, but more
+        * memory efficient... */
+       size =    NLMSG_SPACE(sizeof(struct nfgenmsg))
+               + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hdr))
+               + NFA_SPACE(sizeof(u_int32_t))  /* ifindex */
+               + NFA_SPACE(sizeof(u_int32_t))  /* ifindex */
+#ifdef CONFIG_BRIDGE_NETFILTER
+               + NFA_SPACE(sizeof(u_int32_t))  /* ifindex */
+               + NFA_SPACE(sizeof(u_int32_t))  /* ifindex */
+#endif
+               + NFA_SPACE(sizeof(u_int32_t))  /* mark */
+               + NFA_SPACE(sizeof(u_int32_t))  /* uid */
+               + NFA_SPACE(NFULNL_PREFIXLEN)   /* prefix */
+               + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hw))
+               + NFA_SPACE(sizeof(struct nfulnl_msg_packet_timestamp));
+
+       UDEBUG("initial size=%u\n", size);
+
+       spin_lock_bh(&inst->lock);
+
+       qthreshold = inst->qthreshold;
+       /* per-rule qthreshold overrides per-instance */
+       if (qthreshold > li->u.ulog.qthreshold)
+               qthreshold = li->u.ulog.qthreshold;
+       
+       switch (inst->copy_mode) {
+       case NFULNL_COPY_META:
+       case NFULNL_COPY_NONE:
+               data_len = 0;
+               break;
+       
+       case NFULNL_COPY_PACKET:
+               if (inst->copy_range == 0 
+                   || inst->copy_range > skb->len)
+                       data_len = skb->len;
+               else
+                       data_len = inst->copy_range;
+               
+               size += NFA_SPACE(data_len);
+               UDEBUG("copy_packet, therefore size now %u\n", size);
+               break;
+       
+       default:
+               spin_unlock_bh(&inst->lock);
+               instance_put(inst);
+               return;
+       }
+
+       if (size > inst->nlbufsiz)
+               nlbufsiz = size;
+       else
+               nlbufsiz = inst->nlbufsiz;
+
+       if (!inst->skb) {
+               if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
+                       UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
+                               inst->nlbufsiz, size);
+                       goto alloc_failure;
+               }
+       } else if (inst->qlen >= qthreshold ||
+                  size > skb_tailroom(inst->skb)) {
+               /* either the queue len is too high or we don't have
+                * enough room in the skb left. flush to userspace. */
+               UDEBUG("flushing old skb\n");
+
+               __nfulnl_send(inst);
+
+               if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
+                       UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
+                               inst->nlbufsiz, size);
+                       goto alloc_failure;
+               }
+       }
+
+       UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold);
+       inst->qlen++;
+
+       __build_packet_message(inst, skb, data_len, pf,
+                               hooknum, in, out, li, prefix);
+
+       /* timer_pending always called within inst->lock, so there
+        * is no chance of a race here */
+       if (!timer_pending(&inst->timer)) {
+               instance_get(inst);
+               inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100);
+               add_timer(&inst->timer);
+       }
+       spin_unlock_bh(&inst->lock);
+
+       return;
+
+alloc_failure:
+       spin_unlock_bh(&inst->lock);
+       instance_put(inst);
+       UDEBUG("error allocating skb\n");
+       /* FIXME: statistics */
+}
+
+/*
+ * Netlink notifier callback.  When a NETLINK_NETFILTER socket with a
+ * non-zero pid is released, destroy every instance bound to that pid.
+ * Always returns NOTIFY_DONE.
+ */
+static int
+nfulnl_rcv_nl_event(struct notifier_block *this,
+                  unsigned long event, void *ptr)
+{
+       struct netlink_notify *n = ptr;
+
+       if (event == NETLINK_URELEASE &&
+           n->protocol == NETLINK_NETFILTER && n->pid) {
+               int i;
+
+               /* destroy all instances for this pid */
+               write_lock_bh(&instances_lock);
+               for  (i = 0; i < INSTANCE_BUCKETS; i++) {
+                       struct hlist_node *tmp, *t2;
+                       struct nfulnl_instance *inst;
+                       struct hlist_head *head = &instance_table[i];
+
+                       /* _safe variant: entries are unhashed while walking */
+                       hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
+                               UDEBUG("node = %p\n", inst);
+                               /* instances_lock is already held, hence the
+                                * non-locking __instance_destroy() */
+                               if (n->pid == inst->peer_pid)
+                                       __instance_destroy(inst);
+                       }
+               }
+               write_unlock_bh(&instances_lock);
+       }
+       return NOTIFY_DONE;
+}
+
+/* Registered with netlink_register_notifier() in init_or_cleanup(); routes
+ * netlink events to nfulnl_rcv_nl_event(). */
+static struct notifier_block nfulnl_rtnl_notifier = {
+       .notifier_call  = nfulnl_rcv_nl_event,
+};
+
+/* Callback for message types userspace may not send: always -ENOTSUPP. */
+static int nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
+                             struct nlmsghdr *nlh, struct nfattr *nfqa[],
+                             int *errp)
+{
+       return -ENOTSUPP;
+}
+
+/* Logger descriptor passed to nf_log_register() on NFULNL_CFG_CMD_PF_BIND;
+ * its log function is nfulnl_log_packet(). */
+static struct nf_logger nfulnl_logger = {
+       .name   = "nfnetlink_log",
+       .logfn  = &nfulnl_log_packet,
+       .me     = THIS_MODULE,
+};
+
+/* Per-attribute sizes for the NFULA_* packet attributes, indexed by
+ * attribute type minus one.
+ * NOTE(review): presumably minimum payload sizes, by analogy with
+ * nfula_cfg_min; no user of this table is visible in this part of the file. */
+static const int nfula_min[NFULA_MAX] = {
+       [NFULA_PACKET_HDR-1]    = sizeof(struct nfulnl_msg_packet_hdr),
+       [NFULA_MARK-1]          = sizeof(u_int32_t),
+       [NFULA_TIMESTAMP-1]     = sizeof(struct nfulnl_msg_packet_timestamp),
+       [NFULA_IFINDEX_INDEV-1] = sizeof(u_int32_t),
+       [NFULA_IFINDEX_OUTDEV-1]= sizeof(u_int32_t),
+       [NFULA_HWADDR-1]        = sizeof(struct nfulnl_msg_packet_hw),
+       [NFULA_PAYLOAD-1]       = 0,
+       [NFULA_PREFIX-1]        = 0,
+       [NFULA_UID-1]           = sizeof(u_int32_t),
+};
+
+/* Per-attribute sizes for the NFULA_CFG_* config attributes, indexed by
+ * attribute type minus one; handed to nfattr_bad_size() in
+ * nfulnl_recv_config() to reject malformed requests. */
+static const int nfula_cfg_min[NFULA_CFG_MAX] = {
+       [NFULA_CFG_CMD-1]       = sizeof(struct nfulnl_msg_config_cmd),
+       [NFULA_CFG_MODE-1]      = sizeof(struct nfulnl_msg_config_mode),
+       [NFULA_CFG_TIMEOUT-1]   = sizeof(u_int32_t),
+       [NFULA_CFG_QTHRESH-1]   = sizeof(u_int32_t),
+       [NFULA_CFG_NLBUFSIZ-1]  = sizeof(u_int32_t),
+};
+
+/*
+ * Handle an NFULNL_MSG_CONFIG request.
+ *
+ * An optional NFULA_CFG_CMD attribute binds/unbinds an instance for the
+ * group in nfmsg->res_id, or registers/unregisters the logger for a
+ * protocol family.  The remaining attributes (mode, timeout, buffer size,
+ * queue threshold) are then applied to the instance.  Without a command
+ * the instance must already exist and belong to the sender.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int
+nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
+                  struct nlmsghdr *nlh, struct nfattr *nfula[], int *errp)
+{
+       struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+       u_int16_t group_num = ntohs(nfmsg->res_id);
+       struct nfulnl_instance *inst;
+       int ret = 0;
+
+       UDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
+
+       if (nfattr_bad_size(nfula, NFULA_CFG_MAX, nfula_cfg_min)) {
+               UDEBUG("bad attribute size\n");
+               return -EINVAL;
+       }
+
+       /* may be NULL; the reference is dropped at out_put */
+       inst = instance_lookup_get(group_num);
+       if (nfula[NFULA_CFG_CMD-1]) {
+               u_int8_t pf = nfmsg->nfgen_family;
+               struct nfulnl_msg_config_cmd *cmd;
+               cmd = NFA_DATA(nfula[NFULA_CFG_CMD-1]);
+               UDEBUG("found CFG_CMD for\n");
+
+               switch (cmd->command) {
+               case NFULNL_CFG_CMD_BIND:
+                       if (inst) {
+                               ret = -EBUSY;
+                               goto out_put;
+                       }
+
+                       /* comes back with a use count of 2; the extra
+                        * reference is put at out_put */
+                       inst = instance_create(group_num,
+                                              NETLINK_CB(skb).pid);
+                       if (!inst) {
+                               ret = -EINVAL;
+                               goto out_put;
+                       }
+                       break;
+               case NFULNL_CFG_CMD_UNBIND:
+                       if (!inst) {
+                               ret = -ENODEV;
+                               goto out_put;
+                       }
+
+                       if (inst->peer_pid != NETLINK_CB(skb).pid) {
+                               ret = -EPERM;
+                               goto out_put;
+                       }
+
+                       instance_destroy(inst);
+                       break;
+               case NFULNL_CFG_CMD_PF_BIND:
+                       UDEBUG("registering log handler for pf=%u\n", pf);
+                       ret = nf_log_register(pf, &nfulnl_logger);
+                       break;
+               case NFULNL_CFG_CMD_PF_UNBIND:
+                       UDEBUG("unregistering log handler for pf=%u\n", pf);
+                       /* This is a bug and a feature.  We cannot unregister
+                        * other handlers, like nfnetlink_inst can */
+                       nf_log_unregister_pf(pf);
+                       break;
+               default:
+                       ret = -EINVAL;
+                       break;
+               }
+       } else {
+               if (!inst) {
+                       UDEBUG("no config command, and no instance for "
+                               "group=%u pid=%u =>ENOENT\n",
+                               group_num, NETLINK_CB(skb).pid);
+                       ret = -ENOENT;
+                       goto out_put;
+               }
+
+               if (inst->peer_pid != NETLINK_CB(skb).pid) {
+                       UDEBUG("no config command, and wrong pid\n");
+                       ret = -EPERM;
+                       goto out_put;
+               }
+       }
+
+       /* PF_BIND/PF_UNBIND (and unknown commands) get here without an
+        * instance.  The setters below dereference inst, so per-instance
+        * attributes cannot be applied in that case. */
+       if (!inst) {
+               if (ret == 0 &&
+                   (nfula[NFULA_CFG_MODE-1] ||
+                    nfula[NFULA_CFG_TIMEOUT-1] ||
+                    nfula[NFULA_CFG_NLBUFSIZ-1] ||
+                    nfula[NFULA_CFG_QTHRESH-1]))
+                       ret = -ENODEV;
+               goto out_put;
+       }
+
+       if (nfula[NFULA_CFG_MODE-1]) {
+               struct nfulnl_msg_config_mode *params;
+               params = NFA_DATA(nfula[NFULA_CFG_MODE-1]);
+
+               /* NOTE(review): copy_range is converted with ntohs();
+                * confirm the field really is 16 bits wide in
+                * struct nfulnl_msg_config_mode */
+               nfulnl_set_mode(inst, params->copy_mode,
+                               ntohs(params->copy_range));
+       }
+
+       if (nfula[NFULA_CFG_TIMEOUT-1]) {
+               u_int32_t timeout =
+                       *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_TIMEOUT-1]);
+
+               nfulnl_set_timeout(inst, ntohl(timeout));
+       }
+
+       if (nfula[NFULA_CFG_NLBUFSIZ-1]) {
+               u_int32_t nlbufsiz =
+                       *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_NLBUFSIZ-1]);
+
+               nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
+       }
+
+       if (nfula[NFULA_CFG_QTHRESH-1]) {
+               /* 32-bit network-order value, as declared in nfula_cfg_min;
+                * reading only 16 bits before ntohl() gave a wrong value */
+               u_int32_t qthresh =
+                       *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_QTHRESH-1]);
+
+               nfulnl_set_qthresh(inst, ntohl(qthresh));
+       }
+
+out_put:
+       /* instance_put() accepts NULL */
+       instance_put(inst);
+       return ret;
+}
+
+/* Message-type dispatch table for this subsystem: PACKET messages are
+ * rejected via nfulnl_recv_unsupp(), CONFIG messages go to
+ * nfulnl_recv_config().  Both entries require CAP_NET_ADMIN. */
+static struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = {
+       [NFULNL_MSG_PACKET]     = { .call = nfulnl_recv_unsupp,
+                                   .attr_count = NFULA_MAX,
+                                   .cap_required = CAP_NET_ADMIN, },
+       [NFULNL_MSG_CONFIG]     = { .call = nfulnl_recv_config,
+                                   .attr_count = NFULA_CFG_MAX,
+                                   .cap_required = CAP_NET_ADMIN },
+};
+
+/* Subsystem descriptor registered with nfnetlink_subsys_register() in
+ * init_or_cleanup(). */
+static struct nfnetlink_subsystem nfulnl_subsys = {
+       .name           = "log",
+       .subsys_id      = NFNL_SUBSYS_ULOG,
+       .cb_count       = NFULNL_MSG_MAX,
+       .cb             = nfulnl_cb,
+};
+
+#ifdef CONFIG_PROC_FS
+/* Per-open iteration state of the proc seq_file, stored in seq->private. */
+struct iter_state {
+       unsigned int bucket;    /* instance_table bucket currently walked */
+};
+
+/*
+ * Return the first node of the first non-empty bucket of instance_table,
+ * leaving st->bucket at that bucket, or NULL if the table is empty or no
+ * iterator state is attached.
+ */
+static struct hlist_node *get_first(struct seq_file *seq)
+{
+       struct iter_state *st = seq->private;
+
+       if (!st)
+               return NULL;
+
+       st->bucket = 0;
+       while (st->bucket < INSTANCE_BUCKETS) {
+               struct hlist_head *head = &instance_table[st->bucket];
+
+               if (!hlist_empty(head))
+                       return head->first;
+               st->bucket++;
+       }
+       return NULL;
+}
+
+/*
+ * Return the node following h, moving on to later buckets when the
+ * current chain is exhausted; NULL once all buckets have been visited.
+ */
+static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
+{
+       struct iter_state *st = seq->private;
+       struct hlist_node *next = h->next;
+
+       for (; !next; next = instance_table[st->bucket].first) {
+               st->bucket++;
+               if (st->bucket >= INSTANCE_BUCKETS)
+                       return NULL;
+       }
+       return next;
+}
+
+/* Return the pos'th node (0-based) in iteration order, or NULL if there
+ * are fewer than pos + 1 nodes. */
+static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
+{
+       struct hlist_node *node = get_first(seq);
+
+       while (node && pos) {
+               node = get_next(seq, node);
+               if (node)
+                       pos--;
+       }
+       return pos ? NULL : node;
+}
+
+/* seq_file start: take instances_lock for reading (released again in
+ * seq_stop()) and position the iterator at *pos. */
+static void *seq_start(struct seq_file *seq, loff_t *pos)
+{
+       read_lock_bh(&instances_lock);
+
+       return get_idx(seq, *pos);
+}
+
+/* seq_file next: bump the position and step to the following node. */
+static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       ++*pos;
+
+       return get_next(s, v);
+}
+
+/* seq_file stop: release the read lock taken in seq_start(). */
+static void seq_stop(struct seq_file *s, void *v)
+{
+       read_unlock_bh(&instances_lock);
+}
+
+/*
+ * seq_file show: print one line per instance with group number, peer pid,
+ * queue length, copy mode, copy range, flush timeout and use count.
+ *
+ * `v` is the hlist_node handed out by seq_start()/seq_next().  It is
+ * converted with hlist_entry() rather than used as the instance pointer
+ * directly, so this does not depend on hlist being the first member of
+ * struct nfulnl_instance.
+ */
+static int seq_show(struct seq_file *s, void *v)
+{
+       const struct nfulnl_instance *inst =
+               hlist_entry((struct hlist_node *)v,
+                           const struct nfulnl_instance, hlist);
+
+       return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n",
+                         inst->group_num,
+                         inst->peer_pid, inst->qlen,
+                         inst->copy_mode, inst->copy_range,
+                         inst->flushtimeout, atomic_read(&inst->use));
+}
+
+/* seq_file iterator over instance_table; passed to seq_open() in
+ * nful_open(). */
+static struct seq_operations nful_seq_ops = {
+       .start  = seq_start,
+       .next   = seq_next,
+       .stop   = seq_stop,
+       .show   = seq_show,
+};
+
+/*
+ * open() handler of the proc file: allocate a zeroed iter_state, open the
+ * seq_file and attach the state as seq->private.
+ * Returns -ENOMEM if the allocation fails, otherwise the result of
+ * seq_open(); the state is freed again if seq_open() fails.
+ */
+static int nful_open(struct inode *inode, struct file *file)
+{
+       struct iter_state *is;
+       int ret;
+
+       is = kmalloc(sizeof(*is), GFP_KERNEL);
+       if (!is)
+               return -ENOMEM;
+       memset(is, 0, sizeof(*is));
+
+       ret = seq_open(file, &nful_seq_ops);
+       if (ret < 0) {
+               kfree(is);
+               return ret;
+       }
+
+       ((struct seq_file *)file->private_data)->private = is;
+       return ret;
+}
+
+/* File operations of the "nfnetlink_log" proc entry.  The release handler
+ * is seq_release_private, matching the seq->private state set up in
+ * nful_open(). */
+static struct file_operations nful_file_ops = {
+       .owner   = THIS_MODULE,
+       .open    = nful_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release_private,
+};
+
+#endif /* PROC_FS */
+
+/*
+ * Shared module setup/teardown.
+ *
+ * init != 0: initialise the instance table, register the netlink notifier
+ *            and the nfnetlink subsystem and create the proc entry; on
+ *            failure undo what was done and return a negative errno.
+ * init == 0: unregister everything.  The return value is not meaningful
+ *            in this case and is ignored by fini().
+ */
+static int
+init_or_cleanup(int init)
+{
+       int i, status = -ENOMEM;
+#ifdef CONFIG_PROC_FS
+       struct proc_dir_entry *proc_nful;
+#endif
+
+       if (!init)
+               goto cleanup;
+
+       for (i = 0; i < INSTANCE_BUCKETS; i++)
+               INIT_HLIST_HEAD(&instance_table[i]);
+
+       /* it's not really all that important to have a random value, so
+        * we can do this from the init function, even if there hasn't
+        * been that much entropy yet */
+       get_random_bytes(&hash_init, sizeof(hash_init));
+
+       netlink_register_notifier(&nfulnl_rtnl_notifier);
+       status = nfnetlink_subsys_register(&nfulnl_subsys);
+       if (status < 0) {
+               printk(KERN_ERR "log: failed to create netlink socket\n");
+               goto cleanup_netlink_notifier;
+       }
+
+#ifdef CONFIG_PROC_FS
+       proc_nful = create_proc_entry("nfnetlink_log", 0440,
+                                     proc_net_netfilter);
+       if (!proc_nful) {
+               /* status still holds the 0 from the successful subsystem
+                * registration; without resetting it the module would
+                * report success after tearing everything down again */
+               status = -ENOMEM;
+               goto cleanup_subsys;
+       }
+       proc_nful->proc_fops = &nful_file_ops;
+#endif
+
+       return status;
+
+cleanup:
+       nf_log_unregister_logger(&nfulnl_logger);
+#ifdef CONFIG_PROC_FS
+       remove_proc_entry("nfnetlink_log", proc_net_netfilter);
+cleanup_subsys:
+#endif
+       nfnetlink_subsys_unregister(&nfulnl_subsys);
+cleanup_netlink_notifier:
+       netlink_unregister_notifier(&nfulnl_rtnl_notifier);
+       return status;
+}
+
+/* Module entry point: run the setup half of init_or_cleanup(). */
+static int __init init(void)
+{
+       return init_or_cleanup(1);
+}
+
+/* Module exit point: run the teardown half of init_or_cleanup(). */
+static void __exit fini(void)
+{
+       (void)init_or_cleanup(0);
+}
+
+MODULE_DESCRIPTION("netfilter userspace logging");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG);
+
+module_init(init);
+module_exit(fini);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c

new file mode 100644 (file)

index 0000000..e3a5285
--- /dev/null
+++ b/net/netfilter/nfnetlink_queue.c
@@ -0,0 +1,1132 @@
+/*
+ * This is a module which is used for queueing packets and communicating with
+ * userspace via nfnetlink.
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * Based on the old ipv4-only ip_queue.c:
+ * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
+ * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/proc_fs.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_queue.h>
+#include <linux/list.h>
+#include <net/sock.h>
+
+#include <asm/atomic.h>
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+#include "../bridge/br_private.h"
+#endif
+
+#define NFQNL_QMAX_DEFAULT 1024
+
+#if 0
+#define QDEBUG(x, args ...)    printk(KERN_DEBUG "%s(%d):%s(): " x,       \
+                                       __FILE__, __LINE__, __FUNCTION__,  \
+                                       ## args)
+#else
+#define QDEBUG(x, ...)
+#endif
+
+/* One packet held in a queue instance while it waits for a verdict. */
+struct nfqnl_queue_entry {
+       struct list_head list;          /* link in the instance's queue_list */
+       struct nf_info *info;           /* hook info handed back to nf_reinject() */
+       struct sk_buff *skb;            /* the queued packet itself */
+       unsigned int id;                /* packet id, taken from the instance's id_sequence */
+};
+
+/* State of one queue: the userspace peer bound to it, its copy
+ * configuration, statistics and the packets currently awaiting a verdict. */
+struct nfqnl_instance {
+       struct hlist_node hlist;                /* global list of queues */
+       atomic_t use;                           /* reference count; freed by instance_put() at zero */
+
+       int peer_pid;                           /* netlink pid of the bound userspace peer */
+       unsigned int queue_maxlen;              /* enqueue is refused once queue_total reaches this */
+       unsigned int copy_range;                /* max payload bytes copied in NFQNL_COPY_PACKET mode */
+       unsigned int queue_total;               /* number of entries on queue_list */
+       unsigned int queue_dropped;             /* packets refused because the queue was full */
+       unsigned int queue_user_dropped;        /* packets whose netlink unicast failed */
+
+       atomic_t id_sequence;                   /* 'sequence' of pkt ids */
+
+       u_int16_t queue_num;                    /* number of this queue */
+       u_int8_t copy_mode;                     /* one of the NFQNL_COPY_* modes */
+
+       spinlock_t lock;                        /* taken around queue_list and copy mode updates */
+
+       struct list_head queue_list;            /* packets in queue */
+};
+
+typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long);
+
+static DEFINE_RWLOCK(instances_lock);
+
+/* Convert a 64 bit value from host to network (big-endian) byte order.
+ *
+ * The result is assembled one byte at a time, most significant byte first,
+ * so every byte of the return value is written and the outcome is correct
+ * on little- and big-endian hosts alike. */
+u_int64_t htonll(u_int64_t in)
+{
+       u_int64_t out;
+       unsigned int i;
+
+       for (i = 0; i < sizeof(u_int64_t); i++)
+               ((u_int8_t *)&out)[i] =
+                       (u_int8_t)(in >> (8 * (sizeof(u_int64_t) - 1 - i)));
+
+       return out;
+}
+
+#define INSTANCE_BUCKETS       16
+static struct hlist_head instance_table[INSTANCE_BUCKETS];
+
+/* Map a queue number onto a bucket index of instance_table: OR the high
+ * byte into the value and reduce it modulo INSTANCE_BUCKETS. */
+static inline u_int8_t instance_hashfn(u_int16_t queue_num)
+{
+       const int folded = (queue_num >> 8) | queue_num;
+
+       return folded % INSTANCE_BUCKETS;
+}
+
+/* Search the hash table for the instance bound to queue_num.
+ * Returns the instance, or NULL when none matches.  No lock is taken and
+ * no reference is acquired here. */
+static struct nfqnl_instance *
+__instance_lookup(u_int16_t queue_num)
+{
+       struct nfqnl_instance *cur;
+       struct hlist_node *node;
+       struct hlist_head *bucket = &instance_table[instance_hashfn(queue_num)];
+
+       hlist_for_each_entry(cur, node, bucket, hlist)
+               if (cur->queue_num == queue_num)
+                       return cur;
+
+       return NULL;
+}
+
+/* Look up the instance for queue_num under the instances_lock read lock and
+ * take a reference on it.  Returns NULL when no such instance exists; a
+ * non-NULL result must be released with instance_put(). */
+static struct nfqnl_instance *
+instance_lookup_get(u_int16_t queue_num)
+{
+       struct nfqnl_instance *found;
+
+       read_lock_bh(&instances_lock);
+       found = __instance_lookup(queue_num);
+       if (found != NULL)
+               atomic_inc(&found->use);
+       read_unlock_bh(&instances_lock);
+
+       return found;
+}
+
+/* Drop one reference on inst and free it once the count reaches zero.
+ * A NULL inst is accepted and ignored. */
+static void
+instance_put(struct nfqnl_instance *inst)
+{
+       if (!inst)
+               return;
+       if (!atomic_dec_and_test(&inst->use))
+               return;
+
+       QDEBUG("kfree(inst=%p)\n", inst);
+       kfree(inst);
+}
+
+/* Allocate a new queue instance for queue_num, owned by netlink peer pid,
+ * and insert it into the hash table.
+ *
+ * Returns the new instance with its use count set to 2, or NULL if an
+ * instance for queue_num already exists, the allocation fails, or a module
+ * reference cannot be taken.  The lookup, allocation and insertion all run
+ * under the instances_lock write lock. */
+static struct nfqnl_instance *
+instance_create(u_int16_t queue_num, int pid)
+{
+       struct nfqnl_instance *inst;
+
+       QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid);
+
+       write_lock_bh(&instances_lock); 
+       if (__instance_lookup(queue_num)) {
+               inst = NULL;
+               QDEBUG("aborting, instance already exists\n");
+               goto out_unlock;
+       }
+
+       /* GFP_ATOMIC: we are holding the write lock with BHs disabled */
+       inst = kmalloc(sizeof(*inst), GFP_ATOMIC);
+       if (!inst)
+               goto out_unlock;
+
+       memset(inst, 0, sizeof(*inst));
+       inst->queue_num = queue_num;
+       inst->peer_pid = pid;
+       inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
+       /* NOTE(review): 0xfffff is larger than the 0xffff cap applied in
+        * __nfqnl_set_mode() -- confirm whether 0xffff was intended */
+       inst->copy_range = 0xfffff;
+       inst->copy_mode = NFQNL_COPY_NONE;
+       atomic_set(&inst->id_sequence, 0);
+       /* needs to be two, since we _put() after creation */
+       atomic_set(&inst->use, 2);
+       inst->lock = SPIN_LOCK_UNLOCKED;
+       INIT_LIST_HEAD(&inst->queue_list);
+
+       /* each instance holds a module reference; it is released again in
+        * _instance_destroy2() */
+       if (!try_module_get(THIS_MODULE))
+               goto out_free;
+
+       hlist_add_head(&inst->hlist, 
+                      &instance_table[instance_hashfn(queue_num)]);
+
+       write_unlock_bh(&instances_lock);
+
+       QDEBUG("successfully created new instance\n");
+
+       return inst;
+
+out_free:
+       kfree(inst);
+out_unlock:
+       write_unlock_bh(&instances_lock);
+       return NULL;
+}
+
+static void nfqnl_flush(struct nfqnl_instance *queue, int verdict);
+
+/* Tear down a queue instance: unhash it, drop every packet still waiting on
+ * it, then release one instance reference and the module reference taken
+ * in instance_create().
+ *
+ * lock: non-zero to take the instances_lock write lock around the unhash
+ *       here; zero when the caller already holds it. */
+static void
+_instance_destroy2(struct nfqnl_instance *inst, int lock)
+{
+       /* first pull it out of the global list */
+       if (lock)
+               write_lock_bh(&instances_lock);
+
+       QDEBUG("removing instance %p (queuenum=%u) from hash\n",
+               inst, inst->queue_num);
+       hlist_del(&inst->hlist);
+
+       if (lock)
+               write_unlock_bh(&instances_lock);
+
+       /* then flush all pending skbs from the queue */
+       nfqnl_flush(inst, NF_DROP);
+
+       /* and finally put the refcount */
+       instance_put(inst);
+
+       module_put(THIS_MODULE);
+}
+
+/* Destroy inst without taking instances_lock in the teardown helper. */
+static inline void
+__instance_destroy(struct nfqnl_instance *inst)
+{
+       const int take_lock = 0;
+
+       _instance_destroy2(inst, take_lock);
+}
+
+/* Destroy inst, letting the teardown helper take instances_lock itself. */
+static inline void
+instance_destroy(struct nfqnl_instance *inst)
+{
+       const int take_lock = 1;
+
+       _instance_destroy2(inst, take_lock);
+}
+
+
+
+/* Hand the packet of an already dequeued entry back to netfilter with the
+ * given verdict, then free the entry.  The entry must not be used by the
+ * caller afterwards. */
+static void
+issue_verdict(struct nfqnl_queue_entry *entry, int verdict)
+{
+       QDEBUG("entering for entry %p, verdict %u\n", entry, verdict);
+
+       /* TCP input path (and probably other bits) assume to be called
+        * from softirq context, not from syscall, like issue_verdict is
+        * called.  TCP input path deadlocks with locks taken from timer
+        * softirq, e.g.  We therefore emulate this by local_bh_disable() */
+
+       local_bh_disable();
+       nf_reinject(entry->skb, entry->info, verdict);
+       local_bh_enable();
+
+       kfree(entry);
+}
+
+/* Add entry at the head of the queue's list and account for it.  No lock
+ * is taken here. */
+static inline void
+__enqueue_entry(struct nfqnl_instance *queue,
+               struct nfqnl_queue_entry *entry)
+{
+       queue->queue_total++;
+       list_add(&entry->list, &queue->queue_list);
+}
+
+/*
+ * Walk the queue from its tail towards its head and return the first entry
+ * accepted by cmpfn(entry, data).  With a NULL cmpfn the tail entry itself
+ * is returned.  Returns NULL when nothing matches or the queue is empty.
+ */
+static inline struct nfqnl_queue_entry *
+__find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
+            unsigned long data)
+{
+       struct list_head *pos;
+
+       list_for_each_prev(pos, &queue->queue_list) {
+               struct nfqnl_queue_entry *cand =
+                       list_entry(pos, struct nfqnl_queue_entry, list);
+
+               if (cmpfn == NULL)
+                       return cand;
+               if (cmpfn(cand, data))
+                       return cand;
+       }
+       return NULL;
+}
+
+/* Unlink entry from queue q and account for its removal.  No lock is
+ * taken here. */
+static inline void
+__dequeue_entry(struct nfqnl_instance *q, struct nfqnl_queue_entry *entry)
+{
+       q->queue_total--;
+       list_del(&entry->list);
+}
+
+/* Find an entry with __find_entry() and, if there is one, take it off the
+ * queue.  Returns the removed entry or NULL.  No lock is taken here. */
+static inline struct nfqnl_queue_entry *
+__find_dequeue_entry(struct nfqnl_instance *queue,
+                    nfqnl_cmpfn cmpfn, unsigned long data)
+{
+       struct nfqnl_queue_entry *hit = __find_entry(queue, cmpfn, data);
+
+       if (hit)
+               __dequeue_entry(queue, hit);
+
+       return hit;
+}
+
+
+/* Empty the queue, issuing verdict for every entry that was on it.  No
+ * lock is taken here. */
+static inline void
+__nfqnl_flush(struct nfqnl_instance *queue, int verdict)
+{
+       struct nfqnl_queue_entry *entry;
+
+       for (;;) {
+               entry = __find_dequeue_entry(queue, NULL, 0);
+               if (!entry)
+                       break;
+               issue_verdict(entry, verdict);
+       }
+}
+
+/* Set the copy mode and copy range of a queue.  No lock is taken here.
+ *
+ * NFQNL_COPY_NONE and NFQNL_COPY_META force the range to 0;
+ * NFQNL_COPY_PACKET stores range, capped at 0xffff.  Returns 0 on success
+ * or -EINVAL for any other mode, in which case the queue is left
+ * untouched. */
+static inline int
+__nfqnl_set_mode(struct nfqnl_instance *queue,
+                unsigned char mode, unsigned int range)
+{
+       if (mode == NFQNL_COPY_NONE || mode == NFQNL_COPY_META) {
+               queue->copy_mode = mode;
+               queue->copy_range = 0;
+               return 0;
+       }
+
+       if (mode == NFQNL_COPY_PACKET) {
+               queue->copy_mode = mode;
+               /* we're using struct nfattr which has 16bit nfa_len */
+               queue->copy_range = (range > 0xffff) ? 0xffff : range;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+/* Locked variant of __find_dequeue_entry(): the search and removal run
+ * with the queue's spinlock held.  Returns the removed entry or NULL. */
+static struct nfqnl_queue_entry *
+find_dequeue_entry(struct nfqnl_instance *queue,
+                  nfqnl_cmpfn cmpfn, unsigned long data)
+{
+       struct nfqnl_queue_entry *hit;
+
+       spin_lock_bh(&queue->lock);
+       hit = __find_dequeue_entry(queue, cmpfn, data);
+       spin_unlock_bh(&queue->lock);
+
+       return hit;
+}
+
+/* Empty the queue with its spinlock held, issuing verdict for every entry
+ * that was on it. */
+static void
+nfqnl_flush(struct nfqnl_instance *queue, int verdict)
+{
+       struct nfqnl_queue_entry *entry;
+
+       spin_lock_bh(&queue->lock);
+       while ((entry = __find_dequeue_entry(queue, NULL, 0)) != NULL)
+               issue_verdict(entry, verdict);
+       spin_unlock_bh(&queue->lock);
+}
+
+/* Build the nfnetlink message that describes a queued packet to userspace.
+ *
+ * The message carries the packet header attribute (id, hw protocol, hook)
+ * and, when available, in/out interface indexes (including bridge
+ * physical/group devices with CONFIG_BRIDGE_NETFILTER), the netfilter mark,
+ * the hardware address, the timestamp and - in NFQNL_COPY_PACKET mode - up
+ * to copy_range bytes of payload.
+ *
+ * Returns the new skb, or NULL with *errp set to -EINVAL on any failure
+ * (unknown copy mode, allocation failure, or lack of room in the skb).
+ * *errp is not written on success. */
+static struct sk_buff *
+nfqnl_build_packet_message(struct nfqnl_instance *queue,
+                          struct nfqnl_queue_entry *entry, int *errp)
+{
+       unsigned char *old_tail;
+       size_t size;
+       size_t data_len = 0;
+       struct sk_buff *skb;
+       struct nfqnl_msg_packet_hdr pmsg;
+       struct nlmsghdr *nlh;
+       struct nfgenmsg *nfmsg;
+       unsigned int tmp_uint;
+
+       QDEBUG("entered\n");
+
+       /* all macros expand to constant values at compile time */
+       size =    NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hdr))
+               + NLMSG_SPACE(sizeof(u_int32_t))        /* ifindex */
+               + NLMSG_SPACE(sizeof(u_int32_t))        /* ifindex */
+#ifdef CONFIG_BRIDGE_NETFILTER
+               + NLMSG_SPACE(sizeof(u_int32_t))        /* ifindex */
+               + NLMSG_SPACE(sizeof(u_int32_t))        /* ifindex */
+#endif
+               + NLMSG_SPACE(sizeof(u_int32_t))        /* mark */
+               + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw))
+               + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp));
+
+       /* copy_mode and copy_range are read together under the queue lock */
+       spin_lock_bh(&queue->lock);
+
+       switch (queue->copy_mode) {
+       case NFQNL_COPY_META:
+       case NFQNL_COPY_NONE:
+               data_len = 0;
+               break;
+
+       case NFQNL_COPY_PACKET:
+               if (queue->copy_range == 0
+                   || queue->copy_range > entry->skb->len)
+                       data_len = entry->skb->len;
+               else
+                       data_len = queue->copy_range;
+
+               size += NLMSG_SPACE(data_len);
+               break;
+
+       default:
+               *errp = -EINVAL;
+               spin_unlock_bh(&queue->lock);
+               return NULL;
+       }
+
+       spin_unlock_bh(&queue->lock);
+
+       skb = alloc_skb(size, GFP_ATOMIC);
+       if (!skb)
+               goto nlmsg_failure;
+
+       old_tail= skb->tail;
+       /* NLMSG_PUT jumps to nlmsg_failure when the skb has no room */
+       nlh = NLMSG_PUT(skb, 0, 0,
+                       NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
+                       sizeof(struct nfgenmsg));
+       nfmsg = NLMSG_DATA(nlh);
+       nfmsg->nfgen_family = entry->info->pf;
+       nfmsg->version = NFNETLINK_V0;
+       nfmsg->res_id = htons(queue->queue_num);
+
+       pmsg.packet_id          = htonl(entry->id);
+       /* skb->protocol is already kept in network byte order; converting
+        * it again would byte-swap it on little-endian hosts */
+       pmsg.hw_protocol        = entry->skb->protocol;
+       pmsg.hook               = entry->info->hook;
+
+       /* NFA_PUT jumps to nfattr_failure when the skb has no room */
+       NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg);
+
+       if (entry->info->indev) {
+               tmp_uint = htonl(entry->info->indev->ifindex);
+#ifndef CONFIG_BRIDGE_NETFILTER
+               NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint);
+#else
+               if (entry->info->pf == PF_BRIDGE) {
+                       /* Case 1: indev is physical input device, we need to
+                        * look for bridge group (when called from
+                        * netfilter_bridge) */
+                       NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint),
+                               &tmp_uint);
+                       /* this is the bridge group "brX" */
+                       tmp_uint = htonl(entry->info->indev->br_port->br->dev->ifindex);
+                       NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
+                               &tmp_uint);
+               } else {
+                       /* Case 2: indev is bridge group, we need to look for
+                        * physical device (when called from ipv4) */
+                       NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
+                               &tmp_uint);
+                       if (entry->skb->nf_bridge
+                           && entry->skb->nf_bridge->physindev) {
+                               tmp_uint = htonl(entry->skb->nf_bridge->physindev->ifindex);
+                               NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV,
+                                       sizeof(tmp_uint), &tmp_uint);
+                       }
+               }
+#endif
+       }
+
+       if (entry->info->outdev) {
+               tmp_uint = htonl(entry->info->outdev->ifindex);
+#ifndef CONFIG_BRIDGE_NETFILTER
+               NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint);
+#else
+               if (entry->info->pf == PF_BRIDGE) {
+                       /* Case 1: outdev is physical output device, we need to
+                        * look for bridge group (when called from
+                        * netfilter_bridge) */
+                       NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint),
+                               &tmp_uint);
+                       /* this is the bridge group "brX" */
+                       tmp_uint = htonl(entry->info->outdev->br_port->br->dev->ifindex);
+                       NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
+                               &tmp_uint);
+               } else {
+                       /* Case 2: outdev is bridge group, we need to look for
+                        * physical output device (when called from ipv4) */
+                       NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
+                               &tmp_uint);
+                       if (entry->skb->nf_bridge
+                           && entry->skb->nf_bridge->physoutdev) {
+                               tmp_uint = htonl(entry->skb->nf_bridge->physoutdev->ifindex);
+                               NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV,
+                                       sizeof(tmp_uint), &tmp_uint);
+                       }
+               }
+#endif
+       }
+
+       if (entry->skb->nfmark) {
+               tmp_uint = htonl(entry->skb->nfmark);
+               NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint);
+       }
+
+       if (entry->info->indev && entry->skb->dev
+           && entry->skb->dev->hard_header_parse) {
+               struct nfqnl_msg_packet_hw phw;
+
+               /* the whole struct is copied to userspace below, so clear
+                * it first: bytes that hard_header_parse() does not fill in
+                * must not carry stale kernel stack contents */
+               memset(&phw, 0, sizeof(phw));
+               phw.hw_addrlen =
+                       entry->skb->dev->hard_header_parse(entry->skb,
+                                                          phw.hw_addr);
+               phw.hw_addrlen = htons(phw.hw_addrlen);
+               NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
+       }
+
+       if (entry->skb->tstamp.off_sec) {
+               struct nfqnl_msg_packet_timestamp ts;
+
+               ts.sec = htonll(skb_tv_base.tv_sec + entry->skb->tstamp.off_sec);
+               ts.usec = htonll(skb_tv_base.tv_usec + entry->skb->tstamp.off_usec);
+
+               NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
+       }
+
+       if (data_len) {
+               struct nfattr *nfa;
+               int nfa_size = NFA_LENGTH(data_len);
+
+               if (skb_tailroom(skb) < (int)NFA_SPACE(data_len)) {
+                       printk(KERN_WARNING "nf_queue: no tailroom!\n");
+                       goto nlmsg_failure;
+               }
+
+               nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(nfa_size));
+               nfa->nfa_type = NFQA_PAYLOAD;
+               nfa->nfa_len = nfa_size;
+
+               if (skb_copy_bits(entry->skb, 0, NFA_DATA(nfa), data_len))
+                       BUG();
+       }
+
+       nlh->nlmsg_len = skb->tail - old_tail;
+       return skb;
+
+nlmsg_failure:
+nfattr_failure:
+       if (skb)
+               kfree_skb(skb);
+       *errp = -EINVAL;
+       if (net_ratelimit())
+               printk(KERN_ERR "nf_queue: error creating packet message\n");
+       return NULL;
+}
+
+/* Queue handler callback (installed as nfqh.outfn): send a description of
+ * skb to the userspace peer bound to queue queuenum and keep the packet
+ * on that queue until a verdict arrives.
+ *
+ * data is unused.  Returns the nfnetlink_unicast() result on success or a
+ * negative errno: -EINVAL (no such queue, no peer, or the message could not
+ * be built), -EAGAIN (queue is in NFQNL_COPY_NONE mode), -ENOMEM, -ENOSPC
+ * (queue full), or the nfnetlink_unicast() error.  On failure the entry is
+ * freed here; skb and info are left to the caller. */
+static int
+nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
+                    unsigned int queuenum, void *data)
+{
+       int status = -EINVAL;
+       struct sk_buff *nskb;
+       struct nfqnl_instance *queue;
+       struct nfqnl_queue_entry *entry;
+
+       QDEBUG("entered\n");
+
+       queue = instance_lookup_get(queuenum);
+       if (!queue) {
+               QDEBUG("no queue instance matching\n");
+               return -EINVAL;
+       }
+
+       if (queue->copy_mode == NFQNL_COPY_NONE) {
+               QDEBUG("mode COPY_NONE, aborting\n");
+               status = -EAGAIN;
+               goto err_out_put;
+       }
+
+       entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+       if (entry == NULL) {
+               if (net_ratelimit())
+                       printk(KERN_ERR
+                               "nf_queue: OOM in nfqnl_enqueue_packet()\n");
+               status = -ENOMEM;
+               goto err_out_put;
+       }
+
+       entry->info = info;
+       entry->skb = skb;
+       entry->id = atomic_inc_return(&queue->id_sequence);
+
+       nskb = nfqnl_build_packet_message(queue, entry, &status);
+       if (nskb == NULL)
+               goto err_out_free;
+
+       /* sending and enqueueing happen under the queue lock, so a verdict
+        * cannot look for the entry before it is on the list */
+       spin_lock_bh(&queue->lock);
+
+       if (!queue->peer_pid)
+               goto err_out_free_nskb;
+
+       if (queue->queue_total >= queue->queue_maxlen) {
+               queue->queue_dropped++;
+               status = -ENOSPC;
+               if (net_ratelimit())
+                       printk(KERN_WARNING "nf_queue: full at %u entries, "
+                              "dropping packet(s). Dropped: %u\n",
+                              queue->queue_total, queue->queue_dropped);
+               goto err_out_free_nskb;
+       }
+
+       /* nfnetlink_unicast will either free the nskb or add it to a socket */
+       status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT);
+       if (status < 0) {
+               queue->queue_user_dropped++;
+               goto err_out_unlock;
+       }
+
+       __enqueue_entry(queue, entry);
+
+       spin_unlock_bh(&queue->lock);
+       instance_put(queue);
+       return status;
+
+err_out_free_nskb:
+       kfree_skb(nskb);
+
+err_out_unlock:
+       spin_unlock_bh(&queue->lock);
+
+err_out_free:
+       kfree(entry);
+err_out_put:
+       instance_put(queue);
+       return status;
+}
+
+/* Replace the contents of the queued packet e->skb with the data_len bytes
+ * at data, as supplied by userspace with a verdict.
+ *
+ * A shorter payload trims the skb.  A longer one (at most 0xFFFF bytes)
+ * extends it, copying into a larger skb when there is not enough tailroom;
+ * in that case e->skb is replaced and the old skb freed.
+ *
+ * Returns 0 on success, -EINVAL if the new payload is too long, or -ENOMEM
+ * if a copy could not be allocated or the skb could not be made writable. */
+static int
+nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
+{
+       int diff;
+
+       diff = data_len - e->skb->len;
+       if (diff < 0)
+               skb_trim(e->skb, data_len);
+       else if (diff > 0) {
+               if (data_len > 0xFFFF)
+                       return -EINVAL;
+               if (diff > skb_tailroom(e->skb)) {
+                       struct sk_buff *newskb;
+
+                       newskb = skb_copy_expand(e->skb,
+                                                skb_headroom(e->skb),
+                                                diff,
+                                                GFP_ATOMIC);
+                       if (newskb == NULL) {
+                               printk(KERN_WARNING "nf_queue: OOM "
+                                     "in mangle, dropping packet\n");
+                               return -ENOMEM;
+                       }
+                       /* keep the copy charged to the same socket */
+                       if (e->skb->sk)
+                               skb_set_owner_w(newskb, e->skb->sk);
+                       kfree_skb(e->skb);
+                       e->skb = newskb;
+               }
+               skb_put(e->skb, diff);
+       }
+       if (!skb_make_writable(&e->skb, data_len))
+               return -ENOMEM;
+       memcpy(e->skb->data, data, data_len);
+
+       return 0;
+}
+
+/* nfqnl_cmpfn: returns 1 when the entry carries packet id `id`, else 0. */
+static inline int
+id_cmp(struct nfqnl_queue_entry *e, unsigned long id)
+{
+       return (e->id == id) ? 1 : 0;
+}
+
+/* Locked variant of __nfqnl_set_mode(): updates copy mode and range with
+ * the queue's spinlock held and returns the helper's result. */
+static int
+nfqnl_set_mode(struct nfqnl_instance *queue,
+              unsigned char mode, unsigned int range)
+{
+       int rc;
+
+       spin_lock_bh(&queue->lock);
+       rc = __nfqnl_set_mode(queue, mode, range);
+       spin_unlock_bh(&queue->lock);
+
+       return rc;
+}
+
+/* nfqnl_cmpfn: returns 1 when the entry's input or output device has the
+ * given ifindex, else 0. */
+static int
+dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex)
+{
+       struct nf_info *info = entry->info;
+
+       if (info->indev && info->indev->ifindex == ifindex)
+               return 1;
+
+       if (info->outdev && info->outdev->ifindex == ifindex)
+               return 1;
+
+       return 0;
+}
+
+/* drop all packets with either indev or outdev == ifindex from all queue
+ * instances.  Every hash bucket is walked under the instances_lock read
+ * lock; matching entries are dequeued one at a time and given NF_DROP. */
+static void
+nfqnl_dev_drop(int ifindex)
+{
+       int i;
+       
+       QDEBUG("entering for ifindex %u\n", ifindex);
+
+       /* this only looks like we have to hold the readlock for a way too long
+        * time, issue_verdict(),  nf_reinject(), ... - but we always only
+        * issue NF_DROP, which is processed directly in nf_reinject() */
+       read_lock_bh(&instances_lock);
+
+       for  (i = 0; i < INSTANCE_BUCKETS; i++) {
+               struct hlist_node *tmp;
+               struct nfqnl_instance *inst;
+               struct hlist_head *head = &instance_table[i];
+
+               hlist_for_each_entry(inst, tmp, head, hlist) {
+                       struct nfqnl_queue_entry *entry;
+                       /* find_dequeue_entry() takes and releases the
+                        * instance lock itself, so the verdict is issued
+                        * without that lock held */
+                       while ((entry = find_dequeue_entry(inst, dev_cmp, 
+                                                          ifindex)) != NULL)
+                               issue_verdict(entry, NF_DROP);
+               }
+       }
+
+       read_unlock_bh(&instances_lock);
+}
+
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+
+/* Netdevice notifier callback: when a device goes down, drop every queued
+ * packet that refers to it.  Always returns NOTIFY_DONE. */
+static int
+nfqnl_rcv_dev_event(struct notifier_block *this,
+                   unsigned long event, void *ptr)
+{
+       struct net_device *dev = ptr;
+
+       if (event != NETDEV_DOWN)
+               return NOTIFY_DONE;
+
+       /* Drop any packets associated with the downed device */
+       nfqnl_dev_drop(dev->ifindex);
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block nfqnl_dev_notifier = {
+       .notifier_call  = nfqnl_rcv_dev_event,
+};
+
+/* Netlink notifier callback: when a NETLINK_NETFILTER socket with a
+ * non-zero pid is released, destroy every queue instance bound to that
+ * pid.  Always returns NOTIFY_DONE. */
+static int
+nfqnl_rcv_nl_event(struct notifier_block *this,
+                  unsigned long event, void *ptr)
+{
+       struct netlink_notify *n = ptr;
+       int bucket;
+
+       if (event != NETLINK_URELEASE)
+               return NOTIFY_DONE;
+       if (n->protocol != NETLINK_NETFILTER || !n->pid)
+               return NOTIFY_DONE;
+
+       /* destroy all instances for this pid; the write lock is held here,
+        * hence the non-locking destroy variant */
+       write_lock_bh(&instances_lock);
+       for (bucket = 0; bucket < INSTANCE_BUCKETS; bucket++) {
+               struct hlist_node *pos, *next;
+               struct nfqnl_instance *inst;
+
+               hlist_for_each_entry_safe(inst, pos, next,
+                                         &instance_table[bucket], hlist) {
+                       if (n->pid == inst->peer_pid)
+                               __instance_destroy(inst);
+               }
+       }
+       write_unlock_bh(&instances_lock);
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block nfqnl_rtnl_notifier = {
+       .notifier_call  = nfqnl_rcv_nl_event,
+};
+
+static const int nfqa_verdict_min[NFQA_MAX] = {
+       [NFQA_VERDICT_HDR-1]    = sizeof(struct nfqnl_msg_verdict_hdr),
+       [NFQA_MARK-1]           = sizeof(u_int32_t),
+       [NFQA_PAYLOAD-1]        = 0,
+};
+
+/* Handle an NFQNL_MSG_VERDICT message from userspace.
+ *
+ * Looks up the queue named by the message's res_id, checks that the sender
+ * is the peer bound to it, dequeues the packet with the given id, optionally
+ * replaces its payload (NFQA_PAYLOAD) and mark (NFQA_MARK), and reinjects
+ * it with the requested verdict.  If the payload cannot be replaced the
+ * packet is dropped instead.
+ *
+ * Returns 0 on success, or -EINVAL (bad attribute size, missing verdict
+ * header, verdict out of range), -ENODEV (no such queue), -EPERM (sender
+ * is not the bound peer), -ENOENT (no queued packet with that id). */
+static int
+nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
+                  struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+{
+       struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+       u_int16_t queue_num = ntohs(nfmsg->res_id);
+
+       struct nfqnl_msg_verdict_hdr *vhdr;
+       struct nfqnl_instance *queue;
+       unsigned int verdict;
+       struct nfqnl_queue_entry *entry;
+       int err;
+
+       if (nfattr_bad_size(nfqa, NFQA_MAX, nfqa_verdict_min)) {
+               QDEBUG("bad attribute size\n");
+               return -EINVAL;
+       }
+
+       queue = instance_lookup_get(queue_num);
+       if (!queue)
+               return -ENODEV;
+
+       if (queue->peer_pid != NETLINK_CB(skb).pid) {
+               err = -EPERM;
+               goto err_out_put;
+       }
+
+       if (!nfqa[NFQA_VERDICT_HDR-1]) {
+               err = -EINVAL;
+               goto err_out_put;
+       }
+
+       vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]);
+       verdict = ntohl(vhdr->verdict);
+
+       if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) {
+               err = -EINVAL;
+               goto err_out_put;
+       }
+
+       entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id));
+       if (entry == NULL) {
+               err = -ENOENT;
+               goto err_out_put;
+       }
+
+       if (nfqa[NFQA_PAYLOAD-1]) {
+               if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]),
+                                NFA_PAYLOAD(nfqa[NFQA_PAYLOAD-1]), entry) < 0)
+                       verdict = NF_DROP;
+       }
+
+       /* the mark belongs on the queued packet, not on the netlink request
+        * skb; set it after mangling, which may have replaced entry->skb */
+       if (nfqa[NFQA_MARK-1])
+               entry->skb->nfmark =
+                       ntohl(*(u_int32_t *)NFA_DATA(nfqa[NFQA_MARK-1]));
+
+       issue_verdict(entry, verdict);
+       instance_put(queue);
+       return 0;
+
+err_out_put:
+       instance_put(queue);
+       return err;
+}
+
+/* Callback for message types that are not accepted from userspace:
+ * rejects the request with -ENOTSUPP without looking at it. */
+static int
+nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
+                 struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+{
+       const int err = -ENOTSUPP;
+
+       return err;
+}
+
+static const int nfqa_cfg_min[NFQA_CFG_MAX] = {
+       [NFQA_CFG_CMD-1]        = sizeof(struct nfqnl_msg_config_cmd),
+       [NFQA_CFG_PARAMS-1]     = sizeof(struct nfqnl_msg_config_params),
+};
+
+static struct nf_queue_handler nfqh = {
+       .name   = "nf_queue",
+       .outfn  = &nfqnl_enqueue_packet,
+};
+
+/* Handle an NFQNL_MSG_CONFIG message from userspace.
+ *
+ * NFQA_CFG_CMD, when present, binds/unbinds the queue named by res_id to
+ * the sending peer, or registers/unregisters this module as the queue
+ * handler of a protocol family.  Without a command the queue must exist
+ * and belong to the sender.  NFQA_CFG_PARAMS, when present, then sets the
+ * queue's copy mode and range.
+ *
+ * Returns 0 on success or a negative errno: -EINVAL (bad attribute size,
+ * unknown command, or instance creation failed), -EBUSY (queue already
+ * bound), -ENODEV (unbind of a missing queue), -EPERM (sender is not the
+ * bound peer), -ENOENT (no queue to configure), or the result of the queue
+ * handler (un)registration. */
+static int
+nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
+                 struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+{
+       struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+       u_int16_t queue_num = ntohs(nfmsg->res_id);
+       struct nfqnl_instance *queue;
+       int ret = 0;
+
+       QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
+
+       if (nfattr_bad_size(nfqa, NFQA_CFG_MAX, nfqa_cfg_min)) {
+               QDEBUG("bad attribute size\n");
+               return -EINVAL;
+       }
+
+       /* may be NULL; a non-NULL queue carries a reference that every exit
+        * path below has to drop via out_put */
+       queue = instance_lookup_get(queue_num);
+       if (nfqa[NFQA_CFG_CMD-1]) {
+               struct nfqnl_msg_config_cmd *cmd;
+               cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]);
+               QDEBUG("found CFG_CMD\n");
+
+               switch (cmd->command) {
+               case NFQNL_CFG_CMD_BIND:
+                       if (queue) {
+                               /* drop the lookup reference instead of
+                                * leaking it */
+                               ret = -EBUSY;
+                               goto out_put;
+                       }
+
+                       queue = instance_create(queue_num, NETLINK_CB(skb).pid);
+                       if (!queue)
+                               return -EINVAL;
+                       break;
+               case NFQNL_CFG_CMD_UNBIND:
+                       if (!queue)
+                               return -ENODEV;
+
+                       if (queue->peer_pid != NETLINK_CB(skb).pid) {
+                               ret = -EPERM;
+                               goto out_put;
+                       }
+
+                       instance_destroy(queue);
+                       break;
+               case NFQNL_CFG_CMD_PF_BIND:
+                       QDEBUG("registering queue handler for pf=%u\n",
+                               ntohs(cmd->pf));
+                       ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh);
+                       break;
+               case NFQNL_CFG_CMD_PF_UNBIND:
+                       QDEBUG("unregistering queue handler for pf=%u\n",
+                               ntohs(cmd->pf));
+                       /* This is a bug and a feature.  We can unregister
+                        * other handlers(!) */
+                       ret = nf_unregister_queue_handler(ntohs(cmd->pf));
+                       break;
+               default:
+                       ret = -EINVAL;
+                       break;
+               }
+       } else {
+               if (!queue) {
+                       QDEBUG("no config command, and no instance ENOENT\n");
+                       ret = -ENOENT;
+                       goto out_put;
+               }
+
+               if (queue->peer_pid != NETLINK_CB(skb).pid) {
+                       QDEBUG("no config command, and wrong pid\n");
+                       ret = -EPERM;
+                       goto out_put;
+               }
+       }
+
+       if (nfqa[NFQA_CFG_PARAMS-1]) {
+               struct nfqnl_msg_config_params *params;
+
+               /* PF_BIND, PF_UNBIND and unknown commands get here without
+                * requiring an instance; there is nothing to configure then */
+               if (!queue) {
+                       ret = -ENOENT;
+                       goto out_put;
+               }
+               params = NFA_DATA(nfqa[NFQA_CFG_PARAMS-1]);
+
+               nfqnl_set_mode(queue, params->copy_mode,
+                               ntohl(params->copy_range));
+       }
+
+out_put:
+       instance_put(queue);
+       return ret;
+}
+
+static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
+       [NFQNL_MSG_PACKET]      = { .call = nfqnl_recv_unsupp,
+                                   .attr_count = NFQA_MAX,
+                                   .cap_required = CAP_NET_ADMIN },
+       [NFQNL_MSG_VERDICT]     = { .call = nfqnl_recv_verdict,
+                                   .attr_count = NFQA_MAX,
+                                   .cap_required = CAP_NET_ADMIN },
+       [NFQNL_MSG_CONFIG]      = { .call = nfqnl_recv_config,
+                                   .attr_count = NFQA_CFG_MAX,
+                                   .cap_required = CAP_NET_ADMIN },
+};
+
+static struct nfnetlink_subsystem nfqnl_subsys = {
+       .name           = "nf_queue",
+       .subsys_id      = NFNL_SUBSYS_QUEUE,
+       .cb_count       = NFQNL_MSG_MAX,
+       .cb             = nfqnl_cb,
+};
+
+#ifdef CONFIG_PROC_FS
+struct iter_state {
+       unsigned int bucket;
+};
+
+/* Position the iterator on the first non-empty hash bucket and return its
+ * first node, or NULL when the table is empty or the seq_file has no
+ * iterator state attached. */
+static struct hlist_node *get_first(struct seq_file *seq)
+{
+       struct iter_state *st = seq->private;
+
+       if (st == NULL)
+               return NULL;
+
+       st->bucket = 0;
+       while (st->bucket < INSTANCE_BUCKETS) {
+               struct hlist_head *head = &instance_table[st->bucket];
+
+               if (!hlist_empty(head))
+                       return head->first;
+               st->bucket++;
+       }
+       return NULL;
+}
+
+/* Return the node following h, moving on to later buckets when the current
+ * chain is exhausted.  Returns NULL after the last node of the last
+ * bucket. */
+static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
+{
+       struct iter_state *st = seq->private;
+       struct hlist_node *next = h->next;
+
+       while (next == NULL) {
+               st->bucket++;
+               if (st->bucket >= INSTANCE_BUCKETS)
+                       return NULL;
+
+               next = instance_table[st->bucket].first;
+       }
+       return next;
+}
+
+/* Return the node at position pos (counting from 0) of the whole table, or
+ * NULL when the table holds fewer than pos + 1 nodes. */
+static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
+{
+       struct hlist_node *node = get_first(seq);
+
+       while (node != NULL && pos != 0) {
+               node = get_next(seq, node);
+               if (node == NULL)
+                       break;
+               pos--;
+       }
+
+       if (pos)
+               return NULL;
+       return node;
+}
+
+/* seq_file start hook: take the instances_lock read lock (released in
+ * seq_stop()) and return the node at *pos, or NULL. */
+static void *seq_start(struct seq_file *seq, loff_t *pos)
+{
+       struct hlist_node *node;
+
+       read_lock_bh(&instances_lock);
+       node = get_idx(seq, *pos);
+
+       return node;
+}
+
+/* seq_file next hook: advance the position and return the node after v,
+ * or NULL at the end of the table. */
+static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       *pos += 1;
+
+       return get_next(s, v);
+}
+
+/* seq_file stop hook: release the read lock taken in seq_start(). */
+static void seq_stop(struct seq_file *s, void *v)
+{
+       read_unlock_bh(&instances_lock);
+       return;
+}
+
+/* seq_file show hook: print one line for the instance behind v, in the
+ * order queue number, peer pid, queue length, copy mode, copy range,
+ * queue-full drops, user drops, id sequence, use count.
+ *
+ * v is the hlist node produced by the iterator; it is used directly as the
+ * instance pointer. */
+static int seq_show(struct seq_file *s, void *v)
+{
+       const struct nfqnl_instance *inst = v;
+       int last_id = atomic_read(&inst->id_sequence);
+       int refs = atomic_read(&inst->use);
+
+       return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
+                         inst->queue_num,
+                         inst->peer_pid, inst->queue_total,
+                         inst->copy_mode, inst->copy_range,
+                         inst->queue_dropped, inst->queue_user_dropped,
+                         last_id, refs);
+}
+
+static struct seq_operations nfqnl_seq_ops = {
+       .start  = seq_start,
+       .next   = seq_next,
+       .stop   = seq_stop,
+       .show   = seq_show,
+};
+
+/* open() handler of the proc file: allocate zeroed iterator state, open
+ * the seq_file and attach the state to it.  Returns the seq_open() result,
+ * or -ENOMEM when the state cannot be allocated; the state is freed again
+ * if seq_open() fails. */
+static int nfqnl_open(struct inode *inode, struct file *file)
+{
+       struct iter_state *is;
+       int ret;
+
+       is = kmalloc(sizeof(*is), GFP_KERNEL);
+       if (is == NULL)
+               return -ENOMEM;
+       memset(is, 0, sizeof(*is));
+
+       ret = seq_open(file, &nfqnl_seq_ops);
+       if (ret < 0) {
+               kfree(is);
+               return ret;
+       }
+
+       ((struct seq_file *)file->private_data)->private = is;
+       return ret;
+}
+
+static struct file_operations nfqnl_file_ops = {
+       .owner   = THIS_MODULE,
+       .open    = nfqnl_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release_private,
+};
+
+#endif /* PROC_FS */
+
+/* Shared module setup and teardown.
+ *
+ * With init != 0: initialise the instance hash table, register the netlink
+ * notifier and the nfnetlink subsystem, create the proc entry (when
+ * CONFIG_PROC_FS is set) and register the netdevice notifier.  Returns the
+ * nfnetlink_subsys_register() status on success.  On failure everything
+ * registered so far is undone and a negative errno is returned.
+ *
+ * With init == 0: undo all of the above and unregister the queue handlers.
+ * The return value is not meaningful in this mode. */
+static int
+init_or_cleanup(int init)
+{
+       int i, status = -ENOMEM;
+#ifdef CONFIG_PROC_FS
+       struct proc_dir_entry *proc_nfqueue;
+#endif
+
+       if (!init)
+               goto cleanup;
+
+       for (i = 0; i < INSTANCE_BUCKETS; i++)
+               INIT_HLIST_HEAD(&instance_table[i]);
+
+       netlink_register_notifier(&nfqnl_rtnl_notifier);
+       status = nfnetlink_subsys_register(&nfqnl_subsys);
+       if (status < 0) {
+               printk(KERN_ERR "nf_queue: failed to create netlink socket\n");
+               goto cleanup_netlink_notifier;
+       }
+
+#ifdef CONFIG_PROC_FS
+       proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440,
+                                        proc_net_netfilter);
+       if (!proc_nfqueue) {
+               /* status still holds the successful register result here;
+                * report the failure so the module load is aborted */
+               status = -ENOMEM;
+               goto cleanup_subsys;
+       }
+       proc_nfqueue->proc_fops = &nfqnl_file_ops;
+#endif
+
+       register_netdevice_notifier(&nfqnl_dev_notifier);
+
+       return status;
+
+cleanup:
+       nf_unregister_queue_handlers(&nfqh);
+       unregister_netdevice_notifier(&nfqnl_dev_notifier);
+#ifdef CONFIG_PROC_FS
+       remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
+cleanup_subsys:
+#endif
+       nfnetlink_subsys_unregister(&nfqnl_subsys);
+cleanup_netlink_notifier:
+       netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+       return status;
+}
+
+/* Module entry point: run the shared setup/teardown routine in setup mode
+ * and hand its status back to the module loader. */
+static int __init init(void)
+{
+       const int do_init = 1;
+
+       return init_or_cleanup(do_init);
+}
+
+/* Module exit point: run the shared setup/teardown routine in teardown
+ * mode.  Its return value is not used. */
+static void __exit fini(void)
+{
+       const int do_init = 0;
+
+       init_or_cleanup(do_init);
+}
+
+MODULE_DESCRIPTION("netfilter packet queue handler");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE);
+
+module_init(init);
+module_exit(fini);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c

index ff774a06c89df46c2c4ce8b0a5d7e6ce07a90f71..62435ffc61846396e6a862dd9907319a1a3b8688 100644 (file)
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -13,7 +13,12 @@
   *                               added netlink_proto_exit
   * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
   *                              use nlk_sk, as sk->protinfo is on a diet 8)
- *
+ * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
+ *                              - inc module use count of module that owns
+ *                                the kernel socket in case userspace opens
+ *                                socket of same protocol
+ *                              - remove all module support, since netlink is
+ *                                mandatory if CONFIG_NET=y these days
   */
  
  #include <linux/config.h>
@@ -55,21 +60,29 @@
  #include <net/scm.h>
  
  #define Nprintk(a...)
+#define NLGRPSZ(x)     (ALIGN(x, sizeof(unsigned long) * 8) / 8)
  
  struct netlink_sock {
         /* struct sock has to be the first member of netlink_sock */
         struct sock             sk;
         u32                     pid;
-       unsigned int            groups;
         u32                     dst_pid;
-       unsigned int            dst_groups;
+       u32                     dst_group;
+       u32                     flags;
+       u32                     subscriptions;
+       u32                     ngroups;
+       unsigned long           *groups;
         unsigned long           state;
         wait_queue_head_t       wait;
         struct netlink_callback *cb;
         spinlock_t              cb_lock;
         void                    (*data_ready)(struct sock *sk, int bytes);
+       struct module           *module;
  };
  
+#define NETLINK_KERNEL_SOCKET  0x1
+#define NETLINK_RECV_PKTINFO   0x2
+
  static inline struct netlink_sock *nlk_sk(struct sock *sk)
  {
         return (struct netlink_sock *)sk;
@@ -92,6 +105,9 @@ struct netlink_table {
         struct nl_pid_hash hash;
         struct hlist_head mc_list;
         unsigned int nl_nonroot;
+       unsigned int groups;
+       struct module *module;
+       int registered;
  };
  
  static struct netlink_table *nl_table;
@@ -106,6 +122,11 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
  
  static struct notifier_block *netlink_chain;
  
+/*
+ * Convert a 1-based multicast group number into its bit in a 32-bit
+ * group mask.  Group 0 ("no group") and groups above 32, which have no
+ * bit in a 32-bit mask, yield 0.
+ */
+static u32 netlink_group_mask(u32 group)
+{
+       if (group == 0 || group > 32)
+               return 0;
+       /* unsigned constant: 1 << 31 would overflow a signed int */
+       return 1U << (group - 1);
+}
+
  static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
  {
         return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
@@ -122,6 +143,7 @@ static void netlink_sock_destruct(struct sock *sk)
         BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
         BUG_TRAP(!nlk_sk(sk)->cb);
+       BUG_TRAP(!nlk_sk(sk)->groups);
  }
  
  /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP.
@@ -317,7 +339,7 @@ static void netlink_remove(struct sock *sk)
         netlink_table_grab();
         if (sk_del_node_init(sk))
                 nl_table[sk->sk_protocol].hash.entries--;
-       if (nlk_sk(sk)->groups)
+       if (nlk_sk(sk)->subscriptions)
                 __sk_del_bind_node(sk);
         netlink_table_ungrab();
  }
@@ -328,19 +350,11 @@ static struct proto netlink_proto = {
         .obj_size = sizeof(struct netlink_sock),
  };
  
-static int netlink_create(struct socket *sock, int protocol)
+static int __netlink_create(struct socket *sock, int protocol)
  {
         struct sock *sk;
         struct netlink_sock *nlk;
  
-       sock->state = SS_UNCONNECTED;
-
-       if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
-               return -ESOCKTNOSUPPORT;
-
-       if (protocol<0 || protocol >= MAX_LINKS)
-               return -EPROTONOSUPPORT;
-
         sock->ops = &netlink_ops;
  
         sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
@@ -350,15 +364,67 @@ static int netlink_create(struct socket *sock, int protocol)
         sock_init_data(sock, sk);
  
         nlk = nlk_sk(sk);
-
         spin_lock_init(&nlk->cb_lock);
         init_waitqueue_head(&nlk->wait);
-       sk->sk_destruct = netlink_sock_destruct;
  
+       sk->sk_destruct = netlink_sock_destruct;
         sk->sk_protocol = protocol;
         return 0;
  }
  
+static int netlink_create(struct socket *sock, int protocol)
+{
+       struct module *module = NULL;
+       struct netlink_sock *nlk;
+       unsigned int groups;
+       int err = 0;
+
+       sock->state = SS_UNCONNECTED;
+
+       if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
+               return -ESOCKTNOSUPPORT;
+
+       if (protocol<0 || protocol >= MAX_LINKS)
+               return -EPROTONOSUPPORT;
+
+       netlink_lock_table();
+#ifdef CONFIG_KMOD
+       if (!nl_table[protocol].registered) {
+               netlink_unlock_table();
+               request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
+               netlink_lock_table();
+       }
+#endif
+       if (nl_table[protocol].registered &&
+           try_module_get(nl_table[protocol].module))
+               module = nl_table[protocol].module;
+       else
+               err = -EPROTONOSUPPORT;
+       groups = nl_table[protocol].groups;
+       netlink_unlock_table();
+
+       if (err || (err = __netlink_create(sock, protocol) < 0))
+               goto out_module;
+
+       nlk = nlk_sk(sock->sk);
+
+       nlk->groups = kmalloc(NLGRPSZ(groups), GFP_KERNEL);
+       if (nlk->groups == NULL) {
+               err = -ENOMEM;
+               goto out_module;
+       }
+       memset(nlk->groups, 0, NLGRPSZ(groups));
+       nlk->ngroups = groups;
+
+       nlk->module = module;
+out:
+       return err;
+
+out_module:
+       module_put(module);
+       goto out;
+}
+
  static int netlink_release(struct socket *sock)
  {
         struct sock *sk = sock->sk;
@@ -387,14 +453,27 @@ static int netlink_release(struct socket *sock)
  
         skb_queue_purge(&sk->sk_write_queue);
  
-       if (nlk->pid && !nlk->groups) {
+       if (nlk->pid && !nlk->subscriptions) {
                 struct netlink_notify n = {
                                                 .protocol = sk->sk_protocol,
                                                 .pid = nlk->pid,
                                           };
                 notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n);
         }       
-       
+
+       if (nlk->module)
+               module_put(nlk->module);
+
+       if (nlk->flags & NETLINK_KERNEL_SOCKET) {
+               netlink_table_grab();
+               nl_table[sk->sk_protocol].module = NULL;
+               nl_table[sk->sk_protocol].registered = 0;
+               netlink_table_ungrab();
+       }
+
+       kfree(nlk->groups);
+       nlk->groups = NULL;
+
         sock_put(sk);
         return 0;
  }
@@ -443,6 +522,18 @@ static inline int netlink_capable(struct socket *sock, unsigned int flag)
                capable(CAP_NET_ADMIN);
  } 
  
+/*
+ * Record a new subscription count for @sk and keep its membership of the
+ * protocol's mc_list in step: the socket is added when the count leaves
+ * zero and removed when it returns to zero.  The callers visible in this
+ * file invoke it between netlink_table_grab() and netlink_table_ungrab().
+ */
+static void
+netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
+{
+       struct netlink_sock *nlk = nlk_sk(sk);
+       int was_listening = nlk->subscriptions != 0;
+       int now_listening = subscriptions != 0;
+
+       if (was_listening != now_listening) {
+               if (now_listening)
+                       sk_add_bind_node(sk,
+                                        &nl_table[sk->sk_protocol].mc_list);
+               else
+                       __sk_del_bind_node(sk);
+       }
+       nlk->subscriptions = subscriptions;
+}
+
  static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
  {
         struct sock *sk = sock->sk;
@@ -468,15 +559,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len
                         return err;
         }
  
-       if (!nladdr->nl_groups && !nlk->groups)
+       if (!nladdr->nl_groups && !(u32)nlk->groups[0])
                 return 0;
  
         netlink_table_grab();
-       if (nlk->groups && !nladdr->nl_groups)
-               __sk_del_bind_node(sk);
-       else if (!nlk->groups && nladdr->nl_groups)
-               sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
-       nlk->groups = nladdr->nl_groups;
+       netlink_update_subscriptions(sk, nlk->subscriptions +
+                                        hweight32(nladdr->nl_groups) -
+                                        hweight32(nlk->groups[0]));
+       nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; 
         netlink_table_ungrab();
  
         return 0;
@@ -493,7 +583,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
         if (addr->sa_family == AF_UNSPEC) {
                 sk->sk_state    = NETLINK_UNCONNECTED;
                 nlk->dst_pid    = 0;
-               nlk->dst_groups = 0;
+               nlk->dst_group  = 0;
                 return 0;
         }
         if (addr->sa_family != AF_NETLINK)
@@ -509,7 +599,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
         if (err == 0) {
                 sk->sk_state    = NETLINK_CONNECTED;
                 nlk->dst_pid    = nladdr->nl_pid;
-               nlk->dst_groups = nladdr->nl_groups;
+               nlk->dst_group  = ffs(nladdr->nl_groups);
         }
  
         return err;
@@ -527,10 +617,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr
  
         if (peer) {
                 nladdr->nl_pid = nlk->dst_pid;
-               nladdr->nl_groups = nlk->dst_groups;
+               nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
         } else {
                 nladdr->nl_pid = nlk->pid;
-               nladdr->nl_groups = nlk->groups;
+               nladdr->nl_groups = nlk->groups[0];
         }
         return 0;
  }
@@ -731,7 +821,8 @@ static inline int do_one_broadcast(struct sock *sk,
         if (p->exclude_sk == sk)
                 goto out;
  
-       if (nlk->pid == p->pid || !(nlk->groups & p->group))
+       if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
+           !test_bit(p->group - 1, nlk->groups))
                 goto out;
  
         if (p->failure) {
@@ -770,7 +861,7 @@ out:
  }
  
  int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
-                     u32 group, int allocation)
+                     u32 group, unsigned int __nocast allocation)
  {
         struct netlink_broadcast_data info;
         struct hlist_node *node;
@@ -827,7 +918,8 @@ static inline int do_one_set_err(struct sock *sk,
         if (sk == p->exclude_sk)
                 goto out;
  
-       if (nlk->pid == p->pid || !(nlk->groups & p->group))
+       if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
+           !test_bit(p->group - 1, nlk->groups))
                 goto out;
  
         sk->sk_err = p->code;
@@ -855,6 +947,94 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
         read_unlock(&nl_table_lock);
  }
  
+/*
+ * Set a SOL_NETLINK socket option.
+ *
+ * NETLINK_PKTINFO sets or clears NETLINK_RECV_PKTINFO according to the
+ * int value passed in.  NETLINK_ADD_MEMBERSHIP and NETLINK_DROP_MEMBERSHIP
+ * take a 1-based group number, set or clear that bit in the socket's
+ * group bitmap and adjust the subscription count under the table lock.
+ *
+ * The value is only read when optlen is at least sizeof(int); otherwise
+ * it is treated as 0.
+ *
+ * Returns 0 on success, -ENOPROTOOPT for another level or option,
+ * -EFAULT if the value cannot be read, -EPERM if the caller may not
+ * receive multicast, and -EINVAL for a group of 0 or beyond ngroups.
+ */
+static int netlink_setsockopt(struct socket *sock, int level, int optname,
+                              char __user *optval, int optlen)
+{
+       struct sock *sk = sock->sk;
+       struct netlink_sock *nlk = nlk_sk(sk);
+       unsigned int nsubs;
+       int was_member, want_member;
+       int val = 0;
+
+       if (level != SOL_NETLINK)
+               return -ENOPROTOOPT;
+
+       if (optlen >= sizeof(int) &&
+           get_user(val, (int __user *)optval))
+               return -EFAULT;
+
+       switch (optname) {
+       case NETLINK_PKTINFO:
+               if (val)
+                       nlk->flags |= NETLINK_RECV_PKTINFO;
+               else
+                       nlk->flags &= ~NETLINK_RECV_PKTINFO;
+               return 0;
+       case NETLINK_ADD_MEMBERSHIP:
+       case NETLINK_DROP_MEMBERSHIP:
+               /* handled below, outside the switch */
+               break;
+       default:
+               return -ENOPROTOOPT;
+       }
+
+       want_member = optname == NETLINK_ADD_MEMBERSHIP ? 1 : 0;
+
+       if (!netlink_capable(sock, NL_NONROOT_RECV))
+               return -EPERM;
+       if (!val || val - 1 >= nlk->ngroups)
+               return -EINVAL;
+
+       netlink_table_grab();
+       was_member = test_bit(val - 1, nlk->groups);
+       nsubs = nlk->subscriptions - was_member + want_member;
+       if (want_member)
+               __set_bit(val - 1, nlk->groups);
+       else
+               __clear_bit(val - 1, nlk->groups);
+       netlink_update_subscriptions(sk, nsubs);
+       netlink_table_ungrab();
+
+       return 0;
+}
+
+/*
+ * Read a SOL_NETLINK socket option.
+ *
+ * Only NETLINK_PKTINFO is supported: it reports, as an int, whether
+ * NETLINK_RECV_PKTINFO is set on the socket (1) or not (0), and sets
+ * *optlen to sizeof(int).
+ *
+ * Returns 0 on success, -ENOPROTOOPT for another level or option,
+ * -EINVAL if the supplied length is negative or smaller than an int, and
+ * -EFAULT if user memory cannot be read or written.
+ */
+static int netlink_getsockopt(struct socket *sock, int level, int optname,
+                              char __user *optval, int __user *optlen)
+{
+       struct sock *sk = sock->sk;
+       struct netlink_sock *nlk = nlk_sk(sk);
+       int len, val, err;
+
+       if (level != SOL_NETLINK)
+               return -ENOPROTOOPT;
+
+       if (get_user(len, optlen))
+               return -EFAULT;
+       if (len < 0)
+               return -EINVAL;
+
+       switch (optname) {
+       case NETLINK_PKTINFO:
+               if (len < sizeof(int))
+                       return -EINVAL;
+               len = sizeof(int);
+               val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
+               /*
+                * optval is a char pointer and put_user() stores an
+                * object of the pointed-to type, so cast it to store the
+                * whole int; report a faulting user pointer to the caller.
+                */
+               if (put_user(len, optlen) ||
+                   put_user(val, (int __user *)optval))
+                       return -EFAULT;
+               err = 0;
+               break;
+       default:
+               err = -ENOPROTOOPT;
+       }
+       return err;
+}
+
+/*
+ * Attach a SOL_NETLINK/NETLINK_PKTINFO control message to @msg that
+ * carries the destination group stored in @skb's netlink control block.
+ */
+static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+{
+       struct nl_pktinfo pktinfo;
+
+       pktinfo.group = NETLINK_CB(skb).dst_group;
+       put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO,
+                sizeof(pktinfo), &pktinfo);
+}
+
  static inline void netlink_rcv_wake(struct sock *sk)
  {
         struct netlink_sock *nlk = nlk_sk(sk);
@@ -873,7 +1053,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
         struct netlink_sock *nlk = nlk_sk(sk);
         struct sockaddr_nl *addr=msg->msg_name;
         u32 dst_pid;
-       u32 dst_groups;
+       u32 dst_group;
         struct sk_buff *skb;
         int err;
         struct scm_cookie scm;
@@ -891,12 +1071,12 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
                 if (addr->nl_family != AF_NETLINK)
                         return -EINVAL;
                 dst_pid = addr->nl_pid;
-               dst_groups = addr->nl_groups;
-               if (dst_groups && !netlink_capable(sock, NL_NONROOT_SEND))
+               dst_group = ffs(addr->nl_groups);
+               if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
                         return -EPERM;
         } else {
                 dst_pid = nlk->dst_pid;
-               dst_groups = nlk->dst_groups;
+               dst_group = nlk->dst_group;
         }
  
         if (!nlk->pid) {
@@ -914,9 +1094,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
                 goto out;
  
         NETLINK_CB(skb).pid     = nlk->pid;
-       NETLINK_CB(skb).groups  = nlk->groups;
         NETLINK_CB(skb).dst_pid = dst_pid;
-       NETLINK_CB(skb).dst_groups = dst_groups;
+       NETLINK_CB(skb).dst_group = dst_group;
         NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
         memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
  
@@ -938,9 +1117,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
                 goto out;
         }
  
-       if (dst_groups) {
+       if (dst_group) {
                 atomic_inc(&skb->users);
-               netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL);
+               netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
         }
         err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
  
@@ -986,7 +1165,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
                 addr->nl_family = AF_NETLINK;
                 addr->nl_pad    = 0;
                 addr->nl_pid    = NETLINK_CB(skb).pid;
-               addr->nl_groups = NETLINK_CB(skb).dst_groups;
+               addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
                 msg->msg_namelen = sizeof(*addr);
         }
  
@@ -1001,6 +1180,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
                 netlink_dump(sk);
  
         scm_recv(sock, msg, siocb->scm, flags);
+       if (nlk->flags & NETLINK_RECV_PKTINFO)
+               netlink_cmsg_recv_pktinfo(msg, skb);
  
  out:
         netlink_rcv_wake(sk);
@@ -1023,10 +1204,13 @@ static void netlink_data_ready(struct sock *sk, int len)
   */
  
  struct sock *
-netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len))
+netlink_kernel_create(int unit, unsigned int groups,
+                      void (*input)(struct sock *sk, int len),
+                      struct module *module)
  {
         struct socket *sock;
         struct sock *sk;
+       struct netlink_sock *nlk;
  
         if (!nl_table)
                 return NULL;
@@ -1037,20 +1221,31 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len))
         if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
                 return NULL;
  
-       if (netlink_create(sock, unit) < 0) {
-               sock_release(sock);
-               return NULL;
-       }
+       if (__netlink_create(sock, unit) < 0)
+               goto out_sock_release;
+
         sk = sock->sk;
         sk->sk_data_ready = netlink_data_ready;
         if (input)
                 nlk_sk(sk)->data_ready = input;
  
-       if (netlink_insert(sk, 0)) {
-               sock_release(sock);
-               return NULL;
-       }
+       if (netlink_insert(sk, 0))
+               goto out_sock_release;
+
+       nlk = nlk_sk(sk);
+       nlk->flags |= NETLINK_KERNEL_SOCKET;
+
+       netlink_table_grab();
+       nl_table[unit].groups = groups < 32 ? 32 : groups;
+       nl_table[unit].module = module;
+       nl_table[unit].registered = 1;
+       netlink_table_ungrab();
+
         return sk;
+
+out_sock_release:
+       sock_release(sock);
+       return NULL;
  }
  
  void netlink_set_nonroot(int protocol, unsigned int flags)
@@ -1288,7 +1483,8 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
                            s,
                            s->sk_protocol,
                            nlk->pid,
-                          nlk->groups,
+                          nlk->flags & NETLINK_KERNEL_SOCKET ?
+                               0 : (unsigned int)nlk->groups[0],
                            atomic_read(&s->sk_rmem_alloc),
                            atomic_read(&s->sk_wmem_alloc),
                            nlk->cb,
@@ -1362,8 +1558,8 @@ static struct proto_ops netlink_ops = {
         .ioctl =        sock_no_ioctl,
         .listen =       sock_no_listen,
         .shutdown =     sock_no_shutdown,
-       .setsockopt =   sock_no_setsockopt,
-       .getsockopt =   sock_no_getsockopt,
+       .setsockopt =   netlink_setsockopt,
+       .getsockopt =   netlink_getsockopt,
         .sendmsg =      netlink_sendmsg,
         .recvmsg =      netlink_recvmsg,
         .mmap =         sock_no_mmap,
@@ -1438,21 +1634,7 @@ out:
         return err;
  }
  
-static void __exit netlink_proto_exit(void)
-{
-       sock_unregister(PF_NETLINK);
-       proc_net_remove("netlink");
-       kfree(nl_table);
-       nl_table = NULL;
-       proto_unregister(&netlink_proto);
-}
-
  core_initcall(netlink_proto_init);
-module_exit(netlink_proto_exit);
-
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS_NETPROTO(PF_NETLINK);
  
  EXPORT_SYMBOL(netlink_ack);
  EXPORT_SYMBOL(netlink_broadcast);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c

index 31ed4a9a1d066c0cdef1c3e5b6371fa3678037d4..4b53de982114d242706ba632960781190641c85a 100644 (file)
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -39,7 +39,7 @@
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
  #include <net/ip.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <net/arp.h>
  #include <linux/init.h>
  
@@ -459,12 +459,7 @@ static struct sock *nr_make_new(struct sock *osk)
         sk->sk_sndbuf   = osk->sk_sndbuf;
         sk->sk_state    = TCP_ESTABLISHED;
         sk->sk_sleep    = osk->sk_sleep;
-
-       if (sock_flag(osk, SOCK_ZAPPED))
-               sock_set_flag(sk, SOCK_ZAPPED);
-
-       if (sock_flag(osk, SOCK_DBG))
-               sock_set_flag(sk, SOCK_DBG);
+       sock_copy_flags(sk, osk);
  
         skb_queue_head_init(&nr->ack_queue);
         skb_queue_head_init(&nr->reseq_queue);
@@ -541,7 +536,8 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         struct nr_sock *nr = nr_sk(sk);
         struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr;
         struct net_device *dev;
-       ax25_address *user, *source;
+       ax25_uid_assoc *user;
+       ax25_address *source;
  
         lock_sock(sk);
         if (!sock_flag(sk, SOCK_ZAPPED)) {
@@ -580,16 +576,19 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         } else {
                 source = &addr->fsa_ax25.sax25_call;
  
-               if ((user = ax25_findbyuid(current->euid)) == NULL) {
+               user = ax25_findbyuid(current->euid);
+               if (user) {
+                       nr->user_addr   = user->call;
+                       ax25_uid_put(user);
+               } else {
                         if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
                                 release_sock(sk);
                                 dev_put(dev);
                                 return -EPERM;
                         }
-                       user = source;
+                       nr->user_addr   = *source;
                 }
  
-               nr->user_addr   = *user;
                 nr->source_addr = *source;
         }
  
@@ -609,7 +608,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
         struct sock *sk = sock->sk;
         struct nr_sock *nr = nr_sk(sk);
         struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr;
-       ax25_address *user, *source = NULL;
+       ax25_address *source = NULL;
+       ax25_uid_assoc *user;
         struct net_device *dev;
  
         lock_sock(sk);
@@ -650,16 +650,19 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
                 }
                 source = (ax25_address *)dev->dev_addr;
  
-               if ((user = ax25_findbyuid(current->euid)) == NULL) {
+               user = ax25_findbyuid(current->euid);
+               if (user) {
+                       nr->user_addr   = user->call;
+                       ax25_uid_put(user);
+               } else {
                         if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
                                 dev_put(dev);
                                 release_sock(sk);
                                 return -EPERM;
                         }
-                       user = source;
+                       nr->user_addr   = *source;
                 }
  
-               nr->user_addr   = *user;
                 nr->source_addr = *source;
                 nr->device      = dev;
  
@@ -855,17 +858,16 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
         frametype          = skb->data[19] & 0x0F;
         flags              = skb->data[19] & 0xF0;
  
-#ifdef CONFIG_INET
         /*
          * Check for an incoming IP over NET/ROM frame.
          */
-       if (frametype == NR_PROTOEXT && circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) {
+       if (frametype == NR_PROTOEXT &&
+           circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) {
                 skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
                 skb->h.raw = skb->data;
  
                 return nr_rx_ip(skb, dev);
         }
-#endif
  
         /*
          * Find an existing socket connection, based on circuit ID, if it's
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c

index 220bf7494f71591b59be618f7bcc2fcdc669b741..263da4c26494cb2cf875d22b75ca773be770c8b8 100644 (file)
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -38,8 +38,6 @@
  #include <net/ax25.h>
  #include <net/netrom.h>
  
-#ifdef CONFIG_INET
-
  /*
   *     Only allow IP over NET/ROM frames through if the netrom device is up.
   */
@@ -64,11 +62,12 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
         skb->nh.raw   = skb->data;
         skb->pkt_type = PACKET_HOST;
  
-       ip_rcv(skb, skb->dev, NULL);
+       netif_rx(skb);
  
         return 1;
  }
  
+#ifdef CONFIG_INET
  
  static int nr_rebuild_header(struct sk_buff *skb)
  {
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c

index 9c44b379412630dd7b269142b5fd557fca59139a..64b81a7969077c706353e76309a67698380f2095 100644 (file)
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -22,8 +22,7 @@
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/tcp.h>
-#include <net/ip.h>                    /* For ip_rcv */
+#include <net/tcp_states.h>
  #include <asm/uaccess.h>
  #include <asm/system.h>
  #include <linux/fcntl.h>
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c

index 0627347b14b88d7458b356e24aff9d051d464f75..587bed2674bfb4dcb09e40f13ae0d33b8342e053 100644 (file)
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -21,7 +21,7 @@
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/uaccess.h>
  #include <asm/system.h>
  #include <linux/fcntl.h>
@@ -77,7 +77,7 @@ void nr_requeue_frames(struct sock *sk)
                 if (skb_prev == NULL)
                         skb_queue_head(&sk->sk_write_queue, skb);
                 else
-                       skb_append(skb_prev, skb);
+                       skb_append(skb_prev, skb, &sk->sk_write_queue);
                 skb_prev = skb;
         }
  }
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c

index faabda8088be5cba98de33e09b3982805a52fbb2..75b72d389ba95283b4c0f1e9adfc69eb8d3ed2a5 100644 (file)
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -22,7 +22,7 @@
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/uaccess.h>
  #include <asm/system.h>
  #include <linux/fcntl.h>
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c

index c9d5980aa4de5f16726bd646e2ebc93513fc46e1..ba997095f08f7ba3c670e0de052a694d39da6a11 100644 (file)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -241,7 +241,7 @@ static struct proto_ops packet_ops;
  #ifdef CONFIG_SOCK_PACKET
  static struct proto_ops packet_ops_spkt;
  
-static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
+static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
  {
         struct sock *sk;
         struct sockaddr_pkt *spkt;
@@ -441,7 +441,7 @@ static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned
     we will not harm anyone.
   */
  
-static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
+static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
  {
         struct sock *sk;
         struct sockaddr_ll *sll;
@@ -546,7 +546,7 @@ drop:
  }
  
  #ifdef CONFIG_PACKET_MMAP
-static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
+static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
  {
         struct sock *sk;
         struct packet_sock *po;
@@ -635,12 +635,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct pack
         h->tp_snaplen = snaplen;
         h->tp_mac = macoff;
         h->tp_net = netoff;
-       if (skb->stamp.tv_sec == 0) { 
-               do_gettimeofday(&skb->stamp);
+       if (skb->tstamp.off_sec == 0) { 
+               __net_timestamp(skb);
                 sock_enable_timestamp(sk);
         }
-       h->tp_sec = skb->stamp.tv_sec;
-       h->tp_usec = skb->stamp.tv_usec;
+       h->tp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec;
+       h->tp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec;
  
         sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
         sll->sll_halen = 0;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c

index 7eb6a5bf93ea1961bc0c35e70843a646c106596f..c6e59f84c3ae78594dca71aa480a724cae9a00d0 100644 (file)
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -41,7 +41,7 @@
  #include <net/rose.h>
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <net/ip.h>
  #include <net/arp.h>
  
@@ -556,12 +556,7 @@ static struct sock *rose_make_new(struct sock *osk)
         sk->sk_sndbuf   = osk->sk_sndbuf;
         sk->sk_state    = TCP_ESTABLISHED;
         sk->sk_sleep    = osk->sk_sleep;
-
-       if (sock_flag(osk, SOCK_ZAPPED))
-               sock_set_flag(sk, SOCK_ZAPPED);
-
-       if (sock_flag(osk, SOCK_DBG))
-               sock_set_flag(sk, SOCK_DBG);
+       sock_copy_flags(sk, osk);
  
         init_timer(&rose->timer);
         init_timer(&rose->idletimer);
@@ -631,7 +626,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         struct rose_sock *rose = rose_sk(sk);
         struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
         struct net_device *dev;
-       ax25_address *user, *source;
+       ax25_address *source;
+       ax25_uid_assoc *user;
         int n;
  
         if (!sock_flag(sk, SOCK_ZAPPED))
@@ -656,14 +652,17 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
  
         source = &addr->srose_call;
  
-       if ((user = ax25_findbyuid(current->euid)) == NULL) {
+       user = ax25_findbyuid(current->euid);
+       if (user) {
+               rose->source_call = user->call;
+               ax25_uid_put(user);
+       } else {
                 if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE))
                         return -EACCES;
-               user = source;
+               rose->source_call   = *source;
         }
  
         rose->source_addr   = addr->srose_addr;
-       rose->source_call   = *user;
         rose->device        = dev;
         rose->source_ndigis = addr->srose_ndigis;
  
@@ -690,8 +689,8 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
         struct rose_sock *rose = rose_sk(sk);
         struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
         unsigned char cause, diagnostic;
-       ax25_address *user;
         struct net_device *dev;
+       ax25_uid_assoc *user;
         int n;
  
         if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
@@ -741,12 +740,14 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
                 if ((dev = rose_dev_first()) == NULL)
                         return -ENETUNREACH;
  
-               if ((user = ax25_findbyuid(current->euid)) == NULL)
+               user = ax25_findbyuid(current->euid);
+               if (!user)
                         return -EINVAL;
  
                 memcpy(&rose->source_addr, dev->dev_addr, ROSE_ADDR_LEN);
-               rose->source_call = *user;
+               rose->source_call = user->call;
                 rose->device      = dev;
+               ax25_uid_put(user);
  
                 rose_insert_socket(sk);         /* Finish the bind */
         }
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c

index ef475a1bb1ba6d84d91b7db4c1c20045b80cb880..8348d33f1efefceb64472f52577067c3550931bf 100644 (file)
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -26,8 +26,7 @@
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/ip.h>                    /* For ip_rcv */
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/system.h>
  #include <linux/fcntl.h>
  #include <linux/mm.h>
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c

index ff73ebb912b8ebd0f6887bdb776caa25922d9b5a..4510cd7613ecd851259bd4d6b43dc7637fd54ebf 100644 (file)
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -24,7 +24,7 @@
  #include <linux/if_arp.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/system.h>
  #include <asm/uaccess.h>
  #include <linux/fcntl.h>
@@ -994,8 +994,10 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
          *      1. The frame isn't for us,
          *      2. It isn't "owned" by any existing route.
          */
-       if (frametype != ROSE_CALL_REQUEST)     /* XXX */
-               return 0;
+       if (frametype != ROSE_CALL_REQUEST) {   /* XXX */
+               res = 0;
+               goto out;
+       }
  
         len  = (((skb->data[3] >> 4) & 0x0F) + 1) / 2;
         len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2;
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c

index 7db7e1cedc3aed3e0cb07ebb5630df776cbb16ad..a29a3a960fd657efa21c50a13b3936f5ce28e7bb 100644 (file)
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -21,7 +21,7 @@
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/system.h>
  #include <linux/fcntl.h>
  #include <linux/mm.h>
@@ -74,7 +74,7 @@ void rose_requeue_frames(struct sock *sk)
                 if (skb_prev == NULL)
                         skb_queue_head(&sk->sk_write_queue, skb);
                 else
-                       skb_append(skb_prev, skb);
+                       skb_append(skb_prev, skb, &sk->sk_write_queue);
                 skb_prev = skb;
         }
  }
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c

index 84dd4403f792d3c566cb0625965e8b902bcbbdf6..50ae0371dab872e1ee0b51d11362d8f7de532f8b 100644 (file)
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -22,7 +22,7 @@
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/system.h>
  #include <linux/fcntl.h>
  #include <linux/mm.h>
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c

index 9bce7794130af0e3b3bad3988cbbf350a51f642a..122c086ee2dbfc00f3cff0a0813919fb303ff03f 100644 (file)
--- a/net/rxrpc/transport.c
+++ b/net/rxrpc/transport.c
@@ -330,7 +330,7 @@ static int rxrpc_incoming_msg(struct rxrpc_transport *trans,
  
         msg->trans = trans;
         msg->state = RXRPC_MSG_RECEIVED;
-       msg->stamp = pkt->stamp;
+       skb_get_timestamp(pkt, &msg->stamp);
         if (msg->stamp.tv_sec == 0) {
                 do_gettimeofday(&msg->stamp); 
                 if (pkt->sk) 
diff --git a/net/sched/Kconfig b/net/sched/Kconfig

index 59d3e71f8b85296c820a0099010d50341d0135fb..45d3bc0812c8b270ea95e7a6c21799d6ec42273b 100644 (file)
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -491,6 +491,7 @@ config NET_EMATCH_TEXT
         depends on NET_EMATCH
         select TEXTSEARCH
         select TEXTSEARCH_KMP
+       select TEXTSEARCH_BM
         select TEXTSEARCH_FSM
         ---help---
           Say Y here if you want to be ablt to classify packets based on
diff --git a/net/sched/act_api.c b/net/sched/act_api.c

index 249c61936ea0391f30bddb767117d928ff958625..8aebe8f6d271b3876e5e6730829c3b4bf1cc4c63 100644 (file)
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -165,7 +165,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
         while ((a = act) != NULL) {
  repeat:
                 if (a->ops && a->ops->act) {
-                       ret = a->ops->act(&skb, a);
+                       ret = a->ops->act(&skb, a, res);
                         if (TC_MUNGED & skb->tc_verd) {
                                 /* copied already, allow trampling */
                                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
@@ -179,11 +179,6 @@ repeat:
                 act = a->next;
         }
  exec_done:
-       if (skb->tc_classid > 0) {
-               res->classid = skb->tc_classid;
-               res->class = 0;
-               skb->tc_classid = 0;
-       }
         return ret;
  }
  
@@ -598,7 +593,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
         nlh->nlmsg_flags |= NLM_F_ROOT;
         module_put(a->ops->owner);
         kfree(a);
-       err = rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+       err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
         if (err > 0)
                 return 0;
  
@@ -661,7 +656,7 @@ tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event)
  
                 /* now do the delete */
                 tcf_action_destroy(head, 0);
-               ret = rtnetlink_send(skb, pid, RTMGRP_TC,
+               ret = rtnetlink_send(skb, pid, RTNLGRP_TC,
                                      n->nlmsg_flags&NLM_F_ECHO);
                 if (ret > 0)
                         return 0;
@@ -703,9 +698,9 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
         x->rta_len = skb->tail - (u8*)x;
         
         nlh->nlmsg_len = skb->tail - b;
-       NETLINK_CB(skb).dst_groups = RTMGRP_TC;
+       NETLINK_CB(skb).dst_group = RTNLGRP_TC;
         
-       err = rtnetlink_send(skb, pid, RTMGRP_TC, flags&NLM_F_ECHO);
+       err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
         if (err > 0)
                 err = 0;
         return err;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c

index 3b5714ef4d1af2463d127589d7aa8af91c166384..b4d89fbb378212966b93d9cbc25596ce8add064c 100644 (file)
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -367,7 +367,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                 return -EINVAL;
         }
  
-       return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+       return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
  }
  
  struct tcf_dump_args
diff --git a/net/sched/gact.c b/net/sched/gact.c

index a811c89fef7fd105e98f2b2dadf1518ff3eb0847..d1c6d542912a418a89fe47c67870aed328b029a1 100644 (file)
--- a/net/sched/gact.c
+++ b/net/sched/gact.c
@@ -135,7 +135,7 @@ tcf_gact_cleanup(struct tc_action *a, int bind)
  }
  
  static int
-tcf_gact(struct sk_buff **pskb, struct tc_action *a)
+tcf_gact(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
  {
         struct tcf_gact *p = PRIV(a, gact);
         struct sk_buff *skb = *pskb;
diff --git a/net/sched/ipt.c b/net/sched/ipt.c

index b114d994d5236002c8faaacc740c89138bffe2f7..f50136eed211a3e4b4a8936525e4a17b165f6c0c 100644 (file)
--- a/net/sched/ipt.c
+++ b/net/sched/ipt.c
@@ -201,7 +201,7 @@ tcf_ipt_cleanup(struct tc_action *a, int bind)
  }
  
  static int
-tcf_ipt(struct sk_buff **pskb, struct tc_action *a)
+tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
  {
         int ret = 0, result = 0;
         struct tcf_ipt *p = PRIV(a, ipt);
diff --git a/net/sched/mirred.c b/net/sched/mirred.c

index f309ce336803480b090e9100d7cc2978aea78b90..20d06916dc0b7a4ac576f4ef06b78dc41484963b 100644 (file)
--- a/net/sched/mirred.c
+++ b/net/sched/mirred.c
@@ -158,7 +158,7 @@ tcf_mirred_cleanup(struct tc_action *a, int bind)
  }
  
  static int
-tcf_mirred(struct sk_buff **pskb, struct tc_action *a)
+tcf_mirred(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
  {
         struct tcf_mirred *p = PRIV(a, mirred);
         struct net_device *dev;
diff --git a/net/sched/pedit.c b/net/sched/pedit.c

index 678be6a645fbf41548d2c120ccd278ed0e03042a..767d24f4610ec8e516ae0ec3847043e70fa2f353 100644 (file)
--- a/net/sched/pedit.c
+++ b/net/sched/pedit.c
@@ -130,7 +130,7 @@ tcf_pedit_cleanup(struct tc_action *a, int bind)
  }
  
  static int
-tcf_pedit(struct sk_buff **pskb, struct tc_action *a)
+tcf_pedit(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
  {
         struct tcf_pedit *p = PRIV(a, pedit);
         struct sk_buff *skb = *pskb;
diff --git a/net/sched/police.c b/net/sched/police.c

index c03545faf5233c23e2b9c0dabbd4f4c17493c9e2..eb39fb2f39b6da90c76a24698289212864dfb354 100644 (file)
--- a/net/sched/police.c
+++ b/net/sched/police.c
@@ -284,7 +284,8 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind)
         return 0;
  }
  
-static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a)
+static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a,
+                          struct tcf_result *res)
  {
         psched_time_t now;
         struct sk_buff *skb = *pskb;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c

index b9a069af4a02f24353b31f144d574cfd8d57904d..737681cb9a928d65800a7e85661b034764683840 100644 (file)
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -816,7 +816,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
         }
  
         if (skb->len)
-               return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+               return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
  
  err_out:
         kfree_skb(skb);
@@ -1040,7 +1040,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                 return -EINVAL;
         }
  
-       return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+       return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
  }
  
  struct qdisc_dump_args
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c

index 8edefd5d095d5019bae647ccc845314de65c5264..99ceb91f0150a158aa926c3c3ebeb13d1860cbd4 100644 (file)
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -238,6 +238,20 @@ static void dev_watchdog_down(struct net_device *dev)
         spin_unlock_bh(&dev->xmit_lock);
  }
  
+void netif_carrier_on(struct net_device *dev)  /* clear NOCARRIER bit in dev->state */
+{
+       if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
+               linkwatch_fire_event(dev);      /* only if the bit was set before */
+       if (netif_running(dev))                 /* checked regardless of the bit change */
+               __netdev_watchdog_up(dev);
+}
+
+void netif_carrier_off(struct net_device *dev) /* set NOCARRIER bit in dev->state */
+{
+       if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
+               linkwatch_fire_event(dev);      /* only if the bit was clear before */
+}
+
  /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
     under all circumstances. It is difficult to invent anything faster or
     cheaper.
@@ -438,6 +452,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
         if (!ops->init || ops->init(sch, NULL) == 0)
                 return sch;
  
+       qdisc_destroy(sch);
  errout:
         return NULL;
  }
@@ -599,6 +614,8 @@ void dev_shutdown(struct net_device *dev)
  }
  
  EXPORT_SYMBOL(__netdev_watchdog_up);
+EXPORT_SYMBOL(netif_carrier_on);
+EXPORT_SYMBOL(netif_carrier_off);
  EXPORT_SYMBOL(noop_qdisc);
  EXPORT_SYMBOL(noop_qdisc_ops);
  EXPORT_SYMBOL(qdisc_create_dflt);
diff --git a/net/sched/simple.c b/net/sched/simple.c

index 3ab4c675ab5dad934834cd50ffbabc5192d0ec0b..8a6ae4f491e872dd3b1f381ab29ef98b4656011c 100644 (file)
--- a/net/sched/simple.c
+++ b/net/sched/simple.c
@@ -44,7 +44,7 @@ static DEFINE_RWLOCK(simp_lock);
  #include <net/pkt_act.h>
  #include <net/act_generic.h>
  
-static int tcf_simp(struct sk_buff **pskb, struct tc_action *a)
+static int tcf_simp(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
  {
         struct sk_buff *skb = *pskb;
         struct tcf_defact *p = PRIV(a, defact);
diff --git a/net/sctp/input.c b/net/sctp/input.c

index 742be9171b7df4b1741fad84852f47e6193c03b9..28f32243397f3b1b9755f42fcc70fa824d92dea0 100644 (file)
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -236,8 +236,8 @@ int sctp_rcv(struct sk_buff *skb)
         }
  
         /* SCTP seems to always need a timestamp right now (FIXME) */
-       if (skb->stamp.tv_sec == 0) {
-               do_gettimeofday(&skb->stamp);
+       if (skb->tstamp.off_sec == 0) {
+               __net_timestamp(skb);
                 sock_enable_timestamp(sk); 
         }
  
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c

index e9b2fd480d6153edd54d6e045140bb4e023bcefd..fa3be2b8fb5ffcaa88ac19e5919f7703461f90ef 100644 (file)
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -66,8 +66,8 @@
  #include <linux/seq_file.h>
  
  #include <net/protocol.h>
-#include <net/tcp.h>
  #include <net/ndisc.h>
+#include <net/ip.h>
  #include <net/ipv6.h>
  #include <net/transp_v6.h>
  #include <net/addrconf.h>
@@ -641,10 +641,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
         else
                 newinet->pmtudisc = IP_PMTUDISC_WANT;
  
-#ifdef INET_REFCNT_DEBUG
-       atomic_inc(&inet6_sock_nr);
-       atomic_inc(&inet_sock_nr);
-#endif
+       sk_refcnt_debug_inc(newsk);
  
         if (newsk->sk_prot->init(newsk)) {
                 sk_common_release(newsk);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c

index 98d49ec9b74b93a4256f430e83838f2e911baf42..b74f7772b576b131b34b25c33fa544d4d6b84c8d 100644 (file)
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -57,6 +57,7 @@ static struct snmp_mib sctp_snmp_list[] = {
         SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS),
         SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS),
         SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS),
+       SNMP_MIB_SENTINEL
  };
  
  /* Return the current value of a particular entry in the mib by adding its
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c

index ce9245e71fca9b0694c473c4cc21b7eb8c5ba058..e7025be77691c096d23807fbc75818d00610380e 100644 (file)
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -62,7 +62,7 @@
  /* Global data structures. */
  struct sctp_globals sctp_globals;
  struct proc_dir_entry  *proc_net_sctp;
-DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics);
+DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly;
  
  struct idr sctp_assocs_id;
  DEFINE_SPINLOCK(sctp_assocs_id_lock);
@@ -78,8 +78,8 @@ static struct sctp_pf *sctp_pf_inet_specific;
  static struct sctp_af *sctp_af_v4_specific;
  static struct sctp_af *sctp_af_v6_specific;
  
-kmem_cache_t *sctp_chunk_cachep;
-kmem_cache_t *sctp_bucket_cachep;
+kmem_cache_t *sctp_chunk_cachep __read_mostly;
+kmem_cache_t *sctp_bucket_cachep __read_mostly;
  
  extern int sctp_snmp_proc_init(void);
  extern int sctp_snmp_proc_exit(void);
@@ -593,9 +593,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
         newinet->mc_index = 0;
         newinet->mc_list = NULL;
  
-#ifdef INET_REFCNT_DEBUG
-       atomic_inc(&inet_sock_nr);
-#endif
+       sk_refcnt_debug_inc(newsk);
  
         if (newsk->sk_prot->init(newsk)) {
                 sk_common_release(newsk);
@@ -1244,6 +1242,10 @@ SCTP_STATIC __exit void sctp_exit(void)
  module_init(sctp_init);
  module_exit(sctp_exit);
  
+/*
+ * __stringify doesn't like enums, so use IPPROTO_SCTP value (132) directly.
+ */
+MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132");
  MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>");
  MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)");
  MODULE_LICENSE("GPL");
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c

index 00d32b7c8266a055b81d1b6569dbb1c59ef6ac46..3868a8d70cc058332f8918a5bab47b6233530c68 100644 (file)
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1362,6 +1362,7 @@ struct sctp_association *sctp_unpack_cookie(
         char *key;
         sctp_scope_t scope;
         struct sk_buff *skb = chunk->skb;
+       struct timeval tv;
  
         headersize = sizeof(sctp_chunkhdr_t) + SCTP_SECRET_SIZE;
         bodysize = ntohs(chunk->chunk_hdr->length) - headersize;
@@ -1434,7 +1435,8 @@ no_hmac:
          * an association, there is no need to check cookie's expiration
          * for init collision case of lost COOKIE ACK.
          */
-       if (!asoc && tv_lt(bear_cookie->expiration, skb->stamp)) {
+       skb_get_timestamp(skb, &tv);
+       if (!asoc && tv_lt(bear_cookie->expiration, tv)) {
                 __u16 len;
                 /*
                  * Section 3.3.10.3 Stale Cookie Error (3)
@@ -1447,10 +1449,9 @@ no_hmac:
                 len = ntohs(chunk->chunk_hdr->length);
                 *errp = sctp_make_op_error_space(asoc, chunk, len);
                 if (*errp) {
-                       suseconds_t usecs = (skb->stamp.tv_sec -
+                       suseconds_t usecs = (tv.tv_sec -
                                 bear_cookie->expiration.tv_sec) * 1000000L +
-                               skb->stamp.tv_usec -
-                               bear_cookie->expiration.tv_usec;
+                               tv.tv_usec - bear_cookie->expiration.tv_usec;
  
                         usecs = htonl(usecs);
                         sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c

index 091a66f06a35a7518f9f1d2ab8143d0631b6efec..4454afe4727ef2bb95778a5a8193c8b74df6dcd9 100644 (file)
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4892,7 +4892,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
         sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) {
                 event = sctp_skb2event(skb);
                 if (event->asoc == assoc) {
-                       __skb_unlink(skb, skb->list);
+                       __skb_unlink(skb, &oldsk->sk_receive_queue);
                         __skb_queue_tail(&newsk->sk_receive_queue, skb);
                 }
         }
@@ -4921,7 +4921,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
                 sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) {
                         event = sctp_skb2event(skb);
                         if (event->asoc == assoc) {
-                               __skb_unlink(skb, skb->list);
+                               __skb_unlink(skb, &oldsp->pd_lobby);
                                 __skb_queue_tail(queue, skb);
                         }
                 }
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c

index 8bbc279d6c99378934a8f4d2fb3943a04bd70143..ec2c857eae7fecfd6800940975c6f91d4741d995 100644 (file)
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -50,9 +50,9 @@
  
  /* Forward declarations for internal helpers.  */
  static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq,
-                                               struct sctp_ulpevent *);
+                                             struct sctp_ulpevent *);
  static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *,
-                                               struct sctp_ulpevent *);
+                                             struct sctp_ulpevent *);
  
  /* 1st Level Abstractions */
  
@@ -125,7 +125,9 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
                 event = sctp_ulpq_order(ulpq, event);
         }
  
-       /* Send event to the ULP.  */
+       /* Send event to the ULP.  'event' is the sctp_ulpevent for
+        * the very first SKB on the 'temp' list.
+        */
         if (event)
                 sctp_ulpq_tail_event(ulpq, event);
  
@@ -158,14 +160,18 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
         return sctp_clear_pd(ulpq->asoc->base.sk);
  }
  
-
-
+/* If the SKB of 'event' is on a list, it is the first such member
+ * of that list.
+ */
  int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
  {
         struct sock *sk = ulpq->asoc->base.sk;
-       struct sk_buff_head *queue;
+       struct sk_buff_head *queue, *skb_list;
+       struct sk_buff *skb = sctp_event2skb(event);
         int clear_pd = 0;
  
+       skb_list = (struct sk_buff_head *) skb->prev;
+
         /* If the socket is just going to throw this away, do not
          * even try to deliver it.
          */
@@ -197,10 +203,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
         /* If we are harvesting multiple skbs they will be
          * collected on a list.
          */
-       if (sctp_event2skb(event)->list)
-               sctp_skb_list_tail(sctp_event2skb(event)->list, queue);
+       if (skb_list)
+               sctp_skb_list_tail(skb_list, queue);
         else
-               __skb_queue_tail(queue, sctp_event2skb(event));
+               __skb_queue_tail(queue, skb);
  
         /* Did we just complete partial delivery and need to get
          * rolling again?  Move pending data to the receive
@@ -214,10 +220,11 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
         return 1;
  
  out_free:
-       if (sctp_event2skb(event)->list)
-               sctp_queue_purge_ulpevents(sctp_event2skb(event)->list);
+       if (skb_list)
+               sctp_queue_purge_ulpevents(skb_list);
         else
                 sctp_ulpevent_free(event);
+
         return 0;
  }
  
@@ -269,7 +276,7 @@ static inline void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
   * payload was fragmented on the way and ip had to reassemble them.
   * We add the rest of skb's to the first skb's fraglist.
   */
-static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, struct sk_buff *l_frag)
+static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag)
  {
         struct sk_buff *pos;
         struct sctp_ulpevent *event;
@@ -294,7 +301,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag,
                 skb_shinfo(f_frag)->frag_list = pos;
  
         /* Remove the first fragment from the reassembly queue.  */
-       __skb_unlink(f_frag, f_frag->list);
+       __skb_unlink(f_frag, queue);
         while (pos) {
  
                 pnext = pos->next;
@@ -304,7 +311,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag,
                 f_frag->data_len += pos->len;
  
                 /* Remove the fragment from the reassembly queue.  */
-               __skb_unlink(pos, pos->list);
+               __skb_unlink(pos, queue);
         
                 /* Break if we have reached the last fragment.  */
                 if (pos == l_frag)
@@ -375,7 +382,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
  done:
         return retval;
  found:
-       retval = sctp_make_reassembled_event(first_frag, pos);
+       retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos);
         if (retval)
                 retval->msg_flags |= MSG_EOR;
         goto done;
@@ -435,7 +442,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq
          * further.
          */
  done:
-       retval = sctp_make_reassembled_event(first_frag, last_frag);
+       retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag);
         if (retval && is_last)
                 retval->msg_flags |= MSG_EOR;
  
@@ -527,7 +534,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u
          * further.
          */
  done:
-       retval = sctp_make_reassembled_event(first_frag, last_frag);
+       retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag);
         return retval;
  }
  
@@ -537,6 +544,7 @@ done:
  static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
                                               struct sctp_ulpevent *event)
  {
+       struct sk_buff_head *event_list;
         struct sk_buff *pos, *tmp;
         struct sctp_ulpevent *cevent;
         struct sctp_stream *in;
@@ -547,6 +555,8 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
         ssn = event->ssn;
         in  = &ulpq->asoc->ssnmap->in;
  
+       event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev;
+
         /* We are holding the chunks by stream, by SSN.  */
         sctp_skb_for_each(pos, &ulpq->lobby, tmp) {
                 cevent = (struct sctp_ulpevent *) pos->cb;
@@ -567,10 +577,10 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
                 /* Found it, so mark in the ssnmap. */
                 sctp_ssn_next(in, sid);
  
-               __skb_unlink(pos, pos->list);
+               __skb_unlink(pos, &ulpq->lobby);
  
                 /* Attach all gathered skbs to the event.  */
-               __skb_queue_tail(sctp_event2skb(event)->list, pos);
+               __skb_queue_tail(event_list, pos);
         }
  }
  
@@ -626,7 +636,7 @@ static inline void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq,
  }
  
  static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq,
-                                               struct sctp_ulpevent *event)
+                                            struct sctp_ulpevent *event)
  {
         __u16 sid, ssn;
         struct sctp_stream *in;
@@ -667,7 +677,7 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq)
  {
         struct sk_buff *pos, *tmp;
         struct sctp_ulpevent *cevent;
-       struct sctp_ulpevent *event = NULL;
+       struct sctp_ulpevent *event;
         struct sctp_stream *in;
         struct sk_buff_head temp;
         __u16 csid, cssn;
@@ -675,6 +685,8 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq)
         in  = &ulpq->asoc->ssnmap->in;
  
         /* We are holding the chunks by stream, by SSN.  */
+       skb_queue_head_init(&temp);
+       event = NULL;
         sctp_skb_for_each(pos, &ulpq->lobby, tmp) {
                 cevent = (struct sctp_ulpevent *) pos->cb;
                 csid = cevent->stream;
@@ -686,19 +698,20 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq)
                 /* Found it, so mark in the ssnmap. */         
                 sctp_ssn_next(in, csid);
  
-               __skb_unlink(pos, pos->list);
+               __skb_unlink(pos, &ulpq->lobby);
                 if (!event) {                                           
                         /* Create a temporary list to collect chunks on.  */
                         event = sctp_skb2event(pos);
-                       skb_queue_head_init(&temp);
                         __skb_queue_tail(&temp, sctp_event2skb(event));
                 } else {
                         /* Attach all gathered skbs to the event.  */
-                       __skb_queue_tail(sctp_event2skb(event)->list, pos);
+                       __skb_queue_tail(&temp, pos);
                 }
         }
  
-       /* Send event to the ULP.  */
+       /* Send event to the ULP.  'event' is the sctp_ulpevent for
+        * the very first SKB on the 'temp' list.
+        */
         if (event)
                 sctp_ulpq_tail_event(ulpq, event);
  }
diff --git a/net/socket.c b/net/socket.c

index 6f2a178819726b7b878aa22a6d79954b61c1e239..94fe638b4d72b3721cbda637573c7182f5535b32 100644 (file)
--- a/net/socket.c
+++ b/net/socket.c
@@ -70,6 +70,8 @@
  #include <linux/seq_file.h>
  #include <linux/wanrouter.h>
  #include <linux/if_bridge.h>
+#include <linux/if_frad.h>
+#include <linux/if_vlan.h>
  #include <linux/init.h>
  #include <linux/poll.h>
  #include <linux/cache.h>
@@ -272,7 +274,7 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ule
  
  #define SOCKFS_MAGIC 0x534F434B
  
-static kmem_cache_t * sock_inode_cachep;
+static kmem_cache_t * sock_inode_cachep __read_mostly;
  
  static struct inode *sock_alloc_inode(struct super_block *sb)
  {
@@ -331,7 +333,7 @@ static struct super_block *sockfs_get_sb(struct file_system_type *fs_type,
         return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC);
  }
  
-static struct vfsmount *sock_mnt;
+static struct vfsmount *sock_mnt __read_mostly;
  
  static struct file_system_type sock_fs_type = {
         .name =         "sockfs",
@@ -404,6 +406,7 @@ int sock_map_fd(struct socket *sock)
                 file->f_mode = FMODE_READ | FMODE_WRITE;
                 file->f_flags = O_RDWR;
                 file->f_pos = 0;
+               file->private_data = sock;
                 fd_install(fd, file);
         }
  
@@ -436,6 +439,9 @@ struct socket *sockfd_lookup(int fd, int *err)
                 return NULL;
         }
  
+       if (file->f_op == &socket_file_ops)
+               return file->private_data;      /* set in sock_map_fd */
+
         inode = file->f_dentry->d_inode;
         if (!S_ISSOCK(inode->i_mode)) {
                 *err = -ENOTSOCK;
@@ -720,8 +726,8 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
         return __sock_sendmsg(iocb, sock, &x->async_msg, size);
  }
  
-ssize_t sock_sendpage(struct file *file, struct page *page,
-                     int offset, size_t size, loff_t *ppos, int more)
+static ssize_t sock_sendpage(struct file *file, struct page *page,
+                            int offset, size_t size, loff_t *ppos, int more)
  {
         struct socket *sock;
         int flags;
@@ -944,7 +950,7 @@ static int sock_mmap(struct file * file, struct vm_area_struct * vma)
         return sock->ops->mmap(file, sock, vma);
  }
  
-int sock_close(struct inode *inode, struct file *filp)
+static int sock_close(struct inode *inode, struct file *filp)
  {
         /*
          *      It was possible the inode is NULL we were 
@@ -2023,9 +2029,6 @@ int sock_unregister(int family)
         return 0;
  }
  
-
-extern void sk_init(void);
-
  void __init sock_init(void)
  {
         /*
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c

index 24c21f2a33a7b9bfa53d06de59455f0cca172579..5a7265aeaf839c160ee047d9f319b0763fdefe82 100644 (file)
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -185,9 +185,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
                         sg->page = body->pages[i];
                         sg->offset = offset;
                         sg->length = thislen;
-                       kmap(sg->page); /* XXX kmap_atomic? */
                         crypto_digest_update(tfm, sg, 1);
-                       kunmap(sg->page);
                         len -= thislen;
                         i++;
                         offset = 0;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c

index 554f224c0445b11e8e38377226934ca7e9c8b0e5..fe1a73ce6cffef7e639c1bebbc62e82fb010469d 100644 (file)
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -28,13 +28,13 @@
  #include <linux/workqueue.h>
  #include <linux/sunrpc/rpc_pipe_fs.h>
  
-static struct vfsmount *rpc_mount;
+static struct vfsmount *rpc_mount __read_mostly;
  static int rpc_mount_count;
  
  static struct file_system_type rpc_pipe_fs_type;
  
  
-static kmem_cache_t *rpc_inode_cachep;
+static kmem_cache_t *rpc_inode_cachep __read_mostly;
  
  #define RPC_UPCALL_TIMEOUT (30*HZ)
  
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c

index 2d9eb7fbd5213323f870fa25286512e466a7e764..f3104035e35d446828a0d29b92e947dfb7dfdcc5 100644 (file)
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -34,10 +34,10 @@ static int                  rpc_task_id;
  #define RPC_BUFFER_MAXSIZE     (2048)
  #define RPC_BUFFER_POOLSIZE    (8)
  #define RPC_TASK_POOLSIZE      (8)
-static kmem_cache_t    *rpc_task_slabp;
-static kmem_cache_t    *rpc_buffer_slabp;
-static mempool_t       *rpc_task_mempool;
-static mempool_t       *rpc_buffer_mempool;
+static kmem_cache_t    *rpc_task_slabp __read_mostly;
+static kmem_cache_t    *rpc_buffer_slabp __read_mostly;
+static mempool_t       *rpc_task_mempool __read_mostly;
+static mempool_t       *rpc_buffer_mempool __read_mostly;
  
  static void                    __rpc_default_timer(struct rpc_task *task);
  static void                    rpciod_killall(void);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c

index 56db8f13e6cb40c4cf9ecb765923993564c16c7e..05fe2e735538e15999a5b018f19b5c8474573dc7 100644 (file)
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -34,7 +34,7 @@
  #include <net/sock.h>
  #include <net/checksum.h>
  #include <net/ip.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/uaccess.h>
  #include <asm/ioctls.h>
  
@@ -584,13 +584,16 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
                 /* possibly an icmp error */
                 dprintk("svc: recvfrom returned error %d\n", -err);
         }
-       if (skb->stamp.tv_sec == 0) {
-               skb->stamp.tv_sec = xtime.tv_sec; 
-               skb->stamp.tv_usec = xtime.tv_nsec * 1000; 
+       if (skb->tstamp.off_sec == 0) {
+               struct timeval tv;
+
+               tv.tv_sec = xtime.tv_sec;
+               tv.tv_usec = xtime.tv_nsec * 1000;
+               skb_set_timestamp(skb, &tv);
                 /* Don't enable netstamp, sunrpc doesn't 
                    need that much accuracy */
         }
-       svsk->sk_sk->sk_stamp = skb->stamp;
+       skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp);
         set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
  
         /*
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c

index 8a4d9c106af1b37b1120a2adb53e21f7fb3dc317..fde16f40a581dfb64e725e694afb4df5090693ed 100644 (file)
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -993,6 +993,7 @@ xdr_xcode_array2(struct xdr_buf *buf, unsigned int base,
                         return -EINVAL;
         } else {
                 if (xdr_decode_word(buf, base, &desc->array_len) != 0 ||
+                   desc->array_len > desc->array_maxlen ||
                     (unsigned long) base + 4 + desc->array_len *
                                     desc->elem_size > buf->len)
                         return -EINVAL;
diff --git a/net/sysctl_net.c b/net/sysctl_net.c

index 3f6e31069c547ebb8b7a850da018f1a3e1169234..c5241fcbb9662451918242557022da60ff003ca0 100644 (file)
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -17,17 +17,15 @@
  #include <linux/sysctl.h>
  
  #ifdef CONFIG_INET
-extern struct ctl_table ipv4_table[];
+#include <net/ip.h>
  #endif
  
-extern struct ctl_table core_table[];
-
  #ifdef CONFIG_NET
-extern struct ctl_table ether_table[];
+#include <linux/if_ether.h>
  #endif
  
  #ifdef CONFIG_TR
-extern struct ctl_table tr_table[];
+#include <linux/if_tr.h>
  #endif
  
  struct ctl_table net_table[] = {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c

index d403e34088ad75c1c12ee5fbba7bf8d90e233cfc..41feca3bef86346f3890f8c428c795ff7316abf5 100644 (file)
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -105,7 +105,7 @@
  #include <linux/skbuff.h>
  #include <linux/netdevice.h>
  #include <net/sock.h>
-#include <linux/tcp.h>
+#include <net/tcp_states.h>
  #include <net/af_unix.h>
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
@@ -2026,14 +2026,6 @@ static struct net_proto_family unix_family_ops = {
         .owner  = THIS_MODULE,
  };
  
-#ifdef CONFIG_SYSCTL
-extern void unix_sysctl_register(void);
-extern void unix_sysctl_unregister(void);
-#else
-static inline void unix_sysctl_register(void) {}
-static inline void unix_sysctl_unregister(void) {}
-#endif
-
  static int __init af_unix_init(void)
  {
         int rc = -1;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c

index 4bd95c8f5934a5ca77d265bfdf01e046339ca634..6ffc64e1712d291f40723109db1a774e507c738c 100644 (file)
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -76,11 +76,11 @@
  #include <linux/netdevice.h>
  #include <linux/file.h>
  #include <linux/proc_fs.h>
-#include <linux/tcp.h>
  
  #include <net/sock.h>
  #include <net/af_unix.h>
  #include <net/scm.h>
+#include <net/tcp_states.h>
  
  /* Internal data structures and random procedures: */
  
@@ -286,16 +286,16 @@ void unix_gc(void)
                         skb = skb_peek(&s->sk_receive_queue);
                         while (skb &&
                                skb != (struct sk_buff *)&s->sk_receive_queue) {
-                               nextsk=skb->next;
+                               nextsk = skb->next;
                                 /*
                                  *      Do we have file descriptors ?
                                  */
-                               if(UNIXCB(skb).fp)
-                               {
-                                       __skb_unlink(skb, skb->list);
-                                       __skb_queue_tail(&hitlist,skb);
+                               if (UNIXCB(skb).fp) {
+                                       __skb_unlink(skb,
+                                                    &s->sk_receive_queue);
+                                       __skb_queue_tail(&hitlist, skb);
                                 }
-                               skb=nextsk;
+                               skb = nextsk;
                         }
                         spin_unlock(&s->sk_receive_queue.lock);
                 }
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c

index c974dac4580a6e4bc0170103af79d83a9e7c4dcb..690ffa5d5bfbf5bd5e01e0bf0fa61ca263f68b10 100644 (file)
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -12,7 +12,7 @@
  #include <linux/mm.h>
  #include <linux/sysctl.h>
  
-extern int sysctl_unix_max_dgram_qlen;
+#include <net/af_unix.h>
  
  static ctl_table unix_table[] = {
         {
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c

index d93b19faaab7f9b4b1e3fee8caa0975aae9b3eaa..596cb96e5f471227c348ce214d46dce63f5cf4a7 100644 (file)
--- a/net/wanrouter/af_wanpipe.c
+++ b/net/wanrouter/af_wanpipe.c
@@ -57,7 +57,7 @@
  #include <linux/wanpipe.h>
  #include <linux/if_wanpipe.h>
  #include <linux/pkt_sched.h>
-#include <linux/tcp.h>
+#include <linux/tcp_states.h>
  #include <linux/if_wanpipe_common.h>
  #include <linux/sdla_x25.h>
  
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c

index 04bec047fa9ab5910d7ef7c134e734f178e42aa0..020d73cc8414916f6a0bf65b2fbe675ec5759ead 100644 (file)
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -47,7 +47,7 @@
  #include <linux/if_arp.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <asm/uaccess.h>
  #include <linux/fcntl.h>
  #include <linux/termios.h>     /* For TIOCINQ/OUTQ */
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c

index 36fc3bf6d8827c4a47de80900ebee0a5ab54e7fa..adfe7b8df35591d29ee2d8a91de8bca57e9a3052 100644 (file)
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -81,7 +81,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
  }
  
  int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
-                          struct packet_type *ptype)
+                          struct packet_type *ptype, struct net_device *orig_dev)
  {
         struct sk_buff *nskb;
         struct x25_neigh *nb;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c

index b0197c70a9fc5fd2bd1ffb46462490f7636b8417..26146874b839eedb667327b67ed85c624834409c 100644 (file)
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -28,7 +28,7 @@
  #include <linux/string.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <net/x25.h>
  
  static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c

index 7fd872ad0c20a2acbb709e5cb8ca9c22bae7ebac..8be9b8fbc24d143e6d3df6c9dff14f005a6b4cfe 100644 (file)
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -27,7 +27,7 @@
  #include <linux/string.h>
  #include <linux/skbuff.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <net/x25.h>
  
  /*
@@ -80,7 +80,7 @@ void x25_requeue_frames(struct sock *sk)
                 if (!skb_prev)
                         skb_queue_head(&sk->sk_write_queue, skb);
                 else
-                       skb_append(skb_prev, skb);
+                       skb_append(skb_prev, skb, &sk->sk_write_queue);
                 skb_prev = skb;
         }
  }
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c

index d6a21a3ad80e78b2b9299b4d52817266b1cc1b9f..0a92e1da3922dc802b51cc93dd0d9f8a0a9d2f22 100644 (file)
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -23,7 +23,7 @@
  #include <linux/jiffies.h>
  #include <linux/timer.h>
  #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
  #include <net/x25.h>
  
  static void x25_heartbeat_expiry(unsigned long);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c

index c58a6f05a0b66366411b7725faa6422b9c645611..2407a7072327f9156ea6e5771762bb6adc0f072b 100644 (file)
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -12,7 +12,7 @@
  #include <net/ip.h>
  #include <net/xfrm.h>
  
-static kmem_cache_t *secpath_cachep;
+static kmem_cache_t *secpath_cachep __read_mostly;
  
  void __secpath_destroy(struct sec_path *sp)
  {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c

index d65ed8684fc1a340b1f470e3d3f50fc2b940a60f..83c8135e17641dd19bd5fdb204dff3096bdd2eeb 100644 (file)
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -37,7 +37,7 @@ EXPORT_SYMBOL(xfrm_policy_list);
  static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
  static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
  
-static kmem_cache_t *xfrm_dst_cache;
+static kmem_cache_t *xfrm_dst_cache __read_mostly;
  
  static struct work_struct xfrm_policy_gc_work;
  static struct list_head xfrm_policy_gc_list =
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c

index 8da3e25b2c4c1f305fd85428d3a9eb62b543bfba..c35336a0f71b5c50cca286fc2eca45043d205807 100644 (file)
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1125,9 +1125,8 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
         if (build_expire(skb, x, c->data.hard) < 0)
                 BUG();
  
-       NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
-
-       return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE;
+       return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
  }
  
  static int xfrm_notify_sa_flush(struct km_event *c)
@@ -1152,7 +1151,8 @@ static int xfrm_notify_sa_flush(struct km_event *c)
  
         nlh->nlmsg_len = skb->tail - b;
  
-       return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group = XFRMNLGRP_SA;
+       return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
  
  nlmsg_failure:
         kfree_skb(skb);
@@ -1226,7 +1226,8 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
  
         nlh->nlmsg_len = skb->tail - b;
  
-       return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group = XFRMNLGRP_SA;
+       return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
  
  nlmsg_failure:
  rtattr_failure:
@@ -1304,9 +1305,8 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
         if (build_acquire(skb, x, xt, xp, dir) < 0)
                 BUG();
  
-       NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE;
-
-       return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group = XFRMNLGRP_ACQUIRE;
+       return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
  }
  
  /* User gives us xfrm_user_policy_info followed by an array of 0
@@ -1405,9 +1405,8 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
         if (build_polexpire(skb, xp, dir, c->data.hard) < 0)
                 BUG();
  
-       NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
-
-       return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE;
+       return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
  }
  
  static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
@@ -1455,7 +1454,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
  
         nlh->nlmsg_len = skb->tail - b;
  
-       return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY;
+       return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
  
  nlmsg_failure:
  rtattr_failure:
@@ -1480,7 +1480,8 @@ static int xfrm_notify_policy_flush(struct km_event *c)
  
         nlh->nlmsg_len = skb->tail - b;
  
-       return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC);
+       NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY;
+       return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
  
  nlmsg_failure:
         kfree_skb(skb);
@@ -1519,7 +1520,8 @@ static int __init xfrm_user_init(void)
  {
         printk(KERN_INFO "Initializing IPsec netlink socket\n");
  
-       xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv);
+       xfrm_nl = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
+                                       xfrm_netlink_rcv, THIS_MODULE);
         if (xfrm_nl == NULL)
                 return -ENOMEM;
  
@@ -1537,3 +1539,4 @@ static void __exit xfrm_user_exit(void)
  module_init(xfrm_user_init);
  module_exit(xfrm_user_exit);
  MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM);
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c

index 5180405c1a844efa5a42ed8032c0059c506f5768..d8ee38aede26fee4cb8b78d8bd99467267c80da6 100644 (file)
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -341,6 +341,22 @@ static int do_of_entry (const char *filename, struct of_device_id *of, char *ali
      return 1;
  }
  
+static int do_vio_entry(const char *filename, struct vio_device_id *vio,
+               char *alias)
+{
+       char *tmp;
+
+       sprintf(alias, "vio:T%sS%s", vio->type[0] ? vio->type : "*",
+                       vio->compat[0] ? vio->compat : "*");
+
+       /* Replace all whitespace with underscores */
+       for (tmp = alias; tmp && *tmp; tmp++)
+               if (isspace (*tmp))
+                       *tmp = '_';
+
+       return 1;
+}
+
  /* Ignore any prefix, eg. v850 prepends _ */
  static inline int sym_is(const char *symbol, const char *name)
  {
@@ -422,6 +438,9 @@ void handle_moddevtable(struct module *mod, struct elf_info *info,
          else if (sym_is(symname, "__mod_of_device_table"))
                 do_table(symval, sym->st_size, sizeof(struct of_device_id),
                          do_of_entry, mod);
+        else if (sym_is(symname, "__mod_vio_device_table"))
+               do_table(symval, sym->st_size, sizeof(struct vio_device_id),
+                        do_vio_entry, mod);
  
  }
  
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c

index 9b9f94c915d234365c14444872c5ea5e1163bd6d..09ffca54b3734136aa46afcfcbaeb279e80960d9 100644 (file)
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -359,11 +359,16 @@ handle_modversions(struct module *mod, struct elf_info *info,
                 /* ignore __this_module, it will be resolved shortly */
                 if (strcmp(symname, MODULE_SYMBOL_PREFIX "__this_module") == 0)
                         break;
-#ifdef STT_REGISTER
+/* cope with newer glibc (2.3.4 or higher) STT_ definition in elf.h */
+#if defined(STT_REGISTER) || defined(STT_SPARC_REGISTER)
+/* add compatibility with older glibc */
+#ifndef STT_SPARC_REGISTER
+#define STT_SPARC_REGISTER STT_REGISTER
+#endif
                 if (info->hdr->e_machine == EM_SPARC ||
                     info->hdr->e_machine == EM_SPARCV9) {
                         /* Ignore register directives. */
-                       if (ELF_ST_TYPE(sym->st_info) == STT_REGISTER)
+                       if (ELF_ST_TYPE(sym->st_info) == STT_SPARC_REGISTER)
                                 break;
                 }
  #endif
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c

index fea262860ea01656dbe7514b4b4a301ca47424b6..a6516a64b297898923ca760e2e3b779880021c30 100644 (file)
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -49,9 +49,6 @@ asmlinkage long sys_add_key(const char __user *_type,
                 goto error;
         type[31] = '\0';
  
-       if (!type[0])
-               goto error;
-
         ret = -EPERM;
         if (type[0] == '.')
                 goto error;
@@ -144,6 +141,10 @@ asmlinkage long sys_request_key(const char __user *_type,
                 goto error;
         type[31] = '\0';
  
+       ret = -EPERM;
+       if (type[0] == '.')
+               goto error;
+
         /* pull the description into kernel space */
         ret = -EFAULT;
         dlen = strnlen_user(_description, PAGE_SIZE - 1);
@@ -362,7 +363,7 @@ long keyctl_revoke_key(key_serial_t id)
  
         key_put(key);
   error:
-       return 0;
+       return ret;
  
  } /* end keyctl_revoke_key() */
  
@@ -685,6 +686,8 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen)
                         goto can_read_key2;
  
                 ret = PTR_ERR(skey);
+               if (ret == -EAGAIN)
+                       ret = -EACCES;
                 goto error2;
         }
  
diff --git a/security/keys/keyring.c b/security/keys/keyring.c

index a1f6bac647a1c3a673bfbb2b4b03d0556cc9be88..9c208c756df8136cbaa0a06f5442af60c712ae6d 100644 (file)
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -201,7 +201,11 @@ static void keyring_destroy(struct key *keyring)
  
         if (keyring->description) {
                 write_lock(&keyring_name_lock);
-               list_del(&keyring->type_data.link);
+
+               if (keyring->type_data.link.next != NULL &&
+                   !list_empty(&keyring->type_data.link))
+                       list_del(&keyring->type_data.link);
+
                 write_unlock(&keyring_name_lock);
         }
  
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c

index 9b0369c5a223acbf951178e87ebbb0789458b507..c089f78fb94ec170dbd042f08a4a61b9915c526e 100644 (file)
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -678,7 +678,7 @@ long join_session_keyring(const char *name)
                 keyring = keyring_alloc(name, tsk->uid, tsk->gid, 0, NULL);
                 if (IS_ERR(keyring)) {
                         ret = PTR_ERR(keyring);
-                       goto error;
+                       goto error2;
                 }
         }
         else if (IS_ERR(keyring)) {
diff --git a/security/keys/request_key.c b/security/keys/request_key.c

index dfcd983af1fd88405d28c7af2a002e3e141759e5..90c1506d007cc219c17ebd9c571d94b9c6a58ed9 100644 (file)
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -405,7 +405,7 @@ struct key *request_key_and_link(struct key_type *type,
                 key_user_put(user);
  
                 /* link the new key into the appropriate keyring */
-               if (!PTR_ERR(key))
+               if (!IS_ERR(key))
                         request_key_link(key, dest_keyring);
         }
  
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c

index 2253f388234ff7b2e031c5d30da29f5f0c1a6831..8641f8894b4c0aa458238ca8c9195a0495c9238d 100644 (file)
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -659,7 +659,7 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc
                         return SECCLASS_NETLINK_ROUTE_SOCKET;
                 case NETLINK_FIREWALL:
                         return SECCLASS_NETLINK_FIREWALL_SOCKET;
-               case NETLINK_TCPDIAG:
+               case NETLINK_INET_DIAG:
                         return SECCLASS_NETLINK_TCPDIAG_SOCKET;
                 case NETLINK_NFLOG:
                         return SECCLASS_NETLINK_NFLOG_SOCKET;
diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c

index 18d08acafa7827485161bee36f7a4ea16ca9587d..e203883406dd662bda61e5f9a55c5d1bcbbdbeeb 100644 (file)
--- a/security/selinux/netlink.c
+++ b/security/selinux/netlink.c
@@ -80,7 +80,8 @@ static void selnl_notify(int msgtype, void *data)
         nlh = NLMSG_PUT(skb, 0, 0, msgtype, len);
         selnl_add_payload(nlh, len, msgtype, data);
         nlh->nlmsg_len = skb->tail - tmp;
-       netlink_broadcast(selnl, skb, 0, SELNL_GRP_AVC, GFP_USER);
+       NETLINK_CB(skb).dst_group = SELNLGRP_AVC;
+       netlink_broadcast(selnl, skb, 0, SELNLGRP_AVC, GFP_USER);
  out:
         return;
         
@@ -103,7 +104,8 @@ void selnl_notify_policyload(u32 seqno)
  
  static int __init selnl_init(void)
  {
-       selnl = netlink_kernel_create(NETLINK_SELINUX, NULL);
+       selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL,
+                                     THIS_MODULE);
         if (selnl == NULL)
                 panic("SELinux:  Cannot create netlink socket.");
         netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV);  
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c

index 92b057becb4b63abe6cb16d477998d3fcdc79fdb..69b9329b2054e1b74c0529a94991673055442305 100644 (file)
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -16,7 +16,7 @@
  #include <linux/rtnetlink.h>
  #include <linux/if.h>
  #include <linux/netfilter_ipv4/ip_queue.h>
-#include <linux/tcp_diag.h>
+#include <linux/inet_diag.h>
  #include <linux/xfrm.h>
  #include <linux/audit.h>
  
@@ -76,6 +76,7 @@ static struct nlmsg_perm nlmsg_firewall_perms[] =
  static struct nlmsg_perm nlmsg_tcpdiag_perms[] =
  {
         { TCPDIAG_GETSOCK,      NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
+       { DCCPDIAG_GETSOCK,     NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
  };
  
  static struct nlmsg_perm nlmsg_xfrm_perms[] =
diff --git a/sound/Kconfig b/sound/Kconfig

index ee794ae06040a2cf768092f79300ab4f8f1cf4fe..b65ee4701f98fab7d6564d9cee6d771455aa3603 100644 (file)
--- a/sound/Kconfig
+++ b/sound/Kconfig
@@ -77,7 +77,7 @@ source "sound/parisc/Kconfig"
  endmenu
  
  menu "Open Sound System"
-       depends on SOUND!=n && (BROKEN || (!SPARC32 && !SPARC64))
+       depends on SOUND!=n
  
  config SOUND_PRIME
         tristate "Open Sound System (DEPRECATED)"
diff --git a/sound/core/Makefile b/sound/core/Makefile

index 764ac184b2232bd7e7810efc5c8f05a244263e07..969d75528bdeb9189f5ff9fdc8aaddac165bf1f5 100644 (file)
--- a/sound/core/Makefile
+++ b/sound/core/Makefile
@@ -5,7 +5,7 @@
  
  snd-objs     := sound.o init.o memory.o info.o control.o misc.o \
                  device.o wrappers.o
-ifeq ($(CONFIG_ISA),y)
+ifeq ($(CONFIG_ISA_DMA_API),y)
  snd-objs     += isadma.o
  endif
  ifeq ($(CONFIG_SND_OSSEMUL),y)
diff --git a/sound/core/sound.c b/sound/core/sound.c

index 7612884f530b17abfe554a867e55309b45a2a815..3271e9245490b81d3b3f6c3d9d3e6b54ec66612d 100644 (file)
--- a/sound/core/sound.c
+++ b/sound/core/sound.c
@@ -432,7 +432,7 @@ EXPORT_SYMBOL(snd_device_new);
  EXPORT_SYMBOL(snd_device_register);
  EXPORT_SYMBOL(snd_device_free);
    /* isadma.c */
-#ifdef CONFIG_ISA
+#ifdef CONFIG_ISA_DMA_API
  EXPORT_SYMBOL(snd_dma_program);
  EXPORT_SYMBOL(snd_dma_disable);
  EXPORT_SYMBOL(snd_dma_pointer);
diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig

index 148a856a43ad7631207804137347ba6cd26184ca..be4ea60a367924b0808651f5a542aa0021940804 100644 (file)
--- a/sound/isa/Kconfig
+++ b/sound/isa/Kconfig
@@ -1,7 +1,7 @@
  # ALSA ISA drivers
  
  menu "ISA devices"
-       depends on SND!=n && ISA
+       depends on SND!=n && ISA && ISA_DMA_API
  
  config SND_AD1848_LIB
          tristate
diff --git a/sound/oss/Kconfig b/sound/oss/Kconfig

index 7bd95ceab7cc62ab59339a4854dc0bc834b0eebd..953e5f3ea03d6bb990584f58bed059b6f8420c25 100644 (file)
--- a/sound/oss/Kconfig
+++ b/sound/oss/Kconfig
@@ -6,7 +6,7 @@
  # Prompt user for primary drivers.
  config SOUND_BT878
         tristate "BT878 audio dma"
-       depends on SOUND_PRIME
+       depends on SOUND_PRIME && PCI
         ---help---
           Audio DMA support for bt878 based grabber boards.  As you might have
           already noticed, bt878 is listed with two functions in /proc/pci.
@@ -80,14 +80,14 @@ config SOUND_EMU10K1
  
  config MIDI_EMU10K1
         bool "Creative SBLive! MIDI (EXPERIMENTAL)"
-       depends on SOUND_EMU10K1 && EXPERIMENTAL
+       depends on SOUND_EMU10K1 && EXPERIMENTAL && ISA_DMA_API
         help
           Say Y if you want to be able to use the OSS /dev/sequencer
           interface.  This code is still experimental.
  
  config SOUND_FUSION
         tristate "Crystal SoundFusion (CS4280/461x)"
-       depends on SOUND_PRIME
+       depends on SOUND_PRIME && PCI
         help
           This module drives the Crystal SoundFusion devices (CS4280/46xx
           series) when wired as native sound drivers with AC97 codecs.  If
@@ -95,7 +95,7 @@ config SOUND_FUSION
  
  config SOUND_CS4281
         tristate "Crystal Sound CS4281"
-       depends on SOUND_PRIME
+       depends on SOUND_PRIME && PCI
         help
           Picture and feature list at
           <http://www.pcbroker.com/crystal4281.html>.
@@ -179,7 +179,7 @@ config SOUND_HARMONY
  
  config SOUND_SONICVIBES
         tristate "S3 SonicVibes"
-       depends on SOUND_PRIME
+       depends on SOUND_PRIME && PCI
         help
           Say Y or M if you have a PCI sound card utilizing the S3
           SonicVibes chipset. To find out if your sound card uses a
@@ -226,7 +226,7 @@ config SOUND_AU1550_AC97
  
  config SOUND_TRIDENT
         tristate "Trident 4DWave DX/NX, SiS 7018 or ALi 5451 PCI Audio Core"
-       depends on SOUND_PRIME
+       depends on SOUND_PRIME && PCI
         ---help---
           Say Y or M if you have a PCI sound card utilizing the Trident
           4DWave-DX/NX chipset or your mother board chipset has SiS 7018
@@ -503,7 +503,7 @@ config SOUND_VIA82CXXX
  
  config MIDI_VIA82CXXX
         bool "VIA 82C686 MIDI"
-       depends on SOUND_VIA82CXXX
+       depends on SOUND_VIA82CXXX && ISA_DMA_API
         help
           Answer Y to use the MIDI interface of the Via686. You may need to
           enable this in the BIOS before it will work. This is for connection
@@ -512,7 +512,7 @@ config MIDI_VIA82CXXX
  
  config SOUND_OSS
         tristate "OSS sound modules"
-       depends on SOUND_PRIME
+       depends on SOUND_PRIME && ISA_DMA_API
         help
           OSS is the Open Sound System suite of sound card drivers.  They make
           sound programming easier since they provide a common API.  Say Y or
diff --git a/sound/oss/Makefile b/sound/oss/Makefile

index db9afb61d6ffb49e02ff3bf78c6dec67c664e618..9bf3ee544d86e62987c2d23e20bea83b5bfcd0c2 100644 (file)
--- a/sound/oss/Makefile
+++ b/sound/oss/Makefile
@@ -80,7 +80,7 @@ obj-$(CONFIG_SOUND_ALI5455)   += ali5455.o ac97_codec.o
  obj-$(CONFIG_SOUND_IT8172)     += ite8172.o ac97_codec.o
  obj-$(CONFIG_SOUND_FORTE)      += forte.o ac97_codec.o
  
-obj-$(CONFIG_SOUND_AD1980)     += ac97_plugin_ad1980.o
+obj-$(CONFIG_SOUND_AD1980)     += ac97_plugin_ad1980.o ac97_codec.o
  obj-$(CONFIG_SOUND_WM97XX)     += ac97_plugin_wm97xx.o
  
  ifeq ($(CONFIG_MIDI_EMU10K1),y)
diff --git a/sound/oss/i810_audio.c b/sound/oss/i810_audio.c

index 7e9f667cf7a7176b15e3b5e24dfda480db54c033..b9a640fe48b10c857867a6c86854a78fee0a73fc 100644 (file)
--- a/sound/oss/i810_audio.c
+++ b/sound/oss/i810_audio.c
@@ -3430,9 +3430,9 @@ out_iospace:
                 release_mem_region(card->iobase_mmio_phys, 256);
         }
  out_pio:       
-       release_region(card->iobase, 64);
-out_region2:
         release_region(card->ac97base, 256);
+out_region2:
+       release_region(card->iobase, 64);
  out_region1:
         pci_free_consistent(pci_dev, sizeof(struct i810_channel)*NR_HW_CH,
             card->channel, card->chandma);
diff --git a/sound/oss/vidc.h b/sound/oss/vidc.h

index bab7044572d38b02572e99e3294562e9698fc9db..d5b8064dc5650bebe99cae1fcb23ec60a4215719 100644 (file)
--- a/sound/oss/vidc.h
+++ b/sound/oss/vidc.h
@@ -10,10 +10,6 @@
   *  VIDC sound function prototypes
   */
  
-/* vidc.c */
-
-extern int vidc_busy;
-
  /* vidc_fill.S */
  
  /*
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig

index 6d7a00f34d822cc3171e4518f02a8271a1303104..26b42bb20a0a180f0493e670be5ee218a09f4c17 100644 (file)
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -314,7 +314,7 @@ config SND_YMFPCI
  
  config SND_ALS4000
         tristate "Avance Logic ALS4000"
-       depends on SND
+       depends on SND && ISA_DMA_API
         select SND_OPL3_LIB
         select SND_MPU401_UART
         select SND_PCM
diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c

index 844d76152ea271028654cdcc4767c41d2db0dffc..c89e82eb06a6df91840c5863e8804d643db94e09 100644 (file)
--- a/sound/ppc/pmac.c
+++ b/sound/ppc/pmac.c
@@ -765,7 +765,8 @@ snd_pmac_ctrl_intr(int irq, void *devid, struct pt_regs *regs)
   */
  static void snd_pmac_sound_feature(pmac_t *chip, int enable)
  {
-       ppc_md.feature_call(PMAC_FTR_SOUND_CHIP_ENABLE, chip->node, 0, enable);
+       if (ppc_md.feature_call)
+               ppc_md.feature_call(PMAC_FTR_SOUND_CHIP_ENABLE, chip->node, 0, enable);
  }
  
  /*
author	Linus Torvalds <torvalds@g5.osdl.org>
	Tue, 30 Aug 2005 14:45:15 +0000 (07:45 -0700)
committer	Linus Torvalds <torvalds@g5.osdl.org>
	Tue, 30 Aug 2005 14:45:15 +0000 (07:45 -0700)