--- linux-2.6.11.6/drivers/md/bitmap.h.orig	Sun Jun 12 00:11:44 2005
+++ linux-2.6.11.6/drivers/md/bitmap.h	Sun Jun 12 00:11:44 2005
@@ -0,0 +1,62 @@
+#ifndef BITMAP_H
+#define BITMAP_H 1
+
+struct bitmap_page {
+        /*
+         * Bit-per-block map page taken from the bitmap page cache, or
+         * NULL while no page is attached; bitmap.c then falls back to
+         * the zoneinfo and count fields below.
+         */ 
+        char * map;
+        /*
+         * optional array of 16 per-zone counters (zone = 1/16 page).
+         */
+        short *zoneinfo;
+        /*
+         * count of dirty bits on the page
+         */ 
+        unsigned short  count;
+};
+
+struct bitmap {
+	struct bitmap_page * bp;  // array of size pages, NULL until ->start
+	unsigned long pages;      // maximum #pages, mapping 32k blocks each
+
+	int (*start) (struct bitmap * bitmap, __u64 events);
+	void (*stop) (struct bitmap * bitmap);
+	int (*testbits) (struct bitmap * bitmap, sector_t shift, unsigned long bits);
+	int (*setbits) (struct bitmap * bitmap, sector_t shift, unsigned long bits);
+	int (*clearbits) (struct bitmap * bitmap, sector_t shift, unsigned long bits);
+	int (*active) (struct bitmap * bitmap);
+
+        /* reader/writer lock taken around accesses to bp and flags */
+	rwlock_t lock;
+
+#define BITMAP_ACTIVE 0x01
+        unsigned long flags;
+
+        /*
+         * events count handed to ->start when the bitmap was started
+         */
+        __u64 events;
+
+        /*
+         * pages still lacking a zoneinfo array / lacking a map page
+         */
+        unsigned long missing_zones;
+        unsigned long missing_pages;
+
+        /*
+         * map pages attached to bp[] / pages held from the page cache
+         */
+        unsigned long current_pages;
+        unsigned long alloced_pages;
+
+	void (*print_stats) (struct bitmap * bitmap);
+};
+
+
+int bitmap_init(struct bitmap * bitmap, unsigned long long bits);
+void bitmap_destr(struct bitmap * bitmap);
+
+#endif
--- linux-2.6.11.6/drivers/md/bitmap.c.orig	Sun Jun 12 00:11:44 2005
+++ linux-2.6.11.6/drivers/md/bitmap.c	Sun Jun 12 00:11:44 2005
@@ -0,0 +1,887 @@
+/*
+ * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
+ *
+ * bitmap_init   - sets nr blks
+ * bitmap->start - then calls the setup part for the 1st
+ *                 level in the bitmap, which uses memory (kmalloc) so
+ *                 can fail. You should examine the return value. 0 is
+ *                 OK. -ve is FAIL.
+ *
+ * bitmap->stop  - inverse to bitmap->start. Marks the bitmap inactive;
+ *                 the memory is kept until bitmap_destr.
+ *
+ * bitmap_destr  - inverse to init. Destroys pages, etc.
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>  // PTB for kmalloc! How?
+#include <linux/vmalloc.h>  // PTB for vmalloc
+#include <linux/init.h>
+#include <linux/config.h>
+
+# define DEBUG 1
+
+#include "bitmap.h"
+
+/* use 16 bits of the address as extra bitmap */
+#define ZONESHIFT 4
+/* top 16 bits are nonzero */
+#define IS_ADDRESS(x) \
+  ((( ((unsigned long)(x)) >> ((sizeof(char*)<<3) - (1<<ZONESHIFT))) ) != 0)
+
+#ifndef PRINTK
+#  if DEBUG > 0 
+#    define PRINTK(x...) printk(x)
+#  else
+#    define PRINTK(x...)
+#  endif
+#endif
+
+/* cache of ready to go pages */
+static kmem_cache_t * bitmap_page_cache;
+
+/*
+ * Replaces kfree on bitmap pages: hands the page back to the shared
+ * page cache and drops it from this bitmap's allocation count.
+ * A NULL page is silently ignored.
+ */
+static void
+bitmap_free_page(struct bitmap * bitmap, unsigned char * page) {
+        if (page != NULL) {
+                kmem_cache_free(bitmap_page_cache, page);
+                bitmap->alloced_pages--;
+        }
+}
+
+static void
+bitmap_print_stats(struct bitmap * bitmap) {
+        /* log page usage; pre-allocated = allocated but not in the map */
+        unsigned long used = bitmap->current_pages;
+        unsigned long held = bitmap->alloced_pages;
+        printk(KERN_INFO "bitmap: %lu pages in use\n", used);
+        printk(KERN_INFO "bitmap: %lu pages allocated\n", held);
+        printk(KERN_INFO "bitmap: %lu pages pre-allocated \n", held - used);
+}
+
+/*
+ * mark bitmap inactive under the write lock; no memory is released here
+ */
+static void
+bitmap_stop(struct bitmap * bitmap) {
+
+        write_lock(&bitmap->lock);
+        bitmap->flags &= ~BITMAP_ACTIVE;
+        write_unlock(&bitmap->lock);
+}
+
+/*
+ * frees the memory claimed via bitmap->start and later page allocation
+ */
+void
+bitmap_destr(struct bitmap *bitmap) {
+        unsigned long k;
+        struct bitmap_page * bp;
+        unsigned long pages;
+
+        printk(KERN_DEBUG "destroying bitmap %p\n", bitmap);
+
+        bitmap_stop(bitmap);
+        bitmap_print_stats(bitmap);
+
+        write_lock(&bitmap->lock);
+        bitmap->flags &= ~BITMAP_ACTIVE;
+        bp = bitmap->bp;
+        pages = bitmap->pages;
+        bitmap->bp = NULL;
+        bitmap->missing_pages = pages;
+        bitmap->current_pages = 0;
+        bitmap->missing_zones = pages;
+        write_unlock(&bitmap->lock);
+
+        if (bp) {
+                for (k = 0; k < pages; k++) {
+                        if (IS_ADDRESS(bp[k].map)) {
+                                bitmap_free_page (bitmap, bp[k].map);
+                                bp[k].map = NULL;
+                        }
+                        if (IS_ADDRESS(bp[k].zoneinfo)) {
+                                kfree (bp[k].zoneinfo);
+                                bp[k].zoneinfo = NULL;
+                        }
+                }
+                vfree (bp);
+        }
+
+        /* reset only after the frees: bitmap_free_page() decrements it */
+        bitmap->alloced_pages = 0;
+        bitmap_stop(bitmap);
+
+        MOD_DEC_USE_COUNT;
+        THIS_MODULE->unsafe = 0;
+}
+
+/* 
+ * Tests if the bitmap is marked active (has been started).
+ * Returns 0 for a NULL or inactive bitmap. Otherwise returns the low
+ * 32 bits of the events count as an int, or 1 if that is not positive.
+ */
+static int
+bitmap_active(struct bitmap * bitmap) {
+        int stamp = 0;
+
+        if (bitmap == NULL)
+                return 0;
+
+        read_lock(&bitmap->lock);
+        if (bitmap->flags & BITMAP_ACTIVE) {
+                stamp = (unsigned int)bitmap->events;
+                stamp = (stamp > 0) ? stamp : 1;
+        }
+        read_unlock(&bitmap->lock);
+        return stamp;
+}
+
+/*
+ * Replaces kmalloc for bitmap pages: takes one page from the shared
+ * cache and counts it against this bitmap. Returns NULL on failure.
+ */
+static unsigned char *
+bitmap_alloc_page(struct bitmap *bitmap) {
+        /* PTB zeroing is done by the constructor and only
+         *     clean pages are returned to the cache */
+        unsigned char *fresh = kmem_cache_alloc(bitmap_page_cache, GFP_KERNEL);
+
+        if (fresh == NULL)
+                return NULL;
+        bitmap->alloced_pages++;
+        return fresh;
+}
+
+
+
+/* 
+ * marks the bitmap active and primes the free page cache.
+ * On first use it also allocates the bitmap_page array (-ENOMEM if
+ * that fails) and, best effort, one zeroed zoneinfo array per page.
+ */
+static int
+bitmap_start(struct bitmap * bitmap, u64 events) {
+        struct bitmap_page * bp;
+        unsigned long pages;
+        
+        /* take lock to read data */
+        write_lock(&bitmap->lock);
+        pages = bitmap->pages;
+        bp    = bitmap->bp;
+        write_unlock(&bitmap->lock);
+
+        if (!bp) {
+                unsigned long k;        /* same width as pages */
+
+                bp = vmalloc (pages * sizeof(*bp));
+                if (!bp) {
+                        printk(KERN_WARNING "bitmap: cannot get %luB of memory!\n",
+                                pages * sizeof(*bp));
+                        return -ENOMEM;
+                }
+                memset (bp, 0, pages * sizeof(*bp));
+
+                for (k = 0; k < pages; k++) {
+                        bp[k].zoneinfo =
+                                kmalloc (sizeof(*bp[k].zoneinfo)<<ZONESHIFT,
+                                       GFP_KERNEL);
+                        if (!bp[k].zoneinfo)
+                                continue;
+                        /* kmalloc does not clear: counters must start at 0 */
+                        memset (bp[k].zoneinfo, 0, sizeof(*bp[k].zoneinfo)<<ZONESHIFT);
+                        bitmap->missing_zones--;
+                }
+                if (bitmap->missing_zones > 0) {
+                        printk(KERN_WARNING "bitmap: warning! cannot get %ld*%uB memory!\n",
+                              bitmap->missing_zones,
+                              (unsigned int)(sizeof(*bp->zoneinfo) << ZONESHIFT));
+                }
+        }
+
+        /*
+         * this is 16 shorts or 32 bytes + 4 bytes extra per page of 4096
+         * bytes, which is a reserve of less than 1%. But each page
+         * bitmaps 32MB of disk, so a 1GB disk takes 32 pages or 128KB, and
+         * a 1TB disk takes 128MB of pages. In those circumstances,
+         * adding a capital cost of about 108KB doesn't seem bad.
+         */
+
+        write_lock(&bitmap->lock);
+        bitmap->bp = bp;
+
+        bitmap->flags |= BITMAP_ACTIVE;
+        bitmap->events = events;
+        write_unlock(&bitmap->lock);
+
+        /* seed the page cache */
+        bitmap_free_page(bitmap, bitmap_alloc_page(bitmap));
+
+        return 0;
+}
+
+/*
+ * Test if the page is in the map, and bring it in if not. 
+ *
+ * Return 0 for success, -EINVAL for a page out of range or one that
+ * still has counted bits pending, -ENODEV if the bitmap has no page
+ * array, -ENOMEM if no page could be allocated.
+ */
+static int
+bitmap_checkpage (struct bitmap *bitmap, unsigned long page)
+{
+        unsigned char * mappage;
+
+        read_lock(&bitmap->lock);
+        if (page >= bitmap->pages) {    /* page is unsigned: no < 0 test */
+                read_unlock(&bitmap->lock);
+                return -EINVAL;
+        }
+
+        if (bitmap->bp == NULL) {
+                read_unlock(&bitmap->lock);
+                return -ENODEV;
+        }
+
+        if (IS_ADDRESS(bitmap->bp[page].map)) {
+                read_unlock(&bitmap->lock);
+                return 0;
+        }
+        read_unlock(&bitmap->lock);
+
+        /* the page address was NULL */
+
+        if ((mappage = bitmap_alloc_page(bitmap)) == NULL) {
+                /* failed - check to see if we have backup counters */
+                void * tmp;
+                int need_new_zoneinfo = 0;
+
+                write_lock(&bitmap->lock);
+                if (!IS_ADDRESS(bitmap->bp[page].zoneinfo))
+                        need_new_zoneinfo = 1;
+                write_unlock(&bitmap->lock);
+
+                if (need_new_zoneinfo) {
+                        /* rarely, we might make an extra backup counter */
+                        tmp = kmalloc(sizeof(*bitmap->bp->zoneinfo)<<ZONESHIFT,
+                                    GFP_KERNEL);
+                        if (tmp) {
+                                /* zero it: only valid as a backup while count is 0 */
+                                memset(tmp, 0,
+                                    sizeof(*bitmap->bp->zoneinfo)<<ZONESHIFT);
+                                write_lock(&bitmap->lock);
+                                if (IS_ADDRESS(bitmap->bp[page].zoneinfo) ||
+                                    bitmap->bp[page].count != 0) {
+                                    /* made elsewhere, or bits already counted: backout */
+                                        need_new_zoneinfo = 0;
+                                } else {
+                                        bitmap->bp[page].zoneinfo = tmp;
+                                        bitmap->missing_zones--;
+                                }
+                                write_unlock(&bitmap->lock);
+                                if (!need_new_zoneinfo)
+                                        kfree(tmp);
+                        }
+                }
+                return -ENOMEM;
+        } 
+
+        /* got a page */
+
+        write_lock(&bitmap->lock);
+        /* recheck the page */
+        if (IS_ADDRESS(bitmap->bp[page].map)) {
+                /* somebody beat us to getting the page */
+                write_unlock(&bitmap->lock);
+                bitmap_free_page(bitmap, mappage);
+                return 0;
+        }
+
+        /* no page in place and we have one, so maybe install it */
+
+        if (bitmap->bp[page].count != 0) {
+                /* inpage bitmap - can't replace until no pending writes */
+                write_unlock(&bitmap->lock);
+                bitmap_free_page(bitmap, mappage);
+                return -EINVAL;
+        }
+
+        /* good case - we get to make a new page */
+        memset(mappage, 0, PAGE_SIZE);
+        bitmap->bp[page].map = mappage;
+        if (IS_ADDRESS(bitmap->bp[page].zoneinfo)) {
+                memset(bitmap->bp[page].zoneinfo, 0,
+                        sizeof(*bitmap->bp->zoneinfo) << ZONESHIFT);
+        }
+        bitmap->missing_pages--;
+        bitmap->current_pages++;
+        write_unlock(&bitmap->lock);
+        return 0;
+}
+
+/* 
+ * offset8 is the BYTE offset, not the bit offset. Call under lock.
+ * A page emptied by the clear is detached and passed back in
+ * *this_page. -EINVAL: out of range, or no map page (counters only).
+ */
+static int
+bitmap_clear_mask8 (struct bitmap *bitmap, sector_t offset8,
+                  unsigned char mask, unsigned char **this_page)
+{
+        unsigned long page  ;
+        unsigned long pageoff;
+        unsigned char oldmask, newmask, diffmask;
+
+        /* bitmap_clear_mask does no range check of its own */
+        if (bitmap->bp == NULL || (offset8 >> PAGE_SHIFT) >= bitmap->pages)
+                return -EINVAL;
+        page    = offset8 >> PAGE_SHIFT;
+        if (!IS_ADDRESS(bitmap->bp[page].map)) {
+                int bits = hweight8(mask);
+                /* counters cannot tell which bits are set: stop at zero */
+                if (bits > bitmap->bp[page].count)
+                        bits = bitmap->bp[page].count;
+                bitmap->bp[page].count -= bits;
+                if (IS_ADDRESS(bitmap->bp[page].zoneinfo)) {
+                        int zoneoffset = (offset8  >> (PAGE_SHIFT - ZONESHIFT));
+                        int zone = zoneoffset & ((1<<ZONESHIFT) - 1);
+                        short *zcount = &bitmap->bp[page].zoneinfo[zone];
+                        *zcount = (*zcount > bits) ? *zcount - bits : 0;
+                }
+                return -EINVAL;
+        }
+        pageoff = offset8 & ~PAGE_MASK;
+        oldmask = bitmap->bp[page].map[pageoff];
+        newmask = oldmask & ~mask;
+        diffmask = newmask ^ oldmask;
+        if (diffmask) {
+                int bits = hweight8 (diffmask);
+                /* signed arithmetic, or the < 0 test below never fires */
+                int newcount = (int)bitmap->bp[page].count - bits;
+                bitmap->bp[page].count = newcount;
+                bitmap->bp[page].map[pageoff] = newmask;
+                /* most frequent case is a +ve result and return */
+                if (newcount > 0)
+                        return 0;
+                /* negative count is a major misaccounting */
+                if (newcount < 0) {
+                        printk(KERN_WARNING "bitmap: dirty count %d on page %lu\n",
+                                newcount, page);
+                        return 0;
+                }
+                /* newcount == 0 is when we want to detach the page */
+                *this_page = bitmap->bp[page].map;
+                bitmap->bp[page].map = NULL;
+                bitmap->missing_pages++;
+                bitmap->current_pages--;
+                return 0;
+        }
+        return 0;
+}
+
+/*
+ * offset is the bit count, i.e. the block number. Clears the bits of
+ * mask from there on; returns -EINVAL if a byte-level clear failed.
+ */
+static int
+bitmap_clear_mask (struct bitmap *bitmap, sector_t offset, unsigned char mask)
+{
+	unsigned long blkgrp;
+	unsigned char blkoff;
+        unsigned long page  ;
+        unsigned long pageoff;
+        int errs = 0;
+        unsigned char * free_page0 = NULL, *free_page1 = NULL;
+
+	blkgrp = offset >> 3;
+        page   = blkgrp >> PAGE_SHIFT;
+
+	blkoff = offset & 7;
+        pageoff= blkgrp & ~PAGE_MASK;
+
+        write_lock(&bitmap->lock);
+
+	if (blkoff) {
+		unsigned char maskdiv = 0xff & (mask << blkoff);
+		unsigned char maskrem = 0xff & (mask >> (8 - blkoff));
+                /* maskdiv: bits for byte blkgrp; maskrem: spill into blkgrp + 1 */
+                if (maskdiv) {
+                        if (bitmap_clear_mask8 (bitmap, blkgrp, maskdiv, &free_page0) < 0)
+                                errs++;
+                }
+		if (!maskrem) {
+                        goto out;
+                }
+                if (pageoff + 1 < PAGE_SIZE) {
+                        /* the next byte lies on the same page */
+                        if (bitmap_clear_mask8 (bitmap, blkgrp + 1, maskrem, &free_page1) < 0)
+                                errs++;
+                        goto out;
+                } 
+                /* the next byte starts the following page (same call as above) */
+                if (bitmap_clear_mask8 (bitmap, blkgrp + 1, maskrem, &free_page1) < 0)
+                        errs++;
+                goto out;
+	}
+
+        // normal situation. Offset is multiple of 8
+	
+        if (bitmap_clear_mask8 (bitmap, blkgrp, mask, &free_page0) < 0)
+                errs++;
+out:
+        write_unlock(&bitmap->lock);
+        if (free_page0)
+                bitmap_free_page(bitmap, free_page0);
+        if (free_page1)
+                bitmap_free_page(bitmap, free_page1);
+        return errs > 0 ? -EINVAL : 0;
+}
+
+
+/* 
+ * offset8 is the BYTE offset, not the bit offset.
+ * We call this routine under lock.
+ * -EINVAL: out of range, or no map page (counters are bumped instead).
+ */
+static int
+bitmap_set_mask8 (struct bitmap *bitmap, sector_t offset8,
+                  unsigned char mask)
+{
+        unsigned long page  ;
+        unsigned long pageoff;
+        unsigned char oldmask, newmask, diffmask;
+
+        /* callers carry on after a failed checkpage: re-validate here */
+        if (bitmap->bp == NULL || (offset8 >> PAGE_SHIFT) >= bitmap->pages)
+                return -EINVAL;
+        page    = offset8 >> PAGE_SHIFT;
+        pageoff = offset8 & ~PAGE_MASK;
+
+        if (!IS_ADDRESS(bitmap->bp[page].map)) {
+                int bits = hweight8(mask);
+                bitmap->bp[page].count += bits;
+                /* mark the zone instead - we have 16 low bits markable */
+                if (IS_ADDRESS(bitmap->bp[page].zoneinfo)) {
+                        int zoneoffset = (offset8  >> (PAGE_SHIFT - ZONESHIFT));
+                        int zone = zoneoffset & ((1 << ZONESHIFT) - 1);
+                        bitmap->bp[page].zoneinfo[zone] += bits;
+                }
+                return -EINVAL;
+        }
+
+        oldmask = bitmap->bp[page].map[pageoff];
+        newmask = oldmask | mask;
+        diffmask = newmask ^ oldmask;
+
+        if (diffmask) {
+                unsigned int bits = hweight8 (diffmask);
+                bitmap->bp[page].map[pageoff] = newmask;
+                bitmap->bp[page].count += bits;
+        }
+        return 0;
+}
+
+/* 
+ * offset8 is the BYTE offset, not the bit offset. Call under lock.
+ * Returns 1 if dirty, 0 if clean, -EINVAL if offset8 is out of range.
+ */
+static int
+bitmap_test_mask8 (struct bitmap *bitmap, sector_t offset8,
+                  unsigned char mask)
+{
+        unsigned long page  ;
+        unsigned long pageoff;
+
+        if (!mask)
+                return 0;
+        /* callers ignore checkpage errors: never index a bad page here */
+        if (bitmap->bp == NULL || (offset8 >> PAGE_SHIFT) >= bitmap->pages)
+                return -EINVAL;
+        page    = offset8 >> PAGE_SHIFT;
+        pageoff = offset8 & ~PAGE_MASK;
+
+        if (!IS_ADDRESS(bitmap->bp[page].map)) {
+                /* look at zone instead - we have 16 low bits markable */
+                if (IS_ADDRESS(bitmap->bp[page].zoneinfo)) {
+                        int zoneoffset = (offset8  >> (PAGE_SHIFT - ZONESHIFT));
+                        int zone = zoneoffset & ((1 << ZONESHIFT) - 1);
+                        if (bitmap->bp[page].zoneinfo[zone] > 0)
+                                return 1;
+                        return 0;
+                }
+                /* try the count */
+                if (bitmap->bp[page].count > 0)
+                        return 1;
+                return 0;
+        }
+        return (bitmap->bp[page].map[pageoff] & mask) != 0;
+}
+
+/*
+ * here offset is the BIT offset. Returns 0, or -EINVAL if
+ * bitmap_checkpage or a byte-level set reported an error.
+ */
+static int
+bitmap_set_mask (struct bitmap *bitmap, sector_t offset, unsigned char mask)
+{
+	unsigned long blkgrp;
+	unsigned char blkoff;
+        unsigned long page  ;
+        unsigned long pageoff;
+        int errs = 0;
+
+	blkgrp = offset >> 3;
+        page   = blkgrp >> PAGE_SHIFT;
+
+        if (bitmap_checkpage(bitmap, page) < 0)
+                errs++;
+
+        /* the page may or may not have been made */
+
+        write_lock(&bitmap->lock);
+	blkoff = offset & 7;
+        pageoff= blkgrp & ~PAGE_MASK;
+
+	if (blkoff) {
+		unsigned char maskdiv = 0xff & (mask << blkoff);
+		unsigned char maskrem = 0xff & (mask >> (8 - blkoff));
+
+                if (maskdiv) {
+                        if (bitmap_set_mask8(bitmap, blkgrp, maskdiv) < 0)
+                                errs++;
+                }
+
+		if (!maskrem) {
+                        write_unlock(&bitmap->lock);
+                        return errs > 0 ? -EINVAL : 0 ;
+                }
+
+                if (pageoff + 1 < PAGE_SIZE) {
+                        /* the next byte lies on the same page */
+                        if (bitmap_set_mask8(bitmap, blkgrp + 1, maskrem) < 0)
+                                errs++;
+                        write_unlock(&bitmap->lock);
+                        return errs > 0 ? -EINVAL : 0 ;
+                } 
+                write_unlock(&bitmap->lock);
+                if (bitmap_checkpage(bitmap, page+1) < 0)
+                        errs++;
+                /* checkpage takes the lock itself, hence the unlock above */
+                write_lock(&bitmap->lock);
+
+                if (bitmap_set_mask8(bitmap, blkgrp + 1, maskrem) < 0)
+                        errs++;
+                        
+                write_unlock(&bitmap->lock);
+                return errs > 0 ? -EINVAL : 0 ;
+	}
+
+        // normal situation. Offset is multiple of 8
+	
+        if (bitmap_set_mask8(bitmap, blkgrp, mask) < 0)
+                errs++;
+
+        write_unlock(&bitmap->lock);
+        return errs > 0 ? -EINVAL : 0;
+}
+
+/*
+ * here offset is the BIT offset. Returns the first non-zero result
+ * of bitmap_test_mask8, else 0; bitmap_checkpage errors are ignored.
+ */
+static int
+bitmap_test_mask (struct bitmap *bitmap, sector_t offset, unsigned char mask)
+{
+	unsigned long blkgrp;
+	unsigned char blkoff;
+        unsigned long page  ;
+        unsigned long pageoff;
+        int err;
+
+	blkgrp = offset >> 3;
+        page   = blkgrp >> PAGE_SHIFT;
+
+        if (bitmap_checkpage(bitmap, page) < 0)
+                ; // ignore. We'll react below
+
+        /* the page may or may not have been made */
+
+        write_lock(&bitmap->lock);
+	blkoff = offset & 7;
+        pageoff= blkgrp & ~PAGE_MASK;
+
+	if (blkoff) {
+		unsigned char maskdiv = 0xff & (mask << blkoff);
+		unsigned char maskrem = 0xff & (mask >> (8 - blkoff));
+
+                if (maskdiv) {
+                        if (err = bitmap_test_mask8(bitmap, blkgrp, maskdiv), err) {
+                                write_unlock(&bitmap->lock);
+                                return err;
+                        }
+                }
+
+		if (!maskrem) {
+                        write_unlock(&bitmap->lock);
+                        return 0 ;
+                }
+
+                if (pageoff + 1 < PAGE_SIZE) {
+                        /* the next byte lies on the same page */
+                        if (err = bitmap_test_mask8(bitmap, blkgrp + 1, maskrem), err) {
+                                write_unlock(&bitmap->lock);
+                                return err;
+                        }
+                        write_unlock(&bitmap->lock);
+                        return  0 ;
+                } 
+                write_unlock(&bitmap->lock);
+                if (err = bitmap_checkpage(bitmap, page+1), err < 0)
+                        ; // ignore errors
+                /* checkpage takes the lock itself, hence the unlock above */
+                write_lock(&bitmap->lock);
+
+                if (err = bitmap_test_mask8(bitmap, blkgrp + 1, maskrem), err) {
+                        write_unlock(&bitmap->lock);
+                        return err;
+                }
+                        
+                write_unlock(&bitmap->lock);
+                return 0 ;
+	}
+
+        // normal situation. Offset is multiple of 8
+	
+        if (err = bitmap_test_mask8(bitmap, blkgrp, mask), err) {
+                write_unlock(&bitmap->lock);
+                return err ;
+        }
+
+        write_unlock(&bitmap->lock);
+        return 0;
+}
+
+static int
+bitmap_clear_bits (struct bitmap *bitmap, sector_t offset, unsigned long bits)
+{
+        int failed = 0;
+
+        /*
+         * Walk the range one map byte at a time: each pass clears from
+         * offset up to the end of the byte that contains it.
+         */
+        for (;;) {
+                sector_t next = (offset | 7) + 1;  /* start of next byte  */
+                int run = next - offset;           /* blocks in this pass */
+
+                if (offset + bits < next)
+                        break;
+                /* ignore errors and do what we can */
+                if (bitmap_clear_mask(bitmap, offset, (1 << run) - 1) < 0)
+                        failed = 1;
+                bits -= run;
+                offset = next;
+        }
+
+        /* what is left ends before the next byte boundary */
+        if (bits > 0) {
+                /* ignore errors and do what we can */
+                if (bitmap_clear_mask(bitmap, offset, (1 << bits) - 1) < 0)
+                        failed = 1;
+        }
+
+        return failed ? -EINVAL : 0;
+}
+
+static int
+bitmap_set_bits (struct bitmap *bitmap, sector_t offset, unsigned long bits)
+{
+        int failed = 0;
+
+        /*
+         * Walk the range one map byte at a time: each pass sets from
+         * offset up to the end of the byte that contains it.
+         */
+        for (;;) {
+                sector_t next = (offset | 7) + 1;  /* start of next byte  */
+                int run = next - offset;           /* blocks in this pass */
+
+                if (offset + bits < next)
+                        break;
+                /* ignore errors and do what we can */
+                if (bitmap_set_mask(bitmap, offset, (1 << run) - 1) < 0)
+                        failed = 1;
+                bits -= run;
+                offset = next;
+        }
+
+        /* what is left ends before the next byte boundary */
+        if (bits > 0) {
+                /* ignore errors and do what we can */
+                if (bitmap_set_mask(bitmap, offset, (1 << bits) - 1) < 0)
+                        failed = 1;
+        }
+
+        return failed ? -EINVAL : 0;
+}
+
+static int
+bitmap_test_bits (struct bitmap *bitmap, sector_t offset, unsigned long bits)
+{
+        int hit = 0;
+
+        /*
+         * Walk the range one map byte at a time, from offset up to the
+         * end of the byte that contains it, and stop at the first
+         * non-zero result of bitmap_test_mask.
+         */
+        for (;;) {
+                sector_t next = (offset | 7) + 1;  /* start of next byte  */
+                int run = next - offset;           /* blocks in this pass */
+
+                if (offset + bits < next)
+                        break;
+                hit = bitmap_test_mask(bitmap, offset, (1 << run) - 1);
+                if (hit)
+                        return hit;
+                bits -= run;
+                offset = next;
+        }
+
+        /* what is left ends before the next byte boundary */
+        if (bits > 0)
+                hit = bitmap_test_mask(bitmap, offset, (1 << bits) - 1);
+
+        /* hit is still 0 here unless the tail test above changed it */
+        return hit;
+}
+
+/*
+static int
+bitmap_test_bit (struct bitmap *bitmap, unsigned long block)
+{
+	unsigned long blkgrp;
+	unsigned char blkoff;
+        unsigned long page  ;
+        unsigned long pageoff;
+        int res;
+
+	blkgrp = block >> 3;
+        page   = blkgrp >> PAGE_SHIFT;
+
+        read_lock(&bitmap->lock);
+        // high bits zero means no page address
+        if (!IS_ADDRESS(bitmap->bp[page].map)) {
+                int zoneoffset = (blkgrp  >> (PAGE_SHIFT - ZONESHIFT));
+                int zone = zoneoffset & ((1 << ZONESHIFT) - 1);
+                // use the counter instead - this is zoned
+                res = (bitmap->bp[page].count > 0);
+                if (res && IS_ADDRESS(bitmap->bp[page].zoneinfo))
+                        res = (bitmap->bp[page].zoneinfo[zone] > 0);
+                read_unlock(&bitmap->lock);
+                return res;
+        }
+
+	blkoff = block & 7;
+        pageoff= blkgrp & ~PAGE_MASK;
+
+	res = test_bit ((pageoff << 3) + blkoff, bitmap->bp[page].map) != 0;
+        read_unlock(&bitmap->lock);
+        return res;
+}
+*/
+
+int
+bitmap_init(struct bitmap * bitmap, unsigned long long blocks) {
+        /*
+         * Prepares *bitmap for `blocks` blocks (one bit each). Returns 0,
+         * or -EINVAL if the page count does not fit an unsigned long.
+         */
+        unsigned long long want = (blocks + (PAGE_SIZE * 8 - 1)) >> (PAGE_SHIFT + 3);
+        unsigned long pages = want;
+
+        memset(bitmap, 0, sizeof(*bitmap));
+        rwlock_init (&bitmap->lock);
+        write_lock(&bitmap->lock);
+        bitmap->start = bitmap_start;
+        bitmap->stop = bitmap_stop;
+        bitmap->testbits = bitmap_test_bits;
+        bitmap->setbits = bitmap_set_bits;
+        bitmap->clearbits = bitmap_clear_bits;
+        bitmap->active = bitmap_active;
+        bitmap->print_stats = bitmap_print_stats;
+        /* catch truncation of the page count where long is 32 bits */
+        if (pages != want) {
+                write_unlock(&bitmap->lock);
+                printk(KERN_WARNING "bitmap: too many pages (%llu) for this machine!\n",
+                        want);
+                return -EINVAL;
+        }
+        bitmap->pages = pages;
+        bitmap->missing_zones = pages;
+        bitmap->missing_pages = pages;
+        bitmap->current_pages = 0;
+        bitmap->alloced_pages = 0;
+        // we are all using the same page cache.
+        MOD_INC_USE_COUNT;
+        THIS_MODULE->unsafe = 0;
+        write_unlock(&bitmap->lock);
+        return 0;
+}
+
+static void
+bitmap_clear_page(void *data, kmem_cache_t *cache, unsigned long flags) {
+        /*
+         * Constructor handed to kmem_cache_create: zero a new page.
+         * Does nothing unless SLAB_CTOR_CONSTRUCTOR is set in flags.
+         */
+        if ((flags & SLAB_CTOR_CONSTRUCTOR) && data != NULL)
+                memset(data, 0, PAGE_SIZE);
+}
+
+int
+bitmap_init_page_cache(void) {
+        /* one PAGE_SIZE object per bitmap page, cleared by bitmap_clear_page */
+        kmem_cache_t *cache = kmem_cache_create("bitmap_page", PAGE_SIZE, 0, 0,
+                                                bitmap_clear_page, NULL);
+        bitmap_page_cache = cache;
+        return cache ? 0 : -ENOMEM;
+}
+
+#ifdef MODULE
+void cleanup_module(void)
+{
+        if (!bitmap_page_cache)
+                return;
+        kmem_cache_destroy(bitmap_page_cache);
+        bitmap_page_cache = NULL;
+}
+
+int init_module(void)
+{
+        return bitmap_init_page_cache();
+}
+
+  MODULE_AUTHOR ("Peter T. Breuer");
+  MODULE_DESCRIPTION ("Bitmap support");
+  MODULE_LICENSE("GPL");
+  int linux_version_code = LINUX_VERSION_CODE;
+#else           /* MODULE */
+__initcall(bitmap_init_page_cache);
+#endif          /* MODULE */
+
+EXPORT_SYMBOL(bitmap_destr);
+EXPORT_SYMBOL(bitmap_init);
+
+/* Compile line:
+ *
+ *  gcc -O2 -D__KERNEL__ -DMODULE -c bitmap.c -o bitmap.o
+ *
+ */
+                      
--- linux-2.6.11.6/include/linux/raid/raid1.h.orig	Sat Mar 26 04:28:22 2005
+++ linux-2.6.11.6/include/linux/raid/raid1.h	Sun Jun 12 00:11:44 2005
@@ -50,6 +50,17 @@
 
 	mempool_t *r1bio_pool;
 	mempool_t *r1buf_pool;
+
+#ifdef CONFIG_MD_FR1
+        long                    last_clean_sector;  /* helps debugging   */
+        long                    last_dirty_sector;
+        int                     sync_mode;          /* clean/dirty pass? */
+        void                    *bitmap;            /* the array bitmap  */
+        int                     bitmap_dirty;       /* flag              */
+        int                     latency[MD_SB_DISKS];
+        int                     last_source;        /* disk read from    */
+#endif /* CONFIG_MD_FR1 */
+
 };
 
 typedef struct r1_private_data_s conf_t;
@@ -85,6 +96,12 @@
 	int			read_disk;
 
 	struct list_head	retry_list;
+
+#ifdef CONFIG_MD_FR1
+        int                     nonoperational; /* no of bad mirrors */
+        unsigned long           start_jiffies;  /* when i/o started  */
+#endif /* CONFIG_MD_FR1 */
+
 	/*
 	 * if the IO is in WRITE direction, then multiple bios are used.
 	 * We choose the number when they are allocated.
@@ -95,4 +112,12 @@
 /* bits for r1bio.state */
 #define	R1BIO_Uptodate	0
 #define	R1BIO_IsSync	1
+#ifdef CONFIG_MD_FR1
+  #define R1BIO_AsyncPhase 4
+  #define R1BIO_AsyncIO    5
+#endif /* CONFIG_MD_FR1 */
+#ifdef DO_ADD_READ_WRITE_CORRECT
+#define R1BIO_ReadRetry    6
+#endif /* DO_ADD_READ_WRITE_CORRECT */
+
 #endif
--- linux-2.6.11.6/include/linux/raid/md_p.h.orig	Sat Mar 26 04:28:16 2005
+++ linux-2.6.11.6/include/linux/raid/md_p.h	Sun Jun 12 09:11:53 2005
@@ -140,7 +140,20 @@
 	__u32 cp_events_hi;	/* 10 high-order of checkpoint update count   */
 #endif
 	__u32 recovery_cp;	/* 11 recovery checkpoint sector count	      */
-	__u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 12];
+#ifdef __BIG_ENDIAN
+        __u32 bitmap_events_hi; /* 12 high-order bits of bitmap events count  */
+        __u32 bitmap_events_lo; /* 13 low-order bits of bitmap events count   */
+#else
+        __u32 bitmap_events_lo; /* 12 low-order bits of bitmap events count   */
+        __u32 bitmap_events_hi; /* 13 high-order bits of bitmap events count  */
+#endif
+        /* Begin macros to use 2 ints to support fast raid */
+#define MD_SB_BITMAP_EVENTS_LO(sb)     (sb)->bitmap_events_lo
+#define MD_SB_BITMAP_EVENTS_HI(sb)     (sb)->bitmap_events_hi
+#define MD_SB_BITMAP_EVENTS(sb) \
+    (((u64)MD_SB_BITMAP_EVENTS_HI(sb) << 32)|((u64)MD_SB_BITMAP_EVENTS_LO(sb)))
+        /* End macros to use 2 ints to support fast raid */
+	__u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 14];
 
 	/*
 	 * Personality information
@@ -215,7 +228,15 @@
 	__u64	resync_offset;	/* data before this offset (from data_offset) known to be in sync */
 	__u32	sb_csum;	/* checksum upto devs[max_dev] */
 	__u32	max_dev;	/* size of devs[] array to consider */
-	__u8	pad3[64-32];	/* set to 0 when writing */
+        __u32 bitmap_events_lo; /* low-order bits of bitmap events count   */
+        __u32 bitmap_events_hi; /* high-order bits of bitmap events count  */
+        /* Begin macros to use 2 ints to support fast raid */
+#define MD_SB_BITMAP_EVENTS_LO_1(sb)     (sb)->bitmap_events_lo
+#define MD_SB_BITMAP_EVENTS_HI_1(sb)     (sb)->bitmap_events_hi
+#define MD_SB_BITMAP_EVENTS_1(sb) \
+    (((u64)MD_SB_BITMAP_EVENTS_HI_1(sb) << 32)|((u64)MD_SB_BITMAP_EVENTS_LO_1(sb)))
+        /* End macros to use 2 ints to support fast raid */
+	__u8	pad3[64-40];	/* set to 0 when writing */
 
 	/* device state information. Indexed by dev_number.
 	 * 2 bytes per device
--- linux-2.6.11.6/include/linux/raid/md_k.h.orig	Sat Mar 26 04:28:14 2005
+++ linux-2.6.11.6/include/linux/raid/md_k.h	Sun Jun 12 00:11:44 2005
@@ -238,6 +238,9 @@
 #define	MD_RECOVERY_INTR	3
 #define	MD_RECOVERY_DONE	4
 #define	MD_RECOVERY_NEEDED	5
+        /* begin bits added to support fast raid */
+#define MD_BITMAP_REPAIR	8
+        /* end bits added to support fast raid */
 	unsigned long			recovery;
 
 	int				in_sync;	/* know to not need resync */
@@ -261,6 +264,9 @@
 	request_queue_t			*queue;	/* for plugging ... */
 
 	struct list_head		all_mddevs;
+/* added for bitmap */
+	__u64				bitmap_events; /* last bitmap stamp */
+/* end of additions for bitmap */
 };
 
 
--- linux-2.6/drivers/md/raid1.c.orig	2005-07-13 13:48:25.922131227 +0200
+++ linux-2.6/drivers/md/raid1.c	2005-07-18 01:21:25.475150139 +0200
@@ -20,6 +20,30 @@
  * You should have received a copy of the GNU General Public License
  * (for example /usr/src/linux/COPYING); if not, write to the Free
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Changes by Peter T. Breuer <ptb@it.uc3m.es> 31/1/2003 to support
+ * bitmapped intelligence in resync:
+ *
+ *      - bitmap attached on setfaulty (mark bad)
+ *      - bitmap marked during normal i/o if faulty disk
+ *      - bitmap used to skip nondirty blocks during sync
+ *      - bitmap removed on set active
+ *
+ *   Minor changes are needed in raid1.h (extra fields in conf) and in
+ *   md.c (support hotadd directly after setfaulty, or disk recognition).
+ *
+ * Changes by PTB 10/8/2004 to redo read-balancing so that it reads
+ * from the fastest disk, as determined by latency testing every so
+ * often.
+ * Changes by PTB 6/1/2005 to make read errors not fault the disk out
+ * of the array but cause retries instead. And also (with CORRECT set)
+ * trigger rewrite of the bad sector.
+ * Changes by PTB 15/3/2005 to keep rdev from being kfreed in
+ * export_rdev in md.c and instead free it here during replacement of the rdev
+ * in add_disk. Otherwise we would trace along a freed struct to see if
+ * it represents the dev we are interested in replacing. Thanks to
+ * Denis Bonnenfant (denis DOT bonnenfant AT diderot DOT org) for finding
+ * this and several other associated problems in the 2.6.8.1 port.
  */
 
 #include <linux/raid/raid1.h>
@@ -29,6 +53,22 @@
  */
 #define	NR_RAID1_BIOS 256
 
+#ifdef CONFIG_MD_FR1
+/*
+ * When to consider switching read disks:
+ */
+#define MAX_WORK_PER_DISK (128 * 8)
+/*
+ * Weightings for calculating latency:
+ */
+#define MAX_TEST_PER_DISK 64
+#define LATENCY_OLD_WEIGHT 9
+#define LATENCY_NEW_WEIGHT 1
+#define LATENCY_SUM_WEIGHT (LATENCY_OLD_WEIGHT + LATENCY_NEW_WEIGHT)
+
+#include "bitmap.h"
+#endif /* CONFIG_MD_FR1 */
+
 static mdk_personality_t raid1_personality;
 
 static void unplug_slaves(mddev_t *mddev);
@@ -182,6 +222,60 @@ static inline void put_buf(r1bio_t *r1_b
 	spin_unlock_irqrestore(&conf->resync_lock, flags);
 }
 
+/*
+ * Pick a mirror to (re)issue a read on.
+ *
+ * With CONFIG_MD_RAID1_ROBUST_READ, *rdevp on entry is the rdev to
+ * steer away from.  On success *rdevp is replaced by the chosen rdev,
+ * that rdev's nr_pending is incremented, and the mirror index is
+ * returned.  Returns -1 when no mirror is in_sync.
+ */
+static int
+map (mddev_t * mddev, mdk_rdev_t ** rdevp)
+{
+        conf_t *conf = mddev_to_conf (mddev);
+        int nmirrors = conf->raid_disks;
+        mdk_rdev_t *pick;
+        int idx;
+
+        spin_lock_irq (&conf->device_lock);
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+        /*
+         * Start from the slot after the rdev we were given; if that
+         * rdev is absent or was the last slot, start from slot 0.
+         */
+        for (idx = 0; idx < nmirrors; idx++)
+                if (conf->mirrors[idx].rdev == *rdevp)
+                        break;
+        idx = (idx + 1 >= nmirrors) ? 0 : idx + 1;
+        /* take the first in_sync mirror from there that is not *rdevp */
+        for (; idx < nmirrors; idx++) {
+                pick = conf->mirrors[idx].rdev;
+                if (pick && pick != *rdevp && pick->in_sync)
+                        goto found;
+        }
+        /* nothing found: drop through to the plain scan below */
+#endif          /* CONFIG_MD_RAID1_ROBUST_READ */
+        /* plain scan: first in_sync mirror, whichever it is */
+        for (idx = 0; idx < nmirrors; idx++) {
+                pick = conf->mirrors[idx].rdev;
+                if (pick && pick->in_sync)
+                        goto found;
+        }
+        spin_unlock_irq (&conf->device_lock);
+
+        printk (KERN_ERR
+                "raid1_map(): huh, no more operational devices?\n");
+        return -1;
+
+found:
+        /* still under device_lock: publish the choice and pin the rdev */
+        *rdevp = pick;
+        atomic_inc (&pick->nr_pending);
+        spin_unlock_irq (&conf->device_lock);
+        return idx;
+}
+
 static void reschedule_retry(r1bio_t *r1_bio)
 {
 	unsigned long flags;
@@ -203,9 +297,51 @@ static void reschedule_retry(r1bio_t *r1
 static void raid_end_bio_io(r1bio_t *r1_bio)
 {
 	struct bio *bio = r1_bio->master_bio;
+#ifdef CONFIG_MD_FR1
+        /*
+         * Complete the master bio.  With FR1 also: on a fully successful
+         * write clear the bitmap bits for the range; on a successful read
+         * fold the observed latency into the per-mirror rolling average.
+         * Locking is irqsave (as in put_buf()): we may run with irqs off.
+         */
+        int uptodate = test_bit(R1BIO_Uptodate, &r1_bio->state);
+        conf_t *conf = mddev_to_conf(r1_bio->mddev);
 
+        /* if we should mark the bitmap clean, do so */
+        if (uptodate && bio_data_dir(bio) == WRITE
+                     && r1_bio->nonoperational <= 0) {
+                struct bitmap * bitmap = conf->bitmap;
+                if (bitmap && bitmap->active(bitmap)) {
+                        bitmap->clearbits(bitmap,
+                                bio->bi_sector >> 1, bio->bi_size >> 10);
+                }
+        }
+        /* calculate the latency of the read device */
+        if (uptodate && (bio_data_dir(bio) == READ
+                      || bio_data_dir(bio) == READA)) {
+                unsigned long latency = jiffies - r1_bio->start_jiffies;
+                int mirror = r1_bio->read_disk; /* mirror being read */
+                unsigned long flags;
+
+                if (latency < 120 * HZ) { /* unsigned: no lower bound test */
+                        /* count in 1/10ths if we have total weights 9+1 = 10 */
+                        latency *= LATENCY_SUM_WEIGHT * LATENCY_SUM_WEIGHT;
+                        spin_lock_irqsave(&conf->device_lock, flags);
+                        conf->latency[mirror] *= LATENCY_OLD_WEIGHT;
+                        conf->latency[mirror] += LATENCY_NEW_WEIGHT * latency;
+                        conf->latency[mirror] /= LATENCY_SUM_WEIGHT;
+                        spin_unlock_irqrestore(&conf->device_lock, flags);
+                } else {
+                        printk(KERN_ERR
+                            "raid1: bad latency %lu jiffies on disk %d\n",
+                            latency, mirror);
+                }
+        }
+        bio_endio(bio, bio->bi_size, uptodate ? 0 : -EIO);
+#else
 	bio_endio(bio, bio->bi_size,
 		test_bit(R1BIO_Uptodate, &r1_bio->state) ? 0 : -EIO);
+#endif /* CONFIG_MD_FR1 */
 	free_r1bio(r1_bio);
 }
 
@@ -234,9 +370,19 @@ static int raid1_end_read_request(struct
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (!uptodate)
+	if (!uptodate) {
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+	        /*
+                 * Only fault disk out of array on write error, not read.
+                 */
+	        if (0)
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
 		md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
-	else
+#ifdef DO_ADD_READ_WRITE_CORRECT
+	        else    /* tell next time we're here that we're a retry */
+	                set_bit(R1BIO_ReadRetry, &r1_bio->state);
+#endif /* DO_ADD_READ_WRITE_CORRECT */
+        } else
 		/*
 		 * Set R1BIO_Uptodate in our master bio, so that
 		 * we will return a good error code for to the higher
@@ -253,7 +399,19 @@ static int raid1_end_read_request(struct
 	/*
 	 * we have only one bio on the read side
 	 */
-	if (uptodate)
+	if (uptodate
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+                /* Give up and error if we're last */
+                || (atomic_dec_and_test(&r1_bio->remaining))
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
+                )
+#ifdef DO_ADD_READ_WRITE_CORRECT
+	        if (uptodate && test_bit(R1BIO_ReadRetry, &r1_bio->state)) {
+	                /* Success at last - rewrite failed reads */
+                        set_bit(R1BIO_IsSync, &r1_bio->state);
+			reschedule_retry(r1_bio);
+		} else
+#endif /* DO_ADD_READ_WRITE_CORRECT */
 		raid_end_bio_io(r1_bio);
 	else {
 		/*
@@ -383,12 +541,22 @@ static int read_balance(conf_t *conf, r1
 	 */
 	if (conf->next_seq_sect == this_sector)
 		goto rb_out;
+#ifdef CONFIG_MD_FR1
+        /*
+         * Make slower disks appear more distant.
+         */
+	current_distance = abs(this_sector - conf->mirrors[disk].head_position)
+                         * conf->latency[disk];
+
+	/* Find the disk whose head is closest (weighting by latency) */
+#else
 	if (this_sector == conf->mirrors[new_disk].head_position)
 		goto rb_out;
 
 	current_distance = abs(this_sector - conf->mirrors[disk].head_position);
 
 	/* Find the disk whose head is closest */
+#endif /* CONFIG_MD_FR1 */
 
 	do {
 		if (disk <= 0)
@@ -404,7 +572,16 @@ static int read_balance(conf_t *conf, r1
 			new_rdev = rdev;
 			break;
 		}
+
+#ifdef CONFIG_MD_FR1
+                /*
+                 * Make slower disks appear more distant.
+                 */
+		new_distance = abs(this_sector - conf->mirrors[disk].head_position)
+                             * conf->latency[disk] - 1;
+#else
 		new_distance = abs(this_sector - conf->mirrors[disk].head_position);
+#endif /* CONFIG_MD_FR1 */
 		if (new_distance < current_distance) {
 			current_distance = new_distance;
 			new_disk = disk;
@@ -524,6 +701,13 @@ static int make_request(request_queue_t 
 	struct bio *read_bio;
 	int i, disks;
 	mdk_rdev_t *rdev;
+#ifdef CONFIG_MD_FR1
+        /* 
+         * counts of working and non-working (absent, faulty) disks.
+         */
+	int sum_bios = 0, sum_nobios = 0;
+	struct bitmap * bitmap = conf->bitmap;
+#endif /* CONFIG_MD_FR1 */
 
 	/*
 	 * Register the new request and wait if the reconstruction
@@ -555,6 +739,9 @@ static int make_request(request_queue_t 
 
 	r1_bio->mddev = mddev;
 	r1_bio->sector = bio->bi_sector;
+#ifdef CONFIG_MD_FR1
+	r1_bio->start_jiffies = jiffies;
+#endif /* CONFIG_MD_FR1 */
 
 	r1_bio->state = 0;
 
@@ -582,6 +769,19 @@ static int make_request(request_queue_t 
 		read_bio->bi_end_io = raid1_end_read_request;
 		read_bio->bi_rw = READ;
 		read_bio->bi_private = r1_bio;
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+		atomic_set(&r1_bio->remaining, 0);
+		/* count source devices under spinlock */
+		spin_lock_irq(&conf->device_lock);
+	        disks = conf->raid_disks;
+		for (i = 0;  i < disks; i++) {
+			if (conf->mirrors[i].rdev &&
+			!conf->mirrors[i].rdev->faulty) {
+				atomic_inc(&r1_bio->remaining);
+			} 
+		}
+		spin_unlock_irq(&conf->device_lock);
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
 
 		generic_make_request(read_bio);
 		return 0;
@@ -605,13 +805,45 @@ static int make_request(request_queue_t 
 				r1_bio->bios[i] = NULL;
 			} else
 				r1_bio->bios[i] = bio;
-		} else
+		} else {
+#ifdef CONFIG_MD_FR1
+                        sum_bios++;
+#endif /* CONFIG_MD_FR1 */
 			r1_bio->bios[i] = NULL;
-	}
+#ifdef CONFIG_MD_FR1
+                        sum_nobios++;
+#endif /* CONFIG_MD_FR1 */
+                }
+        /* zero the bad disk count on the r1bio by default */
+	}
+#ifdef CONFIG_MD_FR1
+        r1_bio->nonoperational = 0;
+#endif /* CONFIG_MD_FR1 */
 	rcu_read_unlock();
 
+#ifdef CONFIG_MD_FR1
+        /* mark the bitmap before write, just in case */
+        if (bitmap->active(bitmap)) {
+                 bitmap->setbits(bitmap, r1_bio->sector >> 1,
+                          r1_bio->sectors >> 1);
+        }
+#endif /* CONFIG_MD_FR1 */
+
 	atomic_set(&r1_bio->remaining, 1);
 	md_write_start(mddev);
+#ifdef CONFIG_MD_FR1
+        /* count inactive disks, note bitmap dirty if didn't know before */
+	if (sum_nobios > 0) {
+                 /* mark bitmap as dirty if it wasn't so marked */
+	        spin_lock_irq(&conf->device_lock);
+                if (!conf->bitmap_dirty && bitmap->active(bitmap)) {
+                        conf->bitmap_dirty = 1;
+                        mddev->bitmap_events = mddev->events;
+                }
+	        spin_unlock_irq(&conf->device_lock);
+        }
+        r1_bio->nonoperational = sum_nobios;
+#endif /* CONFIG_MD_FR1 */
 	for (i = 0; i < disks; i++) {
 		struct bio *mbio;
 		if (!r1_bio->bios[i])
@@ -652,6 +884,89 @@ static void status(struct seq_file *seq,
 	seq_printf(seq, "]");
 }
 
+#ifdef CONFIG_MD_FR1
+/*
+ * Local bitmap  support functions.
+ */
+/*
+ * Allocate and initialise the bitmap for this array and attach it to
+ * conf.  Returns 0 on success, -ENOMEM if allocation or bitmap_init()
+ * fails.  mddev->size must already be set; it sizes the bitmap.
+ */
+static int
+create_bitmap(conf_t *conf) {
+        struct bitmap * bitmap;
+        mddev_t *mddev = conf->mddev;
+        /* same width as bitmap_init()'s parameter, so nothing is truncated */
+        unsigned long long blocks = mddev->size;
+
+        bitmap = kmalloc (sizeof (*bitmap), GFP_KERNEL);
+        if (!bitmap) {
+                printk(KERN_WARNING "raid1: out of memory for bitmap head\n");
+                return -ENOMEM;
+        }
+        if (bitmap_init (bitmap, blocks) < 0) {
+                printk(KERN_WARNING "raid1: failed to init bitmap\n");
+                kfree(bitmap);
+                return -ENOMEM;
+        }
+        /* take the spinlock for the ops on the configuration */
+        spin_lock_irq(&conf->device_lock);
+        conf->bitmap = bitmap;
+        conf->bitmap_dirty = 0;
+        spin_unlock_irq(&conf->device_lock);
+        return 0;
+}
+
+/* Detach conf->bitmap (if any) under device_lock, then destroy and free it. */
+static void
+remove_bitmap (conf_t *conf) {
+        struct bitmap * old;
+
+        spin_lock_irq(&conf->device_lock);
+        old = conf->bitmap;
+        if (old)
+                conf->bitmap = NULL;
+        spin_unlock_irq(&conf->device_lock);
+
+        /* nothing was attached */
+        if (!old)
+                return;
+        bitmap_destr(old);
+        kfree(old);
+}
+
+/*
+ * Activate conf->bitmap, stamped with the current mddev->events.
+ * Returns 0 if it is now (or already was) active, -EINVAL if there is
+ * no bitmap or start() fails.  irqsave: error() calls this (context TBC).
+ */
+static int
+start_bitmap (conf_t *conf) {
+        mddev_t *mddev;
+        struct bitmap * bitmap;
+        unsigned long flags;
+
+        spin_lock_irqsave(&conf->device_lock, flags);
+        mddev  = conf->mddev;
+        bitmap = conf->bitmap;
+        spin_unlock_irqrestore(&conf->device_lock, flags);
+        if (!bitmap)
+                return -EINVAL;
+        if (bitmap->active(bitmap)) {
+                printk(KERN_WARNING "raid1: bitmap %p already active!\n", bitmap);
+                return 0;
+        }
+        if (bitmap->start(bitmap, mddev->events) < 0) {
+                printk(KERN_WARNING "raid1: bitmap %p failed to start!\n", bitmap);
+                return -EINVAL;
+        }
+        printk(KERN_INFO "raid1: made bitmap %p at events %x:%x\n",
+                bitmap, (unsigned)(u32)(mddev->events >> 32),
+                (unsigned)(u32)(mddev->events));
+        return 0;
+}
+#endif /* CONFIG_MD_FR1 */
 
 static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 {
@@ -685,6 +1000,9 @@ static void error(mddev_t *mddev, mdk_rd
 	printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
 		"	Operation continuing on %d devices\n",
 		bdevname(rdev->bdev,b), conf->working_disks);
+#ifdef CONFIG_MD_FR1
+        start_bitmap(conf);
+#endif /* CONFIG_MD_FR1 */
 }
 
 static void print_conf(conf_t *conf)
@@ -703,6 +1021,14 @@ static void print_conf(conf_t *conf)
 	for (i = 0; i < conf->raid_disks; i++) {
 		char b[BDEVNAME_SIZE];
 		tmp = conf->mirrors + i;
+#ifdef CONFIG_MD_FR1
+                /*
+                 * Remove repeats from debug printout.
+                 */
+                if (i > 0 && memcmp(tmp, &conf->mirrors[i-1], sizeof(*tmp)) == 0) {
+                        continue;
+                }
+#endif /* CONFIG_MD_FR1 */
 		if (tmp->rdev)
 			printk(" disk %d, wo:%d, o:%d, dev:%s\n",
 				i, !tmp->rdev->in_sync, !tmp->rdev->faulty,
@@ -722,6 +1048,14 @@ static void close_sync(conf_t *conf)
 
 	mempool_destroy(conf->r1buf_pool);
 	conf->r1buf_pool = NULL;
+
+#ifdef CONFIG_MD_FR1
+        if (conf->bitmap) {
+                struct bitmap *bitmap = conf->bitmap;
+                bitmap->stop(bitmap);
+                bitmap->print_stats(bitmap);
+        }
+#endif /* CONFIG_MD_FR1 */
 }
 
 static int raid1_spare_active(mddev_t *mddev)
@@ -757,9 +1091,42 @@ static int raid1_add_disk(mddev_t *mddev
 	int mirror;
 	mirror_info_t *p;
 
-	for (mirror=0; mirror < mddev->raid_disks; mirror++)
+	for (mirror=0; mirror < mddev->raid_disks; mirror++) {
+#ifdef CONFIG_MD_FR1
+            /*
+             * allow a disk which has only been set faulty but not
+             * removed yet to be reinserted, thus triggering a hot
+             * repair.
+             */
+		p = &conf->mirrors[mirror]; 
+                if (unlikely(!p->rdev))
+                        goto insert_or_replace;
+	        printk(KERN_DEBUG "raid1: testing p->rdev %p\n", p->rdev);
+                if (unlikely(p->rdev == rdev))
+                        goto insert_or_replace;
+	        printk(KERN_DEBUG "raid1: testing p->rdev->bdev %p\n",
+                        p->rdev->bdev);
+                if (!p->rdev->bdev)
+                        goto insert_or_replace; // weird!
+	        printk(KERN_DEBUG "raid1: testing p->rdev->bdev->bd_dev %x\n",
+                        p->rdev->bdev->bd_dev);
+                if (p->rdev->bdev->bd_dev == rdev->bdev->bd_dev)
+                        goto insert_or_replace;
+                continue;
+
+insert_or_replace:
+               if (1) {
+                        if (p->rdev && p->rdev != rdev) {
+                        /* kill the rdev left by export_rdev() */
+	                        printk(KERN_INFO
+                                "raid1: late free of exported rdev %p\n",
+                                        p->rdev);
+                                kfree(p->rdev);
+                        }
+		        p->rdev = rdev;
+#else
 		if ( !(p=conf->mirrors+mirror)->rdev) {
-
+#endif /* CONFIG_MD_FR1 */
 			blk_queue_stack_limits(mddev->queue,
 					       rdev->bdev->bd_disk->queue);
 			/* as we don't honour merge_bvec_fn, we must never risk
@@ -776,7 +1143,7 @@ static int raid1_add_disk(mddev_t *mddev
 			p->rdev = rdev;
 			break;
 		}
-
+        }
 	print_conf(conf);
 	return found;
 }
@@ -860,6 +1227,14 @@ static int end_sync_write(struct bio *bi
 	update_head_pos(mirror, r1_bio);
 
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
+#ifdef CONFIG_MD_FR1
+                /* clean the bitmap after resync */
+                struct bitmap * bitmap = conf->bitmap;
+                if (bitmap && bitmap->active(bitmap)) {
+                        bitmap->clearbits(bitmap, r1_bio->sector >> 1,
+                                r1_bio->sectors >> 1);
+                }
+#endif /* CONFIG_MD_FR1 */
 		md_done_sync(mddev, r1_bio->sectors, uptodate);
 		put_buf(r1_bio);
 	}
@@ -950,7 +1325,10 @@ static void raid1d(mddev_t *mddev)
 		} else {
 			int disk;
 			bio = r1_bio->bios[r1_bio->read_disk];
-			if ((disk=read_balance(conf, r1_bio)) == -1) {
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+                        rdev = conf->mirrors[r1_bio->read_disk].rdev;
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
+			if ((disk=map(mddev, &rdev)) == -1) {
 				printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
 				       " read error for block %llu\n",
 				       bdevname(bio->bi_bdev,b),
@@ -1020,6 +1398,22 @@ static int sync_request(mddev_t *mddev, 
 	int i;
 	int write_targets = 0;
 
+#ifdef CONFIG_MD_FR1
+        /*
+         * Will need to count mirror components currently with a bitmap
+         * which have been marked faulty and nonoperational at some
+         * point beforehand, and have been accumulating marks on the
+         * bitmap to indicate dirty blocks that need syncing.
+         */
+        struct bitmap * bitmap = conf->bitmap;
+        int count, block_not_dirty;
+        int targets[MD_SB_DISKS];
+        /*
+         * discount the skipped sectors back to the md.c code
+         */
+        extern atomic_t md_throttle[];
+#endif /* CONFIG_MD_FR1 */
+
 	if (!conf->r1buf_pool)
 		if (init_resync(conf))
 			return -ENOMEM;
@@ -1030,6 +1424,25 @@ static int sync_request(mddev_t *mddev, 
 		return 0;
 	}
 
+#ifdef CONFIG_MD_FR1
+        /* at the start of a pass, stop the bitmap unless a repair was flagged */
+        if (!sector_nr) {
+                if (! test_and_clear_bit(MD_BITMAP_REPAIR, &mddev->recovery)) {
+                        /* has to be outside spinlock as it takes it */
+                        printk(KERN_WARNING
+                            "%s: no repair bit on sb so removed bitmap %p\n",
+                            mdname(mddev), bitmap);
+                        if (bitmap)
+                                bitmap->stop (bitmap);
+                } else {
+                        printk(KERN_WARNING
+                            "%s: repair bit set on sb so retained bitmap %p\n",
+                            mdname(mddev), bitmap);
+                }
+                /* reset the bitmap indicator always */
+        }
+#endif /* CONFIG_MD_FR1 */
+
 	/*
 	 * If there is non-resync activity waiting for us then
 	 * put in a delay to throttle resync.
@@ -1045,6 +1458,34 @@ static int sync_request(mddev_t *mddev, 
 	 */
 	disk = conf->last_used;
 	/* make sure disk is operational */
+#ifdef CONFIG_MD_FR1
+        /* setup extra report counters for skipped/synced blocks */
+        if (!sector_nr) {
+                conf->sync_mode = -1;
+                conf->last_clean_sector = -1;
+                conf->last_dirty_sector = -1;
+        }
+
+	nr_sectors = RESYNC_SECTORS;
+	if (max_sector - sector_nr < nr_sectors)
+		nr_sectors = max_sector - sector_nr;
+
+        /* go looking for the faulted (nonoperational) mirrors, under lock */
+        count = 0;
+	while(1) {
+		if (disk <= 0)
+			disk = conf->raid_disks;
+                disk--;
+		if (conf->mirrors[disk].rdev
+                && !conf->mirrors[disk].rdev->faulty
+                && (!conf->mirrors[disk].rdev->in_sync ||
+			    sector_nr + nr_sectors > mddev->recovery_cp)) {
+                        targets[count++] = disk;
+                }
+		if (disk == conf->last_used)
+			break;
+        }
+#endif /* CONFIG_MD_FR1 */
 
 	while (conf->mirrors[disk].rdev == NULL ||
 	       !conf->mirrors[disk].rdev->in_sync) {
@@ -1053,12 +1494,66 @@ static int sync_request(mddev_t *mddev, 
 		disk--;
 		if (disk == conf->last_used)
 			break;
+// #ifdef CONFIG_MD_FR1
+//                if (0)
+// #endif /* CONFIG_MD_FR1 */
 	}
 	conf->last_used = disk;
 	atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
 
 
 	mirror = conf->mirrors + disk;
+#ifdef CONFIG_MD_FR1
+        /*
+         * check if bitmap says resync block can be skipped, and do so
+         */
+        block_not_dirty = bitmap && bitmap->active(bitmap)
+            && !bitmap->testbits(bitmap, sector_nr >> 1, nr_sectors >> 1);
+
+        if (count > 0 && block_not_dirty) {
+                /* skip: nothing will be read or written for this range */
+
+                md_sync_acct(mirror->rdev->bdev, nr_sectors);
+                md_done_sync(mddev, nr_sectors, 1);
+
+                /*
+                 * Only the read source had nr_pending raised above; the
+                 * write targets are not pinned yet, so drop just that one.
+                 */
+                atomic_dec(&mirror->rdev->nr_pending);
+
+                /* do these conf accesses under lock, though only accounting */
+                spin_lock_irq(&conf->resync_lock);
+                if (conf->sync_mode != 0) {
+                        if (conf->sync_mode == 1) {
+                                printk(KERN_INFO "raid1: synced dirty sectors %lu-%lu\n",
+                                conf->last_clean_sector+1,
+                                conf->last_dirty_sector);
+                        }
+                        conf->sync_mode = 0;
+                }
+                conf->last_clean_sector = sector_nr + nr_sectors - 1;
+                if (sector_nr + nr_sectors >= mddev->size << 1) {
+                        printk(KERN_INFO "raid1: skipped clean sectors %lu-%lu\n",
+                        conf->last_dirty_sector+1,
+                        conf->last_clean_sector);
+                }
+                /* update md driver throttle discount */
+                atomic_add(nr_sectors, &md_throttle[mddev->md_minor]);
+
+                /*
+                 * Wake up any possible resync thread that waits for the device
+                 * to go idle.
+                 */
+                --conf->barrier;
+                wake_up(&conf->wait_idle);
+                wake_up(&conf->wait_resume);
+                spin_unlock_irq(&conf->resync_lock);
+
+                md_wakeup_thread(mddev->thread);
+                return nr_sectors;
+        }
+#endif /* CONFIG_MD_FR1 */
 
 	r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
 
@@ -1152,6 +1647,28 @@ static int sync_request(mddev_t *mddev, 
 
 	generic_make_request(bio);
 
+#ifdef CONFIG_MD_FR1
+        /* printout info from time to time */
+        spin_lock_irq(&conf->resync_lock);
+        if (conf->sync_mode != 1) {
+                if (conf->sync_mode == 0) {
+                        printk(KERN_INFO
+                                "raid1: skipped clean sectors %lu-%lu\n",
+                        conf->last_dirty_sector+1,
+                        conf->last_clean_sector);
+                }
+                conf->sync_mode = 1;
+        }
+        conf->last_dirty_sector = sector_nr + nr_sectors - 1;
+
+        if (sector_nr + nr_sectors >= mddev->size << 1) {
+                printk(KERN_INFO "raid1: synced dirty sectors %lu-%lu\n",
+                conf->last_clean_sector+1,
+                conf->last_dirty_sector);
+        }
+        spin_unlock_irq(&conf->resync_lock);
+#endif /* CONFIG_MD_FR1 */
+
 	return nr_sectors;
 }
 
@@ -1259,6 +1776,17 @@ static int run(mddev_t *mddev)
 	conf->last_used = j;
 
 
+#ifdef CONFIG_MD_FR1
+        /* make the bitmap now - hope mddev->size exists already */
+        if (create_bitmap(conf) < 0) {
+                printk(KERN_ERR "raid1: out of memory for bitmap on %s\n",
+                        mdname(mddev));
+                goto out_free_conf;
+        }
+
+        /* PTB set it active too */
+        start_bitmap (conf);
+#endif /* CONFIG_MD_FR1 */
 
 	{
 		mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1");
@@ -1295,6 +1823,9 @@ out_free_conf:
 			kfree(conf->mirrors);
 		if (conf->poolinfo)
 			kfree(conf->poolinfo);
+#ifdef CONFIG_MD_FR1
+        	remove_bitmap (conf);
+#endif /* CONFIG_MD_FR1 */
 		kfree(conf);
 		mddev->private = NULL;
 	}
@@ -1315,6 +1846,9 @@ static int stop(mddev_t *mddev)
 		kfree(conf->mirrors);
 	if (conf->poolinfo)
 		kfree(conf->poolinfo);
+#ifdef CONFIG_MD_FR1
+        remove_bitmap (conf);
+#endif /* CONFIG_MD_FR1 */
 	kfree(conf);
 	mddev->private = NULL;
 	return 0;
--- linux-2.6/drivers/md/md.c.orig	2005-07-13 15:54:26.194462796 +0200
+++ linux-2.6/drivers/md/md.c	2005-07-18 00:49:02.201350780 +0200
@@ -27,6 +27,27 @@
    You should have received a copy of the GNU General Public License
    (for example /usr/src/linux/COPYING); if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+   Changes 31/1/2003 by Peter T.  Breuer <ptb@it.uc3m.es> to support
+   hotadd directly after setfaulty without intervening hotremove
+   ("hotrepair") when there is no persistent superblock, and to flag a
+   potential hotrepair when an old disk is re-added and the uuid matches
+   ours.  The flag is used by the raid1 driver, at the moment, in order
+   to trigger an intelligent resync.
+  
+   Yet more changes by PTB 12/3/2003 to notify devices via ioctls when
+   they have been incorporated or removed from a raid array.
+  
+   Yet more changes by PTB 26/3/2004 to make the speed calculations
+   appropriate to fr1, and throttle by real i/o, not resync total.
+
+   Changes by PTB 15/3/2005 to make sure the sb of the rdev is read
+   just before we check its uuid.
+
+   Changes by PTB 15/3/2005 to keep rdev from being kfreed in
+   export_rdev() and instead free it in raid1 during
+   pers->add_disk. Otherwise we would trace along a freed struct there
+   to see if it represents a dev we are interested in repairing.
 */
 
 #include <linux/module.h>
@@ -55,6 +76,7 @@
 #define DEBUG 0
 #define dprintk(x...) ((void)(DEBUG && printk(x)))
 
+#define MD_BITMAP_SUPPORT 1
 
 #ifndef MODULE
 static void autostart_arrays (int part);
@@ -122,6 +144,14 @@ static ctl_table raid_root_table[] = {
 	{ .ctl_name = 0 }
 };
 
+#ifdef MD_BITMAP_SUPPORT
+/* PTB md_throttle: per-minor sector discount added by the personality. NOTE(review): static under MODULE, yet raid1.c externs it - confirm */
+#ifdef MODULE
+static
+#endif /* MODULE */
+atomic_t md_throttle[MAX_MD_DEVS];
+#endif /* MD_BITMAP_SUPPORT */
+
 static struct block_device_operations md_fops;
 
 /*
@@ -173,7 +203,14 @@ static void mddev_put(mddev_t *mddev)
 		return;
 	if (!mddev->raid_disks && list_empty(&mddev->disks)) {
 		list_del(&mddev->all_mddevs);
+#ifdef MD_BITMAP_SUPPORT
+	        spin_unlock(&all_mddevs_lock);
+                /* blk_put_queue calls kblockd_flush, which can sleep */
+#endif /* MD_BITMAP_SUPPORT */
 		blk_put_queue(mddev->queue);
+#ifdef MD_BITMAP_SUPPORT
+	        spin_lock(&all_mddevs_lock);
+#endif /* MD_BITMAP_SUPPORT */
 		kfree(mddev);
 	}
 	spin_unlock(&all_mddevs_lock);
@@ -355,6 +392,10 @@ static int sync_page_io(struct block_dev
 static int read_disk_sb(mdk_rdev_t * rdev)
 {
 	char b[BDEVNAME_SIZE];
+#ifdef MD_BITMAP_SUPPORT
+	mdp_super_t *sb;
+#endif /* MD_BITMAP_SUPPORT */
+
 	if (!rdev->sb_page) {
 		MD_BUG();
 		return -EINVAL;
@@ -366,6 +407,11 @@ static int read_disk_sb(mdk_rdev_t * rde
 	if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ))
 		goto fail;
 	rdev->sb_loaded = 1;
+#ifdef MD_BITMAP_SUPPORT
+	sb = (mdp_super_t *)page_address(rdev->sb_page);
+	printk(KERN_INFO "%s (read) [events: %08lx]\n",
+		bdevname(rdev->bdev,b), (unsigned long)sb->events_lo);
+#endif /* MD_BITMAP_SUPPORT */
 	return 0;
 
 fail:
@@ -582,6 +628,9 @@ static int super_90_validate(mddev_t *md
 		mddev->raid_disks = sb->raid_disks;
 		mddev->size = sb->size;
 		mddev->events = md_event(sb);
+#ifdef MD_BITMAP_SUPPORT
+                mddev->bitmap_events = MD_SB_BITMAP_EVENTS(sb);
+#endif /* MD_BITMAP_SUPPORT */
 
 		if (sb->state & (1<<MD_SB_CLEAN))
 			mddev->recovery_cp = MaxSector;
@@ -669,6 +718,10 @@ static void super_90_sync(mddev_t *mddev
 	sb->state = 0;
 	sb->events_hi = (mddev->events>>32);
 	sb->events_lo = (u32)mddev->events;
+#ifdef MD_BITMAP_SUPPORT
+        MD_SB_BITMAP_EVENTS_HI(sb) = (u32)(mddev->bitmap_events >> 32);
+        MD_SB_BITMAP_EVENTS_LO(sb) = (u32)mddev->bitmap_events;
+#endif /* MD_BITMAP_SUPPORT */
 
 	if (mddev->in_sync)
 	{
@@ -872,6 +925,11 @@ static int super_1_validate(mddev_t *mdd
 		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
 		mddev->size = le64_to_cpu(sb->size)/2;
 		mddev->events = le64_to_cpu(sb->events);
+#ifdef MD_BITMAP_SUPPORT
+		mddev->bitmap_events =
+		    (((__u64)le32_to_cpu(MD_SB_BITMAP_EVENTS_HI_1(sb)))<<32)
+                    | (__u64)le32_to_cpu(MD_SB_BITMAP_EVENTS_LO_1(sb));
+#endif /* MD_BITMAP_SUPPORT */
 		
 		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
 		memcpy(mddev->uuid, sb->set_uuid, 16);
@@ -928,6 +986,10 @@ static void super_1_sync(mddev_t *mddev,
 
 	sb->utime = cpu_to_le64((__u64)mddev->utime);
 	sb->events = cpu_to_le64(mddev->events);
+#ifdef MD_BITMAP_SUPPORT
+        MD_SB_BITMAP_EVENTS_HI_1(sb) = (u32)cpu_to_le32(mddev->bitmap_events >> 32);
+        MD_SB_BITMAP_EVENTS_LO_1(sb) = (u32)cpu_to_le32(mddev->bitmap_events);
+#endif /* MD_BITMAP_SUPPORT */
 	if (mddev->in_sync)
 		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
 	else
@@ -998,6 +1060,91 @@ static int match_mddev_units(mddev_t *md
 	return 0;
 }
 
+#ifdef MD_BITMAP_SUPPORT
+static mdk_rdev_t * find_rdev(mddev_t *mddev, dev_t dev);
+/* Return the first rdev, in any md array, that find_rdev() matches to dev. */
+static mdk_rdev_t *find_rdev_all(dev_t dev)
+{
+	struct list_head *tmp;
+	mddev_t *mddev;
+
+	ITERATE_MDDEV(mddev, tmp) {
+		mdk_rdev_t *rdev = find_rdev(mddev, dev);
+		if (rdev)
+			return rdev;
+	}
+	return NULL;
+}
+
+/* Forward declarations at file scope: ISO C does not allow "static" on a
+ * function declared inside a block. */
+static int hot_add_disk(mddev_t * mddev, dev_t dev);
+static int set_disk_faulty(mddev_t *mddev, dev_t dev);
+
+/*
+ * This is registered to other devices as a callback (see notify_device()).
+ * It finds the array that component dev is bound to and applies cmd to it.
+ *
+ * Returns the result of hot_add_disk() for HOT_ADD_DISK or of
+ * set_disk_faulty() for SET_DISK_FAULTY; -EINVAL if dev is not bound to
+ * any array or cmd is anything else.
+ */
+static int md_hot_cmd_disk(dev_t dev, int cmd)
+{
+	mdk_rdev_t *rdev = find_rdev_all(dev);
+	mddev_t *mddev = rdev ? rdev->mddev : NULL;
+
+	if (!mddev)
+		return -EINVAL;
+
+	switch (cmd) {
+	case HOT_ADD_DISK:
+		return hot_add_disk(mddev, dev);
+	case SET_DISK_FAULTY:
+		return set_disk_faulty(mddev, dev);
+	default:
+		return -EINVAL;
+	}
+}
+
+#ifndef BLKMDNTFY
+#define BLKMDNTFY _IOW(0x12,133,int)
+#endif
+#ifndef BLKMDRGTR
+#define BLKMDRGTR _IOW(0x12,135,unsigned long)
+#endif
+/* Tell component dev it has joined mddev (BLKMDNTFY), then pass it
+ * md_hot_cmd_disk as a callback (BLKMDRGTR); ioctl results are ignored. */
+static void notify_device(mddev_t *mddev, dev_t dev)
+{
+	struct block_device *component = bdget(dev);
+
+	if (!component)
+		return;
+	printk(KERN_INFO "%s: notifying dev %x it is now in array\n",
+		mdname(mddev), dev);
+	ioctl_by_bdev(component, BLKMDNTFY, MKDEV(MD_MAJOR, mddev->md_minor));
+	ioctl_by_bdev(component, BLKMDRGTR, (unsigned long)md_hot_cmd_disk);
+	bdput(component);
+}
+#ifndef BLKMDUNTFY
+#define BLKMDUNTFY _IOW(0x12,134,int)
+#endif
+/* Tell component dev (BLKMDUNTFY) that it has left mddev; result ignored. */
+static void unnotify_device(mddev_t *mddev, dev_t dev)
+{
+	struct block_device *component = bdget(dev);
+
+	if (!component)
+		return;
+	printk(KERN_INFO "%s: notifying dev %x it is no longer in array\n",
+		mdname(mddev), dev);
+	ioctl_by_bdev(component, BLKMDUNTFY,
+		      MKDEV(MD_MAJOR, mddev->md_minor));
+	bdput(component);
+}
+#endif /* MD_BITMAP_SUPPORT */
+
 static LIST_HEAD(pending_raid_disks);
 
 static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
@@ -1036,6 +1183,9 @@ static int bind_rdev_to_array(mdk_rdev_t
 	list_add(&rdev->same_set, &mddev->disks);
 	rdev->mddev = mddev;
 	printk(KERN_INFO "md: bind<%s>\n", bdevname(rdev->bdev,b));
+#ifdef MD_BITMAP_SUPPORT
+        notify_device(mddev, rdev->bdev->bd_inode->i_rdev);
+#endif /* MD_BITMAP_SUPPORT */
 	return 0;
 }
 
@@ -1046,6 +1196,9 @@ static void unbind_rdev_from_array(mdk_r
 		MD_BUG();
 		return;
 	}
+#ifdef MD_BITMAP_SUPPORT
+        unnotify_device(rdev->mddev, rdev->bdev->bd_inode->i_rdev);
+#endif /* MD_BITMAP_SUPPORT */
 	list_del_init(&rdev->same_set);
 	printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
 	rdev->mddev = NULL;
@@ -1075,6 +1228,10 @@ static int lock_rdev(mdk_rdev_t *rdev, d
 		blkdev_put(bdev);
 		return err;
 	}
+#ifdef MD_BITMAP_SUPPORT
+        printk(KERN_DEBUG "md %d: %s restores bdev %x rdev(%p)->bdev(%p)\n",
+                __LINE__, __FUNCTION__, dev, rdev, bdev);
+#endif /* MD_BITMAP_SUPPORT */
 	rdev->bdev = bdev;
 	return err;
 }
@@ -1082,6 +1239,9 @@ static int lock_rdev(mdk_rdev_t *rdev, d
 static void unlock_rdev(mdk_rdev_t *rdev)
 {
 	struct block_device *bdev = rdev->bdev;
+#ifdef MD_BITMAP_SUPPORT
+        printk(KERN_DEBUG "md %d: %s nulls bdev\n", __LINE__, __FUNCTION__);
+#endif /* MD_BITMAP_SUPPORT */
 	rdev->bdev = NULL;
 	if (!bdev)
 		MD_BUG();
@@ -1104,6 +1264,16 @@ static void export_rdev(mdk_rdev_t * rde
 	md_autodetect_dev(rdev->bdev->bd_dev);
 #endif
 	unlock_rdev(rdev);
+#ifdef MD_BITMAP_SUPPORT
+#ifdef CONFIG_MD_FR1
+            /* FIXME only kfree if pers hot_add does not kfree instead.
+             * Use some flag in future! 
+             */
+	printk(KERN_WARNING
+                "md: WARNING: delaying free of exported rdev %p\n", rdev);
+        if (0)
+#endif /* CONFIG_MD_FR1 */
+#endif /* MD_BITMAP_SUPPORT */
 	kfree(rdev);
 }
 
@@ -2169,6 +2339,10 @@ static int hot_add_disk(mddev_t * mddev,
 	int err;
 	unsigned int size;
 	mdk_rdev_t *rdev;
+#ifdef MD_BITMAP_SUPPORT
+        int hotrepair = 0;
+	mdp_super_t *sb;
+#endif /* MD_BITMAP_SUPPORT */
 
 	if (!mddev->pers)
 		return -ENODEV;
@@ -2186,7 +2360,42 @@ static int hot_add_disk(mddev_t * mddev,
 		return -EINVAL;
 	}
 
+#ifdef MD_BITMAP_SUPPORT
+        /*
+         * If the device is still bound to this array it may only be re-added
+         * when it is marked faulty ("hotrepair"): it is hot-removed first and
+         * then imported again below. Any other bound device gets -EBUSY.
+         */
+        rdev = find_rdev(mddev, dev);
+        if (rdev) {
+                int mirror;
+
+        /* found it in array, so it's not yet been removed */
+               if (rdev->bdev->bd_inode->i_rdev != dev
+               || !rdev->faulty) {
+                       printk(KERN_WARNING "%s: cannot add existing component %x\n",
+                                mdname(mddev), dev);
+                       return -EBUSY;
+                }
+        /*
+         * Allow "hotrepair" of faulty device. Have rdev->faulty;
+         */
+                printk(KERN_WARNING "%s: repair of faulty disk %x!\n",
+                       mdname(mddev), dev);
+
+                mirror = rdev->raid_disk;
+                rdev->raid_disk = -1;
+                err = hot_remove_disk(mddev, dev);
+                if (err < 0) {
+                       printk(KERN_WARNING "%s: remove disk %x errored\n",
+                                mdname(mddev), dev);
+                       return err;       
+                }
+        }
+	rdev = md_import_device (dev, 0, 0); // PTB -1 == don't check sb
+#else
 	rdev = md_import_device (dev, -1, 0);
+#endif /* MD_BITMAP_SUPPORT */
 	if (IS_ERR(rdev)) {
 		printk(KERN_WARNING 
 			"md: error, md_import_device() returned %ld\n",
@@ -2219,6 +2428,71 @@ static int hot_add_disk(mddev_t * mddev,
 		err = -EINVAL;
 		goto abort_export;
 	}
+
+#ifdef MD_BITMAP_SUPPORT
+        printk(KERN_INFO "md: old uuid %x %x %x %x\n",
+                *(__u32 *)(mddev->uuid+0),
+                *(__u32 *)(mddev->uuid+4),
+                *(__u32 *)(mddev->uuid+8),
+                *(__u32 *)(mddev->uuid+12));
+        /* get the component's superblock */
+        if (!rdev->sb_loaded)
+                read_disk_sb(rdev);
+	sb = (mdp_super_t *)page_address(rdev->sb_page);
+        if (sb) {
+                printk(KERN_INFO "md: new uuid %x %x %x %x\n",
+                        sb->set_uuid0,
+                        sb->set_uuid1,
+                        sb->set_uuid2,
+                        sb->set_uuid3);
+        } else {
+                printk(KERN_INFO "md: new component device has no sb\n");
+        }
+        /* let's check the new disk sb at this point */
+        if (mddev->persistent && sb 
+                && sb->set_uuid0 == *(__u32 *)(mddev->uuid+0)
+                && sb->set_uuid1 == *(__u32 *)(mddev->uuid+4)
+                && sb->set_uuid2 == *(__u32 *)(mddev->uuid+8)
+                && sb->set_uuid3 == *(__u32 *)(mddev->uuid+12)) {
+                long long disk_events, bitmap_events;
+                disk_events   = sb->events_hi;
+                disk_events <<= 32;
+                disk_events  |= sb->events_lo;
+                bitmap_events = mddev->bitmap_events;
+
+                /* This is where we should examine conf->events_chkpt_*
+                 */
+                if (disk_events <  bitmap_events
+                &&  disk_events >= bitmap_events - 2
+                        ) {
+                        printk(KERN_WARNING
+                        "%s: warning - new disk %x nearly too old for repair (disk %Ld < bitmap %Ld)\n",
+                        mdname(mddev), dev, disk_events, bitmap_events);
+                }
+                if (disk_events < bitmap_events - 2) {
+                        /* new disk is too old! */
+                        printk(KERN_INFO
+                        "%s: new disk %x too old for repair (disk %Ld < bitmap %Ld)\n",
+                                mdname(mddev), dev, disk_events, bitmap_events);
+                        hotrepair = 0;
+                } else {
+                        printk(KERN_INFO
+                        "%s: repairing old mirror component %x (disk %Ld >= bitmap %Ld)\n",
+                                mdname(mddev), dev, disk_events, bitmap_events);
+                        hotrepair = 1;
+                }
+        } else if (!mddev->persistent && hotrepair) {
+                printk(KERN_INFO
+                        "md: forced repair of mirror component %x\n",
+                        dev);
+                hotrepair = 1;
+        } else {
+                /* failed match */
+                printk(KERN_INFO "md: adding new mirror component %x\n", dev);
+                hotrepair = 0;
+        }
+#endif /* MD_BITMAP_SUPPORT */
+
 	rdev->in_sync = 0;
 	rdev->desc_nr = -1;
 	bind_rdev_to_array(rdev, mddev);
@@ -2235,6 +2509,21 @@ static int hot_add_disk(mddev_t * mddev,
 		goto abort_unbind_export;
 	}
 
+#ifdef MD_BITMAP_SUPPORT
+        /*
+         * Maybe say something nice - 1 means we want to respect
+         * the bitmap in raid1 resync if there is one, 0
+         * means we need to kill any bitmap that we have been
+         * saving but we'll do it in the raid1 resync instead of here
+         */
+        printk(KERN_DEBUG "%s: set repair bit to %d on superblock %p\n",
+                mdname(mddev), hotrepair, mddev);
+        if (hotrepair)
+                set_bit  (MD_BITMAP_REPAIR, &mddev->recovery);
+        else
+                clear_bit(MD_BITMAP_REPAIR, &mddev->recovery);
+#endif /* MD_BITMAP_SUPPORT */
+
 	rdev->raid_disk = -1;
 
 	md_update_sb(mddev);
@@ -3241,6 +3530,10 @@ static void md_do_sync(mddev_t *mddev)
 	mddev_t *mddev2;
 	unsigned int currspeed = 0,
 		 window;
+#ifdef MD_BITMAP_SUPPORT
+        /* PTB add realspeed for i/o limiting calculation */
+        unsigned realspeed;
+#endif /* MD_BITMAP_SUPPORT */
 	sector_t max_sectors,j;
 	unsigned long mark[SYNC_MARKS];
 	sector_t mark_cnt[SYNC_MARKS];
@@ -3319,7 +3612,7 @@ static void md_do_sync(mddev_t *mddev)
 		/* recovery follows the physical size of devices */
 		max_sectors = mddev->size << 1;
 
-	printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
+	printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
 	printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
 		" %d KB/sec/disc.\n", sysctl_speed_limit_min);
 	printk(KERN_INFO "md: using maximum available idle IO bandwith "
@@ -3349,6 +3642,9 @@ static void md_do_sync(mddev_t *mddev)
 	atomic_set(&mddev->recovery_active, 0);
 	init_waitqueue_head(&mddev->recovery_wait);
 	last_check = 0;
+#ifdef MD_BITMAP_SUPPORT
+        atomic_set(&md_throttle[mddev->md_minor], 0);
+#endif /* MD_BITMAP_SUPPORT */
 
 	if (j>2) {
 		printk(KERN_INFO 
@@ -3385,6 +3681,10 @@ static void md_do_sync(mddev_t *mddev)
 
 			mddev->resync_mark = mark[next];
 			mddev->resync_mark_cnt = mark_cnt[next];
+#ifdef MD_BITMAP_SUPPORT
+                        /* PTB reset count of skipped blocks this mark */
+                        atomic_set(&md_throttle[mddev->md_minor], 0);
+#endif /* MD_BITMAP_SUPPORT */
 			mark[next] = jiffies;
 			mark_cnt[next] = j - atomic_read(&mddev->recovery_active);
 			last_mark = next;
@@ -3414,10 +3714,23 @@ static void md_do_sync(mddev_t *mddev)
 		cond_resched();
 
 		currspeed = ((unsigned long)(j-mddev->resync_mark_cnt))/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
+#ifdef MD_BITMAP_SUPPORT
+                /*
+                 * some of the blocks are skipped, not synced, so
+                 * should not count when limiting i/o. Let personality say.
+                 */
+                realspeed = ((unsigned long)(j - mddev->resync_mark_cnt - atomic_read(&md_throttle[mddev->md_minor])))/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
+#endif /* MD_BITMAP_SUPPORT */
 
 		if (currspeed > sysctl_speed_limit_min) {
+#ifdef MD_BITMAP_SUPPORT
+                        /* PTB use realspeed for upper limit on i/o */
+			if ((realspeed > sysctl_speed_limit_max) ||
+					!is_mddev_idle(mddev)) {
+#else
 			if ((currspeed > sysctl_speed_limit_max) ||
 					!is_mddev_idle(mddev)) {
+#endif /* MD_BITMAP_SUPPORT */
 				msleep_interruptible(250);
 				goto repeat;
 			}
@@ -3515,15 +3828,35 @@ void md_check_recovery(mddev_t *mddev)
 				mddev->pers->spare_active(mddev);
 			}
 			md_update_sb(mddev);
+#ifdef MD_BITMAP_SUPPORT
+                        mddev->recovery &= 1 << MD_BITMAP_REPAIR;
+
+                        printk(KERN_ERR
+                            "%s: md_check_recovery 1:"
+                            " repair bit %lx on sb %p preserved\n",
+                            mdname(mddev), mddev->recovery, mddev);
+#else
 			mddev->recovery = 0;
+#endif /* MD_BITMAP_SUPPORT */
 			/* flag recovery needed just to double check */
 			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 			goto unlock;
 		}
 		if (mddev->recovery)
 			/* probably just the RECOVERY_NEEDED flag */
+#ifdef MD_BITMAP_SUPPORT
+                {
+                        mddev->recovery &= 1 << MD_BITMAP_REPAIR;
+
+                        printk(KERN_ERR
+                            "%s: md_check_recovery 2:"
+                            " repair bit %lx on sb %p preserved\n",
+                            mdname(mddev), mddev->recovery, mddev);
+                }
+#else
 			mddev->recovery = 0;
-
+			/* flag recovery needed just to double check */
+#endif /* MD_BITMAP_SUPPORT */
 		/* no recovery is running.
 		 * remove any failed drives, then
 		 * add spares if possible.
@@ -3565,7 +3898,16 @@ void md_check_recovery(mddev_t *mddev)
 					" thread...\n", 
 					mdname(mddev));
 				/* leave the spares where they are, it shouldn't hurt */
+#ifdef MD_BITMAP_SUPPORT
+                                mddev->recovery &= 1 << MD_BITMAP_REPAIR;
+
+                                printk(KERN_ERR
+                                    "%s: md_check_recovery 3:"
+                                    " repair bit %lx on sb %p preserved\n",
+                                    mdname(mddev), mddev->recovery, mddev);
+#else
 				mddev->recovery = 0;
+#endif /* MD_BITMAP_SUPPORT */
 			} else {
 				md_wakeup_thread(mddev->sync_thread);
 			}
@@ -3745,4 +4087,7 @@ EXPORT_SYMBOL(md_unregister_thread);
 EXPORT_SYMBOL(md_wakeup_thread);
 EXPORT_SYMBOL(md_print_devices);
 EXPORT_SYMBOL(md_check_recovery);
+#ifdef MD_BITMAP_SUPPORT
+EXPORT_SYMBOL(md_throttle);
+#endif /* MD_BITMAP_SUPPORT */
 MODULE_LICENSE("GPL");
--- linux-2.6/drivers/md/Kconfig.orig	2005-07-13 13:27:18.467236952 +0200
+++ linux-2.6/drivers/md/Kconfig	2005-07-18 00:49:28.215689466 +0200
@@ -104,6 +104,29 @@ config MD_RAID10
 
 	  If unsure, say Y.
 
+config MD_FR1
+        bool "FR-1 (fast intelligent mirroring) mode (EXPERIMENTAL)"
+        depends on BLK_DEV_MD && EXPERIMENTAL && MD_RAID1
+        ---help---
+          This driver offers faster software RAID-1 performance than the
+          standard RAID1 driver when resynchronizing disks and when reading,
+          and has various optimizations designed to automate administration.
+
+          If you want to use the FR-1 driver instead of a standard RAID1
+          driver, say Y. This option modifies the raid1 code directly.
+
+          If unsure, say N.
+
+config MD_RAID1_ROBUST_READ
+        bool "Robust reads for RAID1 (EXPERIMENTAL)"
+        depends on BLK_DEV_MD && EXPERIMENTAL && MD_RAID1
+        ---help---
+          This option makes RAID1 more robust in the face of read
+          errors from component disks. The disk will not be faulted but
+          the read will be retried from redundant data.
+
+          If unsure, say N.
+
 config MD_RAID5
 	tristate "RAID-4/RAID-5 mode"
 	depends on BLK_DEV_MD
@@ -161,6 +184,26 @@ config MD_MULTIPATH
 
 	  If unsure, say N.
 
+config MD_BITMAP
+        tristate "Bitmap support for fast raid (EXPERIMENTAL)"
+        depends on MD_FR1
+        ---help---
+          This driver provides bitmap support for Fast RAID.
+
+          Information about Software RAID on Linux is contained in the
+          Software-RAID mini-HOWTO, available from
+          <http://www.tldp.org/docs.html#howto>.  There you will also
+          learn where to get the supporting user space utilities raidtools.
+
+          If you want to use any of the Fast RAID driver options like FR1,
+          say Y.  This code is also available as a module called
+          bitmap.ko ( = code which can be inserted in and removed from
+          the running kernel whenever you want).  If you want to compile
+          it as a module, say M here and read
+          <file:Documentation/modules.txt>. 
+
+          If unsure, say N.
+
 config MD_FAULTY
 	tristate "Faulty test module for MD"
 	depends on BLK_DEV_MD
--- linux-2.6/drivers/md/Makefile.orig	2005-07-13 13:27:25.354002964 +0200
+++ linux-2.6/drivers/md/Makefile	2005-07-18 00:49:43.003039832 +0200
@@ -26,6 +26,7 @@ obj-$(CONFIG_MD_RAID1)		+= raid1.o
 obj-$(CONFIG_MD_RAID10)		+= raid10.o
 obj-$(CONFIG_MD_RAID5)		+= raid5.o xor.o
 obj-$(CONFIG_MD_RAID6)		+= raid6.o xor.o
+obj-$(CONFIG_MD_BITMAP)		+= bitmap.o
 obj-$(CONFIG_MD_MULTIPATH)	+= multipath.o
 obj-$(CONFIG_MD_FAULTY)		+= faulty.o
 obj-$(CONFIG_BLK_DEV_MD)	+= md.o

