--- linux-2.4.30/include/linux/raid/raid1.h.pre-fr1	Sun Aug 12 21:39:02 2001
+++ linux-2.4.30/include/linux/raid/raid1.h	Wed Apr  6 19:17:25 2005
@@ -59,6 +59,15 @@
 	md_wait_queue_head_t	wait_done;
 	md_wait_queue_head_t	wait_ready;
 	md_spinlock_t		segment_lock;
+
+	long                    last_clean_sector;  /* helps debugging */
+	long                    last_dirty_sector;
+	int                     sync_mode;          /* clean/dirty in sync? */
+	void                    *bitmap;            /* the array bitmap */
+	int                     bitmap_dirty;       /* flag */
+	int                     latency[MD_SB_DISKS];
+	int                     last_source;        /* PTB disk read from */
+
 };
 
 typedef struct raid1_private_data raid1_conf_t;
@@ -86,9 +95,19 @@
 	struct buffer_head	*mirror_bh_list;
 	struct buffer_head	bh_req;
 	struct raid1_bh		*next_r1;	/* next for retry or in free list */
+	int		        nonoperational; /* no of bad mirror comps */
+        unsigned long           start_jiffies;  /* PTB when i/o started */
 };
 /* bits for raid1_bh.state */
 #define	R1BH_Uptodate	1
 #define	R1BH_SyncPhase	2
 #define	R1BH_PreAlloc	3	/* this was pre-allocated, add to free list */
+#define	R1BH_AsyncPhase 4
+#define	R1BH_AsyncIO    5
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+#define	R1BH_ReadRetry  6
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
+#ifdef CONFIG_MD_RAID1_READ_WRITE_CORRECT
+#define	R1BH_ReadRewrite 7
+#endif /* CONFIG_MD_RAID1_READ_WRITE_CORRECT */
 #endif
--- linux-2.4.30/include/linux/raid/md_p.h.pre-fr1	Tue Nov 14 22:16:37 2000
+++ linux-2.4.30/include/linux/raid/md_p.h	Wed Apr  6 18:18:04 2005
@@ -66,7 +66,7 @@
 #define MD_SB_GENERIC_WORDS		(MD_SB_GENERIC_CONSTANT_WORDS + MD_SB_GENERIC_STATE_WORDS)
 #define MD_SB_PERSONALITY_WORDS		64
 #define MD_SB_DESCRIPTOR_WORDS		32
-#define MD_SB_DISKS			27
+#define MD_SB_DISKS			26
 #define MD_SB_DISKS_WORDS		(MD_SB_DISKS*MD_SB_DESCRIPTOR_WORDS)
 #define MD_SB_RESERVED_WORDS		(1024 - MD_SB_GENERIC_WORDS - MD_SB_PERSONALITY_WORDS - MD_SB_DISKS_WORDS - MD_SB_DESCRIPTOR_WORDS)
 #define MD_SB_EQUAL_WORDS		(MD_SB_GENERIC_WORDS + MD_SB_PERSONALITY_WORDS + MD_SB_DISKS_WORDS)
@@ -154,6 +154,9 @@
 	/*
 	 * Reserved
 	 */
+#define MD_SB_BITMAP_REPAIR(sb) (sb)->reserved[0]
+#define MD_SB_EVENTS_LO(sb)     (sb)->reserved[2]
+#define MD_SB_EVENTS_HI(sb)     (sb)->reserved[3]
 	__u32 reserved[MD_SB_RESERVED_WORDS];
 
 	/*
--- linux-2.4.30/drivers/md/md.c.pre-fr1	Mon Aug 25 13:44:42 2003
+++ linux-2.4.30/drivers/md/md.c	Wed Apr  6 22:41:29 2005
@@ -26,6 +26,19 @@
    You should have received a copy of the GNU General Public License
    (for example /usr/src/linux/COPYING); if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+   Changes 31/1/2003 by Peter T.  Breuer <ptb@it.uc3m.es> to support
+   hotadd directly after setfaulty without intervening hotremove
+   ("hotrepair") when there is no persistent superblock, and to flag a
+   potential hotrepair when an old disk is re-added and the uuid matches
+   ours.  The flag is used by the raid1 driver, at the moment, in order
+   to trigger an intelligent resync.
+  
+   Yet more changes by PTB 12/3/2003 to notify devices via ioctls when
+   they have been incorporated or removed from a raid array.
+  
+   Yet more changes by PTB 26/3/2004 to make the speed calculations
+   appropriate to fr1, and throttle by real i/o, not resync total.
 */
 
 #include <linux/module.h>
@@ -108,6 +121,13 @@
 static int md_hardsect_sizes[MAX_MD_DEVS];
 static int md_maxreadahead[MAX_MD_DEVS];
 static mdk_thread_t *md_recovery_thread;
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+/* PTB md_throttle permits speed calculation adjustments from personality */
+#ifdef MODULE
+static
+#endif /* MODULE */
+atomic_t md_throttle[MAX_MD_DEVS];
+#endif /* MD_BITMAP_SUPPORT */
 
 int md_size[MAX_MD_DEVS];
 
@@ -524,7 +544,8 @@
 		printk(NO_SB,partition_name(dev));
 		return -EINVAL;
 	}
-	printk(KERN_INFO " [events: %08lx]\n", (unsigned long)rdev->sb->events_lo);
+	printk(KERN_INFO "%s (read) [events: %08lx]\n",
+                partition_name(rdev->dev), (unsigned long)rdev->sb->events_lo);
 	ret = 0;
 abort:
 	return ret;
@@ -611,6 +632,68 @@
 	return 0;
 }
 
+/* File-scope prototypes: ISO C forbids "static" on block-scope functions. */
+static mdk_rdev_t * find_rdev_all(kdev_t dev);
+static int hot_add_disk(mddev_t * mddev, kdev_t dev);
+static int set_disk_faulty(mddev_t *mddev, kdev_t dev);
+
+/*
+ * Run HOT_ADD_DISK or SET_DISK_FAULTY on the array owning dev, else -EINVAL.
+ */
+static int
+md_hot_add_disk(kdev_t dev, int cmd)
+{
+	mdk_rdev_t *rdev;
+	mddev_t *mddev;
+
+	rdev = find_rdev_all(dev);
+	if (!rdev || !rdev->mddev)
+		return -EINVAL;
+	mddev = rdev->mddev;
+
+	switch (cmd) {
+	case HOT_ADD_DISK:
+		return hot_add_disk(mddev, dev);
+	case SET_DISK_FAULTY:
+		return set_disk_faulty(mddev, dev);
+	default:
+		return -EINVAL;
+	}
+}
+
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+/* Tell component "dev" it has joined the array and hand it our hot-add hook. */
+static void notify_device (mddev_t * mddev, kdev_t dev)
+{
+#ifndef BLKMDNTFY
+#define BLKMDNTFY _IOW(0x12,133,int)
+#endif
+	struct block_device *bdev = bdget (dev);
+	printk (KERN_INFO "md%d: notifying dev %x\n", mdidx(mddev), dev);
+	if (!bdev)
+		return;
+	ioctl_by_bdev (bdev, BLKMDNTFY, MKDEV (MD_MAJOR, mddev->__minor));
+#ifndef BLKMDRGTR
+#define BLKMDRGTR _IOW(0x12,135,unsigned long)
+#endif
+	ioctl_by_bdev (bdev, BLKMDRGTR, (unsigned long)md_hot_add_disk);
+	bdput (bdev);	/* balance bdget(): its reference was never dropped */
+}
+/* Tell component "dev" that it has been removed from the array. */
+static void unnotify_device (mddev_t * mddev, kdev_t dev)
+{
+#ifndef BLKMDUNTFY
+#define BLKMDUNTFY _IOW(0x12,134,int)
+#endif
+	struct block_device *bdev = bdget (dev);
+	printk (KERN_INFO "md%d: unnotifying dev %x\n", mdidx(mddev), dev);
+	if (!bdev)
+		return;
+	ioctl_by_bdev(bdev, BLKMDUNTFY, MKDEV(MD_MAJOR, mddev->__minor));
+	bdput (bdev);	/* balance bdget(): its reference was never dropped */
+}
+#endif /* MD_BITMAP_SUPPORT */
+
 static MD_LIST_HEAD(all_raid_disks);
 static MD_LIST_HEAD(pending_raid_disks);
 
@@ -634,6 +717,9 @@
 	rdev->mddev = mddev;
 	mddev->nb_dev++;
 	printk(KERN_INFO "md: bind<%s,%d>\n", partition_name(rdev->dev), mddev->nb_dev);
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+        notify_device(mddev, rdev->dev);
+#endif /* MD_BITMAP_SUPPORT */
 }
 
 static void unbind_rdev_from_array(mdk_rdev_t * rdev)
@@ -642,6 +728,9 @@
 		MD_BUG();
 		return;
 	}
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+        unnotify_device(rdev->mddev, rdev->dev);
+#endif /* MD_BITMAP_SUPPORT */
 	md_list_del(&rdev->same_set);
 	MD_INIT_LIST_HEAD(&rdev->same_set);
 	rdev->mddev->nb_dev--;
@@ -2383,6 +2472,9 @@
 	unsigned int size;
 	mdk_rdev_t *rdev;
 	mdp_disk_t *disk;
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+        int hotrepair = 0;
+#endif /* MD_BITMAP_SUPPORT */
 
 	if (!mddev->pers)
 		return -ENODEV;
@@ -2398,11 +2490,48 @@
 
 	persistent = !mddev->sb->not_persistent;
 
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+        /*
+         * This is a do at most once loop because the remove in the loop will
+         * cause the test to fail the next time round. And if that
+         * doesn't break us out, then the hotrepair count will.
+         */
+        while ((rdev = find_rdev(mddev, dev)) != NULL) {
+
+	        if (hotrepair || rdev->dev != dev || !rdev->faulty) {
+	                printk(KERN_WARNING "md%d: cannot add existing component %x\n",
+                                mdidx(mddev), dev);
+	                return -EBUSY;
+                }
+        /*
+         * Allow "hotrepair" of merely faulty device too if no superblock to
+         * go by or (later) if there is a matching superblock. We assume then
+         * that hotadd after setfaulty of the same device is a
+         * hotrepair.
+         */
+	        printk(KERN_WARNING "md%d: repair of faulty disk %x!\n",
+	                mdidx(mddev), dev);
+
+         /* Remove will cause find_rdev to fail next time */
+	        err = hot_remove_disk(mddev, dev);
+                if (err < 0) {
+	                printk(KERN_WARNING "md%d: remove disk %x errored\n",
+                                mdidx(mddev), dev);
+	                return err;       
+                }
+        /* This will inevitably error us out of the loop interior next time */
+                hotrepair = 1;
+                rdev = NULL;
+        }
+
+	err = md_import_device (dev, persistent);
+#else
 	rdev = find_rdev(mddev, dev);
 	if (rdev)
 		return -EBUSY;
 
 	err = md_import_device (dev, 0);
+#endif /* MD_BITMAP_SUPPORT */
 	if (err) {
 		printk(KERN_WARNING "md: error, md_import_device() returned %d\n", err);
 		return -EINVAL;
@@ -2426,6 +2554,58 @@
 		err = -ENOSPC;
 		goto abort_export;
 	}
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+        /* let's check the new disk sb at this point */
+        if (persistent && rdev->sb 
+                && rdev->sb->set_uuid0 == mddev->sb->set_uuid0
+                && rdev->sb->set_uuid1 == mddev->sb->set_uuid1
+                && rdev->sb->set_uuid2 == mddev->sb->set_uuid2
+                && rdev->sb->set_uuid3 == mddev->sb->set_uuid3) {
+                unsigned long long disk_events, bitmap_events;
+                disk_events = rdev->sb->events_lo;
+                bitmap_events = 0;
+                //bitmap_events |= MD_SB_EVENTS_HI(mddev->sb);
+                //bitmap_events <<= 32;
+                bitmap_events |= MD_SB_EVENTS_LO(mddev->sb);
+
+                /* This is where we should examine conf->events_chkpt_*
+                 */
+                if (disk_events == bitmap_events - 1) {
+                        printk(KERN_WARNING "md%d: warning - new disk %x nearly too old for repair (disk %Lu < bitmap %Lu)\n",
+                        mdidx(mddev), dev, disk_events, bitmap_events);
+                }
+                if (disk_events < bitmap_events - 1) {
+                        /* new disk is too old! */
+                        hotrepair = 0;
+                        printk(KERN_INFO "md%d: new disk %x too old for repair (disk %Lu < bitmap %Lu)\n",
+                                mdidx(mddev), dev, disk_events, bitmap_events);
+                } else {
+                        hotrepair = 1;
+                        printk(KERN_INFO "md%d: repairing old mirror component %x (disk %Lu >= bitmap %Lu)\n",
+                                mdidx(mddev), dev, disk_events, bitmap_events);
+                }
+        } else if (!persistent && hotrepair) {
+                hotrepair = 1;
+                printk(KERN_INFO "md: forced repair of mirror component %x\n",
+                        dev);
+        } else {
+                /* failed match */
+                hotrepair = 0;
+                printk(KERN_INFO "md: adding new mirror component %x\n",
+                        dev);
+                printk(KERN_DEBUG "md: old uuid %x %x %x %x\n",
+                        mddev->sb->set_uuid0,
+                        mddev->sb->set_uuid1,
+                        mddev->sb->set_uuid2,
+                        mddev->sb->set_uuid3);
+                printk(KERN_DEBUG "md: new uuid %x %x %x %x\n",
+                        rdev->sb->set_uuid0,
+                        rdev->sb->set_uuid1,
+                        rdev->sb->set_uuid2,
+                        rdev->sb->set_uuid3);
+        }
+#endif /* MD_BITMAP_SUPPORT */
+
 	bind_rdev_to_array(rdev, mddev);
 
 	/*
@@ -2480,6 +2660,17 @@
 	mddev->sb->spare_disks++;
 	mddev->sb->working_disks++;
 
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+        /*
+         * Maybe say something nice - 1 means we want to respect
+         * the bitmap in raid1 resync if there is one, 0
+         * means we need to kill any bitmap that we have been
+         * saving but we'll do it in the raid1 resync instead of here
+         */
+        printk(KERN_DEBUG "md%d: set repair bit to %d on superblock\n",
+                mdidx(mddev), hotrepair);
+        MD_SB_BITMAP_REPAIR(mddev->sb) = hotrepair;
+#endif /* MD_BITMAP_SUPPORT */
 	mddev->sb_dirty = 1;
 	md_update_sb(mddev);
 
@@ -3419,6 +3610,10 @@
 	mddev_t *mddev2;
 	unsigned int max_sectors, currspeed,
 		j, window, err, serialize;
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+	/* PTB add realspeed for i/o limiting calculation */
+	unsigned realspeed;
+#endif /* MD_BITMAP_SUPPORT */
 	unsigned long mark[SYNC_MARKS];
 	unsigned long mark_cnt[SYNC_MARKS];
 	int last_mark,m;
@@ -3488,6 +3683,9 @@
 	atomic_set(&mddev->recovery_active, 0);
 	init_waitqueue_head(&mddev->recovery_wait);
 	last_check = 0;
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+        atomic_set(&md_throttle[mdidx(mddev)], 0);
+#endif /* MD_BITMAP_SUPPORT */
 	for (j = 0; j < max_sectors;) {
 		int sectors;
 
@@ -3515,6 +3713,10 @@
 
 			mddev->resync_mark = mark[next];
 			mddev->resync_mark_cnt = mark_cnt[next];
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+                        /* PTB reset count of skipped blocks this mark */
+                        atomic_set(&md_throttle[mdidx(mddev)], 0);
+#endif /* MD_BITMAP_SUPPORT */
 			mark[next] = jiffies;
 			mark_cnt[next] = j - atomic_read(&mddev->recovery_active);
 			last_mark = next;
@@ -3540,16 +3742,34 @@
 		 * about not overloading the IO subsystem. (things like an
 		 * e2fsck being done on the RAID array should execute fast)
 		 */
-		if (md_need_resched(current))
-			schedule();
+		if (md_need_resched(current)) {
+			/* PTB this seems not to progress when over loop dev */
+ 
+			current->state = TASK_INTERRUPTIBLE;
+			md_schedule_timeout(1);
+                }
 
 		currspeed = (j-mddev->resync_mark_cnt)/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
 
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+                /*
+                 * PTB some of the blocks are skipped, not synced, so
+                 * should not count when limiting i/o. Let personality say.
+                 */
+		realspeed = (j - mddev->resync_mark_cnt - atomic_read(&md_throttle[mdidx(mddev)]))/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
+#endif /* MD_BITMAP_SUPPORT */
+ 
 		if (currspeed > sysctl_speed_limit_min) {
 			current->nice = 19;
 
-			if ((currspeed > sysctl_speed_limit_max) ||
-					!is_mddev_idle(mddev)) {
+			if (
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+                        /* PTB use realspeed for upper limit on i/o */
+                            (realspeed > sysctl_speed_limit_max) ||
+#else
+                            (currspeed > sysctl_speed_limit_max) ||
+#endif /* MD_BITMAP_SUPPORT */
+                                        !is_mddev_idle(mddev)) {
 				current->state = TASK_INTERRUPTIBLE;
 				md_schedule_timeout(HZ/4);
 				goto repeat;
@@ -4108,6 +4328,9 @@
 }
 #endif
 
+#if defined(CONFIG_MD_BITMAP) || defined(CONFIG_MD_BITMAP_MODULE)
+MD_EXPORT_SYMBOL(md_throttle);
+#endif /* MD_BITMAP_SUPPORT */
 MD_EXPORT_SYMBOL(md_size);
 MD_EXPORT_SYMBOL(register_md_personality);
 MD_EXPORT_SYMBOL(unregister_md_personality);
--- linux-2.4.30/drivers/md/raid1.c.pre-fr1	Mon Apr  4 03:42:19 2005
+++ linux-2.4.30/drivers/md/raid1.c	Wed Apr  6 22:38:41 2005
@@ -20,6 +20,28 @@
  * You should have received a copy of the GNU General Public License
  * (for example /usr/src/linux/COPYING); if not, write to the Free
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Changes by Peter T. Breuer <ptb@it.uc3m.es> 31/1/2003 to support
+ * bitmapped intelligence in resync:
+ *
+ *      - bitmap attached on setfaulty (mark bad)
+ *      - bitmap marked during normal i/o if faulty disk
+ *      - bitmap used to skip nondirty blocks during sync
+ *      - bitmap removed on set active
+ *
+ *   Minor changes are needed in raid1.h (extra fields in conf) and in
+ *   md.c (support hotadd directly after setfaulty, or disk recognition).
+ *
+ * More changes by PTB 20/2/2003 to let the bitmap always be present and
+ * thus allow asynchronous mirror writes by using it as a journal log.
+ *
+ * Changes by PTB 10/8/2004 to redo read-balancing so that it reads
+ * from the fastest disk, as determined by latency testing every so
+ * often.
+ *
+ * Changes by PTB 6/1/2005 to make read errors not fault the disk out
+ * of the array but cause retries instead. And also (with CORRECT set)
+ * trigger rewrite of the bad sector.
  */
 
 #include <linux/module.h>
@@ -32,7 +54,15 @@
 #define MD_DRIVER
 #define MD_PERSONALITY
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+#define MAX_WORK_PER_DISK (128 * 8)
+#define MAX_TEST_PER_DISK 64
+#define LATENCY_OLD_WEIGHT 9
+#define LATENCY_NEW_WEIGHT 1
+#define LATENCY_SUM_WEIGHT (LATENCY_OLD_WEIGHT + LATENCY_NEW_WEIGHT)
+#else
 #define MAX_WORK_PER_DISK 128
+#endif /* CONFIG_MD_FR1 */
 
 #define	NR_RESERVED_BUFS	32
 
@@ -50,11 +80,19 @@
 #define PRINTK(x...)  do { } while (0)
 #endif
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+#include "bitmap.h"
+#endif /* CONFIG_MD_FR1 */
 
 static mdk_personality_t raid1_personality;
 static md_spinlock_t retry_list_lock = MD_SPIN_LOCK_UNLOCKED;
 struct raid1_bh *raid1_retry_list = NULL, **raid1_retry_tail;
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+/* module params */
+static int async;   /* async writes */
+#endif /* CONFIG_MD_FR1 */
+
 static struct buffer_head *raid1_alloc_bh(raid1_conf_t *conf, int cnt)
 {
 	/* return a linked list of "cnt" struct buffer_heads.
@@ -325,6 +363,9 @@
 {
 	raid1_conf_t *conf = mddev_to_conf(mddev);
 	int i, disks = MD_SB_DISKS;
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+        kdev_t dev = *rdev;
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
 	unsigned long flags;
 
 	/*
@@ -332,6 +373,30 @@
 	 * now we use the first available disk.
 	 */
 
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+	/*
+	 * Uh, no. Choose the next disk if we can, not the first.
+	 */
+	md_spin_lock_irqsave(&conf->device_lock, flags);
+	for (i = 0; i < conf->raid_disks; i++) {
+		if (conf->mirrors[i].dev == dev)
+			break;
+	}
+	if (++i >= conf->raid_disks)	/* step past it, wrapping at the end */
+		i = 0;
+	for (; i < conf->raid_disks; i++) {
+		if (conf->mirrors[i].operational) {
+			*rdev = conf->mirrors[i].dev;
+			/* must not return with device_lock held and irqs off */
+			md_spin_unlock_irqrestore(&conf->device_lock, flags);
+			return (0);
+		}
+	}
+	md_spin_unlock_irqrestore(&conf->device_lock, flags);
+	/*
+	 * Found nothing: drop through and use the old routine.
+	 */
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
 	md_spin_lock_irqsave(&conf->device_lock, flags);
 	for (i = 0; i < disks; i++) {
 		if (conf->mirrors[i].operational) {
@@ -400,13 +465,65 @@
 static void raid1_end_bh_io (struct raid1_bh *r1_bh, int uptodate)
 {
 	struct buffer_head *bh = r1_bh->master_bh;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        raid1_conf_t * conf = mddev_to_conf(r1_bh->mddev);
 
+        /* finish the master bh only if R1BH_AsyncPhase was not already set, so its end_io runs once */
+	if (!test_and_set_bit(R1BH_AsyncPhase, &r1_bh->state)) {
+
+                PRINTK(KERN_DEBUG "raid1: sync end i/o on sectors %lu-%lu\n",
+                        bh->b_rsector, bh->b_rsector + (bh->b_size >> 9) - 1);
 
+	        io_request_done(bh->b_rsector, conf,
+	                test_bit(R1BH_SyncPhase, &r1_bh->state));
+	        bh->b_end_io(bh, uptodate);
+        } 
+#else
 	io_request_done(bh->b_rsector, mddev_to_conf(r1_bh->mddev),
 	                test_bit(R1BH_SyncPhase, &r1_bh->state));
 	bh->b_end_io(bh, uptodate);
+#endif /* CONFIG_MD_FR1 */
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        /* clear the bitmap bits only after a good WRITE with no non-operational mirror counted */
+        if (uptodate && r1_bh->cmd == WRITE && r1_bh->nonoperational <= 0) {
+                struct bitmap * bitmap = conf->bitmap;
+                if (bitmap && bitmap->active(bitmap)) {
+                        bitmap->clearbits(bitmap, (bitmap_offset_t) (bh->b_rsector >> 1), bh->b_size >> 10);
+                }
+        }
+        /* PTB fold the elapsed time of a good read into that mirror's latency average */
+        if (uptodate && (r1_bh->cmd == READ || r1_bh->cmd == READA)) {
+                unsigned long latency = jiffies - r1_bh->start_jiffies;
+                kdev_t dev = (&r1_bh->bh_req)->b_dev;
+                int i;
+
+                /* PTB find the mirror component whose dev matches bh_req's b_dev */
+                for (i = 0; i < conf->raid_disks; i++) {
+                    if (conf->mirrors[i].dev == dev)
+                        break;
+                }
+                if (i < conf->raid_disks) {
+                        if (latency < 120 * HZ && latency >= 0) {
+                            /* PTB scale by SUM*SUM, then weighted mean (9 old + 1 new) / 10.
+                             *     NOTE(review): latency is unsigned, so ">= 0" is always true */
+                                latency *= LATENCY_SUM_WEIGHT * LATENCY_SUM_WEIGHT;
+                                conf->latency[i] = LATENCY_OLD_WEIGHT * conf->latency[i]
+                                                 + LATENCY_NEW_WEIGHT * latency;
+                                conf->latency[i] /= LATENCY_SUM_WEIGHT;
+                        } else {
+		               printk(KERN_ERR "raid1: bad latency %lu jiffies\n", 
+			         latency);
+                        }
+                } else {
+		       printk(KERN_ERR "raid1: could not find dev %02x:%02x\n", 
+                               MAJOR(dev), MINOR(dev));
+                }
+        }
+#endif /* CONFIG_MD_FR1 */
         raid1_free_r1bh(r1_bh);
 }
+
 void raid1_end_request (struct buffer_head *bh, int uptodate)
 {
 	struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_private);
@@ -414,9 +531,27 @@
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (!uptodate)
-		md_error (r1_bh->mddev, bh->b_dev);
-	else
+	if (!uptodate) {
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+		/* Only fault disk out of array on write error, not read. */
+		if (r1_bh->cmd == WRITE) {
+			printk(KERN_ALERT
+			  "raid1: erroring bh WRITE for sector %ld\n",
+				  bh->b_rsector);
+			md_error (r1_bh->mddev, bh->b_dev);
+		}
+#ifdef CONFIG_MD_RAID1_READ_WRITE_CORRECT
+		else {	/* tell next time we're here that we're a retry */
+			printk(KERN_ALERT
+			  "raid1: set retry bit on bh READ for sector %ld\n",
+				  bh->b_rsector);
+			set_bit(R1BH_ReadRetry, &r1_bh->state);
+		}
+#endif /* CONFIG_MD_RAID1_READ_WRITE_CORRECT */
+#else
+		md_error (r1_bh->mddev, bh->b_dev);
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
+	} else
 		/*
 		 * Set R1BH_Uptodate in our master buffer_head, so that
 		 * we will return a good error code for to the higher
@@ -438,7 +573,21 @@
 		 * we have only one buffer_head on the read side
 		 */
 		
-		if (uptodate) {
+               if (uptodate
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+			/* Give up and error if we're last */
+			|| atomic_dec_and_test(&r1_bh->remaining)
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
+			) {
+#ifdef CONFIG_MD_RAID1_READ_WRITE_CORRECT
+			if (uptodate && test_bit(R1BH_ReadRewrite, &r1_bh->state)) {
+				/* Success at last - rewrite failed reads */
+                                r1_bh->cmd = SPECIAL;
+				raid1_reschedule_retry(r1_bh);
+                                return;
+			} else
+#endif /* CONFIG_MD_RAID1_READ_WRITE_CORRECT */
+
 			raid1_end_bh_io(r1_bh, uptodate);
 			return;
 		}
@@ -447,6 +596,13 @@
 		 */
 		printk(KERN_ERR "raid1: %s: rescheduling block %lu\n", 
 			 partition_name(bh->b_dev), bh->b_blocknr);
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+               /*
+                * if not uptodate and not the last possible try,
+                * bh will be rescheduled and repointed while on the
+                * queue, by raid1_map.
+                */
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
 		raid1_reschedule_retry(r1_bh);
 		return;
 	}
@@ -456,10 +612,39 @@
 	 *
 	 * Let's see if all mirrored write operations have finished 
 	 * already.
+         *
+         * In any case, do the end io early on the master bh if we are
+         * uptodate, and AsyncIO is set on the bh. We set AsyncPhase
+         * when this happens, so we don't do it twice, inadvertently.
 	 */
+		
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        if (uptodate
+        &&  test_bit(R1BH_AsyncIO, &r1_bh->state)
+        && !test_and_set_bit(R1BH_AsyncPhase, &r1_bh->state)) {
 
-	if (atomic_dec_and_test(&r1_bh->remaining))
+	        struct buffer_head *mbh = r1_bh->master_bh;
+
+	        raid1_conf_t *conf = mddev_to_conf(r1_bh->mddev);
+
+                PRINTK(KERN_DEBUG "raid1: async end i/o on sectors %lu-%lu\n",
+                        mbh->b_rsector, mbh->b_rsector + (mbh->b_size >> 9) - 1);
+
+	        io_request_done(mbh->b_rsector, conf,
+			test_bit(R1BH_SyncPhase, &r1_bh->state));
+	        mbh->b_end_io(mbh, uptodate);
+        }
+#endif /* CONFIG_MD_FR1 */
+
+	if (atomic_dec_and_test(&r1_bh->remaining)) {
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	        if (test_and_set_bit(R1BH_AsyncIO, &r1_bh->state)) {
+                        /* we made a copy for the buffer, remove it now */
+                        kfree(bh->b_data);
+                }
+#endif /* CONFIG_MD_FR1 */
 		raid1_end_bh_io(r1_bh, test_bit(R1BH_Uptodate, &r1_bh->state));
+        }
 }
 
 /*
@@ -520,7 +705,7 @@
 	 * Don't touch anything for sequential reads.
 	 */
 
-	if (this_sector == conf->mirrors[new_disk].head_position)
+	if (0 && /* PTB */ this_sector == conf->mirrors[new_disk].head_position)
 		goto rb_out;
 	
 	/*
@@ -531,7 +716,16 @@
 	 */
 	
 	if (conf->sect_count >= conf->mirrors[new_disk].sect_limit) {
-		conf->sect_count = 0;
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                PRINTK(KERN_INFO
+                  "raid1: disk %d latency %d abandoned after %d sectors\n",
+                  new_disk,
+                  conf->latency[new_disk],
+	          conf->sect_count);
+
+                /* PTB move on to run a short test on the next disk */
+#endif /* CONFIG_MD_FR1 */
 
 #if defined(CONFIG_SPARC64) && (__GNUC__ == 2) && (__GNUC_MINOR__ == 92)
 		/* Work around a compiler bug in egcs-2.92.11 19980921 */
@@ -546,6 +740,39 @@
 		} while ((conf->mirrors[new_disk].write_only) ||
 			 (!conf->mirrors[new_disk].operational));
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                /* PTB if tested all, need to choose best */
+                if (new_disk == conf->last_source) {
+
+                        int fastest = -1;
+                        unsigned long best_latency = 0x7fffffff;
+                        int i;
+
+	                for (i = 0; i < conf->raid_disks; i++) {
+	                        if (conf->mirrors[i].write_only
+                                || !conf->mirrors[i].operational)
+                                        continue;
+                                if (conf->latency[i] <= best_latency) {
+                                    best_latency = conf->latency[i];
+                                    fastest = i;
+                                }
+                        }
+                        if (fastest >= 0)
+                                new_disk = fastest;
+	                conf->mirrors[new_disk].sect_limit = MAX_WORK_PER_DISK;
+                        conf->last_source = new_disk;
+                } else {
+                        /* PTB only a short test run */
+	                conf->mirrors[new_disk].sect_limit = MAX_TEST_PER_DISK;
+                }
+
+		conf->sect_count = 0;
+                PRINTK(KERN_DEBUG
+                  "raid1: choosing disk %d latency %d\n",
+                  new_disk,
+                  conf->latency[new_disk]);
+#endif /* CONFIG_MD_FR1 */
+
 		goto rb_out;
 	}
 	
@@ -596,6 +823,11 @@
 	int disks = MD_SB_DISKS;
 	int i, sum_bhs = 0;
 	struct mirror_info *mirror;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	int sum_nobhs = 0;
+	struct bitmap * bitmap = conf->bitmap;
+	char * async_data; // copy of buffer used for async writes
+#endif /* CONFIG_MD_FR1 */
 	kdev_t dev;
 
 	if (!buffer_locked(bh))
@@ -635,6 +867,10 @@
 	r1_bh->master_bh = bh;
 	r1_bh->mddev = mddev;
 	r1_bh->cmd = rw;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	r1_bh->start_jiffies = jiffies; /* PTB record start time */
+	async_data = NULL;
+#endif /* CONFIG_MD_FR1 */
 
 	if (rw == READ) {
 		/*
@@ -653,6 +889,20 @@
 	/*	bh_req->b_rsector = bh->n_rsector; */
 		bh_req->b_end_io = raid1_end_request;
 		bh_req->b_private = r1_bh;
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+		atomic_set(&r1_bh->remaining, 0);
+		/* count target devices under spinlock */
+		md_spin_lock_irq(&conf->device_lock);
+		for (i = 0;  i < disks; i++) {
+	                if (!conf->mirrors[i].operational
+                        ||  !conf->mirrors[i].used_slot) {
+                                	continue;
+			} 
+			atomic_inc(&r1_bh->remaining);
+		}
+		md_spin_unlock_irq(&conf->device_lock);
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
+
 		generic_make_request (rw, bh_req);
 		return 0;
 	}
@@ -662,11 +912,65 @@
 	 */
 
 	bhl = raid1_alloc_bh(conf, conf->raid_disks);
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        if (bitmap && bitmap->active(bitmap)) {
+                /* conf->bitmap may be NULL; the other users test it first */
+                int err = bitmap->setbits(bitmap, (bitmap_offset_t) (bh->b_rsector >> 1),
+                        bh->b_size >> 10);
+
+                /*
+                 * PTB Do async i/o if we marked the bitmap (so it's safe to) and
+                 * we are supposed to. GFP_NOIO: we are in the i/o submission path.
+                 */
+                if (async && err >= 0) {
+                        async_data = kmalloc(bh->b_size, GFP_NOIO);
+                        if (async_data) {
+                                memcpy(async_data, bh->b_data, bh->b_size);
+                                set_bit(R1BH_AsyncIO, &r1_bh->state);
+                        }
+                }
+                /*
+                 * PTB Even if the async bit is not set then we STILL need to
+                 * balance the setbits above with a clearbits in the end_io 
+                 * whether setbits errored or not above. That's because
+                 * setbits errors if the bitmap page is not there and
+                 * then we can only count attempted writes in the bitmap,
+                 * not actual writes, so we have to balance that with
+                 * attempted clears. And we do. See the end_io.
+                 */
+        }
+#endif /* CONFIG_MD_FR1 */
 	spin_lock_irq(&conf->device_lock);
 	for (i = 0; i < disks; i++) {
 		struct buffer_head *mbh;
-		if (!conf->mirrors[i].operational) 
+		if (!conf->mirrors[i].operational) {
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                        struct bitmap * bitmap = conf->bitmap;
+
+                        if (!conf->mirrors[i].used_slot) {
+                                continue; 
+                        }
+
+                        /* notionally mark bitmap here */
+                        if (sum_nobhs++ <= 0) {
+                                PRINTK(KERN_DEBUG "raid1: mark mirror %d blk %lu-%lu\n",
+                                i, bh->b_rsector >> 1,
+                                (bh->b_rsector >> 1) + (bh->b_size >> 10) - 1);
+                        }
+
+                        if (!conf->bitmap_dirty && bitmap->active(bitmap)) {
+                                conf->bitmap_dirty = 1;
+                                MD_SB_EVENTS_LO(mddev->sb) =
+                                    mddev->sb->events_lo;
+                                MD_SB_EVENTS_HI(mddev->sb) =
+                                    mddev->sb->events_hi;
+                        }
+
+#endif /* CONFIG_MD_FR1 */
 			continue;
+                }
  
 	/*
 	 * We should use a private pool (size depending on NR_REQUEST),
@@ -703,6 +1007,10 @@
  		mbh->b_size       = bh->b_size;
  		mbh->b_page	  = bh->b_page;
  		mbh->b_data	  = bh->b_data;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+ 		mbh->b_data       =
+                 test_bit(R1BH_AsyncIO, &r1_bh->state)? async_data : bh->b_data;
+#endif /* CONFIG_MD_FR1 */
  		mbh->b_list       = BUF_LOCKED;
  		mbh->b_end_io     = raid1_end_request;
  		mbh->b_private    = r1_bh;
@@ -719,6 +1027,9 @@
 		return 0;
 	}
 	md_atomic_set(&r1_bh->remaining, sum_bhs);
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	r1_bh->nonoperational = sum_nobhs;
+#endif /* CONFIG_MD_FR1 */
 
 	/*
 	 * We have to be a bit careful about the semaphore above, thats
@@ -769,6 +1080,85 @@
 #define ALREADY_SYNCING KERN_INFO \
 "raid1: syncing already in progress.\n"
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+/*
+ * Build the array bitmap: allocate the head, size it with bitmap_init
+ * and attach it to the raid1 configuration with bitmap_dirty cleared.
+ * Returns 0 on success or -ENOMEM if either step fails.
+ */
+static int
+raid1_create_bitmap(mddev_t *mddev) {
+        raid1_conf_t *conf = mddev_to_conf(mddev);
+        /* need size to have been set already; it is doubled for bitmap_init */
+        unsigned long blocks = mddev->sb->size << 1;
+        struct bitmap *fresh = kmalloc (sizeof (*fresh), GFP_KERNEL);
+
+        if (fresh == NULL) {
+                printk(KERN_WARNING "raid1: out of memory for bitmap head\n");
+                return -ENOMEM;
+        }
+        if (bitmap_init (fresh, blocks) < 0) {
+                printk(KERN_WARNING "raid1: failed to init bitmap\n");
+                kfree(fresh);
+                return -ENOMEM;
+        }
+
+        /* publish it under the lock that guards the configuration */
+        spin_lock_irq(&conf->segment_lock);
+        conf->bitmap = fresh;
+        conf->bitmap_dirty = 0;
+        spin_unlock_irq(&conf->segment_lock);
+        return 0;
+}
+
+/* Detach the bitmap from the configuration while holding segment_lock,
+ * then destroy and free it with the lock dropped.  No-op if none is set.
+ */
+static void
+raid1_remove_bitmap (mddev_t *mddev) {
+        raid1_conf_t *conf = mddev_to_conf(mddev);
+        struct bitmap *old;
+
+        spin_lock_irq(&conf->segment_lock);
+        old = conf->bitmap;
+        if (old != NULL)
+                conf->bitmap = NULL;
+        spin_unlock_irq(&conf->segment_lock);
+        if (old == NULL)
+                return;
+        bitmap_destr(old);
+        kfree(old);
+}
+
+/*
+ * Activate the array's bitmap, handing its start method the superblock
+ * event count.  Returns 0 if it is now, or already was, active; -EINVAL
+ * if there is no bitmap or it fails to start.  The pointer is printed
+ * with %p because an (unsigned) cast truncates 64-bit addresses.
+ */
+static int
+raid1_start_bitmap (mddev_t *mddev) {
+        raid1_conf_t *conf = mddev_to_conf(mddev);
+        struct bitmap * bitmap;
+
+        spin_lock_irq(&conf->segment_lock);
+        bitmap = conf->bitmap;
+        spin_unlock_irq(&conf->segment_lock);
+        if (!bitmap)
+                return -EINVAL;
+        if (bitmap->active(bitmap)) {
+                printk(KERN_WARNING "raid1: bitmap %p already active!\n", bitmap);
+                return 0;
+        }
+        if (bitmap->start(bitmap, md_event(mddev->sb)) < 0) {
+                printk(KERN_WARNING "raid1: bitmap %p failed to start!\n", bitmap);
+                return -EINVAL;
+        }
+        PRINTK(KERN_DEBUG "raid1: made bitmap %p\n", bitmap);
+        return 0;
+}
+#endif /* CONFIG_MD_FR1 */
+
 static void mark_disk_bad (mddev_t *mddev, int failed)
 {
 	raid1_conf_t *conf = mddev_to_conf(mddev);
@@ -777,6 +1167,13 @@
 
 	mirror->operational = 0;
 	mark_disk_faulty(sb->disks+mirror->number);
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        /*
+         * Activate the bitmap on a mirror just marked faulty (and
+         * nonoperational).
+         */
+	raid1_start_bitmap (mddev);
+#endif /* CONFIG_MD_FR1 */
 	mark_disk_nonsync(sb->disks+mirror->number);
 	mark_disk_inactive(sb->disks+mirror->number);
 	if (!mirror->write_only)
@@ -848,6 +1245,14 @@
 
 	for (i = 0; i < MD_SB_DISKS; i++) {
 		tmp = conf->mirrors + i;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+		/*
+		 * Remove repeats from debug printout.
+		 */
+		if (i > 0 && memcmp(tmp, &conf->mirrors[i-1], sizeof(*tmp)) == 0) {
+			continue;
+		}
+#endif /* CONFIG_MD_FR1 */
 		printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n",
 			i, tmp->spare,tmp->operational,
 			tmp->number,tmp->raid_disk,tmp->used_slot,
@@ -939,16 +1344,36 @@
 	case DISKOP_SPARE_WRITE:
 	case DISKOP_SPARE_INACTIVE:
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                PRINTK(KERN_DEBUG "raid1: diskop SPARE %s\n",
+                        state == DISKOP_SPARE_WRITE ? "WRITE" : 
+                        state == DISKOP_SPARE_INACTIVE ? "INACTIVE" : 
+                        state == DISKOP_SPARE_ACTIVE ? "ACTIVE" : ""
+                        );
+#endif /* CONFIG_MD_FR1 */
 		/*
 		 * Find the spare disk ... (can only be in the 'high'
 		 * area of the array)
 		 */
 		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
 			tmp = conf->mirrors + i;
-			if (tmp->spare && tmp->number == (*d)->number) {
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+			if (tmp->spare
+                        && (tmp->number == (*d)->number
+                        /*
+                         * I'm not sure we now need to allow match by
+                         * device number too. FIXME.
+                         */
+                            || tmp->dev == MKDEV((*d)->major,(*d)->minor))) {
 				spare_disk = i;
 				break;
 			}
+#else
+			if (tmp->spare && tmp->number == (*d)->number) {
+				spare_disk = i;
+				break;
+                        }
+#endif /* CONFIG_MD_FR1 */
 		}
 		if (spare_disk == -1) {
 			MD_BUG();
@@ -1104,6 +1529,10 @@
 		fdisk->spare = 0;
 		fdisk->write_only = 0;
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                PRINTK(KERN_DEBUG "raid1: diskop SPARE device %x now ACTIVE\n",
+                        fdisk->dev);
+#endif /* CONFIG_MD_FR1 */
 		/*
 		 * if we activate a spare, we definitely replace a
 		 * non-operational disk slot in the 'low' area of
@@ -1115,6 +1544,11 @@
 		break;
 
 	case DISKOP_HOT_REMOVE_DISK:
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                PRINTK(KERN_DEBUG "raid1: diskop HOT REMOVE\n");
+#endif /* CONFIG_MD_FR1 */
+
 		rdisk = conf->mirrors + removed_disk;
 
 		if (rdisk->spare && (removed_disk < conf->raid_disks)) {
@@ -1148,6 +1582,11 @@
 		adisk->head_position = 0;
 		conf->nr_disks++;
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                PRINTK(KERN_DEBUG "raid1: diskop HOT ADDed mirror %d disk %d bitmap %x\n",
+                        added_disk, adisk->number, (unsigned)conf->bitmap);
+#endif /* CONFIG_MD_FR1 */
+
 		break;
 
 	default:
@@ -1292,6 +1731,13 @@
 		case READA:
 			dev = bh->b_dev;
 			raid1_map (mddev, &bh->b_dev);
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+                        /* raid1_map incorrectly used to change target to
+                         * 0th disk always - now I hope it does a
+                         * better job than before and switches target to
+                         * next disk in the mirror.
+                         */
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
 			if (bh->b_dev == dev) {
 				printk (IO_ERROR, partition_name(bh->b_dev), bh->b_blocknr);
 				raid1_end_bh_io(r1_bh, 0);
@@ -1398,6 +1844,22 @@
 	int block_nr;
 	int buffs;
 	kdev_t dev;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        /*
+         * Will need to count mirror components currently with a bitmap
+         * which have been marked faulty and nonoperational at some
+         * point beforehand, and have been accumulating marks on the
+         * bitmap to indicate dirty blocks that need syncing.
+         */
+        struct bitmap * bitmap = conf->bitmap;
+        int count, block_not_dirty;
+        int targets[MD_SB_DISKS];
+        /*
+         * PTB discount the skipped sectors back to the md.c code
+         */
+        extern atomic_t md_throttle[];
+
+#endif /* CONFIG_MD_FR1 */
 
 	if (!sector_nr) {
 		/* we want enough buffers to hold twice the window of 128*/
@@ -1406,9 +1868,29 @@
 		if (buffs < 2)
 			goto nomem;
 		conf->window = buffs*(PAGE_SIZE>>9)/2;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                /* also remove bitmap if not indicated */
+                if (! MD_SB_BITMAP_REPAIR(mddev->sb)) {
+                        /* has to be outside spinlock as it takes it */
+                        printk(KERN_WARNING "md%d: removed bitmap %x\n",
+                                mdidx(mddev), (unsigned)bitmap);
+                        bitmap->stop (bitmap);
+                } else {
+                        printk(KERN_WARNING "md%d: retained bitmap %x\n",
+                                mdidx(mddev), (unsigned)bitmap);
+                }
+                /* reset the bitmap indicator always */
+                MD_SB_BITMAP_REPAIR(mddev->sb) = 0;
+#endif /* CONFIG_MD_FR1 */
 	}
 	spin_lock_irq(&conf->segment_lock);
 	if (!sector_nr) {
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                /* setup extra report counters for skipped/synced blocks */
+                conf->sync_mode = -1;
+                conf->last_clean_sector = -1;
+                conf->last_dirty_sector = -1;
+#endif /* CONFIG_MD_FR1 */
 		/* initialize ...*/
 		conf->start_active = 0;
 		conf->start_ready = 0;
@@ -1422,7 +1904,7 @@
 			MD_BUG();
 	}
 	while (sector_nr >= conf->start_pending) {
-		PRINTK("wait .. sect=%lu start_active=%d ready=%d pending=%d future=%d, cnt_done=%d active=%d ready=%d pending=%d future=%d\n",
+		PRINTK("wait .. sect=%lu start_active=%ld ready=%ld pending=%ld future=%ld, cnt_done=%d active=%d ready=%d pending=%d future=%d\n",
 			sector_nr, conf->start_active, conf->start_ready, conf->start_pending, conf->start_future,
 			conf->cnt_done, conf->cnt_active, conf->cnt_ready, conf->cnt_pending, conf->cnt_future);
 		wait_event_lock_irq(conf->wait_done,
@@ -1463,9 +1948,71 @@
 	conf->last_used = disk;
 	
 	mirror = conf->mirrors+conf->last_used;
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        /* PTB collect the operational write-only mirrors (sync targets), under lock */
+        count = 0;
+	while (1) {
+                const int maxdisk = 2 * conf->raid_disks - conf->working_disks;
+		if (disk <= 0)
+                        disk = maxdisk > MD_SB_DISKS ? MD_SB_DISKS : maxdisk;
+		disk--;
+		if (disk == conf->last_used)
+			break;
+                if (!conf->mirrors[disk].operational)
+                        continue;
+                /* We need them to be writable */
+                if (conf->mirrors[disk].write_only) {
+                        targets[count++] = disk;
+                }
+	}
+
+        bitmap = conf->bitmap;
+        block_not_dirty = bitmap->active(bitmap)
+            && !bitmap->testbits(bitmap, (bitmap_offset_t) (sector_nr >> 1), 1);
+#endif /* CONFIG_MD_FR1 */
+
 	dev = mirror->dev;
 	spin_unlock_irq(&conf->device_lock);
-	
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        if (count > 0 && block_not_dirty) {
+
+                const int done = 2 - (sector_nr & 1);
+
+	        md_sync_acct(mirror->dev, done);
+                sync_request_done(sector_nr, conf);
+		md_done_sync(mddev, done, 1);
+
+                /* do these conf accesses under lock, though only accounting */
+	        spin_lock_irq(&conf->segment_lock);
+                if (conf->sync_mode != 0) {
+                        if (conf->sync_mode == 1) {
+                                printk(KERN_INFO "raid1: synced dirty sectors %lu-%lu\n",
+                                conf->last_clean_sector+1,
+                                conf->last_dirty_sector);
+                        }
+                        conf->sync_mode = 0;
+                }
+
+                conf->last_clean_sector = sector_nr + done - 1;
+                if (mddev->sb && sector_nr + done >= mddev->sb->size<<1) {
+                        printk(KERN_INFO "raid1: skipped clean sectors %lu-%lu\n",
+                        conf->last_dirty_sector+1,
+                        conf->last_clean_sector);
+                }
+ 
+                /* PTB here be dragons - update md driver throttle discount */
+                atomic_add(done, &md_throttle[mdidx(mddev)]);
+	        spin_unlock_irq(&conf->segment_lock);
+
+		wake_up(&conf->wait_ready);
+                /* skip remainder of block */
+                return done;
+        }
+  	
+        /* read */
+#endif /* CONFIG_MD_FR1 */
 	r1_bh = raid1_alloc_buf (conf);
 	r1_bh->master_bh = NULL;
 	r1_bh->mddev = mddev;
@@ -1499,6 +2046,30 @@
 	generic_make_request(READ, bh);
 	md_sync_acct(bh->b_dev, bh->b_size/512);
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        /* printout info from time to time */
+	spin_lock_irq(&conf->segment_lock);
+        if (conf->sync_mode != 1) {
+                if (conf->sync_mode == 0) {
+                        printk(KERN_INFO "raid1: skipped clean sectors %lu-%lu\n",
+                        conf->last_dirty_sector+1,
+                        conf->last_clean_sector);
+ 
+ 
+                }
+                conf->sync_mode = 1;
+        }
+        conf->last_dirty_sector = sector_nr + (bsize >> 9) - 1;
+
+        if (mddev->sb && sector_nr + (bsize >> 9) >= mddev->sb->size<<1) {
+                printk(KERN_INFO "raid1: synced dirty sectors %lu-%lu\n",
+                conf->last_clean_sector+1,
+                conf->last_dirty_sector);
+        }
+ 
+	spin_unlock_irq(&conf->segment_lock);
+#endif /* CONFIG_MD_FR1 */
+
 	return (bsize >> 9);
 
 nomem:
@@ -1531,6 +2102,14 @@
 		mddev_t *mddev = r1_bh->mddev;
  		unsigned long sect = bh->b_blocknr;
 		int size = bh->b_size;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                raid1_conf_t * conf = mddev_to_conf(mddev);
+                struct bitmap * bitmap = conf->bitmap;
+                if (bitmap && bitmap->active(bitmap)) {
+                        /* PTB clean the bitmap after resync */
+                        bitmap->clearbits(bitmap, (bitmap_offset_t)(sect >> 1), size >> 10);
+                }
+#endif /* CONFIG_MD_FR1 */
 		raid1_free_buf(r1_bh);
 		sync_request_done(sect, mddev_to_conf(mddev));
 		md_done_sync(mddev,size>>9, uptodate);
@@ -1576,6 +2155,11 @@
 #define START_RESYNC KERN_WARNING \
 "raid1: raid set md%d not clean; reconstructing mirrors\n"
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+#define BITMAP_ERROR KERN_ERR \
+"raid1: out of memory for bitmap on md%d\n"
+#endif /* CONFIG_MD_FR1 */
+
 static int raid1_run (mddev_t *mddev)
 {
 	raid1_conf_t *conf;
@@ -1744,6 +2328,16 @@
 		/* nothing */;
 	conf->last_used = j;
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        /* make the bitmap at this point - hope mddev->size exists already */
+        if (raid1_create_bitmap(mddev) < 0) {
+                printk(BITMAP_ERROR, mdidx(mddev));
+		goto out_free_conf;
+        }
+
+        /* set it active too */
+        raid1_start_bitmap (mddev);
+#endif /* CONFIG_MD_FR1 */
 
 
 	{
@@ -1803,6 +2397,9 @@
 	raid1_shrink_r1bh(conf);
 	raid1_shrink_bh(conf);
 	raid1_shrink_buffers(conf);
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	raid1_remove_bitmap (mddev);
+#endif /* CONFIG_MD_FR1 */
 	kfree(conf);
 	mddev->private = NULL;
 out:
@@ -1864,6 +2461,9 @@
 	raid1_shrink_r1bh(conf);
 	raid1_shrink_bh(conf);
 	raid1_shrink_buffers(conf);
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	raid1_remove_bitmap (mddev);
+#endif /* CONFIG_MD_FR1 */
 	kfree(conf);
 	mddev->private = NULL;
 	MOD_DEC_USE_COUNT;
@@ -1896,4 +2496,8 @@
 
 module_init(raid1_init);
 module_exit(raid1_exit);
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+MODULE_PARM(async, "i");
+MODULE_PARM_DESC(async, "Do async writes");
+#endif /* CONFIG_MD_FR1 */
 MODULE_LICENSE("GPL");
--- linux-2.4.30/drivers/md/bitmap.c.pre-fr1	Wed Apr  6 18:18:04 2005
+++ linux-2.4.30/drivers/md/bitmap.c	Wed Apr  6 18:18:04 2005
@@ -0,0 +1,880 @@
+/*
+ * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
+ *
+ * bitmap_init   - sets nr blks
+ * bitmap->start - then calls the setup part for the 1st
+ *                 level in the bitmap, which uses memory (kmalloc) so
+ *                 can fail. You should examine the return value. 0 is
+ *                 OK. -ve is FAIL.
+ *
+ * bitmap->stop  - inverse to bitmap->start. (bitmap_stop only clears the
+ *                 active flag; the freeing is done in bitmap_destr.)
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/errno.h>
+#include <linux/wrapper.h>
+#include <linux/proc_fs.h>  // PTB for kmalloc! How?
+#include <linux/vmalloc.h>  // PTB for vmalloc
+#include <linux/init.h>
+#include <linux/locks.h>
+#include <linux/config.h>
+
+# define DEBUG 1
+
+#include "bitmap.h"
+
+/* use 16 bits of the address as extra bitmap */
+#define ZONESHIFT 4
+/* top 16 bits are nonzero */
+#define IS_ADDRESS(x) \
+  ((( ((unsigned long)(x)) >> ((sizeof(char*)<<3) - (1<<ZONESHIFT))) ) != 0)
+
+#ifndef PRINTK
+#  if DEBUG > 0 
+#    define PRINTK(x...) printk(x)
+#  else
+#    define PRINTK(x...)
+#  endif
+#endif
+
+/* common cache of ready to go pages */
+static kmem_cache_t * bitmap_page_cache;
+/* list of all created bitmaps */
+struct list_head bitmap_list;
+
+/*
+ * Hands a bitmap page back to the shared page cache; this is what the
+ * bitmap code calls instead of kfree.  A NULL page is quietly ignored
+ * and the bitmap argument is not consulted.
+ */
+static void
+bitmap_free_page(struct bitmap * bitmap, unsigned char * page) {
+        if (page != NULL) {
+                kmem_cache_free(bitmap_page_cache, (void *)page);
+        }
+}
+
+static void bitmap_stop(struct bitmap * bitmap); /* defined below */
+
+/*
+ * Tears a bitmap down: marks it inactive, unhooks it from the bitmap
+ * list and releases the page table, every map page and every zoneinfo
+ * block still attached.  The struct itself is left for the caller.
+ */
+void
+bitmap_destr(struct bitmap *bitmap) {
+        unsigned long k;
+        struct bitmap_page * bp;
+        unsigned long pages;
+
+        bitmap_stop(bitmap);
+
+        /* detach the page table under lock; it is freed once unlocked */
+        write_lock(&bitmap->lock);
+        bitmap->flags &= ~BITMAP_ACTIVE;
+        bp = bitmap->bp;
+        pages = bitmap->pages;
+        bitmap->bp = NULL;
+        list_del(&bitmap->list);
+        write_unlock(&bitmap->lock);
+
+        if (bp) {
+                for (k = 0; k < pages; k++) {
+                        unsigned char *mappage = bp[k].map;
+                        void *zoneinfo = bp[k].zoneinfo;
+
+                        bp[k].map = NULL;
+                        bp[k].zoneinfo = NULL;
+                        if (IS_ADDRESS(mappage)) {
+                                /* only zeroed pages may go back to the cache */
+                                memset (mappage, 0, PAGE_SIZE);
+                                bitmap_free_page (bitmap, mappage);
+                                bitmap->missing_pages++;
+                        }
+                        if (IS_ADDRESS(zoneinfo)) {
+                                kfree (zoneinfo);
+                                bitmap->missing_zones++;
+                        }
+                }
+                /* both missing counters are expected to equal pages by now */
+                if (bitmap->missing_pages < pages) {
+                        printk(KERN_WARNING "bitmap: mislaid %lu pages. Oops!\n",
+                                pages - bitmap->missing_pages);
+                }
+                if (bitmap->missing_pages > pages) {
+                        printk(KERN_WARNING "bitmap: freed %lu extra pages. Oops!\n",
+                                bitmap->missing_pages - pages);
+                }
+                if (bitmap->missing_zones < pages) {
+                        printk(KERN_WARNING "bitmap: mislaid %lu zoneinfo block. Oops!\n",
+                                pages - bitmap->missing_zones);
+                }
+                if (bitmap->missing_zones > pages) {
+                        printk(KERN_WARNING "bitmap: freed %lu extra zoneinfo block. Oops!\n",
+                                bitmap->missing_zones - pages);
+                }
+                vfree (bp);
+        }
+
+        bitmap_stop(bitmap);
+}
+
+/* Returns 1 when the bitmap has a page table and BITMAP_ACTIVE is set,
+ * 0 otherwise (also for a NULL bitmap); sampled under the read lock.
+ */
+static int
+bitmap_active(struct bitmap * bitmap) {
+        int active = 0;
+        if (bitmap != NULL) {
+                read_lock(&bitmap->lock);
+                active = (bitmap->flags & BITMAP_ACTIVE) && bitmap->bp;
+                read_unlock(&bitmap->lock);
+        }
+        return active;
+}
+
+/*
+ * Fetches one page for the bitmap from the shared page cache; this is
+ * used where kmalloc would otherwise be.  The result may be NULL.
+ *
+ * Nothing is zeroed here: the original note (PTB) says the cache
+ * constructor does the zeroing and that only clean pages are ever
+ * handed back to the cache.  The bitmap argument is unused.
+ */
+static unsigned char *
+bitmap_alloc_page(struct bitmap *bitmap) {
+        /* plain GFP_KERNEL request against the common cache */
+        return kmem_cache_alloc(bitmap_page_cache, GFP_KERNEL);
+}
+
+/*
+ * Deactivate the bitmap: clear BITMAP_ACTIVE while holding the write
+ * lock.  No pages or tables are released here.
+ */
+static void
+bitmap_stop(struct bitmap * bitmap) {
+        write_lock(&bitmap->lock);
+        /* drop only the active bit; other flags are left alone */
+        bitmap->flags &= ~BITMAP_ACTIVE;
+        write_unlock(&bitmap->lock);
+}
+
+/*
+ * Marks the bitmap active and primes the free page cache.  On first use
+ * it also builds the page table (vmalloc) and tries to give every page a
+ * zoneinfo block of backup counters (kmalloc).  Each block is zeroed as
+ * it is made: kmalloc memory is not cleared, and the counters can be
+ * bumped and tested before any other code resets them.  Returns 0, or
+ * -ENOMEM when the page table cannot be allocated; missing zoneinfo
+ * blocks only draw a warning.
+ */
+static int
+bitmap_start(struct bitmap * bitmap, u64 events) {
+        struct bitmap_page * bp;
+        unsigned long pages;
+
+        /* take lock to read data */
+        write_lock(&bitmap->lock);
+        pages = bitmap->pages;
+        bp    = bitmap->bp;
+        write_unlock(&bitmap->lock);
+
+        if (!bp) {
+                const size_t zonebytes = sizeof(*bp->zoneinfo) << ZONESHIFT;
+                unsigned long k;        /* compared against pages */
+
+                bp = vmalloc (pages * sizeof(*bp));
+                if (!bp) {
+                        printk(KERN_WARNING "bitmap: cannot get %luB of memory!\n",
+                                pages * sizeof(*bp));
+                        return -ENOMEM;
+                }
+                memset (bp, 0, pages * sizeof(*bp));
+
+                for (k = 0; k < pages; k++) {
+                        bp[k].zoneinfo = kmalloc (zonebytes, GFP_KERNEL);
+                        if (!bp[k].zoneinfo)
+                                continue;
+                        memset (bp[k].zoneinfo, 0, zonebytes);
+                        bitmap->missing_zones--;
+                }
+                if (bitmap->missing_zones > 0) {
+                        /* %u takes an unsigned int; size_t may be wider */
+                        printk(KERN_WARNING "bitmap: warning! cannot get %ld*%uB memory!\n",
+                              bitmap->missing_zones, (unsigned) zonebytes);
+                }
+        }
+
+        /*
+         * this is 16 shorts or 32 bytes + 4 bytes extra per page of 4096
+         * bytes, which is a reserve of less than 0.1%. Each page
+         * bitmaps 32MB of disk, so a 1GB disk takes 32 pages or 128KB, and
+         * a 1TB disk takes 128MB of pages. In those circumstances,
+         * adding a capital cost of about 1.08MB/TB doesn't seem bad.
+         */
+
+        write_lock(&bitmap->lock);
+        bitmap->bp = bp;
+        /* bp is nonzero here */
+        bitmap->flags |= BITMAP_ACTIVE;
+        bitmap->events = events;
+        write_unlock(&bitmap->lock);
+
+        /* seed the page cache */
+        bitmap_free_page(bitmap, bitmap_alloc_page(bitmap));
+        return 0;
+}
+
+/*
+ * Makes sure bitmap page `page' has a real map page behind it, trying to
+ * allocate and install one if not.  Returns
+ *    0       a map page is in place (possibly installed by somebody else),
+ *   -EINVAL  page is out of range, or pageless marks are still counted
+ *            against the slot so a map cannot be swapped in yet,
+ *   -ENODEV  the bitmap has no page table,
+ *   -ENOMEM  no page could be allocated.
+ * In the -ENOMEM case a slot without backup zone counters may be given a
+ * fresh, zeroed zoneinfo block, but only while its count is zero: the
+ * zone counters are consulted in preference to the count, so blank ones
+ * installed over outstanding marks would make dirty zones read as clean.
+ * Takes and drops the bitmap lock itself; allocations happen unlocked.
+ */
+static int
+bitmap_checkpage (struct bitmap *bitmap, unsigned long page)
+{
+        const size_t zonebytes = sizeof(*bitmap->bp->zoneinfo) << ZONESHIFT;
+        unsigned char * mappage;
+
+        read_lock(&bitmap->lock);
+        /* page is unsigned, so only the upper bound needs testing */
+        if (page >= bitmap->pages) {
+                read_unlock(&bitmap->lock);
+                return -EINVAL;
+        }
+        if (bitmap->bp == NULL) {
+                read_unlock(&bitmap->lock);
+                return -ENODEV;
+        }
+        if (IS_ADDRESS(bitmap->bp[page].map)) {
+                read_unlock(&bitmap->lock);
+                return 0;
+        }
+        read_unlock(&bitmap->lock);
+
+        /* the page address was NULL */
+        if ((mappage = bitmap_alloc_page(bitmap)) == NULL) {
+                /* failed - check to see if we have backup counters */
+                void * tmp;
+                int need_new_zoneinfo;
+
+                write_lock(&bitmap->lock);
+                need_new_zoneinfo = !IS_ADDRESS(bitmap->bp[page].zoneinfo)
+                        && bitmap->bp[page].count == 0;
+                write_unlock(&bitmap->lock);
+                if (!need_new_zoneinfo)
+                        return -ENOMEM;
+                /* rarely, we might make an extra backup counter */
+                tmp = kmalloc(zonebytes, GFP_KERNEL);
+                if (!tmp)
+                        return -ENOMEM;
+                memset(tmp, 0, zonebytes);
+                write_lock(&bitmap->lock);
+                if (IS_ADDRESS(bitmap->bp[page].zoneinfo)
+                    || bitmap->bp[page].count != 0) {
+                        /* things changed since the peek above, backout */
+                        need_new_zoneinfo = 0;
+                } else {
+                        bitmap->bp[page].zoneinfo = tmp;
+                        bitmap->missing_zones--;
+                }
+                write_unlock(&bitmap->lock);
+                if (!need_new_zoneinfo)
+                        kfree(tmp);
+                return -ENOMEM;
+        }
+
+        /* got a page - recheck the slot now that we hold the write lock */
+        write_lock(&bitmap->lock);
+        if (IS_ADDRESS(bitmap->bp[page].map)) {
+                /* somebody beat us to getting the page */
+                write_unlock(&bitmap->lock);
+                bitmap_free_page(bitmap, mappage);
+                return 0;
+        }
+        /* no page in place and we have one, so maybe install it */
+        if (bitmap->bp[page].count != 0) {
+                /* inpage bitmap - can't replace until no pending writes */
+                write_unlock(&bitmap->lock);
+                bitmap_free_page(bitmap, mappage);
+                return -EINVAL;
+        }
+        /* good case - we get to make a new page */
+        memset(mappage, 0, PAGE_SIZE);
+        bitmap->bp[page].map = mappage;
+        if (IS_ADDRESS(bitmap->bp[page].zoneinfo))
+                memset(bitmap->bp[page].zoneinfo, 0, zonebytes);
+        bitmap->missing_pages--;
+        write_unlock(&bitmap->lock);
+        return 0;
+}
+
+/*
+ * Clears the bits of `mask' in the bitmap byte at BYTE offset offset8
+ * (not a bit offset).  We call this routine under lock.
+ *
+ * Without a map page only the page count, and the zone counter if there
+ * is one, are lowered and -EINVAL is returned.  With a map page the byte
+ * is updated and 0 returned; should the page's dirty count reach zero
+ * the page is detached and handed back through *this_page, for the
+ * caller to free once it has dropped the lock.  A missing page table or
+ * an offset beyond the last page yields -EINVAL with nothing touched.
+ */
+static int
+bitmap_clear_mask8 (struct bitmap *bitmap, bitmap_offset_t offset8,
+		  unsigned char mask, unsigned char **this_page)
+{
+        unsigned long page = offset8 >> PAGE_SHIFT;
+        unsigned long pageoff = offset8 & ~PAGE_MASK;
+        unsigned char oldmask, newmask, diffmask;
+        int bits, newcount;
+
+        /* never index the page table where there is none */
+        if (bitmap->bp == NULL || page >= bitmap->pages)
+                return -EINVAL;
+        if (!IS_ADDRESS(bitmap->bp[page].map)) {
+                bits = hweight8(mask);
+                bitmap->bp[page].count -= bits;
+                if (IS_ADDRESS(bitmap->bp[page].zoneinfo)) {
+                        int zoneoffset = (offset8  >> (PAGE_SHIFT - ZONESHIFT));
+                        int zone = zoneoffset & ((1<<ZONESHIFT) - 1);
+                        bitmap->bp[page].zoneinfo[zone] -= bits;
+                }
+                return -EINVAL;
+        }
+
+        oldmask = bitmap->bp[page].map[pageoff];
+        newmask = oldmask & ~mask;
+        diffmask = newmask ^ oldmask;
+        if (!diffmask)
+                return 0;       /* none of those bits were set */
+
+        bits = hweight8 (diffmask);
+        newcount = (bitmap->bp[page].count -= bits);
+        bitmap->bp[page].map[pageoff] = newmask;
+        /* most frequent case is a +ve result and return */
+        if (newcount > 0)
+                return 0;
+        /* negative count is a major misaccounting */
+        if (newcount < 0) {
+                printk(KERN_WARNING "bitmap: dirty count %d on page %lu\n",
+                        newcount, page);
+                return 0;
+        }
+        /* newcount == 0 is when we want to detach the page */
+        *this_page = bitmap->bp[page].map;
+        bitmap->bp[page].map = NULL;
+        bitmap->missing_pages++;
+        return 0;
+}
+
+/*
+ * Clears the bits of `mask' beginning at BIT offset `offset'.
+ *
+ * A byte-aligned offset touches a single bitmap byte.  Otherwise the
+ * mask is split: its low part is shifted up into the byte holding the
+ * offset and whatever spills over the top goes to the following byte
+ * (which may sit on the next page - the byte routine works out the
+ * page from the byte offset by itself).
+ *
+ * The write lock is held across the byte updates.  Map pages that the
+ * updates left without any dirty bits come back detached and are given
+ * to bitmap_free_page only once the lock has been dropped.
+ *
+ * Returns 0, or -EINVAL if any of the byte updates reported an error.
+ */
+static int
+bitmap_clear_mask (struct bitmap *bitmap, bitmap_offset_t offset, unsigned char mask)
+{
+        unsigned long byteidx = offset >> 3;
+        unsigned char shift = offset & 7;
+        unsigned char *spare[2] = { NULL, NULL };
+        int failures = 0;
+        int i;
+
+        write_lock(&bitmap->lock);
+
+        if (!shift) {
+                /* normal situation. Offset is multiple of 8 */
+                if (bitmap_clear_mask8 (bitmap, byteidx, mask, &spare[0]) < 0)
+                        failures++;
+        } else {
+                unsigned char lo = 0xff & (mask << shift);
+                unsigned char hi = 0xff & (mask >> (8 - shift));
+
+                if (lo) {
+                        if (bitmap_clear_mask8 (bitmap, byteidx, lo, &spare[0]) < 0)
+                                failures++;
+                }
+                if (hi) {
+                        if (bitmap_clear_mask8 (bitmap, byteidx + 1, hi, &spare[1]) < 0)
+                                failures++;
+                }
+        }
+
+        write_unlock(&bitmap->lock);
+
+        /* hand back emptied pages now that we are unlocked */
+        for (i = 0; i < 2; i++) {
+                if (spare[i])
+                        bitmap_free_page(bitmap, spare[i]);
+        }
+
+        return failures > 0 ? -EINVAL : 0;
+}
+
+
+/*
+ * Sets the bits of `mask' in the bitmap byte at BYTE offset offset8
+ * (not a bit offset).  We call this routine under lock.
+ *
+ * With a map page the byte is or-ed in, the page count grows by the
+ * number of newly set bits and 0 is returned.  Without one the marks
+ * are only counted (page count, plus zone counter if there is one) and
+ * -EINVAL is returned.  A missing page table or an offset past the last
+ * page also gives -EINVAL, untouched: bitmap_set_mask calls in here
+ * even after bitmap_checkpage has refused the page.
+ */
+static int
+bitmap_set_mask8 (struct bitmap *bitmap, bitmap_offset_t offset8,
+		  unsigned char mask)
+{
+        unsigned long page = offset8 >> PAGE_SHIFT;
+        unsigned long pageoff = offset8 & ~PAGE_MASK;
+        unsigned char oldmask, newmask, diffmask;
+
+        if (bitmap->bp == NULL || page >= bitmap->pages)
+                return -EINVAL;
+        if (!IS_ADDRESS(bitmap->bp[page].map)) {
+                int bits = hweight8(mask);
+                bitmap->bp[page].count += bits;
+                /* mark the zone instead - we have 16 low bits markable */
+                if (IS_ADDRESS(bitmap->bp[page].zoneinfo)) {
+                        int zoneoffset = (offset8  >> (PAGE_SHIFT - ZONESHIFT));
+                        int zone = zoneoffset & ((1 << ZONESHIFT) - 1);
+                        bitmap->bp[page].zoneinfo[zone] += bits;
+                }
+                return -EINVAL;
+        }
+
+        oldmask = bitmap->bp[page].map[pageoff];
+        newmask = oldmask | mask;
+        diffmask = newmask ^ oldmask;
+        if (diffmask) {
+                bitmap->bp[page].map[pageoff] = newmask;
+                bitmap->bp[page].count += hweight8 (diffmask);
+        }
+        return 0;
+}
+
+/*
+ * Sets the bits of `mask' beginning at BIT offset `offset'.
+ *
+ * bitmap_checkpage is asked first to put a map page behind the target
+ * byte; if it cannot, the byte routine still runs and falls back on its
+ * counters, and the failure is reflected in the return value.
+ *
+ * A byte-aligned offset touches one bitmap byte.  Otherwise the mask is
+ * split into the part shifted up into that byte and the part spilling
+ * into the next one.  If the next byte starts a new page, the lock is
+ * dropped so that bitmap_checkpage (which takes the lock itself) can
+ * prepare that page too, and then retaken.
+ *
+ * Returns 0 if every step succeeded, -EINVAL otherwise.
+ */
+static int
+bitmap_set_mask (struct bitmap *bitmap, bitmap_offset_t offset, unsigned char mask)
+{
+        unsigned long byteidx = offset >> 3;
+        unsigned long page = byteidx >> PAGE_SHIFT;
+        unsigned char shift = offset & 7;
+        unsigned char lo, hi;
+        int failures = 0;
+
+        if (bitmap_checkpage(bitmap, page) < 0)
+                failures++;
+
+        /* the page may or may not have been made */
+
+        write_lock(&bitmap->lock);
+
+        if (!shift) {
+                /* normal situation. Offset is multiple of 8 */
+                if (bitmap_set_mask8(bitmap, byteidx, mask) < 0)
+                        failures++;
+                goto unlock;
+        }
+
+        /* unaligned: low part lands in this byte, the rest in the next */
+        lo = 0xff & (mask << shift);
+        hi = 0xff & (mask >> (8 - shift));
+
+        if (lo) {
+                if (bitmap_set_mask8(bitmap, byteidx, lo) < 0)
+                        failures++;
+        }
+        /* nothing spills over: done */
+        if (!hi)
+                goto unlock;
+
+        if ((byteidx & ~PAGE_MASK) + 1 >= PAGE_SIZE) {
+                /* spill byte opens the next page: get it checked, unlocked */
+                write_unlock(&bitmap->lock);
+                if (bitmap_checkpage(bitmap, page + 1) < 0)
+                        failures++;
+                write_lock(&bitmap->lock);
+        }
+
+        /* now the spill byte itself */
+        if (bitmap_set_mask8(bitmap, byteidx + 1, hi) < 0)
+                failures++;
+
+unlock:
+        write_unlock(&bitmap->lock);
+
+        return failures > 0 ? -EINVAL : 0;
+}
+
+/*
+ * bitmap_test_mask8 - is any bit of mask set in the byte at offset8?
+ * offset8 is the BYTE offset, not the bit offset, and callers hold
+ * bitmap->lock. If IS_ADDRESS() rejects the page's map the result is
+ * approximate: a nonzero zone counter when zoneinfo is usable, else a
+ * nonzero page count. An empty mask always yields 0. Returns 1 or 0.
+ */
+static int
+bitmap_test_mask8 (struct bitmap *bitmap, bitmap_offset_t offset8,
+		  unsigned char mask)
+{
+	struct bitmap_page *entry;
+	unsigned long page;
+	unsigned long pageoff;
+
+	if (!mask)
+		return 0;
+
+	page    = offset8 >> PAGE_SHIFT;
+	pageoff = offset8 & ~PAGE_MASK;
+	entry   = &bitmap->bp[page];
+
+	/* the precise answer, when the map page is usable */
+	if (IS_ADDRESS(entry->map))
+		return (entry->map[pageoff] & mask) != 0;
+
+	/* coarser: the counter of the zone this byte falls in */
+	if (IS_ADDRESS(entry->zoneinfo)) {
+		int zoneoffset = (offset8  >> (PAGE_SHIFT - ZONESHIFT));
+		return entry->zoneinfo[zoneoffset & ((1 << ZONESHIFT) - 1)] > 0;
+	}
+	/* last resort: the per-page count */
+	return entry->count > 0;
+}
+
+/*
+ * bitmap_test_mask - test an 8-bit mask against the map at a BIT offset.
+ *
+ * When offset is not a multiple of 8 the mask covers parts of two bytes,
+ * the second of which may lie on the following page. Each part is tested
+ * with bitmap_test_mask8() and the first nonzero result ends the search.
+ *
+ * Example: offset 13 with mask 0xff tests bits 5-7 of byte 1, then
+ * bits 0-4 of byte 2.
+ *
+ * bitmap_checkpage() results are deliberately ignored: a page without a
+ * map is dealt with inside bitmap_test_mask8().
+ *
+ * NOTE(review): the write lock is taken although the visible byte test
+ * only reads the map - confirm whether a read lock would suffice.
+ *
+ * Returns the nonzero result of the first byte test that hits (as
+ * produced by bitmap_test_mask8()), else 0.
+ */
+static int
+bitmap_test_mask (struct bitmap *bitmap, bitmap_offset_t offset, unsigned char mask)
+{
+	unsigned long byte = offset >> 3;		/* byte holding the first bit */
+	unsigned long page = byte >> PAGE_SHIFT;
+	unsigned long inpage = byte & ~PAGE_MASK;	/* byte index within page */
+	unsigned char shift = offset & 7;		/* bit index within byte */
+	unsigned char lo;
+	unsigned char hi;
+	int hit = 0;
+
+	/* the page may or may not have been made; the result is not needed */
+	bitmap_checkpage(bitmap, page);
+
+	/* every byte test below runs under the lock */
+	write_lock(&bitmap->lock);
+
+	if (!shift) {
+		/* normal situation: offset is a multiple of 8, one byte */
+		hit = bitmap_test_mask8(bitmap, byte, mask);
+		goto unlock;
+	}
+
+	/* split the mask into the part for this byte and the spill-over */
+	lo = 0xff & (mask << shift);
+	hi = 0xff & (mask >> (8 - shift));
+
+	if (lo) {
+		/* part of the mask lies in this byte */
+		hit = bitmap_test_mask8(bitmap, byte, lo);
+		if (hit)
+			goto unlock;
+	}
+
+	/* nothing spilled into the next byte: hit is still 0 here */
+	if (!hi)
+		goto unlock;
+
+	if (inpage + 1 >= PAGE_SIZE) {
+		/*
+		 * The spill-over byte starts the next page: release the
+		 * lock around the page check, as at function entry.
+		 */
+		write_unlock(&bitmap->lock);
+		bitmap_checkpage(bitmap, page + 1);
+		write_lock(&bitmap->lock);
+	}
+
+	/* the spill-over decides the result */
+	hit = bitmap_test_mask8(bitmap, byte + 1, hi);
+
+unlock:
+	/* single exit, so the lock is dropped on every path */
+	write_unlock(&bitmap->lock);
+	return hit;
+}
+ 
+/*
+ * bitmap_clear_bits - clear a run of bits starting at BIT offset.
+ * The run is cut at byte boundaries and each piece is passed to
+ * bitmap_clear_mask() as a right-aligned mask. Errors are counted, not
+ * fatal. Returns 0, or -EINVAL if any piece failed.
+ */
+static int
+bitmap_clear_bits (struct bitmap *bitmap, bitmap_offset_t offset, unsigned long bits)
+{
+        int failed = 0;
+
+        /* pieces that reach the end of the byte they start in */
+        while (offset + bits > (offset | 7)) {
+                int span = (offset | 7) + 1 - offset;
+                unsigned char piece = (1 << span) - 1;
+
+                if (bitmap_clear_mask(bitmap, offset, piece) < 0)
+                        failed++;
+                bits -= span;
+                offset += span;
+        }
+
+        /* what is left lies inside a single byte */
+        if (bits > 0) {
+                unsigned char piece = (1 << bits) - 1;
+
+                if (bitmap_clear_mask(bitmap, offset, piece) < 0)
+                        failed++;
+        }
+        return failed ? -EINVAL : 0;
+}
+
+/*
+ * bitmap_set_bits - set a run of bits starting at BIT offset, one byte
+ * sized piece at a time via bitmap_set_mask(). Errors are counted, not
+ * fatal. Returns 0, or -EINVAL if any piece failed.
+ */
+static int
+bitmap_set_bits (struct bitmap *bitmap, bitmap_offset_t offset, unsigned long bits)
+{
+        int failed = 0;
+
+        /* pieces that reach the end of the byte they start in */
+        while (offset + bits >= (offset | 7) + 1) {
+                int span = (offset | 7) + 1 - offset;
+                unsigned char piece = (1 << span) - 1;
+
+                if (bitmap_set_mask(bitmap, offset, piece) < 0)
+                        failed++;
+                bits -= span;
+                offset += span;
+        }
+
+        /* what is left lies inside a single byte */
+        if (bits > 0) {
+                unsigned char piece = (1 << bits) - 1;
+
+                if (bitmap_set_mask(bitmap, offset, piece) < 0)
+                        failed++;
+        }
+        return failed ? -EINVAL : 0;
+}
+
+/*
+ * bitmap_test_bits - test a run of bits starting at BIT offset.
+ * The run is cut at byte boundaries and each piece is passed to
+ * bitmap_test_mask() as a right-aligned mask. Stops at the first piece
+ * that tests nonzero and returns that result; returns 0 if every piece
+ * tested 0.
+ */
+static int
+bitmap_test_bits (struct bitmap *bitmap, bitmap_offset_t offset, unsigned long bits)
+{
+        int hit;
+
+        /* pieces that reach the end of the byte they start in */
+        while (offset + bits >= (offset | 7) + 1) {
+                int span = (offset | 7) + 1 - offset;
+                unsigned char piece = (1 << span) - 1;
+
+                hit = bitmap_test_mask(bitmap, offset, piece);
+                if (hit)
+                        return hit;
+                bits -= span;
+                offset += span;
+        }
+
+        /* what is left lies inside a single byte and decides the result */
+        if (bits > 0)
+                return bitmap_test_mask(bitmap, offset, (unsigned char)((1 << bits) - 1));
+        return 0;
+}
+
+/*
+static int
+bitmap_test_bit (struct bitmap *bitmap, unsigned long block)
+{
+	unsigned long blkgrp;
+	unsigned char blkoff;
+        unsigned long page  ;
+        unsigned long pageoff;
+        int res;
+
+	blkgrp = block >> 3;
+        page   = blkgrp >> PAGE_SHIFT;
+
+        read_lock(&bitmap->lock);
+        // high bits zero means no page address
+        if (!IS_ADDRESS(bitmap->bp[page].map)) {
+                int zoneoffset = (blkgrp  >> (PAGE_SHIFT - ZONESHIFT));
+                int zone = zoneoffset & ((1 << ZONESHIFT) - 1);
+                // use the counter instead - this is zoned
+                res = (bitmap->bp[page].count > 0);
+                if (res && IS_ADDRESS(bitmap->bp[page].zoneinfo))
+                        res = (bitmap->bp[page].zoneinfo[zone] > 0);
+                read_unlock(&bitmap->lock);
+                return res;
+        }
+
+	blkoff = block & 7;
+        pageoff= blkgrp & ~PAGE_MASK;
+
+	res = test_bit ((pageoff << 3) + blkoff, bitmap->bp[page].map) != 0;
+        read_unlock(&bitmap->lock);
+        return res;
+}
+*/
+
+
+/* bitmap_init - zero *bitmap, install its methods and add it to bitmap_list.
+ * bits is the size in bits, rounded up to whole pages; nothing is allocated
+ * here. Returns 0, or -EINVAL when bits or the page count is negative. */
+int
+bitmap_init(struct bitmap * bitmap, bitmap_offset_t bits) {
+        int pages = (bits + (PAGE_SIZE * 8 - 1)) / (PAGE_SIZE * 8);
+
+        memset(bitmap, 0, sizeof(*bitmap));
+	rwlock_init (&bitmap->lock);
+        write_lock(&bitmap->lock);
+        bitmap->start = bitmap_start;
+        bitmap->stop = bitmap_stop;
+        bitmap->testbits = bitmap_test_bits;
+        bitmap->setbits = bitmap_set_bits;
+        bitmap->clearbits = bitmap_clear_bits;
+        bitmap->active = bitmap_active;
+        /* check bits too: rounding up turns small negative counts into 0 pages */
+        if (bits < 0 || pages < 0) {
+                write_unlock(&bitmap->lock);
+                printk(KERN_WARNING "bitmap: initialised for -ve number of pages (%d)!\n",
+                        pages);
+                return -EINVAL;
+        }
+        bitmap->pages = pages;
+        bitmap->missing_zones = pages;
+        bitmap->missing_pages = pages;
+        list_add(&bitmap->list, &bitmap_list);
+        write_unlock(&bitmap->lock);
+        return 0;
+}
+
+/* constructor handed to kmem_cache_create(): zero the object when constructing */
+static void
+bitmap_clear_page(void *data, kmem_cache_t *cache, unsigned long flags)
+{
+        int constructing = (flags & SLAB_CTOR_CONSTRUCTOR) != 0;
+
+        if (constructing && data)
+                memset(data, 0, PAGE_SIZE);
+}
+
+int bitmap_init_list(void)	/* empties bitmap_list; always returns 0 */
+{
+        INIT_LIST_HEAD(&bitmap_list);
+        return 0;
+}
+
+/* Create the "bitmap_page" slab cache of PAGE_SIZE objects, each set up
+ * by bitmap_clear_page. Returns 0, or -ENOMEM if creation failed.
+ */
+int bitmap_init_page_cache(void)
+{
+        bitmap_page_cache = kmem_cache_create("bitmap_page", PAGE_SIZE, 0, 0,
+                                              bitmap_clear_page, NULL);
+        return bitmap_page_cache ? 0 : -ENOMEM;
+}
+
+#ifdef MODULE
+/* Module unload: destroy any bitmap still on bitmap_list, then the page cache. */
+void cleanup_module(void) {
+        struct bitmap *cur, *next;
+        list_for_each_entry_safe(cur, next, &bitmap_list, list)  {
+            printk(KERN_WARNING "bitmap: destroyed leftover bitmap %p in cleanup.\n", cur);
+            bitmap_destr(cur);
+        }
+        INIT_LIST_HEAD(&bitmap_list);
+        if (bitmap_page_cache) {
+                kmem_cache_destroy(bitmap_page_cache);
+                bitmap_page_cache = NULL;
+        }
+}
+
+int init_module(void) {
+        int err = bitmap_init_list();   /* keep the -errno; `||` made it 1 */
+        return err ? err : bitmap_init_page_cache();
+}
+
+  MODULE_AUTHOR ("Peter T. Breuer");
+  MODULE_DESCRIPTION ("Bitmap support");
+  MODULE_LICENSE("GPL");
+  int linux_version_code = LINUX_VERSION_CODE;	/* NOTE(review): non-static global; confirm it is needed */
+#else           /* MODULE */
+__initcall(bitmap_init_list);		/* built in: the same two steps init_module() runs */
+__initcall(bitmap_init_page_cache);
+#endif          /* MODULE */
+
+/* Compile line:
+ *
+ *  gcc -O2 -D__KERNEL__ -DMODULE -c bitmap.c -o bitmap.o
+ *
+ */
+                      
--- linux-2.4.30/drivers/md/bitmap.h.pre-fr1	Wed Apr  6 18:18:04 2005
+++ linux-2.4.30/drivers/md/bitmap.h	Wed Apr  6 18:18:04 2005
@@ -0,0 +1,57 @@
+#ifndef BITMAP_H
+#define BITMAP_H 1
+
+typedef __s64 bitmap_offset_t;
+
+struct bitmap_page {
+        /*
+         * Page of bitmap bytes. If a page is missing then we use the
+         * per page pending write count (and zoneinfo) instead. pages
+         * (in struct bitmap) is the number of 4k pages in the map.
+         */ 
+        char * map;
+        /*
+         * more precise count per zone (1/16 page), for emergencies.
+         */
+        short *zoneinfo;
+        /*
+         * count of dirty bits on the page
+         */ 
+        unsigned short  count;
+};
+
+struct bitmap {
+	struct bitmap_page * bp;	/* per-page state, indexed by page number */
+	unsigned long pages;		/* number of pages; set by bitmap_init() */
+	/* operations; bitmap_init() points these at the bitmap.c routines */
+	int (*start) (struct bitmap * bitmap, __u64 events);
+	void (*stop) (struct bitmap * bitmap);
+	int (*testbits) (struct bitmap * bitmap, bitmap_offset_t shift, unsigned long nbits);
+	int (*setbits) (struct bitmap * bitmap, bitmap_offset_t shift, unsigned long nbits);
+	int (*clearbits) (struct bitmap * bitmap, bitmap_offset_t shift, unsigned long nbits);
+	int (*active) (struct bitmap * bitmap);
+
+        /* reader/writer lock (an rwlock_t, not a spinlock) held around map access */
+	rwlock_t lock;
+
+#define BITMAP_ACTIVE 0x01
+        unsigned long flags;	/* presumably holds BITMAP_ACTIVE; users not shown here */
+
+        /*
+         * events count at startup of the bitmap
+         */
+        __u64 events;
+
+        /*
+         * number of missing zoneinfo sections
+         */
+        unsigned long missing_zones;
+        unsigned long missing_pages;	/* like missing_zones, starts at pages in bitmap_init() */
+        struct list_head list;		/* link in bitmap_list, added by bitmap_init() */
+};
+
+
+int bitmap_init(struct bitmap * bitmap, bitmap_offset_t bits);	/* bits = size in bits; 0 or -EINVAL */
+void bitmap_destr(struct bitmap * bitmap);
+
+#endif
--- linux-2.4.30/drivers/md/Makefile.pre-fr1	Sun Nov 11 19:09:32 2001
+++ linux-2.4.30/drivers/md/Makefile	Wed Apr  6 18:18:04 2005
@@ -7,6 +7,7 @@
 export-objs	:= md.o xor.o
 list-multi	:= lvm-mod.o
 lvm-mod-objs	:= lvm.o lvm-snap.o lvm-fs.o
+fr1-objs	:= raid1.o
 
 # Note: link order is important.  All raid personalities
 # and xor.o must come before md.o, as they each initialise 
@@ -17,6 +18,8 @@
 obj-$(CONFIG_MD_RAID0)		+= raid0.o
 obj-$(CONFIG_MD_RAID1)		+= raid1.o
 obj-$(CONFIG_MD_RAID5)		+= raid5.o xor.o
+obj-$(CONFIG_MD_BITMAP)		+= bitmap.o
+obj-$(CONFIG_MD_FR1)		+= fr1.o
 obj-$(CONFIG_MD_MULTIPATH)	+= multipath.o
 obj-$(CONFIG_BLK_DEV_MD)	+= md.o
 obj-$(CONFIG_BLK_DEV_LVM)	+= lvm-mod.o
@@ -25,3 +28,9 @@
 
 lvm-mod.o: $(lvm-mod-objs)
 	$(LD) -r -o $@ $(lvm-mod-objs)
+
+fr1.o: $(fr1-objs)
+	$(LD) -r -o $@ $(fr1-objs)
+
+clean:
+	rm -f *.o .*.sw?
--- linux-2.4.30/drivers/md/Config.in.pre-fr1	Fri Sep 14 23:22:18 2001
+++ linux-2.4.30/drivers/md/Config.in	Wed Apr  6 18:18:04 2005
@@ -10,7 +10,14 @@
 dep_tristate '  Linear (append) mode' CONFIG_MD_LINEAR $CONFIG_BLK_DEV_MD
 dep_tristate '  RAID-0 (striping) mode' CONFIG_MD_RAID0 $CONFIG_BLK_DEV_MD
 dep_tristate '  RAID-1 (mirroring) mode' CONFIG_MD_RAID1 $CONFIG_BLK_DEV_MD
+if [ "$CONFIG_MD_RAID1" != "n" ]; then
+  dep_bool     '   RAID-1 robust read protocol' CONFIG_MD_RAID1_ROBUST_READ $CONFIG_BLK_DEV_MD
+fi
 dep_tristate '  RAID-4/RAID-5 mode' CONFIG_MD_RAID5 $CONFIG_BLK_DEV_MD
+dep_tristate '  Bitmap support for fast raid' CONFIG_MD_BITMAP $CONFIG_BLK_DEV_MD
+if [ "$CONFIG_MD_RAID1" != "y" ]; then
+  dep_tristate '  FR-1 (fast intelligent mirroring) mode' CONFIG_MD_FR1 $CONFIG_MD_BITMAP
+fi
 dep_tristate '  Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD
 
 dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD
--- linux-2.4.30/Documentation/Configure.help.pre-fr1	Mon Apr  4 03:42:19 2005
+++ linux-2.4.30/Documentation/Configure.help	Wed Apr  6 18:18:04 2005
@@ -2050,6 +2050,53 @@
 
   If unsure, say Y.
 
+FAST RAID-1 (mirroring) mode
+CONFIG_MD_FR1
+  This driver offers faster software RAID-1 performance when
+  resynchronizing disks and reading, offers asynchronous writes, and has
+  various optimizations designed to automate administration.
+
+  Information about Software RAID on Linux is contained in the
+  Software-RAID mini-HOWTO, available from
+  <http://www.tldp.org/docs.html#howto>.  There you will also
+  learn where to get the supporting user space utilities raidtools.
+
+  If you want to use a FR-1 array, say Y. This code is also
+  available as a module called fr1.o ( = code which can be inserted
+  in and removed from the running kernel whenever you want).  If you
+  want to compile it as a module, say M here and read
+  <file:Documentation/modules.txt>. You cannot compile both this and
+  RAID1 into the kernel, so you may prefer to say M.
+
+  If unsure, say Y.
+
+FAST RAID-1 bitmap support
+CONFIG_MD_BITMAP
+  This driver provides the needed bitmap support for the Fast RAID-1
+  module FR1.
+
+  Information about Software RAID on Linux is contained in the
+  Software-RAID mini-HOWTO, available from
+  <http://www.tldp.org/docs.html#howto>.  There you will also
+  learn where to get the supporting user space utilities raidtools.
+
+  If you want to use Fast RAID-1, say Y. This code is also
+  available as a module called bitmap.o ( = code which can be inserted
+  in and removed from the running kernel whenever you want).  If you
+  want to compile it as a module, say M here and read
+  <file:Documentation/modules.txt>. You cannot compile both this and
+  FR1 into the kernel, so you may prefer to say M.
+
+  If unsure, say Y.
+
+Robust RAID-1 read protocol
+CONFIG_MD_RAID1_ROBUST_READ
+  This modification changes the RAID-1 read protocol to withstand read
+  errors without ejecting the disk from the array. The read is retried
+  from another mirror instead.
+
+  If unsure, say N.
+
 RAID-4/RAID-5 mode
 CONFIG_MD_RAID5
   A RAID-5 set of N drives with a capacity of C MB per drive provides

