--- linux-2.4/drivers/md/raid1.c.ORIG	2005-07-03 02:03:29.483821024 +0200
+++ linux-2.4/drivers/md/raid1.c	2005-07-03 02:51:03.211090929 +0200
@@ -20,6 +20,28 @@
  * You should have received a copy of the GNU General Public License
  * (for example /usr/src/linux/COPYING); if not, write to the Free
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Changes by Peter T. Breuer <ptb@it.uc3m.es> 31/1/2003 to support
+ * bitmapped intelligence in resync:
+ *
+ *      - bitmap attached on setfaulty (mark bad)
+ *      - bitmap marked during normal i/o if faulty disk
+ *      - bitmap used to skip nondirty blocks during sync
+ *      - bitmap removed on set active
+ *
+ *   Minor changes are needed in raid1.h (extra fields in conf) and in
+ *   md.c (support hotadd directly after setfaulty, or disk recognition).
+ *
+ * More changes by PTB 20/2/2003 to let the bitmap always be present and
+ * thus allow asynchronous mirror writes by using it as a journal log.
+ *
+ * Changes by PTB 10/8/2004 to redo read-balancing so that it reads
+ * from the fastest disk, as determined by latency testing every so
+ * often.
+ *
+ * Changes by PTB 6/1/2005 to make read errors not fault the disk out
+ * of the array but cause retries instead. And also (with CORRECT set)
+ * trigger rewrite of the bad sector.
  */
 
 #include <linux/module.h>
@@ -32,7 +54,15 @@
 #define MD_DRIVER
 #define MD_PERSONALITY
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+#define MAX_WORK_PER_DISK (128 * 8)
+#define MAX_TEST_PER_DISK 64
+#define LATENCY_OLD_WEIGHT 9
+#define LATENCY_NEW_WEIGHT 1
+#define LATENCY_SUM_WEIGHT (LATENCY_OLD_WEIGHT + LATENCY_NEW_WEIGHT)
+#else
 #define MAX_WORK_PER_DISK 128
+#endif /* CONFIG_MD_FR1 */
 
 #define	NR_RESERVED_BUFS	32
 
@@ -50,11 +80,19 @@
 #define PRINTK(x...)  do { } while (0)
 #endif
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+#include "bitmap.h"
+#endif /* CONFIG_MD_FR1 */
 
 static mdk_personality_t raid1_personality;
 static md_spinlock_t retry_list_lock = MD_SPIN_LOCK_UNLOCKED;
 struct raid1_bh *raid1_retry_list = NULL, **raid1_retry_tail;
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+/* module params */
+static int async;   /* async writes */
+#endif /* CONFIG_MD_FR1 */
+
 static struct buffer_head *raid1_alloc_bh(raid1_conf_t *conf, int cnt)
 {
 	/* return a linked list of "cnt" struct buffer_heads.
@@ -325,6 +363,9 @@ static int raid1_map (mddev_t *mddev, kd
 {
 	raid1_conf_t *conf = mddev_to_conf(mddev);
 	int i, disks = MD_SB_DISKS;
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+        kdev_t dev = *rdev;
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
 	unsigned long flags;
 
 	/*
@@ -332,6 +373,30 @@ static int raid1_map (mddev_t *mddev, kd
 	 * now we use the first available disk.
 	 */
 
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+        /*
+         * Uh, no. Choose the next disk if we can, not the first.
+         */
+	md_spin_lock_irqsave(&conf->device_lock, flags);
+	for (i = 0; i < conf->raid_disks; i++) {
+		if (conf->mirrors[i].dev == dev)
+                    	break;
+        }
+        i++;
+	if (i >= conf->raid_disks)
+		i = 0;
+	for (; i < conf->raid_disks; i++) {
+		if (conf->mirrors[i].operational) {
+			*rdev = conf->mirrors[i].dev;
+			/* every exit must drop device_lock and restore irq flags */
+			md_spin_unlock_irqrestore(&conf->device_lock, flags);
+			return (0);
+		}
+	}
+	md_spin_unlock_irqrestore(&conf->device_lock, flags);
+	/* Nothing operational was found by the scan above: drop through to
+	 * the old first-available routine below. */
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
 	md_spin_lock_irqsave(&conf->device_lock, flags);
 	for (i = 0; i < disks; i++) {
 		if (conf->mirrors[i].operational) {
@@ -400,13 +465,65 @@ static void inline sync_request_done (un
 static void raid1_end_bh_io (struct raid1_bh *r1_bh, int uptodate)
 {
 	struct buffer_head *bh = r1_bh->master_bh;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	raid1_conf_t * conf = mddev_to_conf(r1_bh->mddev);
+
+	/* if nobody has done the final end_io yet, do it now */
+	if (!test_and_set_bit(R1BH_AsyncPhase, &r1_bh->state)) {
+
+		PRINTK(KERN_DEBUG "raid1: sync end i/o on sectors %lu-%lu\n",
+			bh->b_rsector, bh->b_rsector + (bh->b_size >> 9) - 1);
 
+		io_request_done(bh->b_rsector, conf,
+			test_bit(R1BH_SyncPhase, &r1_bh->state));
+		bh->b_end_io(bh, uptodate);
+	}
+#else
 	io_request_done(bh->b_rsector, mddev_to_conf(r1_bh->mddev),
 			test_bit(R1BH_SyncPhase, &r1_bh->state));
 
 	bh->b_end_io(bh, uptodate);
+#endif /* CONFIG_MD_FR1 */
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	/* if we should mark the bitmap clean, do so */
+	/* NOTE(review): bh->b_end_io may already have run; is bh still valid? */
+	if (uptodate && r1_bh->cmd == WRITE && r1_bh->nonoperational <= 0) {
+		struct bitmap * bitmap = conf->bitmap;
+		if (bitmap && bitmap->active(bitmap)) {
+			bitmap->clearbits(bitmap, (bitmap_offset_t) (bh->b_rsector >> 1), bh->b_size >> 10);
+		}
+	}
+	/* PTB calculate the latency of the read device */
+	if (uptodate && (r1_bh->cmd == READ || r1_bh->cmd == READA)) {
+		unsigned long latency = jiffies - r1_bh->start_jiffies;
+		kdev_t dev = (&r1_bh->bh_req)->b_dev;
+		int i;
+
+		/* PTB find the mirror component being read */
+		for (i = 0; i < conf->raid_disks; i++) {
+			if (conf->mirrors[i].dev == dev)
+				break;
+		}
+		if (i < conf->raid_disks) {
+			if (latency < 120 * HZ) {	/* unsigned: no lower bound needed */
+				/* PTB count in 1/10ths if we have total weights 9+1 = 10 */
+				latency *= LATENCY_SUM_WEIGHT * LATENCY_SUM_WEIGHT;
+				conf->latency[i] = LATENCY_OLD_WEIGHT * conf->latency[i]
+						+ LATENCY_NEW_WEIGHT * latency;
+				conf->latency[i] /= LATENCY_SUM_WEIGHT;
+			} else {
+				printk(KERN_ERR "raid1: bad latency %lu jiffies\n", latency);
+			}
+		} else {
+			printk(KERN_ERR "raid1: could not find dev %02x:%02x\n", 
+				MAJOR(dev), MINOR(dev));
+		}
+	}
+#endif /* CONFIG_MD_FR1 */
 	raid1_free_r1bh(r1_bh);
 }
+
 void raid1_end_request (struct buffer_head *bh, int uptodate)
 {
 	struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_private);
@@ -414,9 +531,27 @@ void raid1_end_request (struct buffer_he
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (!uptodate)
-		md_error (r1_bh->mddev, bh->b_dev);
-	else
+	if (!uptodate) {
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+		/* Only fault disk out of array on write error, not read. */
+		if (r1_bh->cmd == WRITE) {
+			printk(KERN_ALERT "raid1: erroring bh WRITE for sector %ld\n",
+				bh->b_rsector);
+			md_error (r1_bh->mddev, bh->b_dev);
+		}
+#else
+		md_error (r1_bh->mddev, bh->b_dev);
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
+#ifdef CONFIG_MD_RAID1_READ_WRITE_CORRECT
+		/* tell next time we're here that we're a retry */
+		/* NOTE(review): the read path tests R1BH_ReadRewrite - confirm bit */
+		if (r1_bh->cmd != WRITE) {
+			printk(KERN_ALERT "raid1: set retry bit on bh READ for sector %ld\n",
+				bh->b_rsector);
+			set_bit(R1BH_ReadRetry, &r1_bh->state);
+		}
+#endif /* CONFIG_MD_RAID1_READ_WRITE_CORRECT */
+	} else
 		/*
 		 * Set R1BH_Uptodate in our master buffer_head, so that
 		 * we will return a good error code for to the higher
@@ -438,7 +573,21 @@ void raid1_end_request (struct buffer_he
 		 * we have only one buffer_head on the read side
 		 */
 		
-		if (uptodate) {
+               if (uptodate
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+			/* Give up and error if we're last */
+			|| atomic_dec_and_test(&r1_bh->remaining)
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
+			) {
+#ifdef CONFIG_MD_RAID1_READ_WRITE_CORRECT
+			if (uptodate && test_bit(R1BH_ReadRewrite, &r1_bh->state)) {
+				/* Success at last - rewrite failed reads */
+                                r1_bh->cmd = SPECIAL;
+				raid1_reschedule_retry(r1_bh);
+                                return;
+			} else
+#endif /* CONFIG_MD_RAID1_READ_WRITE_CORRECT */
+
 			raid1_end_bh_io(r1_bh, uptodate);
 			return;
 		}
@@ -447,6 +596,13 @@ void raid1_end_request (struct buffer_he
 		 */
 		printk(KERN_ERR "raid1: %s: rescheduling block %lu\n", 
 			 partition_name(bh->b_dev), bh->b_blocknr);
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+               /*
+                * if not uptodate and not the last possible try,
+                * bh will be rescheduled and repointed while on the
+                * queue, by raid1_map.
+                */
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
 		raid1_reschedule_retry(r1_bh);
 		return;
 	}
@@ -456,10 +612,39 @@ void raid1_end_request (struct buffer_he
 	 *
 	 * Let's see if all mirrored write operations have finished 
 	 * already.
+         *
+         * In any case, do the end io early on the master bh if we are
+         * uptodate, and AsyncIO is set on the bh. We set AsyncPhase
+         * when this happens, so we don't do it twice, inadvertently.
 	 */
+		
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        if (uptodate
+        &&  test_bit(R1BH_AsyncIO, &r1_bh->state)
+        && !test_and_set_bit(R1BH_AsyncPhase, &r1_bh->state)) {
+
+	        struct buffer_head *mbh = r1_bh->master_bh;
 
-	if (atomic_dec_and_test(&r1_bh->remaining))
+	        raid1_conf_t *conf = mddev_to_conf(r1_bh->mddev);
+
+                PRINTK(KERN_DEBUG "raid1: async end i/o on sectors %lu-%lu\n",
+                        mbh->b_rsector, mbh->b_rsector + (mbh->b_size >> 9) - 1);
+
+	        io_request_done(mbh->b_rsector, conf,
+			test_bit(R1BH_SyncPhase, &r1_bh->state));
+	        mbh->b_end_io(mbh, uptodate);
+        }
+#endif /* CONFIG_MD_FR1 */
+
+	if (atomic_dec_and_test(&r1_bh->remaining)) {
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	        if (test_and_set_bit(R1BH_AsyncIO, &r1_bh->state)) {
+                        /* we made a copy for the buffer, remove it now */
+                        kfree(bh->b_data);
+                }
+#endif /* CONFIG_MD_FR1 */
 		raid1_end_bh_io(r1_bh, test_bit(R1BH_Uptodate, &r1_bh->state));
+        }
 }
 
 /*
@@ -520,7 +705,7 @@ static int raid1_read_balance (raid1_con
 	 * Don't touch anything for sequential reads.
 	 */
 
-	if (this_sector == conf->mirrors[new_disk].head_position)
+	if (0 && /* PTB */ this_sector == conf->mirrors[new_disk].head_position)
 		goto rb_out;
 	
 	/*
@@ -531,7 +716,16 @@ static int raid1_read_balance (raid1_con
 	 */
 	
 	if (conf->sect_count >= conf->mirrors[new_disk].sect_limit) {
-		conf->sect_count = 0;
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                PRINTK(KERN_INFO
+                  "raid1: disk %d latency %d abandoned after %d sectors\n",
+                  new_disk,
+                  conf->latency[new_disk],
+	          conf->sect_count);
+
+                /* PTB move on to run a short test on the next disk */
+#endif /* CONFIG_MD_FR1 */
 
 #if defined(CONFIG_SPARC64) && (__GNUC__ == 2) && (__GNUC_MINOR__ == 92)
 		/* Work around a compiler bug in egcs-2.92.11 19980921 */
@@ -546,6 +740,39 @@ static int raid1_read_balance (raid1_con
 		} while ((conf->mirrors[new_disk].write_only) ||
 			 (!conf->mirrors[new_disk].operational));
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                /* PTB if tested all, need to choose best */
+                if (new_disk == conf->last_source) {
+
+                        int fastest = -1;
+                        unsigned long best_latency = 0x7fffffff;
+                        int i;
+
+	                for (i = 0; i < conf->raid_disks; i++) {
+	                        if (conf->mirrors[i].write_only
+                                || !conf->mirrors[i].operational)
+                                        continue;
+                                if (conf->latency[i] <= best_latency) {
+                                    best_latency = conf->latency[i];
+                                    fastest = i;
+                                }
+                        }
+                        if (fastest >= 0)
+                                new_disk = fastest;
+	                conf->mirrors[new_disk].sect_limit = MAX_WORK_PER_DISK;
+                        conf->last_source = new_disk;
+                } else {
+                        /* PTB only a short test run */
+	                conf->mirrors[new_disk].sect_limit = MAX_TEST_PER_DISK;
+                }
+
+                PRINTK(KERN_DEBUG
+                  "raid1: choosing disk %d latency %d\n",
+                  new_disk, conf->latency[new_disk]);
+#endif /* CONFIG_MD_FR1 */
+		/* reset outside the #if: the stock code cleared this for all builds */
+		conf->sect_count = 0;
+
 		goto rb_out;
 	}
 	
@@ -596,6 +823,11 @@ static int raid1_make_request (mddev_t *
 	int disks = MD_SB_DISKS;
 	int i, sum_bhs = 0;
 	struct mirror_info *mirror;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	int sum_nobhs = 0;
+	struct bitmap * bitmap = conf->bitmap;
+	char * async_data; // copy of buffer used for async writes
+#endif /* CONFIG_MD_FR1 */
 	kdev_t dev;
 
 	if (!buffer_locked(bh))
@@ -635,6 +867,10 @@ static int raid1_make_request (mddev_t *
 	r1_bh->master_bh = bh;
 	r1_bh->mddev = mddev;
 	r1_bh->cmd = rw;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	r1_bh->start_jiffies = jiffies; /* PTB record start time */
+	async_data = NULL;
+#endif /* CONFIG_MD_FR1 */
 
 	if (rw == READ) {
 		/*
@@ -653,6 +889,20 @@ static int raid1_make_request (mddev_t *
 	/*	bh_req->b_rsector = bh->n_rsector; */
 		bh_req->b_end_io = raid1_end_request;
 		bh_req->b_private = r1_bh;
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+		atomic_set(&r1_bh->remaining, 0);
+		/* count target devices under spinlock */
+		md_spin_lock_irq(&conf->device_lock);
+		for (i = 0;  i < disks; i++) {
+	                if (!conf->mirrors[i].operational
+                        ||  !conf->mirrors[i].used_slot) {
+                                	continue;
+			} 
+			atomic_inc(&r1_bh->remaining);
+		}
+		md_spin_unlock_irq(&conf->device_lock);
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
+
 		generic_make_request (rw, bh_req);
 		return 0;
 	}
@@ -662,11 +912,65 @@ static int raid1_make_request (mddev_t *
 	 */
 
 	bhl = raid1_alloc_bh(conf, conf->raid_disks);
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        if (bitmap->active(bitmap)) {
+
+                int err = bitmap->setbits(bitmap, (bitmap_offset_t) (bh->b_rsector >> 1),
+                        bh->b_size >> 10);
+
+                /*
+                 * PTB Do async i/o if we marked the bitmap and are supposed to.
+                 * GFP_NOIO: in the write path reclaim must not start more i/o.
+                 */
+                if (async && err >= 0) {
+                        async_data = kmalloc(bh->b_size, GFP_NOIO);
+                        if (async_data) {
+                                memcpy(async_data, bh->b_data, bh->b_size);
+                                set_bit(R1BH_AsyncIO, &r1_bh->state);
+                        }
+                }
+                /*
+                 * PTB Even if the async bit is not set then we STILL need to
+                 * balance the setbits above with a clearbits in the end_io 
+                 * whether setbits errored or not above. That's because
+                 * setbits errors if the bitmap page is not there and
+                 * then we can only count attempted writes in the bitmap,
+                 * not actual writes, so we have to balance that with
+                 * attempted clears. And we do. See the end_io.
+                 */
+        }
+#endif /* CONFIG_MD_FR1 */
 	spin_lock_irq(&conf->device_lock);
 	for (i = 0; i < disks; i++) {
 		struct buffer_head *mbh;
-		if (!conf->mirrors[i].operational) 
+		if (!conf->mirrors[i].operational) {
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                        struct bitmap * bitmap = conf->bitmap;
+
+                        if (!conf->mirrors[i].used_slot) {
+                                continue; 
+                        }
+
+                        /* notionally mark bitmap here */
+                        if (sum_nobhs++ <= 0) {
+                                PRINTK(KERN_DEBUG "raid1: mark mirror %d blk %lu-%lu\n",
+                                i, bh->b_rsector >> 1,
+                                (bh->b_rsector >> 1) + (bh->b_size >> 10) - 1);
+                        }
+
+                        if (!conf->bitmap_dirty && bitmap->active(bitmap)) {
+                                conf->bitmap_dirty = 1;
+                                MD_SB_EVENTS_LO(mddev->sb) =
+                                    mddev->sb->events_lo;
+                                MD_SB_EVENTS_HI(mddev->sb) =
+                                    mddev->sb->events_hi;
+                        }
+
+#endif /* CONFIG_MD_FR1 */
 			continue;
+                }
  
 	/*
 	 * We should use a private pool (size depending on NR_REQUEST),
@@ -703,6 +1007,10 @@ static int raid1_make_request (mddev_t *
  		mbh->b_size       = bh->b_size;
  		mbh->b_page	  = bh->b_page;
  		mbh->b_data	  = bh->b_data;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+ 		mbh->b_data       =
+                 test_bit(R1BH_AsyncIO, &r1_bh->state)? async_data : bh->b_data;
+#endif /* CONFIG_MD_FR1 */
  		mbh->b_list       = BUF_LOCKED;
  		mbh->b_end_io     = raid1_end_request;
  		mbh->b_private    = r1_bh;
@@ -719,6 +1027,9 @@ static int raid1_make_request (mddev_t *
 		return 0;
 	}
 	md_atomic_set(&r1_bh->remaining, sum_bhs);
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	r1_bh->nonoperational = sum_nobhs;
+#endif /* CONFIG_MD_FR1 */
 
 	/*
 	 * We have to be a bit careful about the semaphore above, thats
@@ -769,6 +1080,85 @@ static void raid1_status(struct seq_file
 #define ALREADY_SYNCING KERN_INFO \
 "raid1: syncing already in progress.\n"
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+static int
+raid1_create_bitmap(mddev_t *mddev) {
+	/*
+	 * Allocate a bitmap head, initialise it for twice sb->size blocks
+	 * and publish it in conf->bitmap. Returns 0, or -ENOMEM on failure.
+	 */
+	raid1_conf_t *r1conf = mddev_to_conf(mddev);
+	/* need size to have been set already */
+	unsigned long nblocks = mddev->sb->size << 1;
+	struct bitmap *map = kmalloc (sizeof (*map), GFP_KERNEL);
+
+	if (!map) {
+		printk(KERN_WARNING "raid1: out of memory for bitmap head\n");
+		return -ENOMEM;
+	}
+
+	if (bitmap_init (map, nblocks) < 0) {
+		printk(KERN_WARNING "raid1: failed to init bitmap\n");
+		kfree(map);
+		return -ENOMEM;
+	}
+
+	/* install under segment_lock, with the dirty flag cleared */
+	spin_lock_irq(&r1conf->segment_lock);
+	r1conf->bitmap = map;
+	r1conf->bitmap_dirty = 0;
+	spin_unlock_irq(&r1conf->segment_lock);
+	return 0;
+}
+
+static void
+raid1_remove_bitmap (mddev_t *mddev) {
+	/* Detach conf->bitmap under segment_lock, then destroy and free it. */
+	raid1_conf_t *r1conf = mddev_to_conf(mddev);
+	struct bitmap *old;
+
+	spin_lock_irq(&r1conf->segment_lock);
+	old = r1conf->bitmap;
+	if (old)
+		r1conf->bitmap = NULL;
+	spin_unlock_irq(&r1conf->segment_lock);
+
+	/* nothing was attached */
+	if (!old)
+		return;
+	bitmap_destr(old);
+	kfree(old);
+}
+
+/* Start conf->bitmap unless it is already active. Returns 0 on success or
+ * when already active, -EINVAL if no bitmap exists or start() fails. */
+static int
+raid1_start_bitmap (mddev_t *mddev) {
+	raid1_conf_t *conf = mddev_to_conf(mddev);
+	struct bitmap * bitmap;
+	unsigned long flags;
+
+	/* irqsave: also reached from mark_disk_bad(); must not force irqs on */
+	spin_lock_irqsave(&conf->segment_lock, flags);
+	bitmap = conf->bitmap;
+	spin_unlock_irqrestore(&conf->segment_lock, flags);
+	if (!bitmap)
+		return -EINVAL;
+
+	if (bitmap->active(bitmap)) {
+		/* %p: a pointer cast to unsigned truncates on 64-bit */
+		printk(KERN_WARNING "raid1: bitmap %p already active!\n", bitmap);
+		return 0;
+	}
+	if (bitmap->start(bitmap, md_event(mddev->sb)) < 0) {
+		printk(KERN_WARNING "raid1: bitmap %p failed to start!\n", bitmap);
+		return -EINVAL;
+	}
+	PRINTK(KERN_DEBUG "raid1: made bitmap %p\n", bitmap);
+	return 0;
+}
+#endif /* CONFIG_MD_FR1 */
+
 static void mark_disk_bad (mddev_t *mddev, int failed)
 {
 	raid1_conf_t *conf = mddev_to_conf(mddev);
@@ -777,6 +1167,13 @@ static void mark_disk_bad (mddev_t *mdde
 
 	mirror->operational = 0;
 	mark_disk_faulty(sb->disks+mirror->number);
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        /*
+         * Activate the bitmap on a mirror just marked faulty (and
+         * nonoperational).
+         */
+	raid1_start_bitmap (mddev);
+#endif /* CONFIG_MD_FR1 */
 	mark_disk_nonsync(sb->disks+mirror->number);
 	mark_disk_inactive(sb->disks+mirror->number);
 	if (!mirror->write_only)
@@ -848,6 +1245,14 @@ static void print_raid1_conf (raid1_conf
 
 	for (i = 0; i < MD_SB_DISKS; i++) {
 		tmp = conf->mirrors + i;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+		/*
+		 * Remove repeats from debug printout.
+		 */
+		if (i > 0 && memcmp(tmp, &conf->mirrors[i-1], sizeof(*tmp)) == 0) {
+			continue;
+		}
+#endif /* CONFIG_MD_FR1 */
 		printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n",
 			i, tmp->spare,tmp->operational,
 			tmp->number,tmp->raid_disk,tmp->used_slot,
@@ -939,16 +1344,36 @@ static int raid1_diskop(mddev_t *mddev, 
 	case DISKOP_SPARE_WRITE:
 	case DISKOP_SPARE_INACTIVE:
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                PRINTK(KERN_DEBUG "raid1: diskop SPARE %s\n",
+                        state == DISKOP_SPARE_WRITE ? "WRITE" : 
+                        state == DISKOP_SPARE_INACTIVE ? "INACTIVE" : 
+                        state == DISKOP_SPARE_ACTIVE ? "ACTIVE" : ""
+                        );
+#endif /* CONFIG_MD_FR1 */
 		/*
 		 * Find the spare disk ... (can only be in the 'high'
 		 * area of the array)
 		 */
 		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
 			tmp = conf->mirrors + i;
-			if (tmp->spare && tmp->number == (*d)->number) {
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+			if (tmp->spare
+                        && (tmp->number == (*d)->number
+                        /*
+                         * I'm not sure we now need to allow match by
+                         * device number too. FIXME.
+                         */
+                            || tmp->dev == MKDEV((*d)->major,(*d)->minor))) {
 				spare_disk = i;
 				break;
 			}
+#else
+			if (tmp->spare && tmp->number == (*d)->number) {
+				spare_disk = i;
+				break;
+                        }
+#endif /* CONFIG_MD_FR1 */
 		}
 		if (spare_disk == -1) {
 			MD_BUG();
@@ -1104,6 +1529,10 @@ static int raid1_diskop(mddev_t *mddev, 
 		fdisk->spare = 0;
 		fdisk->write_only = 0;
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                PRINTK(KERN_DEBUG "raid1: diskop SPARE device %x now ACTIVE\n",
+                        fdisk->dev);
+#endif /* CONFIG_MD_FR1 */
 		/*
 		 * if we activate a spare, we definitely replace a
 		 * non-operational disk slot in the 'low' area of
@@ -1115,6 +1544,11 @@ static int raid1_diskop(mddev_t *mddev, 
 		break;
 
 	case DISKOP_HOT_REMOVE_DISK:
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                PRINTK(KERN_DEBUG "raid1: diskop HOT REMOVE\n");
+#endif /* CONFIG_MD_FR1 */
+
 		rdisk = conf->mirrors + removed_disk;
 
 		if (rdisk->spare && (removed_disk < conf->raid_disks)) {
@@ -1148,6 +1582,11 @@ static int raid1_diskop(mddev_t *mddev, 
 		adisk->head_position = 0;
 		conf->nr_disks++;
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                PRINTK(KERN_DEBUG "raid1: diskop HOT ADDed mirror %d disk %d bitmap %x\n",
+                        added_disk, adisk->number, (unsigned)conf->bitmap);
+#endif /* CONFIG_MD_FR1 */
+
 		break;
 
 	default:
@@ -1292,6 +1731,13 @@ static void raid1d (void *data)
 		case READA:
 			dev = bh->b_dev;
 			raid1_map (mddev, &bh->b_dev);
+#ifdef CONFIG_MD_RAID1_ROBUST_READ
+                        /* raid1_map incorrectly used to change target to
+                         * 0th disk always - now I hope it does a
+                         * better job than before and switches target to
+                         * next disk in the mirror.
+                         */
+#endif /* CONFIG_MD_RAID1_ROBUST_READ */
 			if (bh->b_dev == dev) {
 				printk (IO_ERROR, partition_name(bh->b_dev), bh->b_blocknr);
 				raid1_end_bh_io(r1_bh, 0);
@@ -1398,6 +1844,22 @@ static int raid1_sync_request (mddev_t *
 	int block_nr;
 	int buffs;
 	kdev_t dev;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        /*
+         * Will need to count mirror components currently with a bitmap
+         * which have been marked faulty and nonoperational at some
+         * point beforehand, and have been accumulating marks on the
+         * bitmap to indicate dirty blocks that need syncing.
+         */
+        struct bitmap * bitmap = conf->bitmap;
+        int count, block_not_dirty;
+        int targets[MD_SB_DISKS];
+        /*
+         * PTB discount the skipped sectors back to the md.c code
+         */
+        extern atomic_t md_throttle[];
+
+#endif /* CONFIG_MD_FR1 */
 
 	if (!sector_nr) {
 		/* we want enough buffers to hold twice the window of 128*/
@@ -1406,9 +1868,29 @@ static int raid1_sync_request (mddev_t *
 		if (buffs < 2)
 			goto nomem;
 		conf->window = buffs*(PAGE_SIZE>>9)/2;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                /* also remove bitmap if not indicated */
+                if (! MD_SB_BITMAP_REPAIR(mddev->sb)) {
+                        /* has to be outside spinlock as it takes it */
+                        printk(KERN_WARNING "md%d: removed bitmap %x\n",
+                                mdidx(mddev), (unsigned)bitmap);
+                        bitmap->stop (bitmap);
+                } else {
+                        printk(KERN_WARNING "md%d: retained bitmap %x\n",
+                                mdidx(mddev), (unsigned)bitmap);
+                }
+                /* reset the bitmap indicator always */
+                MD_SB_BITMAP_REPAIR(mddev->sb) = 0;
+#endif /* CONFIG_MD_FR1 */
 	}
 	spin_lock_irq(&conf->segment_lock);
 	if (!sector_nr) {
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                /* setup extra report counters for skipped/synced blocks */
+                conf->sync_mode = -1;
+                conf->last_clean_sector = -1;
+                conf->last_dirty_sector = -1;
+#endif /* CONFIG_MD_FR1 */
 		/* initialize ...*/
 		conf->start_active = 0;
 		conf->start_ready = 0;
@@ -1422,7 +1904,7 @@ static int raid1_sync_request (mddev_t *
 			MD_BUG();
 	}
 	while (sector_nr >= conf->start_pending) {
-		PRINTK("wait .. sect=%lu start_active=%d ready=%d pending=%d future=%d, cnt_done=%d active=%d ready=%d pending=%d future=%d\n",
+		PRINTK("wait .. sect=%lu start_active=%ld ready=%ld pending=%ld future=%ld, cnt_done=%d active=%d ready=%d pending=%d future=%d\n",
 			sector_nr, conf->start_active, conf->start_ready, conf->start_pending, conf->start_future,
 			conf->cnt_done, conf->cnt_active, conf->cnt_ready, conf->cnt_pending, conf->cnt_future);
 		wait_event_lock_irq(conf->wait_done,
@@ -1463,9 +1945,71 @@ static int raid1_sync_request (mddev_t *
 	conf->last_used = disk;
 	
 	mirror = conf->mirrors+conf->last_used;
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        /* PTB go looking for the faulted (nonoperational) mirrors, under lock */
+        count = 0;
+	while (1) {
+                const int maxdisk = 2 * conf->raid_disks - conf->working_disks;
+		if (disk <= 0)
+                        disk = maxdisk > MD_SB_DISKS ? MD_SB_DISKS : maxdisk;
+		disk--;
+		if (disk == conf->last_used)
+			break;
+                if (!conf->mirrors[disk].operational)
+                        continue;
+                /* We need them to be writable */
+                if (conf->mirrors[disk].write_only) {
+                        targets[count++] = disk;
+                }
+	}
+
+        bitmap = conf->bitmap;
+        block_not_dirty = bitmap->active(bitmap)
+            && !bitmap->testbits(bitmap, (bitmap_offset_t) (sector_nr >> 1), 1);
+#endif /* CONFIG_MD_FR1 */
+
 	dev = mirror->dev;
 	spin_unlock_irq(&conf->device_lock);
-	
+
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        if (count > 0 && block_not_dirty) {
+
+                const int done = 2 - (sector_nr & 1);
+
+	        md_sync_acct(mirror->dev, done);
+                sync_request_done(sector_nr, conf);
+		md_done_sync(mddev, done, 1);
+
+                /* do these conf accesses under lock, though only accounting */
+	        spin_lock_irq(&conf->segment_lock);
+                if (conf->sync_mode != 0) {
+                        if (conf->sync_mode == 1) {
+                                printk(KERN_INFO "raid1: synced dirty sectors %lu-%lu\n",
+                                conf->last_clean_sector+1,
+                                conf->last_dirty_sector);
+                        }
+                        conf->sync_mode = 0;
+                }
+
+                conf->last_clean_sector = sector_nr + done - 1;
+                if (mddev->sb && sector_nr + done >= mddev->sb->size<<1) {
+                        printk(KERN_INFO "raid1: skipped clean sectors %lu-%lu\n",
+                        conf->last_dirty_sector+1,
+                        conf->last_clean_sector);
+                }
+ 
+                /* PTB here be dragons - update md driver throttle discount */
+                atomic_add(done, &md_throttle[mdidx(mddev)]);
+	        spin_unlock_irq(&conf->segment_lock);
+
+		wake_up(&conf->wait_ready);
+                /* skip remainder of block */
+                return done;
+        }
+  	
+        /* read */
+#endif /* CONFIG_MD_FR1 */
 	r1_bh = raid1_alloc_buf (conf);
 	r1_bh->master_bh = NULL;
 	r1_bh->mddev = mddev;
@@ -1499,6 +2043,30 @@ static int raid1_sync_request (mddev_t *
 	generic_make_request(READ, bh);
 	md_sync_acct(bh->b_dev, bh->b_size/512);
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        /* printout info from time to time */
+	spin_lock_irq(&conf->segment_lock);
+        if (conf->sync_mode != 1) {
+                if (conf->sync_mode == 0) {
+                        printk(KERN_INFO "raid1: skipped clean sectors %lu-%lu\n",
+                        conf->last_dirty_sector+1,
+                        conf->last_clean_sector);
+ 
+ 
+                }
+                conf->sync_mode = 1;
+        }
+        conf->last_dirty_sector = sector_nr + (bsize >> 9) - 1;
+
+        if (mddev->sb && sector_nr + (bsize >> 9) >= mddev->sb->size<<1) {
+                printk(KERN_INFO "raid1: synced dirty sectors %lu-%lu\n",
+                conf->last_clean_sector+1,
+                conf->last_dirty_sector);
+        }
+ 
+	spin_unlock_irq(&conf->segment_lock);
+#endif /* CONFIG_MD_FR1 */
+
 	return (bsize >> 9);
 
 nomem:
@@ -1531,6 +2099,14 @@ static void end_sync_write(struct buffer
 		mddev_t *mddev = r1_bh->mddev;
  		unsigned long sect = bh->b_blocknr;
 		int size = bh->b_size;
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+                raid1_conf_t * conf = mddev_to_conf(mddev);
+                struct bitmap * bitmap = conf->bitmap;
+                if (bitmap && bitmap->active(bitmap)) {
+                        /* PTB clean the bitmap after resync */
+                        bitmap->clearbits(bitmap, (bitmap_offset_t)(sect >> 1), size >> 10);
+                }
+#endif /* CONFIG_MD_FR1 */
 		raid1_free_buf(r1_bh);
 		sync_request_done(sect, mddev_to_conf(mddev));
 		md_done_sync(mddev,size>>9, uptodate);
@@ -1576,6 +2152,11 @@ static void end_sync_write(struct buffer
 #define START_RESYNC KERN_WARNING \
 "raid1: raid set md%d not clean; reconstructing mirrors\n"
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+#define BITMAP_ERROR KERN_ERR \
+"raid1: out of memory for bitmap on md%d\n"
+#endif /* CONFIG_MD_FR1 */
+
 static int raid1_run (mddev_t *mddev)
 {
 	raid1_conf_t *conf;
@@ -1744,6 +2325,16 @@ static int raid1_run (mddev_t *mddev)
 		/* nothing */;
 	conf->last_used = j;
 
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+        /* make the bitmap at this point - hope mddev->size exists already */
+        if (raid1_create_bitmap(mddev) < 0) {
+                printk(BITMAP_ERROR, mdidx(mddev));
+		goto out_free_conf;
+        }
+
+        /* set it active too */
+        raid1_start_bitmap (mddev);
+#endif /* CONFIG_MD_FR1 */
 
 
 	{
@@ -1803,6 +2394,9 @@ out_free_conf:
 	raid1_shrink_r1bh(conf);
 	raid1_shrink_bh(conf);
 	raid1_shrink_buffers(conf);
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	raid1_remove_bitmap (mddev);
+#endif /* CONFIG_MD_FR1 */
 	kfree(conf);
 	mddev->private = NULL;
 out:
@@ -1864,6 +2458,9 @@ static int raid1_stop (mddev_t *mddev)
 	raid1_shrink_r1bh(conf);
 	raid1_shrink_bh(conf);
 	raid1_shrink_buffers(conf);
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+	raid1_remove_bitmap (mddev);
+#endif /* CONFIG_MD_FR1 */
 	kfree(conf);
 	mddev->private = NULL;
 	MOD_DEC_USE_COUNT;
@@ -1896,4 +2493,8 @@ static void raid1_exit (void)
 
 module_init(raid1_init);
 module_exit(raid1_exit);
+#if defined(CONFIG_MD_FR1) || defined(CONFIG_MD_FR1_MODULE)
+MODULE_PARM(async, "i");
+MODULE_PARM_DESC(async, "Do async writes");
+#endif /* CONFIG_MD_FR1 */
 MODULE_LICENSE("GPL");

