Re: Linux 2.6.23.17

From: Greg KH
Date: Mon Feb 25 2008 - 20:04:21 EST


diff --git a/Makefile b/Makefile
index 3a932c7..0fe28d1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 23
-EXTRAVERSION = .16
+EXTRAVERSION = .17
NAME = Arr Matey! A Hairy Bilge Rat!

# *DOCUMENTATION*
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index ba931be..5169ecc 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2565,6 +2565,8 @@ static void __init probe_uninorth(void)

/* Locate core99 Uni-N */
uninorth_node = of_find_node_by_name(NULL, "uni-n");
+ uninorth_maj = 1;
+
/* Locate G5 u3 */
if (uninorth_node == NULL) {
uninorth_node = of_find_node_by_name(NULL, "u3");
@@ -2575,8 +2577,10 @@ static void __init probe_uninorth(void)
uninorth_node = of_find_node_by_name(NULL, "u4");
uninorth_maj = 4;
}
- if (uninorth_node == NULL)
+ if (uninorth_node == NULL) {
+ uninorth_maj = 0;
return;
+ }

addrp = of_get_property(uninorth_node, "reg", NULL);
if (addrp == NULL)
@@ -3029,3 +3033,8 @@ void pmac_resume_agp_for_card(struct pci_dev *dev)
pmac_agp_resume(pmac_agp_bridge);
}
EXPORT_SYMBOL(pmac_resume_agp_for_card);
+
+int pmac_get_uninorth_variant(void)
+{
+ return uninorth_maj;
+}
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index eff3b22..7770e10 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -207,7 +207,7 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
if (__pa(address) < KERNEL_TEXT_SIZE) {
unsigned long addr2;
pgprot_t prot2;
- addr2 = __START_KERNEL_map + __pa(address);
+ addr2 = __START_KERNEL_map + __pa(address) - phys_base;
/* Make sure the kernel mappings stay executable */
prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
err = __change_page_attr(addr2, pfn, prot2,
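
The pageattr.c hunk above corrects the kernel-text alias address for relocatable kernels: the mapping at __START_KERNEL_map begins at physical address phys_base, so the alias of physical address __pa(address) is __START_KERNEL_map + __pa(address) - phys_base. A minimal user-space sketch of that arithmetic, with made-up constants (not kernel code):

/* Standalone sketch of the alias computation fixed above.
 * __START_KERNEL_map and phys_base values are illustrative only.
 */
#include <stdio.h>
#include <stdint.h>

#define START_KERNEL_MAP 0xffffffff80000000ULL

int main(void)
{
	uint64_t phys_base = 0x200000;   /* kernel relocated up by 2 MB */
	uint64_t pa        = 0x1234000;  /* physical address inside kernel text */

	uint64_t old_alias = START_KERNEL_MAP + pa;             /* old code */
	uint64_t new_alias = START_KERNEL_MAP + pa - phys_base; /* patched code */

	printf("old alias:     %#llx\n", (unsigned long long)old_alias);
	printf("correct alias: %#llx\n", (unsigned long long)new_alias);
	/* With phys_base == 0 (non-relocated kernel) both agree; once
	 * phys_base != 0 the old computation points past the real
	 * kernel-text mapping.
	 */
	return 0;
}
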
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index d409f67..1ebe7a3 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -85,6 +85,7 @@ struct smu_device {
u32 cmd_buf_abs; /* command buffer absolute */
struct list_head cmd_list;
struct smu_cmd *cmd_cur; /* pending command */
+ int broken_nap;
struct list_head cmd_i2c_list;
struct smu_i2c_cmd *cmd_i2c_cur; /* pending i2c command */
struct timer_list i2c_timer;
@@ -135,6 +136,19 @@ static void smu_start_cmd(void)
fend = faddr + smu->cmd_buf->length + 2;
flush_inval_dcache_range(faddr, fend);

+
+ /* We also disable NAP mode for the duration of the command
+ * on U3 based machines.
+ * This is slightly racy as it can be written back to 1 by a sysctl
+ * but that never happens in practice. There seems to be an issue with
+ * U3 based machines such as the iMac G5 where napping for the
+ * whole duration of the command prevents the SMU from fetching it
+ * from memory. This might be related to the strange i2c based
+ * mechanism the SMU uses to access memory.
+ */
+ if (smu->broken_nap)
+ powersave_nap = 0;
+
/* This isn't exactly a DMA mapping here, I suspect
* the SMU is actually communicating with us via i2c to the
* northbridge or the CPU to access RAM.
@@ -211,6 +225,10 @@ static irqreturn_t smu_db_intr(int irq, void *arg)
misc = cmd->misc;
mb();
cmd->status = rc;
+
+ /* Re-enable NAP mode */
+ if (smu->broken_nap)
+ powersave_nap = 1;
bail:
/* Start next command if any */
smu_start_cmd();
@@ -461,7 +479,7 @@ int __init smu_init (void)
if (np == NULL)
return -ENODEV;

- printk(KERN_INFO "SMU driver %s %s\n", VERSION, AUTHOR);
+ printk(KERN_INFO "SMU: Driver %s %s\n", VERSION, AUTHOR);

if (smu_cmdbuf_abs == 0) {
printk(KERN_ERR "SMU: Command buffer not allocated !\n");
@@ -533,6 +551,11 @@ int __init smu_init (void)
goto fail;
}

+ /* U3 has an issue with NAP mode when issuing SMU commands */
+ smu->broken_nap = pmac_get_uninorth_variant() < 4;
+ if (smu->broken_nap)
+ printk(KERN_INFO "SMU: using NAP mode workaround\n");
+
sys_ctrler = SYS_CTRLER_SMU;
return 0;

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 2c6116f..2b28a24 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -901,6 +901,7 @@ static void sd_rw_intr(struct scsi_cmnd * SCpnt)
unsigned int xfer_size = SCpnt->request_bufflen;
unsigned int good_bytes = result ? 0 : xfer_size;
u64 start_lba = SCpnt->request->sector;
+ u64 end_lba = SCpnt->request->sector + (xfer_size / 512);
u64 bad_lba;
struct scsi_sense_hdr sshdr;
int sense_valid = 0;
@@ -939,26 +940,23 @@ static void sd_rw_intr(struct scsi_cmnd * SCpnt)
goto out;
if (xfer_size <= SCpnt->device->sector_size)
goto out;
- switch (SCpnt->device->sector_size) {
- case 256:
+ if (SCpnt->device->sector_size < 512) {
+ /* only legitimate sector_size here is 256 */
start_lba <<= 1;
- break;
- case 512:
- break;
- case 1024:
- start_lba >>= 1;
- break;
- case 2048:
- start_lba >>= 2;
- break;
- case 4096:
- start_lba >>= 3;
- break;
- default:
- /* Print something here with limiting frequency. */
- goto out;
- break;
+ end_lba <<= 1;
+ } else {
+ /* be careful ... don't want any overflows */
+ u64 factor = SCpnt->device->sector_size / 512;
+ do_div(start_lba, factor);
+ do_div(end_lba, factor);
}
+
+ if (bad_lba < start_lba || bad_lba >= end_lba)
+ /* the bad lba was reported incorrectly, we have
+ * no idea where the error is
+ */
+ goto out;
+
/* This computation should always be done in terms of
* the resolution of the device's medium.
*/
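
The sd.c change compares the device-reported failing LBA against the request's range before trusting it: start and end come from the block layer in 512-byte units and must first be rescaled to the device's sector size. A small user-space sketch of the same arithmetic with hypothetical values (the kernel uses do_div() for the 64-bit divisions); the bytes preceding a valid bad_lba can then be counted as successfully transferred:

/* Sketch of the good-bytes bounds check added to sd_rw_intr().
 * All values are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int sector_size = 4096;     /* device sector size */
	uint64_t start_lba = 8000;           /* request start, 512-byte units */
	unsigned int xfer_size = 32768;      /* bytes in the request */
	uint64_t end_lba = start_lba + xfer_size / 512;
	uint64_t bad_lba = 1001;             /* failing sector from sense data */

	if (sector_size < 512) {             /* only legitimate value is 256 */
		start_lba <<= 1;
		end_lba <<= 1;
	} else {
		uint64_t factor = sector_size / 512;
		start_lba /= factor;         /* kernel: do_div(start_lba, factor) */
		end_lba /= factor;
	}

	if (bad_lba < start_lba || bad_lba >= end_lba) {
		printf("bad_lba %llu outside [%llu, %llu): report is bogus\n",
		       (unsigned long long)bad_lba,
		       (unsigned long long)start_lba,
		       (unsigned long long)end_lba);
		return 0;
	}
	printf("bytes before the error: %llu\n",
	       (unsigned long long)((bad_lba - start_lba) * sector_size));
	return 0;
}
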
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index a2a4865..331a5bb 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -717,6 +717,17 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
}

/*
+ * If the page cache is marked as unsafe or invalid, then we can't rely on
+ * the PageUptodate() flag. In this case, we will need to turn off
+ * write optimisations that depend on the page contents being correct.
+ */
+static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
+{
+ return PageUptodate(page) &&
+ !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
+}
+
+/*
* Update and possibly write a cached page of an NFS file.
*
* XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
@@ -737,10 +748,13 @@ int nfs_updatepage(struct file *file, struct page *page,
(long long)(page_offset(page) +offset));

/* If we're not using byte range locks, and we know the page
- * is entirely in cache, it may be more efficient to avoid
- * fragmenting write requests.
+ * is up to date, it may be more efficient to extend the write
+ * to cover the entire page in order to avoid fragmentation
+ * inefficiencies.
*/
- if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) {
+ if (nfs_write_pageuptodate(page, inode) &&
+ inode->i_flock == NULL &&
+ !(file->f_mode & O_SYNC)) {
count = max(count + offset, nfs_page_length(page));
offset = 0;
}
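
The new nfs_write_pageuptodate() check gates the write-extension heuristic: only when the cached page is up to date and still trusted may a small write be widened to cover the whole page, so the server sees one aligned WRITE instead of several fragments. Roughly, with hypothetical numbers (page_length stands in for nfs_page_length(page), max_u for the kernel's max()):

/* Sketch of the write-extension arithmetic in nfs_updatepage(). */
#include <stdio.h>

static unsigned int max_u(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

int main(void)
{
	unsigned int page_length = 4096; /* valid data on the page */
	unsigned int offset = 1000;      /* write starts 1000 bytes into the page */
	unsigned int count  = 200;       /* caller asked to write 200 bytes */

	/* Page up to date and trusted, no byte-range locks, not O_SYNC:
	 * widen the request to cover the page from offset 0.
	 */
	count = max_u(count + offset, page_length);
	offset = 0;

	printf("request becomes offset=%u count=%u\n", offset, count);
	return 0;
}
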
diff --git a/include/asm-powerpc/pmac_feature.h b/include/asm-powerpc/pmac_feature.h
index 26bcb0a..877c35a 100644
--- a/include/asm-powerpc/pmac_feature.h
+++ b/include/asm-powerpc/pmac_feature.h
@@ -392,6 +392,14 @@ extern u32 __iomem *uninorth_base;
#define UN_BIS(r,v) (UN_OUT((r), UN_IN(r) | (v)))
#define UN_BIC(r,v) (UN_OUT((r), UN_IN(r) & ~(v)))

+/* Uninorth variant:
+ *
+ * 0 = not uninorth
+ * 1 = U1.x or U2.x
+ * 3 = U3
+ * 4 = U4
+ */
+extern int pmac_get_uninorth_variant(void);

#endif /* __ASM_POWERPC_PMAC_FEATURE_H */
#endif /* __KERNEL__ */
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index dae7143..15a0229 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -289,6 +289,8 @@ static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec)
return ktime_add_ns(kt, usec * 1000);
}

+extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs);
+
/*
* The resolution of the clocks. The resolution value is returned in
* the clock_getres() system call to give application programmers an
diff --git a/kernel/futex.c b/kernel/futex.c
index b658a9a..0c55a58 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2063,7 +2063,7 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,

t = timespec_to_ktime(ts);
if (cmd == FUTEX_WAIT)
- t = ktime_add(ktime_get(), t);
+ t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
/*
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index f938c23..bba74b6 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -175,7 +175,7 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,

t = timespec_to_ktime(ts);
if (cmd == FUTEX_WAIT)
- t = ktime_add(ktime_get(), t);
+ t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ee8d0ac..2ee0497 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -301,6 +301,24 @@ unsigned long ktime_divns(const ktime_t kt, s64 div)
}
#endif /* BITS_PER_LONG >= 64 */

+/*
+ * Add two ktime values and do a safety check for overflow:
+ */
+
+ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
+{
+ ktime_t res = ktime_add(lhs, rhs);
+
+ /*
+ * We use KTIME_SEC_MAX here, the maximum timeout which we can
+ * return to user space in a timespec:
+ */
+ if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
+ res = ktime_set(KTIME_SEC_MAX, 0);
+
+ return res;
+}
+
/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS

@@ -658,13 +676,7 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
*/
orun++;
}
- timer->expires = ktime_add(timer->expires, interval);
- /*
- * Make sure, that the result did not wrap with a very large
- * interval.
- */
- if (timer->expires.tv64 < 0)
- timer->expires = ktime_set(KTIME_SEC_MAX, 0);
+ timer->expires = ktime_add_safe(timer->expires, interval);

return orun;
}
@@ -815,7 +827,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
new_base = switch_hrtimer_base(timer, base);

if (mode == HRTIMER_MODE_REL) {
- tim = ktime_add(tim, new_base->get_time());
+ tim = ktime_add_safe(tim, new_base->get_time());
/*
* CONFIG_TIME_LOW_RES is a temporary way for architectures
* to signal that they simply return xtime in
@@ -824,16 +836,8 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
* timeouts. This will go away with the GTOD framework.
*/
#ifdef CONFIG_TIME_LOW_RES
- tim = ktime_add(tim, base->resolution);
+ tim = ktime_add_safe(tim, base->resolution);
#endif
- /*
- * Careful here: User space might have asked for a
- * very long sleep, so the add above might result in a
- * negative number, which enqueues the timer in front
- * of the queue.
- */
- if (tim.tv64 < 0)
- tim.tv64 = KTIME_MAX;
}
timer->expires = tim;

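ktime_add_safe() exists because a relative timeout supplied from user space can be arbitrarily large; adding it to the current monotonic time can wrap the signed 64-bit nanosecond value negative, which is exactly the front-of-queue problem the removed comment in hrtimer_start() described. A minimal user-space sketch of the same clamping check, using plain int64_t nanoseconds in place of ktime_t (the KTIME_SEC_MAX value here is illustrative):

/* Overflow-safe addition of two nanosecond values, mirroring the
 * check added in ktime_add_safe().
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC  1000000000LL
#define KTIME_SEC_MAX 0x7fffffffLL   /* illustrative clamp value */

static int64_t ktime_add_safe_ns(int64_t lhs, int64_t rhs)
{
	int64_t res = (int64_t)((uint64_t)lhs + (uint64_t)rhs);

	/* Same test as the patch: a wrapped sum is negative or smaller
	 * than one of its (non-negative) operands.
	 */
	if (res < 0 || res < lhs || res < rhs)
		res = KTIME_SEC_MAX * NSEC_PER_SEC;

	return res;
}

int main(void)
{
	int64_t now = 123456789LL;                /* pretend ktime_get() */
	int64_t huge_timeout = INT64_MAX - 1000;  /* user-supplied relative timeout */

	printf("naive add: %lld\n",
	       (long long)(int64_t)((uint64_t)now + (uint64_t)huge_timeout));
	printf("safe add:  %lld\n", (long long)ktime_add_safe_ns(now, huge_timeout));
	return 0;
}
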
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f1a73f0..7279484 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -246,6 +246,17 @@ static unsigned int default_startup(unsigned int irq)
}

/*
+ * default shutdown function
+ */
+static void default_shutdown(unsigned int irq)
+{
+ struct irq_desc *desc = irq_desc + irq;
+
+ desc->chip->mask(irq);
+ desc->status |= IRQ_MASKED;
+}
+
+/*
* Fixup enable/disable function pointers
*/
void irq_chip_set_defaults(struct irq_chip *chip)
@@ -256,8 +267,15 @@ void irq_chip_set_defaults(struct irq_chip *chip)
chip->disable = default_disable;
if (!chip->startup)
chip->startup = default_startup;
+ /*
+ * We use chip->disable when the user provided its own. When
+ * we have default_disable set for chip->disable, then we need
+ * to use default_shutdown, otherwise the irq line is not
+ * disabled on free_irq():
+ */
if (!chip->shutdown)
- chip->shutdown = chip->disable;
+ chip->shutdown = chip->disable != default_disable ?
+ chip->disable : default_shutdown;
if (!chip->name)
chip->name = chip->typename;
if (!chip->end)
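
The chip.c change matters because default_disable() is a no-op (disables are done lazily); if a chip provides neither .disable nor .shutdown, copying that no-op into .shutdown would leave the line unmasked on free_irq(). A stripped-down model of the default-filling logic, with simplified structures that are not the kernel's own:

/* Toy model of irq_chip_set_defaults(): shutdown must fall back to a
 * function that really masks the line whenever disable is the no-op.
 */
#include <stdio.h>

struct chip {
	void (*disable)(int irq);
	void (*shutdown)(int irq);
	void (*mask)(int irq);
};

static struct chip *the_chip;   /* stand-in for irq_desc[irq].chip */

static void hw_mask(int irq)          { printf("irq %d masked\n", irq); }
static void default_disable(int irq)  { (void)irq; /* lazy: do nothing */ }
static void default_shutdown(int irq) { the_chip->mask(irq); }

static void set_defaults(struct chip *c)
{
	the_chip = c;
	if (!c->disable)
		c->disable = default_disable;
	if (!c->shutdown)
		c->shutdown = c->disable != default_disable ?
			      c->disable : default_shutdown;
}

int main(void)
{
	struct chip c = { .mask = hw_mask }; /* chip with no disable/shutdown */

	set_defaults(&c);
	c.shutdown(7);   /* now masks the line instead of doing nothing */
	return 0;
}
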
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 7a15afb..00c9e25 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -765,9 +765,11 @@ common_timer_set(struct k_itimer *timr, int flags,
/* SIGEV_NONE timers are not queued ! See common_timer_get */
if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
/* Setup correct expiry time for relative timers */
- if (mode == HRTIMER_MODE_REL)
- timer->expires = ktime_add(timer->expires,
- timer->base->get_time());
+ if (mode == HRTIMER_MODE_REL) {
+ timer->expires =
+ ktime_add_safe(timer->expires,
+ timer->base->get_time());
+ }
return 0;
}

diff --git a/mm/memory.c b/mm/memory.c
index f82b359..51a8691 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -981,6 +981,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
int i;
unsigned int vm_flags;

+ if (len <= 0)
+ return 0;
/*
* Require read or write permissions.
* If 'force' is set, we only require the "MAY" flags.
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 70c5b7d..09b902d 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -135,7 +135,7 @@ enum tcp_bit_set {
* CLOSE_WAIT: ACK seen (after FIN)
* LAST_ACK: FIN seen (after FIN)
* TIME_WAIT: last ACK seen
- * CLOSE: closed connection
+ * CLOSE: closed connection (RST)
*
* LISTEN state is not used.
*
@@ -834,8 +834,21 @@ static int tcp_packet(struct nf_conn *conntrack,
case TCP_CONNTRACK_SYN_SENT:
if (old_state < TCP_CONNTRACK_TIME_WAIT)
break;
- if ((conntrack->proto.tcp.seen[!dir].flags &
- IP_CT_TCP_FLAG_CLOSE_INIT)
+ /* RFC 1122: "When a connection is closed actively,
+ * it MUST linger in TIME-WAIT state for a time 2xMSL
+ * (Maximum Segment Lifetime). However, it MAY accept
+ * a new SYN from the remote TCP to reopen the connection
+ * directly from TIME-WAIT state, if..."
+ * We ignore the conditions because we are in the
+ * TIME-WAIT state anyway.
+ *
+ * Handle aborted connections: we and the server
+ * think there is an existing connection but the client
+ * aborts it and starts a new one.
+ */
+ if (((conntrack->proto.tcp.seen[dir].flags
+ | conntrack->proto.tcp.seen[!dir].flags)
+ & IP_CT_TCP_FLAG_CLOSE_INIT)
|| (conntrack->proto.tcp.last_dir == dir
&& conntrack->proto.tcp.last_index == TCP_RST_SET)) {
/* Attempt to reopen a closed/aborted connection.
@@ -850,16 +863,23 @@ static int tcp_packet(struct nf_conn *conntrack,
case TCP_CONNTRACK_IGNORE:
/* Ignored packets:
*
+ * Our connection entry may be out of sync, so ignore
+ * packets which may signal the real connection between
+ * the client and the server.
+ *
* a) SYN in ORIGINAL
* b) SYN/ACK in REPLY
* c) ACK in reply direction after initial SYN in original.
+ *
+ * If the ignored packet is invalid, the receiver will send
+ * a RST we'll catch below.
*/
if (index == TCP_SYNACK_SET
&& conntrack->proto.tcp.last_index == TCP_SYN_SET
&& conntrack->proto.tcp.last_dir != dir
&& ntohl(th->ack_seq) ==
conntrack->proto.tcp.last_end) {
- /* This SYN/ACK acknowledges a SYN that we earlier
+ /* b) This SYN/ACK acknowledges a SYN that we earlier
* ignored as invalid. This means that the client and
* the server are both in sync, while the firewall is
* not. We kill this session and block the SYN/ACK so
@@ -884,7 +904,7 @@ static int tcp_packet(struct nf_conn *conntrack,
write_unlock_bh(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: invalid packed ignored ");
+ "nf_ct_tcp: invalid packet ignored ");
return NF_ACCEPT;
case TCP_CONNTRACK_MAX:
/* Invalid packet */
@@ -938,8 +958,7 @@ static int tcp_packet(struct nf_conn *conntrack,

conntrack->proto.tcp.state = new_state;
if (old_state != new_state
- && (new_state == TCP_CONNTRACK_FIN_WAIT
- || new_state == TCP_CONNTRACK_CLOSE))
+ && new_state == TCP_CONNTRACK_FIN_WAIT)
conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans
&& *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
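
The conntrack change above broadens the reopen test: a new SYN on a connection already in TIME-WAIT or CLOSE is accepted as a fresh connection if either side initiated the close with a FIN, or if the last packet seen was an RST from the SYN's own direction (CLOSE no longer sets the CLOSE_INIT flag, since the RST case is now matched explicitly). A toy sketch of the combined condition, with simplified stand-ins for the conntrack fields:

#include <stdio.h>
#include <stdbool.h>

#define FLAG_CLOSE_INIT 0x04   /* stand-in for IP_CT_TCP_FLAG_CLOSE_INIT */

static bool may_reopen(unsigned char flags_orig, unsigned char flags_reply,
		       bool last_was_rst_same_dir)
{
	/* Patched condition: a FIN from either direction, or an RST that was
	 * the last packet in the SYN's own direction, allows a reopen.
	 */
	return ((flags_orig | flags_reply) & FLAG_CLOSE_INIT) ||
	       last_was_rst_same_dir;
}

int main(void)
{
	/* The client (ORIGINAL direction) sent the FIN and now sends a new
	 * SYN.  The old code tested only the reply direction's flag, so this
	 * case was not treated as a legitimate reopen.
	 */
	printf("client closed, client reopens: %s\n",
	       may_reopen(FLAG_CLOSE_INIT, 0, false) ? "allow" : "refuse");
	printf("no close, no RST:              %s\n",
	       may_reopen(0, 0, false) ? "allow" : "refuse");
	return 0;
}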