* [PATCH for review] [1/145] x86_64: Update defconfig
[not found] <20060810 935.775038000@suse.de>
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [2/145] i386: " Andi Kleen
` (144 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
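Enable cpufreq debugging (CONFIG_CPU_FREQ_DEBUG).
Disable the watchdog subsystem, including the soft watchdog (CONFIG_WATCHDOG).
Also enable SCSI SAS attributes, Fusion SAS, SATA SVW and B44, and disable ACPI Toshiba support.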
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/defconfig | 56 ++++++++------------------------------------------
1 files changed, 10 insertions(+), 46 deletions(-)
Index: linux/arch/x86_64/defconfig
===================================================================
--- linux.orig/arch/x86_64/defconfig
+++ linux/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.18-rc2
-# Tue Jul 18 17:13:20 2006
+# Linux kernel version: 2.6.18-rc3-git6
+# Sat Aug 5 02:32:50 2006
#
CONFIG_X86_64=y
CONFIG_64BIT=y
@@ -201,7 +201,7 @@ CONFIG_ACPI_THERMAL=y
CONFIG_ACPI_NUMA=y
# CONFIG_ACPI_ASUS is not set
# CONFIG_ACPI_IBM is not set
-CONFIG_ACPI_TOSHIBA=y
+# CONFIG_ACPI_TOSHIBA is not set
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
CONFIG_ACPI_EC=y
@@ -216,7 +216,7 @@ CONFIG_ACPI_CONTAINER=y
#
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_TABLE=y
-# CONFIG_CPU_FREQ_DEBUG is not set
+CONFIG_CPU_FREQ_DEBUG=y
CONFIG_CPU_FREQ_STAT=y
# CONFIG_CPU_FREQ_STAT_DETAILS is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
@@ -512,7 +512,7 @@ CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_SPI_ATTRS=y
CONFIG_SCSI_FC_ATTRS=y
# CONFIG_SCSI_ISCSI_ATTRS is not set
-# CONFIG_SCSI_SAS_ATTRS is not set
+CONFIG_SCSI_SAS_ATTRS=y
#
# SCSI low-level drivers
@@ -538,7 +538,7 @@ CONFIG_MEGARAID_MAILBOX=y
CONFIG_MEGARAID_SAS=y
CONFIG_SCSI_SATA=y
CONFIG_SCSI_SATA_AHCI=y
-# CONFIG_SCSI_SATA_SVW is not set
+CONFIG_SCSI_SATA_SVW=y
CONFIG_SCSI_ATA_PIIX=y
# CONFIG_SCSI_SATA_MV is not set
CONFIG_SCSI_SATA_NV=y
@@ -589,7 +589,7 @@ CONFIG_BLK_DEV_DM=y
CONFIG_FUSION=y
CONFIG_FUSION_SPI=y
# CONFIG_FUSION_FC is not set
-# CONFIG_FUSION_SAS is not set
+CONFIG_FUSION_SAS=y
CONFIG_FUSION_MAX_SGE=128
# CONFIG_FUSION_CTL is not set
@@ -675,7 +675,7 @@ CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
# CONFIG_AMD8111_ETH is not set
# CONFIG_ADAPTEC_STARFIRE is not set
-# CONFIG_B44 is not set
+CONFIG_B44=y
CONFIG_FORCEDETH=y
# CONFIG_DGRS is not set
# CONFIG_EEPRO100 is not set
@@ -842,44 +842,7 @@ CONFIG_LEGACY_PTY_COUNT=256
#
# Watchdog Cards
#
-CONFIG_WATCHDOG=y
-# CONFIG_WATCHDOG_NOWAYOUT is not set
-
-#
-# Watchdog Device Drivers
-#
-CONFIG_SOFT_WATCHDOG=y
-# CONFIG_ACQUIRE_WDT is not set
-# CONFIG_ADVANTECH_WDT is not set
-# CONFIG_ALIM1535_WDT is not set
-# CONFIG_ALIM7101_WDT is not set
-# CONFIG_SC520_WDT is not set
-# CONFIG_EUROTECH_WDT is not set
-# CONFIG_IB700_WDT is not set
-# CONFIG_IBMASR is not set
-# CONFIG_WAFER_WDT is not set
-# CONFIG_I6300ESB_WDT is not set
-# CONFIG_I8XX_TCO is not set
-# CONFIG_SC1200_WDT is not set
-# CONFIG_60XX_WDT is not set
-# CONFIG_SBC8360_WDT is not set
-# CONFIG_CPU5_WDT is not set
-# CONFIG_W83627HF_WDT is not set
-# CONFIG_W83877F_WDT is not set
-# CONFIG_W83977F_WDT is not set
-# CONFIG_MACHZ_WDT is not set
-# CONFIG_SBC_EPX_C3_WATCHDOG is not set
-
-#
-# PCI-based Watchdog Cards
-#
-# CONFIG_PCIPCWATCHDOG is not set
-# CONFIG_WDTPCI is not set
-
-#
-# USB-based Watchdog Cards
-#
-# CONFIG_USBPCWATCHDOG is not set
+# CONFIG_WATCHDOG is not set
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_INTEL=y
CONFIG_HW_RANDOM_AMD=y
@@ -1056,6 +1019,7 @@ CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=256
CONFIG_VIDEO_SELECT=y
CONFIG_DUMMY_CONSOLE=y
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
#
# Sound
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [2/145] i386: Update defconfig
[not found] <20060810 935.775038000@suse.de>
2006-08-10 19:35 ` [PATCH for review] [1/145] x86_64: Update defconfig Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [3/145] i386: Allow to use GENERICARCH for UP kernels Andi Kleen
` (143 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
This is based on the x86-64 defconfig, which works on a wide range of systems.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/defconfig | 972 ++++++++++++++++++++++------------------------------
1 files changed, 424 insertions(+), 548 deletions(-)
Index: linux/arch/i386/defconfig
===================================================================
--- linux.orig/arch/i386/defconfig
+++ linux/arch/i386/defconfig
@@ -1,55 +1,63 @@
#
# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.18-rc4
+# Wed Aug 9 19:34:20 2006
#
CONFIG_X86_32=y
+CONFIG_GENERIC_TIME=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_STACKTRACE_SUPPORT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_X86=y
CONFIG_MMU=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
+CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_DMI=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
#
# Code maturity level options
#
CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
#
# General setup
#
CONFIG_LOCALVERSION=""
-# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_LOCALVERSION_AUTO=y
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
-# CONFIG_POSIX_MQUEUE is not set
+CONFIG_POSIX_MQUEUE=y
# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
CONFIG_SYSCTL=y
# CONFIG_AUDIT is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+# CONFIG_CPUSETS is not set
+# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_UID16=y
-CONFIG_VM86=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
# CONFIG_KALLSYMS_EXTRA_PASS is not set
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_BASE_FULL=y
+CONFIG_RT_MUTEXES=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
CONFIG_SLAB=y
+CONFIG_VM_EVENT_COUNTERS=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
# CONFIG_SLOB is not set
@@ -60,41 +68,45 @@ CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_OBSOLETE_MODPARM=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
# CONFIG_KMOD is not set
+CONFIG_STOP_MACHINE=y
#
# Block layer
#
-# CONFIG_LBD is not set
+CONFIG_LBD=y
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
-# CONFIG_IOSCHED_AS is not set
-# CONFIG_IOSCHED_DEADLINE is not set
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
+CONFIG_DEFAULT_AS=y
# CONFIG_DEFAULT_DEADLINE is not set
-CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_CFQ is not set
# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="cfq"
+CONFIG_DEFAULT_IOSCHED="anticipatory"
#
# Processor type and features
#
-CONFIG_X86_PC=y
+CONFIG_SMP=y
+# CONFIG_X86_PC is not set
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
# CONFIG_X86_NUMAQ is not set
# CONFIG_X86_SUMMIT is not set
# CONFIG_X86_BIGSMP is not set
# CONFIG_X86_VISWS is not set
-# CONFIG_X86_GENERICARCH is not set
+CONFIG_X86_GENERICARCH=y
# CONFIG_X86_ES7000 is not set
+CONFIG_X86_CYCLONE_TIMER=y
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
@@ -102,11 +114,11 @@ CONFIG_X86_PC=y
# CONFIG_M586MMX is not set
# CONFIG_M686 is not set
# CONFIG_MPENTIUMII is not set
-# CONFIG_MPENTIUMIII is not set
+CONFIG_MPENTIUMIII=y
# CONFIG_MPENTIUMM is not set
# CONFIG_MPENTIUM4 is not set
# CONFIG_MK6 is not set
-CONFIG_MK7=y
+# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
@@ -117,10 +129,10 @@ CONFIG_MK7=y
# CONFIG_MGEODE_LX is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
-# CONFIG_X86_GENERIC is not set
+CONFIG_X86_GENERIC=y
CONFIG_X86_CMPXCHG=y
CONFIG_X86_XADD=y
-CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_L1_CACHE_SHIFT=7
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_WP_WORKS_OK=y
@@ -131,26 +143,28 @@ CONFIG_X86_CMPXCHG64=y
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
-CONFIG_X86_USE_3DNOW=y
CONFIG_X86_TSC=y
-# CONFIG_HPET_TIMER is not set
-# CONFIG_SMP is not set
-CONFIG_PREEMPT_NONE=y
-# CONFIG_PREEMPT_VOLUNTARY is not set
+CONFIG_HPET_TIMER=y
+CONFIG_HPET_EMULATE_RTC=y
+CONFIG_NR_CPUS=32
+CONFIG_SCHED_SMT=y
+CONFIG_SCHED_MC=y
+# CONFIG_PREEMPT_NONE is not set
+CONFIG_PREEMPT_VOLUNTARY=y
# CONFIG_PREEMPT is not set
-CONFIG_X86_UP_APIC=y
-CONFIG_X86_UP_IOAPIC=y
+CONFIG_PREEMPT_BKL=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_X86_MCE=y
CONFIG_X86_MCE_NONFATAL=y
-# CONFIG_X86_MCE_P4THERMAL is not set
+CONFIG_X86_MCE_P4THERMAL=y
+CONFIG_VM86=y
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
# CONFIG_X86_REBOOTFIXUPS is not set
-# CONFIG_MICROCODE is not set
-# CONFIG_X86_MSR is not set
-# CONFIG_X86_CPUID is not set
+CONFIG_MICROCODE=y
+CONFIG_X86_MSR=y
+CONFIG_X86_CPUID=y
#
# Firmware Drivers
@@ -161,65 +175,60 @@ CONFIG_X86_MCE_NONFATAL=y
CONFIG_NOHIGHMEM=y
# CONFIG_HIGHMEM4G is not set
# CONFIG_HIGHMEM64G is not set
-CONFIG_VMSPLIT_3G=y
-# CONFIG_VMSPLIT_3G_OPT is not set
-# CONFIG_VMSPLIT_2G is not set
-# CONFIG_VMSPLIT_1G is not set
CONFIG_PAGE_OFFSET=0xC0000000
-CONFIG_ARCH_FLATMEM_ENABLE=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
# CONFIG_SPARSEMEM_MANUAL is not set
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
-CONFIG_SPARSEMEM_STATIC=y
+# CONFIG_SPARSEMEM_STATIC is not set
CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_RESOURCES_64BIT=y
# CONFIG_MATH_EMULATION is not set
CONFIG_MTRR=y
# CONFIG_EFI is not set
+# CONFIG_IRQBALANCE is not set
CONFIG_REGPARM=y
-# CONFIG_SECCOMP is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+CONFIG_SECCOMP=y
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
# CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
# CONFIG_KEXEC is not set
CONFIG_PHYSICAL_START=0x100000
-CONFIG_DOUBLEFAULT=y
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_COMPAT_VDSO=y
#
# Power management options (ACPI, APM)
#
CONFIG_PM=y
-# CONFIG_PM_LEGACY is not set
+CONFIG_PM_LEGACY=y
# CONFIG_PM_DEBUG is not set
-CONFIG_SOFTWARE_SUSPEND=y
-CONFIG_PM_STD_PARTITION=""
#
# ACPI (Advanced Configuration and Power Interface) Support
#
CONFIG_ACPI=y
-# CONFIG_ACPI_SLEEP is not set
-# CONFIG_ACPI_AC is not set
-# CONFIG_ACPI_BATTERY is not set
-# CONFIG_ACPI_BUTTON is not set
+CONFIG_ACPI_AC=y
+CONFIG_ACPI_BATTERY=y
+CONFIG_ACPI_BUTTON=y
# CONFIG_ACPI_VIDEO is not set
# CONFIG_ACPI_HOTKEY is not set
-# CONFIG_ACPI_FAN is not set
-# CONFIG_ACPI_PROCESSOR is not set
+CONFIG_ACPI_FAN=y
+# CONFIG_ACPI_DOCK is not set
+CONFIG_ACPI_PROCESSOR=y
+CONFIG_ACPI_THERMAL=y
# CONFIG_ACPI_ASUS is not set
# CONFIG_ACPI_IBM is not set
# CONFIG_ACPI_TOSHIBA is not set
-CONFIG_ACPI_BLACKLIST_YEAR=0
-# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BLACKLIST_YEAR=2001
+CONFIG_ACPI_DEBUG=y
CONFIG_ACPI_EC=y
CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
-# CONFIG_X86_PM_TIMER is not set
+CONFIG_X86_PM_TIMER=y
# CONFIG_ACPI_CONTAINER is not set
#
@@ -230,7 +239,41 @@ CONFIG_ACPI_SYSTEM=y
#
# CPU Frequency scaling
#
-# CONFIG_CPU_FREQ is not set
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TABLE=y
+CONFIG_CPU_FREQ_DEBUG=y
+CONFIG_CPU_FREQ_STAT=y
+# CONFIG_CPU_FREQ_STAT_DETAILS is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+
+#
+# CPUFreq processor drivers
+#
+CONFIG_X86_ACPI_CPUFREQ=y
+# CONFIG_X86_POWERNOW_K6 is not set
+# CONFIG_X86_POWERNOW_K7 is not set
+CONFIG_X86_POWERNOW_K8=y
+CONFIG_X86_POWERNOW_K8_ACPI=y
+# CONFIG_X86_GX_SUSPMOD is not set
+# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
+# CONFIG_X86_SPEEDSTEP_ICH is not set
+# CONFIG_X86_SPEEDSTEP_SMI is not set
+# CONFIG_X86_P4_CLOCKMOD is not set
+# CONFIG_X86_CPUFREQ_NFORCE2 is not set
+# CONFIG_X86_LONGRUN is not set
+# CONFIG_X86_LONGHAUL is not set
+
+#
+# shared options
+#
+CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y
+# CONFIG_X86_SPEEDSTEP_LIB is not set
#
# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
@@ -244,12 +287,13 @@ CONFIG_PCI_BIOS=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_MMCONFIG=y
# CONFIG_PCIEPORTBUS is not set
-# CONFIG_PCI_MSI is not set
-# CONFIG_PCI_LEGACY_PROC is not set
+CONFIG_PCI_MSI=y
+# CONFIG_PCI_DEBUG is not set
CONFIG_ISA_DMA_API=y
# CONFIG_ISA is not set
# CONFIG_MCA is not set
# CONFIG_SCx200 is not set
+CONFIG_K8_NB=y
#
# PCCARD (PCMCIA/CardBus) support
@@ -278,93 +322,48 @@ CONFIG_NET=y
#
# CONFIG_NETDEBUG is not set
CONFIG_PACKET=y
-CONFIG_PACKET_MMAP=y
+# CONFIG_PACKET_MMAP is not set
CONFIG_UNIX=y
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
# CONFIG_NET_KEY is not set
CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
+CONFIG_IP_MULTICAST=y
# CONFIG_IP_ADVANCED_ROUTER is not set
CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
# CONFIG_ARPD is not set
# CONFIG_SYN_COOKIES is not set
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
# CONFIG_INET_TUNNEL is not set
-# CONFIG_INET_DIAG is not set
+CONFIG_INET_XFRM_MODE_TRANSPORT=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
# CONFIG_TCP_CONG_ADVANCED is not set
CONFIG_TCP_CONG_BIC=y
-
-#
-# IP: Virtual Server Configuration
-#
-# CONFIG_IP_VS is not set
-# CONFIG_IPV6 is not set
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_DEBUG is not set
-
-#
-# Core Netfilter Configuration
-#
-# CONFIG_NETFILTER_NETLINK is not set
-CONFIG_NETFILTER_XTABLES=y
-# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set
-# CONFIG_NETFILTER_XT_TARGET_MARK is not set
-# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set
-# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set
-# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set
-# CONFIG_NETFILTER_XT_MATCH_DCCP is not set
-# CONFIG_NETFILTER_XT_MATCH_HELPER is not set
-# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set
-CONFIG_NETFILTER_XT_MATCH_LIMIT=y
-CONFIG_NETFILTER_XT_MATCH_MAC=y
-# CONFIG_NETFILTER_XT_MATCH_MARK is not set
-# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set
-# CONFIG_NETFILTER_XT_MATCH_REALM is not set
-# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
-CONFIG_NETFILTER_XT_MATCH_STATE=y
-# CONFIG_NETFILTER_XT_MATCH_STRING is not set
-# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set
-
-#
-# IP: Netfilter Configuration
-#
-CONFIG_IP_NF_CONNTRACK=y
-# CONFIG_IP_NF_CT_ACCT is not set
-# CONFIG_IP_NF_CONNTRACK_MARK is not set
-# CONFIG_IP_NF_CONNTRACK_EVENTS is not set
-# CONFIG_IP_NF_CT_PROTO_SCTP is not set
-CONFIG_IP_NF_FTP=y
-# CONFIG_IP_NF_IRC is not set
-# CONFIG_IP_NF_NETBIOS_NS is not set
-# CONFIG_IP_NF_TFTP is not set
-# CONFIG_IP_NF_AMANDA is not set
-# CONFIG_IP_NF_PPTP is not set
-# CONFIG_IP_NF_QUEUE is not set
-CONFIG_IP_NF_IPTABLES=y
-# CONFIG_IP_NF_MATCH_IPRANGE is not set
-# CONFIG_IP_NF_MATCH_MULTIPORT is not set
-# CONFIG_IP_NF_MATCH_TOS is not set
-# CONFIG_IP_NF_MATCH_RECENT is not set
-# CONFIG_IP_NF_MATCH_ECN is not set
-# CONFIG_IP_NF_MATCH_DSCP is not set
-# CONFIG_IP_NF_MATCH_AH_ESP is not set
-# CONFIG_IP_NF_MATCH_TTL is not set
-# CONFIG_IP_NF_MATCH_OWNER is not set
-# CONFIG_IP_NF_MATCH_ADDRTYPE is not set
-# CONFIG_IP_NF_MATCH_HASHLIMIT is not set
-CONFIG_IP_NF_FILTER=y
-# CONFIG_IP_NF_TARGET_REJECT is not set
-CONFIG_IP_NF_TARGET_LOG=y
-# CONFIG_IP_NF_TARGET_ULOG is not set
-# CONFIG_IP_NF_TARGET_TCPMSS is not set
-# CONFIG_IP_NF_NAT is not set
-# CONFIG_IP_NF_MANGLE is not set
-# CONFIG_IP_NF_RAW is not set
-# CONFIG_IP_NF_ARPTABLES is not set
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+CONFIG_INET6_XFRM_MODE_TRANSPORT=y
+CONFIG_INET6_XFRM_MODE_TUNNEL=y
+# CONFIG_IPV6_TUNNEL is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
#
# DCCP Configuration (EXPERIMENTAL)
@@ -402,6 +401,7 @@ CONFIG_IP_NF_TARGET_LOG=y
# Network testing
#
# CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_TCPPROBE is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
@@ -416,7 +416,9 @@ CONFIG_IP_NF_TARGET_LOG=y
#
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
+CONFIG_FW_LOADER=y
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_SYS_HYPERVISOR is not set
#
# Connector - unified userspace <-> kernelspace linker
@@ -431,13 +433,7 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y
#
# Parallel port support
#
-CONFIG_PARPORT=y
-CONFIG_PARPORT_PC=y
-# CONFIG_PARPORT_SERIAL is not set
-# CONFIG_PARPORT_PC_FIFO is not set
-# CONFIG_PARPORT_PC_SUPERIO is not set
-# CONFIG_PARPORT_GSC is not set
-CONFIG_PARPORT_1284=y
+# CONFIG_PARPORT is not set
#
# Plug and Play support
@@ -447,8 +443,7 @@ CONFIG_PARPORT_1284=y
#
# Block devices
#
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_PARIDE is not set
+CONFIG_BLK_DEV_FD=y
# CONFIG_BLK_CPQ_DA is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
# CONFIG_BLK_DEV_DAC960 is not set
@@ -459,8 +454,11 @@ CONFIG_BLK_DEV_LOOP=y
# CONFIG_BLK_DEV_NBD is not set
# CONFIG_BLK_DEV_SX8 is not set
# CONFIG_BLK_DEV_UB is not set
-# CONFIG_BLK_DEV_RAM is not set
+CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
+CONFIG_BLK_DEV_INITRD=y
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
@@ -476,7 +474,7 @@ CONFIG_BLK_DEV_IDE=y
# CONFIG_BLK_DEV_IDE_SATA is not set
# CONFIG_BLK_DEV_HD_IDE is not set
CONFIG_BLK_DEV_IDEDISK=y
-# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_IDEDISK_MULTI_MODE=y
CONFIG_BLK_DEV_IDECD=y
# CONFIG_BLK_DEV_IDETAPE is not set
# CONFIG_BLK_DEV_IDEFLOPPY is not set
@@ -486,10 +484,10 @@ CONFIG_BLK_DEV_IDECD=y
#
# IDE chipset support/bugfixes
#
-# CONFIG_IDE_GENERIC is not set
+CONFIG_IDE_GENERIC=y
# CONFIG_BLK_DEV_CMD640 is not set
CONFIG_BLK_DEV_IDEPCI=y
-CONFIG_IDEPCI_SHARE_IRQ=y
+# CONFIG_IDEPCI_SHARE_IRQ is not set
# CONFIG_BLK_DEV_OFFBOARD is not set
# CONFIG_BLK_DEV_GENERIC is not set
# CONFIG_BLK_DEV_OPTI621 is not set
@@ -500,7 +498,7 @@ CONFIG_IDEDMA_PCI_AUTO=y
# CONFIG_IDEDMA_ONLYDISK is not set
# CONFIG_BLK_DEV_AEC62XX is not set
# CONFIG_BLK_DEV_ALI15X3 is not set
-# CONFIG_BLK_DEV_AMD74XX is not set
+CONFIG_BLK_DEV_AMD74XX=y
# CONFIG_BLK_DEV_ATIIXP is not set
# CONFIG_BLK_DEV_CMD64X is not set
# CONFIG_BLK_DEV_TRIFLEX is not set
@@ -511,7 +509,7 @@ CONFIG_IDEDMA_PCI_AUTO=y
# CONFIG_BLK_DEV_HPT34X is not set
# CONFIG_BLK_DEV_HPT366 is not set
# CONFIG_BLK_DEV_SC1200 is not set
-# CONFIG_BLK_DEV_PIIX is not set
+CONFIG_BLK_DEV_PIIX=y
# CONFIG_BLK_DEV_IT821X is not set
# CONFIG_BLK_DEV_NS87415 is not set
# CONFIG_BLK_DEV_PDC202XX_OLD is not set
@@ -521,7 +519,7 @@ CONFIG_IDEDMA_PCI_AUTO=y
# CONFIG_BLK_DEV_SIS5513 is not set
# CONFIG_BLK_DEV_SLC90E66 is not set
# CONFIG_BLK_DEV_TRM290 is not set
-CONFIG_BLK_DEV_VIA82CXXX=y
+# CONFIG_BLK_DEV_VIA82CXXX is not set
# CONFIG_IDE_ARM is not set
CONFIG_BLK_DEV_IDEDMA=y
# CONFIG_IDEDMA_IVB is not set
@@ -555,8 +553,8 @@ CONFIG_BLK_DEV_SD=y
#
# SCSI Transport Attributes
#
-# CONFIG_SCSI_SPI_ATTRS is not set
-# CONFIG_SCSI_FC_ATTRS is not set
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=y
# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SAS_ATTRS is not set
@@ -564,18 +562,47 @@ CONFIG_BLK_DEV_SD=y
# SCSI low-level drivers
#
# CONFIG_ISCSI_TCP is not set
-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+CONFIG_BLK_DEV_3W_XXXX_RAID=y
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AACRAID is not set
-# CONFIG_SCSI_AIC7XXX is not set
+CONFIG_SCSI_AIC7XXX=y
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=5000
+CONFIG_AIC7XXX_DEBUG_ENABLE=y
+CONFIG_AIC7XXX_DEBUG_MASK=0
+CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
# CONFIG_SCSI_AIC7XXX_OLD is not set
-# CONFIG_SCSI_AIC79XX is not set
+CONFIG_SCSI_AIC79XX=y
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=4000
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
+CONFIG_AIC79XX_DEBUG_MASK=0
+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
# CONFIG_MEGARAID_NEWGEN is not set
# CONFIG_MEGARAID_LEGACY is not set
# CONFIG_MEGARAID_SAS is not set
-# CONFIG_SCSI_SATA is not set
+CONFIG_SCSI_SATA=y
+CONFIG_SCSI_SATA_AHCI=y
+CONFIG_SCSI_SATA_SVW=y
+CONFIG_SCSI_ATA_PIIX=y
+# CONFIG_SCSI_SATA_MV is not set
+CONFIG_SCSI_SATA_NV=y
+# CONFIG_SCSI_PDC_ADMA is not set
+# CONFIG_SCSI_HPTIOP is not set
+# CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_PROMISE=y
+# CONFIG_SCSI_SATA_SX4 is not set
+CONFIG_SCSI_SATA_SIL=y
+# CONFIG_SCSI_SATA_SIL24 is not set
+# CONFIG_SCSI_SATA_SIS is not set
+# CONFIG_SCSI_SATA_ULI is not set
+CONFIG_SCSI_SATA_VIA=y
+# CONFIG_SCSI_SATA_VITESSE is not set
+CONFIG_SCSI_SATA_INTEL_COMBINED=y
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_EATA is not set
@@ -584,11 +611,8 @@ CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_IPS is not set
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
-# CONFIG_SCSI_PPA is not set
-# CONFIG_SCSI_IMM is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_FC is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
# CONFIG_SCSI_QLA_FC is not set
# CONFIG_SCSI_LPFC is not set
@@ -605,15 +629,43 @@ CONFIG_BLK_DEV_SD=y
#
# Fusion MPT device support
#
-# CONFIG_FUSION is not set
-# CONFIG_FUSION_SPI is not set
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=y
# CONFIG_FUSION_FC is not set
# CONFIG_FUSION_SAS is not set
+CONFIG_FUSION_MAX_SGE=128
+# CONFIG_FUSION_CTL is not set
#
# IEEE 1394 (FireWire) support
#
-# CONFIG_IEEE1394 is not set
+CONFIG_IEEE1394=y
+
+#
+# Subsystem Options
+#
+# CONFIG_IEEE1394_VERBOSEDEBUG is not set
+# CONFIG_IEEE1394_OUI_DB is not set
+# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
+# CONFIG_IEEE1394_EXPORT_FULL_API is not set
+
+#
+# Device Drivers
+#
+
+#
+# Texas Instruments PCILynx requires I2C
+#
+CONFIG_IEEE1394_OHCI1394=y
+
+#
+# Protocol Drivers
+#
+# CONFIG_IEEE1394_VIDEO1394 is not set
+# CONFIG_IEEE1394_SBP2 is not set
+# CONFIG_IEEE1394_ETH1394 is not set
+# CONFIG_IEEE1394_DV1394 is not set
+CONFIG_IEEE1394_RAWIO=y
#
# I2O device support
@@ -652,45 +704,60 @@ CONFIG_MII=y
#
# Tulip family network device support
#
-# CONFIG_NET_TULIP is not set
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=y
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_ULI526X is not set
# CONFIG_HP100 is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
# CONFIG_AMD8111_ETH is not set
# CONFIG_ADAPTEC_STARFIRE is not set
-# CONFIG_B44 is not set
-# CONFIG_FORCEDETH is not set
+CONFIG_B44=y
+CONFIG_FORCEDETH=y
# CONFIG_DGRS is not set
# CONFIG_EEPRO100 is not set
CONFIG_E100=y
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
# CONFIG_NE2K_PCI is not set
-# CONFIG_8139CP is not set
-# CONFIG_8139TOO is not set
+CONFIG_8139CP=y
+CONFIG_8139TOO=y
+# CONFIG_8139TOO_PIO is not set
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+# CONFIG_8139TOO_8129 is not set
+# CONFIG_8139_OLD_RX_RESET is not set
# CONFIG_SIS900 is not set
# CONFIG_EPIC100 is not set
# CONFIG_SUNDANCE is not set
# CONFIG_TLAN is not set
# CONFIG_VIA_RHINE is not set
-# CONFIG_NET_POCKET is not set
#
# Ethernet (1000 Mbit)
#
# CONFIG_ACENIC is not set
# CONFIG_DL2K is not set
-# CONFIG_E1000 is not set
+CONFIG_E1000=y
+# CONFIG_E1000_NAPI is not set
+# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
# CONFIG_NS83820 is not set
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
+CONFIG_R8169=y
+# CONFIG_R8169_NAPI is not set
# CONFIG_SIS190 is not set
# CONFIG_SKGE is not set
-# CONFIG_SKY2 is not set
+CONFIG_SKY2=y
# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
-# CONFIG_TIGON3 is not set
+CONFIG_TIGON3=y
# CONFIG_BNX2 is not set
#
@@ -699,6 +766,7 @@ CONFIG_E100=y
# CONFIG_CHELSIO_T1 is not set
# CONFIG_IXGB is not set
# CONFIG_S2IO is not set
+# CONFIG_MYRI10GE is not set
#
# Token Ring devices
@@ -716,14 +784,15 @@ CONFIG_E100=y
# CONFIG_WAN is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
-# CONFIG_PLIP is not set
# CONFIG_PPP is not set
# CONFIG_SLIP is not set
# CONFIG_NET_FC is not set
# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
+CONFIG_NETCONSOLE=y
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NET_POLL_CONTROLLER=y
#
# ISDN subsystem
@@ -745,8 +814,8 @@ CONFIG_INPUT=y
#
CONFIG_INPUT_MOUSEDEV=y
CONFIG_INPUT_MOUSEDEV_PSAUX=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1280
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
# CONFIG_INPUT_JOYDEV is not set
# CONFIG_INPUT_TSDEV is not set
CONFIG_INPUT_EVDEV=y
@@ -776,7 +845,6 @@ CONFIG_SERIO=y
CONFIG_SERIO_I8042=y
# CONFIG_SERIO_SERPORT is not set
# CONFIG_SERIO_CT82C710 is not set
-# CONFIG_SERIO_PARKBD is not set
# CONFIG_SERIO_PCIPS2 is not set
CONFIG_SERIO_LIBPS2=y
# CONFIG_SERIO_RAW is not set
@@ -788,14 +856,15 @@ CONFIG_SERIO_LIBPS2=y
CONFIG_VT=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
# CONFIG_SERIAL_NONSTANDARD is not set
#
# Serial drivers
#
CONFIG_SERIAL_8250=y
-# CONFIG_SERIAL_8250_CONSOLE is not set
-# CONFIG_SERIAL_8250_ACPI is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_PCI=y
CONFIG_SERIAL_8250_NR_UARTS=4
CONFIG_SERIAL_8250_RUNTIME_UARTS=4
# CONFIG_SERIAL_8250_EXTENDED is not set
@@ -804,14 +873,11 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=4
# Non-8250 serial port support
#
CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=256
-CONFIG_PRINTER=y
-# CONFIG_LP_CONSOLE is not set
-# CONFIG_PPDEV is not set
-# CONFIG_TIPAR is not set
#
# IPMI
@@ -822,8 +888,12 @@ CONFIG_PRINTER=y
# Watchdog Cards
#
# CONFIG_WATCHDOG is not set
-# CONFIG_HW_RANDOM is not set
-CONFIG_NVRAM=y
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_INTEL=y
+CONFIG_HW_RANDOM_AMD=y
+CONFIG_HW_RANDOM_GEODE=y
+CONFIG_HW_RANDOM_VIA=y
+# CONFIG_NVRAM is not set
CONFIG_RTC=y
# CONFIG_DTLK is not set
# CONFIG_R3964 is not set
@@ -833,31 +903,28 @@ CONFIG_RTC=y
#
# Ftape, the floppy tape device driver
#
-# CONFIG_FTAPE is not set
CONFIG_AGP=y
# CONFIG_AGP_ALI is not set
# CONFIG_AGP_ATI is not set
# CONFIG_AGP_AMD is not set
-# CONFIG_AGP_AMD64 is not set
-# CONFIG_AGP_INTEL is not set
+CONFIG_AGP_AMD64=y
+CONFIG_AGP_INTEL=y
# CONFIG_AGP_NVIDIA is not set
# CONFIG_AGP_SIS is not set
# CONFIG_AGP_SWORKS is not set
-CONFIG_AGP_VIA=y
+# CONFIG_AGP_VIA is not set
# CONFIG_AGP_EFFICEON is not set
-CONFIG_DRM=y
-# CONFIG_DRM_TDFX is not set
-# CONFIG_DRM_R128 is not set
-CONFIG_DRM_RADEON=y
-# CONFIG_DRM_MGA is not set
-# CONFIG_DRM_SIS is not set
-# CONFIG_DRM_VIA is not set
-# CONFIG_DRM_SAVAGE is not set
+# CONFIG_DRM is not set
# CONFIG_MWAVE is not set
+# CONFIG_PC8736x_GPIO is not set
+# CONFIG_NSC_GPIO is not set
# CONFIG_CS5535_GPIO is not set
-# CONFIG_RAW_DRIVER is not set
-# CONFIG_HPET is not set
-# CONFIG_HANGCHECK_TIMER is not set
+CONFIG_RAW_DRIVER=y
+CONFIG_MAX_RAW_DEVS=256
+CONFIG_HPET=y
+# CONFIG_HPET_RTC_IRQ is not set
+CONFIG_HPET_MMAP=y
+CONFIG_HANGCHECK_TIMER=y
#
# TPM devices
@@ -868,59 +935,7 @@ CONFIG_DRM_RADEON=y
#
# I2C support
#
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-
-#
-# I2C Algorithms
-#
-CONFIG_I2C_ALGOBIT=y
-# CONFIG_I2C_ALGOPCF is not set
-# CONFIG_I2C_ALGOPCA is not set
-
-#
-# I2C Hardware Bus support
-#
-# CONFIG_I2C_ALI1535 is not set
-# CONFIG_I2C_ALI1563 is not set
-# CONFIG_I2C_ALI15X3 is not set
-# CONFIG_I2C_AMD756 is not set
-# CONFIG_I2C_AMD8111 is not set
-# CONFIG_I2C_I801 is not set
-# CONFIG_I2C_I810 is not set
-# CONFIG_I2C_PIIX4 is not set
-CONFIG_I2C_ISA=y
-# CONFIG_I2C_NFORCE2 is not set
-# CONFIG_I2C_PARPORT is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_PROSAVAGE is not set
-# CONFIG_I2C_SAVAGE4 is not set
-# CONFIG_SCx200_ACB is not set
-# CONFIG_I2C_SIS5595 is not set
-# CONFIG_I2C_SIS630 is not set
-# CONFIG_I2C_SIS96X is not set
-# CONFIG_I2C_STUB is not set
-# CONFIG_I2C_VIA is not set
-CONFIG_I2C_VIAPRO=y
-# CONFIG_I2C_VOODOO3 is not set
-# CONFIG_I2C_PCA_ISA is not set
-
-#
-# Miscellaneous I2C Chip support
-#
-# CONFIG_SENSORS_DS1337 is not set
-# CONFIG_SENSORS_DS1374 is not set
-# CONFIG_SENSORS_EEPROM is not set
-# CONFIG_SENSORS_PCF8574 is not set
-# CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_RTC8564 is not set
-# CONFIG_SENSORS_MAX6875 is not set
-# CONFIG_RTC_X1205_I2C is not set
-# CONFIG_I2C_DEBUG_CORE is not set
-# CONFIG_I2C_DEBUG_ALGO is not set
-# CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_I2C_DEBUG_CHIP is not set
+# CONFIG_I2C is not set
#
# SPI support
@@ -931,51 +946,12 @@ CONFIG_I2C_VIAPRO=y
#
# Dallas's 1-wire bus
#
-# CONFIG_W1 is not set
#
# Hardware Monitoring support
#
-CONFIG_HWMON=y
-CONFIG_HWMON_VID=y
-# CONFIG_SENSORS_ADM1021 is not set
-# CONFIG_SENSORS_ADM1025 is not set
-# CONFIG_SENSORS_ADM1026 is not set
-# CONFIG_SENSORS_ADM1031 is not set
-# CONFIG_SENSORS_ADM9240 is not set
-# CONFIG_SENSORS_ASB100 is not set
-# CONFIG_SENSORS_ATXP1 is not set
-# CONFIG_SENSORS_DS1621 is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_SENSORS_FSCHER is not set
-# CONFIG_SENSORS_FSCPOS is not set
-# CONFIG_SENSORS_GL518SM is not set
-# CONFIG_SENSORS_GL520SM is not set
-CONFIG_SENSORS_IT87=y
-# CONFIG_SENSORS_LM63 is not set
-# CONFIG_SENSORS_LM75 is not set
-# CONFIG_SENSORS_LM77 is not set
-# CONFIG_SENSORS_LM78 is not set
-# CONFIG_SENSORS_LM80 is not set
-# CONFIG_SENSORS_LM83 is not set
-# CONFIG_SENSORS_LM85 is not set
-# CONFIG_SENSORS_LM87 is not set
-# CONFIG_SENSORS_LM90 is not set
-# CONFIG_SENSORS_LM92 is not set
-# CONFIG_SENSORS_MAX1619 is not set
-# CONFIG_SENSORS_PC87360 is not set
-# CONFIG_SENSORS_SIS5595 is not set
-# CONFIG_SENSORS_SMSC47M1 is not set
-# CONFIG_SENSORS_SMSC47B397 is not set
-# CONFIG_SENSORS_VIA686A is not set
-# CONFIG_SENSORS_VT8231 is not set
-# CONFIG_SENSORS_W83781D is not set
-# CONFIG_SENSORS_W83792D is not set
-# CONFIG_SENSORS_W83L785TS is not set
-# CONFIG_SENSORS_W83627HF is not set
-# CONFIG_SENSORS_W83627EHF is not set
-# CONFIG_SENSORS_HDAPS is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
+# CONFIG_HWMON is not set
+# CONFIG_HWMON_VID is not set
#
# Misc devices
@@ -983,117 +959,31 @@ CONFIG_SENSORS_IT87=y
# CONFIG_IBM_ASM is not set
#
-# Multimedia Capabilities Port drivers
-#
-
-#
# Multimedia devices
#
-CONFIG_VIDEO_DEV=y
-
-#
-# Video For Linux
-#
-
-#
-# Video Adapters
-#
-# CONFIG_VIDEO_ADV_DEBUG is not set
-# CONFIG_VIDEO_BT848 is not set
-# CONFIG_VIDEO_BWQCAM is not set
-# CONFIG_VIDEO_CQCAM is not set
-# CONFIG_VIDEO_W9966 is not set
-# CONFIG_VIDEO_CPIA is not set
-# CONFIG_VIDEO_SAA5246A is not set
-# CONFIG_VIDEO_SAA5249 is not set
-# CONFIG_TUNER_3036 is not set
-# CONFIG_VIDEO_STRADIS is not set
-# CONFIG_VIDEO_ZORAN is not set
-CONFIG_VIDEO_SAA7134=y
-# CONFIG_VIDEO_SAA7134_ALSA is not set
-# CONFIG_VIDEO_MXB is not set
-# CONFIG_VIDEO_DPC is not set
-# CONFIG_VIDEO_HEXIUM_ORION is not set
-# CONFIG_VIDEO_HEXIUM_GEMINI is not set
-# CONFIG_VIDEO_CX88 is not set
-# CONFIG_VIDEO_EM28XX is not set
-# CONFIG_VIDEO_OVCAMCHIP is not set
-# CONFIG_VIDEO_AUDIO_DECODER is not set
-# CONFIG_VIDEO_DECODER is not set
-
-#
-# Radio Adapters
-#
-# CONFIG_RADIO_GEMTEK_PCI is not set
-# CONFIG_RADIO_MAXIRADIO is not set
-# CONFIG_RADIO_MAESTRO is not set
+# CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
#
# Digital Video Broadcasting Devices
#
# CONFIG_DVB is not set
-CONFIG_VIDEO_TUNER=y
-CONFIG_VIDEO_BUF=y
-CONFIG_VIDEO_IR=y
+# CONFIG_USB_DABUSB is not set
#
# Graphics support
#
-CONFIG_FB=y
-CONFIG_FB_CFB_FILLRECT=y
-CONFIG_FB_CFB_COPYAREA=y
-CONFIG_FB_CFB_IMAGEBLIT=y
-# CONFIG_FB_MACMODES is not set
-CONFIG_FB_MODE_HELPERS=y
-# CONFIG_FB_TILEBLITTING is not set
-# CONFIG_FB_CIRRUS is not set
-# CONFIG_FB_PM2 is not set
-# CONFIG_FB_CYBER2000 is not set
-# CONFIG_FB_ARC is not set
-# CONFIG_FB_ASILIANT is not set
-# CONFIG_FB_IMSTT is not set
-# CONFIG_FB_VGA16 is not set
-# CONFIG_FB_VESA is not set
-CONFIG_VIDEO_SELECT=y
-# CONFIG_FB_HGA is not set
-# CONFIG_FB_S1D13XXX is not set
-# CONFIG_FB_NVIDIA is not set
-# CONFIG_FB_RIVA is not set
-# CONFIG_FB_I810 is not set
-# CONFIG_FB_INTEL is not set
-# CONFIG_FB_MATROX is not set
-# CONFIG_FB_RADEON_OLD is not set
-CONFIG_FB_RADEON=y
-CONFIG_FB_RADEON_I2C=y
-# CONFIG_FB_RADEON_DEBUG is not set
-# CONFIG_FB_ATY128 is not set
-# CONFIG_FB_ATY is not set
-# CONFIG_FB_SAVAGE is not set
-# CONFIG_FB_SIS is not set
-# CONFIG_FB_NEOMAGIC is not set
-# CONFIG_FB_KYRO is not set
-# CONFIG_FB_3DFX is not set
-# CONFIG_FB_VOODOO1 is not set
-# CONFIG_FB_CYBLA is not set
-# CONFIG_FB_TRIDENT is not set
-# CONFIG_FB_GEODE is not set
-# CONFIG_FB_VIRTUAL is not set
+CONFIG_FIRMWARE_EDID=y
+# CONFIG_FB is not set
#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
+CONFIG_VGACON_SOFT_SCROLLBACK=y
+CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128
+# CONFIG_VIDEO_SELECT is not set
CONFIG_DUMMY_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
-# CONFIG_FONTS is not set
-CONFIG_FONT_8x8=y
-CONFIG_FONT_8x16=y
-
-#
-# Logo configuration
-#
-# CONFIG_LOGO is not set
# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
#
@@ -1104,97 +994,29 @@ CONFIG_SOUND=y
#
# Advanced Linux Sound Architecture
#
-CONFIG_SND=y
-CONFIG_SND_TIMER=y
-CONFIG_SND_PCM=y
-CONFIG_SND_RAWMIDI=y
-CONFIG_SND_SEQUENCER=y
-# CONFIG_SND_SEQ_DUMMY is not set
-# CONFIG_SND_MIXER_OSS is not set
-# CONFIG_SND_PCM_OSS is not set
-# CONFIG_SND_SEQUENCER_OSS is not set
-CONFIG_SND_RTCTIMER=y
-CONFIG_SND_SEQ_RTCTIMER_DEFAULT=y
-# CONFIG_SND_DYNAMIC_MINORS is not set
-# CONFIG_SND_SUPPORT_OLD_API is not set
-# CONFIG_SND_VERBOSE_PRINTK is not set
-# CONFIG_SND_DEBUG is not set
-
-#
-# Generic devices
-#
-CONFIG_SND_MPU401_UART=y
-CONFIG_SND_AC97_CODEC=y
-CONFIG_SND_AC97_BUS=y
-# CONFIG_SND_DUMMY is not set
-# CONFIG_SND_VIRMIDI is not set
-# CONFIG_SND_MTPAV is not set
-# CONFIG_SND_SERIAL_U16550 is not set
-# CONFIG_SND_MPU401 is not set
-
-#
-# PCI devices
-#
-# CONFIG_SND_AD1889 is not set
-# CONFIG_SND_ALS4000 is not set
-# CONFIG_SND_ALI5451 is not set
-# CONFIG_SND_ATIIXP is not set
-# CONFIG_SND_ATIIXP_MODEM is not set
-# CONFIG_SND_AU8810 is not set
-# CONFIG_SND_AU8820 is not set
-# CONFIG_SND_AU8830 is not set
-# CONFIG_SND_AZT3328 is not set
-# CONFIG_SND_BT87X is not set
-# CONFIG_SND_CA0106 is not set
-# CONFIG_SND_CMIPCI is not set
-# CONFIG_SND_CS4281 is not set
-# CONFIG_SND_CS46XX is not set
-# CONFIG_SND_CS5535AUDIO is not set
-# CONFIG_SND_EMU10K1 is not set
-# CONFIG_SND_EMU10K1X is not set
-# CONFIG_SND_ENS1370 is not set
-# CONFIG_SND_ENS1371 is not set
-# CONFIG_SND_ES1938 is not set
-# CONFIG_SND_ES1968 is not set
-# CONFIG_SND_FM801 is not set
-# CONFIG_SND_HDA_INTEL is not set
-# CONFIG_SND_HDSP is not set
-# CONFIG_SND_HDSPM is not set
-# CONFIG_SND_ICE1712 is not set
-# CONFIG_SND_ICE1724 is not set
-# CONFIG_SND_INTEL8X0 is not set
-# CONFIG_SND_INTEL8X0M is not set
-# CONFIG_SND_KORG1212 is not set
-# CONFIG_SND_MAESTRO3 is not set
-# CONFIG_SND_MIXART is not set
-# CONFIG_SND_NM256 is not set
-# CONFIG_SND_PCXHR is not set
-# CONFIG_SND_RME32 is not set
-# CONFIG_SND_RME96 is not set
-# CONFIG_SND_RME9652 is not set
-# CONFIG_SND_SONICVIBES is not set
-# CONFIG_SND_TRIDENT is not set
-CONFIG_SND_VIA82XX=y
-# CONFIG_SND_VIA82XX_MODEM is not set
-# CONFIG_SND_VX222 is not set
-# CONFIG_SND_YMFPCI is not set
-
-#
-# USB devices
-#
-# CONFIG_SND_USB_AUDIO is not set
-# CONFIG_SND_USB_USX2Y is not set
+# CONFIG_SND is not set
#
# Open Sound System
#
-# CONFIG_SOUND_PRIME is not set
+CONFIG_SOUND_PRIME=y
+# CONFIG_SOUND_BT878 is not set
+# CONFIG_SOUND_EMU10K1 is not set
+# CONFIG_SOUND_FUSION is not set
+# CONFIG_SOUND_ES1371 is not set
+CONFIG_SOUND_ICH=y
+# CONFIG_SOUND_TRIDENT is not set
+# CONFIG_SOUND_MSNDCLAS is not set
+# CONFIG_SOUND_MSNDPIN is not set
+# CONFIG_SOUND_VIA82CXXX is not set
+# CONFIG_SOUND_OSS is not set
#
# USB support
#
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_ARCH_HAS_EHCI=y
CONFIG_USB=y
# CONFIG_USB_DEBUG is not set
@@ -1213,17 +1035,19 @@ CONFIG_USB_DEVICEFS=y
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_SPLIT_ISO is not set
# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
# CONFIG_USB_ISP116X_HCD is not set
-# CONFIG_USB_OHCI_HCD is not set
+CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
#
# USB Device Class drivers
#
-# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
# CONFIG_USB_ACM is not set
-# CONFIG_USB_PRINTER is not set
+CONFIG_USB_PRINTER=y
#
# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
@@ -1248,21 +1072,17 @@ CONFIG_USB_STORAGE=y
#
# USB Input Devices
#
-# CONFIG_USB_HID is not set
-
-#
-# USB HID Boot Protocol drivers
-#
-# CONFIG_USB_KBD is not set
-# CONFIG_USB_MOUSE is not set
+CONFIG_USB_HID=y
+CONFIG_USB_HIDINPUT=y
+# CONFIG_USB_HIDINPUT_POWERBOOK is not set
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
# CONFIG_USB_AIPTEK is not set
# CONFIG_USB_WACOM is not set
# CONFIG_USB_ACECAD is not set
# CONFIG_USB_KBTAB is not set
# CONFIG_USB_POWERMATE is not set
-# CONFIG_USB_MTOUCH is not set
-# CONFIG_USB_ITMTOUCH is not set
-# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_TOUCHSCREEN is not set
# CONFIG_USB_YEALINK is not set
# CONFIG_USB_XPAD is not set
# CONFIG_USB_ATI_REMOTE is not set
@@ -1277,21 +1097,6 @@ CONFIG_USB_STORAGE=y
# CONFIG_USB_MICROTEK is not set
#
-# USB Multimedia devices
-#
-# CONFIG_USB_DABUSB is not set
-# CONFIG_USB_VICAM is not set
-# CONFIG_USB_DSBR is not set
-# CONFIG_USB_ET61X251 is not set
-# CONFIG_USB_IBMCAM is not set
-# CONFIG_USB_KONICAWC is not set
-# CONFIG_USB_OV511 is not set
-# CONFIG_USB_SE401 is not set
-# CONFIG_USB_SN9C102 is not set
-# CONFIG_USB_STV680 is not set
-# CONFIG_USB_PWC is not set
-
-#
# USB Network Adapters
#
# CONFIG_USB_CATC is not set
@@ -1299,12 +1104,11 @@ CONFIG_USB_STORAGE=y
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET is not set
-# CONFIG_USB_MON is not set
+CONFIG_USB_MON=y
#
# USB port drivers
#
-# CONFIG_USB_USS720 is not set
#
# USB Serial Converter support
@@ -1321,10 +1125,12 @@ CONFIG_USB_STORAGE=y
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
# CONFIG_USB_LED is not set
+# CONFIG_USB_CYPRESS_CY7C63 is not set
# CONFIG_USB_CYTHERM is not set
# CONFIG_USB_PHIDGETKIT is not set
# CONFIG_USB_PHIDGETSERVO is not set
# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_APPLEDISPLAY is not set
# CONFIG_USB_SISUSBVGA is not set
# CONFIG_USB_LD is not set
# CONFIG_USB_TEST is not set
@@ -1344,37 +1150,78 @@ CONFIG_USB_STORAGE=y
# CONFIG_MMC is not set
#
+# LED devices
+#
+# CONFIG_NEW_LEDS is not set
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+
+#
# InfiniBand support
#
# CONFIG_INFINIBAND is not set
#
-# SN Devices
+# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
#
+# CONFIG_EDAC is not set
#
-# EDAC - error detection and reporting (RAS)
+# Real Time Clock
+#
+# CONFIG_RTC_CLASS is not set
+
+#
+# DMA Engine support
+#
+# CONFIG_DMA_ENGINE is not set
+
+#
+# DMA Clients
+#
+
+#
+# DMA Devices
#
-# CONFIG_EDAC is not set
#
# File systems
#
CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+# CONFIG_EXT2_FS_SECURITY is not set
# CONFIG_EXT2_FS_XIP is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_REISERFS_FS is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=y
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+# CONFIG_REISERFS_FS_SECURITY is not set
# CONFIG_JFS_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FS_POSIX_ACL=y
# CONFIG_XFS_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set
-# CONFIG_INOTIFY is not set
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
# CONFIG_QUOTA is not set
CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
+CONFIG_AUTOFS_FS=y
# CONFIG_AUTOFS4_FS is not set
# CONFIG_FUSE_FS is not set
@@ -1382,18 +1229,17 @@ CONFIG_DNOTIFY=y
# CD-ROM/DVD Filesystems
#
CONFIG_ISO9660_FS=y
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_ZISOFS_FS=y
+# CONFIG_JOLIET is not set
+# CONFIG_ZISOFS is not set
# CONFIG_UDF_FS is not set
#
# DOS/FAT/NT Filesystems
#
CONFIG_FAT_FS=y
-# CONFIG_MSDOS_FS is not set
+CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
-CONFIG_FAT_DEFAULT_CODEPAGE=850
+CONFIG_FAT_DEFAULT_CODEPAGE=437
CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
# CONFIG_NTFS_FS is not set
@@ -1404,10 +1250,9 @@ CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
-# CONFIG_HUGETLBFS is not set
-# CONFIG_HUGETLB_PAGE is not set
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
# CONFIG_CONFIGFS_FS is not set
#
@@ -1430,13 +1275,26 @@ CONFIG_RAMFS=y
#
# Network File Systems
#
-# CONFIG_NFS_FS is not set
-# CONFIG_NFSD is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V3_ACL is not set
+# CONFIG_NFSD_V4 is not set
+CONFIG_NFSD_TCP=y
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
# CONFIG_SMB_FS is not set
-CONFIG_CIFS=y
-# CONFIG_CIFS_STATS is not set
-# CONFIG_CIFS_XATTR is not set
-# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_CIFS is not set
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
@@ -1445,33 +1303,18 @@ CONFIG_CIFS=y
#
# Partition Types
#
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
+# CONFIG_PARTITION_ADVANCED is not set
CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
-# CONFIG_SGI_PARTITION is not set
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
-# CONFIG_EFI_PARTITION is not set
#
# Native Language Support
#
CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="iso8859-15"
-# CONFIG_NLS_CODEPAGE_437 is not set
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
-CONFIG_NLS_CODEPAGE_850=y
+# CONFIG_NLS_CODEPAGE_850 is not set
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
@@ -1491,7 +1334,7 @@ CONFIG_NLS_CODEPAGE_850=y
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
# CONFIG_NLS_CODEPAGE_1251 is not set
-# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
@@ -1510,20 +1353,49 @@ CONFIG_NLS_UTF8=y
#
# Instrumentation Support
#
-# CONFIG_PROFILING is not set
-# CONFIG_KPROBES is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_KPROBES=y
#
# Kernel hacking
#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
# CONFIG_PRINTK_TIME is not set
CONFIG_MAGIC_SYSRQ=y
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_RWSEMS is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_FRAME_POINTER is not set
+CONFIG_UNWIND_INFO=y
+CONFIG_STACK_UNWIND=y
+# CONFIG_FORCED_INLINING is not set
+# CONFIG_RCU_TORTURE_TEST is not set
CONFIG_EARLY_PRINTK=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_RODATA is not set
+# CONFIG_4KSTACKS is not set
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
+CONFIG_DOUBLEFAULT=y
#
# Security options
@@ -1547,8 +1419,12 @@ CONFIG_X86_MPPARSE=y
# CONFIG_CRC16 is not set
CONFIG_CRC32=y
# CONFIG_LIBCRC32C is not set
-CONFIG_ZLIB_INFLATE=y
+CONFIG_PLIST=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_PENDING_IRQ=y
+CONFIG_X86_SMP=y
+CONFIG_X86_HT=y
CONFIG_X86_BIOS_REBOOT=y
+CONFIG_X86_TRAMPOLINE=y
CONFIG_KTIME_SCALAR=y
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [3/145] i386: Allow to use GENERICARCH for UP kernels
[not found] <20060810 935.775038000@suse.de>
2006-08-10 19:35 ` [PATCH for review] [1/145] x86_64: Update defconfig Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [2/145] i386: " Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:47 ` Dave Hansen
2006-08-10 19:35 ` [PATCH for review] [4/145] x86_64: Temporarily revert parts of the Core 2 nmi nmi watchdog support Andi Kleen
` (142 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
There are some machines around (large xSeries or Unisys ES7000) that
need physical IO-APIC destination mode to access all of their IO
devices. This currently doesn't work in UP kernels as used in
distribution installers.
This patch allows even UP kernels to be compiled as GENERICARCH, which
makes it possible to use physical or clustered APIC mode.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/Kconfig | 7 +--
arch/i386/kernel/io_apic.c | 1
arch/i386/kernel/mpparse.c | 1
arch/i386/mach-generic/bigsmp.c | 1
arch/i386/mach-generic/es7000.c | 1
arch/i386/mach-generic/probe.c | 2
arch/i386/mach-generic/summit.c | 1
include/asm-i386/genapic.h | 69 ++++++++++++++++++-------------
include/asm-i386/mach-es7000/mach_apic.h | 4 +
include/asm-i386/mach-summit/mach_apic.h | 11 ++++
include/asm-i386/smp.h | 19 +++++---
11 files changed, 76 insertions(+), 41 deletions(-)
Index: linux/arch/i386/mach-generic/probe.c
===================================================================
--- linux.orig/arch/i386/mach-generic/probe.c
+++ linux/arch/i386/mach-generic/probe.c
@@ -119,7 +119,9 @@ int __init acpi_madt_oem_check(char *oem
return 0;
}
+#ifdef CONFIG_SMP
int hard_smp_processor_id(void)
{
return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID));
}
+#endif
Index: linux/arch/i386/Kconfig
===================================================================
--- linux.orig/arch/i386/Kconfig
+++ linux/arch/i386/Kconfig
@@ -165,7 +165,6 @@ config X86_VISWS
config X86_GENERICARCH
bool "Generic architecture (Summit, bigsmp, ES7000, default)"
- depends on SMP
help
This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
It is intended for a generic binary kernel.
@@ -261,7 +260,7 @@ source "kernel/Kconfig.preempt"
config X86_UP_APIC
bool "Local APIC support on uniprocessors"
- depends on !SMP && !(X86_VISWS || X86_VOYAGER)
+ depends on !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH)
help
A local APIC (Advanced Programmable Interrupt Controller) is an
integrated interrupt controller in the CPU. If you have a single-CPU
@@ -286,12 +285,12 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
bool
- depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER)
+ depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH
default y
config X86_IO_APIC
bool
- depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
+ depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH
default y
config X86_VISWS_APIC
Index: linux/arch/i386/mach-generic/bigsmp.c
===================================================================
--- linux.orig/arch/i386/mach-generic/bigsmp.c
+++ linux/arch/i386/mach-generic/bigsmp.c
@@ -5,6 +5,7 @@
#define APIC_DEFINITION 1
#include <linux/threads.h>
#include <linux/cpumask.h>
+#include <asm/smp.h>
#include <asm/mpspec.h>
#include <asm/genapic.h>
#include <asm/fixmap.h>
Index: linux/arch/i386/mach-generic/es7000.c
===================================================================
--- linux.orig/arch/i386/mach-generic/es7000.c
+++ linux/arch/i386/mach-generic/es7000.c
@@ -4,6 +4,7 @@
#define APIC_DEFINITION 1
#include <linux/threads.h>
#include <linux/cpumask.h>
+#include <asm/smp.h>
#include <asm/mpspec.h>
#include <asm/genapic.h>
#include <asm/fixmap.h>
Index: linux/arch/i386/mach-generic/summit.c
===================================================================
--- linux.orig/arch/i386/mach-generic/summit.c
+++ linux/arch/i386/mach-generic/summit.c
@@ -4,6 +4,7 @@
#define APIC_DEFINITION 1
#include <linux/threads.h>
#include <linux/cpumask.h>
+#include <asm/smp.h>
#include <asm/mpspec.h>
#include <asm/genapic.h>
#include <asm/fixmap.h>
Index: linux/include/asm-i386/genapic.h
===================================================================
--- linux.orig/include/asm-i386/genapic.h
+++ linux/include/asm-i386/genapic.h
@@ -1,6 +1,8 @@
#ifndef _ASM_GENAPIC_H
#define _ASM_GENAPIC_H 1
+#include <asm/mpspec.h>
+
/*
* Generic APIC driver interface.
*
@@ -63,14 +65,25 @@ struct genapic {
unsigned (*get_apic_id)(unsigned long x);
unsigned long apic_id_mask;
unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
-
+
+#ifdef CONFIG_SMP
/* ipi */
void (*send_IPI_mask)(cpumask_t mask, int vector);
void (*send_IPI_allbutself)(int vector);
void (*send_IPI_all)(int vector);
+#endif
};
-#define APICFUNC(x) .x = x
+#define APICFUNC(x) .x = x,
+
+/* More functions could be probably marked IPIFUNC and save some space
+ in UP GENERICARCH kernels, but I don't have the nerve right now
+ to untangle this mess. -AK */
+#ifdef CONFIG_SMP
+#define IPIFUNC(x) APICFUNC(x)
+#else
+#define IPIFUNC(x)
+#endif
#define APIC_INIT(aname, aprobe) { \
.name = aname, \
@@ -80,33 +93,33 @@ struct genapic {
.no_balance_irq = NO_BALANCE_IRQ, \
.ESR_DISABLE = esr_disable, \
.apic_destination_logical = APIC_DEST_LOGICAL, \
- APICFUNC(apic_id_registered), \
- APICFUNC(target_cpus), \
- APICFUNC(check_apicid_used), \
- APICFUNC(check_apicid_present), \
- APICFUNC(init_apic_ldr), \
- APICFUNC(ioapic_phys_id_map), \
- APICFUNC(clustered_apic_check), \
- APICFUNC(multi_timer_check), \
- APICFUNC(apicid_to_node), \
- APICFUNC(cpu_to_logical_apicid), \
- APICFUNC(cpu_present_to_apicid), \
- APICFUNC(apicid_to_cpu_present), \
- APICFUNC(mpc_apic_id), \
- APICFUNC(setup_portio_remap), \
- APICFUNC(check_phys_apicid_present), \
- APICFUNC(mpc_oem_bus_info), \
- APICFUNC(mpc_oem_pci_bus), \
- APICFUNC(mps_oem_check), \
- APICFUNC(get_apic_id), \
+ APICFUNC(apic_id_registered) \
+ APICFUNC(target_cpus) \
+ APICFUNC(check_apicid_used) \
+ APICFUNC(check_apicid_present) \
+ APICFUNC(init_apic_ldr) \
+ APICFUNC(ioapic_phys_id_map) \
+ APICFUNC(clustered_apic_check) \
+ APICFUNC(multi_timer_check) \
+ APICFUNC(apicid_to_node) \
+ APICFUNC(cpu_to_logical_apicid) \
+ APICFUNC(cpu_present_to_apicid) \
+ APICFUNC(apicid_to_cpu_present) \
+ APICFUNC(mpc_apic_id) \
+ APICFUNC(setup_portio_remap) \
+ APICFUNC(check_phys_apicid_present) \
+ APICFUNC(mpc_oem_bus_info) \
+ APICFUNC(mpc_oem_pci_bus) \
+ APICFUNC(mps_oem_check) \
+ APICFUNC(get_apic_id) \
.apic_id_mask = APIC_ID_MASK, \
- APICFUNC(cpu_mask_to_apicid), \
- APICFUNC(acpi_madt_oem_check), \
- APICFUNC(send_IPI_mask), \
- APICFUNC(send_IPI_allbutself), \
- APICFUNC(send_IPI_all), \
- APICFUNC(enable_apic_mode), \
- APICFUNC(phys_pkg_id), \
+ APICFUNC(cpu_mask_to_apicid) \
+ APICFUNC(acpi_madt_oem_check) \
+ IPIFUNC(send_IPI_mask) \
+ IPIFUNC(send_IPI_allbutself) \
+ IPIFUNC(send_IPI_all) \
+ APICFUNC(enable_apic_mode) \
+ APICFUNC(phys_pkg_id) \
}
extern struct genapic *genapic;
Index: linux/include/asm-i386/mach-summit/mach_apic.h
===================================================================
--- linux.orig/include/asm-i386/mach-summit/mach_apic.h
+++ linux/include/asm-i386/mach-summit/mach_apic.h
@@ -46,10 +46,12 @@ extern u8 cpu_2_logical_apicid[];
static inline void init_apic_ldr(void)
{
unsigned long val, id;
- int i, count;
- u8 lid;
+ int count = 0;
u8 my_id = (u8)hard_smp_processor_id();
u8 my_cluster = (u8)apicid_cluster(my_id);
+#ifdef CONFIG_SMP
+ u8 lid;
+ int i;
/* Create logical APIC IDs by counting CPUs already in cluster. */
for (count = 0, i = NR_CPUS; --i >= 0; ) {
@@ -57,6 +59,7 @@ static inline void init_apic_ldr(void)
if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster)
++count;
}
+#endif
/* We only have a 4 wide bitmap in cluster mode. If a deranged
* BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
@@ -91,9 +94,13 @@ static inline int apicid_to_node(int log
/* Mapping from cpu number to logical apicid */
static inline int cpu_to_logical_apicid(int cpu)
{
+#ifdef CONFIG_SMP
if (cpu >= NR_CPUS)
return BAD_APICID;
return (int)cpu_2_logical_apicid[cpu];
+#else
+ return logical_smp_processor_id();
+#endif
}
static inline int cpu_present_to_apicid(int mps_cpu)
Index: linux/include/asm-i386/mach-es7000/mach_apic.h
===================================================================
--- linux.orig/include/asm-i386/mach-es7000/mach_apic.h
+++ linux/include/asm-i386/mach-es7000/mach_apic.h
@@ -123,9 +123,13 @@ extern u8 cpu_2_logical_apicid[];
/* Mapping from cpu number to logical apicid */
static inline int cpu_to_logical_apicid(int cpu)
{
+#ifdef CONFIG_SMP
if (cpu >= NR_CPUS)
return BAD_APICID;
return (int)cpu_2_logical_apicid[cpu];
+#else
+ return logical_smp_processor_id();
+#endif
}
static inline int mpc_apic_id(struct mpc_config_processor *m, struct mpc_config_translation *unused)
Index: linux/arch/i386/kernel/io_apic.c
===================================================================
--- linux.orig/arch/i386/kernel/io_apic.c
+++ linux/arch/i386/kernel/io_apic.c
@@ -40,6 +40,7 @@
#include <asm/nmi.h>
#include <mach_apic.h>
+#include <mach_apicdef.h>
#include "io_ports.h"
Index: linux/arch/i386/kernel/mpparse.c
===================================================================
--- linux.orig/arch/i386/kernel/mpparse.c
+++ linux/arch/i386/kernel/mpparse.c
@@ -30,6 +30,7 @@
#include <asm/io_apic.h>
#include <mach_apic.h>
+#include <mach_apicdef.h>
#include <mach_mpparse.h>
#include <bios_ebda.h>
Index: linux/include/asm-i386/smp.h
===================================================================
--- linux.orig/include/asm-i386/smp.h
+++ linux/include/asm-i386/smp.h
@@ -80,17 +80,11 @@ static inline int hard_smp_processor_id(
return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
}
#endif
-
-static __inline int logical_smp_processor_id(void)
-{
- /* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
-}
-
#endif
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
+
#endif /* !__ASSEMBLY__ */
#else /* CONFIG_SMP */
@@ -100,4 +94,15 @@ extern void __cpu_die(unsigned int cpu);
#define NO_PROC_ID 0xFF /* No processor magic marker */
#endif
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_X86_LOCAL_APIC
+static __inline int logical_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+}
+#endif
+#endif
+
#endif
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [3/145] i386: Allow to use GENERICARCH for UP kernels
2006-08-10 19:35 ` [PATCH for review] [3/145] i386: Allow to use GENERICARCH for UP kernels Andi Kleen
@ 2006-08-10 19:47 ` Dave Hansen
2006-08-10 19:50 ` Andi Kleen
0 siblings, 1 reply; 199+ messages in thread
From: Dave Hansen @ 2006-08-10 19:47 UTC (permalink / raw)
To: Andi Kleen; +Cc: Linux Kernel Mailing List, Keith Mannthey
On Thu, 2006-08-10 at 21:35 +0200, Andi Kleen wrote:
> --- linux.orig/include/asm-i386/mach-summit/mach_apic.h
> +++ linux/include/asm-i386/mach-summit/mach_apic.h
> @@ -46,10 +46,12 @@ extern u8 cpu_2_logical_apicid[];
> static inline void init_apic_ldr(void)
> {
> unsigned long val, id;
> - int i, count;
> - u8 lid;
> + int count = 0;
> u8 my_id = (u8)hard_smp_processor_id();
> u8 my_cluster = (u8)apicid_cluster(my_id);
> +#ifdef CONFIG_SMP
> + u8 lid;
> + int i;
>
> /* Create logical APIC IDs by counting CPUs already in cluster. */
> for (count = 0, i = NR_CPUS; --i >= 0; ) {
> @@ -57,6 +59,7 @@ static inline void init_apic_ldr(void)
> if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster)
> ++count;
> }
> +#endif
Why does this particular loop have to go? I'm sure it's OK, but I also
wonder if there is a nice way to do it without the #ifdef.
-- Dave
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [3/145] i386: Allow to use GENERICARCH for UP kernels
2006-08-10 19:47 ` Dave Hansen
@ 2006-08-10 19:50 ` Andi Kleen
0 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:50 UTC (permalink / raw)
To: Dave Hansen; +Cc: Linux Kernel Mailing List, Keith Mannthey
> Why does this particular loop have to go? I'm sure it's OK, but I also
> wonder if there is a nice way to do it without the #ifdef.
My memory is fuzzy because that is actually a quite old patch. But I think
it was to avoid some dependency issue with needing something that wasn't
available on the UP kernel.
If you know of a nicer way to do this please submit a patch.
-Andi
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [4/145] x86_64: Temporarily revert parts of the Core 2 nmi nmi watchdog support
[not found] <20060810 935.775038000@suse.de>
` (2 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [3/145] i386: Allow to use GENERICARCH for UP kernels Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [5/145] x86_64: Add performance counter reservation framework for UP kernels Andi Kleen
` (141 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
This makes merging easier. The reverted parts are re-added a few patches later.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/nmi.c | 65 -------------------------
arch/x86_64/kernel/nmi.c | 81 +-------------------------------
include/asm-i386/intel_arch_perfmon.h | 19 -------
include/asm-x86_64/intel_arch_perfmon.h | 19 -------
4 files changed, 6 insertions(+), 178 deletions(-)
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -24,7 +24,6 @@
#include <asm/smp.h>
#include <asm/nmi.h>
-#include <asm/intel_arch_perfmon.h>
#include "mach_traps.h"
@@ -96,9 +95,6 @@ int nmi_active;
(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
-#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-
#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
* the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -211,8 +207,6 @@ static int __init setup_nmi_watchdog(cha
__setup("nmi_watchdog=", setup_nmi_watchdog);
-static void disable_intel_arch_watchdog(void);
-
static void disable_lapic_nmi_watchdog(void)
{
if (nmi_active <= 0)
@@ -222,10 +216,6 @@ static void disable_lapic_nmi_watchdog(v
wrmsr(MSR_K7_EVNTSEL0, 0, 0);
break;
case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- disable_intel_arch_watchdog();
- break;
- }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
@@ -454,53 +444,6 @@ static int setup_p4_watchdog(void)
return 1;
}
-static void disable_intel_arch_watchdog(void)
-{
- unsigned ebx;
-
- /*
- * Check whether the Architectural PerfMon supports
- * Unhalted Core Cycles Event or not.
- * NOTE: Corresponding bit = 0 in ebp indicates event present.
- */
- ebx = cpuid_ebx(10);
- if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
- wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
-}
-
-static int setup_intel_arch_watchdog(void)
-{
- unsigned int evntsel;
- unsigned ebx;
-
- /*
- * Check whether the Architectural PerfMon supports
- * Unhalted Core Cycles Event or not.
- * NOTE: Corresponding bit = 0 in ebp indicates event present.
- */
- ebx = cpuid_ebx(10);
- if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
- return 0;
-
- nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
-
- clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
- clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
-
- evntsel = ARCH_PERFMON_EVENTSEL_INT
- | ARCH_PERFMON_EVENTSEL_OS
- | ARCH_PERFMON_EVENTSEL_USR
- | ARCH_PERFMON_NMI_EVENT_SEL
- | ARCH_PERFMON_NMI_EVENT_UMASK;
-
- wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
- write_watchdog_counter("INTEL_ARCH_PERFCTR0");
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
- wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
- return 1;
-}
-
void setup_apic_nmi_watchdog (void)
{
switch (boot_cpu_data.x86_vendor) {
@@ -510,11 +453,6 @@ void setup_apic_nmi_watchdog (void)
setup_k7_watchdog();
break;
case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- if (!setup_intel_arch_watchdog())
- return;
- break;
- }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
@@ -619,8 +557,7 @@ void nmi_watchdog_tick (struct pt_regs *
wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
- else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
- nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+ else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
/* Only P6 based Pentium M need to re-unmask
* the apic vector but it doesn't hurt
* other P6 variant */
Index: linux/arch/x86_64/kernel/nmi.c
===================================================================
--- linux.orig/arch/x86_64/kernel/nmi.c
+++ linux/arch/x86_64/kernel/nmi.c
@@ -26,7 +26,6 @@
#include <asm/proto.h>
#include <asm/kdebug.h>
#include <asm/mce.h>
-#include <asm/intel_arch_perfmon.h>
/*
* lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -66,9 +65,6 @@ static unsigned int nmi_p4_cccr_val;
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
-#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-
#define MSR_P4_MISC_ENABLE 0x1A0
#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
@@ -100,10 +96,7 @@ static __cpuinit inline int nmi_known_cp
case X86_VENDOR_AMD:
return boot_cpu_data.x86 == 15;
case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return 1;
- else
- return (boot_cpu_data.x86 == 15);
+ return boot_cpu_data.x86 == 15;
}
return 0;
}
@@ -209,8 +202,6 @@ int __init setup_nmi_watchdog(char *str)
__setup("nmi_watchdog=", setup_nmi_watchdog);
-static void disable_intel_arch_watchdog(void);
-
static void disable_lapic_nmi_watchdog(void)
{
if (nmi_active <= 0)
@@ -223,8 +214,6 @@ static void disable_lapic_nmi_watchdog(v
if (boot_cpu_data.x86 == 15) {
wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
- } else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- disable_intel_arch_watchdog();
}
break;
}
@@ -377,53 +366,6 @@ static void setup_k7_watchdog(void)
wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
}
-static void disable_intel_arch_watchdog(void)
-{
- unsigned ebx;
-
- /*
- * Check whether the Architectural PerfMon supports
- * Unhalted Core Cycles Event or not.
- * NOTE: Corresponding bit = 0 in ebp indicates event present.
- */
- ebx = cpuid_ebx(10);
- if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
- wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
-}
-
-static int setup_intel_arch_watchdog(void)
-{
- unsigned int evntsel;
- unsigned ebx;
-
- /*
- * Check whether the Architectural PerfMon supports
- * Unhalted Core Cycles Event or not.
- * NOTE: Corresponding bit = 0 in ebp indicates event present.
- */
- ebx = cpuid_ebx(10);
- if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
- return 0;
-
- nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
-
- clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
- clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
-
- evntsel = ARCH_PERFMON_EVENTSEL_INT
- | ARCH_PERFMON_EVENTSEL_OS
- | ARCH_PERFMON_EVENTSEL_USR
- | ARCH_PERFMON_NMI_EVENT_SEL
- | ARCH_PERFMON_NMI_EVENT_UMASK;
-
- wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
- wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz));
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
- wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
- return 1;
-}
-
static int setup_p4_watchdog(void)
{
@@ -477,16 +419,10 @@ void setup_apic_nmi_watchdog(void)
setup_k7_watchdog();
break;
case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- if (!setup_intel_arch_watchdog())
- return;
- } else if (boot_cpu_data.x86 == 15) {
- if (!setup_p4_watchdog())
- return;
- } else {
+ if (boot_cpu_data.x86 != 15)
+ return;
+ if (!setup_p4_watchdog())
return;
- }
-
break;
default:
@@ -571,14 +507,7 @@ void __kprobes nmi_watchdog_tick(struct
*/
wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
apic_write(APIC_LVTPC, APIC_DM_NMI);
- } else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
- /*
- * For Intel based architectural perfmon
- * - LVTPC is masked on interrupt and must be
- * unmasked by the LVTPC handler.
- */
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- }
+ }
wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
}
}
Index: linux/include/asm-i386/intel_arch_perfmon.h
===================================================================
--- linux.orig/include/asm-i386/intel_arch_perfmon.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef X86_INTEL_ARCH_PERFMON_H
-#define X86_INTEL_ARCH_PERFMON_H 1
-
-#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
-#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
-
-#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
-#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
-
-#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
-#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
-
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0)
-
-#endif /* X86_INTEL_ARCH_PERFMON_H */
Index: linux/include/asm-x86_64/intel_arch_perfmon.h
===================================================================
--- linux.orig/include/asm-x86_64/intel_arch_perfmon.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef X86_64_INTEL_ARCH_PERFMON_H
-#define X86_64_INTEL_ARCH_PERFMON_H 1
-
-#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
-#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
-
-#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
-#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
-
-#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
-#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
-
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0)
-
-#endif /* X86_64_INTEL_ARCH_PERFMON_H */
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [5/145] x86_64: Add performance counter reservation framework for UP kernels
[not found] <20060810 935.775038000@suse.de>
` (3 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [4/145] x86_64: Temporarily revert parts of the Core 2 nmi nmi watchdog support Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [6/145] x86_64: Utilize performance counter reservation framework in oprofile Andi Kleen
` (140 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: dzickus <dzickus@redhat.com>
Adds basic infrastructure to allow subsystems to reserve performance
counters on the x86 chips. Only UP kernels are supported in this patch to
make reviewing easier. The SMP portion makes a lot more changes.
Think of this as a locking mechanism where each bit represents a different
counter. In addition, each subsystem should also reserve an appropriate
event selection register that will correspond to the performance counter it
will be using (this is mainly necessary for the Pentium 4 chips as they
break the 1:1 relationship to performance counters).
This will help prevent subsystems like oprofile from interfering with the
nmi watchdog.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/nmi.c | 188 +++++++++++++++++++++++++++++++++++++++--------
arch/x86_64/kernel/nmi.c | 178 ++++++++++++++++++++++++++++++++++----------
include/asm-i386/nmi.h | 7 +
include/asm-x86_64/nmi.h | 8 +-
4 files changed, 308 insertions(+), 73 deletions(-)
Index: linux/arch/x86_64/kernel/nmi.c
===================================================================
--- linux.orig/arch/x86_64/kernel/nmi.c
+++ linux/arch/x86_64/kernel/nmi.c
@@ -27,6 +27,20 @@
#include <asm/kdebug.h>
#include <asm/mce.h>
+/* perfctr_nmi_owner tracks the ownership of the perfctr registers;
+ * evntsel_nmi_owner tracks the ownership of the event selection registers.
+ * - different performance counters/event selections may be reserved for
+ * different subsystems; this reservation system just tries to coordinate
+ * things a little
+ */
+static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
+static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
+
+/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
+ * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
+ */
+#define NMI_MAX_COUNTER_BITS 66
+
/*
* lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
* - it may be reserved by some other driver, or not
@@ -90,6 +104,95 @@ static unsigned int nmi_p4_cccr_val;
(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
+{
+ /* returns the bit offset of the performance counter register */
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ return (msr - MSR_K7_PERFCTR0);
+ case X86_VENDOR_INTEL:
+ return (msr - MSR_P4_BPU_PERFCTR0);
+ }
+ return 0;
+}
+
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
+{
+ /* returns the bit offset of the event selection register */
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ return (msr - MSR_K7_EVNTSEL0);
+ case X86_VENDOR_INTEL:
+ return (msr - MSR_P4_BSU_ESCR0);
+ }
+ return 0;
+}
+
+/* checks for a bit availability (hack for oprofile) */
+int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
+{
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+}
+
+/* checks an msr for availability */
+int avail_to_resrv_perfctr_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_perfctr_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+}
+
+int reserve_perfctr_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_perfctr_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
+ return 1;
+ return 0;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_perfctr_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_evntsel_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)))
+ return 1;
+ return 0;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_evntsel_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner));
+}
+
static __cpuinit inline int nmi_known_cpu(void)
{
switch (boot_cpu_data.x86_vendor) {
@@ -325,34 +428,22 @@ late_initcall(init_lapic_nmi_sysfs);
#endif /* CONFIG_PM */
-/*
- * Activate the NMI watchdog via the local APIC.
- * Original code written by Keith Owens.
- */
-
-static void clear_msr_range(unsigned int base, unsigned int n)
+static int setup_k7_watchdog(void)
{
- unsigned int i;
-
- for(i = 0; i < n; ++i)
- wrmsr(base+i, 0, 0);
-}
-
-static void setup_k7_watchdog(void)
-{
- int i;
unsigned int evntsel;
nmi_perfctr_msr = MSR_K7_PERFCTR0;
- for(i = 0; i < 4; ++i) {
- /* Simulator may not support it */
- if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) {
- nmi_perfctr_msr = 0;
- return;
- }
- wrmsrl(MSR_K7_PERFCTR0+i, 0UL);
- }
+ if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+ goto fail;
+
+ if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0))
+ goto fail1;
+
+ /* Simulator may not support it */
+ if (checking_wrmsrl(MSR_K7_EVNTSEL0, 0UL))
+ goto fail2;
+ wrmsrl(MSR_K7_PERFCTR0, 0UL);
evntsel = K7_EVNTSEL_INT
| K7_EVNTSEL_OS
@@ -364,6 +455,13 @@ static void setup_k7_watchdog(void)
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= K7_EVNTSEL_ENABLE;
wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+ return 1;
+fail2:
+ release_evntsel_nmi(MSR_K7_EVNTSEL0);
+fail1:
+ release_perfctr_nmi(nmi_perfctr_msr);
+fail:
+ return 0;
}
@@ -382,22 +480,11 @@ static int setup_p4_watchdog(void)
nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
#endif
- if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
- clear_msr_range(0x3F1, 2);
- /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
- docs doesn't fully define it, so leave it alone for now. */
- if (boot_cpu_data.x86_model >= 0x3) {
- /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
- clear_msr_range(0x3A0, 26);
- clear_msr_range(0x3BC, 3);
- } else {
- clear_msr_range(0x3A0, 31);
- }
- clear_msr_range(0x3C0, 6);
- clear_msr_range(0x3C8, 6);
- clear_msr_range(0x3E0, 2);
- clear_msr_range(MSR_P4_CCCR0, 18);
- clear_msr_range(MSR_P4_PERFCTR0, 18);
+ if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+ goto fail;
+
+ if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
+ goto fail1;
wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
@@ -406,6 +493,10 @@ static int setup_p4_watchdog(void)
apic_write(APIC_LVTPC, APIC_DM_NMI);
wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
return 1;
+fail1:
+ release_perfctr_nmi(nmi_perfctr_msr);
+fail:
+ return 0;
}
void setup_apic_nmi_watchdog(void)
@@ -416,7 +507,8 @@ void setup_apic_nmi_watchdog(void)
return;
if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
return;
- setup_k7_watchdog();
+ if (!setup_k7_watchdog())
+ return;
break;
case X86_VENDOR_INTEL:
if (boot_cpu_data.x86 != 15)
@@ -588,6 +680,12 @@ int proc_unknown_nmi_panic(struct ctl_ta
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
+EXPORT_SYMBOL(reserve_perfctr_nmi);
+EXPORT_SYMBOL(release_perfctr_nmi);
+EXPORT_SYMBOL(reserve_evntsel_nmi);
+EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(reserve_lapic_nmi);
EXPORT_SYMBOL(release_lapic_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
Index: linux/include/asm-x86_64/nmi.h
===================================================================
--- linux.orig/include/asm-x86_64/nmi.h
+++ linux/include/asm-x86_64/nmi.h
@@ -56,7 +56,13 @@ extern int panic_on_timeout;
extern int unknown_nmi_panic;
extern int check_nmi_watchdog(void);
-
+extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
+extern int avail_to_resrv_perfctr_nmi(unsigned int);
+extern int reserve_perfctr_nmi(unsigned int);
+extern void release_perfctr_nmi(unsigned int);
+extern int reserve_evntsel_nmi(unsigned int);
+extern void release_evntsel_nmi(unsigned int);
+
extern void setup_apic_nmi_watchdog (void);
extern int reserve_lapic_nmi(void);
extern void release_lapic_nmi(void);
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -34,6 +34,20 @@ static unsigned int nmi_perfctr_msr; /*
static unsigned int nmi_p4_cccr_val;
extern void show_registers(struct pt_regs *regs);
+/* perfctr_nmi_owner tracks the ownership of the perfctr registers;
+ * evntsel_nmi_owner tracks the ownership of the event selection registers.
+ * - different performance counters/event selections may be reserved for
+ * different subsystems; this reservation system just tries to coordinate
+ * things a little
+ */
+static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
+static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
+
+/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
+ * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
+ */
+#define NMI_MAX_COUNTER_BITS 66
+
/*
* lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
* - it may be reserved by some other driver, or not
@@ -95,6 +109,105 @@ int nmi_active;
(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
+{
+ /* returns the bit offset of the performance counter register */
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ return (msr - MSR_K7_PERFCTR0);
+ case X86_VENDOR_INTEL:
+ switch (boot_cpu_data.x86) {
+ case 6:
+ return (msr - MSR_P6_PERFCTR0);
+ case 15:
+ return (msr - MSR_P4_BPU_PERFCTR0);
+ }
+ }
+ return 0;
+}
+
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
+{
+ /* returns the bit offset of the event selection register */
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ return (msr - MSR_K7_EVNTSEL0);
+ case X86_VENDOR_INTEL:
+ switch (boot_cpu_data.x86) {
+ case 6:
+ return (msr - MSR_P6_EVNTSEL0);
+ case 15:
+ return (msr - MSR_P4_BSU_ESCR0);
+ }
+ }
+ return 0;
+}
+
+/* checks for a bit availability (hack for oprofile) */
+int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
+{
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+}
+
+/* checks an msr for availability */
+int avail_to_resrv_perfctr_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_perfctr_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+}
+
+int reserve_perfctr_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_perfctr_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
+ return 1;
+ return 0;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_perfctr_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_evntsel_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
+ return 1;
+ return 0;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_evntsel_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
+}
+
#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
* the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -344,14 +457,6 @@ late_initcall(init_lapic_nmi_sysfs);
* Original code written by Keith Owens.
*/
-static void clear_msr_range(unsigned int base, unsigned int n)
-{
- unsigned int i;
-
- for(i = 0; i < n; ++i)
- wrmsr(base+i, 0, 0);
-}
-
static void write_watchdog_counter(const char *descr)
{
u64 count = (u64)cpu_khz * 1000;
@@ -362,14 +467,19 @@ static void write_watchdog_counter(const
wrmsrl(nmi_perfctr_msr, 0 - count);
}
-static void setup_k7_watchdog(void)
+static int setup_k7_watchdog(void)
{
unsigned int evntsel;
nmi_perfctr_msr = MSR_K7_PERFCTR0;
- clear_msr_range(MSR_K7_EVNTSEL0, 4);
- clear_msr_range(MSR_K7_PERFCTR0, 4);
+ if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+ goto fail;
+
+ if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0))
+ goto fail1;
+
+ wrmsrl(MSR_K7_PERFCTR0, 0UL);
evntsel = K7_EVNTSEL_INT
| K7_EVNTSEL_OS
@@ -381,16 +491,24 @@ static void setup_k7_watchdog(void)
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= K7_EVNTSEL_ENABLE;
wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+ return 1;
+fail1:
+ release_perfctr_nmi(nmi_perfctr_msr);
+fail:
+ return 0;
}
-static void setup_p6_watchdog(void)
+static int setup_p6_watchdog(void)
{
unsigned int evntsel;
nmi_perfctr_msr = MSR_P6_PERFCTR0;
- clear_msr_range(MSR_P6_EVNTSEL0, 2);
- clear_msr_range(MSR_P6_PERFCTR0, 2);
+ if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+ goto fail;
+
+ if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0))
+ goto fail1;
evntsel = P6_EVNTSEL_INT
| P6_EVNTSEL_OS
@@ -402,6 +520,11 @@ static void setup_p6_watchdog(void)
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= P6_EVNTSEL0_ENABLE;
wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+ return 1;
+fail1:
+ release_perfctr_nmi(nmi_perfctr_msr);
+fail:
+ return 0;
}
static int setup_p4_watchdog(void)
@@ -419,22 +542,11 @@ static int setup_p4_watchdog(void)
nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
#endif
- if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
- clear_msr_range(0x3F1, 2);
- /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
- docs doesn't fully define it, so leave it alone for now. */
- if (boot_cpu_data.x86_model >= 0x3) {
- /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
- clear_msr_range(0x3A0, 26);
- clear_msr_range(0x3BC, 3);
- } else {
- clear_msr_range(0x3A0, 31);
- }
- clear_msr_range(0x3C0, 6);
- clear_msr_range(0x3C8, 6);
- clear_msr_range(0x3E0, 2);
- clear_msr_range(MSR_P4_CCCR0, 18);
- clear_msr_range(MSR_P4_PERFCTR0, 18);
+ if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+ goto fail;
+
+ if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
+ goto fail1;
wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
@@ -442,6 +554,10 @@ static int setup_p4_watchdog(void)
apic_write(APIC_LVTPC, APIC_DM_NMI);
wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
return 1;
+fail1:
+ release_perfctr_nmi(nmi_perfctr_msr);
+fail:
+ return 0;
}
void setup_apic_nmi_watchdog (void)
@@ -450,7 +566,8 @@ void setup_apic_nmi_watchdog (void)
case X86_VENDOR_AMD:
if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
return;
- setup_k7_watchdog();
+ if (!setup_k7_watchdog())
+ return;
break;
case X86_VENDOR_INTEL:
switch (boot_cpu_data.x86) {
@@ -458,7 +575,8 @@ void setup_apic_nmi_watchdog (void)
if (boot_cpu_data.x86_model > 0xd)
return;
- setup_p6_watchdog();
+ if(!setup_p6_watchdog())
+ return;
break;
case 15:
if (boot_cpu_data.x86_model > 0x4)
@@ -612,6 +730,12 @@ int proc_unknown_nmi_panic(ctl_table *ta
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
+EXPORT_SYMBOL(reserve_perfctr_nmi);
+EXPORT_SYMBOL(release_perfctr_nmi);
+EXPORT_SYMBOL(reserve_evntsel_nmi);
+EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(reserve_lapic_nmi);
EXPORT_SYMBOL(release_lapic_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
Index: linux/include/asm-i386/nmi.h
===================================================================
--- linux.orig/include/asm-i386/nmi.h
+++ linux/include/asm-i386/nmi.h
@@ -25,6 +25,13 @@ void set_nmi_callback(nmi_callback_t cal
*/
void unset_nmi_callback(void);
+extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
+extern int avail_to_resrv_perfctr_nmi(unsigned int);
+extern int reserve_perfctr_nmi(unsigned int);
+extern void release_perfctr_nmi(unsigned int);
+extern int reserve_evntsel_nmi(unsigned int);
+extern void release_evntsel_nmi(unsigned int);
+
extern void setup_apic_nmi_watchdog (void);
extern int reserve_lapic_nmi(void);
extern void release_lapic_nmi(void);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [6/145] x86_64: Utilize performance counter reservation framework in oprofile
[not found] <20060810 935.775038000@suse.de>
` (4 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [5/145] x86_64: Add performance counter reservation framework for UP kernels Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [7/145] x86_64: Add SMP support on x86_64 to reservation framework Andi Kleen
` (139 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: dzickus <dzickus@redhat.com>
Incorporates the new performance counter reservation system in oprofile.
Also cleans up a lot of the initialization code. The code originally zeroed
out every register associated with performance counters regardless of whether
those registers were used or not. This causes issues with the nmi watchdog.
Now oprofile tries to reserve registers and gives up if it can't get them.
Cc: levon@movementarian.org
Cc: oprofile-list@lists.sf.net
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/oprofile/nmi_int.c | 41 ++++++---
arch/i386/oprofile/op_model_athlon.c | 54 +++++++++---
arch/i386/oprofile/op_model_p4.c | 152 +++++++++++++++++------------------
arch/i386/oprofile/op_model_ppro.c | 65 ++++++++++++--
arch/i386/oprofile/op_x86_model.h | 1
5 files changed, 199 insertions(+), 114 deletions(-)
Index: linux/arch/i386/oprofile/nmi_int.c
===================================================================
--- linux.orig/arch/i386/oprofile/nmi_int.c
+++ linux/arch/i386/oprofile/nmi_int.c
@@ -98,15 +98,19 @@ static void nmi_cpu_save_registers(struc
unsigned int i;
for (i = 0; i < nr_ctrs; ++i) {
- rdmsr(counters[i].addr,
- counters[i].saved.low,
- counters[i].saved.high);
+ if (counters[i].addr){
+ rdmsr(counters[i].addr,
+ counters[i].saved.low,
+ counters[i].saved.high);
+ }
}
for (i = 0; i < nr_ctrls; ++i) {
- rdmsr(controls[i].addr,
- controls[i].saved.low,
- controls[i].saved.high);
+ if (controls[i].addr){
+ rdmsr(controls[i].addr,
+ controls[i].saved.low,
+ controls[i].saved.high);
+ }
}
}
@@ -205,15 +209,19 @@ static void nmi_restore_registers(struct
unsigned int i;
for (i = 0; i < nr_ctrls; ++i) {
- wrmsr(controls[i].addr,
- controls[i].saved.low,
- controls[i].saved.high);
+ if (controls[i].addr){
+ wrmsr(controls[i].addr,
+ controls[i].saved.low,
+ controls[i].saved.high);
+ }
}
for (i = 0; i < nr_ctrs; ++i) {
- wrmsr(counters[i].addr,
- counters[i].saved.low,
- counters[i].saved.high);
+ if (counters[i].addr){
+ wrmsr(counters[i].addr,
+ counters[i].saved.low,
+ counters[i].saved.high);
+ }
}
}
@@ -234,6 +242,7 @@ static void nmi_cpu_shutdown(void * dumm
apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
apic_write(APIC_LVTERR, v);
nmi_restore_registers(msrs);
+ model->shutdown(msrs);
}
@@ -284,6 +293,14 @@ static int nmi_create_files(struct super
struct dentry * dir;
char buf[4];
+ /* quick little hack to _not_ expose a counter if it is not
+ * available for use. This should protect userspace app.
+ * NOTE: assumes 1:1 mapping here (that counters are organized
+ * sequentially in their struct assignment).
+ */
+ if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
+ continue;
+
snprintf(buf, sizeof(buf), "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
Index: linux/arch/i386/oprofile/op_model_athlon.c
===================================================================
--- linux.orig/arch/i386/oprofile/op_model_athlon.c
+++ linux/arch/i386/oprofile/op_model_athlon.c
@@ -21,10 +21,12 @@
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
@@ -40,15 +42,21 @@ static unsigned long reset_value[NUM_COU
static void athlon_fill_in_addresses(struct op_msrs * const msrs)
{
- msrs->counters[0].addr = MSR_K7_PERFCTR0;
- msrs->counters[1].addr = MSR_K7_PERFCTR1;
- msrs->counters[2].addr = MSR_K7_PERFCTR2;
- msrs->counters[3].addr = MSR_K7_PERFCTR3;
-
- msrs->controls[0].addr = MSR_K7_EVNTSEL0;
- msrs->controls[1].addr = MSR_K7_EVNTSEL1;
- msrs->controls[2].addr = MSR_K7_EVNTSEL2;
- msrs->controls[3].addr = MSR_K7_EVNTSEL3;
+ int i;
+
+ for (i=0; i < NUM_COUNTERS; i++) {
+ if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
+ msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
+ else
+ msrs->counters[i].addr = 0;
+ }
+
+ for (i=0; i < NUM_CONTROLS; i++) {
+ if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
+ msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+ else
+ msrs->controls[i].addr = 0;
+ }
}
@@ -59,19 +67,23 @@ static void athlon_setup_ctrs(struct op_
/* clear all counters */
for (i = 0 ; i < NUM_CONTROLS; ++i) {
+ if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+ continue;
CTRL_READ(low, high, msrs, i);
CTRL_CLEAR(low);
CTRL_WRITE(low, high, msrs, i);
}
-
+
/* avoid a false detection of ctr overflows in NMI handler */
for (i = 0; i < NUM_COUNTERS; ++i) {
+ if (unlikely(!CTR_IS_RESERVED(msrs,i)))
+ continue;
CTR_WRITE(1, msrs, i);
}
/* enable active counters */
for (i = 0; i < NUM_COUNTERS; ++i) {
- if (counter_config[i].enabled) {
+ if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
reset_value[i] = counter_config[i].count;
CTR_WRITE(counter_config[i].count, msrs, i);
@@ -98,6 +110,8 @@ static int athlon_check_ctrs(struct pt_r
int i;
for (i = 0 ; i < NUM_COUNTERS; ++i) {
+ if (!reset_value[i])
+ continue;
CTR_READ(low, high, msrs, i);
if (CTR_OVERFLOWED(low)) {
oprofile_add_sample(regs, i);
@@ -132,12 +146,27 @@ static void athlon_stop(struct op_msrs c
/* Subtle: stop on all counters to avoid race with
* setting our pm callback */
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+ if (!reset_value[i])
+ continue;
CTRL_READ(low, high, msrs, i);
CTRL_SET_INACTIVE(low);
CTRL_WRITE(low, high, msrs, i);
}
}
+static void athlon_shutdown(struct op_msrs const * const msrs)
+{
+ int i;
+
+ for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+ if (CTR_IS_RESERVED(msrs,i))
+ release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+ }
+ for (i = 0 ; i < NUM_CONTROLS ; ++i) {
+ if (CTRL_IS_RESERVED(msrs,i))
+ release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+ }
+}
struct op_x86_model_spec const op_athlon_spec = {
.num_counters = NUM_COUNTERS,
@@ -146,5 +175,6 @@ struct op_x86_model_spec const op_athlon
.setup_ctrs = &athlon_setup_ctrs,
.check_ctrs = &athlon_check_ctrs,
.start = &athlon_start,
- .stop = &athlon_stop
+ .stop = &athlon_stop,
+ .shutdown = &athlon_shutdown
};
Index: linux/arch/i386/oprofile/op_model_p4.c
===================================================================
--- linux.orig/arch/i386/oprofile/op_model_p4.c
+++ linux/arch/i386/oprofile/op_model_p4.c
@@ -32,7 +32,7 @@
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
-
+static unsigned int num_controls = NUM_CONTROLS_NON_HT;
/* this has to be checked dynamically since the
hyper-threadedness of a chip is discovered at
@@ -40,8 +40,10 @@ static unsigned int num_counters = NUM_C
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
- if (smp_num_siblings == 2)
+ if (smp_num_siblings == 2){
num_counters = NUM_COUNTERS_HT2;
+ num_controls = NUM_CONTROLS_HT2;
+ }
#endif
}
@@ -97,15 +99,6 @@ static struct p4_counter_binding p4_coun
#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
-/* All cccr we don't use. */
-static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
- MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3,
- MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3,
- MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3,
- MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1,
- MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3
-};
-
/* p4 event codes in libop/op_event.h are indices into this table. */
static struct p4_event_binding p4_events[NUM_EVENTS] = {
@@ -372,6 +365,8 @@ static struct p4_event_binding p4_events
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
@@ -401,29 +396,34 @@ static unsigned long reset_value[NUM_COU
static void p4_fill_in_addresses(struct op_msrs * const msrs)
{
unsigned int i;
- unsigned int addr, stag;
+ unsigned int addr, cccraddr, stag;
setup_num_counters();
stag = get_stagger();
- /* the counter registers we pay attention to */
+ /* initialize some registers */
for (i = 0; i < num_counters; ++i) {
- msrs->counters[i].addr =
- p4_counters[VIRT_CTR(stag, i)].counter_address;
+ msrs->counters[i].addr = 0;
}
-
- /* FIXME: bad feeling, we don't save the 10 counters we don't use. */
-
- /* 18 CCCR registers */
- for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
- addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
- msrs->controls[i].addr = addr;
+ for (i = 0; i < num_controls; ++i) {
+ msrs->controls[i].addr = 0;
}
+ /* the counter & cccr registers we pay attention to */
+ for (i = 0; i < num_counters; ++i) {
+ addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
+ cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
+ if (reserve_perfctr_nmi(addr)){
+ msrs->counters[i].addr = addr;
+ msrs->controls[i].addr = cccraddr;
+ }
+ }
+
/* 43 ESCR registers in three or four discontiguous group */
for (addr = MSR_P4_BSU_ESCR0 + stag;
addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
- msrs->controls[i].addr = addr;
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
}
/* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
@@ -431,47 +431,57 @@ static void p4_fill_in_addresses(struct
if (boot_cpu_data.x86_model >= 0x3) {
for (addr = MSR_P4_BSU_ESCR0 + stag;
addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
- msrs->controls[i].addr = addr;
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
}
} else {
for (addr = MSR_P4_IQ_ESCR0 + stag;
addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
- msrs->controls[i].addr = addr;
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
}
}
for (addr = MSR_P4_RAT_ESCR0 + stag;
addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
- msrs->controls[i].addr = addr;
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
}
for (addr = MSR_P4_MS_ESCR0 + stag;
addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
- msrs->controls[i].addr = addr;
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
}
for (addr = MSR_P4_IX_ESCR0 + stag;
addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
- msrs->controls[i].addr = addr;
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
}
/* there are 2 remaining non-contiguously located ESCRs */
if (num_counters == NUM_COUNTERS_NON_HT) {
/* standard non-HT CPUs handle both remaining ESCRs*/
- msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
- msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
} else if (stag == 0) {
/* HT CPUs give the first remainder to the even thread, as
the 32nd control register */
- msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
} else {
/* and two copies of the second to the odd thread,
for the 22st and 23nd control registers */
- msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
- msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ }
}
}
@@ -544,7 +554,6 @@ static void p4_setup_ctrs(struct op_msrs
{
unsigned int i;
unsigned int low, high;
- unsigned int addr;
unsigned int stag;
stag = get_stagger();
@@ -557,59 +566,24 @@ static void p4_setup_ctrs(struct op_msrs
/* clear the cccrs we will use */
for (i = 0 ; i < num_counters ; i++) {
+ if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+ continue;
rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
CCCR_CLEAR(low);
CCCR_SET_REQUIRED_BITS(low);
wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
}
- /* clear cccrs outside our concern */
- for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
- rdmsr(p4_unused_cccr[i], low, high);
- CCCR_CLEAR(low);
- CCCR_SET_REQUIRED_BITS(low);
- wrmsr(p4_unused_cccr[i], low, high);
- }
-
/* clear all escrs (including those outside our concern) */
- for (addr = MSR_P4_BSU_ESCR0 + stag;
- addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) {
- wrmsr(addr, 0, 0);
- }
-
- /* On older models clear also MSR_P4_IQ_ESCR0/1 */
- if (boot_cpu_data.x86_model < 0x3) {
- wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
- wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
- }
-
- for (addr = MSR_P4_RAT_ESCR0 + stag;
- addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
- wrmsr(addr, 0, 0);
- }
-
- for (addr = MSR_P4_MS_ESCR0 + stag;
- addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){
- wrmsr(addr, 0, 0);
- }
-
- for (addr = MSR_P4_IX_ESCR0 + stag;
- addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){
- wrmsr(addr, 0, 0);
+ for (i = num_counters; i < num_controls; i++) {
+ if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+ continue;
+ wrmsr(msrs->controls[i].addr, 0, 0);
}
- if (num_counters == NUM_COUNTERS_NON_HT) {
- wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
- wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
- } else if (stag == 0) {
- wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
- } else {
- wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
- }
-
/* setup all counters */
for (i = 0 ; i < num_counters ; ++i) {
- if (counter_config[i].enabled) {
+ if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
reset_value[i] = counter_config[i].count;
pmc_setup_one_p4_counter(i);
CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
@@ -696,12 +670,32 @@ static void p4_stop(struct op_msrs const
stag = get_stagger();
for (i = 0; i < num_counters; ++i) {
+ if (!reset_value[i])
+ continue;
CCCR_READ(low, high, VIRT_CTR(stag, i));
CCCR_SET_DISABLE(low);
CCCR_WRITE(low, high, VIRT_CTR(stag, i));
}
}
+static void p4_shutdown(struct op_msrs const * const msrs)
+{
+ int i;
+
+ for (i = 0 ; i < num_counters ; ++i) {
+ if (CTR_IS_RESERVED(msrs,i))
+ release_perfctr_nmi(msrs->counters[i].addr);
+ }
+ /* some of the control registers are specially reserved in
+ * conjunction with the counter registers (hence the starting offset).
+ * This saves a few bits.
+ */
+ for (i = num_counters ; i < num_controls ; ++i) {
+ if (CTRL_IS_RESERVED(msrs,i))
+ release_evntsel_nmi(msrs->controls[i].addr);
+ }
+}
+
#ifdef CONFIG_SMP
struct op_x86_model_spec const op_p4_ht2_spec = {
@@ -711,7 +705,8 @@ struct op_x86_model_spec const op_p4_ht2
.setup_ctrs = &p4_setup_ctrs,
.check_ctrs = &p4_check_ctrs,
.start = &p4_start,
- .stop = &p4_stop
+ .stop = &p4_stop,
+ .shutdown = &p4_shutdown
};
#endif
@@ -722,5 +717,6 @@ struct op_x86_model_spec const op_p4_spe
.setup_ctrs = &p4_setup_ctrs,
.check_ctrs = &p4_check_ctrs,
.start = &p4_start,
- .stop = &p4_stop
+ .stop = &p4_stop,
+ .shutdown = &p4_shutdown
};
Index: linux/arch/i386/oprofile/op_model_ppro.c
===================================================================
--- linux.orig/arch/i386/oprofile/op_model_ppro.c
+++ linux/arch/i386/oprofile/op_model_ppro.c
@@ -22,10 +22,12 @@
#define NUM_COUNTERS 2
#define NUM_CONTROLS 2
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
@@ -41,11 +43,21 @@ static unsigned long reset_value[NUM_COU
static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
- msrs->counters[0].addr = MSR_P6_PERFCTR0;
- msrs->counters[1].addr = MSR_P6_PERFCTR1;
+ int i;
+
+ for (i=0; i < NUM_COUNTERS; i++) {
+ if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
+ msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
+ else
+ msrs->counters[i].addr = 0;
+ }
- msrs->controls[0].addr = MSR_P6_EVNTSEL0;
- msrs->controls[1].addr = MSR_P6_EVNTSEL1;
+ for (i=0; i < NUM_CONTROLS; i++) {
+ if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
+ msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
+ else
+ msrs->controls[i].addr = 0;
+ }
}
@@ -56,6 +68,8 @@ static void ppro_setup_ctrs(struct op_ms
/* clear all counters */
for (i = 0 ; i < NUM_CONTROLS; ++i) {
+ if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+ continue;
CTRL_READ(low, high, msrs, i);
CTRL_CLEAR(low);
CTRL_WRITE(low, high, msrs, i);
@@ -63,12 +77,14 @@ static void ppro_setup_ctrs(struct op_ms
/* avoid a false detection of ctr overflows in NMI handler */
for (i = 0; i < NUM_COUNTERS; ++i) {
+ if (unlikely(!CTR_IS_RESERVED(msrs,i)))
+ continue;
CTR_WRITE(1, msrs, i);
}
/* enable active counters */
for (i = 0; i < NUM_COUNTERS; ++i) {
- if (counter_config[i].enabled) {
+ if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
reset_value[i] = counter_config[i].count;
CTR_WRITE(counter_config[i].count, msrs, i);
@@ -81,6 +97,8 @@ static void ppro_setup_ctrs(struct op_ms
CTRL_SET_UM(low, counter_config[i].unit_mask);
CTRL_SET_EVENT(low, counter_config[i].event);
CTRL_WRITE(low, high, msrs, i);
+ } else {
+ reset_value[i] = 0;
}
}
}
@@ -93,6 +111,8 @@ static int ppro_check_ctrs(struct pt_reg
int i;
for (i = 0 ; i < NUM_COUNTERS; ++i) {
+ if (!reset_value[i])
+ continue;
CTR_READ(low, high, msrs, i);
if (CTR_OVERFLOWED(low)) {
oprofile_add_sample(regs, i);
@@ -118,18 +138,38 @@ static int ppro_check_ctrs(struct pt_reg
static void ppro_start(struct op_msrs const * const msrs)
{
unsigned int low,high;
- CTRL_READ(low, high, msrs, 0);
- CTRL_SET_ACTIVE(low);
- CTRL_WRITE(low, high, msrs, 0);
+
+ if (reset_value[0]) {
+ CTRL_READ(low, high, msrs, 0);
+ CTRL_SET_ACTIVE(low);
+ CTRL_WRITE(low, high, msrs, 0);
+ }
}
static void ppro_stop(struct op_msrs const * const msrs)
{
unsigned int low,high;
- CTRL_READ(low, high, msrs, 0);
- CTRL_SET_INACTIVE(low);
- CTRL_WRITE(low, high, msrs, 0);
+
+ if (reset_value[0]) {
+ CTRL_READ(low, high, msrs, 0);
+ CTRL_SET_INACTIVE(low);
+ CTRL_WRITE(low, high, msrs, 0);
+ }
+}
+
+static void ppro_shutdown(struct op_msrs const * const msrs)
+{
+ int i;
+
+ for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+ if (CTR_IS_RESERVED(msrs,i))
+ release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
+ }
+ for (i = 0 ; i < NUM_CONTROLS ; ++i) {
+ if (CTRL_IS_RESERVED(msrs,i))
+ release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
+ }
}
@@ -140,5 +180,6 @@ struct op_x86_model_spec const op_ppro_s
.setup_ctrs = &ppro_setup_ctrs,
.check_ctrs = &ppro_check_ctrs,
.start = &ppro_start,
- .stop = &ppro_stop
+ .stop = &ppro_stop,
+ .shutdown = &ppro_shutdown
};
Index: linux/arch/i386/oprofile/op_x86_model.h
===================================================================
--- linux.orig/arch/i386/oprofile/op_x86_model.h
+++ linux/arch/i386/oprofile/op_x86_model.h
@@ -40,6 +40,7 @@ struct op_x86_model_spec {
struct op_msrs const * const msrs);
void (*start)(struct op_msrs const * const msrs);
void (*stop)(struct op_msrs const * const msrs);
+ void (*shutdown)(struct op_msrs const * const msrs);
};
extern struct op_x86_model_spec const op_ppro_spec;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [7/145] x86_64: Add SMP support on x86_64 to reservation framework
[not found] <20060810 935.775038000@suse.de>
` (5 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [6/145] x86_64: Utilize performance counter reservation framework in oprofile Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [8/145] x86_64: Add SMP support on i386 " Andi Kleen
` (138 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: dzickus <dzickus@redhat.com>
This patch includes the changes to make the nmi watchdog on x86_64 SMP
aware. A bunch of code was moved around to make it simpler to read. In
addition, it is now possible to determine if a particular NMI was the result
of the watchdog or not. This feature allows the kernel to filter out
unknown NMIs more easily.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/apic.c | 3
arch/x86_64/kernel/nmi.c | 426 ++++++++++++++++++++++++++++++----------------
include/asm-x86_64/nmi.h | 3
3 files changed, 290 insertions(+), 142 deletions(-)
Index: linux/arch/x86_64/kernel/apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/apic.c
+++ linux/arch/x86_64/kernel/apic.c
@@ -479,8 +479,7 @@ void __cpuinit setup_local_APIC (void)
}
nmi_watchdog_default();
- if (nmi_watchdog == NMI_LOCAL_APIC)
- setup_apic_nmi_watchdog();
+ setup_apic_nmi_watchdog(NULL);
apic_pm_activate();
}
Index: linux/arch/x86_64/kernel/nmi.c
===================================================================
--- linux.orig/arch/x86_64/kernel/nmi.c
+++ linux/arch/x86_64/kernel/nmi.c
@@ -56,53 +56,29 @@ static unsigned int lapic_nmi_owner;
#define LAPIC_NMI_RESERVED (1<<1)
/* nmi_active:
- * +1: the lapic NMI watchdog is active, but can be disabled
- * 0: the lapic NMI watchdog has not been set up, and cannot
+ * >0: the lapic NMI watchdog is active, but can be disabled
+ * <0: the lapic NMI watchdog has not been set up, and cannot
* be enabled
- * -1: the lapic NMI watchdog is disabled, but can be enabled
+ * 0: the lapic NMI watchdog is disabled, but can be enabled
*/
-int nmi_active; /* oprofile uses this */
+atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
int panic_on_timeout;
unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;
-static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
-static unsigned int nmi_p4_cccr_val;
-/* Note that these events don't tick when the CPU idles. This means
- the frequency varies with CPU load. */
-
-#define K7_EVNTSEL_ENABLE (1 << 22)
-#define K7_EVNTSEL_INT (1 << 20)
-#define K7_EVNTSEL_OS (1 << 17)
-#define K7_EVNTSEL_USR (1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
-#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+struct nmi_watchdog_ctlblk {
+ int enabled;
+ u64 check_bit;
+ unsigned int cccr_msr;
+ unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
+ unsigned int evntsel_msr; /* the MSR to select the events to handle */
+};
+static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
-#define MSR_P4_MISC_ENABLE 0x1A0
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
-#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
-#define MSR_P4_PERFCTR0 0x300
-#define MSR_P4_CCCR0 0x360
-#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
-#define P4_ESCR_OS (1<<3)
-#define P4_ESCR_USR (1<<2)
-#define P4_CCCR_OVF_PMI0 (1<<26)
-#define P4_CCCR_OVF_PMI1 (1<<27)
-#define P4_CCCR_THRESHOLD(N) ((N)<<20)
-#define P4_CCCR_COMPLEMENT (1<<19)
-#define P4_CCCR_COMPARE (1<<18)
-#define P4_CCCR_REQUIRED (3<<16)
-#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
-#define P4_CCCR_ENABLE (1<<12)
-/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- CRU_ESCR0 (with any non-null event selector) through a complemented
- max threshold. [IA32-Vol3, Section 14.9.9] */
-#define MSR_P4_IQ_COUNTER0 0x30C
-#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
-#define P4_NMI_IQ_CCCR0 \
- (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
- P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+/* local prototypes */
+static void stop_apic_nmi_watchdog(void *unused);
+static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
@@ -241,6 +217,12 @@ int __init check_nmi_watchdog (void)
int *counts;
int cpu;
+ if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
+ return 0;
+
+ if (!atomic_read(&nmi_active))
+ return 0;
+
counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
if (!counts)
return -1;
@@ -258,19 +240,22 @@ int __init check_nmi_watchdog (void)
mdelay((10*1000)/nmi_hz); // wait 10 ticks
for_each_online_cpu(cpu) {
+ if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
+ continue;
if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
- endflag = 1;
printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
cpu,
counts[cpu],
cpu_pda(cpu)->__nmi_count);
- nmi_active = 0;
- lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
- nmi_perfctr_msr = 0;
- kfree(counts);
- return -1;
+ per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
+ atomic_dec(&nmi_active);
}
}
+ if (!atomic_read(&nmi_active)) {
+ kfree(counts);
+ atomic_set(&nmi_active, -1);
+ return -1;
+ }
endflag = 1;
printk("OK.\n");
@@ -297,8 +282,11 @@ int __init setup_nmi_watchdog(char *str)
get_option(&str, &nmi);
- if (nmi >= NMI_INVALID)
+ if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
return 0;
+
+ if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
+ return 0; /* no lapic support */
nmi_watchdog = nmi;
return 1;
}
@@ -307,31 +295,30 @@ __setup("nmi_watchdog=", setup_nmi_watch
static void disable_lapic_nmi_watchdog(void)
{
- if (nmi_active <= 0)
+ BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+ if (atomic_read(&nmi_active) <= 0)
return;
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- wrmsr(MSR_K7_EVNTSEL0, 0, 0);
- break;
- case X86_VENDOR_INTEL:
- if (boot_cpu_data.x86 == 15) {
- wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
- wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
- }
- break;
- }
- nmi_active = -1;
- /* tell do_nmi() and others that we're not active any more */
- nmi_watchdog = 0;
+
+ on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+
+ BUG_ON(atomic_read(&nmi_active) != 0);
}
static void enable_lapic_nmi_watchdog(void)
{
- if (nmi_active < 0) {
- nmi_watchdog = NMI_LOCAL_APIC;
- touch_nmi_watchdog();
- setup_apic_nmi_watchdog();
- }
+ BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+ /* are we already enabled */
+ if (atomic_read(&nmi_active) != 0)
+ return;
+
+ /* are we lapic aware */
+ if (nmi_known_cpu() <= 0)
+ return;
+
+ on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+ touch_nmi_watchdog();
}
int reserve_lapic_nmi(void)
@@ -363,21 +350,24 @@ void release_lapic_nmi(void)
void disable_timer_nmi_watchdog(void)
{
- if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
+ BUG_ON(nmi_watchdog != NMI_IO_APIC);
+
+ if (atomic_read(&nmi_active) <= 0)
return;
disable_irq(0);
- unset_nmi_callback();
- nmi_active = -1;
- nmi_watchdog = NMI_NONE;
+ on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+
+ BUG_ON(atomic_read(&nmi_active) != 0);
}
void enable_timer_nmi_watchdog(void)
{
- if (nmi_active < 0) {
- nmi_watchdog = NMI_IO_APIC;
+ BUG_ON(nmi_watchdog != NMI_IO_APIC);
+
+ if (atomic_read(&nmi_active) == 0) {
touch_nmi_watchdog();
- nmi_active = 1;
+ on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
enable_irq(0);
}
}
@@ -388,7 +378,7 @@ static int nmi_pm_active; /* nmi_active
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
- nmi_pm_active = nmi_active;
+ nmi_pm_active = atomic_read(&nmi_active);
disable_lapic_nmi_watchdog();
return 0;
}
@@ -396,7 +386,7 @@ static int lapic_nmi_suspend(struct sys_
static int lapic_nmi_resume(struct sys_device *dev)
{
if (nmi_pm_active > 0)
- enable_lapic_nmi_watchdog();
+ enable_lapic_nmi_watchdog();
return 0;
}
@@ -415,7 +405,13 @@ static int __init init_lapic_nmi_sysfs(v
{
int error;
- if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
+ /* should really be a BUG_ON but b/c this is an
+ * init call, it just doesn't work. -dcz
+ */
+ if (nmi_watchdog != NMI_LOCAL_APIC)
+ return 0;
+
+ if ( atomic_read(&nmi_active) < 0 )
return 0;
error = sysdev_class_register(&nmi_sysclass);
@@ -428,100 +424,232 @@ late_initcall(init_lapic_nmi_sysfs);
#endif /* CONFIG_PM */
+/*
+ * Activate the NMI watchdog via the local APIC.
+ * Original code written by Keith Owens.
+ */
+
+/* Note that these events don't tick when the CPU idles. This means
+ the frequency varies with CPU load. */
+
+#define K7_EVNTSEL_ENABLE (1 << 22)
+#define K7_EVNTSEL_INT (1 << 20)
+#define K7_EVNTSEL_OS (1 << 17)
+#define K7_EVNTSEL_USR (1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
+#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
static int setup_k7_watchdog(void)
{
+ unsigned int perfctr_msr, evntsel_msr;
unsigned int evntsel;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
- nmi_perfctr_msr = MSR_K7_PERFCTR0;
-
- if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+ perfctr_msr = MSR_K7_PERFCTR0;
+ evntsel_msr = MSR_K7_EVNTSEL0;
+ if (!reserve_perfctr_nmi(perfctr_msr))
goto fail;
- if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0))
+ if (!reserve_evntsel_nmi(evntsel_msr))
goto fail1;
/* Simulator may not support it */
- if (checking_wrmsrl(MSR_K7_EVNTSEL0, 0UL))
+ if (checking_wrmsrl(evntsel_msr, 0UL))
goto fail2;
- wrmsrl(MSR_K7_PERFCTR0, 0UL);
+ wrmsrl(perfctr_msr, 0UL);
evntsel = K7_EVNTSEL_INT
| K7_EVNTSEL_OS
| K7_EVNTSEL_USR
| K7_NMI_EVENT;
- wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
- wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz));
+ /* setup the timer */
+ wrmsr(evntsel_msr, evntsel, 0);
+ wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= K7_EVNTSEL_ENABLE;
- wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+ wrmsr(evntsel_msr, evntsel, 0);
+
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; //unused
+ wd->check_bit = 1ULL<<63;
return 1;
fail2:
- release_evntsel_nmi(MSR_K7_EVNTSEL0);
+ release_evntsel_nmi(evntsel_msr);
fail1:
- release_perfctr_nmi(nmi_perfctr_msr);
+ release_perfctr_nmi(perfctr_msr);
fail:
return 0;
}
+static void stop_k7_watchdog(void)
+{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ wrmsr(wd->evntsel_msr, 0, 0);
+
+ release_evntsel_nmi(wd->evntsel_msr);
+ release_perfctr_nmi(wd->perfctr_msr);
+}
+
+/* Note that these events don't tick when the CPU idles. This means
+ the frequency varies with CPU load. */
+
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
+#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
+#define P4_ESCR_OS (1<<3)
+#define P4_ESCR_USR (1<<2)
+#define P4_CCCR_OVF_PMI0 (1<<26)
+#define P4_CCCR_OVF_PMI1 (1<<27)
+#define P4_CCCR_THRESHOLD(N) ((N)<<20)
+#define P4_CCCR_COMPLEMENT (1<<19)
+#define P4_CCCR_COMPARE (1<<18)
+#define P4_CCCR_REQUIRED (3<<16)
+#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
+#define P4_CCCR_ENABLE (1<<12)
+#define P4_CCCR_OVF (1<<31)
+/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+ CRU_ESCR0 (with any non-null event selector) through a complemented
+ max threshold. [IA32-Vol3, Section 14.9.9] */
static int setup_p4_watchdog(void)
{
+ unsigned int perfctr_msr, evntsel_msr, cccr_msr;
+ unsigned int evntsel, cccr_val;
unsigned int misc_enable, dummy;
+ unsigned int ht_num;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
- rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
+ rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
return 0;
- nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
- nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
#ifdef CONFIG_SMP
- if (smp_num_siblings == 2)
- nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
+ /* detect which hyperthread we are on */
+ if (smp_num_siblings == 2) {
+ unsigned int ebx, apicid;
+
+ ebx = cpuid_ebx(1);
+ apicid = (ebx >> 24) & 0xff;
+ ht_num = apicid & 1;
+ } else
#endif
+ ht_num = 0;
+
+ /* performance counters are shared resources
+ * assign each hyperthread its own set
+ * (re-use the ESCR0 register, seems safe
+ * and keeps the cccr_val the same)
+ */
+ if (!ht_num) {
+ /* logical cpu 0 */
+ perfctr_msr = MSR_P4_IQ_PERFCTR0;
+ evntsel_msr = MSR_P4_CRU_ESCR0;
+ cccr_msr = MSR_P4_IQ_CCCR0;
+ cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
+ } else {
+ /* logical cpu 1 */
+ perfctr_msr = MSR_P4_IQ_PERFCTR1;
+ evntsel_msr = MSR_P4_CRU_ESCR0;
+ cccr_msr = MSR_P4_IQ_CCCR1;
+ cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
+ }
- if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+ if (!reserve_perfctr_nmi(perfctr_msr))
goto fail;
- if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
+ if (!reserve_evntsel_nmi(evntsel_msr))
goto fail1;
- wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
- wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
- Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz * 1000UL / nmi_hz));
- wrmsrl(MSR_P4_IQ_COUNTER0, -((u64)cpu_khz * 1000 / nmi_hz));
+ evntsel = P4_ESCR_EVENT_SELECT(0x3F)
+ | P4_ESCR_OS
+ | P4_ESCR_USR;
+
+ cccr_val |= P4_CCCR_THRESHOLD(15)
+ | P4_CCCR_COMPLEMENT
+ | P4_CCCR_COMPARE
+ | P4_CCCR_REQUIRED;
+
+ wrmsr(evntsel_msr, evntsel, 0);
+ wrmsr(cccr_msr, cccr_val, 0);
+ wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
+ cccr_val |= P4_CCCR_ENABLE;
+ wrmsr(cccr_msr, cccr_val, 0);
+
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = cccr_msr;
+ wd->check_bit = 1ULL<<39;
return 1;
fail1:
- release_perfctr_nmi(nmi_perfctr_msr);
+ release_perfctr_nmi(perfctr_msr);
fail:
return 0;
}
-void setup_apic_nmi_watchdog(void)
+static void stop_p4_watchdog(void)
{
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (boot_cpu_data.x86 != 15)
- return;
- if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
- return;
- if (!setup_k7_watchdog())
- return;
- break;
- case X86_VENDOR_INTEL:
- if (boot_cpu_data.x86 != 15)
- return;
- if (!setup_p4_watchdog())
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ wrmsr(wd->cccr_msr, 0, 0);
+ wrmsr(wd->evntsel_msr, 0, 0);
+
+ release_evntsel_nmi(wd->evntsel_msr);
+ release_perfctr_nmi(wd->perfctr_msr);
+}
+
+void setup_apic_nmi_watchdog(void *unused)
+{
+ /* only support LOCAL and IO APICs for now */
+ if ((nmi_watchdog != NMI_LOCAL_APIC) &&
+ (nmi_watchdog != NMI_IO_APIC))
+ return;
+
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
+ return;
+ if (!setup_k7_watchdog())
+ return;
+ break;
+ case X86_VENDOR_INTEL:
+ if (!setup_p4_watchdog())
+ return;
+ break;
+ default:
return;
- break;
+ }
+ }
+ __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1;
+ atomic_inc(&nmi_active);
+}
- default:
- return;
+static void stop_apic_nmi_watchdog(void *unused)
+{
+ /* only support LOCAL and IO APICs for now */
+ if ((nmi_watchdog != NMI_LOCAL_APIC) &&
+ (nmi_watchdog != NMI_IO_APIC))
+ return;
+
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
+ return;
+ stop_k7_watchdog();
+ break;
+ case X86_VENDOR_INTEL:
+ stop_p4_watchdog();
+ break;
+ default:
+ return;
+ }
}
- lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
- nmi_active = 1;
+ __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0;
+ atomic_dec(&nmi_active);
}
/*
@@ -558,50 +686,70 @@ void __kprobes nmi_watchdog_tick(struct
{
int sum;
int touched = 0;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+ u64 dummy;
+
+ /* check for other users first */
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+ == NOTIFY_STOP) {
+ touched = 1;
+ }
sum = read_pda(apic_timer_irqs);
if (__get_cpu_var(nmi_touch)) {
__get_cpu_var(nmi_touch) = 0;
touched = 1;
}
+
#ifdef CONFIG_X86_MCE
/* Could check oops_in_progress here too, but it's safer
not too */
if (atomic_read(&mce_entry) > 0)
touched = 1;
#endif
+ /* if the apic timer isn't firing, this cpu isn't doing much */
if (!touched && __get_cpu_var(last_irq_sum) == sum) {
/*
* Ayiee, looks like this CPU is stuck ...
* wait a few IRQs (5 seconds) before doing the oops ...
*/
local_inc(&__get_cpu_var(alert_counter));
- if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) {
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP) {
- local_set(&__get_cpu_var(alert_counter), 0);
- return;
- }
+ if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz)
die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs);
- }
} else {
__get_cpu_var(last_irq_sum) = sum;
local_set(&__get_cpu_var(alert_counter), 0);
}
- if (nmi_perfctr_msr) {
- if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
- /*
- * P4 quirks:
- * - An overflown perfctr will assert its interrupt
- * until the OVF flag in its CCCR is cleared.
- * - LVTPC is masked on interrupt and must be
- * unmasked by the LVTPC handler.
- */
- wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- }
- wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
+
+ /* see if the nmi watchdog went off */
+ if (wd->enabled) {
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
+ rdmsrl(wd->perfctr_msr, dummy);
+ if (dummy & wd->check_bit){
+ /* this wasn't a watchdog timer interrupt */
+ goto done;
+ }
+
+ /* only Intel uses the cccr msr */
+ if (wd->cccr_msr != 0) {
+ /*
+ * P4 quirks:
+ * - An overflown perfctr will assert its interrupt
+ * until the OVF flag in its CCCR is cleared.
+ * - LVTPC is masked on interrupt and must be
+ * unmasked by the LVTPC handler.
+ */
+ rdmsrl(wd->cccr_msr, dummy);
+ dummy &= ~P4_CCCR_OVF;
+ wrmsrl(wd->cccr_msr, dummy);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ }
+ /* start the cycle over again */
+ wrmsrl(wd->perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
+ }
}
+done:
+ return;
}
static __kprobes int dummy_nmi_callback(struct pt_regs * regs, int cpu)
Index: linux/include/asm-x86_64/nmi.h
===================================================================
--- linux.orig/include/asm-x86_64/nmi.h
+++ linux/include/asm-x86_64/nmi.h
@@ -63,7 +63,7 @@ extern void release_perfctr_nmi(unsigned
extern int reserve_evntsel_nmi(unsigned int);
extern void release_evntsel_nmi(unsigned int);
-extern void setup_apic_nmi_watchdog (void);
+extern void setup_apic_nmi_watchdog (void *);
extern int reserve_lapic_nmi(void);
extern void release_lapic_nmi(void);
extern void disable_timer_nmi_watchdog(void);
@@ -73,6 +73,7 @@ extern void nmi_watchdog_tick (struct pt
extern void nmi_watchdog_default(void);
extern int setup_nmi_watchdog(char *);
+extern atomic_t nmi_active;
extern unsigned int nmi_watchdog;
#define NMI_DEFAULT -1
#define NMI_NONE 0
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [8/145] x86_64: Add SMP support on i386 to reservation framework
[not found] <20060810 935.775038000@suse.de>
` (6 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [7/145] x86_64: Add SMP support on x86_64 to reservation framework Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [9/145] x86_64: Cleanup NMI interrupt path Andi Kleen
` (137 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: dzickus <dzickus@redhat.com>
This patch includes the changes to make the nmi watchdog on i386 SMP aware.
A bunch of code was moved around to make it simpler to read. In addition,
it is now possible to determine if a particular NMI was the result of the
watchdog or not. This feature allows the kernel to filter out unknown NMIs
more easily.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/apic.c | 3
arch/i386/kernel/nmi.c | 537 ++++++++++++++++++++++++-------------
arch/i386/kernel/traps.c | 2
arch/i386/oprofile/nmi_timer_int.c | 4
include/asm-i386/nmi.h | 5
5 files changed, 360 insertions(+), 191 deletions(-)
Index: linux/arch/i386/kernel/apic.c
===================================================================
--- linux.orig/arch/i386/kernel/apic.c
+++ linux/arch/i386/kernel/apic.c
@@ -586,8 +586,7 @@ void __devinit setup_local_APIC(void)
printk("No ESR for 82489DX.\n");
}
- if (nmi_watchdog == NMI_LOCAL_APIC)
- setup_apic_nmi_watchdog();
+ setup_apic_nmi_watchdog(NULL);
apic_pm_activate();
}
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -24,16 +24,10 @@
#include <asm/smp.h>
#include <asm/nmi.h>
+#include <asm/kdebug.h>
#include "mach_traps.h"
-unsigned int nmi_watchdog = NMI_NONE;
-extern int unknown_nmi_panic;
-static unsigned int nmi_hz = HZ;
-static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
-static unsigned int nmi_p4_cccr_val;
-extern void show_registers(struct pt_regs *regs);
-
/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
* evtsel_nmi_owner tracks the ownership of the event selection
* - different performance counters/ event selection may be reserved for
@@ -63,51 +57,31 @@ static unsigned int lapic_nmi_owner;
#define LAPIC_NMI_RESERVED (1<<1)
/* nmi_active:
- * +1: the lapic NMI watchdog is active, but can be disabled
- * 0: the lapic NMI watchdog has not been set up, and cannot
+ * >0: the lapic NMI watchdog is active, but can be disabled
+ * <0: the lapic NMI watchdog has not been set up, and cannot
* be enabled
- * -1: the lapic NMI watchdog is disabled, but can be enabled
+ * 0: the lapic NMI watchdog is disabled, but can be enabled
*/
-int nmi_active;
+atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
-#define K7_EVNTSEL_ENABLE (1 << 22)
-#define K7_EVNTSEL_INT (1 << 20)
-#define K7_EVNTSEL_OS (1 << 17)
-#define K7_EVNTSEL_USR (1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
-#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+unsigned int nmi_watchdog = NMI_DEFAULT;
+static unsigned int nmi_hz = HZ;
-#define P6_EVNTSEL0_ENABLE (1 << 22)
-#define P6_EVNTSEL_INT (1 << 20)
-#define P6_EVNTSEL_OS (1 << 17)
-#define P6_EVNTSEL_USR (1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
-#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
+struct nmi_watchdog_ctlblk {
+ int enabled;
+ u64 check_bit;
+ unsigned int cccr_msr;
+ unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
+ unsigned int evntsel_msr; /* the MSR to select the events to handle */
+};
+static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
-#define MSR_P4_MISC_ENABLE 0x1A0
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
-#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
-#define MSR_P4_PERFCTR0 0x300
-#define MSR_P4_CCCR0 0x360
-#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
-#define P4_ESCR_OS (1<<3)
-#define P4_ESCR_USR (1<<2)
-#define P4_CCCR_OVF_PMI0 (1<<26)
-#define P4_CCCR_OVF_PMI1 (1<<27)
-#define P4_CCCR_THRESHOLD(N) ((N)<<20)
-#define P4_CCCR_COMPLEMENT (1<<19)
-#define P4_CCCR_COMPARE (1<<18)
-#define P4_CCCR_REQUIRED (3<<16)
-#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
-#define P4_CCCR_ENABLE (1<<12)
-/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- CRU_ESCR0 (with any non-null event selector) through a complemented
- max threshold. [IA32-Vol3, Section 14.9.9] */
-#define MSR_P4_IQ_COUNTER0 0x30C
-#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
-#define P4_NMI_IQ_CCCR0 \
- (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
- P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+/* local prototypes */
+static void stop_apic_nmi_watchdog(void *unused);
+static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
+
+extern void show_registers(struct pt_regs *regs);
+extern int unknown_nmi_panic;
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
@@ -208,6 +182,17 @@ void release_evntsel_nmi(unsigned int ms
clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
}
+static __cpuinit inline int nmi_known_cpu(void)
+{
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+ case X86_VENDOR_INTEL:
+ return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+ }
+ return 0;
+}
+
#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
* the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -234,7 +219,10 @@ static int __init check_nmi_watchdog(voi
unsigned int *prev_nmi_count;
int cpu;
- if (nmi_watchdog == NMI_NONE)
+ if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
+ return 0;
+
+ if (!atomic_read(&nmi_active))
return 0;
prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
@@ -258,18 +246,22 @@ static int __init check_nmi_watchdog(voi
if (!cpu_isset(cpu, cpu_callin_map))
continue;
#endif
+ if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
+ continue;
if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
- endflag = 1;
printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
cpu,
prev_nmi_count[cpu],
nmi_count(cpu));
- nmi_active = 0;
- lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
- kfree(prev_nmi_count);
- return -1;
+ per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
+ atomic_dec(&nmi_active);
}
}
+ if (!atomic_read(&nmi_active)) {
+ kfree(prev_nmi_count);
+ atomic_set(&nmi_active, -1);
+ return -1;
+ }
endflag = 1;
printk("OK.\n");
@@ -290,31 +282,16 @@ static int __init setup_nmi_watchdog(cha
get_option(&str, &nmi);
- if (nmi >= NMI_INVALID)
+ if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
return 0;
- if (nmi == NMI_NONE)
- nmi_watchdog = nmi;
/*
* If any other x86 CPU has a local APIC, then
* please test the NMI stuff there and send me the
* missing bits. Right now Intel P6/P4 and AMD K7 only.
*/
- if ((nmi == NMI_LOCAL_APIC) &&
- (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
- (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
- nmi_watchdog = nmi;
- if ((nmi == NMI_LOCAL_APIC) &&
- (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
- (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
- nmi_watchdog = nmi;
- /*
- * We can enable the IO-APIC watchdog
- * unconditionally.
- */
- if (nmi == NMI_IO_APIC) {
- nmi_active = 1;
- nmi_watchdog = nmi;
- }
+ if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
+ return 0; /* no lapic support */
+ nmi_watchdog = nmi;
return 1;
}
@@ -322,41 +299,30 @@ __setup("nmi_watchdog=", setup_nmi_watch
static void disable_lapic_nmi_watchdog(void)
{
- if (nmi_active <= 0)
+ BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+ if (atomic_read(&nmi_active) <= 0)
return;
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- wrmsr(MSR_K7_EVNTSEL0, 0, 0);
- break;
- case X86_VENDOR_INTEL:
- switch (boot_cpu_data.x86) {
- case 6:
- if (boot_cpu_data.x86_model > 0xd)
- break;
- wrmsr(MSR_P6_EVNTSEL0, 0, 0);
- break;
- case 15:
- if (boot_cpu_data.x86_model > 0x4)
- break;
+ on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
- wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
- wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
- break;
- }
- break;
- }
- nmi_active = -1;
- /* tell do_nmi() and others that we're not active any more */
- nmi_watchdog = 0;
+ BUG_ON(atomic_read(&nmi_active) != 0);
}
static void enable_lapic_nmi_watchdog(void)
{
- if (nmi_active < 0) {
- nmi_watchdog = NMI_LOCAL_APIC;
- setup_apic_nmi_watchdog();
- }
+ BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+ /* are we already enabled */
+ if (atomic_read(&nmi_active) != 0)
+ return;
+
+ /* are we lapic aware */
+ if (nmi_known_cpu() <= 0)
+ return;
+
+ on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+ touch_nmi_watchdog();
}
int reserve_lapic_nmi(void)
@@ -388,20 +354,25 @@ void release_lapic_nmi(void)
void disable_timer_nmi_watchdog(void)
{
- if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
+ BUG_ON(nmi_watchdog != NMI_IO_APIC);
+
+ if (atomic_read(&nmi_active) <= 0)
return;
- unset_nmi_callback();
- nmi_active = -1;
- nmi_watchdog = NMI_NONE;
+ disable_irq(0);
+ on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+
+ BUG_ON(atomic_read(&nmi_active) != 0);
}
void enable_timer_nmi_watchdog(void)
{
- if (nmi_active < 0) {
- nmi_watchdog = NMI_IO_APIC;
+ BUG_ON(nmi_watchdog != NMI_IO_APIC);
+
+ if (atomic_read(&nmi_active) == 0) {
touch_nmi_watchdog();
- nmi_active = 1;
+ on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+ enable_irq(0);
}
}
@@ -411,7 +382,7 @@ static int nmi_pm_active; /* nmi_active
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
- nmi_pm_active = nmi_active;
+ nmi_pm_active = atomic_read(&nmi_active);
disable_lapic_nmi_watchdog();
return 0;
}
@@ -439,7 +410,13 @@ static int __init init_lapic_nmi_sysfs(v
{
int error;
- if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
+ /* should really be a BUG_ON but b/c this is an
+ * init call, it just doesn't work. -dcz
+ */
+ if (nmi_watchdog != NMI_LOCAL_APIC)
+ return 0;
+
+ if ( atomic_read(&nmi_active) < 0 )
return 0;
error = sysdev_class_register(&nmi_sysclass);
@@ -457,143 +434,312 @@ late_initcall(init_lapic_nmi_sysfs);
* Original code written by Keith Owens.
*/
-static void write_watchdog_counter(const char *descr)
+static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
{
u64 count = (u64)cpu_khz * 1000;
do_div(count, nmi_hz);
if(descr)
Dprintk("setting %s to -0x%08Lx\n", descr, count);
- wrmsrl(nmi_perfctr_msr, 0 - count);
+ wrmsrl(perfctr_msr, 0 - count);
}
+/* Note that these events don't tick when the CPU idles. This means
+ the frequency varies with CPU load. */
+
+#define K7_EVNTSEL_ENABLE (1 << 22)
+#define K7_EVNTSEL_INT (1 << 20)
+#define K7_EVNTSEL_OS (1 << 17)
+#define K7_EVNTSEL_USR (1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
+#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
static int setup_k7_watchdog(void)
{
+ unsigned int perfctr_msr, evntsel_msr;
unsigned int evntsel;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
- nmi_perfctr_msr = MSR_K7_PERFCTR0;
-
- if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+ perfctr_msr = MSR_K7_PERFCTR0;
+ evntsel_msr = MSR_K7_EVNTSEL0;
+ if (!reserve_perfctr_nmi(perfctr_msr))
goto fail;
- if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0))
+ if (!reserve_evntsel_nmi(evntsel_msr))
goto fail1;
- wrmsrl(MSR_K7_PERFCTR0, 0UL);
+ wrmsrl(perfctr_msr, 0UL);
evntsel = K7_EVNTSEL_INT
| K7_EVNTSEL_OS
| K7_EVNTSEL_USR
| K7_NMI_EVENT;
- wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
- write_watchdog_counter("K7_PERFCTR0");
+ /* setup the timer */
+ wrmsr(evntsel_msr, evntsel, 0);
+ write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= K7_EVNTSEL_ENABLE;
- wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+ wrmsr(evntsel_msr, evntsel, 0);
+
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; //unused
+ wd->check_bit = 1ULL<<63;
return 1;
fail1:
- release_perfctr_nmi(nmi_perfctr_msr);
+ release_perfctr_nmi(perfctr_msr);
fail:
return 0;
}
+static void stop_k7_watchdog(void)
+{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ wrmsr(wd->evntsel_msr, 0, 0);
+
+ release_evntsel_nmi(wd->evntsel_msr);
+ release_perfctr_nmi(wd->perfctr_msr);
+}
+
+#define P6_EVNTSEL0_ENABLE (1 << 22)
+#define P6_EVNTSEL_INT (1 << 20)
+#define P6_EVNTSEL_OS (1 << 17)
+#define P6_EVNTSEL_USR (1 << 16)
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
+#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
+
static int setup_p6_watchdog(void)
{
+ unsigned int perfctr_msr, evntsel_msr;
unsigned int evntsel;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
- nmi_perfctr_msr = MSR_P6_PERFCTR0;
-
- if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+ perfctr_msr = MSR_P6_PERFCTR0;
+ evntsel_msr = MSR_P6_EVNTSEL0;
+ if (!reserve_perfctr_nmi(perfctr_msr))
goto fail;
- if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0))
+ if (!reserve_evntsel_nmi(evntsel_msr))
goto fail1;
+ wrmsrl(perfctr_msr, 0UL);
+
evntsel = P6_EVNTSEL_INT
| P6_EVNTSEL_OS
| P6_EVNTSEL_USR
| P6_NMI_EVENT;
- wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
- write_watchdog_counter("P6_PERFCTR0");
+ /* setup the timer */
+ wrmsr(evntsel_msr, evntsel, 0);
+ write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= P6_EVNTSEL0_ENABLE;
- wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+ wrmsr(evntsel_msr, evntsel, 0);
+
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; //unused
+ wd->check_bit = 1ULL<<39;
return 1;
fail1:
- release_perfctr_nmi(nmi_perfctr_msr);
+ release_perfctr_nmi(perfctr_msr);
fail:
return 0;
}
+static void stop_p6_watchdog(void)
+{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ wrmsr(wd->evntsel_msr, 0, 0);
+
+ release_evntsel_nmi(wd->evntsel_msr);
+ release_perfctr_nmi(wd->perfctr_msr);
+}
+
+/* Note that these events don't tick when the CPU idles. This means
+ the frequency varies with CPU load. */
+
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
+#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
+#define P4_ESCR_OS (1<<3)
+#define P4_ESCR_USR (1<<2)
+#define P4_CCCR_OVF_PMI0 (1<<26)
+#define P4_CCCR_OVF_PMI1 (1<<27)
+#define P4_CCCR_THRESHOLD(N) ((N)<<20)
+#define P4_CCCR_COMPLEMENT (1<<19)
+#define P4_CCCR_COMPARE (1<<18)
+#define P4_CCCR_REQUIRED (3<<16)
+#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
+#define P4_CCCR_ENABLE (1<<12)
+#define P4_CCCR_OVF (1<<31)
+/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+ CRU_ESCR0 (with any non-null event selector) through a complemented
+ max threshold. [IA32-Vol3, Section 14.9.9] */
+
static int setup_p4_watchdog(void)
{
+ unsigned int perfctr_msr, evntsel_msr, cccr_msr;
+ unsigned int evntsel, cccr_val;
unsigned int misc_enable, dummy;
+ unsigned int ht_num;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
- rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
+ rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
return 0;
- nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
- nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
#ifdef CONFIG_SMP
- if (smp_num_siblings == 2)
- nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
+ /* detect which hyperthread we are on */
+ if (smp_num_siblings == 2) {
+ unsigned int ebx, apicid;
+
+ ebx = cpuid_ebx(1);
+ apicid = (ebx >> 24) & 0xff;
+ ht_num = apicid & 1;
+ } else
#endif
+ ht_num = 0;
- if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+ /* performance counters are shared resources
+ * assign each hyperthread its own set
+ * (re-use the ESCR0 register, seems safe
+ * and keeps the cccr_val the same)
+ */
+ if (!ht_num) {
+ /* logical cpu 0 */
+ perfctr_msr = MSR_P4_IQ_PERFCTR0;
+ evntsel_msr = MSR_P4_CRU_ESCR0;
+ cccr_msr = MSR_P4_IQ_CCCR0;
+ cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
+ } else {
+ /* logical cpu 1 */
+ perfctr_msr = MSR_P4_IQ_PERFCTR1;
+ evntsel_msr = MSR_P4_CRU_ESCR0;
+ cccr_msr = MSR_P4_IQ_CCCR1;
+ cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
+ }
+
+ if (!reserve_perfctr_nmi(perfctr_msr))
goto fail;
- if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
+ if (!reserve_evntsel_nmi(evntsel_msr))
goto fail1;
- wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
- wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
- write_watchdog_counter("P4_IQ_COUNTER0");
+ evntsel = P4_ESCR_EVENT_SELECT(0x3F)
+ | P4_ESCR_OS
+ | P4_ESCR_USR;
+
+ cccr_val |= P4_CCCR_THRESHOLD(15)
+ | P4_CCCR_COMPLEMENT
+ | P4_CCCR_COMPARE
+ | P4_CCCR_REQUIRED;
+
+ wrmsr(evntsel_msr, evntsel, 0);
+ wrmsr(cccr_msr, cccr_val, 0);
+ write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
+ cccr_val |= P4_CCCR_ENABLE;
+ wrmsr(cccr_msr, cccr_val, 0);
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = cccr_msr;
+ wd->check_bit = 1ULL<<39;
return 1;
fail1:
- release_perfctr_nmi(nmi_perfctr_msr);
+ release_perfctr_nmi(perfctr_msr);
fail:
return 0;
}
-void setup_apic_nmi_watchdog (void)
+static void stop_p4_watchdog(void)
{
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
- return;
- if (!setup_k7_watchdog())
- return;
- break;
- case X86_VENDOR_INTEL:
- switch (boot_cpu_data.x86) {
- case 6:
- if (boot_cpu_data.x86_model > 0xd)
- return;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ wrmsr(wd->cccr_msr, 0, 0);
+ wrmsr(wd->evntsel_msr, 0, 0);
+
+ release_evntsel_nmi(wd->evntsel_msr);
+ release_perfctr_nmi(wd->perfctr_msr);
+}
+
+void setup_apic_nmi_watchdog (void *unused)
+{
+ /* only support LOCAL and IO APICs for now */
+ if ((nmi_watchdog != NMI_LOCAL_APIC) &&
+ (nmi_watchdog != NMI_IO_APIC))
+ return;
- if(!setup_p6_watchdog())
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
return;
- break;
- case 15:
- if (boot_cpu_data.x86_model > 0x4)
+ if (!setup_k7_watchdog())
return;
+ break;
+ case X86_VENDOR_INTEL:
+ switch (boot_cpu_data.x86) {
+ case 6:
+ if (boot_cpu_data.x86_model > 0xd)
+ return;
- if (!setup_p4_watchdog())
+ if (!setup_p6_watchdog())
+ return;
+ break;
+ case 15:
+ if (boot_cpu_data.x86_model > 0x4)
+ return;
+
+ if (!setup_p4_watchdog())
+ return;
+ break;
+ default:
return;
+ }
+ break;
+ default:
+ return;
+ }
+ }
+ __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1;
+ atomic_inc(&nmi_active);
+}
+
+static void stop_apic_nmi_watchdog(void *unused)
+{
+ /* only support LOCAL and IO APICs for now */
+ if ((nmi_watchdog != NMI_LOCAL_APIC) &&
+ (nmi_watchdog != NMI_IO_APIC))
+ return;
+
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ stop_k7_watchdog();
+ break;
+ case X86_VENDOR_INTEL:
+ switch (boot_cpu_data.x86) {
+ case 6:
+ if (boot_cpu_data.x86_model > 0xd)
+ break;
+ stop_p6_watchdog();
+ break;
+ case 15:
+ if (boot_cpu_data.x86_model > 0x4)
+ break;
+ stop_p4_watchdog();
+ break;
+ }
break;
default:
return;
}
- break;
- default:
- return;
}
- lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
- nmi_active = 1;
+ __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0;
+ atomic_dec(&nmi_active);
}
/*
@@ -635,7 +781,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
extern void die_nmi(struct pt_regs *, const char *msg);
-void nmi_watchdog_tick (struct pt_regs * regs)
+void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
{
/*
@@ -644,11 +790,21 @@ void nmi_watchdog_tick (struct pt_regs *
* smp_processor_id().
*/
unsigned int sum;
+ int touched = 0;
int cpu = smp_processor_id();
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+ u64 dummy;
+
+ /* check for other users first */
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+ == NOTIFY_STOP) {
+ touched = 1;
+ }
sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
- if (last_irq_sums[cpu] == sum) {
+ /* if the apic timer isn't firing, this cpu isn't doing much */
+ if (!touched && last_irq_sums[cpu] == sum) {
/*
* Ayiee, looks like this CPU is stuck ...
* wait a few IRQs (5 seconds) before doing the oops ...
@@ -663,26 +819,41 @@ void nmi_watchdog_tick (struct pt_regs *
last_irq_sums[cpu] = sum;
alert_counter[cpu] = 0;
}
- if (nmi_perfctr_msr) {
- if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
- /*
- * P4 quirks:
- * - An overflown perfctr will assert its interrupt
- * until the OVF flag in its CCCR is cleared.
- * - LVTPC is masked on interrupt and must be
- * unmasked by the LVTPC handler.
- */
- wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- }
- else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
- /* Only P6 based Pentium M need to re-unmask
- * the apic vector but it doesn't hurt
- * other P6 variant */
- apic_write(APIC_LVTPC, APIC_DM_NMI);
+ /* see if the nmi watchdog went off */
+ if (wd->enabled) {
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
+ rdmsrl(wd->perfctr_msr, dummy);
+ if (dummy & wd->check_bit){
+ /* this wasn't a watchdog timer interrupt */
+ goto done;
+ }
+
+ /* only Intel P4 uses the cccr msr */
+ if (wd->cccr_msr != 0) {
+ /*
+ * P4 quirks:
+ * - An overflown perfctr will assert its interrupt
+ * until the OVF flag in its CCCR is cleared.
+ * - LVTPC is masked on interrupt and must be
+ * unmasked by the LVTPC handler.
+ */
+ rdmsrl(wd->cccr_msr, dummy);
+ dummy &= ~P4_CCCR_OVF;
+ wrmsrl(wd->cccr_msr, dummy);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ }
+ else if (wd->perfctr_msr == MSR_P6_PERFCTR0) {
+ /* Only P6 based Pentium M need to re-unmask
+ * the apic vector but it doesn't hurt
+ * other P6 variant */
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ }
+ /* start the cycle over again */
+ write_watchdog_counter(wd->perfctr_msr, NULL);
}
- write_watchdog_counter(NULL);
}
+done:
+ return;
}
#ifdef CONFIG_SYSCTL
Index: linux/arch/i386/kernel/traps.c
===================================================================
--- linux.orig/arch/i386/kernel/traps.c
+++ linux/arch/i386/kernel/traps.c
@@ -721,7 +721,7 @@ static void default_do_nmi(struct pt_reg
* so it must be the NMI watchdog.
*/
if (nmi_watchdog) {
- nmi_watchdog_tick(regs);
+ nmi_watchdog_tick(regs, reason);
return;
}
#endif
Index: linux/include/asm-i386/nmi.h
===================================================================
--- linux.orig/include/asm-i386/nmi.h
+++ linux/include/asm-i386/nmi.h
@@ -32,13 +32,14 @@ extern void release_perfctr_nmi(unsigned
extern int reserve_evntsel_nmi(unsigned int);
extern void release_evntsel_nmi(unsigned int);
-extern void setup_apic_nmi_watchdog (void);
+extern void setup_apic_nmi_watchdog (void *);
extern int reserve_lapic_nmi(void);
extern void release_lapic_nmi(void);
extern void disable_timer_nmi_watchdog(void);
extern void enable_timer_nmi_watchdog(void);
-extern void nmi_watchdog_tick (struct pt_regs * regs);
+extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
+extern atomic_t nmi_active;
extern unsigned int nmi_watchdog;
#define NMI_DEFAULT -1
#define NMI_NONE 0
Index: linux/arch/i386/oprofile/nmi_timer_int.c
===================================================================
--- linux.orig/arch/i386/oprofile/nmi_timer_int.c
+++ linux/arch/i386/oprofile/nmi_timer_int.c
@@ -42,9 +42,7 @@ static void timer_stop(void)
int __init op_nmi_timer_init(struct oprofile_operations * ops)
{
- extern int nmi_active;
-
- if (nmi_active <= 0)
+ if (atomic_read(&nmi_active) <= 0)
return -ENODEV;
ops->start = timer_start;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [9/145] x86_64: Cleanup NMI interrupt path
[not found] <20060810 935.775038000@suse.de>
` (7 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [8/145] x86_64: Add SMP support on i386 " Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [10/145] x86_64: Add TIF_RESTORE_SIGMASK Andi Kleen
` (136 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: dzickus <dzickus@redhat.com>
This patch cleans up the NMI interrupt path. Instead of being gated by
whether the 'nmi callback' is set, the interrupt handler now calls everyone who is
registered on the die_chain and additionally checks the nmi watchdog,
resetting it if enabled. This allows more subsystems to hook into the NMI if
they need to (without being blocked by set_nmi_callback).
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/nmi.c | 16 +++++++++++++---
arch/i386/kernel/traps.c | 24 +++++++++++-------------
arch/x86_64/kernel/nmi.c | 26 +++++++++++++++++++-------
arch/x86_64/kernel/traps.c | 8 ++++----
include/asm-i386/nmi.h | 2 +-
include/asm-x86_64/nmi.h | 10 +++++++++-
6 files changed, 57 insertions(+), 29 deletions(-)
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -781,7 +781,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
extern void die_nmi(struct pt_regs *, const char *msg);
-void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
+int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
{
/*
@@ -794,10 +794,12 @@ void nmi_watchdog_tick (struct pt_regs *
int cpu = smp_processor_id();
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
u64 dummy;
+ int rc=0;
/* check for other users first */
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
== NOTIFY_STOP) {
+ rc = 1;
touched = 1;
}
@@ -850,10 +852,18 @@ void nmi_watchdog_tick (struct pt_regs *
}
/* start the cycle over again */
write_watchdog_counter(wd->perfctr_msr, NULL);
- }
+ rc = 1;
+ } else if (nmi_watchdog == NMI_IO_APIC) {
+ /* don't know how to accurately check for this.
+ * just assume it was a watchdog timer interrupt
+ * This matches the old behaviour.
+ */
+ rc = 1;
+ } else
+ printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
}
done:
- return;
+ return rc;
}
#ifdef CONFIG_SYSCTL
Index: linux/arch/i386/kernel/traps.c
===================================================================
--- linux.orig/arch/i386/kernel/traps.c
+++ linux/arch/i386/kernel/traps.c
@@ -703,6 +703,13 @@ void die_nmi (struct pt_regs *regs, cons
do_exit(SIGSEGV);
}
+static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
+{
+ return 0;
+}
+
+static nmi_callback_t nmi_callback = dummy_nmi_callback;
+
static void default_do_nmi(struct pt_regs * regs)
{
unsigned char reason = 0;
@@ -720,12 +727,11 @@ static void default_do_nmi(struct pt_reg
* Ok, so this is none of the documented NMI sources,
* so it must be the NMI watchdog.
*/
- if (nmi_watchdog) {
- nmi_watchdog_tick(regs, reason);
+ if (nmi_watchdog_tick(regs, reason))
return;
- }
#endif
- unknown_nmi_error(reason, regs);
+ if (!rcu_dereference(nmi_callback)(regs, smp_processor_id()))
+ unknown_nmi_error(reason, regs);
return;
}
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@@ -741,13 +747,6 @@ static void default_do_nmi(struct pt_reg
reassert_nmi();
}
-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
-{
- return 0;
-}
-
-static nmi_callback_t nmi_callback = dummy_nmi_callback;
-
fastcall void do_nmi(struct pt_regs * regs, long error_code)
{
int cpu;
@@ -758,8 +757,7 @@ fastcall void do_nmi(struct pt_regs * re
++nmi_count(cpu);
- if (!rcu_dereference(nmi_callback)(regs, cpu))
- default_do_nmi(regs);
+ default_do_nmi(regs);
nmi_exit();
}
Index: linux/arch/x86_64/kernel/nmi.c
===================================================================
--- linux.orig/arch/x86_64/kernel/nmi.c
+++ linux/arch/x86_64/kernel/nmi.c
@@ -682,16 +682,18 @@ void touch_nmi_watchdog (void)
touch_softlockup_watchdog();
}
-void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
+int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{
int sum;
int touched = 0;
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
u64 dummy;
+ int rc=0;
/* check for other users first */
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
== NOTIFY_STOP) {
+ rc = 1;
touched = 1;
}
@@ -746,10 +748,18 @@ void __kprobes nmi_watchdog_tick(struct
}
/* start the cycle over again */
wrmsrl(wd->perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
- }
+ rc = 1;
+ } else if (nmi_watchdog == NMI_IO_APIC) {
+ /* don't know how to accurately check for this.
+ * just assume it was a watchdog timer interrupt
+ * This matches the old behaviour.
+ */
+ rc = 1;
+ } else
+ printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
}
done:
- return;
+ return rc;
}
static __kprobes int dummy_nmi_callback(struct pt_regs * regs, int cpu)
@@ -761,15 +771,17 @@ static nmi_callback_t nmi_callback = dum
asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
{
- int cpu = safe_smp_processor_id();
-
nmi_enter();
add_pda(__nmi_count,1);
- if (!rcu_dereference(nmi_callback)(regs, cpu))
- default_do_nmi(regs);
+ default_do_nmi(regs);
nmi_exit();
}
+int do_nmi_callback(struct pt_regs * regs, int cpu)
+{
+ return rcu_dereference(nmi_callback)(regs, cpu);
+}
+
void set_nmi_callback(nmi_callback_t callback)
{
vmalloc_sync_all();
Index: linux/arch/x86_64/kernel/traps.c
===================================================================
--- linux.orig/arch/x86_64/kernel/traps.c
+++ linux/arch/x86_64/kernel/traps.c
@@ -777,12 +777,12 @@ asmlinkage __kprobes void default_do_nmi
* Ok, so this is none of the documented NMI sources,
* so it must be the NMI watchdog.
*/
- if (nmi_watchdog > 0) {
- nmi_watchdog_tick(regs,reason);
+ if (nmi_watchdog_tick(regs,reason))
return;
- }
+ if (!do_nmi_callback(regs,cpu))
#endif
- unknown_nmi_error(reason, regs);
+ unknown_nmi_error(reason, regs);
+
return;
}
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
Index: linux/include/asm-i386/nmi.h
===================================================================
--- linux.orig/include/asm-i386/nmi.h
+++ linux/include/asm-i386/nmi.h
@@ -37,7 +37,7 @@ extern int reserve_lapic_nmi(void);
extern void release_lapic_nmi(void);
extern void disable_timer_nmi_watchdog(void);
extern void enable_timer_nmi_watchdog(void);
-extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
+extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
extern atomic_t nmi_active;
extern unsigned int nmi_watchdog;
Index: linux/include/asm-x86_64/nmi.h
===================================================================
--- linux.orig/include/asm-x86_64/nmi.h
+++ linux/include/asm-x86_64/nmi.h
@@ -26,6 +26,14 @@ void set_nmi_callback(nmi_callback_t cal
*/
void unset_nmi_callback(void);
+/**
+ * do_nmi_callback
+ *
+ * Check to see if a callback exists and execute it. Return 1
+ * if the handler exists and was handled successfully.
+ */
+int do_nmi_callback(struct pt_regs *regs, int cpu);
+
#ifdef CONFIG_PM
/** Replace the PM callback routine for NMI. */
@@ -68,7 +76,7 @@ extern int reserve_lapic_nmi(void);
extern void release_lapic_nmi(void);
extern void disable_timer_nmi_watchdog(void);
extern void enable_timer_nmi_watchdog(void);
-extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
+extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
extern void nmi_watchdog_default(void);
extern int setup_nmi_watchdog(char *);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [10/145] x86_64: Add TIF_RESTORE_SIGMASK
[not found] <20060810 935.775038000@suse.de>
` (8 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [9/145] x86_64: Cleanup NMI interrupt path Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [11/145] x86_64: Add ppoll/pselect syscalls Andi Kleen
` (135 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
We need TIF_RESTORE_SIGMASK in order to support ppoll() and pselect()
system calls. This patch originally came from Andi, and was based
heavily on David Howells' implementation of same on i386. I fixed a typo
which was causing do_signal() to use the wrong signal mask.
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/ia32/ia32_signal.c | 28 +++++--------
arch/x86_64/kernel/signal.c | 82 ++++++++++++++++-----------------------
include/asm-x86_64/signal.h | 4 -
include/asm-x86_64/thread_info.h | 2
include/asm-x86_64/unistd.h | 1
5 files changed, 49 insertions(+), 68 deletions(-)
Index: linux/arch/x86_64/ia32/ia32_signal.c
===================================================================
--- linux.orig/arch/x86_64/ia32/ia32_signal.c
+++ linux/arch/x86_64/ia32/ia32_signal.c
@@ -113,25 +113,19 @@ int copy_siginfo_from_user32(siginfo_t *
}
asmlinkage long
-sys32_sigsuspend(int history0, int history1, old_sigset_t mask,
- struct pt_regs *regs)
+sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
{
- sigset_t saveset;
-
mask &= _BLOCKABLE;
spin_lock_irq(&current->sighand->siglock);
- saveset = current->blocked;
+ current->saved_sigmask = current->blocked;
siginitset(&current->blocked, mask);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
- regs->rax = -EINTR;
- while (1) {
- current->state = TASK_INTERRUPTIBLE;
- schedule();
- if (do_signal(regs, &saveset))
- return -EINTR;
- }
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ set_thread_flag(TIF_RESTORE_SIGMASK);
+ return -ERESTARTNOHAND;
}
asmlinkage long
@@ -508,11 +502,11 @@ int ia32_setup_frame(int sig, struct k_s
current->comm, current->pid, frame, regs->rip, frame->pretcode);
#endif
- return 1;
+ return 0;
give_sigsegv:
force_sigsegv(sig, current);
- return 0;
+ return -EFAULT;
}
int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
@@ -595,7 +589,7 @@ int ia32_setup_rt_frame(int sig, struct
regs->ss = __USER32_DS;
set_fs(USER_DS);
- regs->eflags &= ~TF_MASK;
+ regs->eflags &= ~TF_MASK;
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
@@ -604,9 +598,9 @@ int ia32_setup_rt_frame(int sig, struct
current->comm, current->pid, frame, regs->rip, frame->pretcode);
#endif
- return 1;
+ return 0;
give_sigsegv:
force_sigsegv(sig, current);
- return 0;
+ return -EFAULT;
}
Index: linux/arch/x86_64/kernel/signal.c
===================================================================
--- linux.orig/arch/x86_64/kernel/signal.c
+++ linux/arch/x86_64/kernel/signal.c
@@ -38,37 +38,6 @@ int ia32_setup_frame(int sig, struct k_s
sigset_t *set, struct pt_regs * regs);
asmlinkage long
-sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs)
-{
- sigset_t saveset, newset;
-
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(sigset_t))
- return -EINVAL;
-
- if (copy_from_user(&newset, unewset, sizeof(newset)))
- return -EFAULT;
- sigdelsetmask(&newset, ~_BLOCKABLE);
-
- spin_lock_irq(&current->sighand->siglock);
- saveset = current->blocked;
- current->blocked = newset;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-#ifdef DEBUG_SIG
- printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
- saveset, newset, regs, regs->rip);
-#endif
- regs->rax = -EINTR;
- while (1) {
- current->state = TASK_INTERRUPTIBLE;
- schedule();
- if (do_signal(regs, &saveset))
- return -EINTR;
- }
-}
-
-asmlinkage long
sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
struct pt_regs *regs)
{
@@ -341,11 +310,11 @@ static int setup_rt_frame(int sig, struc
current->comm, current->pid, frame, regs->rip, frame->pretcode);
#endif
- return 1;
+ return 0;
give_sigsegv:
force_sigsegv(sig, current);
- return 0;
+ return -EFAULT;
}
/*
@@ -408,7 +377,7 @@ handle_signal(unsigned long sig, siginfo
#endif
ret = setup_rt_frame(sig, ka, info, oldset, regs);
- if (ret) {
+ if (ret == 0) {
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
@@ -425,11 +394,12 @@ handle_signal(unsigned long sig, siginfo
* want to handle. Thus you cannot kill init even with a SIGKILL even by
* mistake.
*/
-int do_signal(struct pt_regs *regs, sigset_t *oldset)
+static void do_signal(struct pt_regs *regs)
{
struct k_sigaction ka;
siginfo_t info;
int signr;
+ sigset_t *oldset;
/*
* We want the common case to go fast, which
@@ -438,9 +408,11 @@ int do_signal(struct pt_regs *regs, sigs
* if so.
*/
if (!user_mode(regs))
- return 1;
+ return;
- if (!oldset)
+ if (test_thread_flag(TIF_RESTORE_SIGMASK))
+ oldset = &current->saved_sigmask;
+ else
oldset = &current->blocked;
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
@@ -454,30 +426,46 @@ int do_signal(struct pt_regs *regs, sigs
set_debugreg(current->thread.debugreg7, 7);
/* Whee! Actually deliver the signal. */
- return handle_signal(signr, &info, &ka, oldset, regs);
+ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+ /* a signal was successfully delivered; the saved
+ * sigmask will have been stored in the signal frame,
+ * and will be restored by sigreturn, so we can simply
+ * clear the TIF_RESTORE_SIGMASK flag */
+ clear_thread_flag(TIF_RESTORE_SIGMASK);
+ }
+ return;
}
/* Did we come from a system call? */
if ((long)regs->orig_rax >= 0) {
/* Restart the system call - no handlers present */
long res = regs->rax;
- if (res == -ERESTARTNOHAND ||
- res == -ERESTARTSYS ||
- res == -ERESTARTNOINTR) {
+ switch (res) {
+ case -ERESTARTNOHAND:
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
regs->rax = regs->orig_rax;
regs->rip -= 2;
- }
- if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) {
+ break;
+ case -ERESTART_RESTARTBLOCK:
regs->rax = test_thread_flag(TIF_IA32) ?
__NR_ia32_restart_syscall :
__NR_restart_syscall;
regs->rip -= 2;
+ break;
}
}
- return 0;
+
+ /* if there's no signal to deliver, we just put the saved sigmask
+ back. */
+ if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+ clear_thread_flag(TIF_RESTORE_SIGMASK);
+ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+ }
}
-void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags)
+void
+do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
#ifdef DEBUG_SIG
printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n",
@@ -491,8 +479,8 @@ void do_notify_resume(struct pt_regs *re
}
/* deal with pending signal delivery */
- if (thread_info_flags & _TIF_SIGPENDING)
- do_signal(regs,oldset);
+ if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK))
+ do_signal(regs);
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
Index: linux/include/asm-x86_64/signal.h
===================================================================
--- linux.orig/include/asm-x86_64/signal.h
+++ linux/include/asm-x86_64/signal.h
@@ -24,10 +24,6 @@ typedef struct {
} sigset_t;
-struct pt_regs;
-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
-
-
#else
/* Here we must cater to libcs that poke about in kernel headers. */
Index: linux/include/asm-x86_64/thread_info.h
===================================================================
--- linux.orig/include/asm-x86_64/thread_info.h
+++ linux/include/asm-x86_64/thread_info.h
@@ -114,6 +114,7 @@ static inline struct thread_info *stack_
#define TIF_IRET 5 /* force IRET */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
+#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
/* 16 free */
#define TIF_IA32 17 /* 32bit process */
#define TIF_FORK 18 /* ret_from_fork */
@@ -128,6 +129,7 @@ static inline struct thread_info *stack_
#define _TIF_IRET (1<<TIF_IRET)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
+#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
#define _TIF_IA32 (1<<TIF_IA32)
#define _TIF_FORK (1<<TIF_FORK)
#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
Index: linux/include/asm-x86_64/unistd.h
===================================================================
--- linux.orig/include/asm-x86_64/unistd.h
+++ linux/include/asm-x86_64/unistd.h
@@ -658,6 +658,7 @@ do { \
#define __ARCH_WANT_SYS_SIGPENDING
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_SYS_RT_SIGACTION
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_COMPAT_SYS_TIME
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [11/145] x86_64: Add ppoll/pselect syscalls
[not found] <20060810 935.775038000@suse.de>
` (9 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [10/145] x86_64: Add TIF_RESTORE_SIGMASK Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [12/145] i386/x86-64: Remove un/set_nmi_callback and reserve/release_lapic_nmi functions Andi Kleen
` (134 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
Needed TIF_RESTORE_SIGMASK first
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/ia32/ia32entry.S | 4 ++--
include/asm-x86_64/unistd.h | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
Index: linux/arch/x86_64/ia32/ia32entry.S
===================================================================
--- linux.orig/arch/x86_64/ia32/ia32entry.S
+++ linux/arch/x86_64/ia32/ia32entry.S
@@ -703,8 +703,8 @@ ia32_sys_call_table:
.quad sys_readlinkat /* 305 */
.quad sys_fchmodat
.quad sys_faccessat
- .quad quiet_ni_syscall /* pselect6 for now */
- .quad quiet_ni_syscall /* ppoll for now */
+ .quad compat_sys_pselect6
+ .quad compat_sys_ppoll
.quad sys_unshare /* 310 */
.quad compat_sys_set_robust_list
.quad compat_sys_get_robust_list
Index: linux/include/asm-x86_64/unistd.h
===================================================================
--- linux.orig/include/asm-x86_64/unistd.h
+++ linux/include/asm-x86_64/unistd.h
@@ -600,9 +600,9 @@ __SYSCALL(__NR_fchmodat, sys_fchmodat)
#define __NR_faccessat 269
__SYSCALL(__NR_faccessat, sys_faccessat)
#define __NR_pselect6 270
-__SYSCALL(__NR_pselect6, sys_ni_syscall) /* for now */
+__SYSCALL(__NR_pselect6, sys_pselect6)
#define __NR_ppoll 271
-__SYSCALL(__NR_ppoll, sys_ni_syscall) /* for now */
+__SYSCALL(__NR_ppoll, sys_ppoll)
#define __NR_unshare 272
__SYSCALL(__NR_unshare, sys_unshare)
#define __NR_set_robust_list 273
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [12/145] i386/x86-64: Remove un/set_nmi_callback and reserve/release_lapic_nmi functions
[not found] <20060810 935.775038000@suse.de>
` (10 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [11/145] x86_64: Add ppoll/pselect syscalls Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [13/145] x86_64: Add abilty to enable/disable nmi watchdog with sysctl Andi Kleen
` (133 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: dzickus <dzickus@redhat.com>
Removes the un/set_nmi_callback and reserve/release_lapic_nmi functions as
they are no longer needed. The various subsystems are modified to register
with the die_notifier instead.
Also includes compile fixes by Andrew Morton.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/crash.c | 20 ++++++-
arch/i386/kernel/nmi.c | 85 +++---------------------------
arch/i386/kernel/traps.c | 23 --------
arch/i386/oprofile/nmi_int.c | 47 ++++++++++-------
arch/i386/oprofile/nmi_timer_int.c | 33 +++++++++--
arch/x86_64/kernel/crash.c | 20 ++++++-
arch/x86_64/kernel/nmi.c | 102 ++-----------------------------------
include/asm-i386/nmi.h | 21 +------
include/asm-x86_64/nmi.h | 21 -------
kernel/sysctl.c | 4 -
10 files changed, 116 insertions(+), 260 deletions(-)
Index: linux/arch/x86_64/kernel/nmi.c
===================================================================
--- linux.orig/arch/x86_64/kernel/nmi.c
+++ linux/arch/x86_64/kernel/nmi.c
@@ -41,20 +41,6 @@ static DEFINE_PER_CPU(unsigned, evntsel_
*/
#define NMI_MAX_COUNTER_BITS 66
-/*
- * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
- * - it may be reserved by some other driver, or not
- * - when not reserved by some other driver, it may be used for
- * the NMI watchdog, or not
- *
- * This is maintained separately from nmi_active because the NMI
- * watchdog may also be driven from the I/O APIC timer.
- */
-static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
-static unsigned int lapic_nmi_owner;
-#define LAPIC_NMI_WATCHDOG (1<<0)
-#define LAPIC_NMI_RESERVED (1<<1)
-
/* nmi_active:
* >0: the lapic NMI watchdog is active, but can be disabled
* <0: the lapic NMI watchdog has not been set up, and cannot
@@ -321,33 +307,6 @@ static void enable_lapic_nmi_watchdog(vo
touch_nmi_watchdog();
}
-int reserve_lapic_nmi(void)
-{
- unsigned int old_owner;
-
- spin_lock(&lapic_nmi_owner_lock);
- old_owner = lapic_nmi_owner;
- lapic_nmi_owner |= LAPIC_NMI_RESERVED;
- spin_unlock(&lapic_nmi_owner_lock);
- if (old_owner & LAPIC_NMI_RESERVED)
- return -EBUSY;
- if (old_owner & LAPIC_NMI_WATCHDOG)
- disable_lapic_nmi_watchdog();
- return 0;
-}
-
-void release_lapic_nmi(void)
-{
- unsigned int new_owner;
-
- spin_lock(&lapic_nmi_owner_lock);
- new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
- lapic_nmi_owner = new_owner;
- spin_unlock(&lapic_nmi_owner_lock);
- if (new_owner & LAPIC_NMI_WATCHDOG)
- enable_lapic_nmi_watchdog();
-}
-
void disable_timer_nmi_watchdog(void)
{
BUG_ON(nmi_watchdog != NMI_IO_APIC);
@@ -762,13 +721,6 @@ done:
return rc;
}
-static __kprobes int dummy_nmi_callback(struct pt_regs * regs, int cpu)
-{
- return 0;
-}
-
-static nmi_callback_t nmi_callback = dummy_nmi_callback;
-
asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
{
nmi_enter();
@@ -779,21 +731,12 @@ asmlinkage __kprobes void do_nmi(struct
int do_nmi_callback(struct pt_regs * regs, int cpu)
{
- return rcu_dereference(nmi_callback)(regs, cpu);
-}
-
-void set_nmi_callback(nmi_callback_t callback)
-{
- vmalloc_sync_all();
- rcu_assign_pointer(nmi_callback, callback);
-}
-EXPORT_SYMBOL_GPL(set_nmi_callback);
-
-void unset_nmi_callback(void)
-{
- nmi_callback = dummy_nmi_callback;
+#ifdef CONFIG_SYSCTL
+ if (unknown_nmi_panic)
+ return unknown_nmi_panic_callback(regs, cpu);
+#endif
+ return 0;
}
-EXPORT_SYMBOL_GPL(unset_nmi_callback);
#ifdef CONFIG_SYSCTL
@@ -802,37 +745,8 @@ static int unknown_nmi_panic_callback(st
unsigned char reason = get_nmi_reason();
char buf[64];
- if (!(reason & 0xc0)) {
- sprintf(buf, "NMI received for unknown reason %02x\n", reason);
- die_nmi(buf,regs);
- }
- return 0;
-}
-
-/*
- * proc handler for /proc/sys/kernel/unknown_nmi_panic
- */
-int proc_unknown_nmi_panic(struct ctl_table *table, int write, struct file *file,
- void __user *buffer, size_t *length, loff_t *ppos)
-{
- int old_state;
-
- old_state = unknown_nmi_panic;
- proc_dointvec(table, write, file, buffer, length, ppos);
- if (!!old_state == !!unknown_nmi_panic)
- return 0;
-
- if (unknown_nmi_panic) {
- if (reserve_lapic_nmi() < 0) {
- unknown_nmi_panic = 0;
- return -EBUSY;
- } else {
- set_nmi_callback(unknown_nmi_panic_callback);
- }
- } else {
- release_lapic_nmi();
- unset_nmi_callback();
- }
+ sprintf(buf, "NMI received for unknown reason %02x\n", reason);
+ die_nmi(buf,regs);
return 0;
}
@@ -846,8 +760,6 @@ EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
-EXPORT_SYMBOL(reserve_lapic_nmi);
-EXPORT_SYMBOL(release_lapic_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);
EXPORT_SYMBOL(touch_nmi_watchdog);
Index: linux/include/asm-x86_64/nmi.h
===================================================================
--- linux.orig/include/asm-x86_64/nmi.h
+++ linux/include/asm-x86_64/nmi.h
@@ -7,25 +7,6 @@
#include <linux/pm.h>
#include <asm/io.h>
-struct pt_regs;
-
-typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
-
-/**
- * set_nmi_callback
- *
- * Set a handler for an NMI. Only one handler may be
- * set. Return 1 if the NMI was handled.
- */
-void set_nmi_callback(nmi_callback_t callback);
-
-/**
- * unset_nmi_callback
- *
- * Remove the handler previously set.
- */
-void unset_nmi_callback(void);
-
/**
* do_nmi_callback
*
@@ -72,8 +53,6 @@ extern int reserve_evntsel_nmi(unsigned
extern void release_evntsel_nmi(unsigned int);
extern void setup_apic_nmi_watchdog (void *);
-extern int reserve_lapic_nmi(void);
-extern void release_lapic_nmi(void);
extern void disable_timer_nmi_watchdog(void);
extern void enable_timer_nmi_watchdog(void);
extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
Index: linux/arch/i386/oprofile/nmi_int.c
===================================================================
--- linux.orig/arch/i386/oprofile/nmi_int.c
+++ linux/arch/i386/oprofile/nmi_int.c
@@ -17,14 +17,15 @@
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
+#include <asm/kdebug.h>
#include "op_counter.h"
#include "op_x86_model.h"
-
+
static struct op_x86_model_spec const * model;
static struct op_msrs cpu_msrs[NR_CPUS];
static unsigned long saved_lvtpc[NR_CPUS];
-
+
static int nmi_start(void);
static void nmi_stop(void);
@@ -82,13 +83,24 @@ static void exit_driverfs(void)
#define exit_driverfs() do { } while (0)
#endif /* CONFIG_PM */
-
-static int nmi_callback(struct pt_regs * regs, int cpu)
+int profile_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
{
- return model->check_ctrs(regs, &cpu_msrs[cpu]);
+ struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+ int cpu = smp_processor_id();
+
+ switch(val) {
+ case DIE_NMI:
+ if (model->check_ctrs(args->regs, &cpu_msrs[cpu]))
+ ret = NOTIFY_STOP;
+ break;
+ default:
+ break;
+ }
+ return ret;
}
-
-
+
static void nmi_cpu_save_registers(struct op_msrs * msrs)
{
unsigned int const nr_ctrs = model->num_counters;
@@ -174,27 +186,29 @@ static void nmi_cpu_setup(void * dummy)
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
+static struct notifier_block profile_exceptions_nb = {
+ .notifier_call = profile_exceptions_notify,
+ .next = NULL,
+ .priority = 0
+};
static int nmi_setup(void)
{
+ int err=0;
+
if (!allocate_msrs())
return -ENOMEM;
- /* We walk a thin line between law and rape here.
- * We need to be careful to install our NMI handler
- * without actually triggering any NMIs as this will
- * break the core code horrifically.
- */
- if (reserve_lapic_nmi() < 0) {
+ if ((err = register_die_notifier(&profile_exceptions_nb))){
free_msrs();
- return -EBUSY;
+ return err;
}
+
/* We need to serialize save and setup for HT because the subset
* of msrs are distinct for save and setup operations
*/
on_each_cpu(nmi_save_registers, NULL, 0, 1);
on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
- set_nmi_callback(nmi_callback);
nmi_enabled = 1;
return 0;
}
@@ -250,8 +264,7 @@ static void nmi_shutdown(void)
{
nmi_enabled = 0;
on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
- unset_nmi_callback();
- release_lapic_nmi();
+ unregister_die_notifier(&profile_exceptions_nb);
free_msrs();
}
Index: linux/arch/i386/oprofile/nmi_timer_int.c
===================================================================
--- linux.orig/arch/i386/oprofile/nmi_timer_int.c
+++ linux/arch/i386/oprofile/nmi_timer_int.c
@@ -17,32 +17,49 @@
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/ptrace.h>
+#include <asm/kdebug.h>
-static int nmi_timer_callback(struct pt_regs * regs, int cpu)
+int profile_timer_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
{
- oprofile_add_sample(regs, 0);
- return 1;
+ struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+
+ switch(val) {
+ case DIE_NMI:
+ oprofile_add_sample(args->regs, 0);
+ ret = NOTIFY_STOP;
+ break;
+ default:
+ break;
+ }
+ return ret;
}
+static struct notifier_block profile_timer_exceptions_nb = {
+ .notifier_call = profile_timer_exceptions_notify,
+ .next = NULL,
+ .priority = 0
+};
+
static int timer_start(void)
{
- disable_timer_nmi_watchdog();
- set_nmi_callback(nmi_timer_callback);
+ if (register_die_notifier(&profile_timer_exceptions_nb))
+ return 1;
return 0;
}
static void timer_stop(void)
{
- enable_timer_nmi_watchdog();
- unset_nmi_callback();
+ unregister_die_notifier(&profile_timer_exceptions_nb);
synchronize_sched(); /* Allow already-started NMIs to complete. */
}
int __init op_nmi_timer_init(struct oprofile_operations * ops)
{
- if (atomic_read(&nmi_active) <= 0)
+ if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
return -ENODEV;
ops->start = timer_start;
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -42,20 +42,6 @@ static DEFINE_PER_CPU(unsigned long, evn
*/
#define NMI_MAX_COUNTER_BITS 66
-/*
- * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
- * - it may be reserved by some other driver, or not
- * - when not reserved by some other driver, it may be used for
- * the NMI watchdog, or not
- *
- * This is maintained separately from nmi_active because the NMI
- * watchdog may also be driven from the I/O APIC timer.
- */
-static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
-static unsigned int lapic_nmi_owner;
-#define LAPIC_NMI_WATCHDOG (1<<0)
-#define LAPIC_NMI_RESERVED (1<<1)
-
/* nmi_active:
* >0: the lapic NMI watchdog is active, but can be disabled
* <0: the lapic NMI watchdog has not been set up, and cannot
@@ -325,33 +311,6 @@ static void enable_lapic_nmi_watchdog(vo
touch_nmi_watchdog();
}
-int reserve_lapic_nmi(void)
-{
- unsigned int old_owner;
-
- spin_lock(&lapic_nmi_owner_lock);
- old_owner = lapic_nmi_owner;
- lapic_nmi_owner |= LAPIC_NMI_RESERVED;
- spin_unlock(&lapic_nmi_owner_lock);
- if (old_owner & LAPIC_NMI_RESERVED)
- return -EBUSY;
- if (old_owner & LAPIC_NMI_WATCHDOG)
- disable_lapic_nmi_watchdog();
- return 0;
-}
-
-void release_lapic_nmi(void)
-{
- unsigned int new_owner;
-
- spin_lock(&lapic_nmi_owner_lock);
- new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
- lapic_nmi_owner = new_owner;
- spin_unlock(&lapic_nmi_owner_lock);
- if (new_owner & LAPIC_NMI_WATCHDOG)
- enable_lapic_nmi_watchdog();
-}
-
void disable_timer_nmi_watchdog(void)
{
BUG_ON(nmi_watchdog != NMI_IO_APIC);
@@ -866,6 +825,15 @@ done:
return rc;
}
+int do_nmi_callback(struct pt_regs * regs, int cpu)
+{
+#ifdef CONFIG_SYSCTL
+ if (unknown_nmi_panic)
+ return unknown_nmi_panic_callback(regs, cpu);
+#endif
+ return 0;
+}
+
#ifdef CONFIG_SYSCTL
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
@@ -873,37 +841,8 @@ static int unknown_nmi_panic_callback(st
unsigned char reason = get_nmi_reason();
char buf[64];
- if (!(reason & 0xc0)) {
- sprintf(buf, "NMI received for unknown reason %02x\n", reason);
- die_nmi(regs, buf);
- }
- return 0;
-}
-
-/*
- * proc handler for /proc/sys/kernel/unknown_nmi_panic
- */
-int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
- void __user *buffer, size_t *length, loff_t *ppos)
-{
- int old_state;
-
- old_state = unknown_nmi_panic;
- proc_dointvec(table, write, file, buffer, length, ppos);
- if (!!old_state == !!unknown_nmi_panic)
- return 0;
-
- if (unknown_nmi_panic) {
- if (reserve_lapic_nmi() < 0) {
- unknown_nmi_panic = 0;
- return -EBUSY;
- } else {
- set_nmi_callback(unknown_nmi_panic_callback);
- }
- } else {
- release_lapic_nmi();
- unset_nmi_callback();
- }
+ sprintf(buf, "NMI received for unknown reason %02x\n", reason);
+ die_nmi(regs, buf);
return 0;
}
@@ -917,7 +856,5 @@ EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
-EXPORT_SYMBOL(reserve_lapic_nmi);
-EXPORT_SYMBOL(release_lapic_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);
Index: linux/arch/i386/kernel/traps.c
===================================================================
--- linux.orig/arch/i386/kernel/traps.c
+++ linux/arch/i386/kernel/traps.c
@@ -703,13 +703,6 @@ void die_nmi (struct pt_regs *regs, cons
do_exit(SIGSEGV);
}
-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
-{
- return 0;
-}
-
-static nmi_callback_t nmi_callback = dummy_nmi_callback;
-
static void default_do_nmi(struct pt_regs * regs)
{
unsigned char reason = 0;
@@ -729,9 +722,10 @@ static void default_do_nmi(struct pt_reg
*/
if (nmi_watchdog_tick(regs, reason))
return;
+ if (!do_nmi_callback(regs, smp_processor_id()))
#endif
- if (!rcu_dereference(nmi_callback)(regs, smp_processor_id()))
unknown_nmi_error(reason, regs);
+
return;
}
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@@ -762,19 +756,6 @@ fastcall void do_nmi(struct pt_regs * re
nmi_exit();
}
-void set_nmi_callback(nmi_callback_t callback)
-{
- vmalloc_sync_all();
- rcu_assign_pointer(nmi_callback, callback);
-}
-EXPORT_SYMBOL_GPL(set_nmi_callback);
-
-void unset_nmi_callback(void)
-{
- nmi_callback = dummy_nmi_callback;
-}
-EXPORT_SYMBOL_GPL(unset_nmi_callback);
-
#ifdef CONFIG_KPROBES
fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
{
Index: linux/include/asm-i386/nmi.h
===================================================================
--- linux.orig/include/asm-i386/nmi.h
+++ linux/include/asm-i386/nmi.h
@@ -6,24 +6,13 @@
#include <linux/pm.h>
-struct pt_regs;
-
-typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
-
-/**
- * set_nmi_callback
- *
- * Set a handler for an NMI. Only one handler may be
- * set. Return 1 if the NMI was handled.
- */
-void set_nmi_callback(nmi_callback_t callback);
-
/**
- * unset_nmi_callback
+ * do_nmi_callback
*
- * Remove the handler previously set.
+ * Check to see if a callback exists and execute it. Return 1
+ * if the handler exists and was handled successfully.
*/
-void unset_nmi_callback(void);
+int do_nmi_callback(struct pt_regs *regs, int cpu);
extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
extern int avail_to_resrv_perfctr_nmi(unsigned int);
@@ -33,8 +22,6 @@ extern int reserve_evntsel_nmi(unsigned
extern void release_evntsel_nmi(unsigned int);
extern void setup_apic_nmi_watchdog (void *);
-extern int reserve_lapic_nmi(void);
-extern void release_lapic_nmi(void);
extern void disable_timer_nmi_watchdog(void);
extern void enable_timer_nmi_watchdog(void);
extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
Index: linux/kernel/sysctl.c
===================================================================
--- linux.orig/kernel/sysctl.c
+++ linux/kernel/sysctl.c
@@ -76,8 +76,6 @@ extern int compat_log;
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
int unknown_nmi_panic;
-extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
- void __user *, size_t *, loff_t *);
#endif
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -628,7 +626,7 @@ static ctl_table kern_table[] = {
.data = &unknown_nmi_panic,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_unknown_nmi_panic,
+ .proc_handler = &proc_dointvec,
},
#endif
#if defined(CONFIG_X86)
Index: linux/arch/i386/kernel/crash.c
===================================================================
--- linux.orig/arch/i386/kernel/crash.c
+++ linux/arch/i386/kernel/crash.c
@@ -22,6 +22,8 @@
#include <asm/nmi.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
+#include <asm/kdebug.h>
+
#include <mach_ipi.h>
@@ -93,9 +95,18 @@ static void crash_save_self(struct pt_re
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static atomic_t waiting_for_crash_ipi;
-static int crash_nmi_callback(struct pt_regs *regs, int cpu)
+static int crash_nmi_callback(struct notifier_block *self,
+ unsigned long val, void *data)
{
+ struct pt_regs *regs;
struct pt_regs fixed_regs;
+ int cpu;
+
+ if (val != DIE_NMI)
+ return NOTIFY_OK;
+
+ regs = ((struct die_args *)data)->regs;
+ cpu = raw_smp_processor_id();
/* Don't do anything if this handler is invoked on crashing cpu.
* Otherwise, system will completely hang. Crashing cpu can get
@@ -125,13 +136,18 @@ static void smp_send_nmi_allbutself(void
send_IPI_allbutself(NMI_VECTOR);
}
+static struct notifier_block crash_nmi_nb = {
+ .notifier_call = crash_nmi_callback,
+};
+
static void nmi_shootdown_cpus(void)
{
unsigned long msecs;
atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
/* Would it be better to replace the trap vector here? */
- set_nmi_callback(crash_nmi_callback);
+ if (register_die_notifier(&crash_nmi_nb))
+ return; /* return what? */
/* Ensure the new callback function is set before sending
* out the NMI
*/
Index: linux/arch/x86_64/kernel/crash.c
===================================================================
--- linux.orig/arch/x86_64/kernel/crash.c
+++ linux/arch/x86_64/kernel/crash.c
@@ -23,6 +23,7 @@
#include <asm/nmi.h>
#include <asm/hw_irq.h>
#include <asm/mach_apic.h>
+#include <asm/kdebug.h>
/* This keeps a track of which one is crashing cpu. */
static int crashing_cpu;
@@ -95,8 +96,18 @@ static void crash_save_self(struct pt_re
#ifdef CONFIG_SMP
static atomic_t waiting_for_crash_ipi;
-static int crash_nmi_callback(struct pt_regs *regs, int cpu)
+static int crash_nmi_callback(struct notifier_block *self,
+ unsigned long val, void *data)
{
+ struct pt_regs *regs;
+ int cpu;
+
+ if (val != DIE_NMI)
+ return NOTIFY_OK;
+
+ regs = ((struct die_args *)data)->regs;
+ cpu = raw_smp_processor_id();
+
/*
* Don't do anything if this handler is invoked on crashing cpu.
* Otherwise, system will completely hang. Crashing cpu can get
@@ -127,12 +138,17 @@ static void smp_send_nmi_allbutself(void
* cpu hotplug shouldn't matter.
*/
+static struct notifier_block crash_nmi_nb = {
+ .notifier_call = crash_nmi_callback,
+};
+
static void nmi_shootdown_cpus(void)
{
unsigned long msecs;
atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- set_nmi_callback(crash_nmi_callback);
+ if (register_die_notifier(&crash_nmi_nb))
+ return; /* return what? */
/*
* Ensure the new callback function is set before sending
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [13/145] x86_64: Add abilty to enable/disable nmi watchdog with sysctl
[not found] <20060810 935.775038000@suse.de>
` (11 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [12/145] i386/x86-64: Remove un/set_nmi_callback and reserve/release_lapic_nmi functions Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:48 ` Oleg Verych
2006-08-10 19:35 ` [PATCH for review] [14/145] x86_64: Add abilty to enable/disable nmi watchdog from procfs (update) Andi Kleen
` (132 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: dzickus <dzickus@redhat.com>
Adds a new /proc/sys/kernel/nmi_watchdog entry that will enable/disable the nmi
watchdog.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/nmi.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++
arch/x86_64/kernel/nmi.c | 48 +++++++++++++++++++++++++++++++++++++++++++
include/asm-i386/nmi.h | 1
include/asm-x86_64/nmi.h | 1
include/linux/sysctl.h | 1
kernel/sysctl.c | 11 +++++++++
6 files changed, 114 insertions(+)
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -846,6 +846,58 @@ static int unknown_nmi_panic_callback(st
return 0;
}
+/*
+ * proc handler for /proc/sys/kernel/nmi_watchdog
+ */
+int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+ void __user *buffer, size_t *length, loff_t *ppos)
+{
+ int old_state;
+
+ nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
+ old_state = nmi_watchdog_enabled;
+ proc_dointvec(table, write, file, buffer, length, ppos);
+ if (!!old_state == !!nmi_watchdog_enabled)
+ return 0;
+
+ if (atomic_read(&nmi_active) < 0) {
+ printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
+ return -EINVAL;
+ }
+
+ if (nmi_watchdog == NMI_DEFAULT) {
+ if (nmi_known_cpu() > 0)
+ nmi_watchdog = NMI_LOCAL_APIC;
+ else
+ nmi_watchdog = NMI_IO_APIC;
+ }
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ {
+ if (nmi_watchdog_enabled)
+ enable_lapic_nmi_watchdog();
+ else
+ disable_lapic_nmi_watchdog();
+ } else if (nmi_watchdog == NMI_IO_APIC) {
+ /* FIXME
+ * for some reason these functions don't work
+ */
+ printk("Can not enable/disable NMI on IO APIC\n");
+ return -EINVAL;
+#if 0
+ if (nmi_watchdog_enabled)
+ enable_timer_nmi_watchdog();
+ else
+ disable_timer_nmi_watchdog();
+#endif
+ } else {
+ printk( KERN_WARNING
+ "NMI watchdog doesn't know what hardware to touch\n");
+ return -EIO;
+ }
+ return 0;
+}
+
#endif
EXPORT_SYMBOL(nmi_active);
Index: linux/arch/x86_64/kernel/nmi.c
===================================================================
--- linux.orig/arch/x86_64/kernel/nmi.c
+++ linux/arch/x86_64/kernel/nmi.c
@@ -750,6 +750,54 @@ static int unknown_nmi_panic_callback(st
return 0;
}
+/*
+ * proc handler for /proc/sys/kernel/nmi
+ */
+int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+ void __user *buffer, size_t *length, loff_t *ppos)
+{
+ int old_state;
+
+ nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
+ old_state = nmi_watchdog_enabled;
+ proc_dointvec(table, write, file, buffer, length, ppos);
+ if (!!old_state == !!nmi_watchdog_enabled)
+ return 0;
+
+ if (atomic_read(&nmi_active) < 0) {
+ printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
+ return -EINVAL;
+ }
+
+ /* if nmi_watchdog is not set yet, then set it */
+ nmi_watchdog_default();
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ {
+ if (nmi_watchdog_enabled)
+ enable_lapic_nmi_watchdog();
+ else
+ disable_lapic_nmi_watchdog();
+ } else if (nmi_watchdog == NMI_IO_APIC) {
+ /* FIXME
+ * for some reason these functions don't work
+ */
+ printk("Can not enable/disable NMI on IO APIC\n");
+ return -EIO;
+#if 0
+ if (nmi_watchdog_enabled)
+ enable_timer_nmi_watchdog();
+ else
+ disable_timer_nmi_watchdog();
+#endif
+ } else {
+ printk(KERN_WARNING
+ "NMI watchdog doesn't know what hardware to touch\n");
+ return -EIO;
+ }
+ return 0;
+}
+
#endif
EXPORT_SYMBOL(nmi_active);
Index: linux/include/asm-i386/nmi.h
===================================================================
--- linux.orig/include/asm-i386/nmi.h
+++ linux/include/asm-i386/nmi.h
@@ -14,6 +14,7 @@
*/
int do_nmi_callback(struct pt_regs *regs, int cpu);
+extern int nmi_watchdog_enabled;
extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
extern int avail_to_resrv_perfctr_nmi(unsigned int);
extern int reserve_perfctr_nmi(unsigned int);
Index: linux/include/asm-x86_64/nmi.h
===================================================================
--- linux.orig/include/asm-x86_64/nmi.h
+++ linux/include/asm-x86_64/nmi.h
@@ -43,6 +43,7 @@ extern void die_nmi(char *str, struct pt
extern int panic_on_timeout;
extern int unknown_nmi_panic;
+extern int nmi_watchdog_enabled;
extern int check_nmi_watchdog(void);
extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
Index: linux/include/linux/sysctl.h
===================================================================
--- linux.orig/include/linux/sysctl.h
+++ linux/include/linux/sysctl.h
@@ -150,6 +150,7 @@ enum
KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
KERN_COMPAT_LOG=73, /* int: print compat layer messages */
KERN_MAX_LOCK_DEPTH=74,
+ KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
};
Index: linux/kernel/sysctl.c
===================================================================
--- linux.orig/kernel/sysctl.c
+++ linux/kernel/sysctl.c
@@ -76,6 +76,9 @@ extern int compat_log;
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
int unknown_nmi_panic;
+int nmi_watchdog_enabled;
+extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
+ void __user *, size_t *, loff_t *);
#endif
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -628,6 +631,14 @@ static ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = KERN_NMI_WATCHDOG,
+ .procname = "nmi_watchdog",
+ .data = &nmi_watchdog_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_nmi_enabled,
+ },
#endif
#if defined(CONFIG_X86)
{
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [13/145] x86_64: Add abilty to enable/disable nmi watchdog with sysctl
2006-08-10 19:35 ` [PATCH for review] [13/145] x86_64: Add abilty to enable/disable nmi watchdog with sysctl Andi Kleen
@ 2006-08-10 19:48 ` Oleg Verych
2006-08-11 6:44 ` Andi Kleen
0 siblings, 1 reply; 199+ messages in thread
From: Oleg Verych @ 2006-08-10 19:48 UTC (permalink / raw)
To: linux-kernel
Andi Kleen wrote:
> r
>
> From: dzickus <dzickus@redhat.com>
>
> Adds a new /proc/sys/kernel/nmi call that will enable/disable the nmi
> watchdog.
>
> Signed-off-by: Don Zickus <dzickus@redhat.com>
> Signed-off-by: Andi Kleen <ak@suse.de>
>
> ---
> arch/i386/kernel/nmi.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++
> arch/x86_64/kernel/nmi.c | 48 +++++++++++++++++++++++++++++++++++++++++++
> include/asm-i386/nmi.h | 1
> include/asm-x86_64/nmi.h | 1
> include/linux/sysctl.h | 1
> kernel/sysctl.c | 11 +++++++++
> 6 files changed, 114 insertions(+)
Hallo, Andi Kleen.
I'm a kernelnewbie, so any *answer* to this will be very appreciated.
Files 'nmi.c' from both archs don't match, obviously. But lets see, how.
cd /tmp/
diff -purN /tmp/ia32.c /tmp/amd64.c
--- /tmp/ia32.c 2006-08-10 21:12:19.292953750 +0200
+++ /tmp/amd64.c 2006-08-10 21:11:49.503092000 +0200
@@ -10,16 +10,12 @@
+ return 0;
+
+ if (atomic_read(&nmi_active) < 0) {
-+ printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
++ printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
+ return -EINVAL;
Something is wrong;
-+ if (nmi_watchdog == NMI_DEFAULT) {
-+ if (nmi_known_cpu() > 0)
-+ nmi_watchdog = NMI_LOCAL_APIC;
-+ else
-+ nmi_watchdog = NMI_IO_APIC;
-+ }
++ /* if nmi_watchdog is not set yet, then set it */
++ nmi_watchdog_default();
i don't know about nmi, but please drop a word why this is different in both files;
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ {
@@ -32,7 +28,7 @@
+ * for some reason these functions don't work
+ */
+ printk("Can not enable/disable NMI on IO APIC\n");
-+ return -EINVAL;
++ return -EIO;
and this;
+#if 0
+ if (nmi_watchdog_enabled)
+ enable_timer_nmi_watchdog();
@@ -40,7 +36,7 @@
+ disable_timer_nmi_watchdog();
+#endif
+ } else {
-+ printk( KERN_WARNING
++ printk(KERN_WARNING
dup.
+ "NMI watchdog doesn't know what hardware to touch\n");
+ return -EIO;
+ }
Maybe this must be one file for both archs ?
Thanks.
--
-o--=O`C
#oo'L O
<___=E M
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [13/145] x86_64: Add abilty to enable/disable nmi watchdog with sysctl
2006-08-10 19:48 ` Oleg Verych
@ 2006-08-11 6:44 ` Andi Kleen
0 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-11 6:44 UTC (permalink / raw)
To: Oleg Verych; +Cc: linux-kernel
> Files 'nmi.c' from both archs don't match, obviously. But lets see, how.
nmi.c for x86-64 only aims to support new hardware and is thus a somewhat
cleaner version of the i386 version. Also there are some differences how
it interfaces with the rest of the port.
> -+ if (nmi_watchdog == NMI_DEFAULT) {
> -+ if (nmi_known_cpu() > 0)
> -+ nmi_watchdog = NMI_LOCAL_APIC;
> -+ else
> -+ nmi_watchdog = NMI_IO_APIC;
> -+ }
> ++ /* if nmi_watchdog is not set yet, then set it */
> ++ nmi_watchdog_default();
>
> i don't know about nmi, but please drop a word why this is different in both files;
They've been evolving independently and not all changes are added to both.
In this case it was a x86-64 specific cleanup.
> Maybe this must be one file for both archs ?
No.
-Andi
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [14/145] x86_64: Add abilty to enable/disable nmi watchdog from procfs (update)
[not found] <20060810 935.775038000@suse.de>
` (12 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [13/145] x86_64: Add abilty to enable/disable nmi watchdog with sysctl Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [15/145] x86_64: Allow users to force a panic on NMI Andi Kleen
` (131 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Don Zickus <dzickus@redhat.com>
Adds a new /proc/sys/kernel/nmi_watchdog call that will enable/disable the
nmi watchdog.
By entering a non-zero value here, a user can enable the nmi watchdog to
monitor the online cpus in the system. By entering a zero value here, a
user can disable the nmi watchdog and free up a performance counter which
could then be utilized by the oprofile subsystem, otherwise oprofile may be
short a counter when in use.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
Documentation/filesystems/proc.txt | 14 +++++++++-----
arch/i386/kernel/nmi.c | 21 ++++-----------------
arch/x86_64/kernel/nmi.c | 21 ++++-----------------
3 files changed, 17 insertions(+), 39 deletions(-)
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -847,7 +847,7 @@ static int unknown_nmi_panic_callback(st
}
/*
- * proc handler for /proc/sys/kernel/nmi_watchdog
+ * proc handler for /proc/sys/kernel/nmi
*/
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
void __user *buffer, size_t *length, loff_t *ppos)
@@ -861,8 +861,8 @@ int proc_nmi_enabled(struct ctl_table *t
return 0;
if (atomic_read(&nmi_active) < 0) {
- printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
- return -EINVAL;
+ printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
+ return -EIO;
}
if (nmi_watchdog == NMI_DEFAULT) {
@@ -872,24 +872,11 @@ int proc_nmi_enabled(struct ctl_table *t
nmi_watchdog = NMI_IO_APIC;
}
- if (nmi_watchdog == NMI_LOCAL_APIC)
- {
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
if (nmi_watchdog_enabled)
enable_lapic_nmi_watchdog();
else
disable_lapic_nmi_watchdog();
- } else if (nmi_watchdog == NMI_IO_APIC) {
- /* FIXME
- * for some reason these functions don't work
- */
- printk("Can not enable/disable NMI on IO APIC\n");
- return -EINVAL;
-#if 0
- if (nmi_watchdog_enabled)
- enable_timer_nmi_watchdog();
- else
- disable_timer_nmi_watchdog();
-#endif
} else {
printk( KERN_WARNING
"NMI watchdog doesn't know what hardware to touch\n");
Index: linux/arch/x86_64/kernel/nmi.c
===================================================================
--- linux.orig/arch/x86_64/kernel/nmi.c
+++ linux/arch/x86_64/kernel/nmi.c
@@ -167,7 +167,7 @@ static __cpuinit inline int nmi_known_cp
}
/* Run after command line and cpu_init init, but before all other checks */
-void __cpuinit nmi_watchdog_default(void)
+void nmi_watchdog_default(void)
{
if (nmi_watchdog != NMI_DEFAULT)
return;
@@ -766,32 +766,19 @@ int proc_nmi_enabled(struct ctl_table *t
if (atomic_read(&nmi_active) < 0) {
printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
- return -EINVAL;
+ return -EIO;
}
/* if nmi_watchdog is not set yet, then set it */
nmi_watchdog_default();
- if (nmi_watchdog == NMI_LOCAL_APIC)
- {
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
if (nmi_watchdog_enabled)
enable_lapic_nmi_watchdog();
else
disable_lapic_nmi_watchdog();
- } else if (nmi_watchdog == NMI_IO_APIC) {
- /* FIXME
- * for some reason these functions don't work
- */
- printk("Can not enable/disable NMI on IO APIC\n");
- return -EIO;
-#if 0
- if (nmi_watchdog_enabled)
- enable_timer_nmi_watchdog();
- else
- disable_timer_nmi_watchdog();
-#endif
} else {
- printk(KERN_WARNING
+ printk( KERN_WARNING
"NMI watchdog doesn't know what hardware to touch\n");
return -EIO;
}
Index: linux/Documentation/filesystems/proc.txt
===================================================================
--- linux.orig/Documentation/filesystems/proc.txt
+++ linux/Documentation/filesystems/proc.txt
@@ -1124,11 +1124,15 @@ debugging information is displayed on co
NMI switch that most IA32 servers have fires unknown NMI up, for example.
If a system hangs up, try pressing the NMI switch.
-[NOTE]
- This function and oprofile share a NMI callback. Therefore this function
- cannot be enabled when oprofile is activated.
- And NMI watchdog will be disabled when the value in this file is set to
- non-zero.
+nmi_watchdog
+------------
+
+Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
+the NMI watchdog is enabled and will continuously test all online cpus to
+determine whether or not they are still functioning properly.
+
+Because the NMI watchdog shares registers with oprofile, by disabling the NMI
+watchdog, oprofile may have more registers to utilize.
2.4 /proc/sys/vm - The virtual memory subsystem
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [15/145] x86_64: Allow users to force a panic on NMI
[not found] <20060810 935.775038000@suse.de>
` (13 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [14/145] x86_64: Add abilty to enable/disable nmi watchdog from procfs (update) Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [16/145] x86_64: x86 clean up nmi panic messages Andi Kleen
` (130 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Don Zickus <dzickus@redhat.com>
To quote Alan Cox:
The default Linux behaviour on an NMI of either memory or unknown is to
continue operation. For many environments such as scientific computing
it is preferable that the box is taken out and the error dealt with than
an uncorrected parity/ECC error get propagated.
A small number of systems do generate NMI's for bizarre random reasons
such as power management so the default is unchanged. In other respects
the new proc/sys entry works like the existing panic controls already in
that directory.
This is separate to the edac support - EDAC allows supported chipsets to
handle ECC errors well, this change allows unsupported cases to at least
panic rather than cause problems further down the line.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
This is just a refreshed post of Alan's original patch
<http://www.ussg.iu.edu/hypermail/linux/kernel/0510.2/1208.html>, with
hopes this time it sticks. :)
It applies cleanly on top of my other nmi patches.
Cheers,
Don
---
arch/i386/kernel/traps.c | 6 ++++++
arch/x86_64/kernel/traps.c | 6 ++++++
include/linux/kernel.h | 1 +
include/linux/sysctl.h | 1 +
kernel/panic.c | 1 +
kernel/sysctl.c | 8 ++++++++
6 files changed, 23 insertions(+)
Index: linux/arch/i386/kernel/traps.c
===================================================================
--- linux.orig/arch/i386/kernel/traps.c
+++ linux/arch/i386/kernel/traps.c
@@ -632,6 +632,8 @@ static void mem_parity_error(unsigned ch
"to continue\n");
printk(KERN_EMERG "You probably have a hardware problem with your RAM "
"chips\n");
+ if (panic_on_unrecovered_nmi)
+ panic("NMI: Not continuing");
/* Clear and disable the memory parity error line. */
clear_mem_error(reason);
@@ -667,6 +669,10 @@ static void unknown_nmi_error(unsigned c
reason, smp_processor_id());
printk("Dazed and confused, but trying to continue\n");
printk("Do you have a strange power saving mode enabled?\n");
+
+ if (panic_on_unrecovered_nmi)
+ panic("NMI: Not continuing");
+
}
static DEFINE_SPINLOCK(nmi_print_lock);
Index: linux/arch/x86_64/kernel/traps.c
===================================================================
--- linux.orig/arch/x86_64/kernel/traps.c
+++ linux/arch/x86_64/kernel/traps.c
@@ -728,6 +728,8 @@ mem_parity_error(unsigned char reason, s
{
printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
printk("You probably have a hardware problem with your RAM chips\n");
+ if (panic_on_unrecovered_nmi)
+ panic("NMI: Not continuing");
/* Clear and disable the memory parity error line. */
reason = (reason & 0xf) | 4;
@@ -753,6 +755,10 @@ unknown_nmi_error(unsigned char reason,
{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
printk("Dazed and confused, but trying to continue\n");
printk("Do you have a strange power saving mode enabled?\n");
+
+ if (panic_on_unrecovered_nmi)
+ panic("NMI: Not continuing");
+
}
/* Runs on IST stack. This code must keep interrupts off all the time.
Index: linux/include/linux/kernel.h
===================================================================
--- linux.orig/include/linux/kernel.h
+++ linux/include/linux/kernel.h
@@ -186,6 +186,7 @@ extern void bust_spinlocks(int yes);
extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
extern int panic_timeout;
extern int panic_on_oops;
+extern int panic_on_unrecovered_nmi;
extern int tainted;
extern const char *print_tainted(void);
extern void add_taint(unsigned);
Index: linux/kernel/sysctl.c
===================================================================
--- linux.orig/kernel/sysctl.c
+++ linux/kernel/sysctl.c
@@ -642,6 +642,14 @@ static ctl_table kern_table[] = {
#endif
#if defined(CONFIG_X86)
{
+ .ctl_name = KERN_PANIC_ON_NMI,
+ .procname = "panic_on_unrecovered_nmi",
+ .data = &panic_on_unrecovered_nmi,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
.ctl_name = KERN_BOOTLOADER_TYPE,
.procname = "bootloader_type",
.data = &bootloader_type,
Index: linux/include/linux/sysctl.h
===================================================================
--- linux.orig/include/linux/sysctl.h
+++ linux/include/linux/sysctl.h
@@ -151,6 +151,7 @@ enum
KERN_COMPAT_LOG=73, /* int: print compat layer messages */
KERN_MAX_LOCK_DEPTH=74,
KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
+ KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
};
Index: linux/kernel/panic.c
===================================================================
--- linux.orig/kernel/panic.c
+++ linux/kernel/panic.c
@@ -20,6 +20,7 @@
#include <linux/kexec.h>
int panic_on_oops;
+int panic_on_unrecovered_nmi;
int tainted;
static int pause_on_oops;
static int pause_on_oops_flag;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [16/145] x86_64: x86 clean up nmi panic messages
[not found] <20060810 935.775038000@suse.de>
` (14 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [15/145] x86_64: Allow users to force a panic on NMI Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [17/145] i386/x86-64: x86 nmi fix Andi Kleen
` (129 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Don Zickus <dzickus@redhat.com>
Clean up some of the output messages on the nmi error paths to make more
sense when they are displayed. This is mainly a cosmetic fix and
shouldn't impact any normal code path.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/traps.c | 15 ++++++++-------
arch/x86_64/kernel/traps.c | 21 ++++++++++++++-------
2 files changed, 22 insertions(+), 14 deletions(-)
Index: linux/arch/i386/kernel/traps.c
===================================================================
--- linux.orig/arch/i386/kernel/traps.c
+++ linux/arch/i386/kernel/traps.c
@@ -628,13 +628,15 @@ gp_in_kernel:
static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
{
- printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
- "to continue\n");
+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
+ "CPU %d.\n", reason, smp_processor_id());
printk(KERN_EMERG "You probably have a hardware problem with your RAM "
"chips\n");
if (panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+
/* Clear and disable the memory parity error line. */
clear_mem_error(reason);
}
@@ -665,14 +667,13 @@ static void unknown_nmi_error(unsigned c
return;
}
#endif
- printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
- printk("Dazed and confused, but trying to continue\n");
- printk("Do you have a strange power saving mode enabled?\n");
-
+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
+ "CPU %d.\n", reason, smp_processor_id());
+ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
if (panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
}
static DEFINE_SPINLOCK(nmi_print_lock);
Index: linux/arch/x86_64/kernel/traps.c
===================================================================
--- linux.orig/arch/x86_64/kernel/traps.c
+++ linux/arch/x86_64/kernel/traps.c
@@ -726,10 +726,15 @@ asmlinkage void __kprobes do_general_pro
static __kprobes void
mem_parity_error(unsigned char reason, struct pt_regs * regs)
{
- printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
- printk("You probably have a hardware problem with your RAM chips\n");
+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
+ reason);
+ printk(KERN_EMERG "You probably have a hardware problem with your "
+ "RAM chips\n");
+
if (panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
+ panic("NMI: Not continuing");
+
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
/* Clear and disable the memory parity error line. */
reason = (reason & 0xf) | 4;
@@ -752,13 +757,15 @@ io_check_error(unsigned char reason, str
static __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
-{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
- printk("Dazed and confused, but trying to continue\n");
- printk("Do you have a strange power saving mode enabled?\n");
+{
+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
+ reason);
+ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
if (panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
+ panic("NMI: Not continuing");
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
}
/* Runs on IST stack. This code must keep interrupts off all the time.
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [17/145] i386/x86-64: x86 nmi fix
[not found] <20060810 935.775038000@suse.de>
` (15 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [16/145] x86_64: x86 clean up nmi panic messages Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [18/145] x86_64: x86 nmi fix 2 Andi Kleen
` (128 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Shaohua Li <shaohua.li@intel.com>
Making NMI suspend/resume work with SMP. We use CPU hotplug to offline
APs in SMP suspend/resume. Only BSP executes sysdev's .suspend/.resume
method. APs should follow CPU hotplug code path.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
arch/i386/kernel/nmi.c | 14 +++++++++-----
arch/i386/kernel/smpboot.c | 3 ++-
arch/x86_64/kernel/nmi.c | 14 +++++++++-----
arch/x86_64/kernel/smpboot.c | 2 ++
include/asm-i386/nmi.h | 1 +
include/asm-x86_64/nmi.h | 1 +
6 files changed, 24 insertions(+), 11 deletions(-)
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -63,7 +63,6 @@ struct nmi_watchdog_ctlblk {
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
/* local prototypes */
-static void stop_apic_nmi_watchdog(void *unused);
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
extern void show_registers(struct pt_regs *regs);
@@ -341,15 +340,20 @@ static int nmi_pm_active; /* nmi_active
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
+ /* only CPU0 goes here, other CPUs should be offline */
nmi_pm_active = atomic_read(&nmi_active);
- disable_lapic_nmi_watchdog();
+ stop_apic_nmi_watchdog(NULL);
+ BUG_ON(atomic_read(&nmi_active) != 0);
return 0;
}
static int lapic_nmi_resume(struct sys_device *dev)
{
- if (nmi_pm_active > 0)
- enable_lapic_nmi_watchdog();
+ /* only CPU0 goes here, other CPUs should be offline */
+ if (nmi_pm_active > 0) {
+ setup_apic_nmi_watchdog(NULL);
+ touch_nmi_watchdog();
+ }
return 0;
}
@@ -667,7 +671,7 @@ void setup_apic_nmi_watchdog (void *unus
atomic_inc(&nmi_active);
}
-static void stop_apic_nmi_watchdog(void *unused)
+void stop_apic_nmi_watchdog(void *unused)
{
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
Index: linux/arch/i386/kernel/smpboot.c
===================================================================
--- linux.orig/arch/i386/kernel/smpboot.c
+++ linux/arch/i386/kernel/smpboot.c
@@ -1372,7 +1372,8 @@ int __cpu_disable(void)
*/
if (cpu == 0)
return -EBUSY;
-
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ stop_apic_nmi_watchdog(NULL);
clear_local_APIC();
/* Allow any queued timer interrupts to get serviced */
local_irq_enable();
Index: linux/arch/x86_64/kernel/nmi.c
===================================================================
--- linux.orig/arch/x86_64/kernel/nmi.c
+++ linux/arch/x86_64/kernel/nmi.c
@@ -63,7 +63,6 @@ struct nmi_watchdog_ctlblk {
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
/* local prototypes */
-static void stop_apic_nmi_watchdog(void *unused);
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
/* converts an msr to an appropriate reservation bit */
@@ -337,15 +336,20 @@ static int nmi_pm_active; /* nmi_active
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
+ /* only CPU0 goes here, other CPUs should be offline */
nmi_pm_active = atomic_read(&nmi_active);
- disable_lapic_nmi_watchdog();
+ stop_apic_nmi_watchdog(NULL);
+ BUG_ON(atomic_read(&nmi_active) != 0);
return 0;
}
static int lapic_nmi_resume(struct sys_device *dev)
{
- if (nmi_pm_active > 0)
- enable_lapic_nmi_watchdog();
+ /* only CPU0 goes here, other CPUs should be offline */
+ if (nmi_pm_active > 0) {
+ setup_apic_nmi_watchdog(NULL);
+ touch_nmi_watchdog();
+ }
return 0;
}
@@ -586,7 +590,7 @@ void setup_apic_nmi_watchdog(void *unuse
atomic_inc(&nmi_active);
}
-static void stop_apic_nmi_watchdog(void *unused)
+void stop_apic_nmi_watchdog(void *unused)
{
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
Index: linux/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux.orig/arch/x86_64/kernel/smpboot.c
+++ linux/arch/x86_64/kernel/smpboot.c
@@ -1233,6 +1233,8 @@ int __cpu_disable(void)
if (cpu == 0)
return -EBUSY;
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ stop_apic_nmi_watchdog(NULL);
clear_local_APIC();
/*
Index: linux/include/asm-i386/nmi.h
===================================================================
--- linux.orig/include/asm-i386/nmi.h
+++ linux/include/asm-i386/nmi.h
@@ -23,6 +23,7 @@ extern int reserve_evntsel_nmi(unsigned
extern void release_evntsel_nmi(unsigned int);
extern void setup_apic_nmi_watchdog (void *);
+extern void stop_apic_nmi_watchdog (void *);
extern void disable_timer_nmi_watchdog(void);
extern void enable_timer_nmi_watchdog(void);
extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
Index: linux/include/asm-x86_64/nmi.h
===================================================================
--- linux.orig/include/asm-x86_64/nmi.h
+++ linux/include/asm-x86_64/nmi.h
@@ -54,6 +54,7 @@ extern int reserve_evntsel_nmi(unsigned
extern void release_evntsel_nmi(unsigned int);
extern void setup_apic_nmi_watchdog (void *);
+extern void stop_apic_nmi_watchdog (void *);
extern void disable_timer_nmi_watchdog(void);
extern void enable_timer_nmi_watchdog(void);
extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [18/145] x86_64: x86 nmi fix 2
[not found] <20060810 935.775038000@suse.de>
` (16 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [17/145] i386/x86-64: x86 nmi fix Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [19/145] x86_64: Fix up panic messages for different NMI panics Andi Kleen
` (127 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Don Zickus <dzickus@redhat.com>
Makes the start/stop paths of nmi watchdog more robust to handle the
suspend/resume cases more gracefully.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
arch/i386/kernel/nmi.c | 19 +++++++++++++++++--
arch/x86_64/kernel/nmi.c | 19 +++++++++++++++++--
2 files changed, 34 insertions(+), 4 deletions(-)
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -630,11 +630,21 @@ static void stop_p4_watchdog(void)
void setup_apic_nmi_watchdog (void *unused)
{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC))
return;
+ if (wd->enabled == 1)
+ return;
+
+ /* cheap hack to support suspend/resume */
+ /* if cpu0 is not active neither should the other cpus */
+ if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
+ return;
+
if (nmi_watchdog == NMI_LOCAL_APIC) {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
@@ -667,17 +677,22 @@ void setup_apic_nmi_watchdog (void *unus
return;
}
}
- __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1;
+ wd->enabled = 1;
atomic_inc(&nmi_active);
}
void stop_apic_nmi_watchdog(void *unused)
{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC))
return;
+ if (wd->enabled == 0)
+ return;
+
if (nmi_watchdog == NMI_LOCAL_APIC) {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
@@ -701,7 +716,7 @@ void stop_apic_nmi_watchdog(void *unused
return;
}
}
- __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0;
+ wd->enabled = 0;
atomic_dec(&nmi_active);
}
Index: linux/arch/x86_64/kernel/nmi.c
===================================================================
--- linux.orig/arch/x86_64/kernel/nmi.c
+++ linux/arch/x86_64/kernel/nmi.c
@@ -565,11 +565,21 @@ static void stop_p4_watchdog(void)
void setup_apic_nmi_watchdog(void *unused)
{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC))
return;
+ if (wd->enabled == 1)
+ return;
+
+ /* cheap hack to support suspend/resume */
+ /* if cpu0 is not active neither should the other cpus */
+ if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
+ return;
+
if (nmi_watchdog == NMI_LOCAL_APIC) {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
@@ -586,17 +596,22 @@ void setup_apic_nmi_watchdog(void *unuse
return;
}
}
- __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1;
+ wd->enabled = 1;
atomic_inc(&nmi_active);
}
void stop_apic_nmi_watchdog(void *unused)
{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC))
return;
+ if (wd->enabled == 0)
+ return;
+
if (nmi_watchdog == NMI_LOCAL_APIC) {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
@@ -611,7 +626,7 @@ void stop_apic_nmi_watchdog(void *unused
return;
}
}
- __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0;
+ wd->enabled = 0;
atomic_dec(&nmi_active);
}
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [19/145] x86_64: Fix up panic messages for different NMI panics
[not found] <20060810 935.775038000@suse.de>
` (17 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [18/145] x86_64: x86 nmi fix 2 Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [20/145] x86_64: make functions static Andi Kleen
` (126 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
When an unknown NMI happened the panic would claim an NMI watchdog timeout.
It would also check the variable set by nmi_watchdog=panic and only panic then.
Fix up the panic message to be generic.
Unconditionally panic on an unknown NMI when panic on unknown NMI is enabled.
Noticed by Jan Beulich
Cc: jbeulich@novell.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/nmi.c | 5 +++--
arch/x86_64/kernel/traps.c | 7 +++----
include/asm-x86_64/nmi.h | 2 +-
3 files changed, 7 insertions(+), 7 deletions(-)
Index: linux/arch/x86_64/kernel/nmi.c
===================================================================
--- linux.orig/arch/x86_64/kernel/nmi.c
+++ linux/arch/x86_64/kernel/nmi.c
@@ -695,7 +695,8 @@ int __kprobes nmi_watchdog_tick(struct p
*/
local_inc(&__get_cpu_var(alert_counter));
if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz)
- die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs);
+ die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs,
+ panic_on_timeout);
} else {
__get_cpu_var(last_irq_sum) = sum;
local_set(&__get_cpu_var(alert_counter), 0);
@@ -765,7 +766,7 @@ static int unknown_nmi_panic_callback(st
char buf[64];
sprintf(buf, "NMI received for unknown reason %02x\n", reason);
- die_nmi(buf,regs);
+ die_nmi(buf, regs, 1); /* Always panic here */
return 0;
}
Index: linux/arch/x86_64/kernel/traps.c
===================================================================
--- linux.orig/arch/x86_64/kernel/traps.c
+++ linux/arch/x86_64/kernel/traps.c
@@ -566,7 +566,7 @@ void die(const char * str, struct pt_reg
do_exit(SIGSEGV);
}
-void __kprobes die_nmi(char *str, struct pt_regs *regs)
+void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
{
unsigned long flags = oops_begin();
@@ -578,9 +578,8 @@ void __kprobes die_nmi(char *str, struct
show_registers(regs);
if (kexec_should_crash(current))
crash_kexec(regs);
- if (panic_on_timeout || panic_on_oops)
- panic("nmi watchdog");
- printk("console shuts up ...\n");
+ if (do_panic || panic_on_oops)
+ panic("Non maskable interrupt");
oops_end(flags);
nmi_exit();
local_irq_enable();
Index: linux/include/asm-x86_64/nmi.h
===================================================================
--- linux.orig/include/asm-x86_64/nmi.h
+++ linux/include/asm-x86_64/nmi.h
@@ -37,7 +37,7 @@ static inline void unset_nmi_pm_callback
#endif /* CONFIG_PM */
extern void default_do_nmi(struct pt_regs *);
-extern void die_nmi(char *str, struct pt_regs *regs);
+extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
#define get_nmi_reason() inb(0x61)
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [20/145] x86_64: make functions static
[not found] <20060810 935.775038000@suse.de>
` (18 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [19/145] x86_64: Fix up panic messages for different NMI panics Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [21/145] x86_64: kdump x86_64 nmi event notification fix Andi Kleen
` (125 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Adrian Bunk <bunk@stusta.de>
This patch makes the following needlessly global functions static:
- nmi_int.c: profile_exceptions_notify()
- nmi_timer_int.c: profile_timer_exceptions_notify()
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/oprofile/nmi_int.c | 4 ++--
arch/i386/oprofile/nmi_timer_int.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
Index: linux/arch/i386/oprofile/nmi_int.c
===================================================================
--- linux.orig/arch/i386/oprofile/nmi_int.c
+++ linux/arch/i386/oprofile/nmi_int.c
@@ -83,8 +83,8 @@ static void exit_driverfs(void)
#define exit_driverfs() do { } while (0)
#endif /* CONFIG_PM */
-int profile_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
+static int profile_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
{
struct die_args *args = (struct die_args *)data;
int ret = NOTIFY_DONE;
Index: linux/arch/i386/oprofile/nmi_timer_int.c
===================================================================
--- linux.orig/arch/i386/oprofile/nmi_timer_int.c
+++ linux/arch/i386/oprofile/nmi_timer_int.c
@@ -19,8 +19,8 @@
#include <asm/ptrace.h>
#include <asm/kdebug.h>
-int profile_timer_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
+static int profile_timer_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
{
struct die_args *args = (struct die_args *)data;
int ret = NOTIFY_DONE;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [21/145] x86_64: kdump x86_64 nmi event notification fix
[not found] <20060810 935.775038000@suse.de>
` (19 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [20/145] x86_64: make functions static Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [22/145] x86_64: Kdump i386 " Andi Kleen
` (124 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Vivek Goyal <vgoyal@in.ibm.com>
After a crash we should wait for the NMI IPI event and not for an external
NMI or an NMI watchdog tick.
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
arch/x86_64/kernel/crash.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
Index: linux/arch/x86_64/kernel/crash.c
===================================================================
--- linux.orig/arch/x86_64/kernel/crash.c
+++ linux/arch/x86_64/kernel/crash.c
@@ -102,7 +102,7 @@ static int crash_nmi_callback(struct not
struct pt_regs *regs;
int cpu;
- if (val != DIE_NMI)
+ if (val != DIE_NMI_IPI)
return NOTIFY_OK;
regs = ((struct die_args *)data)->regs;
@@ -114,7 +114,7 @@ static int crash_nmi_callback(struct not
* an NMI if system was initially booted with nmi_watchdog parameter.
*/
if (cpu == crashing_cpu)
- return 1;
+ return NOTIFY_STOP;
local_irq_disable();
crash_save_this_cpu(regs, cpu);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [22/145] x86_64: Kdump i386 nmi event notification fix
[not found] <20060810 935.775038000@suse.de>
` (20 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [21/145] x86_64: kdump x86_64 nmi event notification fix Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [23/145] i386: Enable NMI watchdog by default Andi Kleen
` (123 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Vivek Goyal <vgoyal@in.ibm.com>
After a crash we should wait for the NMI IPI event and not for an external
NMI or an NMI watchdog tick.
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
arch/i386/kernel/crash.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
Index: linux/arch/i386/kernel/crash.c
===================================================================
--- linux.orig/arch/i386/kernel/crash.c
+++ linux/arch/i386/kernel/crash.c
@@ -102,7 +102,7 @@ static int crash_nmi_callback(struct not
struct pt_regs fixed_regs;
int cpu;
- if (val != DIE_NMI)
+ if (val != DIE_NMI_IPI)
return NOTIFY_OK;
regs = ((struct die_args *)data)->regs;
@@ -113,7 +113,7 @@ static int crash_nmi_callback(struct not
* an NMI if system was initially booted with nmi_watchdog parameter.
*/
if (cpu == crashing_cpu)
- return 1;
+ return NOTIFY_STOP;
local_irq_disable();
if (!user_mode_vm(regs)) {
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [23/145] i386: Enable NMI watchdog by default
[not found] <20060810 935.775038000@suse.de>
` (21 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [22/145] x86_64: Kdump i386 " Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [24/145] x86_64: i386/x86-64 Add nmi watchdog support for new Intel CPUs Andi Kleen
` (122 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
I've had good experiences with having this on by default on x86-64.
It turns nasty hangs into easier to debug oopses.
Enable the local APIC watchdog by default for systems from 2004 onwards.
This comes from a strange compromise: according to Arjan, the reason
it was off by default was some old IBM systems that corrupted
registers when an NMI happened during an SMI. I can't remember anything
more specific, but >= 2004 should avoid these. It's probably overly broad
because most older systems should be ok (and the really old systems
won't be supported by the local APIC watchdog anyway).
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/nmi.c | 9 +++++++++
1 files changed, 9 insertions(+)
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -21,6 +21,7 @@
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
+#include <linux/dmi.h>
#include <asm/smp.h>
#include <asm/nmi.h>
@@ -204,6 +205,14 @@ static int __init check_nmi_watchdog(voi
unsigned int *prev_nmi_count;
int cpu;
+ /* Enable NMI watchdog for newer systems.
+ Actually it should be safe for most systems before 2004 too except
+ for some IBM systems that corrupt registers when NMI happens
+ during SMM. Unfortunately we don't have more exact information
+ on these and use this coarse check. */
+ if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004)
+ nmi_watchdog = NMI_LOCAL_APIC;
+
if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
return 0;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [24/145] x86_64: i386/x86-64 Add nmi watchdog support for new Intel CPUs
[not found] <20060810 935.775038000@suse.de>
` (22 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [23/145] i386: Enable NMI watchdog by default Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [25/145] x86_64: Add macros for rdtscp Andi Kleen
` (121 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
AK: This redoes the changes I temporarily reverted.
Intel now has support for Architectural Performance Monitoring Counters
( Refer to IA-32 Intel Architecture Software Developer's Manual
http://www.intel.com/design/pentium4/manuals/253669.htm ). This
feature is present starting from Intel Core Duo and Intel Core Solo processors.
What this means is, the performance monitoring counters and some performance
monitoring events are now defined in an architectural way (using cpuid).
There will be no need to check for family/model etc. for these architectural
events.
Below is the patch to use these performance counters in the NMI watchdog driver.
The patch handles both i386 and x86-64 kernels.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/nmi.c | 126 +++++++++++++++++++++++++++++--
arch/x86_64/kernel/nmi.c | 130 ++++++++++++++++++++++++++++++--
include/asm-i386/intel_arch_perfmon.h | 31 +++++++
include/asm-x86_64/intel_arch_perfmon.h | 31 +++++++
4 files changed, 308 insertions(+), 10 deletions(-)
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -26,6 +26,7 @@
#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/kdebug.h>
+#include <asm/intel_arch_perfmon.h>
#include "mach_traps.h"
@@ -77,6 +78,9 @@ static inline unsigned int nmi_perfctr_m
case X86_VENDOR_AMD:
return (msr - MSR_K7_PERFCTR0);
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return (msr - MSR_ARCH_PERFMON_PERFCTR0);
+
switch (boot_cpu_data.x86) {
case 6:
return (msr - MSR_P6_PERFCTR0);
@@ -95,6 +99,9 @@ static inline unsigned int nmi_evntsel_m
case X86_VENDOR_AMD:
return (msr - MSR_K7_EVNTSEL0);
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
+
switch (boot_cpu_data.x86) {
case 6:
return (msr - MSR_P6_EVNTSEL0);
@@ -174,7 +181,10 @@ static __cpuinit inline int nmi_known_cp
case X86_VENDOR_AMD:
return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
case X86_VENDOR_INTEL:
- return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return 1;
+ else
+ return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
}
return 0;
}
@@ -261,8 +271,24 @@ static int __init check_nmi_watchdog(voi
/* now that we know it works we can reduce NMI frequency to
something more reasonable; makes a difference in some configs */
- if (nmi_watchdog == NMI_LOCAL_APIC)
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
nmi_hz = 1;
+ /*
+ * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
+ * are writable, with higher bits sign extending from bit 31.
+ * So, we can only program the counter with 31 bit values and
+ * 32nd bit should be 1, for 33.. to be 1.
+ * Find the appropriate nmi_hz
+ */
+ if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
+ ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
+ u64 count = (u64)cpu_khz * 1000;
+ do_div(count, 0x7fffffffUL);
+ nmi_hz = count + 1;
+ }
+ }
kfree(prev_nmi_count);
return 0;
@@ -637,6 +663,85 @@ static void stop_p4_watchdog(void)
release_perfctr_nmi(wd->perfctr_msr);
}
+#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
+static int setup_intel_arch_watchdog(void)
+{
+ unsigned int ebx;
+ union cpuid10_eax eax;
+ unsigned int unused;
+ unsigned int perfctr_msr, evntsel_msr;
+ unsigned int evntsel;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebx indicates event present.
+ */
+ cpuid(10, &(eax.full), &ebx, &unused, &unused);
+ if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+ (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ goto fail;
+
+ perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+ evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
+
+ if (!reserve_perfctr_nmi(perfctr_msr))
+ goto fail;
+
+ if (!reserve_evntsel_nmi(evntsel_msr))
+ goto fail1;
+
+ wrmsrl(perfctr_msr, 0UL);
+
+ evntsel = ARCH_PERFMON_EVENTSEL_INT
+ | ARCH_PERFMON_EVENTSEL_OS
+ | ARCH_PERFMON_EVENTSEL_USR
+ | ARCH_PERFMON_NMI_EVENT_SEL
+ | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+ /* setup the timer */
+ wrmsr(evntsel_msr, evntsel, 0);
+ write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0");
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsr(evntsel_msr, evntsel, 0);
+
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; //unused
+ wd->check_bit = 1ULL << (eax.split.bit_width - 1);
+ return 1;
+fail1:
+ release_perfctr_nmi(perfctr_msr);
+fail:
+ return 0;
+}
+
+static void stop_intel_arch_watchdog(void)
+{
+ unsigned int ebx;
+ union cpuid10_eax eax;
+ unsigned int unused;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebx indicates event present.
+ */
+ cpuid(10, &(eax.full), &ebx, &unused, &unused);
+ if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+ (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ return;
+
+ wrmsr(wd->evntsel_msr, 0, 0);
+ release_evntsel_nmi(wd->evntsel_msr);
+ release_perfctr_nmi(wd->perfctr_msr);
+}
+
void setup_apic_nmi_watchdog (void *unused)
{
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
@@ -663,6 +768,11 @@ void setup_apic_nmi_watchdog (void *unus
return;
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ if (!setup_intel_arch_watchdog())
+ return;
+ break;
+ }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
@@ -708,6 +818,10 @@ void stop_apic_nmi_watchdog(void *unused
stop_k7_watchdog();
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ stop_intel_arch_watchdog();
+ break;
+ }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
@@ -831,10 +945,12 @@ int nmi_watchdog_tick (struct pt_regs *
wrmsrl(wd->cccr_msr, dummy);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
- else if (wd->perfctr_msr == MSR_P6_PERFCTR0) {
- /* Only P6 based Pentium M need to re-unmask
+ else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
+ wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+ /* P6 based Pentium M need to re-unmask
* the apic vector but it doesn't hurt
- * other P6 variant */
+ * other P6 variant.
+ * ArchPerfom/Core Duo also needs this */
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
/* start the cycle over again */
Index: linux/arch/x86_64/kernel/nmi.c
===================================================================
--- linux.orig/arch/x86_64/kernel/nmi.c
+++ linux/arch/x86_64/kernel/nmi.c
@@ -26,6 +26,7 @@
#include <asm/proto.h>
#include <asm/kdebug.h>
#include <asm/mce.h>
+#include <asm/intel_arch_perfmon.h>
/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
* evtsel_nmi_owner tracks the ownership of the event selection
@@ -73,7 +74,10 @@ static inline unsigned int nmi_perfctr_m
case X86_VENDOR_AMD:
return (msr - MSR_K7_PERFCTR0);
case X86_VENDOR_INTEL:
- return (msr - MSR_P4_BPU_PERFCTR0);
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return (msr - MSR_ARCH_PERFMON_PERFCTR0);
+ else
+ return (msr - MSR_P4_BPU_PERFCTR0);
}
return 0;
}
@@ -86,7 +90,10 @@ static inline unsigned int nmi_evntsel_m
case X86_VENDOR_AMD:
return (msr - MSR_K7_EVNTSEL0);
case X86_VENDOR_INTEL:
- return (msr - MSR_P4_BSU_ESCR0);
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
+ else
+ return (msr - MSR_P4_BSU_ESCR0);
}
return 0;
}
@@ -160,7 +167,10 @@ static __cpuinit inline int nmi_known_cp
case X86_VENDOR_AMD:
return boot_cpu_data.x86 == 15;
case X86_VENDOR_INTEL:
- return boot_cpu_data.x86 == 15;
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return 1;
+ else
+ return (boot_cpu_data.x86 == 15);
}
return 0;
}
@@ -246,8 +256,22 @@ int __init check_nmi_watchdog (void)
/* now that we know it works we can reduce NMI frequency to
something more reasonable; makes a difference in some configs */
- if (nmi_watchdog == NMI_LOCAL_APIC)
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
nmi_hz = 1;
+ /*
+ * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
+ * are writable, with higher bits sign extending from bit 31.
+ * So, we can only program the counter with 31 bit values and
+ * 32nd bit should be 1, for 33.. to be 1.
+ * Find the appropriate nmi_hz
+ */
+ if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
+ ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
+ nmi_hz = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
+ }
+ }
kfree(counts);
return 0;
@@ -563,6 +587,87 @@ static void stop_p4_watchdog(void)
release_perfctr_nmi(wd->perfctr_msr);
}
+#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
+static int setup_intel_arch_watchdog(void)
+{
+ unsigned int ebx;
+ union cpuid10_eax eax;
+ unsigned int unused;
+ unsigned int perfctr_msr, evntsel_msr;
+ unsigned int evntsel;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebx indicates event present.
+ */
+ cpuid(10, &(eax.full), &ebx, &unused, &unused);
+ if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+ (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ goto fail;
+
+ perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+ evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
+
+ if (!reserve_perfctr_nmi(perfctr_msr))
+ goto fail;
+
+ if (!reserve_evntsel_nmi(evntsel_msr))
+ goto fail1;
+
+ wrmsrl(perfctr_msr, 0UL);
+
+ evntsel = ARCH_PERFMON_EVENTSEL_INT
+ | ARCH_PERFMON_EVENTSEL_OS
+ | ARCH_PERFMON_EVENTSEL_USR
+ | ARCH_PERFMON_NMI_EVENT_SEL
+ | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+ /* setup the timer */
+ wrmsr(evntsel_msr, evntsel, 0);
+ wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
+
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsr(evntsel_msr, evntsel, 0);
+
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; //unused
+ wd->check_bit = 1ULL << (eax.split.bit_width - 1);
+ return 1;
+fail1:
+ release_perfctr_nmi(perfctr_msr);
+fail:
+ return 0;
+}
+
+static void stop_intel_arch_watchdog(void)
+{
+ unsigned int ebx;
+ union cpuid10_eax eax;
+ unsigned int unused;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebx indicates event present.
+ */
+ cpuid(10, &(eax.full), &ebx, &unused, &unused);
+ if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+ (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ return;
+
+ wrmsr(wd->evntsel_msr, 0, 0);
+
+ release_evntsel_nmi(wd->evntsel_msr);
+ release_perfctr_nmi(wd->perfctr_msr);
+}
+
void setup_apic_nmi_watchdog(void *unused)
{
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
@@ -589,6 +694,11 @@ void setup_apic_nmi_watchdog(void *unuse
return;
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ if (!setup_intel_arch_watchdog())
+ return;
+ break;
+ }
if (!setup_p4_watchdog())
return;
break;
@@ -620,6 +730,10 @@ void stop_apic_nmi_watchdog(void *unused
stop_k7_watchdog();
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ stop_intel_arch_watchdog();
+ break;
+ }
stop_p4_watchdog();
break;
default:
@@ -724,7 +838,13 @@ int __kprobes nmi_watchdog_tick(struct p
dummy &= ~P4_CCCR_OVF;
wrmsrl(wd->cccr_msr, dummy);
apic_write(APIC_LVTPC, APIC_DM_NMI);
- }
+ } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+ /*
+ * ArchPerfom/Core Duo needs to re-unmask
+ * the apic vector
+ */
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ }
/* start the cycle over again */
wrmsrl(wd->perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
rc = 1;
Index: linux/include/asm-i386/intel_arch_perfmon.h
===================================================================
--- /dev/null
+++ linux/include/asm-i386/intel_arch_perfmon.h
@@ -0,0 +1,31 @@
+#ifndef X86_INTEL_ARCH_PERFMON_H
+#define X86_INTEL_ARCH_PERFMON_H 1
+
+#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
+#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
+
+#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
+#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
+
+#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
+#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
+#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
+#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
+
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
+ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
+
+union cpuid10_eax {
+ struct {
+ unsigned int version_id:8;
+ unsigned int num_counters:8;
+ unsigned int bit_width:8;
+ unsigned int mask_length:8;
+ } split;
+ unsigned int full;
+};
+
+#endif /* X86_INTEL_ARCH_PERFMON_H */
Index: linux/include/asm-x86_64/intel_arch_perfmon.h
===================================================================
--- /dev/null
+++ linux/include/asm-x86_64/intel_arch_perfmon.h
@@ -0,0 +1,31 @@
+#ifndef X86_64_INTEL_ARCH_PERFMON_H
+#define X86_64_INTEL_ARCH_PERFMON_H 1
+
+#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
+#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
+
+#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
+#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
+
+#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
+#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
+#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
+#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
+
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
+ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
+
+union cpuid10_eax {
+ struct {
+ unsigned int version_id:8;
+ unsigned int num_counters:8;
+ unsigned int bit_width:8;
+ unsigned int mask_length:8;
+ } split;
+ unsigned int full;
+};
+
+#endif /* X86_64_INTEL_ARCH_PERFMON_H */
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [25/145] x86_64: Add macros for rdtscp
[not found] <20060810 935.775038000@suse.de>
` (23 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [24/145] x86_64: i386/x86-64 Add nmi watchdog support for new Intel CPUs Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [26/145] x86_64: Add initalization of the RDTSCP auxilliary values Andi Kleen
` (120 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Vojtech Pavlik <vojtech@suse.cz>
This patch adds macros for reading the TSC via the RDTSCP instruction, as well
as for writing the auxiliary MSR read by RDTSCP, to msr.h.
[AK: changed rdtscp definition for old binutils]
Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Andi Kleen <ak@suse.de>
---
include/asm-x86_64/msr.h | 11 +++++++++++
1 files changed, 11 insertions(+)
Index: linux/include/asm-x86_64/msr.h
===================================================================
--- linux.orig/include/asm-x86_64/msr.h
+++ linux/include/asm-x86_64/msr.h
@@ -66,14 +66,25 @@
#define rdtscl(low) \
__asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx")
+#define rdtscp(low,high,aux) \
+ asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (aux))
+
#define rdtscll(val) do { \
unsigned int __a,__d; \
asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
(val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
} while(0)
+#define rdtscpll(val, aux) do { \
+ unsigned long __a, __d; \
+ asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (__a), "=d" (__d), "=c" (aux)); \
+ (val) = (__d << 32) | __a; \
+} while (0)
+
#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+#define write_rdtscp_aux(val) wrmsr(0xc0000103, val, 0)
+
#define rdpmc(counter,low,high) \
__asm__ __volatile__("rdpmc" \
: "=a" (low), "=d" (high) \
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [26/145] x86_64: Add initalization of the RDTSCP auxilliary values
[not found] <20060810 935.775038000@suse.de>
` (24 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [25/145] x86_64: Add macros for rdtscp Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 21:49 ` Oleg Verych
2006-08-10 19:35 ` [PATCH for review] [27/145] x86_64: Add the vgetcpu vsyscall Andi Kleen
` (119 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Vojtech Pavlik <vojtech@suse.cz>
This patch adds initialization of the RDTSCP auxiliary values to CPU numbers
to time.c. If RDTSCP is available, the MSRs are written with the respective
values. They can later be used to initialize per-cpu timekeeping variables.
AK: Some cleanups. Move externs into headers and fix CPU hotplug.
Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/smpboot.c | 2 +
arch/x86_64/kernel/time.c | 47 ++++++++++++++++++++++++++++++++-----------
include/asm-x86_64/proto.h | 1
3 files changed, 38 insertions(+), 12 deletions(-)
Index: linux/arch/x86_64/kernel/time.c
===================================================================
--- linux.orig/arch/x86_64/kernel/time.c
+++ linux/arch/x86_64/kernel/time.c
@@ -24,6 +24,8 @@
#include <linux/device.h>
#include <linux/sysdev.h>
#include <linux/bcd.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
#include <linux/kallsyms.h>
#include <linux/acpi.h>
#ifdef CONFIG_ACPI
@@ -49,7 +51,7 @@ static void cpufreq_delayed_get(void);
extern void i8254_timer_resume(void);
extern int using_apic_timer;
-static char *time_init_gtod(void);
+static char *timename = NULL;
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
@@ -893,11 +895,21 @@ static struct irqaction irq0 = {
timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
};
-void __init time_init(void)
+static int __cpuinit
+time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
{
- char *timename;
- char *gtod;
+ unsigned cpu = (unsigned long) hcpu;
+ if (action == CPU_ONLINE &&
+ cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
+ unsigned p;
+ p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12);
+ write_rdtscp_aux(p);
+ }
+ return NOTIFY_DONE;
+}
+void __init time_init(void)
+{
if (nohpet)
vxtime.hpet_address = 0;
@@ -931,18 +943,19 @@ void __init time_init(void)
}
vxtime.mode = VXTIME_TSC;
- gtod = time_init_gtod();
-
- printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
- vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod);
- printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
vxtime.last_tsc = get_cycles_sync();
setup_irq(0, &irq0);
set_cyc2ns_scale(cpu_khz);
+
+ hotcpu_notifier(time_cpu_notifier, 0);
+ time_cpu_notifier(NULL, CPU_ONLINE, (void *)(long)smp_processor_id());
+
+#ifndef CONFIG_SMP
+ time_init_gtod();
+#endif
}
/*
@@ -973,12 +986,13 @@ __cpuinit int unsynchronized_tsc(void)
/*
* Decide what mode gettimeofday should use.
*/
-__init static char *time_init_gtod(void)
+void time_init_gtod(void)
{
char *timetype;
if (unsynchronized_tsc())
notsc = 1;
+
if (vxtime.hpet_address && notsc) {
timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
if (hpet_use_timer)
@@ -1001,7 +1015,16 @@ __init static char *time_init_gtod(void)
timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
vxtime.mode = VXTIME_TSC;
}
- return timetype;
+
+ printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
+ vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype);
+ printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
+ cpu_khz / 1000, cpu_khz % 1000);
+ vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
+ vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
+ vxtime.last_tsc = get_cycles_sync();
+
+ set_cyc2ns_scale(cpu_khz);
}
__setup("report_lost_ticks", time_setup);
Index: linux/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux.orig/arch/x86_64/kernel/smpboot.c
+++ linux/arch/x86_64/kernel/smpboot.c
@@ -1181,6 +1181,8 @@ void __init smp_cpus_done(unsigned int m
#endif
check_nmi_watchdog();
+
+ time_init_gtod();
}
#ifdef CONFIG_HOTPLUG_CPU
Index: linux/include/asm-x86_64/proto.h
===================================================================
--- linux.orig/include/asm-x86_64/proto.h
+++ linux/include/asm-x86_64/proto.h
@@ -51,6 +51,7 @@ extern unsigned long long monotonic_base
extern int sysctl_vsyscall;
extern int nohpet;
extern unsigned long vxtime_hz;
+extern void time_init_gtod(void);
extern int numa_setup(char *opt);
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [26/145] x86_64: Add initalization of the RDTSCP auxilliary values
2006-08-10 19:35 ` [PATCH for review] [26/145] x86_64: Add initalization of the RDTSCP auxilliary values Andi Kleen
@ 2006-08-10 21:49 ` Oleg Verych
2006-08-11 4:09 ` Andi Kleen
0 siblings, 1 reply; 199+ messages in thread
From: Oleg Verych @ 2006-08-10 21:49 UTC (permalink / raw)
To: linux-kernel
Andi Kleen:
> ---
> arch/x86_64/kernel/smpboot.c | 2 +
> arch/x86_64/kernel/time.c | 47 ++++++++++++++++++++++++++++++++-----------
> include/asm-x86_64/proto.h | 1
> 3 files changed, 38 insertions(+), 12 deletions(-)
>
> Index: linux/arch/x86_64/kernel/time.c
> ===================================================================
> --- linux.orig/arch/x86_64/kernel/time.c
> +++ linux/arch/x86_64/kernel/time.c
...
> +static int __cpuinit
> +time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
> {
> - char *timename;
> - char *gtod;
> + unsigned cpu = (unsigned long) hcpu;
Is this some kind of "endian magic" ? I mean getting high or low word of 64
pointer to 32 variable ? Why cast just with (unsigned) doesn't work ?
> + if (action == CPU_ONLINE &&
> + cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
> + unsigned p;
> + p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12);
Is this code runs under SMP ? I couldn't figure that out.
> + write_rdtscp_aux(p);
> + }
> + return NOTIFY_DONE;
> +}
Thanks.
--
-o--=O`C
#oo'L O
<___=E M
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [26/145] x86_64: Add initalization of the RDTSCP auxilliary values
2006-08-10 21:49 ` Oleg Verych
@ 2006-08-11 4:09 ` Andi Kleen
0 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-11 4:09 UTC (permalink / raw)
To: Oleg Verych; +Cc: linux-kernel
> > +static int __cpuinit
> > +time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
> > {
> > - char *timename;
> > - char *gtod;
> > + unsigned cpu = (unsigned long) hcpu;
>
> Is this some kind of "endian magic" ? I mean getting high or low word of 64
> pointer to 32 variable ? Why cast just with (unsigned) doesn't work ?
This is just to avoid a warning from gcc that a pointer is converted
to a 32bit integer -- which is ok here since it uses a "void *" callback argument
to pass an integer.
Arguably there should be a standard macro for this construct, but there
isn't currently.
>
> > + if (action == CPU_ONLINE &&
> > + cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
> > + unsigned p;
> > + p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12);
>
> Is this code runs under SMP ? I couldn't figure that out.
Yes it is. It handles additional CPUs.
-Andi
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [27/145] x86_64: Add the vgetcpu vsyscall
[not found] <20060810 935.775038000@suse.de>
` (25 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [26/145] x86_64: Add initalization of the RDTSCP auxilliary values Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [28/145] x86_64: Add portable getcpu call Andi Kleen
` (118 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Vojtech Pavlik <vojtech@suse.cz>
This patch adds a vgetcpu vsyscall which, depending on the CPU's RDTSCP
capability, uses either RDTSCP or CPUID to obtain the CPU and node
numbers and passes them to the program.
AK: Lots of changes over Vojtech's original code:
Better prototype for vgetcpu()
It's better to pass the cpu / node numbers as separate arguments
to avoid mistakes when going from SMP to NUMA.
Also add a fast time stamp based cache using a user supplied
argument to speed things up further.
Use fast method from Chuck Ebbert to retrieve node/cpu from
GDT limit instead of CPUID
Made sure RDTSCP init is always executed after node is known.
Drop printk
TBD benchmark LSL vs RDTSCP
Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/head.S | 2
arch/x86_64/kernel/time.c | 13 +++--
arch/x86_64/kernel/vmlinux.lds.S | 3 +
arch/x86_64/kernel/vsyscall.c | 86 +++++++++++++++++++++++++++++++++++++--
include/asm-x86_64/segment.h | 5 +-
include/asm-x86_64/smp.h | 12 ++++-
include/asm-x86_64/vsyscall.h | 9 ++++
include/linux/getcpu.h | 16 +++++++
8 files changed, 131 insertions(+), 15 deletions(-)
Index: linux/arch/x86_64/kernel/vsyscall.c
===================================================================
--- linux.orig/arch/x86_64/kernel/vsyscall.c
+++ linux/arch/x86_64/kernel/vsyscall.c
@@ -26,6 +26,7 @@
#include <linux/seqlock.h>
#include <linux/jiffies.h>
#include <linux/sysctl.h>
+#include <linux/getcpu.h>
#include <asm/vsyscall.h>
#include <asm/pgtable.h>
@@ -33,11 +34,15 @@
#include <asm/fixmap.h>
#include <asm/errno.h>
#include <asm/io.h>
+#include <asm/segment.h>
+#include <asm/desc.h>
+#include <asm/topology.h>
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
+int __vgetcpu_mode __section_vgetcpu_mode;
#include <asm/unistd.h>
@@ -127,9 +132,46 @@ time_t __vsyscall(1) vtime(time_t *t)
return __xtime.tv_sec;
}
-long __vsyscall(2) venosys_0(void)
-{
- return -ENOSYS;
+/* Fast way to get current CPU and node.
+ This helps to do per node and per CPU caches in user space.
+ The result is not guaranteed without CPU affinity, but usually
+ works out because the scheduler tries to keep a thread on the same
+ CPU.
+
+ tcache must point to a two element sized long array.
+ All arguments can be NULL. */
+long __vsyscall(2)
+vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+{
+ unsigned int dummy, p;
+ unsigned long j = 0;
+
+ /* Fast cache - only recompute value once per jiffies and avoid
+ relatively costly rdtscp/cpuid otherwise.
+ This works because the scheduler usually keeps the process
+ on the same CPU and this syscall doesn't guarantee its
+ results anyways.
+ We do this here because otherwise user space would do it on
+ its own in a likely inferior way (no access to jiffies).
+ If you don't like it pass NULL. */
+ if (tcache && tcache->t0 == (j = __jiffies)) {
+ p = tcache->t1;
+ } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
+ /* Load per CPU data from RDTSCP */
+ rdtscp(dummy, dummy, p);
+ } else {
+ /* Load per CPU data from GDT */
+ asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+ }
+ if (tcache) {
+ tcache->t0 = j;
+ tcache->t1 = p;
+ }
+ if (cpu)
+ *cpu = p & 0xfff;
+ if (node)
+ *node = p >> 12;
+ return 0;
}
long __vsyscall(3) venosys_1(void)
@@ -200,6 +242,43 @@ static ctl_table kernel_root_table2[] =
#endif
+static void __cpuinit write_rdtscp_cb(void *info)
+{
+ write_rdtscp_aux((unsigned long)info);
+}
+
+void __cpuinit vsyscall_set_cpu(int cpu)
+{
+ unsigned long *d;
+ unsigned long node = 0;
+#ifdef CONFIG_NUMA
+ node = cpu_to_node[cpu];
+#endif
+ if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
+ void *info = (void *)((node << 12) | cpu);
+ /* Can happen on preemptive kernel */
+ if (get_cpu() == cpu)
+ write_rdtscp_cb(info);
+#ifdef CONFIG_SMP
+ else {
+ /* the notifier is unfortunately not executed on the
+ target CPU */
+ smp_call_function_single(cpu,write_rdtscp_cb,info,0,1);
+ }
+#endif
+ put_cpu();
+ }
+
+ /* Store cpu number in limit so that it can be loaded quickly
+ in user space in vgetcpu.
+ 12 bits for the CPU and 8 bits for the node. */
+ d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU);
+ *d = 0x0f40000000000ULL;
+ *d |= cpu;
+ *d |= (node & 0xf) << 12;
+ *d |= (node >> 4) << 48;
+}
+
static void __init map_vsyscall(void)
{
extern char __vsyscall_0;
@@ -214,6 +293,7 @@ static int __init vsyscall_init(void)
VSYSCALL_ADDR(__NR_vgettimeofday)));
BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
+ BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
map_vsyscall();
#ifdef CONFIG_SYSCTL
register_sysctl_table(kernel_root_table2, 0);
Index: linux/include/asm-x86_64/vsyscall.h
===================================================================
--- linux.orig/include/asm-x86_64/vsyscall.h
+++ linux/include/asm-x86_64/vsyscall.h
@@ -6,6 +6,7 @@
enum vsyscall_num {
__NR_vgettimeofday,
__NR_vtime,
+ __NR_vgetcpu,
};
#define VSYSCALL_START (-10UL << 20)
@@ -16,6 +17,7 @@ enum vsyscall_num {
#ifdef __KERNEL__
#define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
+#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16)))
#define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
#define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
@@ -27,6 +29,9 @@ enum vsyscall_num {
#define VXTIME_HPET 2
#define VXTIME_PMTMR 3
+#define VGETCPU_RDTSCP 1
+#define VGETCPU_LSL 2
+
struct vxtime_data {
long hpet_address; /* HPET base address */
int last;
@@ -41,6 +46,7 @@ struct vxtime_data {
/* vsyscall space (readonly) */
extern struct vxtime_data __vxtime;
+extern int __vgetcpu_mode;
extern struct timespec __xtime;
extern volatile unsigned long __jiffies;
extern unsigned long __wall_jiffies;
@@ -49,6 +55,7 @@ extern seqlock_t __xtime_lock;
/* kernel space (writeable) */
extern struct vxtime_data vxtime;
+extern int vgetcpu_mode;
extern unsigned long wall_jiffies;
extern struct timezone sys_tz;
extern int sysctl_vsyscall;
@@ -56,6 +63,8 @@ extern seqlock_t xtime_lock;
extern int sysctl_vsyscall;
+extern void vsyscall_set_cpu(int cpu);
+
#define ARCH_HAVE_XTIME_LOCK 1
#endif /* __KERNEL__ */
Index: linux/arch/x86_64/kernel/time.c
===================================================================
--- linux.orig/arch/x86_64/kernel/time.c
+++ linux/arch/x86_64/kernel/time.c
@@ -899,12 +899,8 @@ static int __cpuinit
time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
{
unsigned cpu = (unsigned long) hcpu;
- if (action == CPU_ONLINE &&
- cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
- unsigned p;
- p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12);
- write_rdtscp_aux(p);
- }
+ if (action == CPU_ONLINE)
+ vsyscall_set_cpu(cpu);
return NOTIFY_DONE;
}
@@ -993,6 +989,11 @@ void time_init_gtod(void)
if (unsynchronized_tsc())
notsc = 1;
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
+ vgetcpu_mode = VGETCPU_RDTSCP;
+ else
+ vgetcpu_mode = VGETCPU_LSL;
+
if (vxtime.hpet_address && notsc) {
timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
if (hpet_use_timer)
Index: linux/arch/x86_64/kernel/vmlinux.lds.S
===================================================================
--- linux.orig/arch/x86_64/kernel/vmlinux.lds.S
+++ linux/arch/x86_64/kernel/vmlinux.lds.S
@@ -99,6 +99,9 @@ SECTIONS
.vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) }
vxtime = VVIRT(.vxtime);
+ .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
+ vgetcpu_mode = VVIRT(.vgetcpu_mode);
+
.wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) }
wall_jiffies = VVIRT(.wall_jiffies);
Index: linux/arch/x86_64/kernel/head.S
===================================================================
--- linux.orig/arch/x86_64/kernel/head.S
+++ linux/arch/x86_64/kernel/head.S
@@ -370,7 +370,7 @@ ENTRY(cpu_gdt_table)
.quad 0,0 /* TSS */
.quad 0,0 /* LDT */
.quad 0,0,0 /* three TLS descriptors */
- .quad 0 /* unused */
+ .quad 0x0000f40000000000 /* node/CPU stored in limit */
gdt_end:
/* asm/segment.h:GDT_ENTRIES must match this */
/* This should be a multiple of the cache line size */
Index: linux/include/asm-x86_64/segment.h
===================================================================
--- linux.orig/include/asm-x86_64/segment.h
+++ linux/include/asm-x86_64/segment.h
@@ -20,15 +20,16 @@
#define __USER_CS 0x33 /* 6*8+3 */
#define __USER32_DS __USER_DS
-#define GDT_ENTRY_TLS 1
#define GDT_ENTRY_TSS 8 /* needs two entries */
#define GDT_ENTRY_LDT 10 /* needs two entries */
#define GDT_ENTRY_TLS_MIN 12
#define GDT_ENTRY_TLS_MAX 14
-/* 15 free */
#define GDT_ENTRY_TLS_ENTRIES 3
+#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */
+#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3)
+
/* TLS indexes for 64bit - hardcoded in arch_prctl */
#define FS_TLS 0
#define GS_TLS 1
Index: linux/include/asm-x86_64/smp.h
===================================================================
--- linux.orig/include/asm-x86_64/smp.h
+++ linux/include/asm-x86_64/smp.h
@@ -133,13 +133,19 @@ static __inline int logical_smp_processo
/* we don't want to mark this access volatile - bad code generation */
return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
}
-#endif
#ifdef CONFIG_SMP
#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
#else
#define cpu_physical_id(cpu) boot_cpu_id
-#endif
-
+static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
+ void *info, int retry, int wait)
+{
+ /* Disable interrupts here? */
+ func(info);
+ return 0;
+}
+#endif /* !CONFIG_SMP */
+#endif /* !__ASSEMBLY */
#endif
Index: linux/include/linux/getcpu.h
===================================================================
--- /dev/null
+++ linux/include/linux/getcpu.h
@@ -0,0 +1,16 @@
+#ifndef _LINUX_GETCPU_H
+#define _LINUX_GETCPU_H 1
+
+/* Cache for getcpu() to speed it up. Results might be upto a jiffie
+ out of date, but will be faster.
+ User programs should not refer to the contents of this structure.
+ It is only a cache for vgetcpu(). It might change in future kernels.
+ The user program must store this information per thread (__thread)
+ If you want 100% accurate information pass NULL instead. */
+struct getcpu_cache {
+ unsigned long t0;
+ unsigned long t1;
+ unsigned long res[4];
+};
+
+#endif
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [28/145] x86_64: Add portable getcpu call
[not found] <20060810 935.775038000@suse.de>
` (26 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [27/145] x86_64: Add the vgetcpu vsyscall Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [29/145] x86_64: Clean up asm/smp.h includes Andi Kleen
` (117 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
For NUMA optimization and some other algorithms it is useful to have a fast
way to get the current CPU and node numbers in user space.
x86-64 added a fast way to do this in a vsyscall. This adds a generic
syscall for other architectures to make it a generic portable facility.
I expect some of them will also implement it as a faster vsyscall.
The cache is an optimization for the x86-64 vsyscall. Since
what the syscall returns is an approximation anyway and user space
often wants very fast results, it can be cached for some time. The normal
methods to get this information in user space are relatively slow.
The vsyscall is in a better position to manage the cache because it has direct
access to a fast time stamp (jiffies). For the generic syscall the cache
doesn't help much, but a valid argument is enforced to keep programs
portable.
I only added an i386 syscall entry for now. Other architectures can follow.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/syscall_table.S | 1 +
arch/x86_64/ia32/ia32entry.S | 1 +
include/asm-i386/unistd.h | 1 +
include/linux/syscalls.h | 2 ++
kernel/sys.c | 26 ++++++++++++++++++++++++++
5 files changed, 31 insertions(+)
Index: linux/kernel/sys.c
===================================================================
--- linux.orig/kernel/sys.c
+++ linux/kernel/sys.c
@@ -28,6 +28,7 @@
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
+#include <linux/getcpu.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -2062,3 +2063,28 @@ asmlinkage long sys_prctl(int option, un
}
return error;
}
+
+asmlinkage long sys_getcpu(unsigned *cpup, unsigned *nodep, struct getcpu_cache *cache)
+{
+ int err = 0;
+ int cpu = get_cpu();
+ put_cpu();
+ if (cpup)
+ err |= put_user(cpu, cpup);
+ if (nodep)
+ err |= put_user(cpu_to_node(cpu), nodep);
+ if (cache) {
+ /* Not needed for this implementation, but make sure user programs pass
+ something valid. We only use t0 and t1 because these are available in both
+ 32bit and 64bit ABI (no need for a compat_getcpu). 32bit has enough
+ padding. */
+ unsigned long t0, t1;
+ get_user(t0, &cache->t0);
+ get_user(t1, &cache->t1);
+ t0++;
+ t1++;
+ put_user(t0, &cache->t0);
+ put_user(t1, &cache->t1);
+ }
+ return err ? -EFAULT : 0;
+}
Index: linux/include/linux/syscalls.h
===================================================================
--- linux.orig/include/linux/syscalls.h
+++ linux/include/linux/syscalls.h
@@ -53,6 +53,7 @@ struct mq_attr;
struct compat_stat;
struct compat_timeval;
struct robust_list_head;
+struct getcpu_cache;
#include <linux/types.h>
#include <linux/aio_abi.h>
@@ -596,5 +597,6 @@ asmlinkage long sys_get_robust_list(int
size_t __user *len_ptr);
asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
size_t len);
+asmlinkage long sys_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *cache);
#endif
Index: linux/arch/i386/kernel/syscall_table.S
===================================================================
--- linux.orig/arch/i386/kernel/syscall_table.S
+++ linux/arch/i386/kernel/syscall_table.S
@@ -317,3 +317,4 @@ ENTRY(sys_call_table)
.long sys_tee /* 315 */
.long sys_vmsplice
.long sys_move_pages
+ .long sys_getcpu
Index: linux/arch/x86_64/ia32/ia32entry.S
===================================================================
--- linux.orig/arch/x86_64/ia32/ia32entry.S
+++ linux/arch/x86_64/ia32/ia32entry.S
@@ -713,4 +713,5 @@ ia32_sys_call_table:
.quad sys_tee
.quad compat_sys_vmsplice
.quad compat_sys_move_pages
+ .quad sys_getcpu
ia32_syscall_end:
Index: linux/include/asm-i386/unistd.h
===================================================================
--- linux.orig/include/asm-i386/unistd.h
+++ linux/include/asm-i386/unistd.h
@@ -323,6 +323,7 @@
#define __NR_tee 315
#define __NR_vmsplice 316
#define __NR_move_pages 317
+#define __NR_getcpu 318
#ifdef __KERNEL__
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [29/145] x86_64: Clean up asm/smp.h includes
[not found] <20060810 935.775038000@suse.de>
` (27 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [28/145] x86_64: Add portable getcpu call Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [30/145] x86_64: x86-64 TIF flags for debug regs and io bitmap in ctxsw Andi Kleen
` (116 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
No need to include it from entry.S
Drop all the #ifdef __ASSEMBLY__
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/entry.S | 2 --
include/asm-x86_64/smp.h | 12 ------------
2 files changed, 14 deletions(-)
Index: linux/arch/x86_64/kernel/entry.S
===================================================================
--- linux.orig/arch/x86_64/kernel/entry.S
+++ linux/arch/x86_64/kernel/entry.S
@@ -27,10 +27,8 @@
* - schedule it carefully for the final hardware.
*/
-#define ASSEMBLY 1
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
Index: linux/include/asm-x86_64/smp.h
===================================================================
--- linux.orig/include/asm-x86_64/smp.h
+++ linux/include/asm-x86_64/smp.h
@@ -4,15 +4,12 @@
/*
* We need the APIC definitions automatically as part of 'smp.h'
*/
-#ifndef __ASSEMBLY__
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/bitops.h>
extern int disable_apic;
-#endif
#ifdef CONFIG_X86_LOCAL_APIC
-#ifndef __ASSEMBLY__
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#ifdef CONFIG_X86_IO_APIC
@@ -21,10 +18,8 @@ extern int disable_apic;
#include <asm/apic.h>
#include <asm/thread_info.h>
#endif
-#endif
#ifdef CONFIG_SMP
-#ifndef ASSEMBLY
#include <asm/pda.h>
@@ -83,13 +78,10 @@ extern void prefill_possible_map(void);
extern unsigned num_processors;
extern unsigned disabled_cpus;
-#endif /* !ASSEMBLY */
-
#define NO_PROC_ID 0xFF /* No processor magic marker */
#endif
-#ifndef ASSEMBLY
/*
* Some lowlevel functions might want to know about
* the real APIC ID <-> CPU # mapping.
@@ -111,8 +103,6 @@ static inline int cpu_present_to_apicid(
return BAD_APICID;
}
-#endif /* !ASSEMBLY */
-
#ifndef CONFIG_SMP
#define stack_smp_processor_id() 0
#define safe_smp_processor_id() 0
@@ -127,7 +117,6 @@ static inline int cpu_present_to_apicid(
})
#endif
-#ifndef __ASSEMBLY__
static __inline int logical_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
@@ -146,6 +135,5 @@ static inline int smp_call_function_sing
return 0;
}
#endif /* !CONFIG_SMP */
-#endif /* !__ASSEMBLY */
#endif
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [30/145] x86_64: x86-64 TIF flags for debug regs and io bitmap in ctxsw
[not found] <20060810 935.775038000@suse.de>
` (28 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [29/145] x86_64: Clean up asm/smp.h includes Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [31/145] x86_64: Don't print virtual address in HPET initialization Andi Kleen
` (115 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: Stephane Eranian <eranian@hpl.hp.com>
Hello,
Following my discussion with Andi, here is a patch that introduces
two new TIF flags to simplify the context switch code in __switch_to().
The idea is to minimize the number of cache lines accessed in the common
case, i.e., when neither the debug registers nor the I/O bitmap are used.
This patch covers the x86-64 modifications. A patch for i386 follows.
Changelog:
- add TIF_DEBUG to track when debug registers are active
- add TIF_IO_BITMAP to track when I/O bitmap is used
- modify __switch_to() to use the new TIF flags
<signed-off-by>: eranian@hpl.hp.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/ia32/ptrace32.c | 4 ++
arch/x86_64/kernel/ioport.c | 1
arch/x86_64/kernel/process.c | 73 ++++++++++++++++++++++-----------------
arch/x86_64/kernel/ptrace.c | 8 +++-
include/asm-x86_64/thread_info.h | 7 +++
5 files changed, 60 insertions(+), 33 deletions(-)
Index: linux/arch/x86_64/ia32/ptrace32.c
===================================================================
--- linux.orig/arch/x86_64/ia32/ptrace32.c
+++ linux/arch/x86_64/ia32/ptrace32.c
@@ -117,6 +117,10 @@ static int putreg32(struct task_struct *
if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1)
return -EIO;
child->thread.debugreg7 = val;
+ if (val)
+ set_tsk_thread_flag(child, TIF_DEBUG);
+ else
+ clear_tsk_thread_flag(child, TIF_DEBUG);
break;
default:
Index: linux/arch/x86_64/kernel/ioport.c
===================================================================
--- linux.orig/arch/x86_64/kernel/ioport.c
+++ linux/arch/x86_64/kernel/ioport.c
@@ -56,6 +56,7 @@ asmlinkage long sys_ioperm(unsigned long
memset(bitmap, 0xff, IO_BITMAP_BYTES);
t->io_bitmap_ptr = bitmap;
+ set_thread_flag(TIF_IO_BITMAP);
}
/*
Index: linux/arch/x86_64/kernel/process.c
===================================================================
--- linux.orig/arch/x86_64/kernel/process.c
+++ linux/arch/x86_64/kernel/process.c
@@ -350,6 +350,7 @@ void exit_thread(void)
kfree(t->io_bitmap_ptr);
t->io_bitmap_ptr = NULL;
+ clear_thread_flag(TIF_IO_BITMAP);
/*
* Careful, clear this in the TSS too:
*/
@@ -369,6 +370,7 @@ void flush_thread(void)
if (t->flags & _TIF_IA32)
current_thread_info()->status |= TS_COMPAT;
}
+ t->flags &= ~_TIF_DEBUG;
tsk->thread.debugreg0 = 0;
tsk->thread.debugreg1 = 0;
@@ -461,7 +463,7 @@ int copy_thread(int nr, unsigned long cl
asm("mov %%es,%0" : "=m" (p->thread.es));
asm("mov %%ds,%0" : "=m" (p->thread.ds));
- if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
+ if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
p->thread.io_bitmap_max = 0;
@@ -469,6 +471,7 @@ int copy_thread(int nr, unsigned long cl
}
memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
IO_BITMAP_BYTES);
+ set_tsk_thread_flag(p, TIF_IO_BITMAP);
}
/*
@@ -498,6 +501,40 @@ out:
*/
#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
+static inline void __switch_to_xtra(struct task_struct *prev_p,
+ struct task_struct *next_p,
+ struct tss_struct *tss)
+{
+ struct thread_struct *prev, *next;
+
+ prev = &prev_p->thread,
+ next = &next_p->thread;
+
+ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
+ loaddebug(next, 0);
+ loaddebug(next, 1);
+ loaddebug(next, 2);
+ loaddebug(next, 3);
+ /* no 4 and 5 */
+ loaddebug(next, 6);
+ loaddebug(next, 7);
+ }
+
+ if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
+ /*
+ * Copy the relevant range of the IO bitmap.
+ * Normally this is 128 bytes or less:
+ */
+ memcpy(tss->io_bitmap, next->io_bitmap_ptr,
+ max(prev->io_bitmap_max, next->io_bitmap_max));
+ } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
+ /*
+ * Clear any possible leftover bits:
+ */
+ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
+ }
+}
+
/*
* switch_to(x,y) should switch tasks from x to y.
*
@@ -586,37 +623,11 @@ __switch_to(struct task_struct *prev_p,
task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
/*
- * Now maybe reload the debug registers
+ * Now maybe reload the debug registers and handle I/O bitmaps
*/
- if (unlikely(next->debugreg7)) {
- loaddebug(next, 0);
- loaddebug(next, 1);
- loaddebug(next, 2);
- loaddebug(next, 3);
- /* no 4 and 5 */
- loaddebug(next, 6);
- loaddebug(next, 7);
- }
-
-
- /*
- * Handle the IO bitmap
- */
- if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
- if (next->io_bitmap_ptr)
- /*
- * Copy the relevant range of the IO bitmap.
- * Normally this is 128 bytes or less:
- */
- memcpy(tss->io_bitmap, next->io_bitmap_ptr,
- max(prev->io_bitmap_max, next->io_bitmap_max));
- else {
- /*
- * Clear any possible leftover bits:
- */
- memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
- }
- }
+ if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
+ || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
+ __switch_to_xtra(prev_p, next_p, tss);
return prev_p;
}
Index: linux/arch/x86_64/kernel/ptrace.c
===================================================================
--- linux.orig/arch/x86_64/kernel/ptrace.c
+++ linux/arch/x86_64/kernel/ptrace.c
@@ -420,9 +420,13 @@ long arch_ptrace(struct task_struct *chi
if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
break;
if (i == 4) {
- child->thread.debugreg7 = data;
+ child->thread.debugreg7 = data;
+ if (data)
+ set_tsk_thread_flag(child, TIF_DEBUG);
+ else
+ clear_tsk_thread_flag(child, TIF_DEBUG);
ret = 0;
- }
+ }
break;
}
break;
Index: linux/include/asm-x86_64/thread_info.h
===================================================================
--- linux.orig/include/asm-x86_64/thread_info.h
+++ linux/include/asm-x86_64/thread_info.h
@@ -120,6 +120,8 @@ static inline struct thread_info *stack_
#define TIF_FORK 18 /* ret_from_fork */
#define TIF_ABI_PENDING 19
#define TIF_MEMDIE 20
+#define TIF_DEBUG 21 /* uses debug registers */
+#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
@@ -133,6 +135,8 @@ static inline struct thread_info *stack_
#define _TIF_IA32 (1<<TIF_IA32)
#define _TIF_FORK (1<<TIF_FORK)
#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
+#define _TIF_DEBUG (1<<TIF_DEBUG)
+#define _TIF_IO_BITMAP (1<<TIF_IO_BITMAP)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
@@ -140,6 +144,9 @@ static inline struct thread_info *stack_
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
+/* flags to check in __switch_to() */
+#define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP)
+
#define PREEMPT_ACTIVE 0x10000000
/*
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [31/145] x86_64: Don't print virtual address in HPET initialization
[not found] <20060810 935.775038000@suse.de>
` (29 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [30/145] x86_64: x86-64 TIF flags for debug regs and io bitmap in ctxsw Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-11 16:50 ` Clemens Ladisch
2006-08-10 19:35 ` [PATCH for review] [32/145] x86_64: A few trivial spelling and grammar fixes Andi Kleen
` (114 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
Virtual addresses don't belong in kernel logs outside of debugging.
Cc: clemens@ladisch.de
Signed-off-by: Andi Kleen <ak@suse.de>
---
drivers/char/hpet.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
Index: linux/drivers/char/hpet.c
===================================================================
--- linux.orig/drivers/char/hpet.c
+++ linux/drivers/char/hpet.c
@@ -868,8 +868,8 @@ int hpet_alloc(struct hpet_data *hdp)
do_div(temp, period);
hpetp->hp_tick_freq = temp; /* ticks per second */
- printk(KERN_INFO "hpet%d: at MMIO 0x%lx (virtual 0x%p), IRQ%s",
- hpetp->hp_which, hdp->hd_phys_address, hdp->hd_address,
+ printk(KERN_INFO "hpet%d: at MMIO 0x%lx, IRQ%s",
+ hpetp->hp_which, hdp->hd_phys_address,
hpetp->hp_ntimer > 1 ? "s" : "");
for (i = 0; i < hpetp->hp_ntimer; i++)
printk("%s %d", i > 0 ? "," : "", hdp->hd_irq[i]);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [32/145] x86_64: A few trivial spelling and grammar fixes
[not found] <20060810 935.775038000@suse.de>
` (30 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [31/145] x86_64: Don't print virtual address in HPET initialization Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [33/145] i386/x86-64: Don't randomize stack top when no randomization personality is set Andi Kleen
` (113 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
From: "Adam Henley" <adamazing@gmail.com>
A few trivial spelling and grammar mistakes picked up in
"arch/x86_64/aperture.c", "arch/x86_64/crash.c" and
"arch/x86_64/apic.c". I think all are correct fixes but am ever aware
of my fallibility :o) This is my first patch submission so all
feedback is appreciated, esp. WRT CCing to Linus, Andi and
trivial@kernel.org, is this correct? And which is the most appropriate
kernel version to diff against? If any.
Should apply cleanly to 2.6.18-rc1
Signed-off-by: Adam Henley <adamazing@gmail.com>
Signed-off-by: Andi Kleen <ak@suse.de>
- adam
---
arch/x86_64/kernel/aperture.c | 2 +-
arch/x86_64/kernel/apic.c | 4 ++--
arch/x86_64/kernel/crash.c | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
Index: linux/arch/x86_64/kernel/aperture.c
===================================================================
--- linux.orig/arch/x86_64/kernel/aperture.c
+++ linux/arch/x86_64/kernel/aperture.c
@@ -48,7 +48,7 @@ static u32 __init allocate_aperture(void
/*
* Aperture has to be naturally aligned. This means an 2GB aperture won't
- * have much chances to find a place in the lower 4GB of memory.
+ * have much chance of finding a place in the lower 4GB of memory.
* Unfortunately we cannot move it up because that would make the
* IOMMU useless.
*/
Index: linux/arch/x86_64/kernel/apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/apic.c
+++ linux/arch/x86_64/kernel/apic.c
@@ -400,7 +400,7 @@ void __cpuinit setup_local_APIC (void)
value |= APIC_SPIV_APIC_ENABLED;
/*
- * Some unknown Intel IO/APIC (or APIC) errata is biting us with
+ * Some unknown Intel IO/APIC (or APIC) errata are biting us with
* certain networking cards. If high frequency interrupts are
* happening on a particular IOAPIC pin, plus the IOAPIC routing
* entry is masked/unmasked at a high rate as well then sooner or
@@ -950,7 +950,7 @@ void smp_local_timer_interrupt(struct pt
* We take the 'long' return path, and there every subsystem
* grabs the appropriate locks (kernel lock/ irq lock).
*
- * we might want to decouple profiling from the 'long path',
+ * We might want to decouple profiling from the 'long path',
* and do the profiling totally in assembly.
*
* Currently this isn't too much of an issue (performance wise),
Index: linux/arch/x86_64/kernel/crash.c
===================================================================
--- linux.orig/arch/x86_64/kernel/crash.c
+++ linux/arch/x86_64/kernel/crash.c
@@ -69,7 +69,7 @@ static void crash_save_this_cpu(struct p
* for the data I pass, and I need tags
* on the data to indicate what information I have
* squirrelled away. ELF notes happen to provide
- * all of that that no need to invent something new.
+ * all of that, no need to invent something new.
*/
buf = (u32*)per_cpu_ptr(crash_notes, cpu);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [33/145] i386/x86-64: Don't randomize stack top when no randomization personality is set
[not found] <20060810 935.775038000@suse.de>
` (31 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [32/145] x86_64: A few trivial spelling and grammar fixes Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [34/145] i386: Account spinlocks to the caller during profiling for !FP kernels Andi Kleen
` (112 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
Based on patch from Frank van Maarseveen <frankvm@frankvm.com>, but
extended.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/process.c | 3 ++-
arch/x86_64/kernel/process.c | 2 +-
fs/binfmt_elf.c | 3 ++-
3 files changed, 5 insertions(+), 3 deletions(-)
Index: linux/arch/x86_64/kernel/process.c
===================================================================
--- linux.orig/arch/x86_64/kernel/process.c
+++ linux/arch/x86_64/kernel/process.c
@@ -845,7 +845,7 @@ int dump_task_regs(struct task_struct *t
unsigned long arch_align_stack(unsigned long sp)
{
- if (randomize_va_space)
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
Index: linux/arch/i386/kernel/process.c
===================================================================
--- linux.orig/arch/i386/kernel/process.c
+++ linux/arch/i386/kernel/process.c
@@ -37,6 +37,7 @@
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/random.h>
+#include <linux/personality.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -905,7 +906,7 @@ asmlinkage int sys_get_thread_area(struc
unsigned long arch_align_stack(unsigned long sp)
{
- if (randomize_va_space)
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
Index: linux/fs/binfmt_elf.c
===================================================================
--- linux.orig/fs/binfmt_elf.c
+++ linux/fs/binfmt_elf.c
@@ -515,7 +515,8 @@ static unsigned long randomize_stack_top
{
unsigned int random_variable = 0;
- if (current->flags & PF_RANDOMIZE) {
+ if ((current->flags & PF_RANDOMIZE) &&
+ !(current->personality & ADDR_NO_RANDOMIZE)) {
random_variable = get_random_int() & STACK_RND_MASK;
random_variable <<= PAGE_SHIFT;
}
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [34/145] i386: Account spinlocks to the caller during profiling for !FP kernels
[not found] <20060810 935.775038000@suse.de>
` (32 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [33/145] i386/x86-64: Don't randomize stack top when no randomization personality is set Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [35/145] x86_64: Simplify profile_pc on x86-64 Andi Kleen
` (111 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
This ports the algorithm from x86-64 (with improvements) to i386.
Previously this only worked for frame pointer enabled kernels.
But spinlocks have a very simple stack frame that can be manually
analyzed. Do this.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/time.c | 23 +++++++++++++++++++----
include/asm-i386/ptrace.h | 4 ----
kernel/spinlock.c | 5 +++++
3 files changed, 24 insertions(+), 8 deletions(-)
Index: linux/arch/i386/kernel/time.c
===================================================================
--- linux.orig/arch/i386/kernel/time.c
+++ linux/arch/i386/kernel/time.c
@@ -130,18 +130,33 @@ static int set_rtc_mmss(unsigned long no
int timer_ack;
-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
unsigned long profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
- if (!user_mode_vm(regs) && in_lock_functions(pc))
+#ifdef CONFIG_SMP
+ if (!user_mode_vm(regs) && in_lock_functions(pc)) {
+#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->ebp + 4);
-
+#else
+ unsigned long *sp;
+ if ((regs->xcs & 3) == 0)
+ sp = (unsigned long *)&regs->esp;
+ else
+ sp = (unsigned long *)regs->esp;
+ /* Return address is either directly at stack pointer
+ or above a saved eflags. Eflags has bits 22-31 zero,
+ kernel addresses don't. */
+ if (sp[0] >> 22)
+ return sp[0];
+ if (sp[1] >> 22)
+ return sp[1];
+#endif
+ }
+#endif
return pc;
}
EXPORT_SYMBOL(profile_pc);
-#endif
/*
* This is the same as the above, except we _also_ save the current
Index: linux/include/asm-i386/ptrace.h
===================================================================
--- linux.orig/include/asm-i386/ptrace.h
+++ linux/include/asm-i386/ptrace.h
@@ -80,11 +80,7 @@ static inline int user_mode_vm(struct pt
return ((regs->xcs & 3) | (regs->eflags & VM_MASK)) != 0;
}
#define instruction_pointer(regs) ((regs)->eip)
-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
extern unsigned long profile_pc(struct pt_regs *regs);
-#else
-#define profile_pc(regs) instruction_pointer(regs)
-#endif
#endif /* __KERNEL__ */
#endif
Index: linux/kernel/spinlock.c
===================================================================
--- linux.orig/kernel/spinlock.c
+++ linux/kernel/spinlock.c
@@ -7,6 +7,11 @@
*
* This file contains the spinlock/rwlock implementations for the
* SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them)
+ *
+ * Note that some architectures have special knowledge about the
+ * stack frames of these functions in their profile_pc. If you
+ * change anything significant here that could change the stack
+ * frame contact the architecture maintainers.
*/
#include <linux/linkage.h>
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [35/145] x86_64: Simplify profile_pc on x86-64
[not found] <20060810 935.775038000@suse.de>
` (33 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [34/145] i386: Account spinlocks to the caller during profiling for !FP kernels Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [36/145] x86_64: Document backtracer selection options Andi Kleen
` (110 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
Use knowledge about EFLAGS layout (bits 22:63 are always 0) to distinguish
EFLAGS word and kernel address in the spin lock stack frame.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/time.c | 21 ++++++++-------------
1 files changed, 8 insertions(+), 13 deletions(-)
Index: linux/arch/x86_64/kernel/time.c
===================================================================
--- linux.orig/arch/x86_64/kernel/time.c
+++ linux/arch/x86_64/kernel/time.c
@@ -189,20 +189,15 @@ unsigned long profile_pc(struct pt_regs
{
unsigned long pc = instruction_pointer(regs);
- /* Assume the lock function has either no stack frame or only a single
- word. This checks if the address on the stack looks like a kernel
- text address.
- There is a small window for false hits, but in that case the tick
- is just accounted to the spinlock function.
- Better would be to write these functions in assembler again
- and check exactly. */
+ /* Assume the lock function has either no stack frame or a copy
+ of eflags from PUSHF
+ Eflags always has bits 22 and up cleared unlike kernel addresses. */
if (!user_mode(regs) && in_lock_functions(pc)) {
- char *v = *(char **)regs->rsp;
- if ((v >= _stext && v <= _etext) ||
- (v >= _sinittext && v <= _einittext) ||
- (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END))
- return (unsigned long)v;
- return ((unsigned long *)regs->rsp)[1];
+ unsigned long *sp = (unsigned long *)regs->rsp;
+ if (sp[0] >> 22)
+ return sp[0];
+ if (sp[1] >> 22)
+ return sp[1];
}
return pc;
}
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [36/145] x86_64: Document backtracer selection options
[not found] <20060810 935.775038000@suse.de>
` (34 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [35/145] x86_64: Simplify profile_pc on x86-64 Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [37/145] x86_64: Support patchable lock prefix for pure assembly files Andi Kleen
` (109 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
Signed-off-by: Andi Kleen <ak@suse.de>
---
Documentation/x86_64/boot-options.txt | 7 +++++++
1 files changed, 7 insertions(+)
Index: linux/Documentation/x86_64/boot-options.txt
===================================================================
--- linux.orig/Documentation/x86_64/boot-options.txt
+++ linux/Documentation/x86_64/boot-options.txt
@@ -245,6 +245,13 @@ Debugging
newfallback: use new unwinder but fall back to old if it gets
stuck (default)
+ call_trace=[old|both|newfallback|new]
+ old: use old inexact backtracer
+ new: use new exact dwarf2 unwinder
+ both: print entries from both
+ newfallback: use new unwinder but fall back to old if it gets
+ stuck (default)
+
Misc
noreplacement Don't replace instructions with more appropriate ones
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [37/145] x86_64: Support patchable lock prefix for pure assembly files
[not found] <20060810 935.775038000@suse.de>
` (35 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [36/145] x86_64: Document backtracer selection options Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [38/145] x86_64: Switch rwlocks over to patchable lock prefix Andi Kleen
` (108 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
Signed-off-by: Andi Kleen <ak@suse.de>
---
include/asm-x86_64/alternative-asm.i | 14 ++++++++++++++
1 files changed, 14 insertions(+)
Index: linux/include/asm-x86_64/alternative-asm.i
===================================================================
--- /dev/null
+++ linux/include/asm-x86_64/alternative-asm.i
@@ -0,0 +1,14 @@
+#include <linux/config.h>
+
+#ifdef CONFIG_SMP
+ .macro LOCK_PREFIX
+1: lock
+ .section .smp_locks,"a"
+ .align 8
+ .quad 1b
+ .previous
+ .endm
+#else
+ .macro LOCK_PREFIX
+ .endm
+#endif
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [38/145] x86_64: Switch rwlocks over to patchable lock prefix
[not found] <20060810 935.775038000@suse.de>
` (36 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [37/145] x86_64: Support patchable lock prefix for pure assembly files Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [39/145] x86_64: Clean up read write lock assembly Andi Kleen
` (107 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
This way their lock prefix can be patched away on UP
Signed-off-by: Andi Kleen <ak@suse.de>
---
include/asm-x86_64/spinlock.h | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
Index: linux/include/asm-x86_64/spinlock.h
===================================================================
--- linux.orig/include/asm-x86_64/spinlock.h
+++ linux/include/asm-x86_64/spinlock.h
@@ -125,13 +125,13 @@ static inline int __raw_write_trylock(ra
static inline void __raw_read_unlock(raw_rwlock_t *rw)
{
- asm volatile("lock ; incl %0" :"=m" (rw->lock) : : "memory");
+ asm volatile(LOCK_PREFIX "incl %0" :"=m" (rw->lock) : : "memory");
}
static inline void __raw_write_unlock(raw_rwlock_t *rw)
{
- asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0"
- : "=m" (rw->lock) : : "memory");
+ asm volatile(LOCK_PREFIX "addl %1,%0"
+ : "=m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
}
#endif /* __ASM_SPINLOCK_H */
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [39/145] x86_64: Clean up read write lock assembly
[not found] <20060810 935.775038000@suse.de>
` (37 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [38/145] x86_64: Switch rwlocks over to patchable lock prefix Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [40/145] i386: Remove const case for rwlocks Andi Kleen
` (106 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
- Move the slow path fallbacks to their own assembly files
This makes them much easier to read and is needed for the next change.
- Add CFI annotations for unwinding (XXX need review)
- Remove constant case which can never happen with out of line spinlocks
- Use patchable LOCK prefixes
- Don't use lock sections anymore for inline code because they can't
be expressed by the unwinder (this adds one taken jump to the lock
fast path)
Cc: jbeulich@novell.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/lib/Makefile | 2 -
arch/x86_64/lib/rwlock.S | 38 +++++++++++++++++++++++
arch/x86_64/lib/thunk.S | 30 ------------------
include/asm-x86_64/rwlock.h | 67 +++++-------------------------------------
include/asm-x86_64/spinlock.h | 11 +-----
5 files changed, 50 insertions(+), 98 deletions(-)
Index: linux/arch/x86_64/lib/rwlock.S
===================================================================
--- /dev/null
+++ linux/arch/x86_64/lib/rwlock.S
@@ -0,0 +1,38 @@
+/* Slow paths of read/write spinlocks. */
+
+#include <linux/linkage.h>
+#include <asm/rwlock.h>
+#include <asm/alternative-asm.i>
+#include <asm/dwarf2.h>
+
+/* rdi: pointer to rwlock_t */
+ENTRY(__write_lock_failed)
+ CFI_STARTPROC
+ LOCK_PREFIX
+ addl $RW_LOCK_BIAS,(%rdi)
+1: rep
+ nop
+ cmpl $RW_LOCK_BIAS,(%rdi)
+ jne 1b
+ LOCK_PREFIX
+ subl $RW_LOCK_BIAS,(%rdi)
+ jnz __write_lock_failed
+ ret
+ CFI_ENDPROC
+END(__write_lock_failed)
+
+/* rdi: pointer to rwlock_t */
+ENTRY(__read_lock_failed)
+ CFI_STARTPROC
+ LOCK_PREFIX
+ incl (%rdi)
+1: rep
+ nop
+ cmpl $1,(%rdi)
+ js 1b
+ LOCK_PREFIX
+ decl (%rdi)
+ js __read_lock_failed
+ ret
+ CFI_ENDPROC
+END(__read_lock_failed)
Index: linux/arch/x86_64/lib/thunk.S
===================================================================
--- linux.orig/arch/x86_64/lib/thunk.S
+++ linux/arch/x86_64/lib/thunk.S
@@ -67,33 +67,3 @@ restore_norax:
RESTORE_ARGS 1
ret
CFI_ENDPROC
-
-#ifdef CONFIG_SMP
-/* Support for read/write spinlocks. */
- .text
-/* rax: pointer to rwlock_t */
-ENTRY(__write_lock_failed)
- lock
- addl $RW_LOCK_BIAS,(%rax)
-1: rep
- nop
- cmpl $RW_LOCK_BIAS,(%rax)
- jne 1b
- lock
- subl $RW_LOCK_BIAS,(%rax)
- jnz __write_lock_failed
- ret
-
-/* rax: pointer to rwlock_t */
-ENTRY(__read_lock_failed)
- lock
- incl (%rax)
-1: rep
- nop
- cmpl $1,(%rax)
- js 1b
- lock
- decl (%rax)
- js __read_lock_failed
- ret
-#endif
Index: linux/include/asm-x86_64/rwlock.h
===================================================================
--- linux.orig/include/asm-x86_64/rwlock.h
+++ linux/include/asm-x86_64/rwlock.h
@@ -18,69 +18,20 @@
#ifndef _ASM_X86_64_RWLOCK_H
#define _ASM_X86_64_RWLOCK_H
-#include <linux/stringify.h>
-
#define RW_LOCK_BIAS 0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-#define __build_read_lock_ptr(rw, helper) \
+#define __build_read_lock(rw) \
asm volatile(LOCK_PREFIX "subl $1,(%0)\n\t" \
- "js 2f\n" \
- "1:\n" \
- LOCK_SECTION_START("") \
- "2:\tcall " helper "\n\t" \
- "jmp 1b\n" \
- LOCK_SECTION_END \
- ::"a" (rw) : "memory")
-
-#define __build_read_lock_const(rw, helper) \
- asm volatile(LOCK_PREFIX "subl $1,%0\n\t" \
- "js 2f\n" \
+ "jns 1f\n" \
+ "call __read_lock_failed\n" \
"1:\n" \
- LOCK_SECTION_START("") \
- "2:\tpushq %%rax\n\t" \
- "leaq %0,%%rax\n\t" \
- "call " helper "\n\t" \
- "popq %%rax\n\t" \
- "jmp 1b\n" \
- LOCK_SECTION_END \
- :"=m" (*((volatile int *)rw))::"memory")
-
-#define __build_read_lock(rw, helper) do { \
- if (__builtin_constant_p(rw)) \
- __build_read_lock_const(rw, helper); \
- else \
- __build_read_lock_ptr(rw, helper); \
- } while (0)
+ ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory")
-#define __build_write_lock_ptr(rw, helper) \
- asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
- "jnz 2f\n" \
+#define __build_write_lock(rw) \
+ asm volatile(LOCK_PREFIX "subl %1,(%0)\n\t" \
+ "jz 1f\n" \
+ "\tcall __write_lock_failed\n\t" \
"1:\n" \
- LOCK_SECTION_START("") \
- "2:\tcall " helper "\n\t" \
- "jmp 1b\n" \
- LOCK_SECTION_END \
- ::"a" (rw) : "memory")
-
-#define __build_write_lock_const(rw, helper) \
- asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \
- "jnz 2f\n" \
- "1:\n" \
- LOCK_SECTION_START("") \
- "2:\tpushq %%rax\n\t" \
- "leaq %0,%%rax\n\t" \
- "call " helper "\n\t" \
- "popq %%rax\n\t" \
- "jmp 1b\n" \
- LOCK_SECTION_END \
- :"=m" (*((volatile long *)rw))::"memory")
-
-#define __build_write_lock(rw, helper) do { \
- if (__builtin_constant_p(rw)) \
- __build_write_lock_const(rw, helper); \
- else \
- __build_write_lock_ptr(rw, helper); \
- } while (0)
+ ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory")
#endif
Index: linux/include/asm-x86_64/spinlock.h
===================================================================
--- linux.orig/include/asm-x86_64/spinlock.h
+++ linux/include/asm-x86_64/spinlock.h
@@ -82,13 +82,6 @@ static inline void __raw_spin_unlock(raw
*
* On x86, we implement read-write locks as a 32-bit counter
* with the high bit (sign) being the "contended" bit.
- *
- * The inline assembly is non-obvious. Think about it.
- *
- * Changed to use the same technique as rw semaphores. See
- * semaphore.h for details. -ben
- *
- * the helpers are in arch/i386/kernel/semaphore.c
*/
#define __raw_read_can_lock(x) ((int)(x)->lock > 0)
@@ -96,12 +89,12 @@ static inline void __raw_spin_unlock(raw
static inline void __raw_read_lock(raw_rwlock_t *rw)
{
- __build_read_lock(rw, "__read_lock_failed");
+ __build_read_lock(rw);
}
static inline void __raw_write_lock(raw_rwlock_t *rw)
{
- __build_write_lock(rw, "__write_lock_failed");
+ __build_write_lock(rw);
}
static inline int __raw_read_trylock(raw_rwlock_t *lock)
Index: linux/arch/x86_64/lib/Makefile
===================================================================
--- linux.orig/arch/x86_64/lib/Makefile
+++ linux/arch/x86_64/lib/Makefile
@@ -9,4 +9,4 @@ obj-y := io.o iomap_copy.o
lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \
usercopy.o getuser.o putuser.o \
thunk.o clear_page.o copy_page.o bitstr.o bitops.o
-lib-y += memcpy.o memmove.o memset.o copy_user.o
+lib-y += memcpy.o memmove.o memset.o copy_user.o rwlock.o
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [40/145] i386: Remove const case for rwlocks
[not found] <20060810 935.775038000@suse.de>
` (38 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [39/145] x86_64: Clean up read write lock assembly Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [41/145] x86_64: Add proper alignment to ENTRY Andi Kleen
` (105 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
rwlocks are now out of line, so it nearly never triggers. Also it was
incompatible with the new dwarf2 unwinder because it had unannotatable
push/pops.
Signed-off-by: Andi Kleen <ak@suse.de>
---
include/asm-i386/rwlock.h | 40 ++--------------------------------------
1 files changed, 2 insertions(+), 38 deletions(-)
Index: linux/include/asm-i386/rwlock.h
===================================================================
--- linux.orig/include/asm-i386/rwlock.h
+++ linux/include/asm-i386/rwlock.h
@@ -20,7 +20,7 @@
#define RW_LOCK_BIAS 0x01000000
#define RW_LOCK_BIAS_STR "0x01000000"
-#define __build_read_lock_ptr(rw, helper) \
+#define __build_read_lock(rw, helper) \
alternative_smp("lock; subl $1,(%0)\n\t" \
"jns 1f\n" \
"call " helper "\n\t" \
@@ -28,25 +28,7 @@
"subl $1,(%0)\n\t", \
:"a" (rw) : "memory")
-#define __build_read_lock_const(rw, helper) \
- alternative_smp("lock; subl $1,%0\n\t" \
- "jns 1f\n" \
- "pushl %%eax\n\t" \
- "leal %0,%%eax\n\t" \
- "call " helper "\n\t" \
- "popl %%eax\n\t" \
- "1:\n", \
- "subl $1,%0\n\t", \
- "+m" (*(volatile int *)rw) : : "memory")
-
-#define __build_read_lock(rw, helper) do { \
- if (__builtin_constant_p(rw)) \
- __build_read_lock_const(rw, helper); \
- else \
- __build_read_lock_ptr(rw, helper); \
- } while (0)
-
-#define __build_write_lock_ptr(rw, helper) \
+#define __build_write_lock(rw, helper) \
alternative_smp("lock; subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
"jz 1f\n" \
"call " helper "\n\t" \
@@ -54,22 +36,4 @@
"subl $" RW_LOCK_BIAS_STR ",(%0)\n\t", \
:"a" (rw) : "memory")
-#define __build_write_lock_const(rw, helper) \
- alternative_smp("lock; subl $" RW_LOCK_BIAS_STR ",%0\n\t" \
- "jz 1f\n" \
- "pushl %%eax\n\t" \
- "leal %0,%%eax\n\t" \
- "call " helper "\n\t" \
- "popl %%eax\n\t" \
- "1:\n", \
- "subl $" RW_LOCK_BIAS_STR ",%0\n\t", \
- "+m" (*(volatile int *)rw) : : "memory")
-
-#define __build_write_lock(rw, helper) do { \
- if (__builtin_constant_p(rw)) \
- __build_write_lock_const(rw, helper); \
- else \
- __build_write_lock_ptr(rw, helper); \
- } while (0)
-
#endif
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [41/145] x86_64: Add proper alignment to ENTRY
[not found] <20060810 935.775038000@suse.de>
` (39 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [40/145] i386: Remove const case for rwlocks Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [42/145] i386: add alternative-asm.h to allow LOCK_PREFIX replacement in .S files Andi Kleen
` (104 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
Previously it didn't align. Use the same one as the C compiler
in blended mode, which is good for K8 and Core2 and doesn't hurt
on P4.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/entry.S | 3 +--
include/asm-x86_64/linkage.h | 2 +-
2 files changed, 2 insertions(+), 3 deletions(-)
Index: linux/arch/x86_64/kernel/entry.S
===================================================================
--- linux.orig/arch/x86_64/kernel/entry.S
+++ linux/arch/x86_64/kernel/entry.S
@@ -617,8 +617,7 @@ retint_signal:
#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
- .p2align
-retint_kernel:
+ENTRY(retint_kernel)
cmpl $0,threadinfo_preempt_count(%rcx)
jnz retint_restore_args
bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
Index: linux/include/asm-x86_64/linkage.h
===================================================================
--- linux.orig/include/asm-x86_64/linkage.h
+++ linux/include/asm-x86_64/linkage.h
@@ -1,6 +1,6 @@
#ifndef __ASM_LINKAGE_H
#define __ASM_LINKAGE_H
-/* Nothing to see here... */
+#define __ALIGN .p2align 4,,15
#endif
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [42/145] i386: add alternative-asm.h to allow LOCK_PREFIX replacement in .S files
[not found] <20060810 935.775038000@suse.de>
` (40 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [41/145] x86_64: Add proper alignment to ENTRY Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [43/145] i386: Redo semaphore and rwlock assembly helpers Andi Kleen
` (103 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
LOCK_PREFIX is replaced by nops on UP systems, so it has to be a special
macro. Previously this was only possible from C. Allow it for pure
assembly files too. Similar to earlier x86-64 patch.
Signed-off-by: Andi Kleen <ak@suse.de>
---
include/asm-i386/alternative-asm.i | 14 ++++++++++++++
1 files changed, 14 insertions(+)
Index: linux/include/asm-i386/alternative-asm.i
===================================================================
--- /dev/null
+++ linux/include/asm-i386/alternative-asm.i
@@ -0,0 +1,14 @@
+#include <linux/config.h>
+
+#ifdef CONFIG_SMP
+ .macro LOCK_PREFIX
+1: lock
+ .section .smp_locks,"a"
+ .align 4
+ .long 1b
+ .previous
+ .endm
+#else
+ .macro LOCK_PREFIX
+ .endm
+#endif
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [43/145] i386: Redo semaphore and rwlock assembly helpers
[not found] <20060810 935.775038000@suse.de>
` (41 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [42/145] i386: add alternative-asm.h to allow LOCK_PREFIX replacement in .S files Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-13 0:53 ` Andrew Morton
2006-08-10 19:35 ` [PATCH for review] [44/145] x86_64: Remove leftover CVS Id in thunk.S Andi Kleen
` (102 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
- Move them to a pure assembly file. Previously they were in
a C file that only consisted of inline assembly. Doing it in pure
assembler is much nicer.
- Add a frame.i include with FRAME/ENDFRAME macros to easily
add frame pointers to assembly functions
- Add dwarf2 annotation to them so that the new dwarf2 unwinder
doesn't get stuck on them
[TBD: needs review from someone who knows more about CFA than me, e.g. Jan]
- Random cleanups
Cc: jbeulich@novell.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/Makefile | 2
arch/i386/kernel/semaphore.c | 134 -------------------------------------------
arch/i386/lib/Makefile | 2
arch/i386/lib/semaphore.S | 132 ++++++++++++++++++++++++++++++++++++++++++
include/asm-i386/frame.i | 19 ++++++
5 files changed, 153 insertions(+), 136 deletions(-)
Index: linux/arch/i386/kernel/Makefile
===================================================================
--- linux.orig/arch/i386/kernel/Makefile
+++ linux/arch/i386/kernel/Makefile
@@ -4,7 +4,7 @@
extra-y := head.o init_task.o vmlinux.lds
-obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
+obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o bootflag.o \
quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
Index: linux/arch/i386/kernel/semaphore.c
===================================================================
--- linux.orig/arch/i386/kernel/semaphore.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * i386 semaphore implementation.
- *
- * (C) Copyright 1999 Linus Torvalds
- *
- * Portions Copyright 1999 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
- */
-#include <asm/semaphore.h>
-
-/*
- * The semaphore operations have a special calling sequence that
- * allow us to do a simpler in-line version of them. These routines
- * need to convert that sequence back into the C sequence when
- * there is contention on the semaphore.
- *
- * %eax contains the semaphore pointer on entry. Save the C-clobbered
- * registers (%eax, %edx and %ecx) except %eax whish is either a return
- * value or just clobbered..
- */
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __down_failed\n"
-"__down_failed:\n\t"
-#if defined(CONFIG_FRAME_POINTER)
- "pushl %ebp\n\t"
- "movl %esp,%ebp\n\t"
-#endif
- "pushl %edx\n\t"
- "pushl %ecx\n\t"
- "call __down\n\t"
- "popl %ecx\n\t"
- "popl %edx\n\t"
-#if defined(CONFIG_FRAME_POINTER)
- "movl %ebp,%esp\n\t"
- "popl %ebp\n\t"
-#endif
- "ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __down_failed_interruptible\n"
-"__down_failed_interruptible:\n\t"
-#if defined(CONFIG_FRAME_POINTER)
- "pushl %ebp\n\t"
- "movl %esp,%ebp\n\t"
-#endif
- "pushl %edx\n\t"
- "pushl %ecx\n\t"
- "call __down_interruptible\n\t"
- "popl %ecx\n\t"
- "popl %edx\n\t"
-#if defined(CONFIG_FRAME_POINTER)
- "movl %ebp,%esp\n\t"
- "popl %ebp\n\t"
-#endif
- "ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __down_failed_trylock\n"
-"__down_failed_trylock:\n\t"
-#if defined(CONFIG_FRAME_POINTER)
- "pushl %ebp\n\t"
- "movl %esp,%ebp\n\t"
-#endif
- "pushl %edx\n\t"
- "pushl %ecx\n\t"
- "call __down_trylock\n\t"
- "popl %ecx\n\t"
- "popl %edx\n\t"
-#if defined(CONFIG_FRAME_POINTER)
- "movl %ebp,%esp\n\t"
- "popl %ebp\n\t"
-#endif
- "ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __up_wakeup\n"
-"__up_wakeup:\n\t"
- "pushl %edx\n\t"
- "pushl %ecx\n\t"
- "call __up\n\t"
- "popl %ecx\n\t"
- "popl %edx\n\t"
- "ret"
-);
-
-/*
- * rw spinlock fallbacks
- */
-#if defined(CONFIG_SMP)
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __write_lock_failed\n"
-"__write_lock_failed:\n\t"
- LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
-"1: rep; nop\n\t"
- "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
- "jne 1b\n\t"
- LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
- "jnz __write_lock_failed\n\t"
- "ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __read_lock_failed\n"
-"__read_lock_failed:\n\t"
- LOCK_PREFIX "incl (%eax)\n"
-"1: rep; nop\n\t"
- "cmpl $1,(%eax)\n\t"
- "js 1b\n\t"
- LOCK_PREFIX "decl (%eax)\n\t"
- "js __read_lock_failed\n\t"
- "ret"
-);
-#endif
Index: linux/arch/i386/lib/semaphore.S
===================================================================
--- /dev/null
+++ linux/arch/i386/lib/semaphore.S
@@ -0,0 +1,132 @@
+/*
+ * i386 semaphore implementation.
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Portions Copyright 1999 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
+ */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/rwlock.h>
+#include <asm/alternative-asm.i>
+#include <asm/frame.i>
+#include <asm/dwarf2.h>
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allows us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ *
+ * %eax contains the semaphore pointer on entry. Save the C-clobbered
+ * registers (%eax, %edx and %ecx) except %eax which is either a return
+ * value or just clobbered.
+ */
+ .section .sched.text
+ENTRY(__down_failed)
+ CFI_STARTPROC
+ FRAME
+ pushl %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ call __down
+ popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ popl %edx
+ CFI_ADJUST_CFA_OFFSET -4
+ ENDFRAME
+ ret
+ CFI_ENDPROC
+ END(__down_failed)
+
+ENTRY(__down_failed_interruptible)
+ CFI_STARTPROC
+ FRAME
+ pushl %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ call __down_interruptible
+ popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ popl %edx
+ CFI_ADJUST_CFA_OFFSET -4
+ ENDFRAME
+ ret
+ CFI_ENDPROC
+ END(__down_failed_interruptible)
+
+ENTRY(__down_failed_trylock)
+ CFI_STARTPROC
+ FRAME
+ pushl %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ call __down_trylock
+ popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ popl %edx
+ CFI_ADJUST_CFA_OFFSET -4
+ ENDFRAME
+ ret
+ CFI_ENDPROC
+ END(__down_failed_trylock)
+
+ENTRY(__up_wakeup)
+ CFI_STARTPROC
+ pushl %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ call __up
+ popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ popl %edx
+ CFI_ADJUST_CFA_OFFSET -4
+ ret
+ CFI_ENDPROC
+ END(__up_wakeup)
+
+/*
+ * rw spinlock fallbacks
+ */
+#ifdef CONFIG_SMP
+ENTRY(__write_lock_failed)
+ CFI_STARTPROC simple
+ LOCK_PREFIX
+ addl $ RW_LOCK_BIAS,(%eax)
+1: rep; nop
+ cmpl $ RW_LOCK_BIAS,(%eax)
+ jne 1b
+ LOCK_PREFIX
+ subl $ RW_LOCK_BIAS,(%eax)
+ jnz __write_lock_failed
+ ret
+ CFI_ENDPROC
+ END(__write_lock_failed)
+
+ENTRY(__read_lock_failed)
+ CFI_STARTPROC
+ LOCK_PREFIX
+ incl (%eax)
+1: rep; nop
+ cmpl $1,(%eax)
+ js 1b
+ LOCK_PREFIX
+ decl (%eax)
+ js __read_lock_failed
+ ret
+ CFI_ENDPROC
+ END(__read_lock_failed)
+
+#endif
Index: linux/include/asm-i386/frame.i
===================================================================
--- /dev/null
+++ linux/include/asm-i386/frame.i
@@ -0,0 +1,29 @@
+#include <linux/config.h>
+#include <asm/dwarf2.h>
+
+/*
+ * FRAME/ENDFRAME: optional frame-pointer prologue/epilogue for assembly
+ * functions.  With CONFIG_FRAME_POINTER they push/pop %ebp and keep the
+ * dwarf2 CFI in step; without it they expand to nothing.
+ *
+ * The CFI records where the caller's %ebp was saved so that an unwinder
+ * can recover it while the frame is active.
+ */
+#ifdef CONFIG_FRAME_POINTER
+ .macro FRAME
+ pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebp,0
+ movl %esp,%ebp
+ .endm
+ .macro ENDFRAME
+ popl %ebp
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE ebp
+ .endm
+#else
+ .macro FRAME
+ .endm
+ .macro ENDFRAME
+ .endm
+#endif
Index: linux/arch/i386/lib/Makefile
===================================================================
--- linux.orig/arch/i386/lib/Makefile
+++ linux/arch/i386/lib/Makefile
@@ -4,6 +4,6 @@
lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
- bitops.o
+ bitops.o semaphore.o
lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [43/145] i386: Redo semaphore and rwlock assembly helpers
2006-08-10 19:35 ` [PATCH for review] [43/145] i386: Redo semaphore and rwlock assembly helpers Andi Kleen
@ 2006-08-13 0:53 ` Andrew Morton
2006-08-13 6:50 ` Andi Kleen
0 siblings, 1 reply; 199+ messages in thread
From: Andrew Morton @ 2006-08-13 0:53 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel, Jan Beulich
On Thu, 10 Aug 2006 21:35:57 +0200 (CEST)
Andi Kleen <ak@suse.de> wrote:
> - Move them to a pure assembly file. Previously they were in
> a C file that only consisted of inline assembly. Doing it in pure
> assembler is much nicer.
> - Add a frame.i include with FRAME/ENDFRAME macros to easily
> add frame pointers to assembly functions
> - Add dwarf2 annotation to them so that the new dwarf2 unwinder
> doesn't get stuck on them
> [TBD: needs review from someone who knows more about CFA than me, e.g. Jan]
> - Random cleanups
This patch causes the below crash after some seconds of disk stresstesting.
BUG: unable to handle kernel paging request at virtual address 4bf2e411
printing eip:
e4111d00
*pde = 00000000
Oops: 0002 [#1]
SMP
last sysfs file:
Modules linked in:
CPU: 1
EIP: 0060:[<e4111d00>] Not tainted VLI
EFLAGS: 00010046 (2.6.18-rc4 #28)
EIP is at 0xe4111d00
eax: e6ff89e4 ebx: e6ff89e4 ecx: 00000000 edx: e4110000
esi: e6ff89d4 edi: e6ff88fc ebp: e4111cf4 esp: e4111cf8
ds: 007b es: 007b ss: 0068
Process pdflush (pid: 2341, ti=e4110000 task=c1d40aa0 task.ti=e4110000)
Stack: c0271d0f 00000282 e4111d10 c03a4bf2 e6ff89e4 c129a900 e4111d38 c015543c
e6ff89e4 00000000 e4111d48 e6ff89e4 e6ff89d8 c129a900 00000000 e6ff88fc
e4111d48 c014e939 c129a900 d0a8a734 e4111d84 c01752d1 c129a900 c01c588e
Call Trace:
[<c01040d9>] show_stack_log_lvl+0xa9/0xd0
[<c010430d>] show_registers+0x1bd/0x240
[<c01044cc>] die+0x13c/0x300
[<c0114ff2>] do_page_fault+0x2a2/0x5dc
[<c0103bf9>] error_code+0x39/0x40
[<c0271d0f>] _raw_write_lock+0x3f/0x80
Code: 88 ff e6 f4 1c 11 e4 e4 89 ff e6 7b 00 1c c0 7b 00 00 00 ff ff ff ff 00 1d 11 e4 60 00 00 00 46 00 01 00 0f 1d 27 c0 82 02 00 00 <10> 1d 11 e4 f2 4b 3a c0 e4 89 ff e6 00 a9 29 c1 38 1d 11 e4 3c
EIP: [<e4111d00>] 0xe4111d00 SS:ESP 0068:e4111cf8
<3>BUG: sleeping function called from invalid context at kernel/rwsem.c:20
in_atomic():0, irqs_disabled():1
[<c010414b>] show_trace+0x1b/0x20
[<c01048e4>] dump_stack+0x24/0x30
[<c0116fe6>] __might_sleep+0xa6/0xb0
[<c0137e3f>] down_read+0x1f/0x2c
[<c012db27>] blocking_notifier_call_chain+0x17/0x40
[<c01203ef>] profile_task_exit+0x1f/0x30
[<c0121efd>] do_exit+0x1d/0x940
[<c0104685>] die+0x2f5/0x300
[<c0114ff2>] do_page_fault+0x2a2/0x5dc
[<c0103bf9>] error_code+0x39/0x40
[<c0271d0f>] _raw_write_lock+0x3f/0x80
BUG: NMI Watchdog detected LOCKUP on CPU0, eip c03a47e6, registers:
Modules linked in:
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [43/145] i386: Redo semaphore and rwlock assembly helpers
2006-08-13 0:53 ` Andrew Morton
@ 2006-08-13 6:50 ` Andi Kleen
2006-08-13 6:54 ` Andrew Morton
0 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-13 6:50 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, Jan Beulich
On Sunday 13 August 2006 02:53, Andrew Morton wrote:
> On Thu, 10 Aug 2006 21:35:57 +0200 (CEST)
> Andi Kleen <ak@suse.de> wrote:
>
> > - Move them to a pure assembly file. Previously they were in
> > a C file that only consisted of inline assembly. Doing it in pure
> > assembler is much nicer.
> > - Add a frame.i include with FRAME/ENDFRAME macros to easily
> > add frame pointers to assembly functions
> > - Add dwarf2 annotation to them so that the new dwarf2 unwinder
> > doesn't get stuck on them
> > [TBD: needs review from someone who knows more about CFA than me, e.g. Jan]
> > - Random cleanups
>
> This patch causes the below crash after some seconds of disk stresstesting.
I can't reproduce this with either LTP nor OraSim.
Also I looked over the patch and i can't see any mistakes.
Can you double check please?
-Andi
>
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [43/145] i386: Redo semaphore and rwlock assembly helpers
2006-08-13 6:50 ` Andi Kleen
@ 2006-08-13 6:54 ` Andrew Morton
2006-08-13 7:42 ` Andi Kleen
0 siblings, 1 reply; 199+ messages in thread
From: Andrew Morton @ 2006-08-13 6:54 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel, Jan Beulich
On Sun, 13 Aug 2006 08:50:01 +0200
Andi Kleen <ak@suse.de> wrote:
> On Sunday 13 August 2006 02:53, Andrew Morton wrote:
> > On Thu, 10 Aug 2006 21:35:57 +0200 (CEST)
> > Andi Kleen <ak@suse.de> wrote:
> >
> > > - Move them to a pure assembly file. Previously they were in
> > > a C file that only consisted of inline assembly. Doing it in pure
> > > assembler is much nicer.
> > > - Add a frame.i include with FRAME/ENDFRAME macros to easily
> > > add frame pointers to assembly functions
> > > - Add dwarf2 annotation to them so that the new dwarf2 unwinder
> > > doesn't get stuck on them
> > > [TBD: needs review from someone who knows more about CFA than me, e.g. Jan]
> > > - Random cleanups
> >
> > This patch causes the below crash after some seconds of disk stresstesting.
>
> I can't reproduce this with either LTP nor OraSim.
> Also I looked over the patch and i can't see any mistakes.
>
> Can you double check please?
>
2-way pIII with the below .config crashes in seconds running LTP's
./testcases/bin/growfiles -W gf01 -b -e 1 -u -i 0 -L 20 -w -C 1 -l -I r -T 10 glseek20 glseek20.2
CONFIG_X86_32=y
CONFIG_GENERIC_TIME=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_X86=y
CONFIG_MMU=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_DMI=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
#
# Code maturity level options
#
CONFIG_EXPERIMENTAL=y
CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
#
# General setup
#
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_SWAP=y
CONFIG_SWAP_PREFETCH=y
CONFIG_SYSVIPC=y
# CONFIG_IPC_NS is not set
# CONFIG_POSIX_MQUEUE is not set
CONFIG_BSD_PROCESS_ACCT=y
# CONFIG_BSD_PROCESS_ACCT_V3 is not set
# CONFIG_TASKSTATS is not set
CONFIG_SYSCTL=y
# CONFIG_UTS_NS is not set
CONFIG_AUDIT=y
CONFIG_AUDITSYSCALL=y
CONFIG_IKCONFIG=y
# CONFIG_IKCONFIG_PROC is not set
# CONFIG_CPUSETS is not set
# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_KLIBC_ERRLIST=y
CONFIG_KLIBC_ZLIB=y
CONFIG_UID16=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EMBEDDED=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_KALLSYMS_EXTRA_PASS is not set
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_BASE_FULL=y
CONFIG_RT_MUTEXES=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
CONFIG_SLAB=y
CONFIG_VM_EVENT_COUNTERS=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
# CONFIG_SLOB is not set
#
# Loadable module support
#
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_MODULE_FORCE_UNLOAD is not set
CONFIG_MODVERSIONS=y
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
#
# Block layer
#
# CONFIG_LBD is not set
# CONFIG_BLK_DEV_IO_TRACE is not set
# CONFIG_LSF is not set
#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
CONFIG_DEFAULT_AS=y
# CONFIG_DEFAULT_DEADLINE is not set
# CONFIG_DEFAULT_CFQ is not set
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="anticipatory"
#
# Processor type and features
#
CONFIG_SMP=y
CONFIG_X86_PC=y
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
# CONFIG_X86_NUMAQ is not set
# CONFIG_X86_SUMMIT is not set
# CONFIG_X86_BIGSMP is not set
# CONFIG_X86_VISWS is not set
# CONFIG_X86_GENERICARCH is not set
# CONFIG_X86_ES7000 is not set
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
# CONFIG_M686 is not set
# CONFIG_MPENTIUMII is not set
CONFIG_MPENTIUMIII=y
# CONFIG_MPENTIUMM is not set
# CONFIG_MPENTIUM4 is not set
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
# CONFIG_MGEODEGX1 is not set
# CONFIG_MGEODE_LX is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_X86_GENERIC is not set
CONFIG_X86_CMPXCHG=y
CONFIG_X86_XADD=y
CONFIG_X86_L1_CACHE_SHIFT=5
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
CONFIG_X86_CMPXCHG64=y
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_TSC=y
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_NR_CPUS=8
# CONFIG_SCHED_SMT is not set
CONFIG_SCHED_MC=y
CONFIG_PREEMPT_NONE=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
CONFIG_PREEMPT_BKL=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_X86_MCE=y
# CONFIG_X86_MCE_NONFATAL is not set
# CONFIG_X86_MCE_P4THERMAL is not set
CONFIG_VM86=y
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
# CONFIG_X86_REBOOTFIXUPS is not set
# CONFIG_MICROCODE is not set
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
#
# Firmware Drivers
#
CONFIG_EDD=y
# CONFIG_DELL_RBU is not set
CONFIG_DCDBAS=m
# CONFIG_NOHIGHMEM is not set
CONFIG_HIGHMEM4G=y
# CONFIG_HIGHMEM64G is not set
CONFIG_VMSPLIT_3G=y
# CONFIG_VMSPLIT_3G_OPT is not set
# CONFIG_VMSPLIT_2G is not set
# CONFIG_VMSPLIT_1G is not set
CONFIG_PAGE_OFFSET=0xC0000000
CONFIG_HIGHMEM=y
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
# CONFIG_SPARSEMEM_MANUAL is not set
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_SPARSEMEM_STATIC=y
CONFIG_SPLIT_PTLOCK_CPUS=4
# CONFIG_RESOURCES_64BIT is not set
CONFIG_ADAPTIVE_READAHEAD=y
# CONFIG_READAHEAD_ALLOW_OVERHEADS is not set
# CONFIG_HIGHPTE is not set
# CONFIG_MATH_EMULATION is not set
# CONFIG_MTRR is not set
CONFIG_IRQBALANCE=y
# CONFIG_REGPARM is not set
CONFIG_SECCOMP=y
# CONFIG_SECCOMP_DISABLE_TSC is not set
# CONFIG_VGA_NOPROBE is not set
# CONFIG_HZ_100 is not set
CONFIG_HZ_250=y
# CONFIG_HZ_1000 is not set
CONFIG_HZ=250
# CONFIG_KEXEC is not set
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x100000
CONFIG_HOTPLUG_CPU=y
CONFIG_COMPAT_VDSO=y
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
#
# Power management options (ACPI, APM)
#
CONFIG_PM=y
CONFIG_PM_LEGACY=y
# CONFIG_PM_DEBUG is not set
CONFIG_SOFTWARE_SUSPEND=y
CONFIG_PM_STD_PARTITION=""
CONFIG_SUSPEND_SMP=y
#
# ACPI (Advanced Configuration and Power Interface) Support
#
# CONFIG_ACPI is not set
#
# APM (Advanced Power Management) BIOS Support
#
# CONFIG_APM is not set
#
# CPU Frequency scaling
#
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_TABLE=y
# CONFIG_CPU_FREQ_DEBUG is not set
CONFIG_CPU_FREQ_STAT=y
# CONFIG_CPU_FREQ_STAT_DETAILS is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
# CONFIG_CPU_FREQ_GOV_USERSPACE is not set
# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set
# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
#
# CPUFreq processor drivers
#
# CONFIG_X86_POWERNOW_K6 is not set
# CONFIG_X86_POWERNOW_K7 is not set
# CONFIG_X86_POWERNOW_K8 is not set
# CONFIG_X86_GX_SUSPMOD is not set
# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
# CONFIG_X86_SPEEDSTEP_ICH is not set
# CONFIG_X86_SPEEDSTEP_SMI is not set
CONFIG_X86_P4_CLOCKMOD=m
# CONFIG_X86_CPUFREQ_NFORCE2 is not set
# CONFIG_X86_LONGRUN is not set
#
# shared options
#
CONFIG_X86_SPEEDSTEP_LIB=m
#
# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
#
CONFIG_PCI=y
# CONFIG_PCI_GOBIOS is not set
# CONFIG_PCI_GOMMCONFIG is not set
# CONFIG_PCI_GODIRECT is not set
CONFIG_PCI_GOANY=y
CONFIG_PCI_BIOS=y
CONFIG_PCI_DIRECT=y
# CONFIG_PCIEPORTBUS is not set
# CONFIG_PCI_MSI is not set
# CONFIG_PCI_DEBUG is not set
CONFIG_ISA_DMA_API=y
CONFIG_ISA=y
CONFIG_EISA=y
# CONFIG_EISA_VLB_PRIMING is not set
CONFIG_EISA_PCI_EISA=y
CONFIG_EISA_VIRTUAL_ROOT=y
CONFIG_EISA_NAMES=y
# CONFIG_MCA is not set
# CONFIG_SCx200 is not set
#
# PCCARD (PCMCIA/CardBus) support
#
# CONFIG_PCCARD is not set
#
# PCI Hotplug Support
#
CONFIG_HOTPLUG_PCI=y
# CONFIG_HOTPLUG_PCI_FAKE is not set
# CONFIG_HOTPLUG_PCI_COMPAQ is not set
# CONFIG_HOTPLUG_PCI_IBM is not set
# CONFIG_HOTPLUG_PCI_CPCI is not set
# CONFIG_HOTPLUG_PCI_SHPC is not set
#
# Executable file formats
#
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_AOUT=y
CONFIG_BINFMT_MISC=y
#
# Networking
#
CONFIG_NET=y
#
# Networking options
#
# CONFIG_NETDEBUG is not set
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
CONFIG_XFRM=y
# CONFIG_XFRM_USER is not set
CONFIG_NET_KEY=y
CONFIG_INET=y
# CONFIG_IP_MULTICAST is not set
# CONFIG_IP_ADVANCED_ROUTER is not set
CONFIG_IP_FIB_HASH=y
# CONFIG_IP_PNP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
# CONFIG_ARPD is not set
# CONFIG_SYN_COOKIES is not set
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
# CONFIG_INET_XFRM_TUNNEL is not set
# CONFIG_INET_TUNNEL is not set
CONFIG_INET_XFRM_MODE_TRANSPORT=y
CONFIG_INET_XFRM_MODE_TUNNEL=y
CONFIG_INET_DIAG=y
CONFIG_INET_TCP_DIAG=y
# CONFIG_TCP_CONG_ADVANCED is not set
CONFIG_TCP_CONG_BIC=y
# CONFIG_IPV6 is not set
# CONFIG_INET6_XFRM_TUNNEL is not set
# CONFIG_INET6_TUNNEL is not set
# CONFIG_NETWORK_SECMARK is not set
# CONFIG_NETFILTER is not set
#
# DCCP Configuration (EXPERIMENTAL)
#
# CONFIG_IP_DCCP is not set
#
# SCTP Configuration (EXPERIMENTAL)
#
# CONFIG_IP_SCTP is not set
#
# TIPC Configuration (EXPERIMENTAL)
#
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
CONFIG_BRIDGE=y
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
CONFIG_LLC=y
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
#
# QoS and/or fair queueing
#
# CONFIG_NET_SCHED is not set
#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_IEEE80211 is not set
#
# Device Drivers
#
#
# Generic Driver Options
#
# CONFIG_STANDALONE is not set
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
# CONFIG_DEBUG_DRIVER is not set
# CONFIG_SYS_HYPERVISOR is not set
#
# Connector - unified userspace <-> kernelspace linker
#
CONFIG_CONNECTOR=y
CONFIG_PROC_EVENTS=y
#
# Memory Technology Devices (MTD)
#
# CONFIG_MTD is not set
#
# Parallel port support
#
# CONFIG_PARPORT is not set
#
# Plug and Play support
#
CONFIG_PNP=y
CONFIG_PNP_DEBUG=y
#
# Protocols
#
CONFIG_ISAPNP=y
CONFIG_PNPBIOS=y
# CONFIG_PNPBIOS_PROC_FS is not set
#
# Block devices
#
CONFIG_BLK_DEV_FD=y
# CONFIG_BLK_DEV_XD is not set
# CONFIG_BLK_CPQ_DA is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
# CONFIG_BLK_DEV_DAC960 is not set
# CONFIG_BLK_DEV_UMEM is not set
# CONFIG_BLK_DEV_COW_COMMON is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_CRYPTOLOOP=y
# CONFIG_BLK_DEV_NBD is not set
# CONFIG_BLK_DEV_SX8 is not set
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=4000
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
#
# ATA/ATAPI/MFM/RLL support
#
CONFIG_IDE=y
CONFIG_IDE_MAX_HWIFS=4
CONFIG_BLK_DEV_IDE=y
#
# Please see Documentation/ide.txt for help/info on IDE drives
#
# CONFIG_BLK_DEV_IDE_SATA is not set
# CONFIG_BLK_DEV_HD_IDE is not set
CONFIG_BLK_DEV_IDEDISK=y
CONFIG_IDEDISK_MULTI_MODE=y
CONFIG_BLK_DEV_IDECD=y
CONFIG_BLK_DEV_IDETAPE=y
CONFIG_BLK_DEV_IDEFLOPPY=y
# CONFIG_BLK_DEV_IDESCSI is not set
CONFIG_IDE_TASK_IOCTL=y
#
# IDE chipset support/bugfixes
#
CONFIG_IDE_GENERIC=y
# CONFIG_BLK_DEV_CMD640 is not set
# CONFIG_BLK_DEV_IDEPNP is not set
CONFIG_BLK_DEV_IDEPCI=y
CONFIG_IDEPCI_SHARE_IRQ=y
# CONFIG_BLK_DEV_OFFBOARD is not set
CONFIG_BLK_DEV_GENERIC=y
# CONFIG_BLK_DEV_OPTI621 is not set
# CONFIG_BLK_DEV_RZ1000 is not set
CONFIG_BLK_DEV_IDEDMA_PCI=y
# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
CONFIG_IDEDMA_PCI_AUTO=y
# CONFIG_IDEDMA_ONLYDISK is not set
# CONFIG_BLK_DEV_AEC62XX is not set
# CONFIG_BLK_DEV_ALI15X3 is not set
# CONFIG_BLK_DEV_AMD74XX is not set
# CONFIG_BLK_DEV_ATIIXP is not set
# CONFIG_BLK_DEV_CMD64X is not set
# CONFIG_BLK_DEV_TRIFLEX is not set
# CONFIG_BLK_DEV_CY82C693 is not set
# CONFIG_BLK_DEV_CS5520 is not set
# CONFIG_BLK_DEV_CS5530 is not set
# CONFIG_BLK_DEV_CS5535 is not set
# CONFIG_BLK_DEV_HPT34X is not set
# CONFIG_BLK_DEV_HPT366 is not set
# CONFIG_BLK_DEV_JMICRON is not set
# CONFIG_BLK_DEV_SC1200 is not set
CONFIG_BLK_DEV_PIIX=y
# CONFIG_BLK_DEV_IT821X is not set
# CONFIG_BLK_DEV_NS87415 is not set
# CONFIG_BLK_DEV_PDC202XX_OLD is not set
# CONFIG_BLK_DEV_PDC202XX_NEW is not set
# CONFIG_BLK_DEV_SVWKS is not set
# CONFIG_BLK_DEV_SIIMAGE is not set
# CONFIG_BLK_DEV_SIS5513 is not set
# CONFIG_BLK_DEV_SLC90E66 is not set
# CONFIG_BLK_DEV_TRM290 is not set
# CONFIG_BLK_DEV_VIA82CXXX is not set
# CONFIG_IDE_ARM is not set
# CONFIG_IDE_CHIPSETS is not set
CONFIG_BLK_DEV_IDEDMA=y
# CONFIG_IDEDMA_IVB is not set
CONFIG_IDEDMA_AUTO=y
# CONFIG_BLK_DEV_HD is not set
#
# SCSI device support
#
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
# CONFIG_SCSI_TGT is not set
CONFIG_SCSI_PROC_FS=y
#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
# CONFIG_CHR_DEV_OSST is not set
CONFIG_BLK_DEV_SR=y
# CONFIG_BLK_DEV_SR_VENDOR is not set
CONFIG_CHR_DEV_SG=y
# CONFIG_CHR_DEV_SCH is not set
#
# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
#
# CONFIG_SCSI_MULTI_LUN is not set
# CONFIG_SCSI_CONSTANTS is not set
# CONFIG_SCSI_LOGGING is not set
#
# SCSI Transports
#
CONFIG_SCSI_SPI_ATTRS=y
CONFIG_SCSI_FC_ATTRS=y
# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SAS_ATTRS is not set
# CONFIG_SCSI_SAS_DOMAIN_ATTRS is not set
#
# SCSI low-level drivers
#
# CONFIG_ISCSI_TCP is not set
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_7000FASST is not set
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AHA152X is not set
# CONFIG_SCSI_AHA1542 is not set
# CONFIG_SCSI_AHA1740 is not set
# CONFIG_SCSI_AACRAID is not set
# CONFIG_SCSI_AIC7XXX is not set
# CONFIG_SCSI_AIC7XXX_OLD is not set
# CONFIG_SCSI_AIC79XX is not set
# CONFIG_SCSI_AIC94XX is not set
# CONFIG_SCSI_DPT_I2O is not set
# CONFIG_SCSI_ADVANSYS is not set
# CONFIG_SCSI_IN2000 is not set
# CONFIG_SCSI_ARCMSR is not set
# CONFIG_MEGARAID_NEWGEN is not set
# CONFIG_MEGARAID_LEGACY is not set
# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_SATA is not set
# CONFIG_SCSI_HPTIOP is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_DTC3280 is not set
# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_GENERIC_NCR5380 is not set
# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set
# CONFIG_SCSI_IPS is not set
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_NCR53C406A is not set
# CONFIG_SCSI_STEX is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_PAS16 is not set
# CONFIG_SCSI_PSI240I is not set
# CONFIG_SCSI_QLOGIC_FAS is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
# CONFIG_SCSI_QLA_FC is not set
# CONFIG_SCSI_LPFC is not set
# CONFIG_SCSI_SIM710 is not set
# CONFIG_SCSI_SYM53C416 is not set
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_T128 is not set
# CONFIG_SCSI_U14_34F is not set
# CONFIG_SCSI_ULTRASTOR is not set
# CONFIG_SCSI_NSP32 is not set
# CONFIG_SCSI_DEBUG is not set
# CONFIG_SCSI_SRP is not set
#
# Old CD-ROM drivers (not SCSI, not IDE)
#
# CONFIG_CD_NO_IDESCSI is not set
#
# Multi-device support (RAID and LVM)
#
# CONFIG_MD is not set
#
# Fusion MPT device support
#
# CONFIG_FUSION is not set
# CONFIG_FUSION_SPI is not set
# CONFIG_FUSION_FC is not set
# CONFIG_FUSION_SAS is not set
#
# IEEE 1394 (FireWire) support
#
# CONFIG_IEEE1394 is not set
#
# I2O device support
#
# CONFIG_I2O is not set
#
# Network device support
#
CONFIG_NETDEVICES=y
CONFIG_DUMMY=y
# CONFIG_BONDING is not set
# CONFIG_EQUALIZER is not set
# CONFIG_TUN is not set
# CONFIG_NET_SB1000 is not set
#
# ARCnet devices
#
# CONFIG_ARCNET is not set
#
# PHY device support
#
# CONFIG_PHYLIB is not set
#
# Ethernet (10 or 100Mbit)
#
CONFIG_NET_ETHERNET=y
CONFIG_MII=y
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
# CONFIG_CASSINI is not set
CONFIG_NET_VENDOR_3COM=y
# CONFIG_EL1 is not set
# CONFIG_EL2 is not set
# CONFIG_ELPLUS is not set
# CONFIG_EL16 is not set
CONFIG_EL3=m
# CONFIG_3C515 is not set
CONFIG_VORTEX=m
# CONFIG_TYPHOON is not set
# CONFIG_LANCE is not set
# CONFIG_NET_VENDOR_SMC is not set
# CONFIG_NET_VENDOR_RACAL is not set
#
# Tulip family network device support
#
# CONFIG_NET_TULIP is not set
# CONFIG_AT1700 is not set
# CONFIG_DEPCA is not set
# CONFIG_HP100 is not set
# CONFIG_NET_ISA is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
# CONFIG_AMD8111_ETH is not set
# CONFIG_ADAPTEC_STARFIRE is not set
# CONFIG_AC3200 is not set
# CONFIG_APRICOT is not set
# CONFIG_B44 is not set
# CONFIG_FORCEDETH is not set
# CONFIG_CS89x0 is not set
# CONFIG_DGRS is not set
# CONFIG_EEPRO100 is not set
CONFIG_E100=y
# CONFIG_LNE390 is not set
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
# CONFIG_NE2K_PCI is not set
# CONFIG_NE3210 is not set
# CONFIG_ES3210 is not set
# CONFIG_8139CP is not set
# CONFIG_8139TOO is not set
# CONFIG_SIS900 is not set
# CONFIG_EPIC100 is not set
# CONFIG_SUNDANCE is not set
# CONFIG_TLAN is not set
# CONFIG_VIA_RHINE is not set
#
# Ethernet (1000 Mbit)
#
# CONFIG_ACENIC is not set
# CONFIG_DL2K is not set
# CONFIG_E1000 is not set
# CONFIG_NS83820 is not set
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
CONFIG_R8169=y
# CONFIG_R8169_NAPI is not set
# CONFIG_SIS190 is not set
# CONFIG_SKGE is not set
# CONFIG_SKY2 is not set
# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
# CONFIG_TIGON3 is not set
# CONFIG_BNX2 is not set
# CONFIG_QLA3XXX is not set
#
# Ethernet (10000 Mbit)
#
# CONFIG_CHELSIO_T1 is not set
# CONFIG_IXGB is not set
# CONFIG_S2IO is not set
# CONFIG_MYRI10GE is not set
#
# Token Ring devices
#
# CONFIG_TR is not set
#
# Wireless LAN (non-hamradio)
#
# CONFIG_NET_RADIO is not set
#
# Wan interfaces
#
# CONFIG_WAN is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
# CONFIG_PPP is not set
# CONFIG_SLIP is not set
# CONFIG_NET_FC is not set
# CONFIG_SHAPER is not set
CONFIG_NETCONSOLE=y
CONFIG_NETPOLL=y
CONFIG_NETPOLL_RX=y
CONFIG_NETPOLL_TRAP=y
CONFIG_NET_POLL_CONTROLLER=y
#
# ISDN subsystem
#
# CONFIG_ISDN is not set
#
# Telephony Support
#
# CONFIG_PHONE is not set
#
# Input device support
#
CONFIG_INPUT=y
# CONFIG_INPUT_FF_EFFECTS is not set
#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
CONFIG_INPUT_MOUSEDEV_PSAUX=y
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
# CONFIG_INPUT_JOYDEV is not set
# CONFIG_INPUT_TSDEV is not set
# CONFIG_INPUT_EVDEV is not set
# CONFIG_INPUT_EVBUG is not set
#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
CONFIG_KEYBOARD_ATKBD=y
# CONFIG_KEYBOARD_SUNKBD is not set
# CONFIG_KEYBOARD_LKKBD is not set
# CONFIG_KEYBOARD_XTKBD is not set
# CONFIG_KEYBOARD_NEWTON is not set
CONFIG_INPUT_MOUSE=y
CONFIG_MOUSE_PS2=y
# CONFIG_MOUSE_SERIAL is not set
# CONFIG_MOUSE_INPORT is not set
# CONFIG_MOUSE_LOGIBM is not set
# CONFIG_MOUSE_PC110PAD is not set
# CONFIG_MOUSE_VSXXXAA is not set
# CONFIG_INPUT_JOYSTICK is not set
# CONFIG_INPUT_TOUCHSCREEN is not set
# CONFIG_INPUT_MISC is not set
#
# Hardware I/O ports
#
CONFIG_SERIO=y
CONFIG_SERIO_I8042=y
CONFIG_SERIO_SERPORT=y
# CONFIG_SERIO_CT82C710 is not set
# CONFIG_SERIO_PCIPS2 is not set
CONFIG_SERIO_LIBPS2=y
# CONFIG_SERIO_RAW is not set
CONFIG_GAMEPORT=y
# CONFIG_GAMEPORT_NS558 is not set
# CONFIG_GAMEPORT_L4 is not set
# CONFIG_GAMEPORT_EMU10K1 is not set
# CONFIG_GAMEPORT_FM801 is not set
#
# Character devices
#
CONFIG_VT=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
# CONFIG_VT_HW_CONSOLE_BINDING is not set
# CONFIG_SERIAL_NONSTANDARD is not set
#
# Serial drivers
#
CONFIG_SERIAL_8250=y
# CONFIG_SERIAL_8250_CONSOLE is not set
CONFIG_SERIAL_8250_PCI=y
CONFIG_SERIAL_8250_PNP=y
CONFIG_SERIAL_8250_NR_UARTS=4
CONFIG_SERIAL_8250_RUNTIME_UARTS=4
# CONFIG_SERIAL_8250_EXTENDED is not set
#
# Non-8250 serial port support
#
CONFIG_SERIAL_CORE=y
# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=256
# CONFIG_BRIQ_PANEL is not set
#
# IPMI
#
# CONFIG_IPMI_HANDLER is not set
#
# Watchdog Cards
#
# CONFIG_WATCHDOG is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_NVRAM is not set
CONFIG_RTC=y
# CONFIG_DTLK is not set
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
# CONFIG_SONYPI is not set
#
# Ftape, the floppy tape device driver
#
CONFIG_AGP=y
# CONFIG_AGP_ALI is not set
# CONFIG_AGP_ATI is not set
# CONFIG_AGP_AMD is not set
# CONFIG_AGP_AMD64 is not set
CONFIG_AGP_INTEL=y
# CONFIG_AGP_NVIDIA is not set
# CONFIG_AGP_SIS is not set
# CONFIG_AGP_SWORKS is not set
# CONFIG_AGP_VIA is not set
# CONFIG_AGP_EFFICEON is not set
# CONFIG_DRM is not set
# CONFIG_MWAVE is not set
# CONFIG_PC8736x_GPIO is not set
# CONFIG_NSC_GPIO is not set
# CONFIG_CS5535_GPIO is not set
CONFIG_RAW_DRIVER=m
CONFIG_MAX_RAW_DEVS=256
# CONFIG_HANGCHECK_TIMER is not set
#
# TPM devices
#
# CONFIG_TCG_TPM is not set
# CONFIG_TELCLOCK is not set
#
# I2C support
#
# CONFIG_I2C is not set
#
# SPI support
#
# CONFIG_SPI is not set
# CONFIG_SPI_MASTER is not set
#
# Dallas's 1-wire bus
#
# CONFIG_W1 is not set
#
# Hardware Monitoring support
#
# CONFIG_HWMON is not set
# CONFIG_HWMON_VID is not set
#
# Misc devices
#
# CONFIG_IBM_ASM is not set
#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
CONFIG_VIDEO_V4L2=y
#
# Digital Video Broadcasting Devices
#
# CONFIG_DVB is not set
#
# Graphics support
#
CONFIG_FIRMWARE_EDID=y
# CONFIG_FB is not set
#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
# CONFIG_VGACON_SOFT_SCROLLBACK is not set
CONFIG_VIDEO_SELECT=y
# CONFIG_MDA_CONSOLE is not set
CONFIG_DUMMY_CONSOLE=y
#
# Sound
#
# CONFIG_SOUND is not set
#
# USB support
#
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
CONFIG_USB_ARCH_HAS_EHCI=y
# CONFIG_USB is not set
#
# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
#
#
# USB Gadget Support
#
# CONFIG_USB_GADGET is not set
#
# MMC/SD Card support
#
# CONFIG_MMC is not set
#
# LED devices
#
# CONFIG_NEW_LEDS is not set
#
# LED drivers
#
#
# LED Triggers
#
#
# InfiniBand support
#
# CONFIG_INFINIBAND is not set
#
# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
#
CONFIG_EDAC=y
#
# Reporting subsystems
#
CONFIG_EDAC_DEBUG=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_EDAC_AMD76X=y
CONFIG_EDAC_E7XXX=y
CONFIG_EDAC_E752X=y
CONFIG_EDAC_I82875P=y
CONFIG_EDAC_I82860=y
# CONFIG_EDAC_K8 is not set
CONFIG_EDAC_R82600=y
CONFIG_EDAC_POLL=y
#
# Real Time Clock
#
# CONFIG_RTC_CLASS is not set
#
# DMA Engine support
#
# CONFIG_DMA_ENGINE is not set
#
# DMA Clients
#
#
# DMA Devices
#
#
# File systems
#
CONFIG_EXT2_FS=y
# CONFIG_EXT2_FS_XATTR is not set
# CONFIG_EXT2_FS_XIP is not set
CONFIG_EXT3_FS=y
# CONFIG_EXT3_FS_XATTR is not set
CONFIG_JBD=y
CONFIG_JBD_DEBUG=y
# CONFIG_REISER4_FS is not set
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
CONFIG_FS_POSIX_ACL=y
# CONFIG_XFS_FS is not set
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set
CONFIG_INOTIFY=y
CONFIG_INOTIFY_USER=y
# CONFIG_QUOTA is not set
CONFIG_DNOTIFY=y
# CONFIG_AUTOFS_FS is not set
CONFIG_AUTOFS4_FS=y
# CONFIG_FUSE_FS is not set
#
# CD-ROM/DVD Filesystems
#
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
CONFIG_ZISOFS_FS=y
CONFIG_UDF_FS=y
CONFIG_UDF_NLS=y
#
# DOS/FAT/NT Filesystems
#
CONFIG_FAT_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_FAT_DEFAULT_CODEPAGE=437
CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
CONFIG_NTFS_FS=m
CONFIG_NTFS_DEBUG=y
# CONFIG_NTFS_RW is not set
#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
CONFIG_RAMFS=y
# CONFIG_CONFIGFS_FS is not set
#
# Miscellaneous filesystems
#
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_CRAMFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
#
# Network File Systems
#
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
# CONFIG_NFS_V3_ACL is not set
CONFIG_NFS_V4=y
# CONFIG_NFS_DIRECTIO is not set
CONFIG_NFSD=y
CONFIG_NFSD_V3=y
# CONFIG_NFSD_V3_ACL is not set
CONFIG_NFSD_V4=y
CONFIG_NFSD_TCP=y
CONFIG_LOCKD=y
CONFIG_LOCKD_V4=y
CONFIG_EXPORTFS=y
CONFIG_NFS_COMMON=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
CONFIG_RPCSEC_GSS_KRB5=y
CONFIG_RPCSEC_GSS_SPKM3=m
# CONFIG_SMB_FS is not set
# CONFIG_CIFS is not set
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
# CONFIG_9P_FS is not set
#
# Partition Types
#
# CONFIG_PARTITION_ADVANCED is not set
CONFIG_MSDOS_PARTITION=y
#
# Native Language Support
#
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="iso8859-1"
# CONFIG_NLS_CODEPAGE_437 is not set
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
# CONFIG_NLS_CODEPAGE_850 is not set
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
# CONFIG_NLS_CODEPAGE_862 is not set
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
# CONFIG_NLS_CODEPAGE_866 is not set
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
# CONFIG_NLS_CODEPAGE_1251 is not set
CONFIG_NLS_ASCII=m
# CONFIG_NLS_ISO8859_1 is not set
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
# CONFIG_NLS_ISO8859_5 is not set
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
# CONFIG_NLS_KOI8_R is not set
# CONFIG_NLS_KOI8_U is not set
# CONFIG_NLS_UTF8 is not set
#
# Distributed Lock Manager
#
#
# Instrumentation Support
#
CONFIG_PROFILING=y
# CONFIG_OPROFILE is not set
# CONFIG_KPROBES is not set
#
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
# CONFIG_PRINTK_TIME is not set
CONFIG_ENABLE_MUST_CHECK=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_UNUSED_SYMBOLS=y
# CONFIG_DEBUG_SHIRQ is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_LOG_BUF_SHIFT=17
CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_SCHEDSTATS is not set
CONFIG_DEBUG_SLAB=y
CONFIG_DEBUG_SLAB_LEAK=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_PI_LIST=y
# CONFIG_RT_MUTEX_TESTER is not set
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
# CONFIG_DEBUG_RWSEMS is not set
# CONFIG_DEBUG_LOCK_ALLOC is not set
# CONFIG_PROVE_LOCKING is not set
CONFIG_DEBUG_SPINLOCK_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
# CONFIG_DEBUG_KOBJECT is not set
CONFIG_DEBUG_HIGHMEM=y
CONFIG_DEBUG_BUGVERBOSE=y
CONFIG_DEBUG_INFO=y
# CONFIG_PAGE_OWNER is not set
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_VM=y
CONFIG_FRAME_POINTER=y
# CONFIG_UNWIND_INFO is not set
# CONFIG_PROFILE_LIKELY is not set
CONFIG_FORCED_INLINING=y
# CONFIG_DEBUG_SYNCHRO_TEST is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_EARLY_PRINTK is not set
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACK_USAGE=y
#
# Page alloc debug is incompatible with Software Suspend on i386
#
CONFIG_DEBUG_RODATA=y
# CONFIG_4KSTACKS is not set
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
CONFIG_DOUBLEFAULT=y
#
# Security options
#
# CONFIG_KEYS is not set
# CONFIG_SECURITY is not set
#
# Cryptographic options
#
CONFIG_CRYPTO=y
CONFIG_CRYPTO_MANAGER=m
# CONFIG_CRYPTO_HMAC is not set
# CONFIG_CRYPTO_NULL is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_SHA1 is not set
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_WP512 is not set
# CONFIG_CRYPTO_TGR192 is not set
CONFIG_CRYPTO_DES=y
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_TWOFISH is not set
# CONFIG_CRYPTO_TWOFISH_586 is not set
# CONFIG_CRYPTO_SERPENT is not set
# CONFIG_CRYPTO_AES is not set
# CONFIG_CRYPTO_AES_586 is not set
CONFIG_CRYPTO_CAST5=m
# CONFIG_CRYPTO_CAST6 is not set
# CONFIG_CRYPTO_TEA is not set
# CONFIG_CRYPTO_ARC4 is not set
# CONFIG_CRYPTO_KHAZAD is not set
# CONFIG_CRYPTO_ANUBIS is not set
# CONFIG_CRYPTO_DEFLATE is not set
# CONFIG_CRYPTO_MICHAEL_MIC is not set
# CONFIG_CRYPTO_CRC32C is not set
# CONFIG_CRYPTO_TEST is not set
#
# Hardware crypto devices
#
# CONFIG_CRYPTO_DEV_PADLOCK is not set
#
# Library routines
#
CONFIG_CRC_CCITT=m
# CONFIG_CRC16 is not set
CONFIG_CRC32=y
# CONFIG_LIBCRC32C is not set
CONFIG_ZLIB_INFLATE=y
CONFIG_PLIST=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_X86_SMP=y
CONFIG_X86_HT=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
CONFIG_KTIME_SCALAR=y
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [43/145] i386: Redo semaphore and rwlock assembly helpers
2006-08-13 6:54 ` Andrew Morton
@ 2006-08-13 7:42 ` Andi Kleen
0 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-13 7:42 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, Jan Beulich
On Sunday 13 August 2006 08:54, Andrew Morton wrote:
> On Sun, 13 Aug 2006 08:50:01 +0200
> Andi Kleen <ak@suse.de> wrote:
>
> > On Sunday 13 August 2006 02:53, Andrew Morton wrote:
> > > On Thu, 10 Aug 2006 21:35:57 +0200 (CEST)
> > > Andi Kleen <ak@suse.de> wrote:
> > >
> > > > - Move them to a pure assembly file. Previously they were in
> > > > a C file that only consisted of inline assembly. Doing it in pure
> > > > assembler is much nicer.
> > > > - Add a frame.i include with FRAME/ENDFRAME macros to easily
> > > > add frame pointers to assembly functions
> > > > - Add dwarf2 annotation to them so that the new dwarf2 unwinder
> > > > doesn't get stuck on them
> > > > [TBD: needs review from someone who knows more about CFA than me, e.g. Jan]
> > > > - Random cleanups
> > >
> > > This patch causes the below crash after some seconds of disk stresstesting.
> >
> > I can't reproduce this with either LTP or OraSim.
> > Also I looked over the patch and I can't see any mistakes.
> >
> > Can you double check please?
> >
>
> 2-way pIII with the below .config crashes in seconds running LTP's
Ok fixed now.
On the second patch revision I added FRAMEs to the write lock functions too
and that was broken in the frame pointer case, which I didn't retest :/
Sorry. Fixed now on ff.
-Andi
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [44/145] x86_64: Remove leftover CVS Id in thunk.S
[not found] <20060810 935.775038000@suse.de>
` (42 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [43/145] i386: Redo semaphore and rwlock assembly helpers Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:35 ` [PATCH for review] [45/145] x86_64: Add some comments what tce.c actually does Andi Kleen
` (101 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
And move the comment to a proper place.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/lib/thunk.S | 13 ++++++-------
1 files changed, 6 insertions(+), 7 deletions(-)
Index: linux/arch/x86_64/lib/thunk.S
===================================================================
--- linux.orig/arch/x86_64/lib/thunk.S
+++ linux/arch/x86_64/lib/thunk.S
@@ -1,10 +1,9 @@
- /*
- * Save registers before calling assembly functions. This avoids
- * disturbance of register allocation in some inline assembly constructs.
- * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
- * Subject to the GNU public license, v.2. No warranty of any kind.
- * $Id: thunk.S,v 1.2 2002/03/13 20:06:58 ak Exp $
- */
+/*
+ * Save registers before calling assembly functions. This avoids
+ * disturbance of register allocation in some inline assembly constructs.
+ * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
+ * Subject to the GNU public license, v.2. No warranty of any kind.
+ */
#include <linux/config.h>
#include <linux/linkage.h>
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [45/145] x86_64: Add some comments what tce.c actually does
[not found] <20060810 935.775038000@suse.de>
` (43 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [44/145] x86_64: Remove leftover CVS Id in thunk.S Andi Kleen
@ 2006-08-10 19:35 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [46/145] x86_64: Remove all ifdefs for local/io apic Andi Kleen
` (100 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:35 UTC (permalink / raw)
r
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/tce.c | 2 ++
1 files changed, 2 insertions(+)
Index: linux/arch/x86_64/kernel/tce.c
===================================================================
--- linux.orig/arch/x86_64/kernel/tce.c
+++ linux/arch/x86_64/kernel/tce.c
@@ -1,4 +1,6 @@
/*
+ * This file manages the translation entries for the IBM Calgary IOMMU.
+ *
* Derived from arch/powerpc/platforms/pseries/iommu.c
*
* Copyright (C) IBM Corporation, 2006
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [46/145] x86_64: Remove all ifdefs for local/io apic
[not found] <20060810 935.775038000@suse.de>
` (44 preceding siblings ...)
2006-08-10 19:35 ` [PATCH for review] [45/145] x86_64: Add some comments what tce.c actually does Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [47/145] x86_64: Remove apic mismatch counter Andi Kleen
` (99 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
IO-APIC or local APIC can only be disabled at runtime anyways and
Kconfig has forced these options on for a long time now.
The Kconfig options are now kept only for the benefit of the shared ACPI
boot.c code.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/Makefile | 4 ++--
arch/x86_64/kernel/apic.c | 6 +-----
arch/x86_64/kernel/crash.c | 2 --
arch/x86_64/kernel/entry.S | 2 --
arch/x86_64/kernel/head64.c | 2 --
arch/x86_64/kernel/i8259.c | 9 ---------
arch/x86_64/kernel/irq.c | 6 ------
arch/x86_64/kernel/mpparse.c | 9 ---------
arch/x86_64/kernel/setup.c | 6 ------
arch/x86_64/kernel/smpboot.c | 5 -----
arch/x86_64/kernel/time.c | 8 --------
arch/x86_64/kernel/traps.c | 2 --
include/asm-x86_64/apic.h | 4 ----
include/asm-x86_64/fixmap.h | 4 ----
include/asm-x86_64/io_apic.h | 6 ------
include/asm-x86_64/irq.h | 2 --
include/asm-x86_64/mpspec.h | 2 --
include/asm-x86_64/smp.h | 4 ----
18 files changed, 3 insertions(+), 80 deletions(-)
Index: linux/arch/x86_64/kernel/Makefile
===================================================================
--- linux.orig/arch/x86_64/kernel/Makefile
+++ linux/arch/x86_64/kernel/Makefile
@@ -20,8 +20,8 @@ obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
-obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o \
+obj-y += apic.o nmi.o
+obj-y += io_apic.o mpparse.o \
genapic.o genapic_cluster.o genapic_flat.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
Index: linux/arch/x86_64/kernel/apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/apic.c
+++ linux/arch/x86_64/kernel/apic.c
@@ -661,7 +661,6 @@ void __init init_apic_mappings(void)
*/
boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
-#ifdef CONFIG_X86_IO_APIC
{
unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
int i;
@@ -679,7 +678,6 @@ void __init init_apic_mappings(void)
idx++;
}
}
-#endif
}
/*
@@ -1129,12 +1127,10 @@ int __init APIC_init_uniprocessor (void)
setup_local_APIC();
-#ifdef CONFIG_X86_IO_APIC
if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
+ setup_IO_APIC();
else
nr_ioapics = 0;
-#endif
setup_boot_APIC_clock();
check_nmi_watchdog();
return 0;
Index: linux/arch/x86_64/kernel/crash.c
===================================================================
--- linux.orig/arch/x86_64/kernel/crash.c
+++ linux/arch/x86_64/kernel/crash.c
@@ -194,9 +194,7 @@ void machine_crash_shutdown(struct pt_re
if(cpu_has_apic)
disable_local_APIC();
-#if defined(CONFIG_X86_IO_APIC)
disable_IO_APIC();
-#endif
crash_save_self(regs);
}
Index: linux/arch/x86_64/kernel/entry.S
===================================================================
--- linux.orig/arch/x86_64/kernel/entry.S
+++ linux/arch/x86_64/kernel/entry.S
@@ -676,7 +676,6 @@ ENTRY(call_function_interrupt)
END(call_function_interrupt)
#endif
-#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)
@@ -688,7 +687,6 @@ END(error_interrupt)
ENTRY(spurious_interrupt)
apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
-#endif
/*
* Exception entry points.
Index: linux/arch/x86_64/kernel/head64.c
===================================================================
--- linux.orig/arch/x86_64/kernel/head64.c
+++ linux/arch/x86_64/kernel/head64.c
@@ -111,10 +111,8 @@ void __init x86_64_start_kernel(char * r
if (s != NULL)
numa_setup(s+5);
#endif
-#ifdef CONFIG_X86_IO_APIC
if (strstr(saved_command_line, "disableapic"))
disable_apic = 1;
-#endif
/* You need early console to see that */
if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
panic("Kernel too big for kernel mapping\n");
Index: linux/arch/x86_64/kernel/i8259.c
===================================================================
--- linux.orig/arch/x86_64/kernel/i8259.c
+++ linux/arch/x86_64/kernel/i8259.c
@@ -55,7 +55,6 @@
*/
BUILD_16_IRQS(0x0)
-#ifdef CONFIG_X86_LOCAL_APIC
/*
* The IO-APIC gives us many more interrupt sources. Most of these
* are unused but an SMP system is supposed to have enough memory ...
@@ -75,8 +74,6 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
BUILD_15_IRQS(0xe)
#endif
-#endif
-
#undef BUILD_16_IRQS
#undef BUILD_15_IRQS
#undef BI
@@ -100,7 +97,6 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
void (*interrupt[NR_IRQS])(void) = {
IRQLIST_16(0x0),
-#ifdef CONFIG_X86_IO_APIC
IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
@@ -110,7 +106,6 @@ void (*interrupt[NR_IRQS])(void) = {
, IRQLIST_15(0xe)
#endif
-#endif
};
#undef IRQ
@@ -453,9 +448,7 @@ void __init init_ISA_irqs (void)
{
int i;
-#ifdef CONFIG_X86_LOCAL_APIC
init_bsp_APIC();
-#endif
init_8259A(0);
for (i = 0; i < NR_IRQS; i++) {
@@ -581,14 +574,12 @@ void __init init_IRQ(void)
set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
-#ifdef CONFIG_X86_LOCAL_APIC
/* self generated IPI for local APIC timer */
set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
/* IPI vectors for APIC spurious and error interrupts */
set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-#endif
/*
* Set the clock to HZ Hz, we already have a valid
Index: linux/arch/x86_64/kernel/irq.c
===================================================================
--- linux.orig/arch/x86_64/kernel/irq.c
+++ linux/arch/x86_64/kernel/irq.c
@@ -20,11 +20,9 @@
#include <asm/idle.h>
atomic_t irq_err_count;
-#ifdef CONFIG_X86_IO_APIC
#ifdef APIC_MISMATCH_DEBUG
atomic_t irq_mis_count;
#endif
-#endif
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/*
@@ -92,18 +90,14 @@ skip:
for_each_online_cpu(j)
seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
seq_putc(p, '\n');
-#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
seq_putc(p, '\n');
-#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#ifdef CONFIG_X86_IO_APIC
#ifdef APIC_MISMATCH_DEBUG
seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
#endif
-#endif
}
return 0;
}
Index: linux/arch/x86_64/kernel/mpparse.c
===================================================================
--- linux.orig/arch/x86_64/kernel/mpparse.c
+++ linux/arch/x86_64/kernel/mpparse.c
@@ -74,14 +74,10 @@ physid_mask_t phys_cpu_present_map = PHY
/* ACPI MADT entry parsing functions */
#ifdef CONFIG_ACPI
extern struct acpi_boot_flags acpi_boot;
-#ifdef CONFIG_X86_LOCAL_APIC
extern int acpi_parse_lapic (acpi_table_entry_header *header);
extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
-#endif /*CONFIG_X86_LOCAL_APIC*/
-#ifdef CONFIG_X86_IO_APIC
extern int acpi_parse_ioapic (acpi_table_entry_header *header);
-#endif /*CONFIG_X86_IO_APIC*/
#endif /*CONFIG_ACPI*/
u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
@@ -661,9 +657,7 @@ void __init find_intel_smp (void)
*/
void __init find_smp_config (void)
{
-#ifdef CONFIG_X86_LOCAL_APIC
find_intel_smp();
-#endif
}
@@ -717,8 +711,6 @@ void __cpuinit mp_register_lapic (
MP_processor_info(&processor);
}
-#ifdef CONFIG_X86_IO_APIC
-
#define MP_ISA_BUS 0
#define MP_MAX_IOAPIC_PIN 127
@@ -997,5 +989,4 @@ int mp_register_gsi(u32 gsi, int trigger
return gsi;
}
-#endif /*CONFIG_X86_IO_APIC*/
#endif /*CONFIG_ACPI*/
Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -340,11 +340,9 @@ static __init void parse_cmdline_early (
else if (fullarg(from, "acpi=strict")) {
acpi_strict = 1;
}
-#ifdef CONFIG_X86_IO_APIC
else if (fullarg(from, "acpi_skip_timer_override"))
acpi_skip_timer_override = 1;
#endif
-#endif
if (fullarg(from, "disable_timer_pin_1"))
disable_timer_pin_1 = 1;
@@ -627,12 +625,10 @@ void __init setup_arch(char **cmdline_p)
*/
acpi_reserve_bootmem();
#endif
-#ifdef CONFIG_X86_LOCAL_APIC
/*
* Find and reserve possible boot-time SMP configuration:
*/
find_smp_config();
-#endif
#ifdef CONFIG_BLK_DEV_INITRD
if (LOADER_TYPE && INITRD_START) {
if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
@@ -676,14 +672,12 @@ void __init setup_arch(char **cmdline_p)
init_cpu_to_node();
-#ifdef CONFIG_X86_LOCAL_APIC
/*
* get boot-time SMP configuration:
*/
if (smp_found_config)
get_smp_config();
init_apic_mappings();
-#endif
/*
* Request address space for all standard RAM and ROM resources
Index: linux/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux.orig/arch/x86_64/kernel/smpboot.c
+++ linux/arch/x86_64/kernel/smpboot.c
@@ -1175,13 +1175,8 @@ int __cpuinit __cpu_up(unsigned int cpu)
void __init smp_cpus_done(unsigned int max_cpus)
{
smp_cleanup_boot();
-
-#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
-#endif
-
check_nmi_watchdog();
-
time_init_gtod();
}
Index: linux/arch/x86_64/kernel/time.c
===================================================================
--- linux.orig/arch/x86_64/kernel/time.c
+++ linux/arch/x86_64/kernel/time.c
@@ -41,9 +41,7 @@
#include <asm/sections.h>
#include <linux/cpufreq.h>
#include <linux/hpet.h>
-#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/apic.h>
-#endif
#ifdef CONFIG_CPU_FREQ
static void cpufreq_delayed_get(void);
@@ -438,12 +436,8 @@ void main_timer_handler(struct pt_regs *
* have to call the local interrupt handler.
*/
-#ifndef CONFIG_X86_LOCAL_APIC
- profile_tick(CPU_PROFILING, regs);
-#else
if (!using_apic_timer)
smp_local_timer_interrupt(regs);
-#endif
/*
* If we have an externally synchronized Linux clock, then update CMOS clock
@@ -467,10 +461,8 @@ static irqreturn_t timer_interrupt(int i
if (apic_runs_main_timer > 1)
return IRQ_HANDLED;
main_timer_handler(regs);
-#ifdef CONFIG_X86_LOCAL_APIC
if (using_apic_timer)
smp_send_timer_broadcast_ipi();
-#endif
return IRQ_HANDLED;
}
Index: linux/arch/x86_64/kernel/traps.c
===================================================================
--- linux.orig/arch/x86_64/kernel/traps.c
+++ linux/arch/x86_64/kernel/traps.c
@@ -784,7 +784,6 @@ asmlinkage __kprobes void default_do_nmi
if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
== NOTIFY_STOP)
return;
-#ifdef CONFIG_X86_LOCAL_APIC
/*
* Ok, so this is none of the documented NMI sources,
* so it must be the NMI watchdog.
@@ -792,7 +791,6 @@ asmlinkage __kprobes void default_do_nmi
if (nmi_watchdog_tick(regs,reason))
return;
if (!do_nmi_callback(regs,cpu))
-#endif
unknown_nmi_error(reason, regs);
return;
Index: linux/include/asm-x86_64/apic.h
===================================================================
--- linux.orig/include/asm-x86_64/apic.h
+++ linux/include/asm-x86_64/apic.h
@@ -29,8 +29,6 @@ extern int apic_runs_main_timer;
printk(s, ##a); \
} while (0)
-#ifdef CONFIG_X86_LOCAL_APIC
-
struct pt_regs;
/*
@@ -104,8 +102,6 @@ void switch_ipi_to_APIC_timer(void *cpum
#define ARCH_APICTIMER_STOPS_ON_C3 1
-#endif /* CONFIG_X86_LOCAL_APIC */
-
extern unsigned boot_cpu_id;
#endif /* __ASM_APIC_H */
Index: linux/include/asm-x86_64/fixmap.h
===================================================================
--- linux.orig/include/asm-x86_64/fixmap.h
+++ linux/include/asm-x86_64/fixmap.h
@@ -37,13 +37,9 @@ enum fixed_addresses {
VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
VSYSCALL_HPET,
FIX_HPET_BASE,
-#ifdef CONFIG_X86_LOCAL_APIC
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
-#endif
-#ifdef CONFIG_X86_IO_APIC
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
-#endif
__end_of_fixed_addresses
};
Index: linux/include/asm-x86_64/io_apic.h
===================================================================
--- linux.orig/include/asm-x86_64/io_apic.h
+++ linux/include/asm-x86_64/io_apic.h
@@ -10,8 +10,6 @@
* Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
*/
-#ifdef CONFIG_X86_IO_APIC
-
#ifdef CONFIG_PCI_MSI
static inline int use_pci_vector(void) {return 1;}
static inline void disable_edge_ioapic_vector(unsigned int vector) { }
@@ -209,10 +207,6 @@ extern int timer_uses_ioapic_pin_0;
extern int sis_apic_bug; /* dummy */
-#else /* !CONFIG_X86_IO_APIC */
-#define io_apic_assign_pci_irqs 0
-#endif
-
extern int assign_irq_vector(int irq);
void enable_NMI_through_LVT0 (void * dummy);
Index: linux/include/asm-x86_64/irq.h
===================================================================
--- linux.orig/include/asm-x86_64/irq.h
+++ linux/include/asm-x86_64/irq.h
@@ -44,9 +44,7 @@ static __inline__ int irq_canonicalize(i
return ((irq == 2) ? 9 : irq);
}
-#ifdef CONFIG_X86_LOCAL_APIC
#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */
-#endif
#ifdef CONFIG_HOTPLUG_CPU
#include <linux/cpumask.h>
Index: linux/include/asm-x86_64/mpspec.h
===================================================================
--- linux.orig/include/asm-x86_64/mpspec.h
+++ linux/include/asm-x86_64/mpspec.h
@@ -184,12 +184,10 @@ extern int pic_mode;
extern void mp_register_lapic (u8 id, u8 enabled);
extern void mp_register_lapic_address (u64 address);
-#ifdef CONFIG_X86_IO_APIC
extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base);
extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 gsi);
extern void mp_config_acpi_legacy_irqs (void);
extern int mp_register_gsi (u32 gsi, int triggering, int polarity);
-#endif /*CONFIG_X86_IO_APIC*/
#endif
extern int using_apic_timer;
Index: linux/include/asm-x86_64/smp.h
===================================================================
--- linux.orig/include/asm-x86_64/smp.h
+++ linux/include/asm-x86_64/smp.h
@@ -9,15 +9,11 @@
#include <linux/bitops.h>
extern int disable_apic;
-#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/fixmap.h>
#include <asm/mpspec.h>
-#ifdef CONFIG_X86_IO_APIC
#include <asm/io_apic.h>
-#endif
#include <asm/apic.h>
#include <asm/thread_info.h>
-#endif
#ifdef CONFIG_SMP
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [47/145] x86_64: Remove apic mismatch counter
[not found] <20060810 935.775038000@suse.de>
` (45 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [46/145] x86_64: Remove all ifdefs for local/io apic Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [48/145] x86_64: Remove old "focus disabled" chipset errata workaround Andi Kleen
` (98 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Nobody has been setting the mismatch counter and the ifdef was never
set so remove it.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/irq.c | 6 ------
1 files changed, 6 deletions(-)
Index: linux/arch/x86_64/kernel/irq.c
===================================================================
--- linux.orig/arch/x86_64/kernel/irq.c
+++ linux/arch/x86_64/kernel/irq.c
@@ -20,9 +20,6 @@
#include <asm/idle.h>
atomic_t irq_err_count;
-#ifdef APIC_MISMATCH_DEBUG
-atomic_t irq_mis_count;
-#endif
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/*
@@ -95,9 +92,6 @@ skip:
seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
seq_putc(p, '\n');
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#ifdef APIC_MISMATCH_DEBUG
- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
}
return 0;
}
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [48/145] x86_64: Remove old "focus disabled" chipset errata workaround
[not found] <20060810 935.775038000@suse.de>
` (46 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [47/145] x86_64: Remove apic mismatch counter Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [49/145] x86_64: Clean up and minor fixes to TLB flush Andi Kleen
` (97 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
The new systems already use focus disabled and the comment was
completely outdated.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/apic.c | 28 ++--------------------------
1 files changed, 2 insertions(+), 26 deletions(-)
Index: linux/arch/x86_64/kernel/apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/apic.c
+++ linux/arch/x86_64/kernel/apic.c
@@ -399,32 +399,8 @@ void __cpuinit setup_local_APIC (void)
*/
value |= APIC_SPIV_APIC_ENABLED;
- /*
- * Some unknown Intel IO/APIC (or APIC) errata are biting us with
- * certain networking cards. If high frequency interrupts are
- * happening on a particular IOAPIC pin, plus the IOAPIC routing
- * entry is masked/unmasked at a high rate as well then sooner or
- * later IOAPIC line gets 'stuck', no more interrupts are received
- * from the device. If focus CPU is disabled then the hang goes
- * away, oh well :-(
- *
- * [ This bug can be reproduced easily with a level-triggered
- * PCI Ne2000 networking cards and PII/PIII processors, dual
- * BX chipset. ]
- */
- /*
- * Actually disabling the focus CPU check just makes the hang less
- * frequent as it makes the interrupt distributon model be more
- * like LRU than MRU (the short-term load is more even across CPUs).
- * See also the comment in end_level_ioapic_irq(). --macro
- */
-#if 1
- /* Enable focus processor (bit==0) */
- value &= ~APIC_SPIV_FOCUS_DISABLED;
-#else
- /* Disable focus processor (bit==1) */
- value |= APIC_SPIV_FOCUS_DISABLED;
-#endif
+ /* We always use processor focus */
+
/*
* Set spurious IRQ vector
*/
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [49/145] x86_64: Clean up and minor fixes to TLB flush
[not found] <20060810 935.775038000@suse.de>
` (47 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [48/145] x86_64: Remove old "focus disabled" chipset errata workaround Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [50/145] i386: Minor fixes & cleanup to tlb flush Andi Kleen
` (96 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
- Convert CR* accesses to dedicated inline functions and rewrite
the rest as C inlines
- Don't do a double flush for global flushes (pointed out by Zach Amsden)
This was a bug workaround for old CPUs that don't do 64bit and is obsolete.
- Add a proper memory clobber to invlpg
- Remove an unused extern
Signed-off-by: Andi Kleen <ak@suse.de>
---
include/asm-x86_64/pgtable.h | 2 -
include/asm-x86_64/tlbflush.h | 66 +++++++++++++++++++++---------------------
2 files changed, 33 insertions(+), 35 deletions(-)
Index: linux/include/asm-x86_64/pgtable.h
===================================================================
--- linux.orig/include/asm-x86_64/pgtable.h
+++ linux/include/asm-x86_64/pgtable.h
@@ -25,8 +25,6 @@ extern int nonx_setup(char *str);
extern void paging_init(void);
extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
-extern unsigned long pgkern_mask;
-
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
Index: linux/include/asm-x86_64/tlbflush.h
===================================================================
--- linux.orig/include/asm-x86_64/tlbflush.h
+++ linux/include/asm-x86_64/tlbflush.h
@@ -4,44 +4,44 @@
#include <linux/mm.h>
#include <asm/processor.h>
-#define __flush_tlb() \
- do { \
- unsigned long tmpreg; \
- \
- __asm__ __volatile__( \
- "movq %%cr3, %0; # flush TLB \n" \
- "movq %0, %%cr3; \n" \
- : "=r" (tmpreg) \
- :: "memory"); \
- } while (0)
+static inline unsigned long get_cr3(void)
+{
+ unsigned long cr3;
+ asm volatile("mov %%cr3,%0" : "=r" (cr3));
+ return cr3;
+}
-/*
- * Global pages have to be flushed a bit differently. Not a real
- * performance problem because this does not happen often.
- */
-#define __flush_tlb_global() \
- do { \
- unsigned long tmpreg, cr4, cr4_orig; \
- \
- __asm__ __volatile__( \
- "movq %%cr4, %2; # turn off PGE \n" \
- "movq %2, %1; \n" \
- "andq %3, %1; \n" \
- "movq %1, %%cr4; \n" \
- "movq %%cr3, %0; # flush TLB \n" \
- "movq %0, %%cr3; \n" \
- "movq %2, %%cr4; # turn PGE back on \n" \
- : "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig) \
- : "i" (~X86_CR4_PGE) \
- : "memory"); \
- } while (0)
+static inline void set_cr3(unsigned long cr3)
+{
+ asm volatile("mov %0,%%cr3" :: "r" (cr3) : "memory");
+}
+
+static inline void __flush_tlb(void)
+{
+ set_cr3(get_cr3());
+}
+
+static inline unsigned long get_cr4(void)
+{
+ unsigned long cr4;
+ asm volatile("mov %%cr4,%0" : "=r" (cr4));
+ return cr4;
+}
-extern unsigned long pgkern_mask;
+static inline void set_cr4(unsigned long cr4)
+{
+ asm volatile("mov %0,%%cr4" :: "r" (cr4) : "memory");
+}
-#define __flush_tlb_all() __flush_tlb_global()
+static inline void __flush_tlb_all(void)
+{
+ unsigned long cr4 = get_cr4();
+ set_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */
+ set_cr4(cr4); /* write old PGE again and flush TLBs */
+}
#define __flush_tlb_one(addr) \
- __asm__ __volatile__("invlpg %0": :"m" (*(char *) addr))
+ __asm__ __volatile__("invlpg (%0)" :: "r" (addr) : "memory")
/*
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [50/145] i386: Minor fixes & cleanup to tlb flush
[not found] <20060810 935.775038000@suse.de>
` (48 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [49/145] x86_64: Clean up and minor fixes to TLB flush Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [51/145] x86_64: Add some comments to entry.S Andi Kleen
` (95 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
(based on x86-64 changes)
- Add a proper memory clobber to invlpg
- Remove an unused extern
Signed-off-by: Andi Kleen <ak@suse.de>
---
include/asm-i386/tlbflush.h | 4 +---
1 files changed, 1 insertion(+), 3 deletions(-)
Index: linux/include/asm-i386/tlbflush.h
===================================================================
--- linux.orig/include/asm-i386/tlbflush.h
+++ linux/include/asm-i386/tlbflush.h
@@ -36,8 +36,6 @@
: "memory"); \
} while (0)
-extern unsigned long pgkern_mask;
-
# define __flush_tlb_all() \
do { \
if (cpu_has_pge) \
@@ -49,7 +47,7 @@ extern unsigned long pgkern_mask;
#define cpu_has_invlpg (boot_cpu_data.x86 > 3)
#define __flush_tlb_single(addr) \
- __asm__ __volatile__("invlpg %0": :"m" (*(char *) addr))
+ __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory")
#ifdef CONFIG_X86_INVLPG
# define __flush_tlb_one(addr) __flush_tlb_single(addr)
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [51/145] x86_64: Add some comments to entry.S
[not found] <20060810 935.775038000@suse.de>
` (49 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [50/145] i386: Minor fixes & cleanup to tlb flush Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [52/145] x86_64: Remove pirq overwrite support Andi Kleen
` (94 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
And remove some old obsolete ones.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/entry.S | 18 +++++++++++++++---
1 files changed, 15 insertions(+), 3 deletions(-)
Index: linux/arch/x86_64/kernel/entry.S
===================================================================
--- linux.orig/arch/x86_64/kernel/entry.S
+++ linux/arch/x86_64/kernel/entry.S
@@ -22,9 +22,21 @@
* at the top of the kernel process stack.
* - partial stack frame: partially saved registers upto R11.
* - full stack frame: Like partial stack frame, but all register saved.
- *
- * TODO:
- * - schedule it carefully for the final hardware.
+ *
+ * Some macro usage:
+ * - CFI macros are used to generate dwarf2 unwind information for better
+ * backtraces. They don't change any code.
+ * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
+ * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
+ * There are unfortunately lots of special cases where some registers
+ * are not touched. The macro is a big mess that should be cleaned up.
+ * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
+ * Gives a full stack frame.
+ * - ENTRY/END Define functions in the symbol table.
+ * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
+ * frame that is otherwise undefined after a SYSCALL
+ * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
+ * - errorentry/paranoidentry/zeroentry - Define exception entry points.
*/
#include <linux/linkage.h>
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [52/145] x86_64: Remove pirq overwrite support
[not found] <20060810 935.775038000@suse.de>
` (50 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [51/145] x86_64: Add some comments to entry.S Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [53/145] x86_64: Remove leftover MCE/EISA support Andi Kleen
` (93 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
This was an old workaround for broken MP-BIOS. The user could
specify overwrites on the command line.
I've never seen it being used for anything on 64bit. So get
rid of it for now.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/io_apic.c | 55 -------------------------------------------
1 files changed, 55 deletions(-)
Index: linux/arch/x86_64/kernel/io_apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/io_apic.c
+++ linux/arch/x86_64/kernel/io_apic.c
@@ -225,14 +225,6 @@ static void clear_IO_APIC (void)
clear_IO_APIC_pin(apic, pin);
}
-/*
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
- * specific CPU-side IRQs.
- */
-
-#define MAX_PIRQS 8
-static int pirq_entries [MAX_PIRQS];
-static int pirqs_enabled;
int skip_ioapic_setup;
int ioapic_force;
@@ -370,34 +362,6 @@ void __init check_ioapic(void)
}
}
-static int __init ioapic_pirq_setup(char *str)
-{
- int i, max;
- int ints[MAX_PIRQS+1];
-
- get_options(str, ARRAY_SIZE(ints), ints);
-
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
-
- pirqs_enabled = 1;
- apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
- max = MAX_PIRQS;
- if (ints[0] < MAX_PIRQS)
- max = ints[0];
-
- for (i = 0; i < max; i++) {
- apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
- /*
- * PIRQs are mapped upside down, usually.
- */
- pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
- }
- return 1;
-}
-
-__setup("pirq=", ioapic_pirq_setup);
-
/*
* Find the IRQ entry number of a certain pin.
*/
@@ -793,22 +757,6 @@ static int pin_2_irq(int idx, int apic,
}
}
BUG_ON(irq >= NR_IRQS);
-
- /*
- * PCI IRQ command line redirection. Yes, limits are hardcoded.
- */
- if ((pin >= 16) && (pin <= 23)) {
- if (pirq_entries[pin-16] != -1) {
- if (!pirq_entries[pin-16]) {
- apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
- } else {
- irq = pirq_entries[pin-16];
- apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
- pin-16, irq);
- }
- }
- }
- BUG_ON(irq >= NR_IRQS);
return irq;
}
@@ -1281,9 +1229,6 @@ static void __init enable_IO_APIC(void)
irq_2_pin[i].pin = -1;
irq_2_pin[i].next = 0;
}
- if (!pirqs_enabled)
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
/*
* The number of IO-APIC IRQ registers (== #pins):
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [53/145] x86_64: Remove leftover MCE/EISA support
[not found] <20060810 935.775038000@suse.de>
` (51 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [52/145] x86_64: Remove pirq overwrite support Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [54/145] x86_64: Remove obsolete PIC mode Andi Kleen
` (92 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
No 64bit EISA or Microchannel systems have ever existed. Remove the leftover
code in the IO-APIC driver and the mptable parser.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/io_apic.c | 57 +------------------------------------------
arch/x86_64/kernel/mpparse.c | 16 ------------
2 files changed, 3 insertions(+), 70 deletions(-)
Index: linux/arch/x86_64/kernel/io_apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/io_apic.c
+++ linux/arch/x86_64/kernel/io_apic.c
@@ -389,9 +389,7 @@ static int __init find_isa_irq_pin(int i
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
+ if (mp_bus_id_to_type[lbus] == MP_BUS_ISA &&
(mp_irqs[i].mpc_irqtype == type) &&
(mp_irqs[i].mpc_srcbusirq == irq))
@@ -407,9 +405,7 @@ static int __init find_isa_irq_apic(int
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA) &&
(mp_irqs[i].mpc_irqtype == type) &&
(mp_irqs[i].mpc_srcbusirq == irq))
break;
@@ -472,27 +468,6 @@ int IO_APIC_get_PCI_irq_vector(int bus,
return best_guess;
}
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int EISA_ELCR(unsigned int irq)
-{
- if (irq < 16) {
- unsigned int port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
- }
- apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
- return 0;
-}
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value. If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
-#define default_EISA_polarity(idx) (0)
-
/* ISA interrupts are always polarity zero edge triggered,
* when listed as conforming in the MP table. */
@@ -505,12 +480,6 @@ static int EISA_ELCR(unsigned int irq)
#define default_PCI_trigger(idx) (1)
#define default_PCI_polarity(idx) (1)
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx) (1)
-#define default_MCA_polarity(idx) (0)
-
static int __init MPBIOS_polarity(int idx)
{
int bus = mp_irqs[idx].mpc_srcbus;
@@ -530,21 +499,11 @@ static int __init MPBIOS_polarity(int id
polarity = default_ISA_polarity(idx);
break;
}
- case MP_BUS_EISA: /* EISA pin */
- {
- polarity = default_EISA_polarity(idx);
- break;
- }
case MP_BUS_PCI: /* PCI pin */
{
polarity = default_PCI_polarity(idx);
break;
}
- case MP_BUS_MCA: /* MCA pin */
- {
- polarity = default_MCA_polarity(idx);
- break;
- }
default:
{
printk(KERN_WARNING "broken BIOS!!\n");
@@ -599,21 +558,11 @@ static int MPBIOS_trigger(int idx)
trigger = default_ISA_trigger(idx);
break;
}
- case MP_BUS_EISA: /* EISA pin */
- {
- trigger = default_EISA_trigger(idx);
- break;
- }
case MP_BUS_PCI: /* PCI pin */
{
trigger = default_PCI_trigger(idx);
break;
}
- case MP_BUS_MCA: /* MCA pin */
- {
- trigger = default_MCA_trigger(idx);
- break;
- }
default:
{
printk(KERN_WARNING "broken BIOS!!\n");
@@ -731,8 +680,6 @@ static int pin_2_irq(int idx, int apic,
switch (mp_bus_id_to_type[bus])
{
case MP_BUS_ISA: /* ISA pin */
- case MP_BUS_EISA:
- case MP_BUS_MCA:
{
irq = mp_irqs[idx].mpc_srcbusirq;
break;
Index: linux/arch/x86_64/kernel/mpparse.c
===================================================================
--- linux.orig/arch/x86_64/kernel/mpparse.c
+++ linux/arch/x86_64/kernel/mpparse.c
@@ -175,14 +175,10 @@ static void __init MP_bus_info (struct m
if (strncmp(str, "ISA", 3) == 0) {
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
- } else if (strncmp(str, "EISA", 4) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
} else if (strncmp(str, "PCI", 3) == 0) {
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
mp_current_pci_id++;
- } else if (strncmp(str, "MCA", 3) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
} else {
printk(KERN_ERR "Unknown bustype %s\n", str);
}
@@ -465,14 +461,6 @@ static inline void __init construct_defa
case 5:
memcpy(bus.mpc_bustype, "ISA ", 6);
break;
- case 2:
- case 6:
- case 3:
- memcpy(bus.mpc_bustype, "EISA ", 6);
- break;
- case 4:
- case 7:
- memcpy(bus.mpc_bustype, "MCA ", 6);
}
MP_bus_info(&bus);
if (mpc_default_type > 4) {
@@ -629,9 +617,7 @@ void __init find_intel_smp (void)
smp_scan_config(0xF0000,0x10000))
return;
/*
- * If it is an SMP machine we should know now, unless the
- * configuration is in an EISA/MCA bus machine with an
- * extended bios data area.
+ * If it is an SMP machine we should know now.
*
* there is a real-mode segmented pointer pointing to the
* 4K EBDA area at 0x40E, calculate and scan it here.
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [54/145] x86_64: Remove obsolete PIC mode
[not found] <20060810 935.775038000@suse.de>
` (52 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [53/145] x86_64: Remove leftover MCE/EISA support Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-16 16:31 ` Len Brown
2006-08-10 19:36 ` [PATCH for review] [55/145] i386/x86-64: Remove obsolete sanity check in mptable parsing Andi Kleen
` (91 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
PIC mode is an outdated way to drive the APICs that was used on
some early MP boards. It is not supported in the ACPI model.
It is unlikely ever to be configured by any x86-64 system.
Remove it thus.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/apic.c | 92 +++++++++++++------------------------------
arch/x86_64/kernel/mpparse.c | 8 ---
arch/x86_64/kernel/smpboot.c | 1
include/asm-x86_64/mpspec.h | 1
include/asm-x86_64/smp.h | 1
5 files changed, 29 insertions(+), 74 deletions(-)
Index: linux/arch/x86_64/kernel/apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/apic.c
+++ linux/arch/x86_64/kernel/apic.c
@@ -136,72 +136,40 @@ void clear_local_APIC(void)
apic_read(APIC_ESR);
}
-void __init connect_bsp_APIC(void)
-{
- if (pic_mode) {
- /*
- * Do not trust the local APIC being empty at bootup.
- */
- clear_local_APIC();
- /*
- * PIC mode, enable APIC mode in the IMCR, i.e.
- * connect BSP's local APIC to INT and NMI lines.
- */
- apic_printk(APIC_VERBOSE, "leaving PIC mode, enabling APIC mode.\n");
- outb(0x70, 0x22);
- outb(0x01, 0x23);
- }
-}
-
void disconnect_bsp_APIC(int virt_wire_setup)
{
- if (pic_mode) {
- /*
- * Put the board back into PIC mode (has an effect
- * only on certain older boards). Note that APIC
- * interrupts, including IPIs, won't work beyond
- * this point! The only exception are INIT IPIs.
- */
- apic_printk(APIC_QUIET, "disabling APIC mode, entering PIC mode.\n");
- outb(0x70, 0x22);
- outb(0x00, 0x23);
- }
- else {
- /* Go back to Virtual Wire compatibility mode */
- unsigned long value;
-
- /* For the spurious interrupt use vector F, and enable it */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_VECTOR_MASK;
- value |= APIC_SPIV_APIC_ENABLED;
- value |= 0xf;
- apic_write(APIC_SPIV, value);
-
- if (!virt_wire_setup) {
- /* For LVT0 make it edge triggered, active high, external and enabled */
- value = apic_read(APIC_LVT0);
- value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
- APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
- value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
- value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
- apic_write(APIC_LVT0, value);
- }
- else {
- /* Disable LVT0 */
- apic_write(APIC_LVT0, APIC_LVT_MASKED);
- }
+ /* Go back to Virtual Wire compatibility mode */
+ unsigned long value;
- /* For LVT1 make it edge triggered, active high, nmi and enabled */
- value = apic_read(APIC_LVT1);
- value &= ~(
- APIC_MODE_MASK | APIC_SEND_PENDING |
+ /* For the spurious interrupt use vector F, and enable it */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ value |= APIC_SPIV_APIC_ENABLED;
+ value |= 0xf;
+ apic_write(APIC_SPIV, value);
+
+ if (!virt_wire_setup) {
+ /* For LVT0 make it edge triggered, active high, external and enabled */
+ value = apic_read(APIC_LVT0);
+ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
- value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
- apic_write(APIC_LVT1, value);
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+ apic_write(APIC_LVT0, value);
+ } else {
+ /* Disable LVT0 */
+ apic_write(APIC_LVT0, APIC_LVT_MASKED);
}
+
+ /* For LVT1 make it edge triggered, active high, nmi and enabled */
+ value = apic_read(APIC_LVT1);
+ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+ apic_write(APIC_LVT1, value);
}
void disable_local_APIC(void)
@@ -418,7 +386,7 @@ void __cpuinit setup_local_APIC (void)
* TODO: set up through-local-APIC from through-I/O-APIC? --macro
*/
value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
- if (!smp_processor_id() && (pic_mode || !value)) {
+ if (!smp_processor_id() && !value) {
value = APIC_DM_EXTINT;
apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", smp_processor_id());
} else {
@@ -1096,8 +1064,6 @@ int __init APIC_init_uniprocessor (void)
verify_local_APIC();
- connect_bsp_APIC();
-
phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id));
Index: linux/arch/x86_64/kernel/mpparse.c
===================================================================
--- linux.orig/arch/x86_64/kernel/mpparse.c
+++ linux/arch/x86_64/kernel/mpparse.c
@@ -56,7 +56,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ
int mp_irq_entries;
int nr_ioapics;
-int pic_mode;
unsigned long mp_lapic_addr = 0;
@@ -514,13 +513,6 @@ void __init get_smp_config (void)
printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
- if (mpf->mpf_feature2 & (1<<7)) {
- printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
- pic_mode = 1;
- } else {
- printk(KERN_INFO " Virtual Wire compatibility mode.\n");
- pic_mode = 0;
- }
/*
* Now see if we need to read further.
Index: linux/include/asm-x86_64/mpspec.h
===================================================================
--- linux.orig/include/asm-x86_64/mpspec.h
+++ linux/include/asm-x86_64/mpspec.h
@@ -178,7 +178,6 @@ extern int mp_irq_entries;
extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
extern int mpc_default_type;
extern unsigned long mp_lapic_addr;
-extern int pic_mode;
#ifdef CONFIG_ACPI
extern void mp_register_lapic (u8 id, u8 enabled);
Index: linux/include/asm-x86_64/smp.h
===================================================================
--- linux.orig/include/asm-x86_64/smp.h
+++ linux/include/asm-x86_64/smp.h
@@ -33,7 +33,6 @@ extern cpumask_t cpu_initialized;
extern void smp_alloc_memory(void);
extern volatile unsigned long smp_invalidate_needed;
-extern int pic_mode;
extern void lock_ipi_call_lock(void);
extern void unlock_ipi_call_lock(void);
extern int smp_num_siblings;
Index: linux/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux.orig/arch/x86_64/kernel/smpboot.c
+++ linux/arch/x86_64/kernel/smpboot.c
@@ -1090,7 +1090,6 @@ void __init smp_prepare_cpus(unsigned in
/*
* Switch from PIC to APIC mode.
*/
- connect_bsp_APIC();
setup_local_APIC();
if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [54/145] x86_64: Remove obsolete PIC mode
2006-08-10 19:36 ` [PATCH for review] [54/145] x86_64: Remove obsolete PIC mode Andi Kleen
@ 2006-08-16 16:31 ` Len Brown
2006-08-16 16:35 ` Andi Kleen
0 siblings, 1 reply; 199+ messages in thread
From: Len Brown @ 2006-08-16 16:31 UTC (permalink / raw)
To: Andi Kleen, linux-acpi; +Cc: linux-kernel
On Thursday 10 August 2006 15:36, Andi Kleen wrote:
> PIC mode is an outdated way to drive the APICs that was used on
> some early MP boards. It is not supported in the ACPI model.
>
> It is unlikely to be ever configured by any x86-64 system
>
> Remove it thus.
Is there any reason we can't entirely remove MPS from x86_64?
(aside from the routines that ACPI uses)
-Len
> Signed-off-by: Andi Kleen <ak@suse.de>
>
> ---
> arch/x86_64/kernel/apic.c | 92 +++++++++++++------------------------------
> arch/x86_64/kernel/mpparse.c | 8 ---
> arch/x86_64/kernel/smpboot.c | 1
> include/asm-x86_64/mpspec.h | 1
> include/asm-x86_64/smp.h | 1
> 5 files changed, 29 insertions(+), 74 deletions(-)
>
> Index: linux/arch/x86_64/kernel/apic.c
> ===================================================================
> --- linux.orig/arch/x86_64/kernel/apic.c
> +++ linux/arch/x86_64/kernel/apic.c
> @@ -136,72 +136,40 @@ void clear_local_APIC(void)
> apic_read(APIC_ESR);
> }
>
> -void __init connect_bsp_APIC(void)
> -{
> - if (pic_mode) {
> - /*
> - * Do not trust the local APIC being empty at bootup.
> - */
> - clear_local_APIC();
> - /*
> - * PIC mode, enable APIC mode in the IMCR, i.e.
> - * connect BSP's local APIC to INT and NMI lines.
> - */
> - apic_printk(APIC_VERBOSE, "leaving PIC mode, enabling APIC mode.\n");
> - outb(0x70, 0x22);
> - outb(0x01, 0x23);
> - }
> -}
> -
> void disconnect_bsp_APIC(int virt_wire_setup)
> {
> - if (pic_mode) {
> - /*
> - * Put the board back into PIC mode (has an effect
> - * only on certain older boards). Note that APIC
> - * interrupts, including IPIs, won't work beyond
> - * this point! The only exception are INIT IPIs.
> - */
> - apic_printk(APIC_QUIET, "disabling APIC mode, entering PIC mode.\n");
> - outb(0x70, 0x22);
> - outb(0x00, 0x23);
> - }
> - else {
> - /* Go back to Virtual Wire compatibility mode */
> - unsigned long value;
> -
> - /* For the spurious interrupt use vector F, and enable it */
> - value = apic_read(APIC_SPIV);
> - value &= ~APIC_VECTOR_MASK;
> - value |= APIC_SPIV_APIC_ENABLED;
> - value |= 0xf;
> - apic_write(APIC_SPIV, value);
> -
> - if (!virt_wire_setup) {
> - /* For LVT0 make it edge triggered, active high, external and enabled */
> - value = apic_read(APIC_LVT0);
> - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
> - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
> - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
> - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
> - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
> - apic_write(APIC_LVT0, value);
> - }
> - else {
> - /* Disable LVT0 */
> - apic_write(APIC_LVT0, APIC_LVT_MASKED);
> - }
> + /* Go back to Virtual Wire compatibility mode */
> + unsigned long value;
>
> - /* For LVT1 make it edge triggered, active high, nmi and enabled */
> - value = apic_read(APIC_LVT1);
> - value &= ~(
> - APIC_MODE_MASK | APIC_SEND_PENDING |
> + /* For the spurious interrupt use vector F, and enable it */
> + value = apic_read(APIC_SPIV);
> + value &= ~APIC_VECTOR_MASK;
> + value |= APIC_SPIV_APIC_ENABLED;
> + value |= 0xf;
> + apic_write(APIC_SPIV, value);
> +
> + if (!virt_wire_setup) {
> + /* For LVT0 make it edge triggered, active high, external and enabled */
> + value = apic_read(APIC_LVT0);
> + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
> APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
> - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
> + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
> value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
> - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
> - apic_write(APIC_LVT1, value);
> + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
> + apic_write(APIC_LVT0, value);
> + } else {
> + /* Disable LVT0 */
> + apic_write(APIC_LVT0, APIC_LVT_MASKED);
> }
> +
> + /* For LVT1 make it edge triggered, active high, nmi and enabled */
> + value = apic_read(APIC_LVT1);
> + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
> + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
> + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
> + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
> + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
> + apic_write(APIC_LVT1, value);
> }
>
> void disable_local_APIC(void)
> @@ -418,7 +386,7 @@ void __cpuinit setup_local_APIC (void)
> * TODO: set up through-local-APIC from through-I/O-APIC? --macro
> */
> value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
> - if (!smp_processor_id() && (pic_mode || !value)) {
> + if (!smp_processor_id() && !value) {
> value = APIC_DM_EXTINT;
> apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", smp_processor_id());
> } else {
> @@ -1096,8 +1064,6 @@ int __init APIC_init_uniprocessor (void)
>
> verify_local_APIC();
>
> - connect_bsp_APIC();
> -
> phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
> apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id));
>
> Index: linux/arch/x86_64/kernel/mpparse.c
> ===================================================================
> --- linux.orig/arch/x86_64/kernel/mpparse.c
> +++ linux/arch/x86_64/kernel/mpparse.c
> @@ -56,7 +56,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ
> int mp_irq_entries;
>
> int nr_ioapics;
> -int pic_mode;
> unsigned long mp_lapic_addr = 0;
>
>
> @@ -514,13 +513,6 @@ void __init get_smp_config (void)
> printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
>
> printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
> - if (mpf->mpf_feature2 & (1<<7)) {
> - printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
> - pic_mode = 1;
> - } else {
> - printk(KERN_INFO " Virtual Wire compatibility mode.\n");
> - pic_mode = 0;
> - }
>
> /*
> * Now see if we need to read further.
> Index: linux/include/asm-x86_64/mpspec.h
> ===================================================================
> --- linux.orig/include/asm-x86_64/mpspec.h
> +++ linux/include/asm-x86_64/mpspec.h
> @@ -178,7 +178,6 @@ extern int mp_irq_entries;
> extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
> extern int mpc_default_type;
> extern unsigned long mp_lapic_addr;
> -extern int pic_mode;
>
> #ifdef CONFIG_ACPI
> extern void mp_register_lapic (u8 id, u8 enabled);
> Index: linux/include/asm-x86_64/smp.h
> ===================================================================
> --- linux.orig/include/asm-x86_64/smp.h
> +++ linux/include/asm-x86_64/smp.h
> @@ -33,7 +33,6 @@ extern cpumask_t cpu_initialized;
>
> extern void smp_alloc_memory(void);
> extern volatile unsigned long smp_invalidate_needed;
> -extern int pic_mode;
> extern void lock_ipi_call_lock(void);
> extern void unlock_ipi_call_lock(void);
> extern int smp_num_siblings;
> Index: linux/arch/x86_64/kernel/smpboot.c
> ===================================================================
> --- linux.orig/arch/x86_64/kernel/smpboot.c
> +++ linux/arch/x86_64/kernel/smpboot.c
> @@ -1090,7 +1090,6 @@ void __init smp_prepare_cpus(unsigned in
> /*
> * Switch from PIC to APIC mode.
> */
> - connect_bsp_APIC();
> setup_local_APIC();
>
> if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [54/145] x86_64: Remove obsolete PIC mode
2006-08-16 16:31 ` Len Brown
@ 2006-08-16 16:35 ` Andi Kleen
0 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-16 16:35 UTC (permalink / raw)
To: Len Brown; +Cc: Len Brown, linux-acpi, linux-kernel
On Wed, 16 Aug 2006 12:31:49 -0400
Len Brown <len.brown@intel.com> wrote:
> On Thursday 10 August 2006 15:36, Andi Kleen wrote:
>
>
> > PIC mode is an outdated way to drive the APICs that was used on
> > some early MP boards. It is not supported in the ACPI model.
> >
> > It is unlikely to be ever configured by any x86-64 system
> >
> > Remove it thus.
>
> Is there any reason we can't entirely remove MPS from x86_64?
> > (aside from the routines that ACPI uses)
There are still people who like to compile with CONFIG_ACPI=n or use
acpi=off
I wouldn't have a problem with disallowing CONFIG_ACPI=n, but acpi=off
still needs to work.
A lot of the newer systems don't have mptables anymore, but there are
still a lot around that do.
-Andi
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [55/145] i386/x86-64: Remove obsolete sanity check in mptable parsing
[not found] <20060810 935.775038000@suse.de>
` (53 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [54/145] x86_64: Remove obsolete PIC mode Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [56/145] x86_64: Factor out common io apic routing entry access Andi Kleen
` (90 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
It apparently has never triggered in many years.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/mpparse.c | 13 -------------
arch/x86_64/kernel/mpparse.c | 13 -------------
2 files changed, 26 deletions(-)
Index: linux/arch/x86_64/kernel/mpparse.c
===================================================================
--- linux.orig/arch/x86_64/kernel/mpparse.c
+++ linux/arch/x86_64/kernel/mpparse.c
@@ -223,19 +223,6 @@ static void __init MP_lintsrc_info (stru
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
- /*
- * Well it seems all SMP boards in existence
- * use ExtINT/LVT1 == LINT0 and
- * NMI/LVT2 == LINT1 - the following check
- * will show us if this assumptions is false.
- * Until then we do not have to add baggage.
- */
- if ((m->mpc_irqtype == mp_ExtINT) &&
- (m->mpc_destapiclint != 0))
- BUG();
- if ((m->mpc_irqtype == mp_NMI) &&
- (m->mpc_destapiclint != 1))
- BUG();
}
/*
Index: linux/arch/i386/kernel/mpparse.c
===================================================================
--- linux.orig/arch/i386/kernel/mpparse.c
+++ linux/arch/i386/kernel/mpparse.c
@@ -294,19 +294,6 @@ static void __init MP_lintsrc_info (stru
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
- /*
- * Well it seems all SMP boards in existence
- * use ExtINT/LVT1 == LINT0 and
- * NMI/LVT2 == LINT1 - the following check
- * will show us if this assumptions is false.
- * Until then we do not have to add baggage.
- */
- if ((m->mpc_irqtype == mp_ExtINT) &&
- (m->mpc_destapiclint != 0))
- BUG();
- if ((m->mpc_irqtype == mp_NMI) &&
- (m->mpc_destapiclint != 1))
- BUG();
}
#ifdef CONFIG_X86_NUMAQ
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [56/145] x86_64: Factor out common io apic routing entry access
[not found] <20060810 935.775038000@suse.de>
` (54 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [55/145] i386/x86-64: Remove obsolete sanity check in mptable parsing Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [57/145] i386: " Andi Kleen
` (89 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
The IO APIC code had lots of duplicated code to read/write 64bit
routing entries into the IO-APIC. Factor this out into common read/write
functions.
In a few cases the IO APIC lock is taken more often now, but this
isn't a problem because it's all initialization/shutdown only
slow path code.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/io_apic.c | 82 +++++++++++++++++++++----------------------
1 files changed, 41 insertions(+), 41 deletions(-)
Index: linux/arch/x86_64/kernel/io_apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/io_apic.c
+++ linux/arch/x86_64/kernel/io_apic.c
@@ -111,6 +111,33 @@ int vector_irq[NR_VECTORS] __read_mostly
FINAL; \
}
+union entry_union {
+ struct { u32 w1, w2; };
+ struct IO_APIC_route_entry entry;
+};
+
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
+{
+ union entry_union eu;
+ unsigned long flags;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ return eu.entry;
+}
+
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+ unsigned long flags;
+ union entry_union eu;
+ eu.entry = e;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
#ifdef CONFIG_SMP
static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
{
@@ -196,13 +223,9 @@ static void unmask_IO_APIC_irq (unsigned
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
- unsigned long flags;
/* Check delivery_mode to be sure we're not clearing an SMI pin */
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry = ioapic_read_entry(apic, pin);
if (entry.delivery_mode == dest_SMI)
return;
/*
@@ -210,10 +233,7 @@ static void clear_IO_APIC_pin(unsigned i
*/
memset(&entry, 0, sizeof(entry));
entry.mask = 1;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry);
}
static void clear_IO_APIC (void)
@@ -838,9 +858,9 @@ static void __init setup_IO_APIC_irqs(vo
if (!apic && (irq < 16))
disable_8259A_irq(irq);
}
+ ioapic_write_entry(apic, pin, entry);
+
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -978,10 +998,7 @@ void __apicdebuginit print_IO_APIC(void)
for (i = 0; i <= reg_01.bits.entries; i++) {
struct IO_APIC_route_entry entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry = ioapic_read_entry(apic, i);
printk(KERN_DEBUG " %02x %03X %02X ",
i,
@@ -1191,11 +1208,7 @@ static void __init enable_IO_APIC(void)
/* See if any of the pins is in ExtINT mode */
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
struct IO_APIC_route_entry entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
+ entry = ioapic_read_entry(apic, pin);
/* If the interrupt line is enabled and in ExtInt mode
* I have found the pin where the i8259 is connected.
@@ -1247,7 +1260,6 @@ void disable_IO_APIC(void)
*/
if (ioapic_i8259.pin != -1) {
struct IO_APIC_route_entry entry;
- unsigned long flags;
memset(&entry, 0, sizeof(entry));
entry.mask = 0; /* Enabled */
@@ -1264,12 +1276,7 @@ void disable_IO_APIC(void)
/*
* Add it to the IO-APIC irq-routing table:
*/
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
- *(((int *)&entry)+1));
- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
- *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
}
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
@@ -1879,17 +1886,12 @@ static int ioapic_suspend(struct sys_dev
{
struct IO_APIC_route_entry *entry;
struct sysfs_ioapic_data *data;
- unsigned long flags;
int i;
data = container_of(dev, struct sysfs_ioapic_data, dev);
entry = data->entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
+ *entry = ioapic_read_entry(dev->id, i);
return 0;
}
@@ -1911,11 +1913,9 @@ static int ioapic_resume(struct sys_devi
reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
io_apic_write(dev->id, 0, reg_00.raw);
}
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
- }
spin_unlock_irqrestore(&ioapic_lock, flags);
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
+ ioapic_write_entry(dev->id, i, entry[i]);
return 0;
}
@@ -2040,10 +2040,10 @@ int io_apic_set_pci_routing (int ioapic,
if (!ioapic && (irq < 16))
disable_8259A_irq(irq);
+ ioapic_write_entry(ioapic, pin, entry);
+
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
+ set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
return 0;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [57/145] i386: Factor out common io apic routing entry access
[not found] <20060810 935.775038000@suse.de>
` (55 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [56/145] x86_64: Factor out common io apic routing entry access Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [58/145] x86_64: AUX_DEVICE_INFO is one byte long, use 'movb' Andi Kleen
` (88 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
The IO APIC code had lots of duplicated code to read/write 64bit
routing entries into the IO-APIC. Factor this out into common read/write
functions.
In a few cases the IO APIC lock is taken more often now, but this
isn't a problem because it's all initialization/shutdown only
slow path code.
Similar to earlier x86-64 patch.
Includes a fix by Jiri Slaby for a mistake that broke resume
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/io_apic.c | 100 +++++++++++++++++++--------------------------
1 files changed, 43 insertions(+), 57 deletions(-)
Index: linux/arch/i386/kernel/io_apic.c
===================================================================
--- linux.orig/arch/i386/kernel/io_apic.c
+++ linux/arch/i386/kernel/io_apic.c
@@ -94,6 +94,34 @@ int vector_irq[NR_VECTORS] __read_mostly
#define vector_to_irq(vector) (vector)
#endif
+
+union entry_union {
+ struct { u32 w1, w2; };
+ struct IO_APIC_route_entry entry;
+};
+
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
+{
+ union entry_union eu;
+ unsigned long flags;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ return eu.entry;
+}
+
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+ unsigned long flags;
+ union entry_union eu;
+ eu.entry = e;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
@@ -201,13 +229,9 @@ static void unmask_IO_APIC_irq (unsigned
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
- unsigned long flags;
/* Check delivery_mode to be sure we're not clearing an SMI pin */
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry = ioapic_read_entry(apic, pin);
if (entry.delivery_mode == dest_SMI)
return;
@@ -216,10 +240,7 @@ static void clear_IO_APIC_pin(unsigned i
*/
memset(&entry, 0, sizeof(entry));
entry.mask = 1;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry);
}
static void clear_IO_APIC (void)
@@ -1284,9 +1305,8 @@ static void __init setup_IO_APIC_irqs(vo
if (!apic && (irq < 16))
disable_8259A_irq(irq);
}
+ ioapic_write_entry(apic, pin, entry);
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -1302,7 +1322,6 @@ static void __init setup_IO_APIC_irqs(vo
static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
{
struct IO_APIC_route_entry entry;
- unsigned long flags;
memset(&entry,0,sizeof(entry));
@@ -1332,10 +1351,7 @@ static void __init setup_ExtINT_IRQ0_pin
/*
* Add it to the IO-APIC irq-routing table:
*/
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry);
enable_8259A_irq(0);
}
@@ -1445,10 +1461,7 @@ void __init print_IO_APIC(void)
for (i = 0; i <= reg_01.bits.entries; i++) {
struct IO_APIC_route_entry entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry = ioapic_read_entry(apic, i);
printk(KERN_DEBUG " %02x %03X %02X ",
i,
@@ -1667,10 +1680,7 @@ static void __init enable_IO_APIC(void)
/* See if any of the pins is in ExtINT mode */
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
struct IO_APIC_route_entry entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry = ioapic_read_entry(apic, pin);
/* If the interrupt line is enabled and in ExtInt mode
@@ -1727,7 +1737,6 @@ void disable_IO_APIC(void)
*/
if (ioapic_i8259.pin != -1) {
struct IO_APIC_route_entry entry;
- unsigned long flags;
memset(&entry, 0, sizeof(entry));
entry.mask = 0; /* Enabled */
@@ -1744,12 +1753,7 @@ void disable_IO_APIC(void)
/*
* Add it to the IO-APIC irq-routing table:
*/
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
- *(((int *)&entry)+1));
- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
- *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
}
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
}
@@ -2214,17 +2218,13 @@ static inline void unlock_ExtINT_logic(v
int apic, pin, i;
struct IO_APIC_route_entry entry0, entry1;
unsigned char save_control, save_freq_select;
- unsigned long flags;
pin = find_isa_irq_pin(8, mp_INT);
apic = find_isa_irq_apic(8, mp_INT);
if (pin == -1)
return;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry0 = ioapic_read_entry(apic, pin);
clear_IO_APIC_pin(apic, pin);
memset(&entry1, 0, sizeof(entry1));
@@ -2237,10 +2237,7 @@ static inline void unlock_ExtINT_logic(v
entry1.trigger = 0;
entry1.vector = 0;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry1);
save_control = CMOS_READ(RTC_CONTROL);
save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
@@ -2259,10 +2256,7 @@ static inline void unlock_ExtINT_logic(v
CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
clear_IO_APIC_pin(apic, pin);
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry0);
}
int timer_uses_ioapic_pin_0;
@@ -2462,17 +2456,12 @@ static int ioapic_suspend(struct sys_dev
{
struct IO_APIC_route_entry *entry;
struct sysfs_ioapic_data *data;
- unsigned long flags;
int i;
data = container_of(dev, struct sysfs_ioapic_data, dev);
entry = data->entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+ entry[i] = ioapic_read_entry(dev->id, i);
return 0;
}
@@ -2494,11 +2483,9 @@ static int ioapic_resume(struct sys_devi
reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
io_apic_write(dev->id, 0, reg_00.raw);
}
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
- }
spin_unlock_irqrestore(&ioapic_lock, flags);
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+ ioapic_write_entry(dev->id, i, entry[i]);
return 0;
}
@@ -2695,9 +2682,8 @@ int io_apic_set_pci_routing (int ioapic,
if (!ioapic && (irq < 16))
disable_8259A_irq(irq);
+ ioapic_write_entry(ioapic, pin, entry);
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [58/145] x86_64: AUX_DEVICE_INFO is one byte long, use 'movb'
[not found] <20060810 935.775038000@suse.de>
` (56 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [57/145] i386: " Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [59/145] x86_64: Remove MPS table APIC renumbering Andi Kleen
` (87 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Diego Calleja <diegocg@gmail.com>
Bugzilla #6552 says:
"In arch/i386/boot/setup.S, movw is used instead of movb for PS/2 mouse
information, although it is unsigned char. This does not harm, because
the jmp instruction overwritten by movw is used before executing movw,
and never be used again"
I've no idea if this is a real bug or how it gets fixed, so I'm submitting
it for review instead of letting it die of boredom in bugzilla. In addition
to i386, I've changed x86-64, which mirrors the same code.
Credits to Yoshinori K. Okuji, who found the problem and suggested a fix.
Signed-off-by: Diego Calleja <diegocg@gmail.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/boot/setup.S | 4 ++--
arch/x86_64/boot/setup.S | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
Index: linux/arch/i386/boot/setup.S
===================================================================
--- linux.orig/arch/i386/boot/setup.S
+++ linux/arch/i386/boot/setup.S
@@ -494,12 +494,12 @@ no_voyager:
movw %cs, %ax # aka SETUPSEG
subw $DELTA_INITSEG, %ax # aka INITSEG
movw %ax, %ds
- movw $0, (0x1ff) # default is no pointing device
+ movb $0, (0x1ff) # default is no pointing device
int $0x11 # int 0x11: equipment list
testb $0x04, %al # check if mouse installed
jz no_psmouse
- movw $0xAA, (0x1ff) # device present
+ movb $0xAA, (0x1ff) # device present
no_psmouse:
#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
Index: linux/arch/x86_64/boot/setup.S
===================================================================
--- linux.orig/arch/x86_64/boot/setup.S
+++ linux/arch/x86_64/boot/setup.S
@@ -526,12 +526,12 @@ is_disk1:
movw %cs, %ax # aka SETUPSEG
subw $DELTA_INITSEG, %ax # aka INITSEG
movw %ax, %ds
- movw $0, (0x1ff) # default is no pointing device
+ movb $0, (0x1ff) # default is no pointing device
int $0x11 # int 0x11: equipment list
testb $0x04, %al # check if mouse installed
jz no_psmouse
- movw $0xAA, (0x1ff) # device present
+ movb $0xAA, (0x1ff) # device present
no_psmouse:
#include "../../i386/boot/edd.S"
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [59/145] x86_64: Remove MPS table APIC renumbering
[not found] <20060810 935.775038000@suse.de>
` (57 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [58/145] x86_64: AUX_DEVICE_INFO is one byte long, use 'movb' Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [60/145] x86_64: Move early chipset quirks out to new file Andi Kleen
` (86 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
The MPS table specification says that the operating system should
renumber the IO-APICs following the table as needed. However in
ACPI this is not allowed or needed and all x86-64 systems are ACPI
compliant.
The code was already disabled on some systems because it caused
problems there. Remove it completely now.
CC: mdomsch@dell.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/io_apic.c | 71 -------------------------------------------
1 files changed, 71 deletions(-)
Index: linux/arch/x86_64/kernel/io_apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/io_apic.c
+++ linux/arch/x86_64/kernel/io_apic.c
@@ -1283,72 +1283,6 @@ void disable_IO_APIC(void)
}
/*
- * function to set the IO-APIC physical IDs based on the
- * values stored in the MPC table.
- *
- * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
- */
-
-static void __init setup_ioapic_ids_from_mpc (void)
-{
- union IO_APIC_reg_00 reg_00;
- int apic;
- int i;
- unsigned char old_id;
- unsigned long flags;
-
- /*
- * Set the IOAPIC ID to the value stored in the MPC table.
- */
- for (apic = 0; apic < nr_ioapics; apic++) {
-
- /* Read the register 0 value */
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- old_id = mp_ioapics[apic].mpc_apicid;
-
-
- printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
-
-
- /*
- * We need to adjust the IRQ routing table
- * if the ID changed.
- */
- if (old_id != mp_ioapics[apic].mpc_apicid)
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mpc_dstapic == old_id)
- mp_irqs[i].mpc_dstapic
- = mp_ioapics[apic].mpc_apicid;
-
- /*
- * Read the right value from the MPC table and
- * write it into the ID register.
- */
- apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
- mp_ioapics[apic].mpc_apicid);
-
- reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0, reg_00.raw);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- /*
- * Sanity check
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
- printk("could not set ID!\n");
- else
- apic_printk(APIC_VERBOSE," ok.\n");
- }
-}
-
-/*
* There is a nasty bug in some older SMP boards, their mptable lies
* about the timer IRQ. We do the following to work around the situation:
*
@@ -1863,11 +1797,6 @@ void __init setup_IO_APIC(void)
apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
- /*
- * Set up the IO-APIC IRQ routing table.
- */
- if (!acpi_ioapic)
- setup_ioapic_ids_from_mpc();
sync_Arb_IDs();
setup_IO_APIC_irqs();
init_IO_APIC_traps();
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [60/145] x86_64: Move early chipset quirks out to new file
[not found] <20060810 935.775038000@suse.de>
` (58 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [59/145] x86_64: Remove MPS table APIC renumbering Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-16 15:36 ` Len Brown
2006-08-10 19:36 ` [PATCH for review] [61/145] x86_64: Replace mp bus array with bitmap for bus not pci Andi Kleen
` (85 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
They did not really belong in io_apic.c. Move them into a new file
and clean it up a bit.
Also remove the ATI quirk, which was obsolete.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/Makefile | 2
arch/x86_64/kernel/early-quirks.c | 118 ++++++++++++++++++++++++++++++++++++++
arch/x86_64/kernel/io_apic.c | 101 --------------------------------
arch/x86_64/kernel/setup.c | 2
include/asm-x86_64/proto.h | 2
5 files changed, 121 insertions(+), 104 deletions(-)
Index: linux/arch/x86_64/kernel/Makefile
===================================================================
--- linux.orig/arch/x86_64/kernel/Makefile
+++ linux/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o trap
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
- pci-dma.o pci-nommu.o alternative.o
+ pci-dma.o pci-nommu.o alternative.o early-quirks.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_X86_MCE) += mce.o
Index: linux/arch/x86_64/kernel/io_apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/io_apic.c
+++ linux/arch/x86_64/kernel/io_apic.c
@@ -280,107 +280,6 @@ static int __init setup_enable_8254_time
__setup("disable_8254_timer", setup_disable_8254_timer);
__setup("enable_8254_timer", setup_enable_8254_timer);
-#include <asm/pci-direct.h>
-#include <linux/pci_ids.h>
-#include <linux/pci.h>
-
-
-#ifdef CONFIG_ACPI
-
-static int nvidia_hpet_detected __initdata;
-
-static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
-{
- nvidia_hpet_detected = 1;
- return 0;
-}
-#endif
-
-/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
- off. Check for an Nvidia or VIA PCI bridge and turn it off.
- Use pci direct infrastructure because this runs before the PCI subsystem.
-
- Can be overwritten with "apic"
-
- And another hack to disable the IOMMU on VIA chipsets.
-
- ... and others. Really should move this somewhere else.
-
- Kludge-O-Rama. */
-void __init check_ioapic(void)
-{
- int num,slot,func;
- /* Poor man's PCI discovery */
- for (num = 0; num < 32; num++) {
- for (slot = 0; slot < 32; slot++) {
- for (func = 0; func < 8; func++) {
- u32 class;
- u32 vendor;
- u8 type;
- class = read_pci_config(num,slot,func,
- PCI_CLASS_REVISION);
- if (class == 0xffffffff)
- break;
-
- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
- continue;
-
- vendor = read_pci_config(num, slot, func,
- PCI_VENDOR_ID);
- vendor &= 0xffff;
- switch (vendor) {
- case PCI_VENDOR_ID_VIA:
-#ifdef CONFIG_IOMMU
- if ((end_pfn > MAX_DMA32_PFN ||
- force_iommu) &&
- !iommu_aperture_allowed) {
- printk(KERN_INFO
- "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
- iommu_aperture_disabled = 1;
- }
-#endif
- return;
- case PCI_VENDOR_ID_NVIDIA:
-#ifdef CONFIG_ACPI
- /*
- * All timer overrides on Nvidia are
- * wrong unless HPET is enabled.
- */
- nvidia_hpet_detected = 0;
- acpi_table_parse(ACPI_HPET,
- nvidia_hpet_check);
- if (nvidia_hpet_detected == 0) {
- acpi_skip_timer_override = 1;
- printk(KERN_INFO "Nvidia board "
- "detected. Ignoring ACPI "
- "timer override.\n");
- }
-#endif
- /* RED-PEN skip them on mptables too? */
- return;
-
- /* This should be actually default, but
- for 2.6.16 let's do it for ATI only where
- it's really needed. */
- case PCI_VENDOR_ID_ATI:
- if (timer_over_8254 == 1) {
- timer_over_8254 = 0;
- printk(KERN_INFO
- "ATI board detected. Disabling timer routing over 8254.\n");
- }
- return;
- }
-
-
- /* No multi-function device? */
- type = read_pci_config_byte(num,slot,func,
- PCI_HEADER_TYPE);
- if (!(type & 0x80))
- break;
- }
- }
- }
-}
/*
* Find the IRQ entry number of a certain pin.
Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -655,7 +655,7 @@ void __init setup_arch(char **cmdline_p)
paging_init();
- check_ioapic();
+ early_quirks();
/*
* set this early, so we dont allocate cpu0
Index: linux/include/asm-x86_64/proto.h
===================================================================
--- linux.orig/include/asm-x86_64/proto.h
+++ linux/include/asm-x86_64/proto.h
@@ -92,7 +92,7 @@ extern void syscall32_cpu_init(void);
extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
-extern void check_ioapic(void);
+extern void early_quirks(void);
extern void check_efer(void);
extern int unhandled_signal(struct task_struct *tsk, int sig);
Index: linux/arch/x86_64/kernel/early-quirks.c
===================================================================
--- /dev/null
+++ linux/arch/x86_64/kernel/early-quirks.c
@@ -0,0 +1,118 @@
+/* Various workarounds for chipset bugs.
+ This code runs very early and can't use the regular PCI subsystem
+ The entries are keyed to PCI bridges which usually identify chipsets
+ uniquely.
+ This is only for whole classes of chipsets with specific problems which
+ need early invasive action (e.g. before the timers are initialized).
+ Most PCI device specific workarounds can be done later and should be
+ in standard PCI quirks
+ Mainboard specific bugs should be handled by DMI entries.
+ CPU specific bugs in setup.c */
+
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <linux/pci_ids.h>
+#include <asm/pci-direct.h>
+#include <asm/proto.h>
+#include <asm/dma.h>
+
+static void via_bugs(void)
+{
+#ifdef CONFIG_IOMMU
+ if ((end_pfn > MAX_DMA32_PFN || force_iommu) &&
+ !iommu_aperture_allowed) {
+ printk(KERN_INFO
+ "Looks like a VIA chipset. Disabling IOMMU. Override with iommu=allowed\n");
+ iommu_aperture_disabled = 1;
+ }
+#endif
+}
+
+#ifdef CONFIG_ACPI
+
+static int nvidia_hpet_detected __initdata;
+
+static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
+{
+ nvidia_hpet_detected = 1;
+ return 0;
+}
+#endif
+
+static void nvidia_bugs(void)
+{
+#ifdef CONFIG_ACPI
+ /*
+ * All timer overrides on Nvidia are
+ * wrong unless HPET is enabled.
+ */
+ nvidia_hpet_detected = 0;
+ acpi_table_parse(ACPI_HPET, nvidia_hpet_check);
+ if (nvidia_hpet_detected == 0) {
+ acpi_skip_timer_override = 1;
+ printk(KERN_INFO "Nvidia board "
+ "detected. Ignoring ACPI "
+ "timer override.\n");
+ }
+#endif
+ /* RED-PEN skip them on mptables too? */
+
+}
+
+static void ati_bugs(void)
+{
+#if 1 /* for testing */
+ printk("ATI board detected\n");
+#endif
+ /* No bugs right now */
+}
+
+struct chipset {
+ u16 vendor;
+ void (*f)(void);
+};
+
+static struct chipset early_qrk[] = {
+ { PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
+ { PCI_VENDOR_ID_VIA, via_bugs },
+ { PCI_VENDOR_ID_ATI, ati_bugs },
+ {}
+};
+
+void __init early_quirks(void)
+{
+ int num, slot, func;
+ /* Poor man's PCI discovery */
+ for (num = 0; num < 32; num++) {
+ for (slot = 0; slot < 32; slot++) {
+ for (func = 0; func < 8; func++) {
+ u32 class;
+ u32 vendor;
+ u8 type;
+ int i;
+ class = read_pci_config(num,slot,func,
+ PCI_CLASS_REVISION);
+ if (class == 0xffffffff)
+ break;
+
+ if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
+ continue;
+
+ vendor = read_pci_config(num, slot, func,
+ PCI_VENDOR_ID);
+ vendor &= 0xffff;
+
+ for (i = 0; early_qrk[i].f; i++)
+ if (early_qrk[i].vendor == vendor) {
+ early_qrk[i].f();
+ return;
+ }
+
+ type = read_pci_config_byte(num, slot, func,
+ PCI_HEADER_TYPE);
+ if (!(type & 0x80))
+ break;
+ }
+ }
+ }
+}
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [60/145] x86_64: Move early chipset quirks out to new file
2006-08-10 19:36 ` [PATCH for review] [60/145] x86_64: Move early chipset quirks out to new file Andi Kleen
@ 2006-08-16 15:36 ` Len Brown
0 siblings, 0 replies; 199+ messages in thread
From: Len Brown @ 2006-08-16 15:36 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel
On Thursday 10 August 2006 15:36, Andi Kleen wrote:
> They did not really belong into io_apic.c. Move them into a new file
> and clean it up a bit.
>
> Also remove outdated ATI quirk that was obsolete,
>
> Signed-off-by: Andi Kleen <ak@suse.de>
>
> ---
> arch/x86_64/kernel/Makefile | 2
> arch/x86_64/kernel/early-quirks.c | 118 ++++++++++++++++++++++++++++++++++++++
> arch/x86_64/kernel/io_apic.c | 101 --------------------------------
> arch/x86_64/kernel/setup.c | 2
> include/asm-x86_64/proto.h | 2
> 5 files changed, 121 insertions(+), 104 deletions(-)
>
Acked-by: Len Brown <len.brown@intel.com>
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [61/145] x86_64: Replace mp bus array with bitmap for bus not pci
[not found] <20060810 935.775038000@suse.de>
` (59 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [60/145] x86_64: Move early chipset quirks out to new file Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [62/145] x86_64: Remove useless wrapper in mpparse.c code Andi Kleen
` (84 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Since we only support PCI and ISA legacy busses now, there is no need to
have a full array with checking.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/io_apic.c | 92 ++++++++++---------------------------------
arch/x86_64/kernel/mpparse.c | 9 +---
include/asm-x86_64/mpspec.h | 8 ---
3 files changed, 27 insertions(+), 82 deletions(-)
Index: linux/arch/x86_64/kernel/io_apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/io_apic.c
+++ linux/arch/x86_64/kernel/io_apic.c
@@ -308,7 +308,7 @@ static int __init find_isa_irq_pin(int i
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
- if (mp_bus_id_to_type[lbus] == MP_BUS_ISA &&
+ if (test_bit(lbus, mp_bus_not_pci) &&
(mp_irqs[i].mpc_irqtype == type) &&
(mp_irqs[i].mpc_srcbusirq == irq))
@@ -324,7 +324,7 @@ static int __init find_isa_irq_apic(int
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA) &&
+ if (test_bit(lbus, mp_bus_not_pci) &&
(mp_irqs[i].mpc_irqtype == type) &&
(mp_irqs[i].mpc_srcbusirq == irq))
break;
@@ -364,7 +364,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
break;
- if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+ if (!test_bit(lbus, mp_bus_not_pci) &&
!mp_irqs[i].mpc_irqtype &&
(bus == lbus) &&
(slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
@@ -410,28 +410,11 @@ static int __init MPBIOS_polarity(int id
switch (mp_irqs[idx].mpc_irqflag & 3)
{
case 0: /* conforms, ie. bus-type dependent polarity */
- {
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- {
- polarity = default_ISA_polarity(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- polarity = default_PCI_polarity(idx);
- break;
- }
- default:
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- }
+ if (test_bit(bus, mp_bus_not_pci))
+ polarity = default_ISA_polarity(idx);
+ else
+ polarity = default_PCI_polarity(idx);
break;
- }
case 1: /* high active */
{
polarity = 0;
@@ -469,28 +452,11 @@ static int MPBIOS_trigger(int idx)
switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
{
case 0: /* conforms, ie. bus-type dependent */
- {
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- {
- trigger = default_ISA_trigger(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- trigger = default_PCI_trigger(idx);
- break;
- }
- default:
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 1;
- break;
- }
- }
+ if (test_bit(bus, mp_bus_not_pci))
+ trigger = default_ISA_trigger(idx);
+ else
+ trigger = default_PCI_trigger(idx);
break;
- }
case 1: /* edge */
{
trigger = 0;
@@ -596,31 +562,17 @@ static int pin_2_irq(int idx, int apic,
if (mp_irqs[idx].mpc_dstirq != pin)
printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- {
- irq = mp_irqs[idx].mpc_srcbusirq;
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- /*
- * PCI IRQs are mapped in order
- */
- i = irq = 0;
- while (i < apic)
- irq += nr_ioapic_registers[i++];
- irq += pin;
- irq = gsi_irq_sharing(irq);
- break;
- }
- default:
- {
- printk(KERN_ERR "unknown bus type %d.\n",bus);
- irq = 0;
- break;
- }
+ if (test_bit(bus, mp_bus_not_pci)) {
+ irq = mp_irqs[idx].mpc_srcbusirq;
+ } else {
+ /*
+ * PCI IRQs are mapped in order
+ */
+ i = irq = 0;
+ while (i < apic)
+ irq += nr_ioapic_registers[i++];
+ irq += pin;
+ irq = gsi_irq_sharing(irq);
}
BUG_ON(irq >= NR_IRQS);
return irq;
Index: linux/arch/x86_64/kernel/mpparse.c
===================================================================
--- linux.orig/arch/x86_64/kernel/mpparse.c
+++ linux/arch/x86_64/kernel/mpparse.c
@@ -42,7 +42,7 @@ int acpi_found_madt;
* MP-table.
*/
unsigned char apic_version [MAX_APICS];
-unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
static int mp_current_pci_id = 0;
@@ -173,9 +173,9 @@ static void __init MP_bus_info (struct m
Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
if (strncmp(str, "ISA", 3) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+ set_bit(m->mpc_busid, mp_bus_not_pci);
} else if (strncmp(str, "PCI", 3) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+ clear_bit(m->mpc_busid, mp_bus_not_pci);
mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
mp_current_pci_id++;
} else {
@@ -808,8 +808,7 @@ void __init mp_config_acpi_legacy_irqs (
/*
* Fabricate the legacy ISA bus (bus #31).
*/
- mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
- Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+ set_bit(MP_ISA_BUS, mp_bus_not_pci);
/*
* Locate the IOAPIC that manages the ISA IRQs (0-15).
Index: linux/include/asm-x86_64/mpspec.h
===================================================================
--- linux.orig/include/asm-x86_64/mpspec.h
+++ linux/include/asm-x86_64/mpspec.h
@@ -159,13 +159,7 @@ struct mpc_config_lintsrc
#define MAX_MP_BUSSES 256
/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */
#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
-enum mp_bustype {
- MP_BUS_ISA = 1,
- MP_BUS_EISA,
- MP_BUS_PCI,
- MP_BUS_MCA
-};
-extern unsigned char mp_bus_id_to_type [MAX_MP_BUSSES];
+extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
extern unsigned int boot_cpu_physical_apicid;
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [62/145] x86_64: Remove useless wrapper in mpparse.c code
[not found] <20060810 935.775038000@suse.de>
` (60 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [61/145] x86_64: Replace mp bus array with bitmap for bus not pci Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [63/145] x86_64: Remove some unneeded ACPI externs in mpparse.c Andi Kleen
` (83 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
It used to contain support code for NUMAQ, but that is long gone already
on 64bit.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/mpparse.c | 11 +----------
1 files changed, 1 insertion(+), 10 deletions(-)
Index: linux/arch/x86_64/kernel/mpparse.c
===================================================================
--- linux.orig/arch/x86_64/kernel/mpparse.c
+++ linux/arch/x86_64/kernel/mpparse.c
@@ -579,7 +579,7 @@ static int __init smp_scan_config (unsig
return 0;
}
-void __init find_intel_smp (void)
+void __init find_smp_config(void)
{
unsigned int address;
@@ -617,15 +617,6 @@ void __init find_intel_smp (void)
printk(KERN_INFO "No mptable found.\n");
}
-/*
- * - Intel MP Configuration Table
- */
-void __init find_smp_config (void)
-{
- find_intel_smp();
-}
-
-
/* --------------------------------------------------------------------------
ACPI-based MP Configuration
-------------------------------------------------------------------------- */
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [63/145] x86_64: Remove some unneeded ACPI externs in mpparse.c
[not found] <20060810 935.775038000@suse.de>
` (61 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [62/145] x86_64: Remove useless wrapper in mpparse.c code Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [64/145] x86_64: Fix up some non linuxy style in ACPI functions " Andi Kleen
` (82 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
They are not used in this file so remove them. i386 didn't have them either.
Cc: len.brown@intel.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/mpparse.c | 9 ---------
1 files changed, 9 deletions(-)
Index: linux/arch/x86_64/kernel/mpparse.c
===================================================================
--- linux.orig/arch/x86_64/kernel/mpparse.c
+++ linux/arch/x86_64/kernel/mpparse.c
@@ -70,15 +70,6 @@ unsigned disabled_cpus __initdata;
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
-/* ACPI MADT entry parsing functions */
-#ifdef CONFIG_ACPI
-extern struct acpi_boot_flags acpi_boot;
-extern int acpi_parse_lapic (acpi_table_entry_header *header);
-extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
-extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
-extern int acpi_parse_ioapic (acpi_table_entry_header *header);
-#endif /*CONFIG_ACPI*/
-
u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [64/145] x86_64: Fix up some non linuxy style in ACPI functions in mpparse.c
[not found] <20060810 935.775038000@suse.de>
` (62 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [63/145] x86_64: Remove some unneeded ACPI externs in mpparse.c Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [65/145] i386: Clean up code style in mpparse.c ACPI code Andi Kleen
` (81 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
No functional changes.
Cc: len.brown@intel.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/mpparse.c | 57 ++++++++++++-------------------------------
1 files changed, 16 insertions(+), 41 deletions(-)
Index: linux/arch/x86_64/kernel/mpparse.c
===================================================================
--- linux.orig/arch/x86_64/kernel/mpparse.c
+++ linux/arch/x86_64/kernel/mpparse.c
@@ -614,23 +614,17 @@ void __init find_smp_config(void)
#ifdef CONFIG_ACPI
-void __init mp_register_lapic_address (
- u64 address)
+void __init mp_register_lapic_address(u64 address)
{
mp_lapic_addr = (unsigned long) address;
-
set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
-
if (boot_cpu_id == -1U)
boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
}
-
-void __cpuinit mp_register_lapic (
- u8 id,
- u8 enabled)
+void __cpuinit mp_register_lapic (u8 id, u8 enabled)
{
struct mpc_config_processor processor;
int boot_cpu = 0;
@@ -668,11 +662,9 @@ static struct mp_ioapic_routing {
u32 pin_programmed[4];
} mp_ioapic_routing[MAX_IO_APICS];
-
-static int mp_find_ioapic (
- int gsi)
+static int mp_find_ioapic(int gsi)
{
- int i = 0;
+ int i = 0;
/* Find the IOAPIC that manages this GSI. */
for (i = 0; i < nr_ioapics; i++) {
@@ -682,17 +674,12 @@ static int mp_find_ioapic (
}
printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
-
return -1;
}
-
-void __init mp_register_ioapic (
- u8 id,
- u32 address,
- u32 gsi_base)
+void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
{
- int idx = 0;
+ int idx = 0;
if (nr_ioapics >= MAX_IO_APICS) {
printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
@@ -729,16 +716,10 @@ void __init mp_register_ioapic (
mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
mp_ioapic_routing[idx].gsi_start,
mp_ioapic_routing[idx].gsi_end);
-
- return;
}
-
-void __init mp_override_legacy_irq (
- u8 bus_irq,
- u8 polarity,
- u8 trigger,
- u32 gsi)
+void __init
+mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
{
struct mpc_config_intsrc intsrc;
int ioapic = -1;
@@ -776,16 +757,13 @@ void __init mp_override_legacy_irq (
mp_irqs[mp_irq_entries] = intsrc;
if (++mp_irq_entries == MAX_IRQ_SOURCES)
panic("Max # of irq sources exceeded!\n");
-
- return;
}
-
-void __init mp_config_acpi_legacy_irqs (void)
+void __init mp_config_acpi_legacy_irqs(void)
{
struct mpc_config_intsrc intsrc;
- int i = 0;
- int ioapic = -1;
+ int i = 0;
+ int ioapic = -1;
/*
* Fabricate the legacy ISA bus (bus #31).
@@ -843,24 +821,22 @@ void __init mp_config_acpi_legacy_irqs (
if (++mp_irq_entries == MAX_IRQ_SOURCES)
panic("Max # of irq sources exceeded!\n");
}
-
- return;
}
#define MAX_GSI_NUM 4096
int mp_register_gsi(u32 gsi, int triggering, int polarity)
{
- int ioapic = -1;
- int ioapic_pin = 0;
- int idx, bit = 0;
- static int pci_irq = 16;
+ int ioapic = -1;
+ int ioapic_pin = 0;
+ int idx, bit = 0;
+ static int pci_irq = 16;
/*
* Mapping between Global System Interrupts, which
* represent all possible interrupts, to the IRQs
* assigned to actual devices.
*/
- static int gsi_to_irq[MAX_GSI_NUM];
+ static int gsi_to_irq[MAX_GSI_NUM];
if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
return gsi;
@@ -934,5 +910,4 @@ int mp_register_gsi(u32 gsi, int trigger
polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
return gsi;
}
-
#endif /*CONFIG_ACPI*/
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [65/145] i386: Clean up code style in mpparse.c ACPI code
[not found] <20060810 935.775038000@suse.de>
` (63 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [64/145] x86_64: Fix up some non linuxy style in ACPI functions " Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-16 16:08 ` Len Brown
2006-08-10 19:36 ` [PATCH for review] [66/145] x86_64: Use BUILD_BUG_ON in apic.c build sanity checking Andi Kleen
` (80 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Remove some unlinuxy ways to write function parameter definitions.
Remove some stray "return;"s
No functional change.
Cc: len.brown@intel.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/mpparse.c | 52 ++++++++++++++-------------------------------
1 files changed, 17 insertions(+), 35 deletions(-)
Index: linux/arch/i386/kernel/mpparse.c
===================================================================
--- linux.orig/arch/i386/kernel/mpparse.c
+++ linux/arch/i386/kernel/mpparse.c
@@ -810,8 +810,7 @@ int es7000_plat;
#ifdef CONFIG_ACPI
-void __init mp_register_lapic_address (
- u64 address)
+void __init mp_register_lapic_address(u64 address)
{
mp_lapic_addr = (unsigned long) address;
@@ -823,13 +822,10 @@ void __init mp_register_lapic_address (
Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
}
-
-void __devinit mp_register_lapic (
- u8 id,
- u8 enabled)
+void __devinit mp_register_lapic (u8 id, u8 enabled)
{
struct mpc_config_processor processor;
- int boot_cpu = 0;
+ int boot_cpu = 0;
if (MAX_APICS - id <= 0) {
printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
@@ -866,11 +862,9 @@ static struct mp_ioapic_routing {
u32 pin_programmed[4];
} mp_ioapic_routing[MAX_IO_APICS];
-
-static int mp_find_ioapic (
- int gsi)
+static int mp_find_ioapic (int gsi)
{
- int i = 0;
+ int i = 0;
/* Find the IOAPIC that manages this GSI. */
for (i = 0; i < nr_ioapics; i++) {
@@ -883,15 +877,11 @@ static int mp_find_ioapic (
return -1;
}
-
-void __init mp_register_ioapic (
- u8 id,
- u32 address,
- u32 gsi_base)
+void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
{
- int idx = 0;
- int tmpid;
+ int idx = 0;
+ int tmpid;
if (nr_ioapics >= MAX_IO_APICS) {
printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
@@ -937,16 +927,10 @@ void __init mp_register_ioapic (
mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
mp_ioapic_routing[idx].gsi_base,
mp_ioapic_routing[idx].gsi_end);
-
- return;
}
-
-void __init mp_override_legacy_irq (
- u8 bus_irq,
- u8 polarity,
- u8 trigger,
- u32 gsi)
+void __init
+mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
{
struct mpc_config_intsrc intsrc;
int ioapic = -1;
@@ -984,15 +968,13 @@ void __init mp_override_legacy_irq (
mp_irqs[mp_irq_entries] = intsrc;
if (++mp_irq_entries == MAX_IRQ_SOURCES)
panic("Max # of irq sources exceeded!\n");
-
- return;
}
void __init mp_config_acpi_legacy_irqs (void)
{
struct mpc_config_intsrc intsrc;
- int i = 0;
- int ioapic = -1;
+ int i = 0;
+ int ioapic = -1;
/*
* Fabricate the legacy ISA bus (bus #31).
@@ -1061,12 +1043,12 @@ void __init mp_config_acpi_legacy_irqs (
#define MAX_GSI_NUM 4096
-int mp_register_gsi (u32 gsi, int triggering, int polarity)
+int mp_register_gsi(u32 gsi, int triggering, int polarity)
{
- int ioapic = -1;
- int ioapic_pin = 0;
- int idx, bit = 0;
- static int pci_irq = 16;
+ int ioapic = -1;
+ int ioapic_pin = 0;
+ int idx, bit = 0;
+ static int pci_irq = 16;
/*
* Mapping between Global System Interrups, which
* represent all possible interrupts, and IRQs
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [65/145] i386: Clean up code style in mpparse.c ACPI code
2006-08-10 19:36 ` [PATCH for review] [65/145] i386: Clean up code style in mpparse.c ACPI code Andi Kleen
@ 2006-08-16 16:08 ` Len Brown
2006-08-16 16:30 ` Andi Kleen
0 siblings, 1 reply; 199+ messages in thread
From: Len Brown @ 2006-08-16 16:08 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel
On Thursday 10 August 2006 15:36, Andi Kleen wrote:
>
> Remove some unlinuxy ways to write function parameter definitions.
> Remove some stray "return;"s
>
> No functional change.
>
> Cc: len.brown@intel.com
> Signed-off-by: Andi Kleen <ak@suse.de>
>
> ---
> arch/i386/kernel/mpparse.c | 52 ++++++++++++++-------------------------------
> 1 files changed, 17 insertions(+), 35 deletions(-)
Maybe it is time to just Lindent the file?
When I Lindented the ACPI sub-system, I stopped short of mpparse.c.
As you know, I'd like to see the ACPI part of mpparse.c split out into a different file
that can be shared by i386 and x86_64.
Acked-by: Len Brown <len.brown@intel.com>
> Index: linux/arch/i386/kernel/mpparse.c
> ===================================================================
> --- linux.orig/arch/i386/kernel/mpparse.c
> +++ linux/arch/i386/kernel/mpparse.c
> @@ -810,8 +810,7 @@ int es7000_plat;
>
> #ifdef CONFIG_ACPI
>
> -void __init mp_register_lapic_address (
> - u64 address)
> +void __init mp_register_lapic_address(u64 address)
> {
> mp_lapic_addr = (unsigned long) address;
>
> @@ -823,13 +822,10 @@ void __init mp_register_lapic_address (
> Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
> }
>
> -
> -void __devinit mp_register_lapic (
> - u8 id,
> - u8 enabled)
> +void __devinit mp_register_lapic (u8 id, u8 enabled)
> {
> struct mpc_config_processor processor;
> - int boot_cpu = 0;
> + int boot_cpu = 0;
>
> if (MAX_APICS - id <= 0) {
> printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
> @@ -866,11 +862,9 @@ static struct mp_ioapic_routing {
> u32 pin_programmed[4];
> } mp_ioapic_routing[MAX_IO_APICS];
>
> -
> -static int mp_find_ioapic (
> - int gsi)
> +static int mp_find_ioapic (int gsi)
> {
> - int i = 0;
> + int i = 0;
>
> /* Find the IOAPIC that manages this GSI. */
> for (i = 0; i < nr_ioapics; i++) {
> @@ -883,15 +877,11 @@ static int mp_find_ioapic (
>
> return -1;
> }
> -
>
> -void __init mp_register_ioapic (
> - u8 id,
> - u32 address,
> - u32 gsi_base)
> +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
> {
> - int idx = 0;
> - int tmpid;
> + int idx = 0;
> + int tmpid;
>
> if (nr_ioapics >= MAX_IO_APICS) {
> printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
> @@ -937,16 +927,10 @@ void __init mp_register_ioapic (
> mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
> mp_ioapic_routing[idx].gsi_base,
> mp_ioapic_routing[idx].gsi_end);
> -
> - return;
> }
>
> -
> -void __init mp_override_legacy_irq (
> - u8 bus_irq,
> - u8 polarity,
> - u8 trigger,
> - u32 gsi)
> +void __init
> +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
> {
> struct mpc_config_intsrc intsrc;
> int ioapic = -1;
> @@ -984,15 +968,13 @@ void __init mp_override_legacy_irq (
> mp_irqs[mp_irq_entries] = intsrc;
> if (++mp_irq_entries == MAX_IRQ_SOURCES)
> panic("Max # of irq sources exceeded!\n");
> -
> - return;
> }
>
> void __init mp_config_acpi_legacy_irqs (void)
> {
> struct mpc_config_intsrc intsrc;
> - int i = 0;
> - int ioapic = -1;
> + int i = 0;
> + int ioapic = -1;
>
> /*
> * Fabricate the legacy ISA bus (bus #31).
> @@ -1061,12 +1043,12 @@ void __init mp_config_acpi_legacy_irqs (
>
> #define MAX_GSI_NUM 4096
>
> -int mp_register_gsi (u32 gsi, int triggering, int polarity)
> +int mp_register_gsi(u32 gsi, int triggering, int polarity)
> {
> - int ioapic = -1;
> - int ioapic_pin = 0;
> - int idx, bit = 0;
> - static int pci_irq = 16;
> + int ioapic = -1;
> + int ioapic_pin = 0;
> + int idx, bit = 0;
> + static int pci_irq = 16;
> /*
> * Mapping between Global System Interrups, which
> * represent all possible interrupts, and IRQs
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [65/145] i386: Clean up code style in mpparse.c ACPI code
2006-08-16 16:08 ` Len Brown
@ 2006-08-16 16:30 ` Andi Kleen
0 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-16 16:30 UTC (permalink / raw)
To: Len Brown; +Cc: Len Brown, linux-kernel
On Wed, 16 Aug 2006 12:08:43 -0400
Len Brown <len.brown@intel.com> wrote:
> On Thursday 10 August 2006 15:36, Andi Kleen wrote:
> >
> > Remove some unlinuxy ways to write function parameter definitions.
> > Remove some stray "return;"s
> >
> > No functional change.
> >
> > Cc: len.brown@intel.com
> > Signed-off-by: Andi Kleen <ak@suse.de>
> >
> > ---
> > arch/i386/kernel/mpparse.c | 52 ++++++++++++++-------------------------------
> > 1 files changed, 17 insertions(+), 35 deletions(-)
>
> Maybe it is time to just Lindent the file?
> When I Lindented the ACPI sub-system, I stopped short of mpparse.c.
I think except for the ACPI bits which I already fixed in this patch
everything else was linuxy already.
>
> As you know, I'd like to see the ACPI part of mpparse.c split out into a different file
> that can be shared by i386 and x86_64.
I'm for splitting out in ACPI/non ACPI (with CONFIG for mpparse), but not for
sharing. I want the freedom to change mpparse's internal data structures without
caring for i386.
The first step would be to get rid of some of the hacks that convert ACPI into
mpparse.
-Andi
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [66/145] x86_64: Use BUILD_BUG_ON in apic.c build sanity checking
[not found] <20060810 935.775038000@suse.de>
` (64 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [65/145] i386: Clean up code style in mpparse.c ACPI code Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [67/145] x86_64: Detect CFI support in the assembler at runtime Andi Kleen
` (79 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Makes code a little shorter.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/apic.c | 5 +----
1 files changed, 1 insertion(+), 4 deletions(-)
Index: linux/arch/x86_64/kernel/apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/apic.c
+++ linux/arch/x86_64/kernel/apic.c
@@ -265,8 +265,6 @@ void __init sync_Arb_IDs(void)
| APIC_DM_INIT);
}
-extern void __error_in_apic_c (void);
-
/*
* An initial setup of the virtual wire mode.
*/
@@ -313,8 +311,7 @@ void __cpuinit setup_local_APIC (void)
value = apic_read(APIC_LVR);
- if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
- __error_in_apic_c();
+ BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
/*
* Double-check whether this APIC is really registered.
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [67/145] x86_64: Detect CFI support in the assembler at runtime
[not found] <20060810 935.775038000@suse.de>
` (65 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [66/145] x86_64: Use BUILD_BUG_ON in apic.c build sanity checking Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
[not found] ` <20060811053932.GA4910@mars.ravnborg.org>
2006-08-10 19:36 ` [PATCH for review] [68/145] x86_64: Remove obsolete CVS $ from assembler files in arch/x86_64/kernel/* Andi Kleen
` (78 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Detect CFI support in the assembler when the kernel is built, instead of
relying on a CONFIG option. The config option still controls whether the
resulting executable actually has unwind information.
This prevents compilation errors when users select CONFIG_STACK_UNWIND
with old binutils, and also allows CFI to be used in the future for
non-kernel-debugging applications.
Cc: jbeulich@novell.com
Cc: sam@ravnborg.org
Signed-off-by: Andi Kleen <ak@suse.de>
---
Documentation/kbuild/makefiles.txt | 5 +++++
arch/i386/Makefile | 3 +++
arch/x86_64/Makefile | 2 ++
include/asm-x86_64/dwarf2.h | 2 +-
scripts/Kbuild.include | 6 ++++++
5 files changed, 17 insertions(+), 1 deletion(-)
Index: linux/Documentation/kbuild/makefiles.txt
===================================================================
--- linux.orig/Documentation/kbuild/makefiles.txt
+++ linux/Documentation/kbuild/makefiles.txt
@@ -421,6 +421,11 @@ more details, with real examples.
The second argument is optional, and if supplied will be used
if first argument is not supported.
+ as-instr
+ as-instr checks if the assembler supports a specific instruction
+ and then outputs either option1 or option2.
+ C escapes are supported in the test instruction.
+
cc-option
cc-option is used to check if $(CC) support a given option, and not
supported to use an optional second option.
Index: linux/arch/x86_64/Makefile
===================================================================
--- linux.orig/arch/x86_64/Makefile
+++ linux/arch/x86_64/Makefile
@@ -54,6 +54,8 @@ endif
cflags-y += $(call cc-option,-funit-at-a-time)
# prevent gcc from generating any FP code by mistake
cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+# do binutils support CFI?
+cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
CFLAGS += $(cflags-y)
CFLAGS_KERNEL += $(cflags-kernel-y)
Index: linux/include/asm-x86_64/dwarf2.h
===================================================================
--- linux.orig/include/asm-x86_64/dwarf2.h
+++ linux/include/asm-x86_64/dwarf2.h
@@ -13,7 +13,7 @@
away for older version.
*/
-#ifdef CONFIG_UNWIND_INFO
+#ifdef CONFIG_AS_CFI
#define CFI_STARTPROC .cfi_startproc
#define CFI_ENDPROC .cfi_endproc
Index: linux/scripts/Kbuild.include
===================================================================
--- linux.orig/scripts/Kbuild.include
+++ linux/scripts/Kbuild.include
@@ -59,6 +59,12 @@ as-option = $(shell if $(CC) $(CFLAGS) $
-xassembler /dev/null > /dev/null 2>&1; then echo "$(1)"; \
else echo "$(2)"; fi ;)
+# as-instr
+# Usage: cflags-y += $(call as-instr, instr, option1, option2)
+
+as-instr = $(shell if echo -e "$(1)" | $(AS) -Z -o /dev/null \
+ 2>&1 >/dev/null ; then echo "$(2)"; else echo "$(3)"; fi;)
+
# cc-option
# Usage: cflags-y += $(call cc-option, -march=winchip-c6, -march=i586)
Index: linux/arch/i386/Makefile
===================================================================
--- linux.orig/arch/i386/Makefile
+++ linux/arch/i386/Makefile
@@ -46,6 +46,9 @@ cflags-y += -ffreestanding
# a lot more stack due to the lack of sharing of stacklots:
CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;)
+# do binutils support CFI?
+cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
+
CFLAGS += $(cflags-y)
# Default subarch .c files
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [68/145] x86_64: Remove obsolete CVS $ from assembler files in arch/x86_64/kernel/*
[not found] <20060810 935.775038000@suse.de>
` (66 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [67/145] x86_64: Detect CFI support in the assembler at runtime Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [69/145] x86_64: Disable DAC on VIA PCI bridges Andi Kleen
` (77 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
CVS hasn't been used for a long time for them.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/entry.S | 2 --
arch/x86_64/kernel/head.S | 2 --
2 files changed, 4 deletions(-)
Index: linux/arch/x86_64/kernel/entry.S
===================================================================
--- linux.orig/arch/x86_64/kernel/entry.S
+++ linux/arch/x86_64/kernel/entry.S
@@ -4,8 +4,6 @@
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
- *
- * $Id$
*/
/*
Index: linux/arch/x86_64/kernel/head.S
===================================================================
--- linux.orig/arch/x86_64/kernel/head.S
+++ linux/arch/x86_64/kernel/head.S
@@ -5,8 +5,6 @@
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
* Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
* Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
- *
- * $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
*/
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [69/145] x86_64: Disable DAC on VIA PCI bridges
[not found] <20060810 935.775038000@suse.de>
` (67 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [68/145] x86_64: Remove obsolete CVS $ from assembler files in arch/x86_64/kernel/* Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
[not found] ` <20060810205554.GE4745@rhun.haifa.ibm.com>
2006-08-10 19:36 ` [PATCH for review] [70/145] x86_64: initialize end of memory variables as early as possible Andi Kleen
` (76 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Disable DAC (Double Address Cycle) DMA for devices behind VIA PCI bridges,
because of several reports that it doesn't work there.
TBD: needs real confirmation that this fixes the problem.
TBD: needs more testing.
Signed-off-by: Andi Kleen <ak@suse.de>
---
Documentation/x86_64/boot-options.txt | 4 +++
arch/x86_64/kernel/pci-dma.c | 42 ++++++++++++++++++++++++++++++++++
2 files changed, 46 insertions(+)
Index: linux/Documentation/x86_64/boot-options.txt
===================================================================
--- linux.orig/Documentation/x86_64/boot-options.txt
+++ linux/Documentation/x86_64/boot-options.txt
@@ -199,6 +199,10 @@ IOMMU
allowed overwrite iommu off workarounds for specific chipsets.
soft Use software bounce buffering (default for Intel machines)
noaperture Don't touch the aperture for AGP.
+ allowdac Allow DMA >4GB - default selected based on chipset bugs
+ When off, all DMA above 4GB is forced through an IOMMU or bounce
+ buffering.
+ nodac Forbid DMA >4GB
swiotlb=pages[,force]
Index: linux/arch/x86_64/kernel/pci-dma.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-dma.c
+++ linux/arch/x86_64/kernel/pci-dma.c
@@ -170,11 +170,47 @@ void dma_free_coherent(struct device *de
}
EXPORT_SYMBOL(dma_free_coherent);
+static int allow_dac;
+
+static int bridge_from_vendor(struct device *dev, u16 vendor)
+{
+#ifdef CONFIG_PCI
+ struct pci_bus *bus;
+ if (dev->bus != &pci_bus_type)
+ return 0;
+ bus = to_pci_dev(dev)->bus;
+ /* RED-PEN
+ Assumes no locking is needed on these lists because someone
+ should hold a reference count on the target device.
+ Correct assumption? */
+ while (bus != NULL) {
+ if (bus->self && bus->self->vendor == vendor)
+ return 1;
+ bus = bus->parent;
+ }
+#endif
+ return 0;
+}
+
int dma_supported(struct device *dev, u64 mask)
{
if (dma_ops->dma_supported)
return dma_ops->dma_supported(dev, mask);
+ if (mask > DMA_32BIT_MASK) {
+ /* Some VIA bridges seem to have trouble with Double Address
+ Cycle. Disable it behind them all for now. The driver
+ should fall back to non DAC. */
+ if (bridge_from_vendor(dev, PCI_VENDOR_ID_VIA) && !allow_dac) {
+ printk(KERN_INFO
+ "PCI: %s disallowing DAC because of VIA bridge.\n",
+ dev->bus_id);
+ return 0;
+ }
+ if (allow_dac < 0)
+ return 0;
+ }
+
/* Copied from i386. Doesn't make much sense, because it will
only work for pci_alloc_coherent.
The caller just has to use GFP_DMA in this case. */
@@ -231,6 +267,8 @@ EXPORT_SYMBOL(dma_set_mask);
allowed overwrite iommu off workarounds for specific chipsets.
soft Use software bounce buffering (default for Intel machines)
noaperture Don't touch the aperture for AGP.
+ allowdac Allow DMA >4GB - default selected based on chipset bugs
+ nodac Forbid DMA >4GB
*/
__init int iommu_setup(char *p)
{
@@ -264,6 +302,10 @@ __init int iommu_setup(char *p)
iommu_merge = 0;
if (!strncmp(p, "forcesac",8))
iommu_sac_force = 1;
+ if (!strncmp(p, "allowdac", 8))
+ allow_dac = 1;
+ if (!strncmp(p, "nodac", 5))
+ allow_dac = -1;
#ifdef CONFIG_SWIOTLB
if (!strncmp(p, "soft",4))
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [70/145] x86_64: initialize end of memory variables as early as possible
[not found] <20060810 935.775038000@suse.de>
` (68 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [69/145] x86_64: Disable DAC on VIA PCI bridges Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [71/145] x86_64: remove int_delivery_dest Andi Kleen
` (75 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: "Jan Beulich" <jbeulich@novell.com>
While an earlier patch already did a small step into that direction,
this patch moves initialization of all memory end variables to as
early as possible, so that dependent code doesn't need to check
whether these variables have already been set.
Also, remove a misleading (perhaps just outdated) comment, and make
static a variable only used in a single file.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/e820.c | 2 +-
arch/x86_64/kernel/setup.c | 7 ++++++-
arch/x86_64/mm/init.c | 6 ------
3 files changed, 7 insertions(+), 8 deletions(-)
Index: linux/arch/x86_64/kernel/e820.c
===================================================================
--- linux.orig/arch/x86_64/kernel/e820.c
+++ linux/arch/x86_64/kernel/e820.c
@@ -40,7 +40,7 @@ unsigned long end_pfn_map;
/*
* Last pfn which the user wants to use.
*/
-unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
+static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
extern struct resource code_resource, data_resource;
Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -556,7 +556,7 @@ void __init setup_arch(char **cmdline_p)
* we are rounding upwards:
*/
end_pfn = e820_end_of_ram();
- num_physpages = end_pfn; /* for pfn_valid */
+ num_physpages = end_pfn;
check_efer();
@@ -576,6 +576,11 @@ void __init setup_arch(char **cmdline_p)
acpi_boot_table_init();
#endif
+ /* How many end-of-memory variables you have, grandma! */
+ max_low_pfn = end_pfn;
+ max_pfn = end_pfn;
+ high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
+
#ifdef CONFIG_ACPI_NUMA
/*
* Parse SRAT to discover nodes.
Index: linux/arch/x86_64/mm/init.c
===================================================================
--- linux.orig/arch/x86_64/mm/init.c
+++ linux/arch/x86_64/mm/init.c
@@ -597,12 +597,6 @@ void __init mem_init(void)
pci_iommu_alloc();
- /* How many end-of-memory variables you have, grandma! */
- max_low_pfn = end_pfn;
- max_pfn = end_pfn;
- num_physpages = end_pfn;
- high_memory = (void *) __va(end_pfn * PAGE_SIZE);
-
/* clear the zero-page */
memset(empty_zero_page, 0, PAGE_SIZE);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [71/145] x86_64: remove int_delivery_dest
[not found] <20060810 935.775038000@suse.de>
` (69 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [70/145] x86_64: initialize end of memory variables as early as possible Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [72/145] i386: initialize end-of-memory variables as early as possible Andi Kleen
` (74 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: "Jan Beulich" <jbeulich@novell.com>
The genapic field and the accessor macro weren't used anywhere.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/genapic_cluster.c | 1 -
arch/x86_64/kernel/genapic_flat.c | 2 --
include/asm-x86_64/genapic.h | 1 -
include/asm-x86_64/mach_apic.h | 1 -
4 files changed, 5 deletions(-)
Index: linux/arch/x86_64/kernel/genapic_cluster.c
===================================================================
--- linux.orig/arch/x86_64/kernel/genapic_cluster.c
+++ linux/arch/x86_64/kernel/genapic_cluster.c
@@ -118,7 +118,6 @@ struct genapic apic_cluster = {
.name = "clustered",
.int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
- .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_FIXED,
.target_cpus = cluster_target_cpus,
.apic_id_registered = cluster_apic_id_registered,
.init_apic_ldr = cluster_init_apic_ldr,
Index: linux/arch/x86_64/kernel/genapic_flat.c
===================================================================
--- linux.orig/arch/x86_64/kernel/genapic_flat.c
+++ linux/arch/x86_64/kernel/genapic_flat.c
@@ -121,7 +121,6 @@ struct genapic apic_flat = {
.name = "flat",
.int_delivery_mode = dest_LowestPrio,
.int_dest_mode = (APIC_DEST_LOGICAL != 0),
- .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
.target_cpus = flat_target_cpus,
.apic_id_registered = flat_apic_id_registered,
.init_apic_ldr = flat_init_apic_ldr,
@@ -180,7 +179,6 @@ struct genapic apic_physflat = {
.name = "physical flat",
.int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
- .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_FIXED,
.target_cpus = physflat_target_cpus,
.apic_id_registered = flat_apic_id_registered,
.init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
Index: linux/include/asm-x86_64/genapic.h
===================================================================
--- linux.orig/include/asm-x86_64/genapic.h
+++ linux/include/asm-x86_64/genapic.h
@@ -16,7 +16,6 @@ struct genapic {
char *name;
u32 int_delivery_mode;
u32 int_dest_mode;
- u32 int_delivery_dest; /* for quick IPIs */
int (*apic_id_registered)(void);
cpumask_t (*target_cpus)(void);
void (*init_apic_ldr)(void);
Index: linux/include/asm-x86_64/mach_apic.h
===================================================================
--- linux.orig/include/asm-x86_64/mach_apic.h
+++ linux/include/asm-x86_64/mach_apic.h
@@ -16,7 +16,6 @@
#define INT_DELIVERY_MODE (genapic->int_delivery_mode)
#define INT_DEST_MODE (genapic->int_dest_mode)
-#define INT_DELIVERY_DEST (genapic->int_delivery_dest)
#define TARGET_CPUS (genapic->target_cpus())
#define apic_id_registered (genapic->apic_id_registered)
#define init_apic_ldr (genapic->init_apic_ldr)
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [72/145] i386: initialize end-of-memory variables as early as possible
[not found] <20060810 935.775038000@suse.de>
` (70 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [71/145] x86_64: remove int_delivery_dest Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [73/145] x86_64: Add stack documentation document from Keith Owens Andi Kleen
` (73 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: "Jan Beulich" <jbeulich@novell.com>
Move initialization of all memory end variables to as early as
possible, so that dependent code doesn't need to check whether these
variables have already been set.
Change the range check in kunmap_atomic to actually make use of this
so that the no-mapping-established path (under CONFIG_DEBUG_HIGHMEM)
gets used only when the address is inside the lowmem area (and BUG()
otherwise).
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/setup.c | 8 ++++++++
arch/i386/mm/discontig.c | 5 +++++
arch/i386/mm/highmem.c | 2 +-
arch/i386/mm/init.c | 20 --------------------
4 files changed, 14 insertions(+), 21 deletions(-)
Index: linux/arch/i386/kernel/setup.c
===================================================================
--- linux.orig/arch/i386/kernel/setup.c
+++ linux/arch/i386/kernel/setup.c
@@ -1170,6 +1170,14 @@ static unsigned long __init setup_memory
}
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
+ num_physpages = highend_pfn;
+ high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
+#else
+ num_physpages = max_low_pfn;
+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
+#endif
+#ifdef CONFIG_FLATMEM
+ max_mapnr = num_physpages;
#endif
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
pages_to_mb(max_low_pfn));
Index: linux/arch/i386/mm/discontig.c
===================================================================
--- linux.orig/arch/i386/mm/discontig.c
+++ linux/arch/i386/mm/discontig.c
@@ -313,6 +313,11 @@ unsigned long __init setup_memory(void)
highstart_pfn = system_max_low_pfn;
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
+ num_physpages = highend_pfn;
+ high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
+#else
+ num_physpages = system_max_low_pfn;
+ high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
pages_to_mb(system_max_low_pfn));
Index: linux/arch/i386/mm/highmem.c
===================================================================
--- linux.orig/arch/i386/mm/highmem.c
+++ linux/arch/i386/mm/highmem.c
@@ -54,7 +54,7 @@ void kunmap_atomic(void *kvaddr, enum km
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
- if (vaddr < FIXADDR_START) { // FIXME
+ if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
dec_preempt_count();
preempt_check_resched();
return;
Index: linux/arch/i386/mm/init.c
===================================================================
--- linux.orig/arch/i386/mm/init.c
+++ linux/arch/i386/mm/init.c
@@ -552,18 +552,6 @@ static void __init test_wp_bit(void)
}
}
-static void __init set_max_mapnr_init(void)
-{
-#ifdef CONFIG_HIGHMEM
- num_physpages = highend_pfn;
-#else
- num_physpages = max_low_pfn;
-#endif
-#ifdef CONFIG_FLATMEM
- max_mapnr = num_physpages;
-#endif
-}
-
static struct kcore_list kcore_mem, kcore_vmalloc;
void __init mem_init(void)
@@ -590,14 +578,6 @@ void __init mem_init(void)
}
#endif
- set_max_mapnr_init();
-
-#ifdef CONFIG_HIGHMEM
- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
-#else
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
-#endif
-
/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [73/145] x86_64: Add stack documentation document from Keith Owens
[not found] <20060810 935.775038000@suse.de>
` (71 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [72/145] i386: initialize end-of-memory variables as early as possible Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [74/145] x86_64: Calgary IOMMU: rearrange 'struct iommu_table' members Andi Kleen
` (72 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Describes the stack organization on x86-64.
I changed it a bit and removed some obsolete information and the
questions.
Cc: kaos@sgi.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
Documentation/x86_64/kernel-stacks | 99 +++++++++++++++++++++++++++++++++++++
1 files changed, 99 insertions(+)
Index: linux/Documentation/x86_64/kernel-stacks
===================================================================
--- /dev/null
+++ linux/Documentation/x86_64/kernel-stacks
@@ -0,0 +1,99 @@
+Most of the text from Keith Owens, hacked by AK
+
+x86_64 page size (PAGE_SIZE) is 4K.
+
+Like all other architectures, x86_64 has a kernel stack for every
+active thread. These thread stacks are THREAD_SIZE (2*PAGE_SIZE) big.
+These stacks contain useful data as long as a thread is alive or a
+zombie. While the thread is in user space the kernel stack is empty
+except for the thread_info structure at the bottom.
+
+In addition to the per thread stacks, there are specialized stacks
+associated with each cpu. These stacks are only used while the kernel
+is in control on that cpu, when a cpu returns to user space the
+specialized stacks contain no useful data. The main cpu stack is:
+
+* Interrupt stack. IRQSTACKSIZE
+
+ Used for external hardware interrupts. If this is the first external
+ hardware interrupt (i.e. not a nested hardware interrupt) then the
+ kernel switches from the current task to the interrupt stack. Like
+ the split thread and interrupt stacks on i386 (with CONFIG_4KSTACKS),
+ this gives more room for kernel interrupt processing without having
+ to increase the size of every per thread stack.
+
+ The interrupt stack is also used when processing a softirq.
+
+Switching to the kernel interrupt stack is done by software based on a
+per CPU interrupt nest counter. This is needed because x86-64 "IST"
+hardware stacks cannot nest without races.
+
+x86_64 also has a feature which is not available on i386, the ability
+to automatically switch to a new stack for designated events such as
+double fault or NMI, which makes it easier to handle these unusual
+events on x86_64. This feature is called the Interrupt Stack Table
+(IST). There can be up to 7 IST entries per cpu. The IST code is an
+index into the Task State Segment (TSS), the IST entries in the TSS
+point to dedicated stacks, each stack can be a different size.
+
+An IST is selected by a non-zero value in the IST field of an
+interrupt-gate descriptor. When an interrupt occurs and the hardware
+loads such a descriptor, the hardware automatically sets the new stack
+pointer based on the IST value, then invokes the interrupt handler. If
+software wants to allow nested IST interrupts then the handler must
+adjust the IST values on entry to and exit from the interrupt handler.
+(this is occasionally done, e.g. for debug exceptions)
+
+Events with different IST codes (i.e. with different stacks) can be
+nested. For example, a debug interrupt can safely be interrupted by an
+NMI. arch/x86_64/kernel/entry.S::paranoidentry adjusts the stack
+pointers on entry to and exit from all IST events, in theory allowing
+IST events with the same code to be nested. However in most cases, the
+stack size allocated to an IST assumes no nesting for the same code.
+If that assumption is ever broken then the stacks will become corrupt.
+
+The currently assigned IST stacks are :-
+
+* STACKFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for interrupt 12 - Stack Fault Exception (#SS).
+
+ This allows to recover from invalid stack segments. Rarely
+ happens.
+
+* DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for interrupt 8 - Double Fault Exception (#DF).
+
+ Invoked when handling an exception causes another exception. Happens
+ when the kernel is very confused (e.g. kernel stack pointer corrupt)
+ Using a separate stack allows to recover from it well enough in many
+ cases to still output an oops.
+
+* NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for non-maskable interrupts (NMI).
+
+ NMI can be delivered at any time, including when the kernel is in the
+ middle of switching stacks. Using IST for NMI events avoids making
+ assumptions about the previous state of the kernel stack.
+
+* DEBUG_STACK. DEBUG_STKSZ
+
+ Used for hardware debug interrupts (interrupt 1) and for software
+ debug interrupts (INT3).
+
+ When debugging a kernel, debug interrupts (both hardware and
+ software) can occur at any time. Using IST for these interrupts
+ avoids making assumptions about the previous state of the kernel
+ stack.
+
+* MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for interrupt 18 - Machine Check Exception (#MC).
+
+ MCE can be delivered at any time, including when the kernel is in the
+ middle of switching stacks. Using IST for MCE events avoids making
+ assumptions about the previous state of the kernel stack.
+
+For more details see the Intel IA32 or AMD AMD64 architecture manuals.
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [74/145] x86_64: Calgary IOMMU: rearrange 'struct iommu_table' members
[not found] <20060810 935.775038000@suse.de>
` (72 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [73/145] x86_64: Add stack documentation document from Keith Owens Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [75/145] x86_64: Calgary IOMMU: consolidate per bus data structures Andi Kleen
` (71 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Muli Ben-Yehuda <muli@il.ibm.com>
Rearrange struct members loosely based on size for improved alignment
and to save a few bytes.
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
include/asm-x86_64/calgary.h | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
Index: linux/include/asm-x86_64/calgary.h
===================================================================
--- linux.orig/include/asm-x86_64/calgary.h
+++ linux/include/asm-x86_64/calgary.h
@@ -34,12 +34,12 @@ struct iommu_table {
unsigned long it_base; /* mapped address of tce table */
unsigned long it_hint; /* Hint for next alloc */
unsigned long *it_map; /* A simple allocation bitmap for now */
+ void __iomem *bbar; /* Bridge BAR */
+ u64 tar_val; /* Table Address Register */
+ struct timer_list watchdog_timer;
spinlock_t it_lock; /* Protects it_map */
unsigned int it_size; /* Size of iommu table in entries */
unsigned char it_busno; /* Bus number this table belongs to */
- void __iomem *bbar;
- u64 tar_val;
- struct timer_list watchdog_timer;
};
#define TCE_TABLE_SIZE_UNSPECIFIED ~0
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [75/145] x86_64: Calgary IOMMU: consolidate per bus data structures
[not found] <20060810 935.775038000@suse.de>
` (73 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [74/145] x86_64: Calgary IOMMU: rearrange 'struct iommu_table' members Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [76/145] x86_64: Calgary IOMMU: break out of pci_find_device_reverse if dev not found Andi Kleen
` (70 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Muli Ben-Yehuda <muli@il.ibm.com>
Move the tce_table_kva array, disabled bitmap and bus_to_phb array
into a new per bus 'struct calgary_bus_info'. Also slightly reorganize
build_tce_table and tce_table_setparms to avoid exporting bus_info to
tce.c.
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/pci-calgary.c | 53 ++++++++++++++++++++-------------------
arch/x86_64/kernel/tce.c | 10 -------
include/asm-x86_64/tce.h | 1
3 files changed, 28 insertions(+), 36 deletions(-)
Index: linux/arch/x86_64/kernel/pci-calgary.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-calgary.c
+++ linux/arch/x86_64/kernel/pci-calgary.c
@@ -111,17 +111,17 @@ static const unsigned long phb_offsets[]
0xB000 /* PHB3 */
};
-static char bus_to_phb[MAX_PHB_BUS_NUM];
-void* tce_table_kva[MAX_PHB_BUS_NUM];
unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
static int translate_empty_slots __read_mostly = 0;
static int calgary_detected __read_mostly = 0;
-/*
- * the bitmap of PHBs the user requested that we disable
- * translation on.
- */
-static DECLARE_BITMAP(translation_disabled, MAX_PHB_BUS_NUM);
+struct calgary_bus_info {
+ void *tce_space;
+ int translation_disabled;
+ signed char phbid;
+};
+
+static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
static void tce_cache_blast(struct iommu_table *tbl);
@@ -149,7 +149,7 @@ static inline unsigned int num_dma_pages
static inline int translate_phb(struct pci_dev* dev)
{
- int disabled = test_bit(dev->bus->number, translation_disabled);
+ int disabled = bus_info[dev->bus->number].translation_disabled;
return !disabled;
}
@@ -454,7 +454,7 @@ static struct dma_mapping_ops calgary_dm
static inline int busno_to_phbid(unsigned char num)
{
- return bus_to_phb[num];
+ return bus_info[num].phbid;
}
static inline unsigned long split_queue_offset(unsigned char num)
@@ -631,6 +631,10 @@ static int __init calgary_setup_tar(stru
if (ret)
return ret;
+ tbl = dev->sysdata;
+ tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
+ tce_free(tbl, 0, tbl->it_size);
+
calgary_reserve_regions(dev);
/* set TARs for each PHB */
@@ -824,7 +828,7 @@ static int __init calgary_init(void)
calgary_init_one_nontraslated(dev);
continue;
}
- if (!tce_table_kva[dev->bus->number] && !translate_empty_slots) {
+ if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots) {
pci_dev_put(dev);
continue;
}
@@ -844,7 +848,7 @@ error:
pci_dev_put(dev);
continue;
}
- if (!tce_table_kva[dev->bus->number] && !translate_empty_slots)
+ if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots)
continue;
calgary_disable_translation(dev);
calgary_free_tar(dev);
@@ -894,9 +898,8 @@ void __init detect_calgary(void)
for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
int dev;
-
- tce_table_kva[bus] = NULL;
- bus_to_phb[bus] = -1;
+ struct calgary_bus_info *info = &bus_info[bus];
+ info->phbid = -1;
if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
continue;
@@ -907,12 +910,9 @@ void __init detect_calgary(void)
*/
phb = (phb + 1) % PHBS_PER_CALGARY;
- if (test_bit(bus, translation_disabled)) {
- printk(KERN_INFO "Calgary: translation is disabled for "
- "PHB 0x%x\n", bus);
- /* skip this phb, don't allocate a tbl for it */
+ if (info->translation_disabled)
continue;
- }
+
/*
* Scan the slots of the PCI bus to see if there is a device present.
* The parent bus will be the zero-ith device, so start at 1.
@@ -923,8 +923,8 @@ void __init detect_calgary(void)
tbl = alloc_tce_table();
if (!tbl)
goto cleanup;
- tce_table_kva[bus] = tbl;
- bus_to_phb[bus] = phb;
+ info->tce_space = tbl;
+ info->phbid = phb;
calgary_found = 1;
break;
}
@@ -940,9 +940,12 @@ void __init detect_calgary(void)
return;
cleanup:
- for (--bus; bus >= 0; --bus)
- if (tce_table_kva[bus])
- free_tce_table(tce_table_kva[bus]);
+ for (--bus; bus >= 0; --bus) {
+ struct calgary_bus_info *info = &bus_info[bus];
+
+ if (info->tce_space)
+ free_tce_table(info->tce_space);
+ }
}
int __init calgary_iommu_init(void)
@@ -1016,7 +1019,7 @@ static int __init calgary_parse_options(
if (bridge < MAX_PHB_BUS_NUM) {
printk(KERN_INFO "Calgary: disabling "
"translation for PHB 0x%x\n", bridge);
- set_bit(bridge, translation_disabled);
+ bus_info[bridge].translation_disabled = 1;
}
}
Index: linux/arch/x86_64/kernel/tce.c
===================================================================
--- linux.orig/arch/x86_64/kernel/tce.c
+++ linux/arch/x86_64/kernel/tce.c
@@ -106,14 +106,6 @@ static int tce_table_setparms(struct pci
/* set the tce table size - measured in entries */
tbl->it_size = table_size_to_number_of_entries(specified_table_size);
- tbl->it_base = (unsigned long)tce_table_kva[dev->bus->number];
- if (!tbl->it_base) {
- printk(KERN_ERR "Calgary: iommu_table_setparms: "
- "no table allocated?!\n");
- ret = -ENOMEM;
- goto done;
- }
-
/*
* number of bytes needed for the bitmap size in number of
* entries; we need one bit per entry
@@ -162,8 +154,6 @@ int build_tce_table(struct pci_dev *dev,
if (ret)
goto free_tbl;
- tce_free(tbl, 0, tbl->it_size);
-
tbl->bbar = bbar;
/*
Index: linux/include/asm-x86_64/tce.h
===================================================================
--- linux.orig/include/asm-x86_64/tce.h
+++ linux/include/asm-x86_64/tce.h
@@ -24,7 +24,6 @@
#ifndef _ASM_X86_64_TCE_H
#define _ASM_X86_64_TCE_H
-extern void* tce_table_kva[];
extern unsigned int specified_table_size;
struct iommu_table;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [76/145] x86_64: Calgary IOMMU: break out of pci_find_device_reverse if dev not found
[not found] <20060810 935.775038000@suse.de>
` (74 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [75/145] x86_64: Calgary IOMMU: consolidate per bus data structures Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [77/145] x86_64: Calgary IOMMU: fix error path memleak in calgary_free_tar Andi Kleen
` (69 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/pci-calgary.c | 2 ++
1 files changed, 2 insertions(+)
Index: linux/arch/x86_64/kernel/pci-calgary.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-calgary.c
+++ linux/arch/x86_64/kernel/pci-calgary.c
@@ -844,6 +844,8 @@ error:
dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
PCI_DEVICE_ID_IBM_CALGARY,
dev);
+ if (!dev)
+ break;
if (!translate_phb(dev)) {
pci_dev_put(dev);
continue;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [77/145] x86_64: Calgary IOMMU: fix error path memleak in calgary_free_tar
[not found] <20060810 935.775038000@suse.de>
` (75 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [76/145] x86_64: Calgary IOMMU: break out of pci_find_device_reverse if dev not found Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [78/145] x86_64: Calgary IOMMU: fix reference counting of Calgary PCI devices Andi Kleen
` (68 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Muli Ben-Yehuda <muli@il.ibm.com>
We were freeing the iommu_table and leaking the bitmap pages. Also
rename it to calgary_free_bus, which is more accurate.
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/pci-calgary.c | 12 ++++++++++--
1 files changed, 10 insertions(+), 2 deletions(-)
Index: linux/arch/x86_64/kernel/pci-calgary.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-calgary.c
+++ linux/arch/x86_64/kernel/pci-calgary.c
@@ -658,11 +658,12 @@ static int __init calgary_setup_tar(stru
return 0;
}
-static void __init calgary_free_tar(struct pci_dev *dev)
+static void __init calgary_free_bus(struct pci_dev *dev)
{
u64 val64;
struct iommu_table *tbl = dev->sysdata;
void __iomem *target;
+ unsigned int bitmapsz;
target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
val64 = be64_to_cpu(readq(target));
@@ -670,8 +671,15 @@ static void __init calgary_free_tar(stru
writeq(cpu_to_be64(val64), target);
readq(target); /* flush */
+ bitmapsz = tbl->it_size / BITS_PER_BYTE;
+ free_pages((unsigned long)tbl->it_map, get_order(bitmapsz));
+ tbl->it_map = NULL;
+
kfree(tbl);
dev->sysdata = NULL;
+
+ /* Can't free bootmem allocated memory after system is up :-( */
+ bus_info[dev->bus->number].tce_space = NULL;
}
static void calgary_watchdog(unsigned long data)
@@ -853,7 +861,7 @@ error:
if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots)
continue;
calgary_disable_translation(dev);
- calgary_free_tar(dev);
+ calgary_free_bus(dev);
pci_dev_put(dev);
}
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [78/145] x86_64: Calgary IOMMU: fix reference counting of Calgary PCI devices
[not found] <20060810 935.775038000@suse.de>
` (76 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [77/145] x86_64: Calgary IOMMU: fix error path memleak in calgary_free_tar Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [79/145] x86_64: Calgary IOMMU: calgary_init_one_nontraslated() can return void Andi Kleen
` (67 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Muli Ben-Yehuda <muli@il.ibm.com>
The pci_get_device() API decrements the reference count on the 'from'
parameter when it continues searching. Therefore, take a ref count on
each Calgary bus device when we initialize it in either translated or
non-translated mode.
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/pci-calgary.c | 10 ++++++----
1 files changed, 6 insertions(+), 4 deletions(-)
Index: linux/arch/x86_64/kernel/pci-calgary.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-calgary.c
+++ linux/arch/x86_64/kernel/pci-calgary.c
@@ -786,6 +786,7 @@ static inline unsigned int __init locate
static int __init calgary_init_one_nontraslated(struct pci_dev *dev)
{
+ pci_dev_get(dev);
dev->sysdata = NULL;
dev->bus->self = dev;
@@ -810,6 +811,7 @@ static int __init calgary_init_one(struc
if (ret)
goto iounmap;
+ pci_dev_get(dev);
dev->bus->self = dev;
calgary_enable_translation(dev);
@@ -836,10 +838,9 @@ static int __init calgary_init(void)
calgary_init_one_nontraslated(dev);
continue;
}
- if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots) {
- pci_dev_put(dev);
+ if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots)
continue;
- }
+
ret = calgary_init_one(dev);
if (ret)
goto error;
@@ -860,9 +861,10 @@ error:
}
if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots)
continue;
+
calgary_disable_translation(dev);
calgary_free_bus(dev);
- pci_dev_put(dev);
+ pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
}
return ret;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [79/145] x86_64: Calgary IOMMU: calgary_init_one_nontraslated() can return void
[not found] <20060810 935.775038000@suse.de>
` (77 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [78/145] x86_64: Calgary IOMMU: fix reference counting of Calgary PCI devices Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [80/145] x86_64: Calgary IOMMU: save a bit of space in bus_info Andi Kleen
` (66 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/pci-calgary.c | 4 +---
1 files changed, 1 insertion(+), 3 deletions(-)
Index: linux/arch/x86_64/kernel/pci-calgary.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-calgary.c
+++ linux/arch/x86_64/kernel/pci-calgary.c
@@ -784,13 +784,11 @@ static inline unsigned int __init locate
return address;
}
-static int __init calgary_init_one_nontraslated(struct pci_dev *dev)
+static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
{
pci_dev_get(dev);
dev->sysdata = NULL;
dev->bus->self = dev;
-
- return 0;
}
static int __init calgary_init_one(struct pci_dev *dev)
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [80/145] x86_64: Calgary IOMMU: save a bit of space in bus_info
[not found] <20060810 935.775038000@suse.de>
` (78 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [79/145] x86_64: Calgary IOMMU: calgary_init_one_nontraslated() can return void Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [81/145] i386: Remove lock section support in mutex.h, semaphore.h Andi Kleen
` (65 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Muli Ben-Yehuda <muli@il.ibm.com>
Make translation_disabled a uchar rather than an int
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/pci-calgary.c | 2 +-
1 files changed, 1 insertion(+), 1 deletion(-)
Index: linux/arch/x86_64/kernel/pci-calgary.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-calgary.c
+++ linux/arch/x86_64/kernel/pci-calgary.c
@@ -117,7 +117,7 @@ static int calgary_detected __read_mostl
struct calgary_bus_info {
void *tce_space;
- int translation_disabled;
+ unsigned char translation_disabled;
signed char phbid;
};
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [81/145] i386: Remove lock section support in mutex.h, semaphore.h
[not found] <20060810 935.775038000@suse.de>
` (79 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [80/145] x86_64: Calgary IOMMU: save a bit of space in bus_info Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [82/145] x86_64: Don't use lock section for mutexes and semaphores Andi Kleen
` (64 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Lock sections don't work with the new dwarf2 unwinder.
This generates slightly smaller code. It adds one more taken
jump to the fast path.
Also move the trampolines into semaphore.S and add proper CFI
annotations.
Cc: jbeulich@novell.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/lib/semaphore.S | 63 +++++++++++++++++++++++++++++++++++++++++++
include/asm-i386/mutex.h | 16 ++--------
include/asm-i386/rwsem.h | 62 ++++++++----------------------------------
include/asm-i386/semaphore.h | 49 +++++++++++----------------------
4 files changed, 96 insertions(+), 94 deletions(-)
Index: linux/include/asm-i386/mutex.h
===================================================================
--- linux.orig/include/asm-i386/mutex.h
+++ linux/include/asm-i386/mutex.h
@@ -30,14 +30,10 @@ do { \
\
__asm__ __volatile__( \
LOCK_PREFIX " decl (%%eax) \n" \
- " js 2f \n" \
+ " jns 1f \n" \
+ " call "#fail_fn" \n" \
"1: \n" \
\
- LOCK_SECTION_START("") \
- "2: call "#fail_fn" \n" \
- " jmp 1b \n" \
- LOCK_SECTION_END \
- \
:"=a" (dummy) \
: "a" (count) \
: "memory", "ecx", "edx"); \
@@ -86,14 +82,10 @@ do { \
\
__asm__ __volatile__( \
LOCK_PREFIX " incl (%%eax) \n" \
- " jle 2f \n" \
+ " jg 1f \n" \
+ " call "#fail_fn" \n" \
"1: \n" \
\
- LOCK_SECTION_START("") \
- "2: call "#fail_fn" \n" \
- " jmp 1b \n" \
- LOCK_SECTION_END \
- \
:"=a" (dummy) \
: "a" (count) \
: "memory", "ecx", "edx"); \
Index: linux/arch/i386/lib/semaphore.S
===================================================================
--- linux.orig/arch/i386/lib/semaphore.S
+++ linux/arch/i386/lib/semaphore.S
@@ -130,3 +130,66 @@ ENTRY(__read_lock_failed)
END(__read_lock_failed)
#endif
+
+/* Fix up special calling conventions */
+ENTRY(call_rwsem_down_read_failed)
+ CFI_STARTPROC
+ push %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx,0
+ push %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edx,0
+ call rwsem_down_read_failed
+ pop %edx
+ CFI_ADJUST_CFA_OFFSET -4
+ pop %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ ret
+ CFI_ENDPROC
+ END(call_rwsem_down_read_failed)
+
+ENTRY(call_rwsem_down_write_failed)
+ CFI_STARTPROC
+ push %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx,0
+ calll rwsem_down_write_failed
+ pop %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ ret
+ CFI_ENDPROC
+ END(call_rwsem_down_write_failed)
+
+ENTRY(call_rwsem_wake)
+ CFI_STARTPROC
+ decw %dx /* do nothing if still outstanding active readers */
+ jnz 1f
+ push %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx,0
+ call rwsem_wake
+ pop %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+1: ret
+ CFI_ENDPROC
+ END(call_rwsem_wake)
+
+/* Fix up special calling conventions: save/restore %ecx and %edx around the call */
+ENTRY(call_rwsem_downgrade_wake)
+ CFI_STARTPROC
+ push %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx,0
+ push %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edx,0
+ call rwsem_downgrade_wake
+ pop %edx
+ CFI_ADJUST_CFA_OFFSET -4
+ pop %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ ret
+ CFI_ENDPROC
+ END(call_rwsem_downgrade_wake)
+
Index: linux/include/asm-i386/rwsem.h
===================================================================
--- linux.orig/include/asm-i386/rwsem.h
+++ linux/include/asm-i386/rwsem.h
@@ -99,17 +99,9 @@ static inline void __down_read(struct rw
__asm__ __volatile__(
"# beginning down_read\n\t"
LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */
- " js 2f\n\t" /* jump if we weren't granted the lock */
+ " jns 1f\n"
+ " call call_rwsem_down_read_failed\n"
"1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " pushl %%ecx\n\t"
- " pushl %%edx\n\t"
- " call rwsem_down_read_failed\n\t"
- " popl %%edx\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
"# ending down_read\n\t"
: "+m" (sem->count)
: "a" (sem)
@@ -151,15 +143,9 @@ static inline void __down_write_nested(s
"# beginning down_write\n\t"
LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
" testl %%edx,%%edx\n\t" /* was the count 0 before? */
- " jnz 2f\n\t" /* jump if we weren't granted the lock */
- "1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " pushl %%ecx\n\t"
- " call rwsem_down_write_failed\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
+ " jz 1f\n"
+ " call call_rwsem_down_write_failed\n"
+ "1:\n"
"# ending down_write"
: "+m" (sem->count), "=d" (tmp)
: "a" (sem), "1" (tmp)
@@ -193,17 +179,9 @@ static inline void __up_read(struct rw_s
__asm__ __volatile__(
"# beginning __up_read\n\t"
LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
- " js 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " decw %%dx\n\t" /* do nothing if still outstanding active readers */
- " jnz 1b\n\t"
- " pushl %%ecx\n\t"
- " call rwsem_wake\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
+ " jns 1f\n\t"
+ " call call_rwsem_wake\n"
+ "1:\n"
"# ending __up_read\n"
: "+m" (sem->count), "=d" (tmp)
: "a" (sem), "1" (tmp)
@@ -219,17 +197,9 @@ static inline void __up_write(struct rw_
"# beginning __up_write\n\t"
" movl %2,%%edx\n\t"
LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
- " jnz 2f\n\t" /* jump if the lock is being waited upon */
+ " jz 1f\n"
+ " call call_rwsem_wake\n"
"1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " decw %%dx\n\t" /* did the active count reduce to 0? */
- " jnz 1b\n\t" /* jump back if not */
- " pushl %%ecx\n\t"
- " call rwsem_wake\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
"# ending __up_write\n"
: "+m" (sem->count)
: "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS)
@@ -244,17 +214,9 @@ static inline void __downgrade_write(str
__asm__ __volatile__(
"# beginning __downgrade_write\n\t"
LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
- " js 2f\n\t" /* jump if the lock is being waited upon */
+ " jns 1f\n\t"
+ " call call_rwsem_downgrade_wake\n"
"1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " pushl %%ecx\n\t"
- " pushl %%edx\n\t"
- " call rwsem_downgrade_wake\n\t"
- " popl %%edx\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
"# ending __downgrade_write\n"
: "+m" (sem->count)
: "a" (sem), "i" (-RWSEM_WAITING_BIAS)
Index: linux/include/asm-i386/semaphore.h
===================================================================
--- linux.orig/include/asm-i386/semaphore.h
+++ linux/include/asm-i386/semaphore.h
@@ -100,13 +100,10 @@ static inline void down(struct semaphore
__asm__ __volatile__(
"# atomic down operation\n\t"
LOCK_PREFIX "decl %0\n\t" /* --sem->count */
- "js 2f\n"
- "1:\n"
- LOCK_SECTION_START("")
- "2:\tlea %0,%%eax\n\t"
- "call __down_failed\n\t"
- "jmp 1b\n"
- LOCK_SECTION_END
+ "jns 2f\n"
+ "\tlea %0,%%eax\n\t"
+ "call __down_failed\n"
+ "2:"
:"+m" (sem->count)
:
:"memory","ax");
@@ -123,15 +120,12 @@ static inline int down_interruptible(str
might_sleep();
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
+ "xorl %0,%0\n\t"
LOCK_PREFIX "decl %1\n\t" /* --sem->count */
- "js 2f\n\t"
- "xorl %0,%0\n"
- "1:\n"
- LOCK_SECTION_START("")
- "2:\tlea %1,%%eax\n\t"
- "call __down_failed_interruptible\n\t"
- "jmp 1b\n"
- LOCK_SECTION_END
+ "jns 2f\n\t"
+ "lea %1,%%eax\n\t"
+ "call __down_failed_interruptible\n"
+ "2:"
- :"=a" (result), "+m" (sem->count)
+ :"=&a" (result), "+m" (sem->count) /* earlyclobber: %0 is zeroed before %1 is used */
:
:"memory");
@@ -148,15 +142,12 @@ static inline int down_trylock(struct se
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
+ "xorl %0,%0\n\t"
LOCK_PREFIX "decl %1\n\t" /* --sem->count */
- "js 2f\n\t"
- "xorl %0,%0\n"
- "1:\n"
- LOCK_SECTION_START("")
- "2:\tlea %1,%%eax\n\t"
+ "jns 2f\n\t"
+ "lea %1,%%eax\n\t"
"call __down_failed_trylock\n\t"
- "jmp 1b\n"
- LOCK_SECTION_END
+ "2:\n"
- :"=a" (result), "+m" (sem->count)
+ :"=&a" (result), "+m" (sem->count) /* earlyclobber: %0 is zeroed before %1 is used */
:
:"memory");
@@ -166,22 +157,16 @@ static inline int down_trylock(struct se
/*
* Note! This is subtle. We jump to wake people up only if
* the semaphore was negative (== somebody was waiting on it).
- * The default case (no contention) will result in NO
- * jumps for both down() and up().
*/
static inline void up(struct semaphore * sem)
{
__asm__ __volatile__(
"# atomic up operation\n\t"
LOCK_PREFIX "incl %0\n\t" /* ++sem->count */
- "jle 2f\n"
- "1:\n"
- LOCK_SECTION_START("")
- "2:\tlea %0,%%eax\n\t"
- "call __up_wakeup\n\t"
- "jmp 1b\n"
- LOCK_SECTION_END
- ".subsection 0\n"
+ "jg 1f\n\t"
+ "lea %0,%%eax\n\t"
+ "call __up_wakeup\n"
+ "1:"
:"+m" (sem->count)
:
:"memory","ax");
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [82/145] x86_64: Don't use lock section for mutexes and semaphores
[not found] <20060810 935.775038000@suse.de>
` (80 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [81/145] i386: Remove lock section support in mutex.h, semaphore.h Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [83/145] x86_64: fix is_at_popf() for compat tasks Andi Kleen
` (63 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Lock sections cannot be handled by the dwarf2 unwinder.
Disadvantage is a taken branch in the hot path.
Signed-off-by: Andi Kleen <ak@suse.de>
---
include/asm-x86_64/mutex.h | 20 ++++++--------------
include/asm-x86_64/semaphore.h | 40 ++++++++++++++--------------------------
include/asm-x86_64/spinlock.h | 13 ++++++-------
3 files changed, 26 insertions(+), 47 deletions(-)
Index: linux/include/asm-x86_64/mutex.h
===================================================================
--- linux.orig/include/asm-x86_64/mutex.h
+++ linux/include/asm-x86_64/mutex.h
@@ -25,13 +25,9 @@ do { \
\
__asm__ __volatile__( \
LOCK_PREFIX " decl (%%rdi) \n" \
- " js 2f \n" \
- "1: \n" \
- \
- LOCK_SECTION_START("") \
- "2: call "#fail_fn" \n" \
- " jmp 1b \n" \
- LOCK_SECTION_END \
+ " jns 1f \n" \
+ " call "#fail_fn" \n" \
+ "1:" \
\
:"=D" (dummy) \
: "D" (v) \
@@ -75,13 +71,9 @@ do { \
\
__asm__ __volatile__( \
LOCK_PREFIX " incl (%%rdi) \n" \
- " jle 2f \n" \
- "1: \n" \
- \
- LOCK_SECTION_START("") \
- "2: call "#fail_fn" \n" \
- " jmp 1b \n" \
- LOCK_SECTION_END \
+ " jg 1f \n" \
+ " call "#fail_fn" \n" \
+ "1: " \
\
:"=D" (dummy) \
: "D" (v) \
Index: linux/include/asm-x86_64/spinlock.h
===================================================================
--- linux.orig/include/asm-x86_64/spinlock.h
+++ linux/include/asm-x86_64/spinlock.h
@@ -20,16 +20,15 @@
(*(volatile signed int *)(&(x)->slock) <= 0)
#define __raw_spin_lock_string \
- "\n1:\t" \
+ "\n0:\t" \
"lock ; decl %0\n\t" \
- "js 2f\n" \
- LOCK_SECTION_START("") \
- "2:\t" \
+ "jns 2f\n" \
+ "1:\n\t" \
"rep;nop\n\t" \
"cmpl $0,%0\n\t" \
- "jle 2b\n\t" \
- "jmp 1b\n" \
- LOCK_SECTION_END
+ "jle 1b\n\t" \
+ "jmp 0b\n" \
+ "2:\t"
#define __raw_spin_lock_string_up \
"\n\tdecl %0"
Index: linux/include/asm-x86_64/semaphore.h
===================================================================
--- linux.orig/include/asm-x86_64/semaphore.h
+++ linux/include/asm-x86_64/semaphore.h
@@ -107,12 +107,9 @@ static inline void down(struct semaphore
__asm__ __volatile__(
"# atomic down operation\n\t"
LOCK_PREFIX "decl %0\n\t" /* --sem->count */
- "js 2f\n"
- "1:\n"
- LOCK_SECTION_START("")
- "2:\tcall __down_failed\n\t"
- "jmp 1b\n"
- LOCK_SECTION_END
+ "jns 1f\n\t"
+ "call __down_failed\n"
+ "1:"
:"=m" (sem->count)
:"D" (sem)
:"memory");
@@ -130,14 +127,11 @@ static inline int down_interruptible(str
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
+ "xorl %0,%0\n\t"
LOCK_PREFIX "decl %1\n\t" /* --sem->count */
- "js 2f\n\t"
- "xorl %0,%0\n"
- "1:\n"
- LOCK_SECTION_START("")
- "2:\tcall __down_failed_interruptible\n\t"
- "jmp 1b\n"
- LOCK_SECTION_END
+ "jns 2f\n\t"
+ "call __down_failed_interruptible\n"
+ "2:\n"
- :"=a" (result), "=m" (sem->count)
+ :"=&a" (result), "=m" (sem->count) /* earlyclobber: %0 is zeroed before %1 is used */
:"D" (sem)
:"memory");
@@ -154,14 +148,11 @@ static inline int down_trylock(struct se
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
+ "xorl %0,%0\n\t"
LOCK_PREFIX "decl %1\n\t" /* --sem->count */
- "js 2f\n\t"
- "xorl %0,%0\n"
- "1:\n"
- LOCK_SECTION_START("")
- "2:\tcall __down_failed_trylock\n\t"
- "jmp 1b\n"
- LOCK_SECTION_END
+ "jns 2f\n\t"
+ "call __down_failed_trylock\n\t"
+ "2:\n"
- :"=a" (result), "=m" (sem->count)
+ :"=&a" (result), "=m" (sem->count) /* earlyclobber: %0 is zeroed before %1 is used */
:"D" (sem)
:"memory","cc");
@@ -179,12 +170,9 @@ static inline void up(struct semaphore *
__asm__ __volatile__(
"# atomic up operation\n\t"
LOCK_PREFIX "incl %0\n\t" /* ++sem->count */
- "jle 2f\n"
- "1:\n"
- LOCK_SECTION_START("")
- "2:\tcall __up_wakeup\n\t"
- "jmp 1b\n"
- LOCK_SECTION_END
+ "jg 1f\n\t"
+ "call __up_wakeup\n"
+ "1:"
:"=m" (sem->count)
:"D" (sem)
:"memory");
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [83/145] x86_64: fix is_at_popf() for compat tasks
[not found] <20060810 935.775038000@suse.de>
` (81 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [82/145] x86_64: Don't use lock section for mutexes and semaphores Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [84/145] x86_64: annotate arch/x86_64/lib/*.S Andi Kleen
` (62 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Chuck Ebbert <76306.1226@compuserve.com>
When testing for the REX instruction prefix, first check
for 32-bit mode because in compat mode the REX prefix is an
increment instruction.
Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/ptrace.c | 5 ++++-
1 files changed, 4 insertions(+), 1 deletion(-)
Index: linux/arch/x86_64/kernel/ptrace.c
===================================================================
--- linux.orig/arch/x86_64/kernel/ptrace.c
+++ linux/arch/x86_64/kernel/ptrace.c
@@ -141,8 +141,11 @@ static int is_at_popf(struct task_struct
case 0xf0: case 0xf2: case 0xf3:
continue;
- /* REX prefixes */
case 0x40 ... 0x4f:
+ if (regs->cs != __USER_CS)
+ /* 32-bit mode: register increment */
+ return 0;
+ /* 64-bit mode: REX prefix */
continue;
/* CHECKME: f0, f2, f3 */
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [84/145] x86_64: annotate arch/x86_64/lib/*.S
[not found] <20060810 935.775038000@suse.de>
` (82 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [83/145] x86_64: fix is_at_popf() for compat tasks Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [85/145] x86_64: Fix gdt table size in trampoline.S Andi Kleen
` (61 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: "Jan Beulich" <jbeulich@novell.com>
Add unwind annotations to arch/x86_64/lib/*.S, and also use the macros
provided by linux/linkage.h wherever possible.
Some of the alternative instructions handling needed to be adjusted so
that the replacement code would also have valid unwind information.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/lib/clear_page.S | 47 +++++++++++++++----------
arch/x86_64/lib/copy_page.S | 53 +++++++++++++++++++---------
arch/x86_64/lib/copy_user.S | 39 +++++++++++++++------
arch/x86_64/lib/csum-copy.S | 26 +++++++++++---
arch/x86_64/lib/getuser.S | 32 ++++++++++-------
arch/x86_64/lib/iomap_copy.S | 10 +++--
arch/x86_64/lib/memcpy.S | 69 +++++++++++++++++++++----------------
arch/x86_64/lib/memset.S | 79 +++++++++++++++++++++++--------------------
arch/x86_64/lib/putuser.S | 32 ++++++++++-------
9 files changed, 244 insertions(+), 143 deletions(-)
Index: linux/arch/x86_64/lib/clear_page.S
===================================================================
--- linux.orig/arch/x86_64/lib/clear_page.S
+++ linux/arch/x86_64/lib/clear_page.S
@@ -1,10 +1,22 @@
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
/*
* Zero a page.
* rdi page
*/
- .globl clear_page
- .p2align 4
-clear_page:
+ ALIGN
+clear_page_c: /* rep stosq variant; the alternatives entry below patches a jmp to it over clear_page */
+ CFI_STARTPROC
+ movl $4096/8,%ecx
+ xorl %eax,%eax
+ rep stosq
+ ret
+ CFI_ENDPROC
+ENDPROC(clear_page_c)
+
+ENTRY(clear_page)
+ CFI_STARTPROC
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@@ -23,28 +35,25 @@ clear_page:
jnz .Lloop
nop
ret
-clear_page_end:
+ CFI_ENDPROC
+.Lclear_page_end:
+ENDPROC(clear_page)
/* Some CPUs run faster using the string instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
+ .section .altinstr_replacement,"ax"
+1: .byte 0xeb /* jmp <disp8> */
+ .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
+2:
+ .previous
.section .altinstructions,"a"
.align 8
- .quad clear_page
- .quad clear_page_c
- .byte X86_FEATURE_REP_GOOD
- .byte clear_page_end-clear_page
- .byte clear_page_c_end-clear_page_c
- .previous
-
- .section .altinstr_replacement,"ax"
-clear_page_c:
- movl $4096/8,%ecx
- xorl %eax,%eax
- rep
- stosq
- ret
-clear_page_c_end:
+ .quad clear_page
+ .quad 1b
+ .byte X86_FEATURE_REP_GOOD
+ .byte .Lclear_page_end - clear_page
+ .byte 2b - 1b
.previous
Index: linux/arch/x86_64/lib/copy_page.S
===================================================================
--- linux.orig/arch/x86_64/lib/copy_page.S
+++ linux/arch/x86_64/lib/copy_page.S
@@ -1,17 +1,33 @@
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+ ALIGN
+copy_page_c:
+ CFI_STARTPROC
+ movl $4096/8,%ecx
+ rep movsq
+ ret
+ CFI_ENDPROC
+ENDPROC(copy_page_c)
+
/* Don't use streaming store because it's better when the target
ends up in cache. */
/* Could vary the prefetch distance based on SMP/UP */
- .globl copy_page
- .p2align 4
-copy_page:
+ENTRY(copy_page)
+ CFI_STARTPROC
subq $3*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 3*8
movq %rbx,(%rsp)
+ CFI_REL_OFFSET rbx, 0
movq %r12,1*8(%rsp)
+ CFI_REL_OFFSET r12, 1*8
movq %r13,2*8(%rsp)
+ CFI_REL_OFFSET r13, 2*8
movl $(4096/64)-5,%ecx
.p2align 4
@@ -72,30 +88,33 @@ copy_page:
jnz .Loop2
movq (%rsp),%rbx
+ CFI_RESTORE rbx
movq 1*8(%rsp),%r12
+ CFI_RESTORE r12
movq 2*8(%rsp),%r13
+ CFI_RESTORE r13
addq $3*8,%rsp
+ CFI_ADJUST_CFA_OFFSET -3*8
ret
+.Lcopy_page_end:
+ CFI_ENDPROC
+ENDPROC(copy_page)
/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
+ .section .altinstr_replacement,"ax"
+1: .byte 0xeb /* jmp <disp8> */
+ .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
+2:
+ .previous
.section .altinstructions,"a"
.align 8
- .quad copy_page
- .quad copy_page_c
- .byte X86_FEATURE_REP_GOOD
- .byte copy_page_c_end-copy_page_c
- .byte copy_page_c_end-copy_page_c
- .previous
-
- .section .altinstr_replacement,"ax"
-copy_page_c:
- movl $4096/8,%ecx
- rep
- movsq
- ret
-copy_page_c_end:
+ .quad copy_page
+ .quad 1b
+ .byte X86_FEATURE_REP_GOOD
+ .byte .Lcopy_page_end - copy_page
+ .byte 2b - 1b
.previous
Index: linux/arch/x86_64/lib/copy_user.S
===================================================================
--- linux.orig/arch/x86_64/lib/copy_user.S
+++ linux/arch/x86_64/lib/copy_user.S
@@ -4,6 +4,9 @@
* Functions to copy from and to user space.
*/
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
#define FIX_ALIGNMENT 1
#include <asm/current.h>
@@ -12,9 +15,8 @@
#include <asm/cpufeature.h>
/* Standard copy_to_user with segment limit checking */
- .globl copy_to_user
- .p2align 4
-copy_to_user:
+ENTRY(copy_to_user)
+ CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rdi,%rcx
addq %rdx,%rcx
@@ -25,9 +27,11 @@ copy_to_user:
.byte 0xe9 /* 32bit jump */
.long .Lcug-1f
1:
+ CFI_ENDPROC
+ENDPROC(copy_to_user)
.section .altinstr_replacement,"ax"
-3: .byte 0xe9 /* replacement jmp with 8 bit immediate */
+3: .byte 0xe9 /* replacement jmp with 32 bit immediate */
.long copy_user_generic_c-1b /* offset */
.previous
.section .altinstructions,"a"
@@ -40,9 +44,8 @@ copy_to_user:
.previous
/* Standard copy_from_user with segment limit checking */
- .globl copy_from_user
- .p2align 4
-copy_from_user:
+ENTRY(copy_from_user)
+ CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rsi,%rcx
addq %rdx,%rcx
@@ -50,10 +53,13 @@ copy_from_user:
cmpq threadinfo_addr_limit(%rax),%rcx
jae bad_from_user
/* FALL THROUGH to copy_user_generic */
+ CFI_ENDPROC
+ENDPROC(copy_from_user)
.section .fixup,"ax"
/* must zero dest */
bad_from_user:
+ CFI_STARTPROC
movl %edx,%ecx
xorl %eax,%eax
rep
@@ -61,6 +67,8 @@ bad_from_user:
bad_to_user:
movl %edx,%eax
ret
+ CFI_ENDPROC
+END(bad_from_user)
.previous
@@ -75,9 +83,8 @@ bad_to_user:
* Output:
* eax uncopied bytes or 0 if successful.
*/
- .globl copy_user_generic
- .p2align 4
-copy_user_generic:
+ENTRY(copy_user_generic)
+ CFI_STARTPROC
.byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */
.byte 0x66,0x90
1:
@@ -95,6 +102,8 @@ copy_user_generic:
.previous
.Lcug:
pushq %rbx
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rbx, 0
xorl %eax,%eax /*zero for the exception handler */
#ifdef FIX_ALIGNMENT
@@ -168,9 +177,13 @@ copy_user_generic:
decl %ecx
jnz .Lloop_1
+ CFI_REMEMBER_STATE
.Lende:
popq %rbx
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_RESTORE rbx
ret
+ CFI_RESTORE_STATE
#ifdef FIX_ALIGNMENT
/* align destination */
@@ -261,6 +274,9 @@ copy_user_generic:
.Le_zero:
movq %rdx,%rax
jmp .Lende
+ CFI_ENDPROC
+ENDPROC(copy_user_generic)
+
/* Some CPUs run faster using the string copy instructions.
This is also a lot simpler. Use them when possible.
@@ -282,6 +298,7 @@ copy_user_generic:
* this please consider this.
*/
copy_user_generic_c:
+ CFI_STARTPROC
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
@@ -294,6 +311,8 @@ copy_user_generic_c:
ret
3: lea (%rdx,%rcx,8),%rax
ret
+ CFI_ENDPROC
+END(copy_user_generic_c)
.section __ex_table,"a"
.quad 1b,3b
Index: linux/arch/x86_64/lib/csum-copy.S
===================================================================
--- linux.orig/arch/x86_64/lib/csum-copy.S
+++ linux/arch/x86_64/lib/csum-copy.S
@@ -5,8 +5,9 @@
* License. See the file COPYING in the main directory of this archive
* for more details. No warranty for anything given at all.
*/
- #include <linux/linkage.h>
- #include <asm/errno.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/errno.h>
/*
* Checksum copy with exception handling.
@@ -53,19 +54,24 @@
.endm
- .globl csum_partial_copy_generic
- .p2align 4
-csum_partial_copy_generic:
+ENTRY(csum_partial_copy_generic)
+ CFI_STARTPROC
cmpl $3*64,%edx
jle .Lignore
.Lignore:
subq $7*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 7*8
movq %rbx,2*8(%rsp)
+ CFI_REL_OFFSET rbx, 2*8
movq %r12,3*8(%rsp)
+ CFI_REL_OFFSET r12, 3*8
movq %r14,4*8(%rsp)
+ CFI_REL_OFFSET r14, 4*8
movq %r13,5*8(%rsp)
+ CFI_REL_OFFSET r13, 5*8
movq %rbp,6*8(%rsp)
+ CFI_REL_OFFSET rbp, 6*8
movq %r8,(%rsp)
movq %r9,1*8(%rsp)
@@ -208,14 +214,22 @@ csum_partial_copy_generic:
addl %ebx,%eax
adcl %r9d,%eax /* carry */
+ CFI_REMEMBER_STATE
.Lende:
movq 2*8(%rsp),%rbx
+ CFI_RESTORE rbx
movq 3*8(%rsp),%r12
+ CFI_RESTORE r12
movq 4*8(%rsp),%r14
+ CFI_RESTORE r14
movq 5*8(%rsp),%r13
+ CFI_RESTORE r13
movq 6*8(%rsp),%rbp
+ CFI_RESTORE rbp
addq $7*8,%rsp
+ CFI_ADJUST_CFA_OFFSET -7*8
ret
+ CFI_RESTORE_STATE
/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
@@ -231,3 +245,5 @@ csum_partial_copy_generic:
jz .Lende
movl $-EFAULT,(%rax)
jmp .Lende
+ CFI_ENDPROC
+ENDPROC(csum_partial_copy_generic)
Index: linux/arch/x86_64/lib/getuser.S
===================================================================
--- linux.orig/arch/x86_64/lib/getuser.S
+++ linux/arch/x86_64/lib/getuser.S
@@ -27,25 +27,26 @@
*/
#include <linux/linkage.h>
+#include <asm/dwarf2.h>
#include <asm/page.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
.text
- .p2align 4
-.globl __get_user_1
-__get_user_1:
+ENTRY(__get_user_1)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
cmpq threadinfo_addr_limit(%r8),%rcx
jae bad_get_user
1: movzb (%rcx),%edx
xorl %eax,%eax
ret
+ CFI_ENDPROC
+ENDPROC(__get_user_1)
- .p2align 4
-.globl __get_user_2
-__get_user_2:
+ENTRY(__get_user_2)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $1,%rcx
jc 20f
@@ -57,10 +58,11 @@ __get_user_2:
ret
20: decq %rcx
jmp bad_get_user
+ CFI_ENDPROC
+ENDPROC(__get_user_2)
- .p2align 4
-.globl __get_user_4
-__get_user_4:
+ENTRY(__get_user_4)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $3,%rcx
jc 30f
@@ -72,10 +74,11 @@ __get_user_4:
ret
30: subq $3,%rcx
jmp bad_get_user
+ CFI_ENDPROC
+ENDPROC(__get_user_4)
- .p2align 4
-.globl __get_user_8
-__get_user_8:
+ENTRY(__get_user_8)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $7,%rcx
jc 40f
@@ -87,11 +90,16 @@ __get_user_8:
ret
40: subq $7,%rcx
jmp bad_get_user
+ CFI_ENDPROC
+ENDPROC(__get_user_8)
bad_get_user:
+ CFI_STARTPROC
xorl %edx,%edx
movq $(-EFAULT),%rax
ret
+ CFI_ENDPROC
+END(bad_get_user)
.section __ex_table,"a"
.quad 1b,bad_get_user
Index: linux/arch/x86_64/lib/iomap_copy.S
===================================================================
--- linux.orig/arch/x86_64/lib/iomap_copy.S
+++ linux/arch/x86_64/lib/iomap_copy.S
@@ -15,12 +15,16 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
/*
* override generic version in lib/iomap_copy.c
*/
- .globl __iowrite32_copy
- .p2align 4
-__iowrite32_copy:
+ENTRY(__iowrite32_copy)
+ CFI_STARTPROC
movl %edx,%ecx
rep movsd
ret
+ CFI_ENDPROC
+ENDPROC(__iowrite32_copy)
Index: linux/arch/x86_64/lib/memcpy.S
===================================================================
--- linux.orig/arch/x86_64/lib/memcpy.S
+++ linux/arch/x86_64/lib/memcpy.S
@@ -1,6 +1,10 @@
/* Copyright 2002 Andi Kleen */
- #include <asm/cpufeature.h>
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+
/*
* memcpy - Copy a memory block.
*
@@ -13,12 +17,26 @@
* rax original destination
*/
- .globl __memcpy
- .globl memcpy
- .p2align 4
-__memcpy:
-memcpy:
+ ALIGN
+memcpy_c:
+ CFI_STARTPROC
+ movq %rdi,%rax
+ movl %edx,%ecx
+ shrl $3,%ecx
+ andl $7,%edx
+ rep movsq
+ movl %edx,%ecx
+ rep movsb
+ ret
+ CFI_ENDPROC
+ENDPROC(memcpy_c)
+
+ENTRY(__memcpy)
+ENTRY(memcpy)
+ CFI_STARTPROC
pushq %rbx
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rbx, 0
movq %rdi,%rax
movl %edx,%ecx
@@ -86,36 +104,27 @@ memcpy:
.Lende:
popq %rbx
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_RESTORE rbx
ret
.Lfinal:
+ CFI_ENDPROC
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
+ .section .altinstr_replacement,"ax"
+1: .byte 0xeb /* jmp <disp8> */
+ .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
+2:
+ .previous
.section .altinstructions,"a"
.align 8
- .quad memcpy
- .quad memcpy_c
- .byte X86_FEATURE_REP_GOOD
- .byte .Lfinal-memcpy
- .byte memcpy_c_end-memcpy_c
- .previous
-
- .section .altinstr_replacement,"ax"
- /* rdi destination
- * rsi source
- * rdx count
- */
-memcpy_c:
- movq %rdi,%rax
- movl %edx,%ecx
- shrl $3,%ecx
- andl $7,%edx
- rep
- movsq
- movl %edx,%ecx
- rep
- movsb
- ret
-memcpy_c_end:
+ .quad memcpy
+ .quad 1b
+ .byte X86_FEATURE_REP_GOOD
+ .byte .Lfinal - memcpy
+ .byte 2b - 1b
.previous
Index: linux/arch/x86_64/lib/memset.S
===================================================================
--- linux.orig/arch/x86_64/lib/memset.S
+++ linux/arch/x86_64/lib/memset.S
@@ -1,4 +1,9 @@
/* Copyright 2002 Andi Kleen, SuSE Labs */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
/*
* ISO C memset - set a memory block to a byte value.
*
@@ -8,11 +13,29 @@
*
* rax original destination
*/
- .globl __memset
- .globl memset
- .p2align 4
-memset:
-__memset:
+ ALIGN
+memset_c:
+ CFI_STARTPROC
+ movq %rdi,%r9
+ movl %edx,%r8d
+ andl $7,%r8d
+ movl %edx,%ecx
+ shrl $3,%ecx
+ /* expand byte value */
+ movzbl %sil,%esi
+ movabs $0x0101010101010101,%rax
+ mulq %rsi /* with rax, clobbers rdx */
+ rep stosq
+ movl %r8d,%ecx
+ rep stosb
+ movq %r9,%rax
+ ret
+ CFI_ENDPROC
+ENDPROC(memset_c)
+
+ENTRY(memset)
+ENTRY(__memset)
+ CFI_STARTPROC
movq %rdi,%r10
movq %rdx,%r11
@@ -25,6 +48,7 @@ __memset:
movl %edi,%r9d
andl $7,%r9d
jnz .Lbad_alignment
+ CFI_REMEMBER_STATE
.Lafter_bad_alignment:
movl %r11d,%ecx
@@ -75,6 +99,7 @@ __memset:
movq %r10,%rax
ret
+ CFI_RESTORE_STATE
.Lbad_alignment:
cmpq $7,%r11
jbe .Lhandle_7
@@ -84,42 +109,26 @@ __memset:
addq %r8,%rdi
subq %r8,%r11
jmp .Lafter_bad_alignment
+.Lfinal:
+ CFI_ENDPROC
+ENDPROC(memset)
+ENDPROC(__memset)
/* Some CPUs run faster using the string instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
+ .section .altinstr_replacement,"ax"
+1: .byte 0xeb /* jmp <disp8> */
+ .byte (memset_c - memset) - (2f - 1b) /* offset */
+2:
+ .previous
.section .altinstructions,"a"
.align 8
- .quad memset
- .quad memset_c
- .byte X86_FEATURE_REP_GOOD
- .byte memset_c_end-memset_c
- .byte memset_c_end-memset_c
- .previous
-
- .section .altinstr_replacement,"ax"
- /* rdi destination
- * rsi value
- * rdx count
- */
-memset_c:
- movq %rdi,%r9
- movl %edx,%r8d
- andl $7,%r8d
- movl %edx,%ecx
- shrl $3,%ecx
- /* expand byte value */
- movzbl %sil,%esi
- movabs $0x0101010101010101,%rax
- mulq %rsi /* with rax, clobbers rdx */
- rep
- stosq
- movl %r8d,%ecx
- rep
- stosb
- movq %r9,%rax
- ret
-memset_c_end:
+ .quad memset
+ .quad 1b
+ .byte X86_FEATURE_REP_GOOD
+ .byte .Lfinal - memset
+ .byte 2b - 1b
.previous
Index: linux/arch/x86_64/lib/putuser.S
===================================================================
--- linux.orig/arch/x86_64/lib/putuser.S
+++ linux/arch/x86_64/lib/putuser.S
@@ -25,25 +25,26 @@
*/
#include <linux/linkage.h>
+#include <asm/dwarf2.h>
#include <asm/page.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
.text
- .p2align 4
-.globl __put_user_1
-__put_user_1:
+ENTRY(__put_user_1)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
cmpq threadinfo_addr_limit(%r8),%rcx
jae bad_put_user
1: movb %dl,(%rcx)
xorl %eax,%eax
ret
+ CFI_ENDPROC
+ENDPROC(__put_user_1)
- .p2align 4
-.globl __put_user_2
-__put_user_2:
+ENTRY(__put_user_2)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $1,%rcx
jc 20f
@@ -55,10 +56,11 @@ __put_user_2:
ret
20: decq %rcx
jmp bad_put_user
+ CFI_ENDPROC
+ENDPROC(__put_user_2)
- .p2align 4
-.globl __put_user_4
-__put_user_4:
+ENTRY(__put_user_4)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $3,%rcx
jc 30f
@@ -70,10 +72,11 @@ __put_user_4:
ret
30: subq $3,%rcx
jmp bad_put_user
+ CFI_ENDPROC
+ENDPROC(__put_user_4)
- .p2align 4
-.globl __put_user_8
-__put_user_8:
+ENTRY(__put_user_8)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $7,%rcx
jc 40f
@@ -85,10 +88,15 @@ __put_user_8:
ret
40: subq $7,%rcx
jmp bad_put_user
+ CFI_ENDPROC
+ENDPROC(__put_user_8)
bad_put_user:
+ CFI_STARTPROC
movq $(-EFAULT),%rax
ret
+ CFI_ENDPROC
+END(bad_put_user)
.section __ex_table,"a"
.quad 1b,bad_put_user
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [85/145] x86_64: Fix gdt table size in trampoline.S
[not found] <20060810 935.775038000@suse.de>
` (83 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [84/145] x86_64: annotate arch/x86_64/lib/*.S Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [86/145] x86_64: remove superflous BUG_ON's in nommu and gart Andi Kleen
` (60 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: "Eric W. Biederman" <ebiederm@xmission.com>
Allows easier extension of the GDT by using the proper C symbol
for the size in the descriptor.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/trampoline.S | 2 +-
1 files changed, 1 insertion(+), 1 deletion(-)
Index: linux/arch/x86_64/kernel/trampoline.S
===================================================================
--- linux.orig/arch/x86_64/kernel/trampoline.S
+++ linux/arch/x86_64/kernel/trampoline.S
@@ -64,7 +64,7 @@ idt_48:
.word 0, 0 # idt base = 0L
gdt_48:
- .short __KERNEL32_CS + 7 # gdt limit
+ .short GDT_ENTRIES*8 - 1 # gdt limit
.long cpu_gdt_table-__START_KERNEL_map
.globl trampoline_end
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [86/145] x86_64: remove superflous BUG_ON's in nommu and gart
[not found] <20060810 935.775038000@suse.de>
` (84 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [85/145] x86_64: Fix gdt table size in trampoline.S Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [87/145] x86_64: remove lock prefix from is_at_popf() tests Andi Kleen
` (59 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Muli Ben-Yehuda <muli@il.ibm.com>
There's no need to check for invalid DMA data direction in nommu and
gart since we do it in dma-mapping.h anyway before calling the
individual dma-ops.
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/pci-gart.c | 3 ---
arch/x86_64/kernel/pci-nommu.c | 1 -
2 files changed, 4 deletions(-)
Index: linux/arch/x86_64/kernel/pci-gart.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-gart.c
+++ linux/arch/x86_64/kernel/pci-gart.c
@@ -239,8 +239,6 @@ dma_addr_t gart_map_single(struct device
{
unsigned long phys_mem, bus;
- BUG_ON(dir == DMA_NONE);
-
if (!dev)
dev = &fallback_dev;
@@ -383,7 +381,6 @@ int gart_map_sg(struct device *dev, stru
unsigned long pages = 0;
int need = 0, nextneed;
- BUG_ON(dir == DMA_NONE);
if (nents == 0)
return 0;
Index: linux/arch/x86_64/kernel/pci-nommu.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-nommu.c
+++ linux/arch/x86_64/kernel/pci-nommu.c
@@ -59,7 +59,6 @@ int nommu_map_sg(struct device *hwdev, s
{
int i;
- BUG_ON(direction == DMA_NONE);
for (i = 0; i < nents; i++ ) {
struct scatterlist *s = &sg[i];
BUG_ON(!s->page);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [87/145] x86_64: remove lock prefix from is_at_popf() tests
[not found] <20060810 935.775038000@suse.de>
` (85 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [86/145] x86_64: remove superflous BUG_ON's in nommu and gart Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [88/145] x86_64: Use early CPU identify before early command line parsing Andi Kleen
` (58 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Chuck Ebbert <76306.1226@compuserve.com>
The lock prefix will cause an exception when used with the
popf instruction, so no need to continue searching after it's
found.
Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/ptrace.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
Index: linux/arch/x86_64/kernel/ptrace.c
===================================================================
--- linux.orig/arch/x86_64/kernel/ptrace.c
+++ linux/arch/x86_64/kernel/ptrace.c
@@ -138,7 +138,7 @@ static int is_at_popf(struct task_struct
case 0x26: case 0x2e:
case 0x36: case 0x3e:
case 0x64: case 0x65:
- case 0xf0: case 0xf2: case 0xf3:
+ case 0xf2: case 0xf3:
continue;
case 0x40 ... 0x4f:
@@ -148,7 +148,7 @@ static int is_at_popf(struct task_struct
/* 64-bit mode: REX prefix */
continue;
- /* CHECKME: f0, f2, f3 */
+ /* CHECKME: f2, f3 */
/*
* pushf: NOTE! We should probably not let
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [88/145] x86_64: Use early CPU identify before early command line parsing
[not found] <20060810 935.775038000@suse.de>
` (86 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [87/145] x86_64: remove lock prefix from is_at_popf() tests Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [89/145] x86_64: Allow early_param and identical __setup to exist Andi Kleen
` (57 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
This makes it possible to modify CPU flags in command line
options without hacks.
Also remove the duplicate CPU identification code (setup_boot_cpu_data) in head64.c
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/head64.c | 19 -------------------
arch/x86_64/kernel/setup.c | 4 ++--
2 files changed, 2 insertions(+), 21 deletions(-)
Index: linux/arch/x86_64/kernel/head64.c
===================================================================
--- linux.orig/arch/x86_64/kernel/head64.c
+++ linux/arch/x86_64/kernel/head64.c
@@ -56,24 +56,6 @@ static void __init copy_bootdata(char *r
printk("Bootdata ok (command line is %s)\n", saved_command_line);
}
-static void __init setup_boot_cpu_data(void)
-{
- unsigned int dummy, eax;
-
- /* get vendor info */
- cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
- (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
- (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
- (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
-
- /* get cpu type */
- cpuid(1, &eax, &dummy, &dummy,
- (unsigned int *) &boot_cpu_data.x86_capability);
- boot_cpu_data.x86 = (eax >> 8) & 0xf;
- boot_cpu_data.x86_model = (eax >> 4) & 0xf;
- boot_cpu_data.x86_mask = eax & 0xf;
-}
-
void __init x86_64_start_kernel(char * real_mode_data)
{
char *s;
@@ -117,6 +99,5 @@ void __init x86_64_start_kernel(char * r
if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
panic("Kernel too big for kernel mapping\n");
- setup_boot_cpu_data();
start_kernel();
}
Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -547,10 +547,10 @@ void __init setup_arch(char **cmdline_p)
data_resource.start = virt_to_phys(&_etext);
data_resource.end = virt_to_phys(&_edata)-1;
- parse_cmdline_early(cmdline_p);
-
early_identify_cpu(&boot_cpu_data);
+ parse_cmdline_early(cmdline_p);
+
/*
* partially used pages are not usable - thus
* we are rounding upwards:
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [89/145] x86_64: Allow early_param and identical __setup to exist
[not found] <20060810 935.775038000@suse.de>
` (87 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [88/145] x86_64: Use early CPU identify before early command line parsing Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-16 16:25 ` Len Brown
2006-08-10 19:36 ` [PATCH for review] [90/145] x86_64: Replace i386 open-coded cmdline parsing with Andi Kleen
` (56 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Rusty Russell <rusty@rustcorp.com.au>
We currently assume that boot parameters which are handled by
early_param() will not overlap boot parameters handled by __setup: if
they do, behaviour is dependent on link order, usually meaning __setup
will not get called.
ACPI wants to use early_param("pci"), and pci uses __setup("pci="), so
we modify the core to let them coexist: "pci=noacpi" will now get
passed to both.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andi Kleen <ak@suse.de>
---
init/main.c | 12 ++++++++----
1 files changed, 8 insertions(+), 4 deletions(-)
Index: linux/init/main.c
===================================================================
--- linux.orig/init/main.c
+++ linux/init/main.c
@@ -162,16 +162,19 @@ extern struct obs_kernel_param __setup_s
static int __init obsolete_checksetup(char *line)
{
struct obs_kernel_param *p;
+ int had_early_param = 0;
p = __setup_start;
do {
int n = strlen(p->str);
if (!strncmp(line, p->str, n)) {
if (p->early) {
- /* Already done in parse_early_param? (Needs
- * exact match on param part) */
+ /* Already done in parse_early_param?
+ * (Needs exact match on param part).
+ * Keep iterating, as we can have early
+ * params and __setups of same names 8( */
if (line[n] == '\0' || line[n] == '=')
- return 1;
+ had_early_param = 1;
} else if (!p->setup_func) {
printk(KERN_WARNING "Parameter %s is obsolete,"
" ignored\n", p->str);
@@ -181,7 +184,8 @@ static int __init obsolete_checksetup(ch
}
p++;
} while (p < __setup_end);
- return 0;
+
+ return had_early_param;
}
/*
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [89/145] x86_64: Allow early_param and identical __setup to exist
2006-08-10 19:36 ` [PATCH for review] [89/145] x86_64: Allow early_param and identical __setup to exist Andi Kleen
@ 2006-08-16 16:25 ` Len Brown
0 siblings, 0 replies; 199+ messages in thread
From: Len Brown @ 2006-08-16 16:25 UTC (permalink / raw)
To: Andi Kleen, Rusty Russell, Greg KH; +Cc: linux-kernel
On Thursday 10 August 2006 15:36, Andi Kleen wrote:
>
> From: Rusty Russell <rusty@rustcorp.com.au>
> We currently assume that boot parameters which are handled by
> early_param() will not overlap boot parameters handled by __setup: if
> they do, behaviour is dependent on link order, usually meaning __setup
> will not get called.
>
> ACPI wants to use early_param("pci"), and pci uses __setup("pci="), so
> we modify the core to let them coexist: "pci=noacpi" will now get
> passed to both.
For what it is worth....
"pci=noacpi" is the same as "acpi=noirq", except in addition it also sets acpi_pci_disabled.
For some time, it seems that "acpi=noirq" has been sufficient where "pci=noacpi" was being used.
Though we used to have some BIOS quirks that would fool ACPI root bus initialization,
I think that we are probably past those.
So I'll venture that today any uses of "pci=noacpi" could likely be down-graded
to "acpi=noirq", and "pci=noacpi" could be deleted --- though it seems that people
tend to get very upset when cmdline options are removed...
thanks,
-Len
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> Signed-off-by: Andi Kleen <ak@suse.de>
>
> ---
> init/main.c | 12 ++++++++----
> 1 files changed, 8 insertions(+), 4 deletions(-)
>
> Index: linux/init/main.c
> ===================================================================
> --- linux.orig/init/main.c
> +++ linux/init/main.c
> @@ -162,16 +162,19 @@ extern struct obs_kernel_param __setup_s
> static int __init obsolete_checksetup(char *line)
> {
> struct obs_kernel_param *p;
> + int had_early_param = 0;
>
> p = __setup_start;
> do {
> int n = strlen(p->str);
> if (!strncmp(line, p->str, n)) {
> if (p->early) {
> - /* Already done in parse_early_param? (Needs
> - * exact match on param part) */
> + /* Already done in parse_early_param?
> + * (Needs exact match on param part).
> + * Keep iterating, as we can have early
> + * params and __setups of same names 8( */
> if (line[n] == '\0' || line[n] == '=')
> - return 1;
> + had_early_param = 1;
> } else if (!p->setup_func) {
> printk(KERN_WARNING "Parameter %s is obsolete,"
> " ignored\n", p->str);
> @@ -181,7 +184,8 @@ static int __init obsolete_checksetup(ch
> }
> p++;
> } while (p < __setup_end);
> - return 0;
> +
> + return had_early_param;
> }
>
> /*
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [90/145] x86_64: Replace i386 open-coded cmdline parsing with
[not found] <20060810 935.775038000@suse.de>
` (88 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [89/145] x86_64: Allow early_param and identical __setup to exist Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [91/145] x86_64: Convert x86-64 to early param Andi Kleen
` (55 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Rusty Russell <rusty@rustcorp.com.au>
This patch replaces the open-coded early commandline parsing
throughout the i386 boot code with the generic mechanism (already used
by ppc, powerpc, ia64 and s390). The code was inconsistent with
whether it deletes the option from the cmdline or not, meaning some of
these will get passed through the environment into init.
This transformation is mainly mechanical, but there are some notable
parts:
1) Grammar: s/linux never set's it up/linux never sets it up/
2) Remove hacked-in earlyprintk= option scanning. When someone
actually implements CONFIG_EARLY_PRINTK, then they can use
early_param().
[AK: actually it is implemented, but I'm adding the early_param for it in the next
x86-64 patch]
3) Move declaration of generic_apic_probe() from setup.c into asm/apic.h
4) Various parameters now moved into their appropriate files (thanks Andi).
5) All parse functions which examine arg need to check for NULL,
except one where it has subtle humor value.
AK: readded acpi_sci handling which was completely dropped
AK: moved some more variables into acpi/boot.c
Cc: len.brown@intel.com
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/acpi/boot.c | 76 ++++++++
arch/i386/kernel/apic.c | 15 +
arch/i386/kernel/io_apic.c | 24 ++
arch/i386/kernel/machine_kexec.c | 23 ++
arch/i386/kernel/setup.c | 350 +++++++++++++--------------------------
arch/i386/kernel/smpboot.c | 13 +
arch/i386/mach-generic/probe.c | 58 +++---
arch/i386/mm/init.c | 18 +-
include/asm-i386/acpi.h | 14 -
include/asm-i386/apic.h | 4
include/asm-i386/io_apic.h | 11 +
include/asm-i386/pgtable.h | 2
12 files changed, 319 insertions(+), 289 deletions(-)
Index: linux/arch/i386/kernel/acpi/boot.c
===================================================================
--- linux.orig/arch/i386/kernel/acpi/boot.c
+++ linux/arch/i386/kernel/acpi/boot.c
@@ -36,6 +36,8 @@
#include <asm/io.h>
#include <asm/mpspec.h>
+int __initdata acpi_force = 0;
+
#ifdef CONFIG_X86_64
extern void __init clustered_apic_check(void);
@@ -860,8 +862,6 @@ static void __init acpi_process_madt(voi
return;
}
-extern int acpi_force;
-
#ifdef __i386__
static int __init disable_acpi_irq(struct dmi_system_id *d)
@@ -1163,3 +1163,75 @@ int __init acpi_boot_init(void)
return 0;
}
+
+static int __init parse_acpi(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ /* "acpi=off" disables both ACPI table parsing and interpreter */
+ if (strcmp(arg, "off") == 0) {
+ disable_acpi();
+ }
+ /* acpi=force to over-ride black-list */
+ else if (strcmp(arg, "force") == 0) {
+ acpi_force = 1;
+ acpi_ht = 1;
+ acpi_disabled = 0;
+ }
+ /* acpi=strict disables out-of-spec workarounds */
+ else if (strcmp(arg, "strict") == 0) {
+ acpi_strict = 1;
+ }
+ /* Limit ACPI just to boot-time to enable HT */
+ else if (strcmp(arg, "ht") == 0) {
+ if (!acpi_force)
+ disable_acpi();
+ acpi_ht = 1;
+ }
+ /* "acpi=noirq" disables ACPI interrupt routing */
+ else if (strcmp(arg, "noirq") == 0) {
+ acpi_noirq_set();
+ } else {
+ /* Core will printk when we return error. */
+ return -EINVAL;
+ }
+ return 0;
+}
+early_param("acpi", parse_acpi);
+
+/* FIXME: Using pci= for an ACPI parameter is a travesty. */
+static int __init parse_pci(char *arg)
+{
+ if (arg && strcmp(arg, "noacpi") == 0)
+ acpi_disable_pci();
+ return 0;
+}
+early_param("pci", parse_pci);
+
+#ifdef CONFIG_X86_IO_APIC
+static int __init parse_acpi_skip_timer_override(char *arg)
+{
+ acpi_skip_timer_override = 1;
+ return 0;
+}
+early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override);
+#endif /* CONFIG_X86_IO_APIC */
+
+static int __init setup_acpi_sci(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ if (!strcmp(s, "edge"))
+ acpi_sci_flags.trigger = 1;
+ else if (!strcmp(s, "level"))
+ acpi_sci_flags.trigger = 3;
+ else if (!strcmp(s, "high"))
+ acpi_sci_flags.polarity = 1;
+ else if (!strcmp(s, "low"))
+ acpi_sci_flags.polarity = 3;
+ else
+ return -EINVAL;
+ return 0;
+}
+early_param("acpi_sci", setup_acpi_sci);
Index: linux/arch/i386/kernel/apic.c
===================================================================
--- linux.orig/arch/i386/kernel/apic.c
+++ linux/arch/i386/kernel/apic.c
@@ -1372,3 +1372,18 @@ int __init APIC_init_uniprocessor (void)
return 0;
}
+
+static int __init parse_lapic(char *arg)
+{
+ lapic_enable();
+ return 0;
+}
+early_param("lapic", parse_lapic);
+
+static int __init parse_nolapic(char *arg)
+{
+ lapic_disable();
+ return 0;
+}
+early_param("nolapic", parse_nolapic);
+
Index: linux/arch/i386/kernel/io_apic.c
===================================================================
--- linux.orig/arch/i386/kernel/io_apic.c
+++ linux/arch/i386/kernel/io_apic.c
@@ -66,7 +66,7 @@ int sis_apic_bug = -1;
*/
int nr_ioapic_registers[MAX_IO_APICS];
-int disable_timer_pin_1 __initdata;
+static int disable_timer_pin_1 __initdata;
/*
* Rough estimation of how many shared IRQs there are, can
@@ -2691,3 +2691,25 @@ int io_apic_set_pci_routing (int ioapic,
}
#endif /* CONFIG_ACPI */
+
+static int __init parse_disable_timer_pin_1(char *arg)
+{
+ disable_timer_pin_1 = 1;
+ return 0;
+}
+early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
+
+static int __init parse_enable_timer_pin_1(char *arg)
+{
+ disable_timer_pin_1 = -1;
+ return 0;
+}
+early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
+
+static int __init parse_noapic(char *arg)
+{
+ /* disable IO-APIC */
+ disable_ioapic_setup();
+ return 0;
+}
+early_param("noapic", parse_noapic);
Index: linux/arch/i386/kernel/machine_kexec.c
===================================================================
--- linux.orig/arch/i386/kernel/machine_kexec.c
+++ linux/arch/i386/kernel/machine_kexec.c
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/delay.h>
+#include <linux/init.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
@@ -209,3 +210,25 @@ NORET_TYPE void machine_kexec(struct kim
rnk = (relocate_new_kernel_t) reboot_code_buffer;
(*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
}
+
+/* crashkernel=size@addr specifies the location to reserve for
+ * a crash kernel. By reserving this memory we guarantee
+ * that linux never sets it up as a DMA target.
+ * Useful for holding code to do something appropriate
+ * after a kernel panic.
+ */
+static int __init parse_crashkernel(char *arg)
+{
+ unsigned long size, base;
+ size = memparse(arg, &arg);
+ if (*arg == '@') {
+ base = memparse(arg+1, &arg);
+ /* FIXME: Do I want a sanity check
+ * to validate the memory range?
+ */
+ crashk_res.start = base;
+ crashk_res.end = base + size - 1;
+ }
+ return 0;
+}
+early_param("crashkernel", parse_crashkernel);
Index: linux/arch/i386/kernel/setup.c
===================================================================
--- linux.orig/arch/i386/kernel/setup.c
+++ linux/arch/i386/kernel/setup.c
@@ -96,11 +96,6 @@ unsigned long mmu_cr4_features;
#endif
EXPORT_SYMBOL(acpi_disabled);
-#ifdef CONFIG_ACPI
-int __initdata acpi_force = 0;
-extern acpi_interrupt_flags acpi_sci_flags;
-#endif
-
/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
#ifdef CONFIG_MCA
@@ -148,7 +143,6 @@ EXPORT_SYMBOL(ist_info);
struct e820map e820;
extern void early_cpu_init(void);
-extern void generic_apic_probe(char *);
extern int root_mountflags;
unsigned long saved_videomode;
@@ -700,238 +694,132 @@ static inline void copy_edd(void)
}
#endif
-static void __init parse_cmdline_early (char ** cmdline_p)
+static int __initdata user_defined_memmap = 0;
+
+/*
+ * "mem=nopentium" disables the 4MB page tables.
+ * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
+ * to <mem>, overriding the bios size.
+ * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
+ * <start> to <start>+<mem>, overriding the bios size.
+ *
+ * HPA tells me bootloaders need to parse mem=, so no new
+ * option should be mem= [also see Documentation/i386/boot.txt]
+ */
+static int __init parse_mem(char *arg)
{
- char c = ' ', *to = command_line, *from = saved_command_line;
- int len = 0;
- int userdef = 0;
-
- /* Save unparsed command line copy for /proc/cmdline */
- saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
-
- for (;;) {
- if (c != ' ')
- goto next_char;
- /*
- * "mem=nopentium" disables the 4MB page tables.
- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
- * to <mem>, overriding the bios size.
- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
- * <start> to <start>+<mem>, overriding the bios size.
- *
- * HPA tells me bootloaders need to parse mem=, so no new
- * option should be mem= [also see Documentation/i386/boot.txt]
- */
- if (!memcmp(from, "mem=", 4)) {
- if (to != command_line)
- to--;
- if (!memcmp(from+4, "nopentium", 9)) {
- from += 9+4;
- clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
- disable_pse = 1;
- } else {
- /* If the user specifies memory size, we
- * limit the BIOS-provided memory map to
- * that size. exactmap can be used to specify
- * the exact map. mem=number can be used to
- * trim the existing memory map.
- */
- unsigned long long mem_size;
-
- mem_size = memparse(from+4, &from);
- limit_regions(mem_size);
- userdef=1;
- }
- }
+ if (!arg)
+ return -EINVAL;
- else if (!memcmp(from, "memmap=", 7)) {
- if (to != command_line)
- to--;
- if (!memcmp(from+7, "exactmap", 8)) {
-#ifdef CONFIG_CRASH_DUMP
- /* If we are doing a crash dump, we
- * still need to know the real mem
- * size before original memory map is
- * reset.
- */
- find_max_pfn();
- saved_max_pfn = max_pfn;
-#endif
- from += 8+7;
- e820.nr_map = 0;
- userdef = 1;
- } else {
- /* If the user specifies memory size, we
- * limit the BIOS-provided memory map to
- * that size. exactmap can be used to specify
- * the exact map. mem=number can be used to
- * trim the existing memory map.
- */
- unsigned long long start_at, mem_size;
+ if (strcmp(arg, "nopentium") == 0) {
+ clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+ disable_pse = 1;
+ } else {
+ /* If the user specifies memory size, we
+ * limit the BIOS-provided memory map to
+ * that size. exactmap can be used to specify
+ * the exact map. mem=number can be used to
+ * trim the existing memory map.
+ */
+ unsigned long long mem_size;
- mem_size = memparse(from+7, &from);
- if (*from == '@') {
- start_at = memparse(from+1, &from);
- add_memory_region(start_at, mem_size, E820_RAM);
- } else if (*from == '#') {
- start_at = memparse(from+1, &from);
- add_memory_region(start_at, mem_size, E820_ACPI);
- } else if (*from == '$') {
- start_at = memparse(from+1, &from);
- add_memory_region(start_at, mem_size, E820_RESERVED);
- } else {
- limit_regions(mem_size);
- userdef=1;
- }
- }
- }
-
- else if (!memcmp(from, "noexec=", 7))
- noexec_setup(from + 7);
+ mem_size = memparse(arg, &arg);
+ limit_regions(mem_size);
+ user_defined_memmap = 1;
+ }
+ return 0;
+}
+early_param("mem", parse_mem);
+static int __init parse_memmap(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
-#ifdef CONFIG_X86_SMP
- /*
- * If the BIOS enumerates physical processors before logical,
- * maxcpus=N at enumeration-time can be used to disable HT.
+ if (strcmp(arg, "exactmap") == 0) {
+#ifdef CONFIG_CRASH_DUMP
+ /* If we are doing a crash dump, we
+ * still need to know the real mem
+ * size before original memory map is
+ * reset.
*/
- else if (!memcmp(from, "maxcpus=", 8)) {
- extern unsigned int maxcpus;
-
- maxcpus = simple_strtoul(from + 8, NULL, 0);
- }
+ find_max_pfn();
+ saved_max_pfn = max_pfn;
#endif
+ e820.nr_map = 0;
+ user_defined_memmap = 1;
+ } else {
+ /* If the user specifies memory size, we
+ * limit the BIOS-provided memory map to
+ * that size. exactmap can be used to specify
+ * the exact map. mem=number can be used to
+ * trim the existing memory map.
+ */
+ unsigned long long start_at, mem_size;
-#ifdef CONFIG_ACPI
- /* "acpi=off" disables both ACPI table parsing and interpreter */
- else if (!memcmp(from, "acpi=off", 8)) {
- disable_acpi();
- }
-
- /* acpi=force to over-ride black-list */
- else if (!memcmp(from, "acpi=force", 10)) {
- acpi_force = 1;
- acpi_ht = 1;
- acpi_disabled = 0;
- }
-
- /* acpi=strict disables out-of-spec workarounds */
- else if (!memcmp(from, "acpi=strict", 11)) {
- acpi_strict = 1;
- }
-
- /* Limit ACPI just to boot-time to enable HT */
- else if (!memcmp(from, "acpi=ht", 7)) {
- if (!acpi_force)
- disable_acpi();
- acpi_ht = 1;
- }
-
- /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
- else if (!memcmp(from, "pci=noacpi", 10)) {
- acpi_disable_pci();
- }
- /* "acpi=noirq" disables ACPI interrupt routing */
- else if (!memcmp(from, "acpi=noirq", 10)) {
- acpi_noirq_set();
+ mem_size = memparse(arg, &arg);
+ if (*arg == '@') {
+ start_at = memparse(arg+1, &arg);
+ add_memory_region(start_at, mem_size, E820_RAM);
+ } else if (*arg == '#') {
+ start_at = memparse(arg+1, &arg);
+ add_memory_region(start_at, mem_size, E820_ACPI);
+ } else if (*arg == '$') {
+ start_at = memparse(arg+1, &arg);
+ add_memory_region(start_at, mem_size, E820_RESERVED);
+ } else {
+ limit_regions(mem_size);
+ user_defined_memmap = 1;
}
+ }
+ return 0;
+}
+early_param("memmap", parse_memmap);
- else if (!memcmp(from, "acpi_sci=edge", 13))
- acpi_sci_flags.trigger = 1;
-
- else if (!memcmp(from, "acpi_sci=level", 14))
- acpi_sci_flags.trigger = 3;
-
- else if (!memcmp(from, "acpi_sci=high", 13))
- acpi_sci_flags.polarity = 1;
-
- else if (!memcmp(from, "acpi_sci=low", 12))
- acpi_sci_flags.polarity = 3;
-
-#ifdef CONFIG_X86_IO_APIC
- else if (!memcmp(from, "acpi_skip_timer_override", 24))
- acpi_skip_timer_override = 1;
+#ifdef CONFIG_PROC_VMCORE
+/* elfcorehdr= specifies the location of elf core header
+ * stored by the crashed kernel.
+ */
+static int __init parse_elfcorehdr(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
- if (!memcmp(from, "disable_timer_pin_1", 19))
- disable_timer_pin_1 = 1;
- if (!memcmp(from, "enable_timer_pin_1", 18))
- disable_timer_pin_1 = -1;
-
- /* disable IO-APIC */
- else if (!memcmp(from, "noapic", 6))
- disable_ioapic_setup();
-#endif /* CONFIG_X86_IO_APIC */
-#endif /* CONFIG_ACPI */
+ elfcorehdr_addr = memparse(arg, &arg);
+ return 0;
+}
+early_param("elfcorehdr", parse_elfcorehdr);
+#endif /* CONFIG_PROC_VMCORE */
-#ifdef CONFIG_X86_LOCAL_APIC
- /* enable local APIC */
- else if (!memcmp(from, "lapic", 5))
- lapic_enable();
-
- /* disable local APIC */
- else if (!memcmp(from, "nolapic", 6))
- lapic_disable();
-#endif /* CONFIG_X86_LOCAL_APIC */
+/*
+ * highmem=size forces highmem to be exactly 'size' bytes.
+ * This works even on boxes that have no highmem otherwise.
+ * This also works to reduce highmem size on bigger boxes.
+ */
+static int __init parse_highmem(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
-#ifdef CONFIG_KEXEC
- /* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel. By reserving this memory we guarantee
- * that linux never set's it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
- else if (!memcmp(from, "crashkernel=", 12)) {
- unsigned long size, base;
- size = memparse(from+12, &from);
- if (*from == '@') {
- base = memparse(from+1, &from);
- /* FIXME: Do I want a sanity check
- * to validate the memory range?
- */
- crashk_res.start = base;
- crashk_res.end = base + size - 1;
- }
- }
-#endif
-#ifdef CONFIG_PROC_VMCORE
- /* elfcorehdr= specifies the location of elf core header
- * stored by the crashed kernel.
- */
- else if (!memcmp(from, "elfcorehdr=", 11))
- elfcorehdr_addr = memparse(from+11, &from);
-#endif
+ highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
+ return 0;
+}
+early_param("highmem", parse_highmem);
- /*
- * highmem=size forces highmem to be exactly 'size' bytes.
- * This works even on boxes that have no highmem otherwise.
- * This also works to reduce highmem size on bigger boxes.
- */
- else if (!memcmp(from, "highmem=", 8))
- highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
-
- /*
- * vmalloc=size forces the vmalloc area to be exactly 'size'
- * bytes. This can be used to increase (or decrease) the
- * vmalloc area - the default is 128m.
- */
- else if (!memcmp(from, "vmalloc=", 8))
- __VMALLOC_RESERVE = memparse(from+8, &from);
+/*
+ * vmalloc=size forces the vmalloc area to be exactly 'size'
+ * bytes. This can be used to increase (or decrease) the
+ * vmalloc area - the default is 128m.
+ */
+static int __init parse_vmalloc(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
- next_char:
- c = *(from++);
- if (!c)
- break;
- if (COMMAND_LINE_SIZE <= ++len)
- break;
- *(to++) = c;
- }
- *to = '\0';
- *cmdline_p = command_line;
- if (userdef) {
- printk(KERN_INFO "user-defined physical RAM map:\n");
- print_memory_map("user");
- }
+ __VMALLOC_RESERVE = memparse(arg, &arg);
+ return 0;
}
+early_param("vmalloc", parse_vmalloc);
/*
* Callback for efi_memory_walk.
@@ -1507,17 +1395,15 @@ void __init setup_arch(char **cmdline_p)
data_resource.start = virt_to_phys(_etext);
data_resource.end = virt_to_phys(_edata)-1;
- parse_cmdline_early(cmdline_p);
+ parse_early_param();
-#ifdef CONFIG_EARLY_PRINTK
- {
- char *s = strstr(*cmdline_p, "earlyprintk=");
- if (s) {
- setup_early_printk(strchr(s, '=') + 1);
- printk("early console enabled\n");
- }
+ if (user_defined_memmap) {
+ printk(KERN_INFO "user-defined physical RAM map:\n");
+ print_memory_map("user");
}
-#endif
+
+ strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
+ *cmdline_p = command_line;
max_low_pfn = setup_memory();
@@ -1546,7 +1432,7 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
#ifdef CONFIG_X86_GENERICARCH
- generic_apic_probe(*cmdline_p);
+ generic_apic_probe();
#endif
if (efi_enabled)
efi_map_memmap();
Index: linux/arch/i386/kernel/smpboot.c
===================================================================
--- linux.orig/arch/i386/kernel/smpboot.c
+++ linux/arch/i386/kernel/smpboot.c
@@ -1487,3 +1487,16 @@ void __init smp_intr_init(void)
/* IPI for generic function call */
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
}
+
+/*
+ * If the BIOS enumerates physical processors before logical,
+ * maxcpus=N at enumeration-time can be used to disable HT.
+ */
+static int __init parse_maxcpus(char *arg)
+{
+ extern unsigned int maxcpus;
+
+ maxcpus = simple_strtoul(arg, NULL, 0);
+ return 0;
+}
+early_param("maxcpus", parse_maxcpus);
Index: linux/arch/i386/mach-generic/probe.c
===================================================================
--- linux.orig/arch/i386/mach-generic/probe.c
+++ linux/arch/i386/mach-generic/probe.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
+#include <linux/errno.h>
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/apicdef.h>
@@ -29,7 +30,24 @@ struct genapic *apic_probe[] __initdata
NULL,
};
-static int cmdline_apic;
+static int cmdline_apic __initdata;
+static int __init parse_apic(char *arg)
+{
+ int i;
+
+ if (!arg)
+ return -EINVAL;
+
+ for (i = 0; apic_probe[i]; i++) {
+ if (!strcmp(apic_probe[i]->name, arg)) {
+ genapic = apic_probe[i];
+ cmdline_apic = 1;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+early_param("apic", parse_apic);
void __init generic_bigsmp_probe(void)
{
@@ -48,40 +66,20 @@ void __init generic_bigsmp_probe(void)
}
}
-void __init generic_apic_probe(char *command_line)
+void __init generic_apic_probe(void)
{
- char *s;
- int i;
- int changed = 0;
-
- s = strstr(command_line, "apic=");
- if (s && (s == command_line || isspace(s[-1]))) {
- char *p = strchr(s, ' '), old;
- if (!p)
- p = strchr(s, '\0');
- old = *p;
- *p = 0;
- for (i = 0; !changed && apic_probe[i]; i++) {
- if (!strcmp(apic_probe[i]->name, s+5)) {
- changed = 1;
+ if (!cmdline_apic) {
+ int i;
+ for (i = 0; apic_probe[i]; i++) {
+ if (apic_probe[i]->probe()) {
genapic = apic_probe[i];
+ break;
}
}
- if (!changed)
- printk(KERN_ERR "Unknown genapic `%s' specified.\n", s);
- *p = old;
- cmdline_apic = changed;
- }
- for (i = 0; !changed && apic_probe[i]; i++) {
- if (apic_probe[i]->probe()) {
- changed = 1;
- genapic = apic_probe[i];
- }
+ /* Not visible without early console */
+ if (!apic_probe[i])
+ panic("Didn't find an APIC driver");
}
- /* Not visible without early console */
- if (!changed)
- panic("Didn't find an APIC driver");
-
printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
}
Index: linux/arch/i386/mm/init.c
===================================================================
--- linux.orig/arch/i386/mm/init.c
+++ linux/arch/i386/mm/init.c
@@ -435,16 +435,22 @@ u64 __supported_pte_mask __read_mostly =
* on Enable
* off Disable
*/
-void __init noexec_setup(const char *str)
+static int __init noexec_setup(char *str)
{
- if (!strncmp(str, "on",2) && cpu_has_nx) {
- __supported_pte_mask |= _PAGE_NX;
- disable_nx = 0;
- } else if (!strncmp(str,"off",3)) {
+ if (!str || !strcmp(str, "on")) {
+ if (cpu_has_nx) {
+ __supported_pte_mask |= _PAGE_NX;
+ disable_nx = 0;
+ }
+ } else if (!strcmp(str,"off")) {
disable_nx = 1;
__supported_pte_mask &= ~_PAGE_NX;
- }
+ } else
+ return -EINVAL;
+
+ return 0;
}
+early_param("noexec", noexec_setup);
int nx_enabled = 0;
#ifdef CONFIG_X86_PAE
Index: linux/include/asm-i386/acpi.h
===================================================================
--- linux.orig/include/asm-i386/acpi.h
+++ linux/include/asm-i386/acpi.h
@@ -131,21 +131,7 @@ static inline void disable_acpi(void)
extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
#ifdef CONFIG_X86_IO_APIC
-extern int skip_ioapic_setup;
extern int acpi_skip_timer_override;
-
-static inline void disable_ioapic_setup(void)
-{
- skip_ioapic_setup = 1;
-}
-
-static inline int ioapic_setup_disabled(void)
-{
- return skip_ioapic_setup;
-}
-
-#else
-static inline void disable_ioapic_setup(void) { }
#endif
static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
Index: linux/include/asm-i386/apic.h
===================================================================
--- linux.orig/include/asm-i386/apic.h
+++ linux/include/asm-i386/apic.h
@@ -42,6 +42,8 @@ static inline void lapic_enable(void)
} while (0)
+extern void generic_apic_probe(void);
+
#ifdef CONFIG_X86_LOCAL_APIC
/*
@@ -117,8 +119,6 @@ extern void enable_APIC_timer(void);
extern void enable_NMI_through_LVT0 (void * dummy);
-extern int disable_timer_pin_1;
-
void smp_send_timer_broadcast_ipi(struct pt_regs *regs);
void switch_APIC_timer_to_ipi(void *cpumask);
void switch_ipi_to_APIC_timer(void *cpumask);
Index: linux/include/asm-i386/io_apic.h
===================================================================
--- linux.orig/include/asm-i386/io_apic.h
+++ linux/include/asm-i386/io_apic.h
@@ -188,6 +188,16 @@ static inline void io_apic_modify(unsign
/* 1 if "noapic" boot option passed */
extern int skip_ioapic_setup;
+static inline void disable_ioapic_setup(void)
+{
+ skip_ioapic_setup = 1;
+}
+
+static inline int ioapic_setup_disabled(void)
+{
+ return skip_ioapic_setup;
+}
+
/*
* If we use the IO-APIC for IRQ routing, disable automatic
* assignment of PCI IRQ's.
@@ -206,6 +216,7 @@ extern int (*ioapic_renumber_irq)(int io
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
+static inline void disable_ioapic_setup(void) { }
#endif
extern int assign_irq_vector(int irq);
Index: linux/include/asm-i386/pgtable.h
===================================================================
--- linux.orig/include/asm-i386/pgtable.h
+++ linux/include/asm-i386/pgtable.h
@@ -391,8 +391,6 @@ extern pte_t *lookup_address(unsigned lo
static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
#endif
-extern void noexec_setup(const char *str);
-
#if defined(CONFIG_HIGHPTE)
#define pte_offset_map(dir, address) \
((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [91/145] x86_64: Convert x86-64 to early param
[not found] <20060810 935.775038000@suse.de>
` (89 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [90/145] x86_64: Replace i386 open-coded cmdline parsing with Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [92/145] x86_64: Remove need for early lockdep init Andi Kleen
` (54 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Use early_param instead of hackish manual command line parsing.
Requires the earlier i386 patchkit, but also fixes i386 early_printk again.
I removed some obsolete really early parameters which didn't do anything useful.
Also made a few parameters early that needed it (mostly oops printing setup).
Also removed one panic check that wasn't visible without
early console anyway (the early console is now initialized after that
panic).
This cleans up a lot of code.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/apic.c | 34 +++---
arch/x86_64/kernel/e820.c | 53 ++++++++-
arch/x86_64/kernel/early_printk.c | 20 +--
arch/x86_64/kernel/head64.c | 15 --
arch/x86_64/kernel/io_apic.c | 15 +-
arch/x86_64/kernel/machine_kexec.c | 28 +++++
arch/x86_64/kernel/pci-dma.c | 7 -
arch/x86_64/kernel/setup.c | 197 +++----------------------------------
arch/x86_64/kernel/setup64.c | 8 -
arch/x86_64/kernel/smpboot.c | 6 -
arch/x86_64/kernel/traps.c | 28 +++--
arch/x86_64/mm/numa.c | 9 +
include/asm-x86_64/apic.h | 4
include/asm-x86_64/e820.h | 3
include/asm-x86_64/pgtable.h | 1
include/asm-x86_64/proto.h | 10 -
16 files changed, 162 insertions(+), 276 deletions(-)
Index: linux/arch/x86_64/kernel/early_printk.c
===================================================================
--- linux.orig/arch/x86_64/kernel/early_printk.c
+++ linux/arch/x86_64/kernel/early_printk.c
@@ -215,20 +215,16 @@ void early_printk(const char *fmt, ...)
static int __initdata keep_early;
-int __init setup_early_printk(char *opt)
+static int __init setup_early_printk(char *buf)
{
- char *space;
- char buf[256];
+ if (!buf)
+ return 0;
if (early_console_initialized)
- return 1;
-
- strlcpy(buf,opt,sizeof(buf));
- space = strchr(buf, ' ');
- if (space)
- *space = 0;
+ return 0;
+ early_console_initialized = 1;
- if (strstr(buf,"keep"))
+ if (!strcmp(buf,"keep"))
keep_early = 1;
if (!strncmp(buf, "serial", 6)) {
@@ -248,11 +244,12 @@ int __init setup_early_printk(char *opt)
early_console = &simnow_console;
keep_early = 1;
}
- early_console_initialized = 1;
register_console(early_console);
return 0;
}
+early_param("earlyprintk", setup_early_printk);
+
void __init disable_early_printk(void)
{
if (!early_console_initialized || !early_console)
@@ -266,4 +263,3 @@ void __init disable_early_printk(void)
}
}
-__setup("earlyprintk=", setup_early_printk);
Index: linux/arch/x86_64/kernel/head64.c
===================================================================
--- linux.orig/arch/x86_64/kernel/head64.c
+++ linux/arch/x86_64/kernel/head64.c
@@ -58,7 +58,6 @@ static void __init copy_bootdata(char *r
void __init x86_64_start_kernel(char * real_mode_data)
{
- char *s;
int i;
for (i = 0; i < 256; i++)
@@ -85,19 +84,5 @@ void __init x86_64_start_kernel(char * r
#ifdef CONFIG_SMP
cpu_set(0, cpu_online_map);
#endif
- s = strstr(saved_command_line, "earlyprintk=");
- if (s != NULL)
- setup_early_printk(strchr(s, '=') + 1);
-#ifdef CONFIG_NUMA
- s = strstr(saved_command_line, "numa=");
- if (s != NULL)
- numa_setup(s+5);
-#endif
- if (strstr(saved_command_line, "disableapic"))
- disable_apic = 1;
- /* You need early console to see that */
- if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
- panic("Kernel too big for kernel mapping\n");
-
start_kernel();
}
Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -76,11 +76,6 @@ unsigned long mmu_cr4_features;
int acpi_disabled;
EXPORT_SYMBOL(acpi_disabled);
-#ifdef CONFIG_ACPI
-extern int __initdata acpi_ht;
-extern acpi_interrupt_flags acpi_sci_flags;
-int __initdata acpi_force = 0;
-#endif
int acpi_numa __initdata;
@@ -276,183 +271,22 @@ static void __init probe_roms(void)
}
}
-/* Check for full argument with no trailing characters */
-static int fullarg(char *p, char *arg)
+#ifdef CONFIG_PROC_VMCORE
+/* elfcorehdr= specifies the location of elf core header
+ * stored by the crashed kernel. This option will be passed
+ * by kexec loader to the capture kernel.
+ */
+static int __init setup_elfcorehdr(char *arg)
{
- int l = strlen(arg);
- return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
+ char *end;
+ if (!arg)
+ return -EINVAL;
+ elfcorehdr_addr = memparse(arg, &end);
+ return end > arg ? 0 : -EINVAL;
}
-
-static __init void parse_cmdline_early (char ** cmdline_p)
-{
- char c = ' ', *to = command_line, *from = COMMAND_LINE;
- int len = 0;
- int userdef = 0;
-
- for (;;) {
- if (c != ' ')
- goto next_char;
-
-#ifdef CONFIG_SMP
- /*
- * If the BIOS enumerates physical processors before logical,
- * maxcpus=N at enumeration-time can be used to disable HT.
- */
- else if (!memcmp(from, "maxcpus=", 8)) {
- extern unsigned int maxcpus;
-
- maxcpus = simple_strtoul(from + 8, NULL, 0);
- }
-#endif
-#ifdef CONFIG_ACPI
- /* "acpi=off" disables both ACPI table parsing and interpreter init */
- if (fullarg(from,"acpi=off"))
- disable_acpi();
-
- if (fullarg(from, "acpi=force")) {
- /* add later when we do DMI horrors: */
- acpi_force = 1;
- acpi_disabled = 0;
- }
-
- /* acpi=ht just means: do ACPI MADT parsing
- at bootup, but don't enable the full ACPI interpreter */
- if (fullarg(from, "acpi=ht")) {
- if (!acpi_force)
- disable_acpi();
- acpi_ht = 1;
- }
- else if (fullarg(from, "pci=noacpi"))
- acpi_disable_pci();
- else if (fullarg(from, "acpi=noirq"))
- acpi_noirq_set();
-
- else if (fullarg(from, "acpi_sci=edge"))
- acpi_sci_flags.trigger = 1;
- else if (fullarg(from, "acpi_sci=level"))
- acpi_sci_flags.trigger = 3;
- else if (fullarg(from, "acpi_sci=high"))
- acpi_sci_flags.polarity = 1;
- else if (fullarg(from, "acpi_sci=low"))
- acpi_sci_flags.polarity = 3;
-
- /* acpi=strict disables out-of-spec workarounds */
- else if (fullarg(from, "acpi=strict")) {
- acpi_strict = 1;
- }
- else if (fullarg(from, "acpi_skip_timer_override"))
- acpi_skip_timer_override = 1;
+early_param("elfcorehdr", setup_elfcorehdr);
#endif
- if (fullarg(from, "disable_timer_pin_1"))
- disable_timer_pin_1 = 1;
- if (fullarg(from, "enable_timer_pin_1"))
- disable_timer_pin_1 = -1;
-
- if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
- disable_apic = 1;
- }
-
- if (fullarg(from, "noapic"))
- skip_ioapic_setup = 1;
-
- if (fullarg(from,"apic")) {
- skip_ioapic_setup = 0;
- ioapic_force = 1;
- }
-
- if (!memcmp(from, "mem=", 4))
- parse_memopt(from+4, &from);
-
- if (!memcmp(from, "memmap=", 7)) {
- /* exactmap option is for used defined memory */
- if (!memcmp(from+7, "exactmap", 8)) {
-#ifdef CONFIG_CRASH_DUMP
- /* If we are doing a crash dump, we
- * still need to know the real mem
- * size before original memory map is
- * reset.
- */
- saved_max_pfn = e820_end_of_ram();
-#endif
- from += 8+7;
- end_pfn_map = 0;
- e820.nr_map = 0;
- userdef = 1;
- }
- else {
- parse_memmapopt(from+7, &from);
- userdef = 1;
- }
- }
-
-#ifdef CONFIG_NUMA
- if (!memcmp(from, "numa=", 5))
- numa_setup(from+5);
-#endif
-
- if (!memcmp(from,"iommu=",6)) {
- iommu_setup(from+6);
- }
-
- if (fullarg(from,"oops=panic"))
- panic_on_oops = 1;
-
- if (!memcmp(from, "noexec=", 7))
- nonx_setup(from + 7);
-
-#ifdef CONFIG_KEXEC
- /* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel. By reserving this memory we guarantee
- * that linux never set's it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
- else if (!memcmp(from, "crashkernel=", 12)) {
- unsigned long size, base;
- size = memparse(from+12, &from);
- if (*from == '@') {
- base = memparse(from+1, &from);
- /* FIXME: Do I want a sanity check
- * to validate the memory range?
- */
- crashk_res.start = base;
- crashk_res.end = base + size - 1;
- }
- }
-#endif
-
-#ifdef CONFIG_PROC_VMCORE
- /* elfcorehdr= specifies the location of elf core header
- * stored by the crashed kernel. This option will be passed
- * by kexec loader to the capture kernel.
- */
- else if(!memcmp(from, "elfcorehdr=", 11))
- elfcorehdr_addr = memparse(from+11, &from);
-#endif
-
-#ifdef CONFIG_HOTPLUG_CPU
- else if (!memcmp(from, "additional_cpus=", 16))
- setup_additional_cpus(from+16);
-#endif
-
- next_char:
- c = *(from++);
- if (!c)
- break;
- if (COMMAND_LINE_SIZE <= ++len)
- break;
- *(to++) = c;
- }
- if (userdef) {
- printk(KERN_INFO "user-defined physical RAM map:\n");
- e820_print_map("user");
- }
- *to = '\0';
- *cmdline_p = command_line;
-}
-
#ifndef CONFIG_NUMA
static void __init
contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
@@ -549,7 +383,12 @@ void __init setup_arch(char **cmdline_p)
early_identify_cpu(&boot_cpu_data);
- parse_cmdline_early(cmdline_p);
+ strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
+ *cmdline_p = command_line;
+
+ parse_early_param();
+
+ finish_e820_parsing();
/*
* partially used pages are not usable - thus
Index: linux/arch/x86_64/mm/numa.c
===================================================================
--- linux.orig/arch/x86_64/mm/numa.c
+++ linux/arch/x86_64/mm/numa.c
@@ -348,9 +348,10 @@ void __init paging_init(void)
}
}
-/* [numa=off] */
-__init int numa_setup(char *opt)
+static __init int numa_setup(char *opt)
{
+ if (!opt)
+ return -EINVAL;
if (!strncmp(opt,"off",3))
numa_off = 1;
#ifdef CONFIG_NUMA_EMU
@@ -366,9 +367,11 @@ __init int numa_setup(char *opt)
if (!strncmp(opt,"hotadd=", 7))
hotadd_percent = simple_strtoul(opt+7, NULL, 10);
#endif
- return 1;
+ return 0;
}
+early_param("numa", numa_setup);
+
/*
* Setup early cpu_to_node.
*
Index: linux/include/asm-x86_64/proto.h
===================================================================
--- linux.orig/include/asm-x86_64/proto.h
+++ linux/include/asm-x86_64/proto.h
@@ -53,9 +53,6 @@ extern int nohpet;
extern unsigned long vxtime_hz;
extern void time_init_gtod(void);
-extern int numa_setup(char *opt);
-
-extern int setup_early_printk(char *);
extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
extern void early_identify_cpu(struct cpuinfo_x86 *c);
@@ -104,13 +101,7 @@ extern void select_idle_routine(const st
extern unsigned long table_start, table_end;
extern int exception_trace;
-extern int using_apic_timer;
-extern int disable_apic;
extern unsigned cpu_khz;
-extern int ioapic_force;
-extern int skip_ioapic_setup;
-extern int acpi_ht;
-extern int acpi_disabled;
extern void no_iommu_init(void);
extern int force_iommu, no_iommu;
@@ -132,7 +123,6 @@ extern int fix_aperture;
extern int reboot_force;
extern int notsc_setup(char *);
-extern int setup_additional_cpus(char *);
extern void smp_local_timer_interrupt(struct pt_regs * regs);
Index: linux/arch/x86_64/kernel/apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/apic.c
+++ linux/arch/x86_64/kernel/apic.c
@@ -36,6 +36,7 @@
#include <asm/idle.h>
#include <asm/proto.h>
#include <asm/timex.h>
+#include <asm/apic.h>
int apic_verbosity;
int apic_runs_main_timer;
@@ -546,18 +547,24 @@ static void apic_pm_activate(void) { }
static int __init apic_set_verbosity(char *str)
{
+ if (str == NULL) {
+ skip_ioapic_setup = 0;
+ ioapic_force = 1;
+ return 0;
+ }
if (strcmp("debug", str) == 0)
apic_verbosity = APIC_DEBUG;
else if (strcmp("verbose", str) == 0)
apic_verbosity = APIC_VERBOSE;
- else
+ else {
printk(KERN_WARNING "APIC Verbosity level %s not recognised"
- " use apic=verbose or apic=debug", str);
+ " use apic=verbose or apic=debug\n", str);
+ return -EINVAL;
+ }
- return 1;
+ return 0;
}
-
-__setup("apic=", apic_set_verbosity);
+early_param("apic", apic_set_verbosity);
/*
* Detect and enable local APICs on non-SMP boards.
@@ -1078,14 +1085,17 @@ int __init APIC_init_uniprocessor (void)
static __init int setup_disableapic(char *str)
{
disable_apic = 1;
- return 1;
-}
+ clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+ return 0;
+}
+early_param("disableapic", setup_disableapic);
+/* same as disableapic, for compatibility */
static __init int setup_nolapic(char *str)
{
- disable_apic = 1;
- return 1;
+ return setup_disableapic(str);
}
+early_param("nolapic", setup_nolapic);
static __init int setup_noapictimer(char *str)
{
@@ -1118,11 +1128,5 @@ static __init int setup_apicpmtimer(char
}
__setup("apicpmtimer", setup_apicpmtimer);
-/* dummy parsing: see setup.c */
-
-__setup("disableapic", setup_disableapic);
-__setup("nolapic", setup_nolapic); /* same as disableapic, for compatibility */
-
__setup("noapictimer", setup_noapictimer);
-/* no "lapic" flag - we only use the lapic when the BIOS tells us so. */
Index: linux/arch/x86_64/kernel/e820.c
===================================================================
--- linux.orig/arch/x86_64/kernel/e820.c
+++ linux/arch/x86_64/kernel/e820.c
@@ -592,31 +592,64 @@ void __init setup_memory_region(void)
e820_print_map(who);
}
-void __init parse_memopt(char *p, char **from)
-{
- end_user_pfn = memparse(p, from);
+static int __init parse_memopt(char *p)
+{
+ if (!p)
+ return -EINVAL;
+ end_user_pfn = memparse(p, &p);
end_user_pfn >>= PAGE_SHIFT;
+ return 0;
}
+early_param("mem", parse_memopt);
+
+static int userdef __initdata;
-void __init parse_memmapopt(char *p, char **from)
+static int __init parse_memmap_opt(char *p)
{
+ char *oldp;
unsigned long long start_at, mem_size;
- mem_size = memparse(p, from);
- p = *from;
+ if (!strcmp(p, "exactmap")) {
+#ifdef CONFIG_CRASH_DUMP
+ /* If we are doing a crash dump, we
+ * still need to know the real mem
+ * size before original memory map is
+ * reset.
+ */
+ saved_max_pfn = e820_end_of_ram();
+#endif
+ end_pfn_map = 0;
+ e820.nr_map = 0;
+ userdef = 1;
+ return 0;
+ }
+
+ oldp = p;
+ mem_size = memparse(p, &p);
+ if (p == oldp)
+ return -EINVAL;
if (*p == '@') {
- start_at = memparse(p+1, from);
+ start_at = memparse(p+1, &p);
add_memory_region(start_at, mem_size, E820_RAM);
} else if (*p == '#') {
- start_at = memparse(p+1, from);
+ start_at = memparse(p+1, &p);
add_memory_region(start_at, mem_size, E820_ACPI);
} else if (*p == '$') {
- start_at = memparse(p+1, from);
+ start_at = memparse(p+1, &p);
add_memory_region(start_at, mem_size, E820_RESERVED);
} else {
end_user_pfn = (mem_size >> PAGE_SHIFT);
}
- p = *from;
+ return *p == '\0' ? 0 : -EINVAL;
+}
+early_param("memmap", parse_memmap_opt);
+
+void finish_e820_parsing(void)
+{
+ if (userdef) {
+ printk(KERN_INFO "user-defined physical RAM map:\n");
+ e820_print_map("user");
+ }
}
unsigned long pci_mem_start = 0xaeedbabe;
Index: linux/arch/x86_64/kernel/io_apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/io_apic.c
+++ linux/arch/x86_64/kernel/io_apic.c
@@ -48,7 +48,7 @@ int sis_apic_bug; /* not actually suppor
static int no_timer_check;
-int disable_timer_pin_1 __initdata;
+static int disable_timer_pin_1 __initdata;
int timer_over_8254 __initdata = 0;
@@ -253,18 +253,17 @@ int ioapic_force;
static int __init disable_ioapic_setup(char *str)
{
skip_ioapic_setup = 1;
- return 1;
+ return 0;
}
+early_param("noapic", disable_ioapic_setup);
-static int __init enable_ioapic_setup(char *str)
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
{
- ioapic_force = 1;
- skip_ioapic_setup = 0;
+ disable_timer_pin_1 = 1;
return 1;
}
-
-__setup("noapic", disable_ioapic_setup);
-__setup("apic", enable_ioapic_setup);
+__setup("disable_timer_pin_1", disable_timer_pin_setup);
static int __init setup_disable_8254_timer(char *s)
{
Index: linux/arch/x86_64/kernel/pci-dma.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-dma.c
+++ linux/arch/x86_64/kernel/pci-dma.c
@@ -274,6 +274,9 @@ __init int iommu_setup(char *p)
{
iommu_merge = 1;
+ if (!p)
+ return -EINVAL;
+
while (*p) {
if (!strncmp(p,"off",3))
no_iommu = 1;
@@ -320,9 +323,9 @@ __init int iommu_setup(char *p)
if (*p == ',')
++p;
}
- return 1;
+ return 0;
}
-__setup("iommu=", iommu_setup);
+early_param("iommu", iommu_setup);
void __init pci_iommu_alloc(void)
{
Index: linux/arch/x86_64/kernel/setup64.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup64.c
+++ linux/arch/x86_64/kernel/setup64.c
@@ -46,8 +46,10 @@ Control non executable mappings for 64bi
on Enable(default)
off Disable
*/
-int __init nonx_setup(char *str)
+static int __init nonx_setup(char *str)
{
+ if (!str)
+ return -EINVAL;
if (!strncmp(str, "on", 2)) {
__supported_pte_mask |= _PAGE_NX;
do_not_nx = 0;
@@ -55,9 +57,9 @@ int __init nonx_setup(char *str)
do_not_nx = 1;
__supported_pte_mask &= ~_PAGE_NX;
}
- return 1;
+ return 0;
}
-__setup("noexec=", nonx_setup); /* parsed early actually */
+early_param("noexec", nonx_setup);
int force_personality32 = 0;
Index: linux/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux.orig/arch/x86_64/kernel/smpboot.c
+++ linux/arch/x86_64/kernel/smpboot.c
@@ -1270,11 +1270,11 @@ void __cpu_die(unsigned int cpu)
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
-__init int setup_additional_cpus(char *s)
+static __init int setup_additional_cpus(char *s)
{
- return get_option(&s, &additional_cpus);
+ return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL;
}
-__setup("additional_cpus=", setup_additional_cpus);
+early_param("additional_cpus", setup_additional_cpus);
#else /* ... !CONFIG_HOTPLUG_CPU */
Index: linux/arch/x86_64/kernel/traps.c
===================================================================
--- linux.orig/arch/x86_64/kernel/traps.c
+++ linux/arch/x86_64/kernel/traps.c
@@ -1114,24 +1114,29 @@ void __init trap_init(void)
cpu_init();
}
-
-/* Actual parsing is done early in setup.c. */
-static int __init oops_dummy(char *s)
+static int __init oops_setup(char *s)
{
- panic_on_oops = 1;
- return 1;
+ if (!s)
+ return -EINVAL;
+ if (!strcmp(s, "panic"))
+ panic_on_oops = 1;
+ return 0;
}
-__setup("oops=", oops_dummy);
+early_param("oops", oops_setup);
static int __init kstack_setup(char *s)
{
+ if (!s)
+ return -EINVAL;
kstack_depth_to_print = simple_strtoul(s,NULL,0);
- return 1;
+ return 0;
}
-__setup("kstack=", kstack_setup);
+early_param("kstack", kstack_setup);
static int __init call_trace_setup(char *s)
{
+ if (!s)
+ return -EINVAL;
if (strcmp(s, "old") == 0)
call_trace = -1;
else if (strcmp(s, "both") == 0)
@@ -1140,6 +1145,9 @@ static int __init call_trace_setup(char
call_trace = 1;
else if (strcmp(s, "new") == 0)
call_trace = 2;
- return 1;
+ return 0;
}
-__setup("call_trace=", call_trace_setup);
+early_param("call_trace", call_trace_setup);
+
+
+
Index: linux/include/asm-x86_64/e820.h
===================================================================
--- linux.orig/include/asm-x86_64/e820.h
+++ linux/include/asm-x86_64/e820.h
@@ -55,8 +55,7 @@ extern void e820_setup_gap(void);
extern unsigned long e820_hole_size(unsigned long start_pfn,
unsigned long end_pfn);
-extern void __init parse_memopt(char *p, char **end);
-extern void __init parse_memmapopt(char *p, char **end);
+extern void finish_e820_parsing(void);
extern struct e820map e820;
Index: linux/include/asm-x86_64/pgtable.h
===================================================================
--- linux.orig/include/asm-x86_64/pgtable.h
+++ linux/include/asm-x86_64/pgtable.h
@@ -21,7 +21,6 @@ extern unsigned long __supported_pte_mas
#define swapper_pg_dir init_level4_pgt
-extern int nonx_setup(char *str);
extern void paging_init(void);
extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
Index: linux/include/asm-x86_64/apic.h
===================================================================
--- linux.orig/include/asm-x86_64/apic.h
+++ linux/include/asm-x86_64/apic.h
@@ -17,6 +17,7 @@
extern int apic_verbosity;
extern int apic_runs_main_timer;
+extern int ioapic_force;
/*
* Define the default level of output to be very little
@@ -93,9 +94,6 @@ extern void setup_APIC_extened_lvt(unsig
#define K8_APIC_EXT_INT_MSG_EXT 0x7
#define K8_APIC_EXT_LVT_ENTRY_THRESHOLD 0
-extern int disable_timer_pin_1;
-
-
void smp_send_timer_broadcast_ipi(void);
void switch_APIC_timer_to_ipi(void *cpumask);
void switch_ipi_to_APIC_timer(void *cpumask);
Index: linux/arch/x86_64/kernel/machine_kexec.c
===================================================================
--- linux.orig/arch/x86_64/kernel/machine_kexec.c
+++ linux/arch/x86_64/kernel/machine_kexec.c
@@ -226,3 +226,31 @@ NORET_TYPE void machine_kexec(struct kim
rnk = (relocate_new_kernel_t) control_code_buffer;
(*rnk)(page_list, control_code_buffer, image->start, start_pgtable);
}
+
+/* crashkernel=size@addr specifies the location to reserve for
+ * a crash kernel. By reserving this memory we guarantee
+ * that linux never sets it up as a DMA target.
+ * Useful for holding code to do something appropriate
+ * after a kernel panic.
+ */
+static int __init setup_crashkernel(char *arg)
+{
+ unsigned long size, base;
+ char *p;
+ if (!arg)
+ return -EINVAL;
+ size = memparse(arg, &p);
+ if (arg == p)
+ return -EINVAL;
+ if (*p == '@') {
+ base = memparse(p+1, &p);
+ /* FIXME: Do I want a sanity check to validate the
+ * memory range? Yes you do, but it's too early for
+ * e820 -AK */
+ crashk_res.start = base;
+ crashk_res.end = base + size - 1;
+ }
+ return 0;
+}
+early_param("crashkernel", setup_crashkernel);
+
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [92/145] x86_64: Remove need for early lockdep init
[not found] <20060810 935.775038000@suse.de>
` (90 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [91/145] x86_64: Convert x86-64 to early param Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [93/145] i386/x86-64: Move acpi_disabled variables into acpi/boot.c Andi Kleen
` (53 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
I think it was only needed for the printks and we can do them later.
I put in a single early_printk so that we know the kernel is alive
(early_printk doesn't need any locks)
This makes some things easier for initialization of unwind for
lockdep, which is needed by later patches.
Cc: mingo@elte.hu
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/head64.c | 8 +-------
arch/x86_64/kernel/setup.c | 2 ++
2 files changed, 3 insertions(+), 7 deletions(-)
Index: linux/arch/x86_64/kernel/head64.c
===================================================================
--- linux.orig/arch/x86_64/kernel/head64.c
+++ linux/arch/x86_64/kernel/head64.c
@@ -45,15 +45,12 @@ static void __init copy_bootdata(char *r
new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
if (!new_data) {
if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
- printk("so old bootloader that it does not support commandline?!\n");
return;
}
new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
- printk("old bootloader convention, maybe loadlin?\n");
}
command_line = (char *) ((u64)(new_data));
memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
- printk("Bootdata ok (command line is %s)\n", saved_command_line);
}
void __init x86_64_start_kernel(char * real_mode_data)
@@ -65,10 +62,7 @@ void __init x86_64_start_kernel(char * r
asm volatile("lidt %0" :: "m" (idt_descr));
clear_bss();
- /*
- * This must be called really, really early:
- */
- lockdep_init();
+ early_printk("Kernel alive\n");
/*
* switch to init_level4_pgt from boot_level4_pgt
Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -355,6 +355,8 @@ void __init setup_arch(char **cmdline_p)
{
unsigned long kernel_end;
+ printk(KERN_INFO "Command line: %s\n", saved_command_line);
+
ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
screen_info = SCREEN_INFO;
edid_info = EDID_INFO;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [93/145] i386/x86-64: Move acpi_disabled variables into acpi/boot.c
[not found] <20060810 935.775038000@suse.de>
` (91 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [92/145] x86_64: Remove need for early lockdep init Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [94/145] x86_64: Clean up acpi_numa variable Andi Kleen
` (52 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Removes code duplication between i386/x86-64.
Not needed anymore in setup.c since the early_param cleanup.
Cc: len.brown@intel.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/acpi/boot.c | 7 +++++++
arch/i386/kernel/setup.c | 7 -------
arch/x86_64/kernel/setup.c | 3 ---
3 files changed, 7 insertions(+), 10 deletions(-)
Index: linux/arch/i386/kernel/acpi/boot.c
===================================================================
--- linux.orig/arch/i386/kernel/acpi/boot.c
+++ linux/arch/i386/kernel/acpi/boot.c
@@ -38,6 +38,13 @@
int __initdata acpi_force = 0;
+#ifdef CONFIG_ACPI
+int acpi_disabled = 0;
+#else
+int acpi_disabled = 1;
+#endif
+EXPORT_SYMBOL(acpi_disabled);
+
#ifdef CONFIG_X86_64
extern void __init clustered_apic_check(void);
Index: linux/arch/i386/kernel/setup.c
===================================================================
--- linux.orig/arch/i386/kernel/setup.c
+++ linux/arch/i386/kernel/setup.c
@@ -89,13 +89,6 @@ EXPORT_SYMBOL(boot_cpu_data);
unsigned long mmu_cr4_features;
-#ifdef CONFIG_ACPI
- int acpi_disabled = 0;
-#else
- int acpi_disabled = 1;
-#endif
-EXPORT_SYMBOL(acpi_disabled);
-
/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
#ifdef CONFIG_MCA
Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -74,9 +74,6 @@ EXPORT_SYMBOL(boot_cpu_data);
unsigned long mmu_cr4_features;
-int acpi_disabled;
-EXPORT_SYMBOL(acpi_disabled);
-
int acpi_numa __initdata;
/* Boot loader ID as an integer, for the benefit of proc_dointvec */
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [94/145] x86_64: Clean up acpi_numa variable
[not found] <20060810 935.775038000@suse.de>
` (92 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [93/145] i386/x86-64: Move acpi_disabled variables into acpi/boot.c Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [95/145] x86_64: Move e820 map into e820.c Andi Kleen
` (51 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Move it into srat.c. No need to clutter up setup.c for it.
Also remove its use in setup.c completely - it only guarded a printk
which can be done unconditionally.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/setup.c | 5 +----
arch/x86_64/mm/srat.c | 2 ++
2 files changed, 3 insertions(+), 4 deletions(-)
Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -74,8 +74,6 @@ EXPORT_SYMBOL(boot_cpu_data);
unsigned long mmu_cr4_features;
-int acpi_numa __initdata;
-
/* Boot loader ID as an integer, for the benefit of proc_dointvec */
int bootloader_type;
@@ -814,8 +812,7 @@ static void srat_detect_node(void)
node = first_node(node_online_map);
numa_set_node(cpu, node);
- if (acpi_numa > 0)
- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+ printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
#endif
}
Index: linux/arch/x86_64/mm/srat.c
===================================================================
--- linux.orig/arch/x86_64/mm/srat.c
+++ linux/arch/x86_64/mm/srat.c
@@ -21,6 +21,8 @@
#include <asm/numa.h>
#include <asm/e820.h>
+int acpi_numa __initdata;
+
#if (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \
defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) \
&& !defined(CONFIG_MEMORY_HOTPLUG)
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [95/145] x86_64: Move e820 map into e820.c
[not found] <20060810 935.775038000@suse.de>
` (93 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [94/145] x86_64: Clean up acpi_numa variable Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [96/145] x86_64: Add sparse annotation to vsyscall.c Andi Kleen
` (50 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Minor cleanup. Keep setup.c free from unrelated clutter.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/e820.c | 2 ++
arch/x86_64/kernel/setup.c | 1 -
2 files changed, 2 insertions(+), 1 deletion(-)
Index: linux/arch/x86_64/kernel/e820.c
===================================================================
--- linux.orig/arch/x86_64/kernel/e820.c
+++ linux/arch/x86_64/kernel/e820.c
@@ -24,6 +24,8 @@
#include <asm/bootsetup.h>
#include <asm/sections.h>
+struct e820map e820 __initdata;
+
/*
* PFN of last memory page.
*/
Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -97,7 +97,6 @@ struct sys_desc_table_struct {
struct edid_info edid_info;
EXPORT_SYMBOL_GPL(edid_info);
-struct e820map e820;
extern int root_mountflags;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [96/145] x86_64: Add sparse annotation to vsyscall.c
[not found] <20060810 935.775038000@suse.de>
` (94 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [95/145] x86_64: Move e820 map into e820.c Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [97/145] x86_64: Add sparse annotations to quiet sparse in arch/x86_64/mm/fault.c Andi Kleen
` (49 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Fixes
linux/arch/x86_64/kernel/vsyscall.c:276:7: warning: constant 0x0f40000000000 is so big it is long
linux/arch/x86_64/kernel/vsyscall.c:80:14: warning: incorrect type in argument 1 (different address spaces)
linux/arch/x86_64/kernel/vsyscall.c:80:14: expected void const volatile [noderef] *addr<asn:2>
linux/arch/x86_64/kernel/vsyscall.c:80:14: got void *<noident>
linux/arch/x86_64/kernel/vsyscall.c:200:7: warning: incorrect type in assignment (different address spaces)
linux/arch/x86_64/kernel/vsyscall.c:200:7: expected unsigned short [usertype] *map1
linux/arch/x86_64/kernel/vsyscall.c:200:7: got void [noderef] *<asn:2>
linux/arch/x86_64/kernel/vsyscall.c:203:7: warning: incorrect type in assignment (different address spaces)
linux/arch/x86_64/kernel/vsyscall.c:203:7: expected unsigned short [usertype] *map2
linux/arch/x86_64/kernel/vsyscall.c:203:7: got void [noderef] *<asn:2>
linux/arch/x86_64/kernel/vsyscall.c:215:10: warning: incorrect type in argument 1 (different address spaces)
linux/arch/x86_64/kernel/vsyscall.c:215:10: expected void volatile [noderef] *addr<asn:2>
linux/arch/x86_64/kernel/vsyscall.c:215:10: got unsigned short [usertype] *map2
linux/arch/x86_64/kernel/vsyscall.c:217:10: warning: incorrect type in argument 1 (different address spaces)
linux/arch/x86_64/kernel/vsyscall.c:217:10: expected void volatile [noderef] *addr<asn:2>
linux/arch/x86_64/kernel/vsyscall.c:217:10: got unsigned short [usertype] *map1
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/vsyscall.c | 14 ++++++++------
1 files changed, 8 insertions(+), 6 deletions(-)
Index: linux/arch/x86_64/kernel/vsyscall.c
===================================================================
--- linux.orig/arch/x86_64/kernel/vsyscall.c
+++ linux/arch/x86_64/kernel/vsyscall.c
@@ -77,7 +77,8 @@ static __always_inline void do_vgettimeo
__vxtime.tsc_quot) >> 32;
/* See comment in x86_64 do_gettimeofday. */
} else {
- usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
+ usec += ((readl((void __iomem *)
+ fix_to_virt(VSYSCALL_HPET) + 0xf0) -
__vxtime.last) * __vxtime.quot) >> 32;
}
} while (read_seqretry(&__xtime_lock, sequence));
@@ -191,7 +192,8 @@ static int vsyscall_sysctl_change(ctl_ta
void __user *buffer, size_t *lenp, loff_t *ppos)
{
extern u16 vsysc1, vsysc2;
- u16 *map1, *map2;
+ u16 __iomem *map1;
+ u16 __iomem *map2;
int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
if (!write)
return ret;
@@ -206,11 +208,11 @@ static int vsyscall_sysctl_change(ctl_ta
goto out;
}
if (!sysctl_vsyscall) {
- *map1 = SYSCALL;
- *map2 = SYSCALL;
+ writew(SYSCALL, map1);
+ writew(SYSCALL, map2);
} else {
- *map1 = NOP2;
- *map2 = NOP2;
+ writew(NOP2, map1);
+ writew(NOP2, map2);
}
iounmap(map2);
out:
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [97/145] x86_64: Add sparse annotations to quiet sparse in arch/x86_64/mm/fault.c
[not found] <20060810 935.775038000@suse.de>
` (95 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [96/145] x86_64: Add sparse annotation to vsyscall.c Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [98/145] x86_64: Fix most sparse warnings in sys_ia32.c Andi Kleen
` (48 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Fixes
linux/arch/x86_64/mm/fault.c:125:7: warning: incorrect type in argument 1 (different address spaces)
linux/arch/x86_64/mm/fault.c:125:7: expected void [noderef] *<noident><asn:1>
linux/arch/x86_64/mm/fault.c:125:7: got unsigned char *[assigned] instr
linux/arch/x86_64/mm/fault.c:163:8: warning: incorrect type in argument 1 (different address spaces)
linux/arch/x86_64/mm/fault.c:163:8: expected void [noderef] *<noident><asn:1>
linux/arch/x86_64/mm/fault.c:163:8: got unsigned char *[assigned] instr
linux/arch/x86_64/mm/fault.c:179:9: warning: incorrect type in argument 1 (different address spaces)
linux/arch/x86_64/mm/fault.c:179:9: expected void [noderef] *<noident><asn:1>
linux/arch/x86_64/mm/fault.c:179:9: got unsigned long *<noident>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/mm/fault.c | 10 +++++-----
1 files changed, 5 insertions(+), 5 deletions(-)
Index: linux/arch/x86_64/mm/fault.c
===================================================================
--- linux.orig/arch/x86_64/mm/fault.c
+++ linux/arch/x86_64/mm/fault.c
@@ -102,7 +102,7 @@ void bust_spinlocks(int yes)
static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
unsigned long error_code)
{
- unsigned char *instr;
+ unsigned char __user *instr;
int scan_more = 1;
int prefetch = 0;
unsigned char *max_instr;
@@ -111,7 +111,7 @@ static noinline int is_prefetch(struct p
if (error_code & PF_INSTR)
return 0;
- instr = (unsigned char *)convert_rip_to_linear(current, regs);
+ instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
max_instr = instr + 15;
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
@@ -122,7 +122,7 @@ static noinline int is_prefetch(struct p
unsigned char instr_hi;
unsigned char instr_lo;
- if (__get_user(opcode, instr))
+ if (__get_user(opcode, (char __user *)instr))
break;
instr_hi = opcode & 0xf0;
@@ -160,7 +160,7 @@ static noinline int is_prefetch(struct p
case 0x00:
/* Prefetch instruction is 0x0F0D or 0x0F18 */
scan_more = 0;
- if (__get_user(opcode, instr))
+ if (__get_user(opcode, (char __user *)instr))
break;
prefetch = (instr_lo == 0xF) &&
(opcode == 0x0D || opcode == 0x18);
@@ -176,7 +176,7 @@ static noinline int is_prefetch(struct p
static int bad_address(void *p)
{
unsigned long dummy;
- return __get_user(dummy, (unsigned long *)p);
+ return __get_user(dummy, (unsigned long __user *)p);
}
void dump_pagetable(unsigned long address)
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [98/145] x86_64: Fix most sparse warnings in sys_ia32.c
[not found] <20060810 935.775038000@suse.de>
` (96 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [97/145] x86_64: Add sparse annotations to quiet sparse in arch/x86_64/mm/fault.c Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [99/145] x86_64: Fix sparse warnings in compat aout code Andi Kleen
` (47 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Mostly by adding casts.
I didn't touch the "invalid access past ..." warnings, which are caused
by the sigset conversion.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/ia32/sys_ia32.c | 24 +++++++++++++-----------
1 files changed, 13 insertions(+), 11 deletions(-)
Index: linux/arch/x86_64/ia32/sys_ia32.c
===================================================================
--- linux.orig/arch/x86_64/ia32/sys_ia32.c
+++ linux/arch/x86_64/ia32/sys_ia32.c
@@ -60,6 +60,7 @@
#include <linux/highuid.h>
#include <linux/vmalloc.h>
#include <linux/fsnotify.h>
+#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/types.h>
#include <asm/uaccess.h>
@@ -389,7 +390,9 @@ sys32_rt_sigprocmask(int how, compat_sig
}
}
set_fs (KERNEL_DS);
- ret = sys_rt_sigprocmask(how, set ? &s : NULL, oset ? &s : NULL,
+ ret = sys_rt_sigprocmask(how,
+ set ? (sigset_t __user *)&s : NULL,
+ oset ? (sigset_t __user *)&s : NULL,
sigsetsize);
set_fs (old_fs);
if (ret) return ret;
@@ -541,7 +544,7 @@ sys32_sysinfo(struct sysinfo32 __user *i
int bitcount = 0;
set_fs (KERNEL_DS);
- ret = sys_sysinfo(&s);
+ ret = sys_sysinfo((struct sysinfo __user *)&s);
set_fs (old_fs);
/* Check to see if any memory value is too large for 32-bit and scale
@@ -589,7 +592,7 @@ sys32_sched_rr_get_interval(compat_pid_t
mm_segment_t old_fs = get_fs ();
set_fs (KERNEL_DS);
- ret = sys_sched_rr_get_interval(pid, &t);
+ ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
set_fs (old_fs);
if (put_compat_timespec(&t, interval))
return -EFAULT;
@@ -605,7 +608,7 @@ sys32_rt_sigpending(compat_sigset_t __us
mm_segment_t old_fs = get_fs();
set_fs (KERNEL_DS);
- ret = sys_rt_sigpending(&s, sigsetsize);
+ ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize);
set_fs (old_fs);
if (!ret) {
switch (_NSIG_WORDS) {
@@ -630,7 +633,7 @@ sys32_rt_sigqueueinfo(int pid, int sig,
if (copy_siginfo_from_user32(&info, uinfo))
return -EFAULT;
set_fs (KERNEL_DS);
- ret = sys_rt_sigqueueinfo(pid, sig, &info);
+ ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info);
set_fs (old_fs);
return ret;
}
@@ -666,9 +669,6 @@ sys32_sysctl(struct sysctl_ia32 __user *
size_t oldlen;
int __user *namep;
long ret;
- extern int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp,
- void *newval, size_t newlen);
-
if (copy_from_user(&a32, args32, sizeof (a32)))
return -EFAULT;
@@ -692,7 +692,8 @@ sys32_sysctl(struct sysctl_ia32 __user *
set_fs(KERNEL_DS);
lock_kernel();
- ret = do_sysctl(namep, a32.nlen, oldvalp, &oldlen, newvalp, (size_t) a32.newlen);
+ ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *)&oldlen,
+ newvalp, (size_t) a32.newlen);
unlock_kernel();
set_fs(old_fs);
@@ -743,7 +744,8 @@ sys32_sendfile(int out_fd, int in_fd, co
return -EFAULT;
set_fs(KERNEL_DS);
- ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count);
+ ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL,
+ count);
set_fs(old_fs);
if (offset && put_user(of, offset))
@@ -831,7 +833,7 @@ long sys32_ustat(unsigned dev, struct us
seg = get_fs();
set_fs(KERNEL_DS);
- ret = sys_ustat(dev,&u);
+ ret = sys_ustat(dev, (struct ustat __user *)&u);
set_fs(seg);
if (ret >= 0) {
if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) ||
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [99/145] x86_64: Fix sparse warnings in compat aout code
[not found] <20060810 935.775038000@suse.de>
` (97 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [98/145] x86_64: Fix most sparse warnings in sys_ia32.c Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [100/145] x86_64: Fix broken indentation in iommu_setup Andi Kleen
` (46 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/ia32/ia32_aout.c | 8 +++++---
1 files changed, 5 insertions(+), 3 deletions(-)
Index: linux/arch/x86_64/ia32/ia32_aout.c
===================================================================
--- linux.orig/arch/x86_64/ia32/ia32_aout.c
+++ linux/arch/x86_64/ia32/ia32_aout.c
@@ -333,7 +333,8 @@ static int load_aout_binary(struct linux
return error;
}
- error = bprm->file->f_op->read(bprm->file, (char *)text_addr,
+ error = bprm->file->f_op->read(bprm->file,
+ (char __user *)text_addr,
ex.a_text+ex.a_data, &pos);
if ((signed long)error < 0) {
send_sig(SIGKILL, current, 0);
@@ -366,7 +367,8 @@ static int load_aout_binary(struct linux
down_write(&current->mm->mmap_sem);
do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
up_write(&current->mm->mmap_sem);
- bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex),
+ bprm->file->f_op->read(bprm->file,
+ (char __user *)N_TXTADDR(ex),
ex.a_text+ex.a_data, &pos);
flush_icache_range((unsigned long) N_TXTADDR(ex),
(unsigned long) N_TXTADDR(ex) +
@@ -477,7 +479,7 @@ static int load_aout_library(struct file
do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
up_write(&current->mm->mmap_sem);
- file->f_op->read(file, (char *)start_addr,
+ file->f_op->read(file, (char __user *)start_addr,
ex.a_text + ex.a_data, &pos);
flush_icache_range((unsigned long) start_addr,
(unsigned long) start_addr + ex.a_text + ex.a_data);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [100/145] x86_64: Fix broken indentation in iommu_setup
[not found] <20060810 935.775038000@suse.de>
` (98 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [99/145] x86_64: Fix sparse warnings in compat aout code Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:36 ` [PATCH for review] [101/145] x86_64: Replace local_save_flags+local_irq_disable with Andi Kleen
` (45 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
No functional changes; only white space.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/pci-dma.c | 86 ++++++++++++++++++++-----------------------
1 files changed, 40 insertions(+), 46 deletions(-)
Index: linux/arch/x86_64/kernel/pci-dma.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-dma.c
+++ linux/arch/x86_64/kernel/pci-dma.c
@@ -272,58 +272,52 @@ EXPORT_SYMBOL(dma_set_mask);
*/
__init int iommu_setup(char *p)
{
- iommu_merge = 1;
-
+ iommu_merge = 1;
if (!p)
return -EINVAL;
-
- while (*p) {
- if (!strncmp(p,"off",3))
- no_iommu = 1;
- /* gart_parse_options has more force support */
- if (!strncmp(p,"force",5))
- force_iommu = 1;
- if (!strncmp(p,"noforce",7)) {
- iommu_merge = 0;
- force_iommu = 0;
- }
-
- if (!strncmp(p, "biomerge",8)) {
- iommu_bio_merge = 4096;
- iommu_merge = 1;
- force_iommu = 1;
- }
- if (!strncmp(p, "panic",5))
- panic_on_overflow = 1;
- if (!strncmp(p, "nopanic",7))
- panic_on_overflow = 0;
- if (!strncmp(p, "merge",5)) {
- iommu_merge = 1;
- force_iommu = 1;
- }
- if (!strncmp(p, "nomerge",7))
- iommu_merge = 0;
- if (!strncmp(p, "forcesac",8))
- iommu_sac_force = 1;
- if (!strncmp(p, "allowdac", 8))
- allow_dac = 1;
- if (!strncmp(p, "nodac", 5))
- allow_dac = -1;
-
+ while (*p) {
+ if (!strncmp(p,"off",3))
+ no_iommu = 1;
+ /* gart_parse_options has more force support */
+ if (!strncmp(p,"force",5))
+ force_iommu = 1;
+ if (!strncmp(p,"noforce",7)) {
+ iommu_merge = 0;
+ force_iommu = 0;
+ }
+ if (!strncmp(p, "biomerge",8)) {
+ iommu_bio_merge = 4096;
+ iommu_merge = 1;
+ force_iommu = 1;
+ }
+ if (!strncmp(p, "panic",5))
+ panic_on_overflow = 1;
+ if (!strncmp(p, "nopanic",7))
+ panic_on_overflow = 0;
+ if (!strncmp(p, "merge",5)) {
+ iommu_merge = 1;
+ force_iommu = 1;
+ }
+ if (!strncmp(p, "nomerge",7))
+ iommu_merge = 0;
+ if (!strncmp(p, "forcesac",8))
+ iommu_sac_force = 1;
+ if (!strncmp(p, "allowdac", 8))
+ allow_dac = 1;
+ if (!strncmp(p, "nodac", 5))
+ allow_dac = -1;
#ifdef CONFIG_SWIOTLB
- if (!strncmp(p, "soft",4))
- swiotlb = 1;
+ if (!strncmp(p, "soft",4))
+ swiotlb = 1;
#endif
-
#ifdef CONFIG_IOMMU
- gart_parse_options(p);
+ gart_parse_options(p);
#endif
-
- p += strcspn(p, ",");
- if (*p == ',')
- ++p;
- }
- return 0;
+ p += strcspn(p, ",");
+ if (*p == ',')
+ ++p;
+ }
+ return 0;
}
early_param("iommu", iommu_setup);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [101/145] x86_64: Replace local_save_flags+local_irq_disable with
[not found] <20060810 935.775038000@suse.de>
` (99 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [100/145] x86_64: Fix broken indentation in iommu_setup Andi Kleen
@ 2006-08-10 19:36 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [102/145] x86_64: Remove unneeded externs in acpi/boot.c Andi Kleen
` (44 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:36 UTC (permalink / raw)
r
From: Fernando Luis Vázquez Cao <fernando@oss.ntt.co.jp>
The combination of "local_save_flags" and "local_irq_disable" seems to be
equivalent to "local_irq_save" (see code snippets below). Consequently, replace
occurrences of local_save_flags+local_irq_disable with local_irq_save.
* local_irq_save
#define raw_local_irq_save(flags) \
do { (flags) = __raw_local_irq_save(); } while (0)
static inline unsigned long __raw_local_irq_save(void)
{
unsigned long flags = __raw_local_save_flags();
raw_local_irq_disable();
return flags;
}
* local_save_flags
#define raw_local_save_flags(flags) \
do { (flags) = __raw_local_save_flags(); } while (0)
Signed-off-by: Fernando Vazquez <fernando@intellilink.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
---
---
arch/x86_64/kernel/apic.c | 3 +--
arch/x86_64/kernel/genapic_flat.c | 3 +--
2 files changed, 2 insertions(+), 4 deletions(-)
Index: linux/arch/x86_64/kernel/apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/apic.c
+++ linux/arch/x86_64/kernel/apic.c
@@ -468,8 +468,7 @@ static int lapic_suspend(struct sys_devi
apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
- local_save_flags(flags);
- local_irq_disable();
+ local_irq_save(flags);
disable_local_APIC();
local_irq_restore(flags);
return 0;
Index: linux/arch/x86_64/kernel/genapic_flat.c
===================================================================
--- linux.orig/arch/x86_64/kernel/genapic_flat.c
+++ linux/arch/x86_64/kernel/genapic_flat.c
@@ -49,8 +49,7 @@ static void flat_send_IPI_mask(cpumask_t
unsigned long cfg;
unsigned long flags;
- local_save_flags(flags);
- local_irq_disable();
+ local_irq_save(flags);
/*
* Wait for idle.
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [102/145] x86_64: Remove unneeded externs in acpi/boot.c
[not found] <20060810 935.775038000@suse.de>
` (100 preceding siblings ...)
2006-08-10 19:36 ` [PATCH for review] [101/145] x86_64: Replace local_save_flags+local_irq_disable with Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-16 15:41 ` Len Brown
2006-08-10 19:37 ` [PATCH for review] [103/145] i386/x86-64: rename is_at_popf(), add iret to tests and fix Andi Kleen
` (43 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
And move one into proto.h
Cc: len.brown@intel.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/acpi/boot.c | 3 ---
include/asm-x86_64/proto.h | 2 ++
2 files changed, 2 insertions(+), 3 deletions(-)
Index: linux/arch/i386/kernel/acpi/boot.c
===================================================================
--- linux.orig/arch/i386/kernel/acpi/boot.c
+++ linux/arch/i386/kernel/acpi/boot.c
@@ -47,9 +47,6 @@ EXPORT_SYMBOL(acpi_disabled);
#ifdef CONFIG_X86_64
-extern void __init clustered_apic_check(void);
-
-extern int gsi_irq_sharing(int gsi);
#include <asm/proto.h>
static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
Index: linux/include/asm-x86_64/proto.h
===================================================================
--- linux.orig/include/asm-x86_64/proto.h
+++ linux/include/asm-x86_64/proto.h
@@ -124,6 +124,8 @@ extern int fix_aperture;
extern int reboot_force;
extern int notsc_setup(char *);
+extern int gsi_irq_sharing(int gsi);
+
extern void smp_local_timer_interrupt(struct pt_regs * regs);
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [102/145] x86_64: Remove unneeded externs in acpi/boot.c
2006-08-10 19:37 ` [PATCH for review] [102/145] x86_64: Remove unneeded externs in acpi/boot.c Andi Kleen
@ 2006-08-16 15:41 ` Len Brown
0 siblings, 0 replies; 199+ messages in thread
From: Len Brown @ 2006-08-16 15:41 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel
On Thursday 10 August 2006 15:37, Andi Kleen wrote:
> And move one into proto.h
>
> Cc: len.brown@intel.com
Acked-by: Len Brown <len.brown@intel.com>
> Signed-off-by: Andi Kleen <ak@suse.de>
>
> ---
> arch/i386/kernel/acpi/boot.c | 3 ---
> include/asm-x86_64/proto.h | 2 ++
> 2 files changed, 2 insertions(+), 3 deletions(-)
>
> Index: linux/arch/i386/kernel/acpi/boot.c
> ===================================================================
> --- linux.orig/arch/i386/kernel/acpi/boot.c
> +++ linux/arch/i386/kernel/acpi/boot.c
> @@ -47,9 +47,6 @@ EXPORT_SYMBOL(acpi_disabled);
>
> #ifdef CONFIG_X86_64
>
> -extern void __init clustered_apic_check(void);
> -
> -extern int gsi_irq_sharing(int gsi);
> #include <asm/proto.h>
>
> static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
> Index: linux/include/asm-x86_64/proto.h
> ===================================================================
> --- linux.orig/include/asm-x86_64/proto.h
> +++ linux/include/asm-x86_64/proto.h
> @@ -124,6 +124,8 @@ extern int fix_aperture;
> extern int reboot_force;
> extern int notsc_setup(char *);
>
> +extern int gsi_irq_sharing(int gsi);
> +
> extern void smp_local_timer_interrupt(struct pt_regs * regs);
>
> long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [103/145] i386/x86-64: rename is_at_popf(), add iret to tests and fix
[not found] <20060810 935.775038000@suse.de>
` (101 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [102/145] x86_64: Remove unneeded externs in acpi/boot.c Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [104/145] x86_64: print whether CONFIG_IOMMU_DEBUG is enabled Andi Kleen
` (42 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Chuck Ebbert <76306.1226@compuserve.com>
is_at_popf() needs to test for the iret instruction as well as
popf. So add that test and rename it to is_setting_trap_flag().
Also change max insn length from 16 to 15 to match reality.
LAHF / SAHF can't affect TF, so the comment in x86_64 is removed.
Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
Tested on x86_64.
---
arch/i386/kernel/ptrace.c | 10 +++++-----
arch/x86_64/kernel/ptrace.c | 12 +++++-------
2 files changed, 10 insertions(+), 12 deletions(-)
Index: linux/arch/i386/kernel/ptrace.c
===================================================================
--- linux.orig/arch/i386/kernel/ptrace.c
+++ linux/arch/i386/kernel/ptrace.c
@@ -185,17 +185,17 @@ static unsigned long convert_eip_to_line
return addr;
}
-static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs)
+static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
{
int i, copied;
- unsigned char opcode[16];
+ unsigned char opcode[15];
unsigned long addr = convert_eip_to_linear(child, regs);
copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
for (i = 0; i < copied; i++) {
switch (opcode[i]) {
- /* popf */
- case 0x9d:
+ /* popf and iret */
+ case 0x9d: case 0xcf:
return 1;
/* opcode and address size prefixes */
case 0x66: case 0x67:
@@ -247,7 +247,7 @@ static void set_singlestep(struct task_s
* don't mark it as being "us" that set it, so that we
* won't clear it by hand later.
*/
- if (is_at_popf(child, regs))
+ if (is_setting_trap_flag(child, regs))
return;
child->ptrace |= PT_DTRACE;
Index: linux/arch/x86_64/kernel/ptrace.c
===================================================================
--- linux.orig/arch/x86_64/kernel/ptrace.c
+++ linux/arch/x86_64/kernel/ptrace.c
@@ -116,17 +116,17 @@ unsigned long convert_rip_to_linear(stru
return addr;
}
-static int is_at_popf(struct task_struct *child, struct pt_regs *regs)
+static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
{
int i, copied;
- unsigned char opcode[16];
+ unsigned char opcode[15];
unsigned long addr = convert_rip_to_linear(child, regs);
copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
for (i = 0; i < copied; i++) {
switch (opcode[i]) {
- /* popf */
- case 0x9d:
+ /* popf and iret */
+ case 0x9d: case 0xcf:
return 1;
/* CHECKME: 64 65 */
@@ -189,10 +189,8 @@ static void set_singlestep(struct task_s
* ..but if TF is changed by the instruction we will trace,
* don't mark it as being "us" that set it, so that we
* won't clear it by hand later.
- *
- * AK: this is not enough, LAHF and IRET can change TF in user space too.
*/
- if (is_at_popf(child, regs))
+ if (is_setting_trap_flag(child, regs))
return;
child->ptrace |= PT_DTRACE;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [104/145] x86_64: print whether CONFIG_IOMMU_DEBUG is enabled
[not found] <20060810 935.775038000@suse.de>
` (102 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [103/145] i386/x86-64: rename is_at_popf(), add iret to tests and fix Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [105/145] x86_64: only verify the allocation bitmap if CONFIG_IOMMU_DEBUG is on Andi Kleen
` (41 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/pci-calgary.c | 14 ++++++++++----
1 files changed, 10 insertions(+), 4 deletions(-)
Index: linux/arch/x86_64/kernel/pci-calgary.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-calgary.c
+++ linux/arch/x86_64/kernel/pci-calgary.c
@@ -127,15 +127,19 @@ static void tce_cache_blast(struct iommu
/* enable this to stress test the chip's TCE cache */
#ifdef CONFIG_IOMMU_DEBUG
+int debugging __read_mostly = 1;
+
static inline void tce_cache_blast_stress(struct iommu_table *tbl)
{
tce_cache_blast(tbl);
}
-#else
+#else /* debugging is disabled */
+int debugging __read_mostly = 0;
+
static inline void tce_cache_blast_stress(struct iommu_table *tbl)
{
}
-#endif /* BLAST_TCE_CACHE_ON_UNMAP */
+#endif /* CONFIG_IOMMU_DEBUG */
static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
{
@@ -944,8 +948,10 @@ void __init detect_calgary(void)
if (calgary_found) {
iommu_detected = 1;
calgary_detected = 1;
- printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. "
- "TCE table spec is %d.\n", specified_table_size);
+ printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
+ printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
+ "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
+ debugging ? "enabled" : "disabled");
}
return;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [105/145] x86_64: only verify the allocation bitmap if CONFIG_IOMMU_DEBUG is on
[not found] <20060810 935.775038000@suse.de>
` (103 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [104/145] x86_64: print whether CONFIG_IOMMU_DEBUG is enabled Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [106/145] x86_64: remove tce_cache_blast_stress() Andi Kleen
` (40 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Muli Ben-Yehuda <muli@il.ibm.com>
Introduce new function verify_bit_range(). Define two versions, one
for CONFIG_IOMMU_DEBUG enabled and one for disabled. Previously we
were checking that the bitmap was consistent every time we allocated
or freed an entry in the TCE table, which is good for debugging but
incurs an unnecessary penalty on non-debug builds.
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/pci-calgary.c | 44 +++++++++++++++++++++++++++++++--------
1 files changed, 35 insertions(+), 9 deletions(-)
Index: linux/arch/x86_64/kernel/pci-calgary.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-calgary.c
+++ linux/arch/x86_64/kernel/pci-calgary.c
@@ -133,12 +133,35 @@ static inline void tce_cache_blast_stres
{
tce_cache_blast(tbl);
}
+
+static inline unsigned long verify_bit_range(unsigned long* bitmap,
+ int expected, unsigned long start, unsigned long end)
+{
+ unsigned long idx = start;
+
+ BUG_ON(start >= end);
+
+ while (idx < end) {
+ if (!!test_bit(idx, bitmap) != expected)
+ return idx;
+ ++idx;
+ }
+
+ /* all bits have the expected value */
+ return ~0UL;
+}
#else /* debugging is disabled */
int debugging __read_mostly = 0;
static inline void tce_cache_blast_stress(struct iommu_table *tbl)
{
}
+
+static inline unsigned long verify_bit_range(unsigned long* bitmap,
+ int expected, unsigned long start, unsigned long end)
+{
+ return ~0UL;
+}
#endif /* CONFIG_IOMMU_DEBUG */
static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
@@ -162,6 +185,7 @@ static void iommu_range_reserve(struct i
{
unsigned long index;
unsigned long end;
+ unsigned long badbit;
index = start_addr >> PAGE_SHIFT;
@@ -173,14 +197,15 @@ static void iommu_range_reserve(struct i
if (end > tbl->it_size) /* don't go off the table */
end = tbl->it_size;
- while (index < end) {
- if (test_bit(index, tbl->it_map))
+ badbit = verify_bit_range(tbl->it_map, 0, index, end);
+ if (badbit != ~0UL) {
+ if (printk_ratelimit())
printk(KERN_ERR "Calgary: entry already allocated at "
"0x%lx tbl %p dma 0x%lx npages %u\n",
- index, tbl, start_addr, npages);
- ++index;
+ badbit, tbl, start_addr, npages);
}
- set_bit_string(tbl->it_map, start_addr >> PAGE_SHIFT, npages);
+
+ set_bit_string(tbl->it_map, index, npages);
}
static unsigned long iommu_range_alloc(struct iommu_table *tbl,
@@ -247,7 +272,7 @@ static void __iommu_free(struct iommu_ta
unsigned int npages)
{
unsigned long entry;
- unsigned long i;
+ unsigned long badbit;
entry = dma_addr >> PAGE_SHIFT;
@@ -255,11 +280,12 @@ static void __iommu_free(struct iommu_ta
tce_free(tbl, entry, npages);
- for (i = 0; i < npages; ++i) {
- if (!test_bit(entry + i, tbl->it_map))
+ badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
+ if (badbit != ~0UL) {
+ if (printk_ratelimit())
printk(KERN_ERR "Calgary: bit is off at 0x%lx "
"tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
- entry + i, tbl, dma_addr, entry, npages);
+ badbit, tbl, dma_addr, entry, npages);
}
__clear_bit_string(tbl->it_map, entry, npages);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [106/145] x86_64: remove tce_cache_blast_stress()
[not found] <20060810 935.775038000@suse.de>
` (104 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [105/145] x86_64: only verify the allocation bitmap if CONFIG_IOMMU_DEBUG is on Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [107/145] x86_64: Calgary IOMMU: eradicate sole remaining 80 chars per line offender Andi Kleen
` (39 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Muli Ben-Yehuda <muli@il.ibm.com>
tce_cache_blast_stress was useful during bringup to stress the IOMMU's
cache flushing. Now that we quiesce DMAs on every cache flush, using
_stress() brings the machine to its knees once you put it under
load. Completely remove this debug / bringup code, which is no longer
useful.
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/pci-calgary.c | 11 -----------
1 files changed, 11 deletions(-)
Index: linux/arch/x86_64/kernel/pci-calgary.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-calgary.c
+++ linux/arch/x86_64/kernel/pci-calgary.c
@@ -129,11 +129,6 @@ static void tce_cache_blast(struct iommu
#ifdef CONFIG_IOMMU_DEBUG
int debugging __read_mostly = 1;
-static inline void tce_cache_blast_stress(struct iommu_table *tbl)
-{
- tce_cache_blast(tbl);
-}
-
static inline unsigned long verify_bit_range(unsigned long* bitmap,
int expected, unsigned long start, unsigned long end)
{
@@ -153,10 +148,6 @@ static inline unsigned long verify_bit_r
#else /* debugging is disabled */
int debugging __read_mostly = 0;
-static inline void tce_cache_blast_stress(struct iommu_table *tbl)
-{
-}
-
static inline unsigned long verify_bit_range(unsigned long* bitmap,
int expected, unsigned long start, unsigned long end)
{
@@ -289,8 +280,6 @@ static void __iommu_free(struct iommu_ta
}
__clear_bit_string(tbl->it_map, entry, npages);
-
- tce_cache_blast_stress(tbl);
}
static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [107/145] x86_64: Calgary IOMMU: eradicate sole remaining 80 chars per line offender
[not found] <20060810 935.775038000@suse.de>
` (105 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [106/145] x86_64: remove tce_cache_blast_stress() Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [108/145] x86_64: Some preparationary cleanup for stack trace Andi Kleen
` (38 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/pci-calgary.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletion(-)
Index: linux/arch/x86_64/kernel/pci-calgary.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-calgary.c
+++ linux/arch/x86_64/kernel/pci-calgary.c
@@ -86,7 +86,8 @@
#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */
#define MAX_NUM_CHASSIS 8 /* max number of chassis */
-#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2) /* max dev->bus->number */
+/* MAX_PHB_BUS_NUM is the maximal possible dev->bus->number */
+#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2)
#define PHBS_PER_CALGARY 4
/* register offsets in Calgary's internal register space */
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [108/145] x86_64: Some preparationary cleanup for stack trace
[not found] <20060810 935.775038000@suse.de>
` (106 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [107/145] x86_64: Calgary IOMMU: eradicate sole remaining 80 chars per line offender Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-13 6:26 ` Andrew Morton
2006-08-10 19:37 ` [PATCH for review] [109/145] x86_64: Convert modlist_lock to be a raw spinlock Andi Kleen
` (37 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
- Remove unused all_contexts parameter
No caller used it
- Move skip argument into the structure (needed for
follow-on patches)
Cc: mingo@elte.hu
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/stacktrace.c | 11 +++--------
arch/x86_64/kernel/stacktrace.c | 14 +++++---------
include/linux/stacktrace.h | 7 ++++---
kernel/lockdep.c | 5 ++++-
4 files changed, 16 insertions(+), 21 deletions(-)
Index: linux/arch/i386/kernel/stacktrace.c
===================================================================
--- linux.orig/arch/i386/kernel/stacktrace.c
+++ linux/arch/i386/kernel/stacktrace.c
@@ -61,12 +61,8 @@ save_context_stack(struct stack_trace *t
/*
* Save stack-backtrace addresses into a stack_trace buffer.
- * If all_contexts is set, all contexts (hardirq, softirq and process)
- * are saved. If not set then only the current context is saved.
*/
-void save_stack_trace(struct stack_trace *trace,
- struct task_struct *task, int all_contexts,
- unsigned int skip)
+void save_stack_trace(struct stack_trace *trace, struct task_struct *task)
{
unsigned long ebp;
unsigned long *stack = &ebp;
@@ -85,10 +81,9 @@ void save_stack_trace(struct stack_trace
struct thread_info *context = (struct thread_info *)
((unsigned long)stack & (~(THREAD_SIZE - 1)));
- ebp = save_context_stack(trace, skip, context, stack, ebp);
+ ebp = save_context_stack(trace, trace->skip, context, stack, ebp);
stack = (unsigned long *)context->previous_esp;
- if (!all_contexts || !stack ||
- trace->nr_entries >= trace->max_entries)
+ if (!stack || trace->nr_entries >= trace->max_entries)
break;
trace->entries[trace->nr_entries++] = ULONG_MAX;
if (trace->nr_entries >= trace->max_entries)
Index: linux/arch/x86_64/kernel/stacktrace.c
===================================================================
--- linux.orig/arch/x86_64/kernel/stacktrace.c
+++ linux/arch/x86_64/kernel/stacktrace.c
@@ -109,9 +109,10 @@ out_restore:
* Save stack-backtrace addresses into a stack_trace buffer:
*/
static inline unsigned long
-save_context_stack(struct stack_trace *trace, unsigned int skip,
+save_context_stack(struct stack_trace *trace,
unsigned long stack, unsigned long stack_end)
{
+ int skip = trace->skip;
unsigned long addr;
#ifdef CONFIG_FRAME_POINTER
@@ -159,12 +160,8 @@ save_context_stack(struct stack_trace *t
/*
* Save stack-backtrace addresses into a stack_trace buffer.
- * If all_contexts is set, all contexts (hardirq, softirq and process)
- * are saved. If not set then only the current context is saved.
*/
-void save_stack_trace(struct stack_trace *trace,
- struct task_struct *task, int all_contexts,
- unsigned int skip)
+void save_stack_trace(struct stack_trace *trace, struct task_struct *task)
{
unsigned long stack = (unsigned long)&stack;
int i, nr_stacks = 0, stacks_done[MAX_STACKS];
@@ -207,9 +204,8 @@ void save_stack_trace(struct stack_trace
return;
stacks_done[nr_stacks] = stack_end;
- stack = save_context_stack(trace, skip, stack, stack_end);
- if (!all_contexts || !stack ||
- trace->nr_entries >= trace->max_entries)
+ stack = save_context_stack(trace, stack, stack_end);
+ if (!stack || trace->nr_entries >= trace->max_entries)
return;
trace->entries[trace->nr_entries++] = ULONG_MAX;
if (trace->nr_entries >= trace->max_entries)
Index: linux/include/linux/stacktrace.h
===================================================================
--- linux.orig/include/linux/stacktrace.h
+++ linux/include/linux/stacktrace.h
@@ -5,15 +5,16 @@
struct stack_trace {
unsigned int nr_entries, max_entries;
unsigned long *entries;
+ int skip; /* input argument: How many entries to skip */
+ int all_contexts; /* input argument: if true do more than one stack */
};
extern void save_stack_trace(struct stack_trace *trace,
- struct task_struct *task, int all_contexts,
- unsigned int skip);
+ struct task_struct *task);
extern void print_stack_trace(struct stack_trace *trace, int spaces);
#else
-# define save_stack_trace(trace, task, all, skip) do { } while (0)
+# define save_stack_trace(trace, task) do { } while (0)
# define print_stack_trace(trace) do { } while (0)
#endif
Index: linux/kernel/lockdep.c
===================================================================
--- linux.orig/kernel/lockdep.c
+++ linux/kernel/lockdep.c
@@ -224,7 +224,10 @@ static int save_trace(struct stack_trace
trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
trace->entries = stack_trace + nr_stack_trace_entries;
- save_stack_trace(trace, NULL, 0, 3);
+ trace->skip = 3;
+ trace->all_contexts = 0;
+
+ save_stack_trace(trace, NULL);
trace->max_entries = trace->nr_entries;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [109/145] x86_64: Convert modlist_lock to be a raw spinlock
[not found] <20060810 935.775038000@suse.de>
` (107 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [108/145] x86_64: Some preparationary cleanup for stack trace Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-13 5:48 ` Andrew Morton
2006-08-10 19:37 ` [PATCH for review] [110/145] x86_64: Don't access the APIC in safe_smp_processor_id when it is not mapped yet Andi Kleen
` (36 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
This is a preparationary patch for converting stacktrace over to the
new dwarf2 unwinder. lockdep uses stacktrace and the new unwinder
takes the modlist_lock so using a normal spinlock would cause a deadlock.
Use a raw lock instead.
Cc: mingo@elte.hu
Signed-off-by: Andi Kleen <ak@suse.de>
---
kernel/module.c | 42 ++++++++++++++++++++++++++----------------
1 files changed, 26 insertions(+), 16 deletions(-)
Index: linux/kernel/module.c
===================================================================
--- linux.orig/kernel/module.c
+++ linux/kernel/module.c
@@ -59,7 +59,7 @@
#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
/* Protects module list */
-static DEFINE_SPINLOCK(modlist_lock);
+static raw_spinlock_t modlist_lock = __RAW_SPIN_LOCK_UNLOCKED;
/* List of modules, protected by module_mutex AND modlist_lock */
static DEFINE_MUTEX(module_mutex);
@@ -751,11 +751,13 @@ void __symbol_put(const char *symbol)
unsigned long flags;
const unsigned long *crc;
- spin_lock_irqsave(&modlist_lock, flags);
+ raw_local_save_flags(flags);
+ __raw_spin_lock(&modlist_lock);
if (!__find_symbol(symbol, &owner, &crc, 1))
BUG();
module_put(owner);
- spin_unlock_irqrestore(&modlist_lock, flags);
+ __raw_spin_unlock(&modlist_lock);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(__symbol_put);
@@ -1134,11 +1136,13 @@ void *__symbol_get(const char *symbol)
unsigned long value, flags;
const unsigned long *crc;
- spin_lock_irqsave(&modlist_lock, flags);
+ raw_local_save_flags(flags);
+ __raw_spin_lock(&modlist_lock);
value = __find_symbol(symbol, &owner, &crc, 1);
if (value && !strong_try_module_get(owner))
value = 0;
- spin_unlock_irqrestore(&modlist_lock, flags);
+ __raw_spin_unlock(&modlist_lock);
+ raw_local_irq_restore(flags);
return (void *)value;
}
@@ -2141,7 +2145,8 @@ const struct exception_table_entry *sear
const struct exception_table_entry *e = NULL;
struct module *mod;
- spin_lock_irqsave(&modlist_lock, flags);
+ raw_local_save_flags(flags);
+ __raw_spin_lock(&modlist_lock);
list_for_each_entry(mod, &modules, list) {
if (mod->num_exentries == 0)
continue;
@@ -2152,7 +2157,8 @@ const struct exception_table_entry *sear
if (e)
break;
}
- spin_unlock_irqrestore(&modlist_lock, flags);
+ __raw_spin_unlock(&modlist_lock);
+ raw_local_irq_restore(flags);
/* Now, if we found one, we are running inside it now, hence
we cannot unload the module, hence no refcnt needed. */
@@ -2166,19 +2172,20 @@ int is_module_address(unsigned long addr
{
unsigned long flags;
struct module *mod;
+ int ret = 0;
- spin_lock_irqsave(&modlist_lock, flags);
-
+ raw_local_save_flags(flags);
+ __raw_spin_lock(&modlist_lock);
list_for_each_entry(mod, &modules, list) {
if (within(addr, mod->module_core, mod->core_size)) {
- spin_unlock_irqrestore(&modlist_lock, flags);
- return 1;
+ ret = 1;
+ break;
}
}
+ __raw_spin_unlock(&modlist_lock);
+ raw_local_irq_restore(flags);
- spin_unlock_irqrestore(&modlist_lock, flags);
-
- return 0;
+ return ret;
}
@@ -2199,9 +2206,12 @@ struct module *module_text_address(unsig
struct module *mod;
unsigned long flags;
- spin_lock_irqsave(&modlist_lock, flags);
+ /* This is called from lockdep */
+ raw_local_save_flags(flags);
+ __raw_spin_lock(&modlist_lock);
mod = __module_text_address(addr);
- spin_unlock_irqrestore(&modlist_lock, flags);
+ __raw_spin_unlock(&modlist_lock);
+ raw_local_irq_restore(flags);
return mod;
}
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [109/145] x86_64: Convert modlist_lock to be a raw spinlock
2006-08-10 19:37 ` [PATCH for review] [109/145] x86_64: Convert modlist_lock to be a raw spinlock Andi Kleen
@ 2006-08-13 5:48 ` Andrew Morton
2006-08-13 6:52 ` Andi Kleen
0 siblings, 1 reply; 199+ messages in thread
From: Andrew Morton @ 2006-08-13 5:48 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel, Ingo Molnar
On Thu, 10 Aug 2006 21:37:07 +0200 (CEST)
Andi Kleen <ak@suse.de> wrote:
> This is a preparationary patch for converting stacktrace over to the
> new dwarf2 unwinder. lockdep uses stacktrace and the new unwinder
> takes the modlist_lock so using a normal spinlock would cause a deadlock.
> Use a raw lock instead.
>
It breaks the build on most architectures.
> ---
> kernel/module.c | 42 ++++++++++++++++++++++++++----------------
> 1 files changed, 26 insertions(+), 16 deletions(-)
>
> Index: linux/kernel/module.c
> ===================================================================
> --- linux.orig/kernel/module.c
> +++ linux/kernel/module.c
> @@ -59,7 +59,7 @@
> #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
>
> /* Protects module list */
> -static DEFINE_SPINLOCK(modlist_lock);
> +static raw_spinlock_t modlist_lock = __RAW_SPIN_LOCK_UNLOCKED;
>
> /* List of modules, protected by module_mutex AND modlist_lock */
> static DEFINE_MUTEX(module_mutex);
> @@ -751,11 +751,13 @@ void __symbol_put(const char *symbol)
> unsigned long flags;
> const unsigned long *crc;
>
> - spin_lock_irqsave(&modlist_lock, flags);
> + raw_local_save_flags(flags);
> + __raw_spin_lock(&modlist_lock);
> if (!__find_symbol(symbol, &owner, &crc, 1))
> BUG();
> module_put(owner);
> - spin_unlock_irqrestore(&modlist_lock, flags);
> + __raw_spin_unlock(&modlist_lock);
> + raw_local_irq_restore(flags);
That looks fairly hacky. Wouldn't it be better to implement
raw_spin_lock_irqsave()?
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [109/145] x86_64: Convert modlist_lock to be a raw spinlock
2006-08-13 5:48 ` Andrew Morton
@ 2006-08-13 6:52 ` Andi Kleen
2006-08-13 7:02 ` Andrew Morton
0 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-13 6:52 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, Ingo Molnar
On Sunday 13 August 2006 07:48, Andrew Morton wrote:
> On Thu, 10 Aug 2006 21:37:07 +0200 (CEST)
> Andi Kleen <ak@suse.de> wrote:
>
> > This is a preparationary patch for converting stacktrace over to the
> > new dwarf2 unwinder. lockdep uses stacktrace and the new unwinder
> > takes the modlist_lock so using a normal spinlock would cause a deadlock.
> > Use a raw lock instead.
> >
>
> It breaks the build on most architectures.
Hmm, I grepped and most architectures seem to have both __raw_spin_lock
and local_save_flags. I didn't actually compile them because crosstool
doesn't love me anymore since I use gcc 4.0.
What is the official portable interface to do a raw spinlock
if this one doesn't work?
> > - spin_lock_irqsave(&modlist_lock, flags);
> > + raw_local_save_flags(flags);
> > + __raw_spin_lock(&modlist_lock);
> > if (!__find_symbol(symbol, &owner, &crc, 1))
> > BUG();
> > module_put(owner);
> > - spin_unlock_irqrestore(&modlist_lock, flags);
> > + __raw_spin_unlock(&modlist_lock);
> > + raw_local_irq_restore(flags);
>
> That looks fairly hacky. Wouldn't it be better to implement
> raw_spin_lock_irqsave()?
Possible.
-Andi
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [109/145] x86_64: Convert modlist_lock to be a raw spinlock
2006-08-13 6:52 ` Andi Kleen
@ 2006-08-13 7:02 ` Andrew Morton
2006-08-13 7:15 ` Andi Kleen
0 siblings, 1 reply; 199+ messages in thread
From: Andrew Morton @ 2006-08-13 7:02 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel, Ingo Molnar
On Sun, 13 Aug 2006 08:52:46 +0200
Andi Kleen <ak@suse.de> wrote:
> On Sunday 13 August 2006 07:48, Andrew Morton wrote:
> > On Thu, 10 Aug 2006 21:37:07 +0200 (CEST)
> > Andi Kleen <ak@suse.de> wrote:
> >
> > > This is a preparationary patch for converting stacktrace over to the
> > > new dwarf2 unwinder. lockdep uses stacktrace and the new unwinder
> > > takes the modlist_lock so using a normal spinlock would cause a deadlock.
> > > Use a raw lock instead.
> > >
> >
> > It breaks the build on most architectures.
>
> Hmm, I grepped and most architectures seem to have both __raw_spin_lock
> and local_save_flags.
box:/usr/src/25> grep -l raw_local_save_flags include/asm-*/*.h
include/asm-avr32/irqflags.h
include/asm-i386/irqflags.h
include/asm-mips/irqflags.h
include/asm-powerpc/irqflags.h
include/asm-s390/irqflags.h
include/asm-x86_64/irqflags.h
> I didn't actually compile them because crosstool
> doesn't love me anymore since I use gcc 4.0.
crosstool is a bit of a bitch.
> What is the official portable interface to do a raw spinlock
> if this one doesn't work?
I don't see a way, really. Apart from going in and implementing it on the
various architectures.
Perhaps x86_64-mm-module-locks-raw-spinlock-hack-hack-hack.patch could be
hoisted up to include/linux/spinlock.h and then at least only
lockdep-enabled architectures need to implement these things.
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [109/145] x86_64: Convert modlist_lock to be a raw spinlock
2006-08-13 7:02 ` Andrew Morton
@ 2006-08-13 7:15 ` Andi Kleen
0 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-13 7:15 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, Ingo Molnar
>
> > What is the official portable interface to do a raw spinlock
> > if this one doesn't work?
>
> I don't see a way, really. Apart from going in and implementing it on the
> various architectures.
Hmpf. Maybe lockdep just needs a recursion check.
-Andi
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [110/145] x86_64: Don't access the APIC in safe_smp_processor_id when it is not mapped yet
[not found] <20060810 935.775038000@suse.de>
` (108 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [109/145] x86_64: Convert modlist_lock to be a raw spinlock Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [111/145] x86_64: Move unwind_init earlier Andi Kleen
` (35 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
Lockdep can call the dwarf2 unwinder early, and the dwarf2 code
uses safe_smp_processor_id which tries to access the local APIC page.
But that doesn't work before the APIC code has set up its fixmap.
Check for this case and always return the boot CPU (id 0) then.
Cc: jbeulich@novell.com
Cc: mingo@elte.hu
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/apic.c | 2 ++
arch/x86_64/kernel/smp.c | 2 +-
include/asm-x86_64/apic.h | 1 +
3 files changed, 4 insertions(+), 1 deletion(-)
Index: linux/arch/x86_64/kernel/apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/apic.c
+++ linux/arch/x86_64/kernel/apic.c
@@ -38,6 +38,7 @@
#include <asm/timex.h>
#include <asm/apic.h>
+int apic_mapped;
int apic_verbosity;
int apic_runs_main_timer;
int apic_calibrate_pmtmr __initdata;
@@ -600,6 +601,7 @@ void __init init_apic_mappings(void)
apic_phys = mp_lapic_addr;
set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+ apic_mapped = 1;
apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys);
/*
Index: linux/arch/x86_64/kernel/smp.c
===================================================================
--- linux.orig/arch/x86_64/kernel/smp.c
+++ linux/arch/x86_64/kernel/smp.c
@@ -526,7 +526,7 @@ int safe_smp_processor_id(void)
{
unsigned apicid, i;
- if (disable_apic)
+ if (disable_apic || !apic_mapped)
return 0;
apicid = hard_smp_processor_id();
Index: linux/include/asm-x86_64/apic.h
===================================================================
--- linux.orig/include/asm-x86_64/apic.h
+++ linux/include/asm-x86_64/apic.h
@@ -18,6 +18,7 @@
extern int apic_verbosity;
extern int apic_runs_main_timer;
extern int ioapic_force;
+extern int apic_mapped;
/*
* Define the default level of output to be very little
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [111/145] x86_64: Move unwind_init earlier
[not found] <20060810 935.775038000@suse.de>
` (109 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [110/145] x86_64: Don't access the APIC in safe_smp_processor_id when it is not mapped yet Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [112/145] x86_64: Merge stacktrace and show_trace Andi Kleen
` (34 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
Needed for use of the unwinder in lockdep, because lockdep runs really
early too.
Cc: jbeulich@novell.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
init/main.c | 2 +-
1 files changed, 1 insertion(+), 1 deletion(-)
Index: linux/init/main.c
===================================================================
--- linux.orig/init/main.c
+++ linux/init/main.c
@@ -468,6 +468,7 @@ asmlinkage void __init start_kernel(void
* Need to run as early as possible, to initialize the
* lockdep hash:
*/
+ unwind_init();
lockdep_init();
local_irq_disable();
@@ -506,7 +507,6 @@ asmlinkage void __init start_kernel(void
__stop___param - __start___param,
&unknown_bootoption);
sort_main_extable();
- unwind_init();
trap_init();
rcu_init();
init_IRQ();
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [112/145] x86_64: Merge stacktrace and show_trace
[not found] <20060810 935.775038000@suse.de>
` (110 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [111/145] x86_64: Move unwind_init earlier Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [113/145] i386: Do stacktracer conversion too Andi Kleen
` (33 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
This unifies the standard backtracer and the new in-memory stacktrace
backtracer. The standard one is converted to use callbacks, and
stacktrace is then reimplemented on top of those callbacks.
The main advantage is that stacktrace can now use the new dwarf2 unwinder
and avoid false positives in many cases.
I kept it simple to make sure the standard backtracer stays reliable.
Cc: mingo@elte.hu
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/stacktrace.c | 214 ++++------------------------------------
arch/x86_64/kernel/traps.c | 103 ++++++++++++++-----
include/asm-x86_64/stacktrace.h | 18 +++
3 files changed, 124 insertions(+), 211 deletions(-)
Index: linux/arch/x86_64/kernel/traps.c
===================================================================
--- linux.orig/arch/x86_64/kernel/traps.c
+++ linux/arch/x86_64/kernel/traps.c
@@ -45,6 +45,7 @@
#include <asm/pda.h>
#include <asm/proto.h>
#include <asm/nmi.h>
+#include <asm/stacktrace.h>
asmlinkage void divide_error(void);
asmlinkage void debug(void);
@@ -138,7 +139,7 @@ void printk_address(unsigned long addres
#endif
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
- unsigned *usedp, const char **idp)
+ unsigned *usedp, char **idp)
{
static char ids[][8] = {
[DEBUG_STACK - 1] = "#DB",
@@ -230,13 +231,19 @@ static unsigned long *in_exception_stack
return NULL;
}
-static int show_trace_unwind(struct unwind_frame_info *info, void *context)
+struct ops_and_data {
+ struct stacktrace_ops *ops;
+ void *data;
+};
+
+static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
{
+ struct ops_and_data *oad = (struct ops_and_data *)context;
int n = 0;
while (unwind(info) == 0 && UNW_PC(info)) {
n++;
- printk_address(UNW_PC(info));
+ oad->ops->address(oad->data, UNW_PC(info));
if (arch_unw_user_mode(info))
break;
}
@@ -248,49 +255,59 @@ static int show_trace_unwind(struct unwi
* process stack
* interrupt stack
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
+ * See Documentation/x86_64/kernelstacks for more details.
*/
-
-void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
+void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
+ unsigned long *stack,
+ struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = safe_smp_processor_id();
unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
unsigned used = 0;
- printk("\nCall Trace:\n");
-
if (!tsk)
tsk = current;
if (call_trace >= 0) {
int unw_ret = 0;
struct unwind_frame_info info;
+ struct ops_and_data oad = { .ops = ops, .data = data };
if (regs) {
if (unwind_init_frame_info(&info, tsk, regs) == 0)
- unw_ret = show_trace_unwind(&info, NULL);
+ unw_ret = dump_trace_unwind(&info, &oad);
} else if (tsk == current)
- unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
+ unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
else {
if (unwind_init_blocked(&info, tsk) == 0)
- unw_ret = show_trace_unwind(&info, NULL);
+ unw_ret = dump_trace_unwind(&info, &oad);
}
if (unw_ret > 0 && !arch_unw_user_mode(&info)) {
#ifdef CONFIG_STACK_UNWIND
unsigned long rip = info.regs.rip;
- print_symbol("DWARF2 unwinder stuck at %s\n", rip);
+ ops->warning_symbol(data,
+ "DWARF2 unwinder stuck at %s", rip);
if (call_trace == 1) {
- printk("Leftover inexact backtrace:\n");
+ ops->warning(data, "Leftover inexact backtrace:");
stack = (unsigned long *)info.regs.rsp;
} else if (call_trace > 1)
return;
else
- printk("Full inexact backtrace again:\n");
+ ops->warning(data,
+ "Full inexact backtrace again:");
#else
- printk("Inexact backtrace:\n");
+ ops->warning(data, "Inexact backtrace:");
#endif
}
}
+ if (!stack) {
+ unsigned long dummy;
+ stack = &dummy;
+ if (tsk && tsk != current)
+ stack = (unsigned long *)tsk->thread.rsp;
+ }
+
/*
* Print function call entries within a stack. 'cond' is the
* "end of stackframe" condition, that the 'stack++'
@@ -308,7 +325,7 @@ void show_trace(struct task_struct *tsk,
* down the cause of the crash will be able to figure \
* out the call path that was taken. \
*/ \
- printk_address(addr); \
+ ops->address(data, addr); \
} \
} while (0)
@@ -317,16 +334,17 @@ void show_trace(struct task_struct *tsk,
* current stack address. If the stacks consist of nested
* exceptions
*/
- for ( ; ; ) {
- const char *id;
+ for (;;) {
+ char *id;
unsigned long *estack_end;
estack_end = in_exception_stack(cpu, (unsigned long)stack,
&used, &id);
if (estack_end) {
- printk(" <%s>", id);
+ if (ops->stack(data, id) < 0)
+ break;
HANDLE_STACK (stack < estack_end);
- printk(" <EOE>");
+ ops->stack(data, "<EOE>");
/*
* We link to the next stack via the
* second-to-last pointer (index -2 to end) in the
@@ -341,7 +359,8 @@ void show_trace(struct task_struct *tsk,
(IRQSTACKSIZE - 64) / sizeof(*irqstack);
if (stack >= irqstack && stack < irqstack_end) {
- printk(" <IRQ>");
+ if (ops->stack(data, "IRQ") < 0)
+ break;
HANDLE_STACK (stack < irqstack_end);
/*
* We link to the next stack (which would be
@@ -350,7 +369,7 @@ void show_trace(struct task_struct *tsk,
*/
stack = (unsigned long *) (irqstack_end[-1]);
irqstack_end = NULL;
- printk(" <EOI>");
+ ops->stack(data, "EOI");
continue;
}
}
@@ -358,15 +377,53 @@ void show_trace(struct task_struct *tsk,
}
/*
- * This prints the process stack:
+ * This handles the process stack:
*/
HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
#undef HANDLE_STACK
+}
+EXPORT_SYMBOL(dump_trace);
+
+static void
+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+ print_symbol(msg, symbol);
+ printk("\n");
+}
+static void print_trace_warning(void *data, char *msg)
+{
+ printk("%s\n", msg);
+}
+
+static int print_trace_stack(void *data, char *name)
+{
+ printk(" <%s> ", name);
+ return 0;
+}
+
+static void print_trace_address(void *data, unsigned long addr)
+{
+ printk_address(addr);
+}
+
+static struct stacktrace_ops print_trace_ops = {
+ .warning = print_trace_warning,
+ .warning_symbol = print_trace_warning_symbol,
+ .stack = print_trace_stack,
+ .address = print_trace_address,
+};
+
+void
+show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
+{
+ printk("\nCall Trace:\n");
+ dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
printk("\n");
}
-static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
+static void
+_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
{
unsigned long *stack;
int i;
Index: linux/include/asm-x86_64/stacktrace.h
===================================================================
--- /dev/null
+++ linux/include/asm-x86_64/stacktrace.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_STACKTRACE_H
+#define _ASM_STACKTRACE_H 1
+
+/* Generic stack tracer with callbacks */
+
+struct stacktrace_ops {
+ void (*warning)(void *data, char *msg);
+ /* msg must contain %s for the symbol */
+ void (*warning_symbol)(void *data, char *msg, unsigned long symbol);
+ void (*address)(void *data, unsigned long address);
+ /* On negative return stop dumping */
+ int (*stack)(void *data, char *name);
+};
+
+void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack,
+ struct stacktrace_ops *ops, void *data);
+
+#endif
Index: linux/arch/x86_64/kernel/stacktrace.c
===================================================================
--- linux.orig/arch/x86_64/kernel/stacktrace.c
+++ linux/arch/x86_64/kernel/stacktrace.c
@@ -7,211 +7,49 @@
*/
#include <linux/sched.h>
#include <linux/stacktrace.h>
+#include <linux/module.h>
+#include <asm/stacktrace.h>
-#include <asm/smp.h>
-
-static inline int
-in_range(unsigned long start, unsigned long addr, unsigned long end)
+static void save_stack_warning(void *data, char *msg)
{
- return addr >= start && addr <= end;
}
-static unsigned long
-get_stack_end(struct task_struct *task, unsigned long stack)
+static void
+save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
{
- unsigned long stack_start, stack_end, flags;
- int i, cpu;
-
- /*
- * The most common case is that we are in the task stack:
- */
- stack_start = (unsigned long)task->thread_info;
- stack_end = stack_start + THREAD_SIZE;
-
- if (in_range(stack_start, stack, stack_end))
- return stack_end;
-
- /*
- * We are in an interrupt if irqstackptr is set:
- */
- raw_local_irq_save(flags);
- cpu = safe_smp_processor_id();
- stack_end = (unsigned long)cpu_pda(cpu)->irqstackptr;
-
- if (stack_end) {
- stack_start = stack_end & ~(IRQSTACKSIZE-1);
- if (in_range(stack_start, stack, stack_end))
- goto out_restore;
- /*
- * We get here if we are in an IRQ context but we
- * are also in an exception stack.
- */
- }
-
- /*
- * Iterate over all exception stacks, and figure out whether
- * 'stack' is in one of them:
- */
- for (i = 0; i < N_EXCEPTION_STACKS; i++) {
- /*
- * set 'end' to the end of the exception stack.
- */
- stack_end = per_cpu(init_tss, cpu).ist[i];
- stack_start = stack_end - EXCEPTION_STKSZ;
-
- /*
- * Is 'stack' above this exception frame's end?
- * If yes then skip to the next frame.
- */
- if (stack >= stack_end)
- continue;
- /*
- * Is 'stack' above this exception frame's start address?
- * If yes then we found the right frame.
- */
- if (stack >= stack_start)
- goto out_restore;
-
- /*
- * If this is a debug stack, and if it has a larger size than
- * the usual exception stacks, then 'stack' might still
- * be within the lower portion of the debug stack:
- */
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
- if (i == DEBUG_STACK - 1 && stack >= stack_end - DEBUG_STKSZ) {
- /*
- * Black magic. A large debug stack is composed of
- * multiple exception stack entries, which we
- * iterate through now. Dont look:
- */
- do {
- stack_end -= EXCEPTION_STKSZ;
- stack_start -= EXCEPTION_STKSZ;
- } while (stack < stack_start);
-
- goto out_restore;
- }
-#endif
- }
- /*
- * Ok, 'stack' is not pointing to any of the system stacks.
- */
- stack_end = 0;
-
-out_restore:
- raw_local_irq_restore(flags);
-
- return stack_end;
}
-
-/*
- * Save stack-backtrace addresses into a stack_trace buffer:
- */
-static inline unsigned long
-save_context_stack(struct stack_trace *trace,
- unsigned long stack, unsigned long stack_end)
+static int save_stack_stack(void *data, char *name)
{
- int skip = trace->skip;
- unsigned long addr;
-
-#ifdef CONFIG_FRAME_POINTER
- unsigned long prev_stack = 0;
+ struct stack_trace *trace = (struct stack_trace *)data;
+ return trace->all_contexts ? 0 : -1;
+}
- while (in_range(prev_stack, stack, stack_end)) {
- pr_debug("stack: %p\n", (void *)stack);
- addr = (unsigned long)(((unsigned long *)stack)[1]);
- pr_debug("addr: %p\n", (void *)addr);
- if (!skip)
- trace->entries[trace->nr_entries++] = addr-1;
- else
- skip--;
- if (trace->nr_entries >= trace->max_entries)
- break;
- if (!addr)
- return 0;
- /*
- * Stack frames must go forwards (otherwise a loop could
- * happen if the stackframe is corrupted), so we move
- * prev_stack forwards:
- */
- prev_stack = stack;
- stack = (unsigned long)(((unsigned long *)stack)[0]);
- }
- pr_debug("invalid: %p\n", (void *)stack);
-#else
- while (stack < stack_end) {
- addr = ((unsigned long *)stack)[0];
- stack += sizeof(long);
- if (__kernel_text_address(addr)) {
- if (!skip)
- trace->entries[trace->nr_entries++] = addr-1;
- else
- skip--;
- if (trace->nr_entries >= trace->max_entries)
- break;
- }
+static void save_stack_address(void *data, unsigned long addr)
+{
+ struct stack_trace *trace = (struct stack_trace *)data;
+ if (trace->skip > 0) {
+ trace->skip--;
+ return;
}
-#endif
- return stack;
+ if (trace->nr_entries < trace->max_entries - 1)
+ trace->entries[trace->nr_entries++] = addr;
}
-#define MAX_STACKS 10
+static struct stacktrace_ops save_stack_ops = {
+ .warning = save_stack_warning,
+ .warning_symbol = save_stack_warning_symbol,
+ .stack = save_stack_stack,
+ .address = save_stack_address,
+};
/*
* Save stack-backtrace addresses into a stack_trace buffer.
*/
void save_stack_trace(struct stack_trace *trace, struct task_struct *task)
{
- unsigned long stack = (unsigned long)&stack;
- int i, nr_stacks = 0, stacks_done[MAX_STACKS];
-
- WARN_ON(trace->nr_entries || !trace->max_entries);
-
- if (!task)
- task = current;
-
- pr_debug("task: %p, ti: %p\n", task, task->thread_info);
-
- if (!task || task == current) {
- /* Grab rbp right from our regs: */
- asm ("mov %%rbp, %0" : "=r" (stack));
- pr_debug("rbp: %p\n", (void *)stack);
- } else {
- /* rbp is the last reg pushed by switch_to(): */
- stack = task->thread.rsp;
- pr_debug("other task rsp: %p\n", (void *)stack);
- stack = (unsigned long)(((unsigned long *)stack)[0]);
- pr_debug("other task rbp: %p\n", (void *)stack);
- }
-
- while (1) {
- unsigned long stack_end = get_stack_end(task, stack);
-
- pr_debug("stack: %p\n", (void *)stack);
- pr_debug("stack end: %p\n", (void *)stack_end);
-
- /*
- * Invalid stack addres?
- */
- if (!stack_end)
- return;
- /*
- * Were we in this stack already? (recursion)
- */
- for (i = 0; i < nr_stacks; i++)
- if (stacks_done[i] == stack_end)
- return;
- stacks_done[nr_stacks] = stack_end;
-
- stack = save_context_stack(trace, stack, stack_end);
- if (!stack || trace->nr_entries >= trace->max_entries)
- return;
- trace->entries[trace->nr_entries++] = ULONG_MAX;
- if (trace->nr_entries >= trace->max_entries)
- return;
- if (++nr_stacks >= MAX_STACKS)
- return;
- }
+ dump_trace(task, NULL, NULL, &save_stack_ops, trace);
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
}
+EXPORT_SYMBOL(save_stack_trace);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [113/145] i386: Do stacktracer conversion too
[not found] <20060810 935.775038000@suse.de>
` (111 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [112/145] x86_64: Merge stacktrace and show_trace Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [114/145] x86_64: Don't force frame pointers for lockdep Andi Kleen
` (32 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
This follows the x86-64 patches and in fact reuses code from them.
Convert the standard backtracer to do all output using
callbacks. Use the x86-64 stack tracer implementation
that uses these callbacks to implement the stacktrace interface.
This allows the new dwarf2 unwinder to be used for stacktrace
and gives better backtraces.
Cc: mingo@elte.hu
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/Makefile | 1
arch/i386/kernel/stacktrace.c | 93 ------------------------------------
arch/i386/kernel/traps.c | 108 +++++++++++++++++++++++++++++++-----------
include/asm-i386/stacktrace.h | 1
4 files changed, 83 insertions(+), 120 deletions(-)
Index: linux/arch/i386/kernel/Makefile
===================================================================
--- linux.orig/arch/i386/kernel/Makefile
+++ linux/arch/i386/kernel/Makefile
@@ -82,4 +82,5 @@ $(obj)/vsyscall-syms.o: $(src)/vsyscall.
$(call if_changed,syscall)
k8-y += ../../x86_64/kernel/k8.o
+stacktrace-y += ../../x86_64/kernel/stacktrace.o
Index: linux/arch/i386/kernel/stacktrace.c
===================================================================
--- linux.orig/arch/i386/kernel/stacktrace.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * arch/i386/kernel/stacktrace.c
- *
- * Stack trace management functions
- *
- * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- */
-#include <linux/sched.h>
-#include <linux/stacktrace.h>
-
-static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
-{
- return p > (void *)tinfo &&
- p < (void *)tinfo + THREAD_SIZE - 3;
-}
-
-/*
- * Save stack-backtrace addresses into a stack_trace buffer:
- */
-static inline unsigned long
-save_context_stack(struct stack_trace *trace, unsigned int skip,
- struct thread_info *tinfo, unsigned long *stack,
- unsigned long ebp)
-{
- unsigned long addr;
-
-#ifdef CONFIG_FRAME_POINTER
- while (valid_stack_ptr(tinfo, (void *)ebp)) {
- addr = *(unsigned long *)(ebp + 4);
- if (!skip)
- trace->entries[trace->nr_entries++] = addr;
- else
- skip--;
- if (trace->nr_entries >= trace->max_entries)
- break;
- /*
- * break out of recursive entries (such as
- * end_of_stack_stop_unwind_function):
- */
- if (ebp == *(unsigned long *)ebp)
- break;
-
- ebp = *(unsigned long *)ebp;
- }
-#else
- while (valid_stack_ptr(tinfo, stack)) {
- addr = *stack++;
- if (__kernel_text_address(addr)) {
- if (!skip)
- trace->entries[trace->nr_entries++] = addr;
- else
- skip--;
- if (trace->nr_entries >= trace->max_entries)
- break;
- }
- }
-#endif
-
- return ebp;
-}
-
-/*
- * Save stack-backtrace addresses into a stack_trace buffer.
- */
-void save_stack_trace(struct stack_trace *trace, struct task_struct *task)
-{
- unsigned long ebp;
- unsigned long *stack = &ebp;
-
- WARN_ON(trace->nr_entries || !trace->max_entries);
-
- if (!task || task == current) {
- /* Grab ebp right from our regs: */
- asm ("movl %%ebp, %0" : "=r" (ebp));
- } else {
- /* ebp is the last reg pushed by switch_to(): */
- ebp = *(unsigned long *) task->thread.esp;
- }
-
- while (1) {
- struct thread_info *context = (struct thread_info *)
- ((unsigned long)stack & (~(THREAD_SIZE - 1)));
-
- ebp = save_context_stack(trace, trace->skip, context, stack, ebp);
- stack = (unsigned long *)context->previous_esp;
- if (!stack || trace->nr_entries >= trace->max_entries)
- break;
- trace->entries[trace->nr_entries++] = ULONG_MAX;
- if (trace->nr_entries >= trace->max_entries)
- break;
- }
-}
-
Index: linux/arch/i386/kernel/traps.c
===================================================================
--- linux.orig/arch/i386/kernel/traps.c
+++ linux/arch/i386/kernel/traps.c
@@ -51,6 +51,7 @@
#include <asm/smp.h>
#include <asm/arch_hooks.h>
#include <asm/kdebug.h>
+#include <asm/stacktrace.h>
#include <linux/module.h>
@@ -114,26 +115,16 @@ static inline int valid_stack_ptr(struct
p < (void *)tinfo + THREAD_SIZE - 3;
}
-/*
- * Print one address/symbol entries per line.
- */
-static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
-{
- printk(" [<%08lx>] ", addr);
-
- print_symbol("%s\n", addr);
-}
-
static inline unsigned long print_context_stack(struct thread_info *tinfo,
unsigned long *stack, unsigned long ebp,
- char *log_lvl)
+ struct stacktrace_ops *ops, void *data)
{
unsigned long addr;
#ifdef CONFIG_FRAME_POINTER
while (valid_stack_ptr(tinfo, (void *)ebp)) {
addr = *(unsigned long *)(ebp + 4);
- print_addr_and_symbol(addr, log_lvl);
+ ops->address(data, addr);
/*
* break out of recursive entries (such as
* end_of_stack_stop_unwind_function):
@@ -146,28 +137,35 @@ static inline unsigned long print_contex
while (valid_stack_ptr(tinfo, stack)) {
addr = *stack++;
if (__kernel_text_address(addr))
- print_addr_and_symbol(addr, log_lvl);
+ ops->address(data, addr);
}
#endif
return ebp;
}
+struct ops_and_data {
+ struct stacktrace_ops *ops;
+ void *data;
+};
+
static asmlinkage int
-show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
+dump_trace_unwind(struct unwind_frame_info *info, void *data)
{
+ struct ops_and_data *oad = (struct ops_and_data *)data;
int n = 0;
while (unwind(info) == 0 && UNW_PC(info)) {
n++;
- print_addr_and_symbol(UNW_PC(info), log_lvl);
+ oad->ops->address(oad->data, UNW_PC(info));
if (arch_unw_user_mode(info))
break;
}
return n;
}
-static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, char *log_lvl)
+void dump_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack,
+ struct stacktrace_ops *ops, void *data)
{
unsigned long ebp;
@@ -177,34 +175,42 @@ static void show_trace_log_lvl(struct ta
if (call_trace >= 0) {
int unw_ret = 0;
struct unwind_frame_info info;
+ struct ops_and_data oad = { .ops = ops, .data = data };
if (regs) {
if (unwind_init_frame_info(&info, task, regs) == 0)
- unw_ret = show_trace_unwind(&info, log_lvl);
+ unw_ret = dump_trace_unwind(&info, &oad);
} else if (task == current)
- unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
+ unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
else {
if (unwind_init_blocked(&info, task) == 0)
- unw_ret = show_trace_unwind(&info, log_lvl);
+ unw_ret = dump_trace_unwind(&info, &oad);
}
if (unw_ret > 0 && !arch_unw_user_mode(&info)) {
#ifdef CONFIG_STACK_UNWIND
- print_symbol("DWARF2 unwinder stuck at %s\n",
+ ops->warning_symbol(data, "DWARF2 unwinder stuck at %s",
UNW_PC(&info));
if (call_trace == 1) {
- printk("Leftover inexact backtrace:\n");
+ ops->warning(data, "Leftover inexact backtrace:");
if (UNW_SP(&info))
stack = (void *)UNW_SP(&info);
} else if (call_trace > 1)
return;
else
- printk("Full inexact backtrace again:\n");
+ ops->warning(data, "Full inexact backtrace again:");
#else
- printk("Inexact backtrace:\n");
+ ops->warning(data, "Inexact backtrace:");
#endif
}
}
+ if (!stack) {
+ unsigned long dummy;
+ stack = &dummy;
+ if (task && task != current)
+ stack = (unsigned long *)task->thread.esp;
+ }
+
if (task == current) {
/* Grab ebp right from our regs */
asm ("movl %%ebp, %0" : "=r" (ebp) : );
@@ -217,15 +223,63 @@ static void show_trace_log_lvl(struct ta
struct thread_info *context;
context = (struct thread_info *)
((unsigned long)stack & (~(THREAD_SIZE - 1)));
- ebp = print_context_stack(context, stack, ebp, log_lvl);
+ ebp = print_context_stack(context, stack, ebp, ops, data);
+ /* Should be after the line below, but somewhere
+ in early boot context comes out corrupted and we
+ can't reference it -AK */
+ if (ops->stack(data, "IRQ") < 0)
+ break;
stack = (unsigned long*)context->previous_esp;
if (!stack)
break;
- printk("%s =======================\n", log_lvl);
}
}
+EXPORT_SYMBOL(dump_trace);
+
+static void
+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+ printk(data);
+ print_symbol(msg, symbol);
+ printk("\n");
+}
+
+static void print_trace_warning(void *data, char *msg)
+{
+ printk("%s%s\n", (char *)data, msg);
+}
+
+static int print_trace_stack(void *data, char *name)
+{
+ return 0;
+}
+
+/*
+ * Print one address/symbol entries per line.
+ */
+static void print_trace_address(void *data, unsigned long addr)
+{
+ printk("%s [<%08lx>] ", (char *)data, addr);
+ print_symbol("%s\n", addr);
+}
+
+static struct stacktrace_ops print_trace_ops = {
+ .warning = print_trace_warning,
+ .warning_symbol = print_trace_warning_symbol,
+ .stack = print_trace_stack,
+ .address = print_trace_address,
+};
+
+static void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long * stack, char *log_lvl)
+{
+ dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
+ printk("%s =======================\n", log_lvl);
+}
-void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long * stack)
{
show_trace_log_lvl(task, regs, stack, "");
}
Index: linux/include/asm-i386/stacktrace.h
===================================================================
--- /dev/null
+++ linux/include/asm-i386/stacktrace.h
@@ -0,0 +1 @@
+#include <asm-x86_64/stacktrace.h>
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [114/145] x86_64: Don't force frame pointers for lockdep
[not found] <20060810 935.775038000@suse.de>
` (112 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [113/145] i386: Do stacktracer conversion too Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [115/145] x86_64: fix dubious segment register clear in cpu_init() Andi Kleen
` (31 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
Now that stacktrace supports dwarf2, don't force frame pointers for lockdep anymore.
Cc: mingo@elte.hu
Signed-off-by: Andi Kleen <ak@suse.de>
---
lib/Kconfig.debug | 1 -
1 files changed, 1 deletion(-)
Index: linux/lib/Kconfig.debug
===================================================================
--- linux.orig/lib/Kconfig.debug
+++ linux/lib/Kconfig.debug
@@ -218,7 +218,6 @@ config LOCKDEP
bool
depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
select STACKTRACE
- select FRAME_POINTER
select KALLSYMS
select KALLSYMS_ALL
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [115/145] x86_64: fix dubious segment register clear in cpu_init()
[not found] <20060810 935.775038000@suse.de>
` (113 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [114/145] x86_64: Don't force frame pointers for lockdep Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [116/145] i386: don't taint UP K7's running SMP kernels Andi Kleen
` (30 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Jeremy Fitzhardinge <jeremy@goop.org>
Fix a very dubious piece of code in
arch/i386/kernel/cpu/common.c:cpu_init(). This clears out %fs and
%gs, but clobbers %eax in the process without telling gcc. It turns
out that gcc happens to be not using %eax at that point anyway so it
doesn't matter much, but it looks like a bomb waiting to go off.
This does end up saving an instruction, because gcc wants %eax==0 for
the set_debugreg()s below.
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/cpu/common.c | 2 +-
1 files changed, 1 insertion(+), 1 deletion(-)
Index: linux/arch/i386/kernel/cpu/common.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/common.c
+++ linux/arch/i386/kernel/cpu/common.c
@@ -675,7 +675,7 @@ old_gdt:
#endif
/* Clear %fs and %gs. */
- asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
+ asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
/* Clear all 6 debug registers: */
set_debugreg(0, 0);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [116/145] i386: don't taint UP K7's running SMP kernels.
[not found] <20060810 935.775038000@suse.de>
` (114 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [115/145] x86_64: fix dubious segment register clear in cpu_init() Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [117/145] i386: error_code is not safe for kprobes Andi Kleen
` (29 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Dave Jones <davej@redhat.com>
We have a test that looks for invalid pairings of certain athlon/durons
that weren't designed for SMP, and taint accordingly (with 'S') if we find
such a configuration. However, this test shouldn't fire if there's only
a single CPU present. It's perfectly valid for an SMP kernel to boot on UP
hardware for example.
AK: changed to num_possible_cpus()
Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/smpboot.c | 3 +++
1 files changed, 3 insertions(+)
Index: linux/arch/i386/kernel/smpboot.c
===================================================================
--- linux.orig/arch/i386/kernel/smpboot.c
+++ linux/arch/i386/kernel/smpboot.c
@@ -177,6 +177,9 @@ static void __devinit smp_store_cpu_info
*/
if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
+ if (num_possible_cpus() == 1)
+ goto valid_k7;
+
/* Athlon 660/661 is valid. */
if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
goto valid_k7;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [117/145] i386: error_code is not safe for kprobes
[not found] <20060810 935.775038000@suse.de>
` (115 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [116/145] i386: don't taint UP K7's running SMP kernels Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [118/145] x86_64: Mark error_entry as forbidden to kprobes Andi Kleen
` (28 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Chuck Ebbert <76306.1226@compuserve.com>
Because code marked unsafe for kprobes jumps directly to
entry.S::error_code, that must be marked unsafe as well.
The easiest way to do that is to move the page fault entry
point to just before error_code and let it inherit the same
section.
Also moved all the ".previous" asm directives for kprobes
sections to column 1 and removed ".text" from them.
Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/entry.S | 25 +++++++++++++------------
1 files changed, 13 insertions(+), 12 deletions(-)
Index: linux/arch/i386/kernel/entry.S
===================================================================
--- linux.orig/arch/i386/kernel/entry.S
+++ linux/arch/i386/kernel/entry.S
@@ -587,11 +587,9 @@ ENTRY(name) \
/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"
-ENTRY(divide_error)
- RING0_INT_FRAME
- pushl $0 # no error code
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_divide_error
+KPROBE_ENTRY(page_fault)
+ RING0_EC_FRAME
+ pushl $do_page_fault
CFI_ADJUST_CFA_OFFSET 4
ALIGN
error_code:
@@ -641,6 +639,7 @@ error_code:
call *%edi
jmp ret_from_exception
CFI_ENDPROC
+.previous
ENTRY(coprocessor_error)
RING0_INT_FRAME
@@ -716,7 +715,8 @@ debug_stack_correct:
call do_debug
jmp ret_from_exception
CFI_ENDPROC
- .previous .text
+.previous
+
/*
* NMI is doubly nasty. It can happen _while_ we're handling
* a debug fault, and the debug fault hasn't yet been able to
@@ -812,7 +812,7 @@ KPROBE_ENTRY(int3)
call do_int3
jmp ret_from_exception
CFI_ENDPROC
- .previous .text
+.previous
ENTRY(overflow)
RING0_INT_FRAME
@@ -877,7 +877,7 @@ KPROBE_ENTRY(general_protection)
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
- .previous .text
+.previous
ENTRY(alignment_check)
RING0_EC_FRAME
@@ -886,13 +886,14 @@ ENTRY(alignment_check)
jmp error_code
CFI_ENDPROC
-KPROBE_ENTRY(page_fault)
- RING0_EC_FRAME
- pushl $do_page_fault
+ENTRY(divide_error)
+ RING0_INT_FRAME
+ pushl $0 # no error code
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $do_divide_error
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
- .previous .text
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [118/145] x86_64: Mark error_entry as forbidden to kprobes
[not found] <20060810 935.775038000@suse.de>
` (116 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [117/145] i386: error_code is not safe for kprobes Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [119/145] x86_64: X86_64 monotonic_clock goes backwards Andi Kleen
` (27 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Prasanna S.P. <prasanna@in.ibm.com>
This patch moves the entry.S:error_entry to .kprobes.text section,
since code marked unsafe for kprobes jumps directly to entry.S::error_entry,
that must be marked unsafe as well.
This patch also changes all the ".previous .text" asm directives to ".previous"
for the kprobes sections.
AK: Following a similar i386 patch from Chuck Ebbert
Signed-off-by: Prasanna S.P. <prasanna@in.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
arch/x86_64/kernel/entry.S | 13 +++++++------
1 files changed, 7 insertions(+), 6 deletions(-)
Index: linux/arch/x86_64/kernel/entry.S
===================================================================
--- linux.orig/arch/x86_64/kernel/entry.S
+++ linux/arch/x86_64/kernel/entry.S
@@ -819,7 +819,7 @@ paranoid_schedule\trace:
* Exception entry point. This expects an error code/orig_rax on the stack
* and the exception handler in %rax.
*/
-ENTRY(error_entry)
+KPROBE_ENTRY(error_entry)
_frame RDI
/* rdi slot contains rax, oldrax contains error code */
cld
@@ -904,6 +904,7 @@ error_kernelspace:
je error_swapgs
jmp error_sti
END(error_entry)
+ .previous
/* Reload gs selector with exception handling */
/* edi: new selector */
@@ -1023,7 +1024,7 @@ ENDPROC(execve)
KPROBE_ENTRY(page_fault)
errorentry do_page_fault
END(page_fault)
- .previous .text
+ .previous
ENTRY(coprocessor_error)
zeroentry do_coprocessor_error
@@ -1045,7 +1046,7 @@ KPROBE_ENTRY(debug)
paranoidentry do_debug, DEBUG_STACK
paranoidexit
END(debug)
- .previous .text
+ .previous
/* runs on exception stack */
KPROBE_ENTRY(nmi)
@@ -1060,7 +1061,7 @@ KPROBE_ENTRY(nmi)
CFI_ENDPROC
#endif
END(nmi)
- .previous .text
+ .previous
KPROBE_ENTRY(int3)
INTR_FRAME
@@ -1070,7 +1071,7 @@ KPROBE_ENTRY(int3)
jmp paranoid_exit1
CFI_ENDPROC
END(int3)
- .previous .text
+ .previous
ENTRY(overflow)
zeroentry do_overflow
@@ -1119,7 +1120,7 @@ END(stack_segment)
KPROBE_ENTRY(general_protection)
errorentry do_general_protection
END(general_protection)
- .previous .text
+ .previous
ENTRY(alignment_check)
errorentry do_alignment_check
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [119/145] x86_64: X86_64 monotonic_clock goes backwards
[not found] <20060810 935.775038000@suse.de>
` (117 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [118/145] x86_64: Mark error_entry as forbidden to kprobes Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [120/145] i386/x86-64: Improve Kconfig description of CRASH_DUMP Andi Kleen
` (26 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Dimitri Sivanich <sivanich@sgi.com>
I've noticed some erratic behavior while testing the X86_64 version
of monotonic_clock().
While spinning in a loop reading monotonic clock values (pinned to a
single cpu) I noticed that the difference between subsequent values
occasionally went negative (time going backwards).
I found that in the following code:
this_offset = get_cycles_sync();
/* FIXME: 1000 or 1000000? */
--> offset = (this_offset - last_offset)*1000 / cpu_khz;
}
return base + offset;
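the offset sometimes turns out to be 0, even though
this_offset > last_offset: the integer division truncates to 0
whenever (this_offset - last_offset)*1000 is smaller than cpu_khz.
Fix this by converting the cycle delta with cycles_2_ns() in both
monotonic_clock() and main_timer_handler().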
Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/time.c | 7 +++----
1 files changed, 3 insertions(+), 4 deletions(-)
Index: linux/arch/x86_64/kernel/time.c
===================================================================
--- linux.orig/arch/x86_64/kernel/time.c
+++ linux/arch/x86_64/kernel/time.c
@@ -276,6 +276,7 @@ static void set_rtc_mmss(unsigned long n
* Note: This function is required to return accurate
* time even in the absence of multiple timer ticks.
*/
+static inline unsigned long long cycles_2_ns(unsigned long long cyc);
unsigned long long monotonic_clock(void)
{
unsigned long seq;
@@ -300,8 +301,7 @@ unsigned long long monotonic_clock(void)
base = monotonic_base;
} while (read_seqretry(&xtime_lock, seq));
this_offset = get_cycles_sync();
- /* FIXME: 1000 or 1000000? */
- offset = (this_offset - last_offset)*1000 / cpu_khz;
+ offset = cycles_2_ns(this_offset - last_offset);
}
return base + offset;
}
@@ -405,8 +405,7 @@ void main_timer_handler(struct pt_regs *
offset %= USEC_PER_TICK;
}
- /* FIXME: 1000 or 1000000? */
- monotonic_base += (tsc - vxtime.last_tsc) * 1000000 / cpu_khz;
+ monotonic_base += cycles_2_ns(tsc - vxtime.last_tsc);
vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [120/145] i386/x86-64: Improve Kconfig description of CRASH_DUMP
[not found] <20060810 935.775038000@suse.de>
` (118 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [119/145] x86_64: X86_64 monotonic_clock goes backwards Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 21:12 ` Randy.Dunlap
2006-08-10 19:37 ` [PATCH for review] [121/145] x86_64: Make boot_param_data pure BSS Andi Kleen
` (25 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
Improve Kconfig description of CONFIG_CRASH_DUMP. Previously
it was too brief to be useful.
Cc: vgoyal@in.ibm.com
Cc: ebiederm@xmission.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/Kconfig | 7 +++++++
arch/x86_64/Kconfig | 9 ++++++++-
2 files changed, 15 insertions(+), 1 deletion(-)
Index: linux/arch/i386/Kconfig
===================================================================
--- linux.orig/arch/i386/Kconfig
+++ linux/arch/i386/Kconfig
@@ -759,6 +759,13 @@ config CRASH_DUMP
depends on HIGHMEM
help
Generate crash dump after being started by kexec.
+ This should be normally only set in special crash dump kernels
+ which are loaded in the main kernel with kexec-tools into
+ a specially reserved region and then later executed after
+ a crash by kdump/kexec. The crash dump kernel must be compiled
+ to a memory address not used by the main kernel or BIOS using
+ PHYSICAL_START.
+ For more details see Documentation/kdump/kdump.txt
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
Index: linux/arch/x86_64/Kconfig
===================================================================
--- linux.orig/arch/x86_64/Kconfig
+++ linux/arch/x86_64/Kconfig
@@ -484,7 +484,14 @@ config CRASH_DUMP
bool "kernel crash dumps (EXPERIMENTAL)"
depends on EXPERIMENTAL
help
- Generate crash dump after being started by kexec.
+ Generate crash dump after being started by kexec.
+ This should be normally only set in special crash dump kernels
+ which are loaded in the main kernel with kexec-tools into
+ a specially reserved region and then later executed after
+ a crash by kdump/kexec. The crash dump kernel must be compiled
+ to a memory address not used by the main kernel or BIOS using
+ PHYSICAL_START.
+ For more details see Documentation/kdump/kdump.txt
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [120/145] i386/x86-64: Improve Kconfig description of CRASH_DUMP
2006-08-10 19:37 ` [PATCH for review] [120/145] i386/x86-64: Improve Kconfig description of CRASH_DUMP Andi Kleen
@ 2006-08-10 21:12 ` Randy.Dunlap
0 siblings, 0 replies; 199+ messages in thread
From: Randy.Dunlap @ 2006-08-10 21:12 UTC (permalink / raw)
To: Andi Kleen; +Cc: lkml
On Thu, 10 Aug 2006 21:37:19 +0200 (CEST) Andi Kleen wrote:
> r
>
> Improve Kconfig description of CONFIG_CRASH_DUMP. Previously
> it was too brief to be useful.
>
> Cc: vgoyal@in.ibm.com
> Cc: ebiederm@xmission.com
>
> Signed-off-by: Andi Kleen <ak@suse.de>
>
> ---
> arch/i386/Kconfig | 7 +++++++
> arch/x86_64/Kconfig | 9 ++++++++-
> 2 files changed, 15 insertions(+), 1 deletion(-)
>
> Index: linux/arch/i386/Kconfig
> ===================================================================
> --- linux.orig/arch/i386/Kconfig
> +++ linux/arch/i386/Kconfig
> @@ -759,6 +759,13 @@ config CRASH_DUMP
> depends on HIGHMEM
> help
> Generate crash dump after being started by kexec.
> + This should be normally only set in special crash dump kernels
> + which are loaded in the main kernel with kexec-tools into
> + a specially reserved region and then later executed after
> + a crash by kdump/kexec. The crash dump kernel must be compiled
> + to a memory address not used by the main kernel or BIOS using
> + PHYSICAL_START.
> + For more details see Documentation/kdump/kdump.txt
>
> config PHYSICAL_START
> hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
> Index: linux/arch/x86_64/Kconfig
> ===================================================================
> --- linux.orig/arch/x86_64/Kconfig
> +++ linux/arch/x86_64/Kconfig
> @@ -484,7 +484,14 @@ config CRASH_DUMP
> bool "kernel crash dumps (EXPERIMENTAL)"
> depends on EXPERIMENTAL
> help
> - Generate crash dump after being started by kexec.
> + Generate crash dump after being started by kexec.
> + This should be normally only set in special crash dump kernels
> + which are loaded in the main kernel with kexec-tools into
> + a specially reserved region and then later executed after
> + a crash by kdump/kexec. The crash dump kernel must be compiled
> + to a memory address not used by the main kernel or BIOS using
> + PHYSICAL_START.
> + For more details see Documentation/kdump/kdump.txt
>
> config PHYSICAL_START
> hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
> -
Please fix indentation (use tabs consistently).
---
~Randy
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [121/145] x86_64: Make boot_param_data pure BSS
[not found] <20060810 935.775038000@suse.de>
` (119 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [120/145] i386/x86-64: Improve Kconfig description of CRASH_DUMP Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [122/145] i386: Fix warning in mpparse.c Andi Kleen
` (24 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
Since it's all zero.
Actually I think gcc 4+ will do that automatically, but earlier compilers won't.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/setup64.c | 2 +-
1 files changed, 1 insertion(+), 1 deletion(-)
Index: linux/arch/x86_64/kernel/setup64.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup64.c
+++ linux/arch/x86_64/kernel/setup64.c
@@ -24,7 +24,7 @@
#include <asm/proto.h>
#include <asm/sections.h>
-char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
+char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [122/145] i386: Fix warning in mpparse.c
[not found] <20060810 935.775038000@suse.de>
` (120 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [121/145] x86_64: Make boot_param_data pure BSS Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [123/145] i386: make fault notifier unconditional and export it Andi Kleen
` (23 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
Fix
linux/arch/i386/kernel/mpparse.c: In function 'MP_bus_info':
linux/arch/i386/kernel/mpparse.c:232: warning: comparison is always false due to limited range of data type
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/mpparse.c | 2 ++
1 files changed, 2 insertions(+)
Index: linux/arch/i386/kernel/mpparse.c
===================================================================
--- linux.orig/arch/i386/kernel/mpparse.c
+++ linux/arch/i386/kernel/mpparse.c
@@ -229,12 +229,14 @@ static void __init MP_bus_info (struct m
mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+#if MAX_MP_BUSSES < 256
if (m->mpc_busid >= MAX_MP_BUSSES) {
printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
" is too large, max. supported is %d\n",
m->mpc_busid, str, MAX_MP_BUSSES - 1);
return;
}
+#endif
if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [123/145] i386: make fault notifier unconditional and export it
[not found] <20060810 935.775038000@suse.de>
` (121 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [122/145] i386: Fix warning in mpparse.c Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-13 15:28 ` Adrian Bunk
2006-08-10 19:37 ` [PATCH for review] [124/145] " Andi Kleen
` (22 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
It's needed for external debuggers and overhead is very small.
Also make the actual notifier chain they use static
Cc: jbeulich@novell.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/mm/fault.c | 12 +++---------
1 files changed, 3 insertions(+), 9 deletions(-)
Index: linux/arch/x86_64/mm/fault.c
===================================================================
--- linux.orig/arch/x86_64/mm/fault.c
+++ linux/arch/x86_64/mm/fault.c
@@ -40,8 +40,7 @@
#define PF_RSVD (1<<3)
#define PF_INSTR (1<<4)
-#ifdef CONFIG_KPROBES
-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
/* Hook to register for page fault notifications */
int register_page_fault_notifier(struct notifier_block *nb)
@@ -49,11 +48,13 @@ int register_page_fault_notifier(struct
vmalloc_sync_all();
return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
}
+EXPORT_SYMBOL_GPL(register_page_fault_notifier);
int unregister_page_fault_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
}
+EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
static inline int notify_page_fault(enum die_val val, const char *str,
struct pt_regs *regs, long err, int trap, int sig)
@@ -67,13 +68,6 @@ static inline int notify_page_fault(enum
};
return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
}
-#else
-static inline int notify_page_fault(enum die_val val, const char *str,
- struct pt_regs *regs, long err, int trap, int sig)
-{
- return NOTIFY_DONE;
-}
-#endif
void bust_spinlocks(int yes)
{
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [123/145] i386: make fault notifier unconditional and export it
2006-08-10 19:37 ` [PATCH for review] [123/145] i386: make fault notifier unconditional and export it Andi Kleen
@ 2006-08-13 15:28 ` Adrian Bunk
2006-08-13 17:11 ` Alan Cox
2006-08-13 20:17 ` Andi Kleen
0 siblings, 2 replies; 199+ messages in thread
From: Adrian Bunk @ 2006-08-13 15:28 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel, Andrew Morton
> It's needed for external debuggers and overhead is very small.
>...
We are currently trying to remove exports not used by any in-kernel
code.
The patch description also lacks the name of what you call "external
debuggers" (I assume the exports are not for a theoretical usage but for
an already existing debugger?). There is no reason for keeping a patch
description small.
Especially nowadays where people demand deprecation periods for removing
exports without any in-kernel users there must be a _very_ good
justification when adding such exports.
This is true for both the i386 and the x86_64 patches.
cu
Adrian
BTW1: The subject of this email is wrong (it's the x86_64 patch).
BTW2: Please use a valid To: header.
--
Gentoo kernels are 42 times more popular than SUSE kernels among
KLive users (a service by SUSE contractor Andrea Arcangeli that
gathers data about kernels from many users worldwide).
There are three kinds of lies: Lies, Damn Lies, and Statistics.
Benjamin Disraeli
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [123/145] i386: make fault notifier unconditional and export it
2006-08-13 15:28 ` Adrian Bunk
@ 2006-08-13 17:11 ` Alan Cox
2006-08-13 17:08 ` Adrian Bunk
2006-08-13 20:17 ` Andi Kleen
1 sibling, 1 reply; 199+ messages in thread
From: Alan Cox @ 2006-08-13 17:11 UTC (permalink / raw)
To: Adrian Bunk; +Cc: Andi Kleen, linux-kernel, Andrew Morton
Ar Sul, 2006-08-13 am 17:28 +0200, ysgrifennodd Adrian Bunk:
> > It's needed for external debuggers and overhead is very small.
> >...
>
> We are currently trying to remove exports not used by any in-kernel
> code.
Wrong pronoun. I think you meant to type "You".
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [123/145] i386: make fault notifier unconditional and export it
2006-08-13 17:11 ` Alan Cox
@ 2006-08-13 17:08 ` Adrian Bunk
2006-08-13 18:04 ` Alan Cox
0 siblings, 1 reply; 199+ messages in thread
From: Adrian Bunk @ 2006-08-13 17:08 UTC (permalink / raw)
To: Alan Cox; +Cc: Andi Kleen, linux-kernel, Andrew Morton
On Sun, Aug 13, 2006 at 06:11:45PM +0100, Alan Cox wrote:
> Ar Sul, 2006-08-13 am 17:28 +0200, ysgrifennodd Adrian Bunk:
> > > It's needed for external debuggers and overhead is very small.
> > >...
> >
> > We are currently trying to remove exports not used by any in-kernel
> > code.
>
> Wrong pronoun. I think you meant to type "You".
"You are currently trying to remove exports..."?
Wouldn't this sound as if Andi was doing this?
I thought the "We" was correct since it's at least Arjan and me.
If this was wrong all I can say is that I'm not a native English
speaker.
cu
Adrian
--
Gentoo kernels are 42 times more popular than SUSE kernels among
KLive users (a service by SUSE contractor Andrea Arcangeli that
gathers data about kernels from many users worldwide).
There are three kinds of lies: Lies, Damn Lies, and Statistics.
Benjamin Disraeli
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [123/145] i386: make fault notifier unconditional and export it
2006-08-13 17:08 ` Adrian Bunk
@ 2006-08-13 18:04 ` Alan Cox
0 siblings, 0 replies; 199+ messages in thread
From: Alan Cox @ 2006-08-13 18:04 UTC (permalink / raw)
To: Adrian Bunk; +Cc: Andi Kleen, linux-kernel, Andrew Morton
Ar Sul, 2006-08-13 am 19:08 +0200, ysgrifennodd Adrian Bunk:
> > Wrong pronoun. I think you meant to type "You".
>
> "You are currently trying to remove exports..."?
> Wouldn't this sound as if Andi was doing this?
>
> I thought the "We" was correct since it's at least Arjan and me.
>
> If this was wrong all I can say is that I'm not a native English
> speaker.
"We" tends to imply 'I speak for all of us'. I was pointing out that you
don't speak for everyone. The humour in making that point didn't
translate and apparently I ended up confusing you as well which wasn't
the intent.
The joys of language.
To put it more clearly "Not everyone agrees with the remove exports"
approach, at least not the "all unused" part.
Alan
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [123/145] i386: make fault notifier unconditional and export it
2006-08-13 15:28 ` Adrian Bunk
2006-08-13 17:11 ` Alan Cox
@ 2006-08-13 20:17 ` Andi Kleen
2006-08-14 0:03 ` Keith Owens
1 sibling, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-13 20:17 UTC (permalink / raw)
To: Adrian Bunk; +Cc: linux-kernel, Andrew Morton
On Sunday 13 August 2006 17:28, Adrian Bunk wrote:
> > It's needed for external debuggers and overhead is very small.
> >...
>
> We are currently trying to remove exports not used by any in-kernel
> code.
That ``we'' doesn't include me at least.
>
> The patch description also lacks the name of what you call "external
> debuggers" (I assume the exports are not for a theoretical usage but for
> an already existing debugger?).
The fault chain is needed for pretty much any debugger, including
kgdb, kdb, nlkd. The one in this case was NLKD.
> Especially nowadays where people demand deprecation periods for removing
> exports without any in-kernel users there must be a _very_ good
> justification when adding such exports.
I've always exported symbols when people can make a reasonable case that they
need it for external, free, not-broken-by-design code.
On the other hand I have no problems with removing unused exports
that don't have such a case or are clearly not a useful external
interface.
> BTW1: The subject of this email is wrong (it's the x86_64 patch).
Fixed, thanks
-Andi
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [123/145] i386: make fault notifier unconditional and export it
2006-08-13 20:17 ` Andi Kleen
@ 2006-08-14 0:03 ` Keith Owens
0 siblings, 0 replies; 199+ messages in thread
From: Keith Owens @ 2006-08-14 0:03 UTC (permalink / raw)
To: Andi Kleen; +Cc: Adrian Bunk, linux-kernel, Andrew Morton
Andi Kleen (on Sun, 13 Aug 2006 22:17:48 +0200) wrote:
>On Sunday 13 August 2006 17:28, Adrian Bunk wrote:
>> > It's needed for external debuggers and overhead is very small.
>> >...
>>
>> We are currently trying to remove exports not used by any in-kernel
>> code.
>
>That ``we'' doesn't include me at least.
>
>>
>> The patch description also lacks the name of what you call "external
>> debuggers" (I assume the exports are not for a theoretical usage but for
>> an already existing debugger?).
>
>The fault chain is needed for pretty much any debugger, including
>kgdb, kdb, nlkd. The one in this case was NLKD.
No. The page fault event was only used by kprobes, but it slowed down
all code. That is why it was changed to only be present if kprobes was
in the system. kdb and AFAIK kgdb do not need the page fault chain.
nlkd might need it, but that does not seem to have been explicitly
stated.
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [124/145] i386: make fault notifier unconditional and export it
[not found] <20060810 935.775038000@suse.de>
` (122 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [123/145] i386: make fault notifier unconditional and export it Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [125/145] i376: Make acpi_force static Andi Kleen
` (21 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
It's needed for external debuggers and overhead is very small.
Also make the actual notifier chain they use static
Cc: jbeulich@novell.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/mm/fault.c | 14 ++++----------
1 files changed, 4 insertions(+), 10 deletions(-)
Index: linux/arch/i386/mm/fault.c
===================================================================
--- linux.orig/arch/i386/mm/fault.c
+++ linux/arch/i386/mm/fault.c
@@ -30,18 +30,20 @@
extern void die(const char *,struct pt_regs *,long);
-#ifdef CONFIG_KPROBES
-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+
int register_page_fault_notifier(struct notifier_block *nb)
{
vmalloc_sync_all();
return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
}
+EXPORT_SYMBOL_GPL(register_page_fault_notifier);
int unregister_page_fault_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
}
+EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
static inline int notify_page_fault(enum die_val val, const char *str,
struct pt_regs *regs, long err, int trap, int sig)
@@ -55,14 +57,6 @@ static inline int notify_page_fault(enum
};
return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
}
-#else
-static inline int notify_page_fault(enum die_val val, const char *str,
- struct pt_regs *regs, long err, int trap, int sig)
-{
- return NOTIFY_DONE;
-}
-#endif
-
/*
* Unlock any spinlocks which will prevent us from getting the
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [125/145] i376: Make acpi_force static
[not found] <20060810 935.775038000@suse.de>
` (123 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [124/145] " Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-16 15:47 ` Len Brown
2006-08-10 19:37 ` [PATCH for review] [126/145] i386: Make enable_local_apic static Andi Kleen
` (20 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Adrian Bunk <bunk@stusta.de>
acpi_force can become static.
Cc: len.brown@intel.com
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/acpi/boot.c | 2 +-
1 files changed, 1 insertion(+), 1 deletion(-)
Index: linux/arch/i386/kernel/acpi/boot.c
===================================================================
--- linux.orig/arch/i386/kernel/acpi/boot.c
+++ linux/arch/i386/kernel/acpi/boot.c
@@ -36,7 +36,7 @@
#include <asm/io.h>
#include <asm/mpspec.h>
-int __initdata acpi_force = 0;
+static int __initdata acpi_force = 0;
#ifdef CONFIG_ACPI
int acpi_disabled = 0;
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [125/145] i376: Make acpi_force static
2006-08-10 19:37 ` [PATCH for review] [125/145] i376: Make acpi_force static Andi Kleen
@ 2006-08-16 15:47 ` Len Brown
2006-08-16 16:07 ` Andi Kleen
0 siblings, 1 reply; 199+ messages in thread
From: Len Brown @ 2006-08-16 15:47 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel
On Thursday 10 August 2006 15:37, Andi Kleen wrote:
Ack -- assuming there was a previous patch I didn't see to move the definition of acpi_force
to boot.c from setup.c on i386...
-Len
>
> From: Adrian Bunk <bunk@stusta.de>
>
> acpi_force can become static.
>
> Cc: len.brown@intel.com
>
> Signed-off-by: Adrian Bunk <bunk@stusta.de>
> Signed-off-by: Andi Kleen <ak@suse.de>
>
> ---
> arch/i386/kernel/acpi/boot.c | 2 +-
> 1 files changed, 1 insertion(+), 1 deletion(-)
>
> Index: linux/arch/i386/kernel/acpi/boot.c
> ===================================================================
> --- linux.orig/arch/i386/kernel/acpi/boot.c
> +++ linux/arch/i386/kernel/acpi/boot.c
> @@ -36,7 +36,7 @@
> #include <asm/io.h>
> #include <asm/mpspec.h>
>
> -int __initdata acpi_force = 0;
> +static int __initdata acpi_force = 0;
>
> #ifdef CONFIG_ACPI
> int acpi_disabled = 0;
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [125/145] i376: Make acpi_force static
2006-08-16 15:47 ` Len Brown
@ 2006-08-16 16:07 ` Andi Kleen
0 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-16 16:07 UTC (permalink / raw)
To: Len Brown; +Cc: Len Brown, linux-kernel
On Wed, 16 Aug 2006 11:47:15 -0400
Len Brown <len.brown@intel.com> wrote:
> On Thursday 10 August 2006 15:37, Andi Kleen wrote:
>
> Ack -- assuming there was a previous patch I didn't see to move the definition of acpi_force
> to boot.c from setup.c on i386...
Yes there was. The complete early parameter parsing got changed to early_param()
and the acpi parsing is now in boot.c only.
-andi
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [126/145] i386: Make enable_local_apic static
[not found] <20060810 935.775038000@suse.de>
` (124 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [125/145] i376: Make acpi_force static Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [127/145] i386: move kernel_thread_helper into entry.S Andi Kleen
` (19 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Adrian Bunk <bunk@stusta.de>
enable_local_apic can now become static.
Cc: len.brown@intel.com
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/apic.c | 13 ++++++++++++-
include/asm-i386/apic.h | 12 ------------
2 files changed, 12 insertions(+), 13 deletions(-)
Index: linux/include/asm-i386/apic.h
===================================================================
--- linux.orig/include/asm-i386/apic.h
+++ linux/include/asm-i386/apic.h
@@ -16,20 +16,8 @@
#define APIC_VERBOSE 1
#define APIC_DEBUG 2
-extern int enable_local_apic;
extern int apic_verbosity;
-static inline void lapic_disable(void)
-{
- enable_local_apic = -1;
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
-}
-
-static inline void lapic_enable(void)
-{
- enable_local_apic = 1;
-}
-
/*
* Define the default level of output to be very little
* This can be turned up by using apic=verbose for more
Index: linux/arch/i386/kernel/apic.c
===================================================================
--- linux.orig/arch/i386/kernel/apic.c
+++ linux/arch/i386/kernel/apic.c
@@ -52,7 +52,18 @@ static cpumask_t timer_bcast_ipi;
/*
* Knob to control our willingness to enable the local APIC.
*/
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+static inline void lapic_disable(void)
+{
+ enable_local_apic = -1;
+ clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+}
+
+static inline void lapic_enable(void)
+{
+ enable_local_apic = 1;
+}
/*
* Debug level
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [127/145] i386: move kernel_thread_helper into entry.S
[not found] <20060810 935.775038000@suse.de>
` (125 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [126/145] i386: Make enable_local_apic static Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-11 8:33 ` Jan Beulich
2006-08-10 19:37 ` [PATCH for review] [128/145] i386: Descriptor and trap table cleanups Andi Kleen
` (18 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
And add proper CFI annotation to it which was previously
impossible. This prevents "stuck" messages by the dwarf2 unwinder
when reaching the top of a kernel stack.
Cc: jbeulich@novell.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/entry.S | 15 +++++++++++++++
arch/i386/kernel/process.c | 9 ---------
2 files changed, 15 insertions(+), 9 deletions(-)
Index: linux/arch/i386/kernel/entry.S
===================================================================
--- linux.orig/arch/i386/kernel/entry.S
+++ linux/arch/i386/kernel/entry.S
@@ -946,6 +946,21 @@ ENTRY(arch_unwind_init_running)
ENDPROC(arch_unwind_init_running)
#endif
+ENTRY(kernel_thread_helper)
+ CFI_STARTPROC
+ movl %edx,%eax
+ CFI_REGISTER edx,eax
+ push %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edx,0
+ call *%ebx
+ push %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eax,0
+ call do_exit
+ CFI_ENDPROC
+ENDPROC(kernel_thread_helper)
+
.section .rodata,"a"
#include "syscall_table.S"
Index: linux/arch/i386/kernel/process.c
===================================================================
--- linux.orig/arch/i386/kernel/process.c
+++ linux/arch/i386/kernel/process.c
@@ -321,15 +321,6 @@ void show_regs(struct pt_regs * regs)
* the "args".
*/
extern void kernel_thread_helper(void);
-__asm__(".section .text\n"
- ".align 4\n"
- "kernel_thread_helper:\n\t"
- "movl %edx,%eax\n\t"
- "pushl %edx\n\t"
- "call *%ebx\n\t"
- "pushl %eax\n\t"
- "call do_exit\n"
- ".previous");
/*
* Create a kernel thread
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [127/145] i386: move kernel_thread_helper into entry.S
2006-08-10 19:37 ` [PATCH for review] [127/145] i386: move kernel_thread_helper into entry.S Andi Kleen
@ 2006-08-11 8:33 ` Jan Beulich
2006-08-11 8:38 ` Andi Kleen
0 siblings, 1 reply; 199+ messages in thread
From: Jan Beulich @ 2006-08-11 8:33 UTC (permalink / raw)
To: ak; +Cc: linux-kernel
>And add proper CFI annotation to it which was previously
>impossible. This prevents "stuck" messages by the dwarf2 unwinder
>when reaching the top of a kernel stack.
>+ENTRY(kernel_thread_helper)
>+ CFI_STARTPROC
>+ movl %edx,%eax
>+ CFI_REGISTER edx,eax
This is pointless, as %eax will be clobbered by the callee of the
subsequent call.
>+ push %edx
>+ CFI_ADJUST_CFA_OFFSET 4
>+ CFI_REL_OFFSET edx,0
This likewise is pointless, as the argument is owned by the callee.
>+ call *%ebx
>+ push %eax
>+ CFI_ADJUST_CFA_OFFSET 4
>+ CFI_REL_OFFSET eax,0
And this too - the value now in %eax has no relation with the
value known by the caller of this routine (which doesn't expect
any return from here anyway).
>+ call do_exit
>+ CFI_ENDPROC
>+ENDPROC(kernel_thread_helper)
Jan
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [127/145] i386: move kernel_thread_helper into entry.S
2006-08-11 8:33 ` Jan Beulich
@ 2006-08-11 8:38 ` Andi Kleen
2006-08-11 9:48 ` Jan Beulich
0 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-11 8:38 UTC (permalink / raw)
To: Jan Beulich; +Cc: linux-kernel
> And this too - the value now in %eax has no relation with the
> value known by the caller of this routine (which doesn't expect
> any return from here anyway).
Ok, but somehow it needs to be annotated so that the unwinder stops
and doesn't fall back. Can you please send a replacement patch that
does this correctly?
Thanks,
-Andi
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [127/145] i386: move kernel_thread_helper into entry.S
2006-08-11 8:38 ` Andi Kleen
@ 2006-08-11 9:48 ` Jan Beulich
2006-08-11 10:16 ` Andi Kleen
0 siblings, 1 reply; 199+ messages in thread
From: Jan Beulich @ 2006-08-11 9:48 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel
>> And this too - the value now in %eax has no relation with the
>> value known by the caller of this routine (which doesn't expect
>> any return from here anyway).
>
>Ok, but somehow it needs to be annotated so that the unwinder stops
>and doesn't fall back. Can you please send a replacement patch that
>does this correctly?
I would do it this way (untested):
ENTRY(kernel_thread_helper)
CFI_STARTPROC
movl %edx,%eax
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
call *%ebx
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
call do_exit
CFI_ENDPROC
ENDPROC(kernel_thread_helper)
(i.e. tracking the stack pointer movement, but not the register values
other than the return address)
Jan
^ permalink raw reply [flat|nested] 199+ messages in thread
* Re: [PATCH for review] [127/145] i386: move kernel_thread_helper into entry.S
2006-08-11 9:48 ` Jan Beulich
@ 2006-08-11 10:16 ` Andi Kleen
0 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-11 10:16 UTC (permalink / raw)
To: Jan Beulich; +Cc: linux-kernel
> ENTRY(kernel_thread_helper)
> CFI_STARTPROC
> movl %edx,%eax
> pushl %edx
> CFI_ADJUST_CFA_OFFSET 4
> call *%ebx
> pushl %eax
> CFI_ADJUST_CFA_OFFSET 4
> call do_exit
> CFI_ENDPROC
> ENDPROC(kernel_thread_helper)
>
> (i.e. tracking the stack pointer movement, but not the register values
> other than the return address)
Done thanks.
-Andi
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [128/145] i386: Descriptor and trap table cleanups.
[not found] <20060810 935.775038000@suse.de>
` (126 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [127/145] i386: move kernel_thread_helper into entry.S Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [129/145] x86_64: Auto size the per cpu area Andi Kleen
` (17 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Rusty Russell <rusty@rustcorp.com.au>
The implementation comes from Zach's [RFC, PATCH 10/24] i386 Vmi
descriptor changes:
Descriptor and trap table cleanups. Add cleanly written accessors for
IDT and GDT gates so the subarch may override them. Note that this
allows the hypervisor to transparently tweak the DPL of the descriptors
as well as the RPL of segments in those descriptors, with no unnecessary
kernel code modification. It also allows the hypervisor implementation
of the VMI to tweak the gates, allowing for custom exception frames or
extra layers of indirection above the guest fault / IRQ handlers.
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/traps.c | 24 +--------
include/asm-i386/desc.h | 119 +++++++++++++++++++++++++++++------------------
2 files changed, 80 insertions(+), 63 deletions(-)
Index: linux/arch/i386/kernel/traps.c
===================================================================
--- linux.orig/arch/i386/kernel/traps.c
+++ linux/arch/i386/kernel/traps.c
@@ -1156,20 +1156,6 @@ void __init trap_init_f00f_bug(void)
}
#endif
-#define _set_gate(gate_addr,type,dpl,addr,seg) \
-do { \
- int __d0, __d1; \
- __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
- "movw %4,%%dx\n\t" \
- "movl %%eax,%0\n\t" \
- "movl %%edx,%1" \
- :"=m" (*((long *) (gate_addr))), \
- "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
- :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
- "3" ((char *) (addr)),"2" ((seg) << 16)); \
-} while (0)
-
-
/*
* This needs to use 'idt_table' rather than 'idt', and
* thus use the _nonmapped_ version of the IDT, as the
@@ -1178,7 +1164,7 @@ do { \
*/
void set_intr_gate(unsigned int n, void *addr)
{
- _set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
+ _set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS);
}
/*
@@ -1186,22 +1172,22 @@ void set_intr_gate(unsigned int n, void
*/
static inline void set_system_intr_gate(unsigned int n, void *addr)
{
- _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS);
+ _set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS);
}
static void __init set_trap_gate(unsigned int n, void *addr)
{
- _set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
+ _set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS);
}
static void __init set_system_gate(unsigned int n, void *addr)
{
- _set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
+ _set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS);
}
static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
{
- _set_gate(idt_table+n,5,0,0,(gdt_entry<<3));
+ _set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3));
}
Index: linux/include/asm-i386/desc.h
===================================================================
--- linux.orig/include/asm-i386/desc.h
+++ linux/include/asm-i386/desc.h
@@ -33,50 +33,99 @@ static inline struct desc_struct *get_cp
return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
}
+/*
+ * This is the ldt that every process will get unless we need
+ * something other than this.
+ */
+extern struct desc_struct default_ldt[];
+extern struct desc_struct idt_table[];
+extern void set_intr_gate(unsigned int irq, void * addr);
+
+static inline void pack_descriptor(__u32 *a, __u32 *b,
+ unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
+{
+ *a = ((base & 0xffff) << 16) | (limit & 0xffff);
+ *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
+ ((type & 0xff) << 8) | ((flags & 0xf) << 12);
+}
+
+static inline void pack_gate(__u32 *a, __u32 *b,
+ unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
+{
+ *a = (seg << 16) | (base & 0xffff);
+ *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
+}
+
+#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */
+#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */
+#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */
+#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */
+#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */
+#define DESCTYPE_DPL3 0x60 /* DPL-3 */
+#define DESCTYPE_S 0x10 /* !system */
+
#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
-#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
-#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
+#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
+#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
-#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
-#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
+#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
+#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
-/*
- * This is the ldt that every process will get unless we need
- * something other than this.
- */
-extern struct desc_struct default_ldt[];
-extern void set_intr_gate(unsigned int irq, void * addr);
+#if TLS_SIZE != 24
+# error update this code.
+#endif
+
+static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
+{
+#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
+ C(0); C(1); C(2);
+#undef C
+}
+
+static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
+{
+ __u32 *lp = (__u32 *)((char *)dt + entry*8);
+ *lp = entry_a;
+ *(lp+1) = entry_b;
+}
-#define _set_tssldt_desc(n,addr,limit,type) \
-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
- "movw %w1,2(%2)\n\t" \
- "rorl $16,%1\n\t" \
- "movb %b1,4(%2)\n\t" \
- "movb %4,5(%2)\n\t" \
- "movb $0,6(%2)\n\t" \
- "movb %h1,7(%2)\n\t" \
- "rorl $16,%1" \
- : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type))
+#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
+#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
+#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
-static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
+static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
{
- _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr,
- offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89);
+ __u32 a, b;
+ pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
+ write_idt_entry(idt_table, gate, a, b);
}
-#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
+static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
+{
+ __u32 a, b;
+ pack_descriptor(&a, &b, (unsigned long)addr,
+ offsetof(struct tss_struct, __cacheline_filler) - 1,
+ DESCTYPE_TSS, 0);
+ write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
+}
-static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
+static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
{
- _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
+ __u32 a, b;
+ pack_descriptor(&a, &b, (unsigned long)addr,
+ entries * sizeof(struct desc_struct) - 1,
+ DESCTYPE_LDT, 0);
+ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
}
+#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
+
#define LDT_entry_a(info) \
((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
@@ -102,24 +151,6 @@ static inline void set_ldt_desc(unsigned
(info)->seg_not_present == 1 && \
(info)->useable == 0 )
-static inline void write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
-{
- __u32 *lp = (__u32 *)((char *)ldt + entry*8);
- *lp = entry_a;
- *(lp+1) = entry_b;
-}
-
-#if TLS_SIZE != 24
-# error update this code.
-#endif
-
-static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
-{
-#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
- C(0); C(1); C(2);
-#undef C
-}
-
static inline void clear_LDT(void)
{
int cpu = get_cpu();
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [129/145] x86_64: Auto size the per cpu area.
[not found] <20060810 935.775038000@suse.de>
` (127 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [128/145] i386: Descriptor and trap table cleanups Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [130/145] i386: clean up topology.c Andi Kleen
` (16 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: ebiederm@xmission.com (Eric W. Biederman)
Now for a completely different but trivial approach.
I just boot tested it with 255 CPUS and everything worked.
Currently everything (except module data) we place in
the per cpu area we know about at compile time. So
instead of allocating a fixed size for the per_cpu area
allocate the number of bytes we need plus a fixed constant
to be used for modules.
It isn't perfect but it is much less of a pain to
work with than what we are doing now.
AK: fixed warning
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/setup64.c | 7 ++-----
include/asm-x86_64/percpu.h | 10 ++++++++++
2 files changed, 12 insertions(+), 5 deletions(-)
Index: linux/arch/x86_64/kernel/setup64.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup64.c
+++ linux/arch/x86_64/kernel/setup64.c
@@ -95,12 +95,9 @@ void __init setup_per_cpu_areas(void)
#endif
/* Copy section for each CPU (we discard the original) */
- size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
-#ifdef CONFIG_MODULES
- if (size < PERCPU_ENOUGH_ROOM)
- size = PERCPU_ENOUGH_ROOM;
-#endif
+ size = PERCPU_ENOUGH_ROOM;
+ printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
for_each_cpu_mask (i, cpu_possible_map) {
char *ptr;
Index: linux/include/asm-x86_64/percpu.h
===================================================================
--- linux.orig/include/asm-x86_64/percpu.h
+++ linux/include/asm-x86_64/percpu.h
@@ -11,6 +11,16 @@
#include <asm/pda.h>
+#ifdef CONFIG_MODULES
+# define PERCPU_MODULE_RESERVE 8192
+#else
+# define PERCPU_MODULE_RESERVE 0
+#endif
+
+#define PERCPU_ENOUGH_ROOM \
+ (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \
+ PERCPU_MODULE_RESERVE)
+
#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
#define __my_cpu_offset() read_pda(data_offset)
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [130/145] i386: clean up topology.c
[not found] <20060810 935.775038000@suse.de>
` (128 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [129/145] x86_64: Auto size the per cpu area Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:50 ` Dave Hansen
2006-08-10 19:37 ` [PATCH for review] [131/145] i386: mark two more functions as __init Andi Kleen
` (15 subsequent siblings)
145 siblings, 1 reply; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Magnus Damm <magnus@valinux.co.jp>
There is no need to duplicate the topology_init() function.
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/topology.c | 21 +++------------------
1 files changed, 3 insertions(+), 18 deletions(-)
Index: linux/arch/i386/kernel/topology.c
===================================================================
--- linux.orig/arch/i386/kernel/topology.c
+++ linux/arch/i386/kernel/topology.c
@@ -28,6 +28,7 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/nodemask.h>
+#include <linux/mmzone.h>
#include <asm/cpu.h>
static struct i386_cpu cpu_devices[NR_CPUS];
@@ -55,34 +56,18 @@ EXPORT_SYMBOL(arch_register_cpu);
EXPORT_SYMBOL(arch_unregister_cpu);
#endif /*CONFIG_HOTPLUG_CPU*/
-
-
-#ifdef CONFIG_NUMA
-#include <linux/mmzone.h>
-
static int __init topology_init(void)
{
int i;
+#ifdef CONFIG_NUMA
for_each_online_node(i)
register_one_node(i);
+#endif /* CONFIG_NUMA */
for_each_present_cpu(i)
arch_register_cpu(i);
return 0;
}
-#else /* !CONFIG_NUMA */
-
-static int __init topology_init(void)
-{
- int i;
-
- for_each_present_cpu(i)
- arch_register_cpu(i);
- return 0;
-}
-
-#endif /* CONFIG_NUMA */
-
subsys_initcall(topology_init);
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [130/145] i386: clean up topology.c
2006-08-10 19:37 ` [PATCH for review] [130/145] i386: clean up topology.c Andi Kleen
@ 2006-08-10 19:50 ` Dave Hansen
2006-08-10 19:55 ` Andi Kleen
2006-08-11 1:32 ` Magnus Damm
0 siblings, 2 replies; 199+ messages in thread
From: Dave Hansen @ 2006-08-10 19:50 UTC (permalink / raw)
To: Andi Kleen; +Cc: Linux Kernel Mailing List
On Thu, 2006-08-10 at 21:37 +0200, Andi Kleen wrote:
> static int __init topology_init(void)
> {
> int i;
>
> +#ifdef CONFIG_NUMA
> for_each_online_node(i)
> register_one_node(i);
> +#endif /* CONFIG_NUMA */
>
> for_each_present_cpu(i)
> arch_register_cpu(i);
> return 0;
> }
Wouldn't it be more proper here to make register_one_node() have a
non-NUMA definition, instead of putting an #ifdef in a .c file like
this?
-- Dave
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [130/145] i386: clean up topology.c
2006-08-10 19:50 ` Dave Hansen
@ 2006-08-10 19:55 ` Andi Kleen
2006-08-11 1:32 ` Magnus Damm
1 sibling, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:55 UTC (permalink / raw)
To: Dave Hansen; +Cc: Linux Kernel Mailing List
On Thursday 10 August 2006 21:50, Dave Hansen wrote:
> On Thu, 2006-08-10 at 21:37 +0200, Andi Kleen wrote:
> > static int __init topology_init(void)
> > {
> > int i;
> >
> > +#ifdef CONFIG_NUMA
> > for_each_online_node(i)
> > register_one_node(i);
> > +#endif /* CONFIG_NUMA */
> >
> > for_each_present_cpu(i)
> > arch_register_cpu(i);
> > return 0;
> > }
>
> Wouldn't it be more proper here to make register_one_node() have a
> non-NUMA definition, instead of putting an #ifdef in a .c file like
> this?
I don't see a particular advantage of that for something simple like this.
But if you feel strongly about it please submit a tested replacement patch.
-Andi
^ permalink raw reply [flat|nested] 199+ messages in thread* Re: [PATCH for review] [130/145] i386: clean up topology.c
2006-08-10 19:50 ` Dave Hansen
2006-08-10 19:55 ` Andi Kleen
@ 2006-08-11 1:32 ` Magnus Damm
1 sibling, 0 replies; 199+ messages in thread
From: Magnus Damm @ 2006-08-11 1:32 UTC (permalink / raw)
To: Dave Hansen; +Cc: Andi Kleen, Linux Kernel Mailing List
On 8/11/06, Dave Hansen <haveblue@us.ibm.com> wrote:
> On Thu, 2006-08-10 at 21:37 +0200, Andi Kleen wrote:
> > static int __init topology_init(void)
> > {
> > int i;
> >
> > +#ifdef CONFIG_NUMA
> > for_each_online_node(i)
> > register_one_node(i);
> > +#endif /* CONFIG_NUMA */
> >
> > for_each_present_cpu(i)
> > arch_register_cpu(i);
> > return 0;
> > }
>
> Wouldn't it be more proper here to make register_one_node() have a
> non-NUMA definition, instead of putting an #ifdef in a .c file like
> this?
I thought about that too, and my reason for not doing it is that this
simple fix would be less straight-forward and probably more subject to
whining and arguing. So my plan was to do this as a first step and
then encourage anyone else that wanted to fix up register_one_node()
properly. =)
Cheers,
/ magnus
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [131/145] i386: mark two more functions as __init
[not found] <20060810 935.775038000@suse.de>
` (129 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [130/145] i386: clean up topology.c Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [132/145] x86_64: fix bus numbering format in mmconfig warning Andi Kleen
` (14 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Magnus Damm <magnus@valinux.co.jp>
cyrix_identify() should be __init because transmeta_identify() is.
tsc_init() is only called from setup_arch() which is marked as __init.
These two section mismatches have been detected by running modpost on
a vmlinux image compiled with CONFIG_RELOCATABLE=y.
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/cpu/cyrix.c | 2 +-
arch/i386/kernel/tsc.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
Index: linux/arch/i386/kernel/cpu/cyrix.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/cyrix.c
+++ linux/arch/i386/kernel/cpu/cyrix.c
@@ -394,7 +394,7 @@ static inline int test_cyrix_52div(void)
return (unsigned char) (test >> 8) == 0x02;
}
-static void cyrix_identify(struct cpuinfo_x86 * c)
+static void __init cyrix_identify(struct cpuinfo_x86 * c)
{
/* Detect Cyrix with disabled CPUID */
if ( c->x86 == 4 && test_cyrix_52div() ) {
Index: linux/arch/i386/kernel/tsc.c
===================================================================
--- linux.orig/arch/i386/kernel/tsc.c
+++ linux/arch/i386/kernel/tsc.c
@@ -192,7 +192,7 @@ int recalibrate_cpu_khz(void)
EXPORT_SYMBOL(recalibrate_cpu_khz);
-void tsc_init(void)
+void __init tsc_init(void)
{
if (!cpu_has_tsc || tsc_disable)
return;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [132/145] x86_64: fix bus numbering format in mmconfig warning
[not found] <20060810 935.775038000@suse.de>
` (130 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [131/145] i386: mark two more functions as __init Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [133/145] x86_64: Support physical cpu hotplug for x86_64 Andi Kleen
` (13 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Brice Goglin <brice@myri.com>
Make an mmconfig warning print the bus id with a regular format.
Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
arch/x86_64/pci/mmconfig.c | 5 ++---
1 files changed, 2 insertions(+), 3 deletions(-)
Index: linux/arch/x86_64/pci/mmconfig.c
===================================================================
--- linux.orig/arch/x86_64/pci/mmconfig.c
+++ linux/arch/x86_64/pci/mmconfig.c
@@ -156,9 +156,8 @@ static __init void unreachable_devices(v
addr = pci_dev_base(0, k, PCI_DEVFN(i, 0));
if (addr == NULL|| readl(addr) != val1) {
set_bit(i + 32*k, fallback_slots);
- printk(KERN_NOTICE
- "PCI: No mmconfig possible on device %x:%x\n",
- k, i);
+ printk(KERN_NOTICE "PCI: No mmconfig possible"
+ " on device %02x:%02x\n", k, i);
}
}
}
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [133/145] x86_64: Support physical cpu hotplug for x86_64
[not found] <20060810 935.775038000@suse.de>
` (131 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [132/145] x86_64: fix bus numbering format in mmconfig warning Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
[not found] ` <20060810222056.GA24184@mail.muni.cz>
2006-08-16 16:02 ` Len Brown
2006-08-10 19:37 ` [PATCH for review] [134/145] x86_64: non lazy "sleazy" fpu implementation Andi Kleen
` (12 subsequent siblings)
145 siblings, 2 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Ashok Raj <ashok.raj@intel.com>
This patch enables ACPI based physical CPU hotplug support for x86_64.
Implements acpi_map_lsapic() and acpi_unmap_lsapic() to support physical cpu
hotplug.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@muc.de>
Cc: "Brown, Len" <len.brown@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
arch/i386/kernel/acpi/boot.c | 69 ++++++++++++++++++++++++++++++++++++++++---
arch/i386/kernel/mpparse.c | 2 -
include/asm-i386/smp.h | 1
3 files changed, 67 insertions(+), 5 deletions(-)
Index: linux/arch/i386/kernel/acpi/boot.c
===================================================================
--- linux.orig/arch/i386/kernel/acpi/boot.c
+++ linux/arch/i386/kernel/acpi/boot.c
@@ -26,6 +26,7 @@
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/efi.h>
+#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/dmi.h>
#include <linux/irq.h>
@@ -512,16 +513,76 @@ EXPORT_SYMBOL(acpi_register_gsi);
#ifdef CONFIG_ACPI_HOTPLUG_CPU
int acpi_map_lsapic(acpi_handle handle, int *pcpu)
{
- /* TBD */
- return -EINVAL;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ union acpi_object *obj;
+ struct acpi_table_lapic *lapic;
+ cpumask_t tmp_map, new_map;
+ u8 physid;
+ int cpu;
+
+ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
+ return -EINVAL;
+
+ if (!buffer.length || !buffer.pointer)
+ return -EINVAL;
+
+ obj = buffer.pointer;
+ if (obj->type != ACPI_TYPE_BUFFER ||
+ obj->buffer.length < sizeof(*lapic)) {
+ kfree(buffer.pointer);
+ return -EINVAL;
+ }
+
+ lapic = (struct acpi_table_lapic *)obj->buffer.pointer;
+
+ if ((lapic->header.type != ACPI_MADT_LAPIC) ||
+ (!lapic->flags.enabled)) {
+ kfree(buffer.pointer);
+ return -EINVAL;
+ }
+
+ physid = lapic->id;
+
+ kfree(buffer.pointer);
+ buffer.length = ACPI_ALLOCATE_BUFFER;
+ buffer.pointer = NULL;
+
+ tmp_map = cpu_present_map;
+ mp_register_lapic(physid, lapic->flags.enabled);
+
+ /*
+ * If mp_register_lapic successfully generates a new logical cpu
+ * number, then the following will get us exactly what was mapped
+ */
+ cpus_andnot(new_map, cpu_present_map, tmp_map);
+ if (cpus_empty(new_map)) {
+ printk ("Unable to map lapic to logical cpu number\n");
+ return -EINVAL;
+ }
+
+ cpu = first_cpu(new_map);
+
+ *pcpu = cpu;
+ return 0;
}
EXPORT_SYMBOL(acpi_map_lsapic);
int acpi_unmap_lsapic(int cpu)
{
- /* TBD */
- return -EINVAL;
+ int i;
+
+ for_each_possible_cpu(i) {
+ if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) {
+ x86_acpiid_to_apicid[i] = -1;
+ break;
+ }
+ }
+ x86_cpu_to_apicid[cpu] = -1;
+ cpu_clear(cpu, cpu_present_map);
+ num_processors--;
+
+ return (0);
}
EXPORT_SYMBOL(acpi_unmap_lsapic);
Index: linux/arch/i386/kernel/mpparse.c
===================================================================
--- linux.orig/arch/i386/kernel/mpparse.c
+++ linux/arch/i386/kernel/mpparse.c
@@ -69,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
/* Internal processor count */
-static unsigned int __devinitdata num_processors;
+unsigned int __cpuinitdata num_processors;
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
Index: linux/include/asm-i386/smp.h
===================================================================
--- linux.orig/include/asm-i386/smp.h
+++ linux/include/asm-i386/smp.h
@@ -84,6 +84,7 @@ static inline int hard_smp_processor_id(
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
+extern unsigned int num_processors;
#endif /* !__ASSEMBLY__ */
^ permalink raw reply [flat|nested] 199+ messages in thread[parent not found: <20060810222056.GA24184@mail.muni.cz>]
* Re: [PATCH for review] [133/145] x86_64: Support physical cpu hotplug for x86_64
2006-08-10 19:37 ` [PATCH for review] [133/145] x86_64: Support physical cpu hotplug for x86_64 Andi Kleen
[not found] ` <20060810222056.GA24184@mail.muni.cz>
@ 2006-08-16 16:02 ` Len Brown
1 sibling, 0 replies; 199+ messages in thread
From: Len Brown @ 2006-08-16 16:02 UTC (permalink / raw)
To: Andi Kleen, ashok.raj; +Cc: linux-kernel
On Thursday 10 August 2006 15:37, Andi Kleen wrote:
> r
>
> From: Ashok Raj <ashok.raj@intel.com>
>
> This patch enables ACPI based physical CPU hotplug support for x86_64.
> Implements acpi_map_lsapic() and acpi_unmap_lsapic() to support physical cpu
> hotplug.
> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
> Signed-off-by: Andi Kleen <ak@suse.de>
> Cc: Andi Kleen <ak@muc.de>
> Cc: "Brown, Len" <len.brown@intel.com>
> Signed-off-by: Andrew Morton <akpm@osdl.org>
> ---
>
> arch/i386/kernel/acpi/boot.c | 69 ++++++++++++++++++++++++++++++++++++++++---
> arch/i386/kernel/mpparse.c | 2 -
> include/asm-i386/smp.h | 1
> 3 files changed, 67 insertions(+), 5 deletions(-)
>
> Index: linux/arch/i386/kernel/acpi/boot.c
> ===================================================================
> --- linux.orig/arch/i386/kernel/acpi/boot.c
> +++ linux/arch/i386/kernel/acpi/boot.c
> @@ -26,6 +26,7 @@
> #include <linux/init.h>
> #include <linux/acpi.h>
> #include <linux/efi.h>
> +#include <linux/cpumask.h>
> #include <linux/module.h>
> #include <linux/dmi.h>
> #include <linux/irq.h>
> @@ -512,16 +513,76 @@ EXPORT_SYMBOL(acpi_register_gsi);
> #ifdef CONFIG_ACPI_HOTPLUG_CPU
> int acpi_map_lsapic(acpi_handle handle, int *pcpu)
> {
> - /* TBD */
> - return -EINVAL;
> + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
> + union acpi_object *obj;
> + struct acpi_table_lapic *lapic;
Let's _not_ have a routine called acpi_map_lsapic() that on ia64 maps an lsapic
and on i386 and x86_64 maps a lapic.
If you can't share the same routine between 3 architectures, then at least give
it a generic name that applies to both lsapic and lapic.
thanks,
-Len
> + cpumask_t tmp_map, new_map;
> + u8 physid;
> + int cpu;
> +
> + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
> + return -EINVAL;
> +
> + if (!buffer.length || !buffer.pointer)
> + return -EINVAL;
> +
> + obj = buffer.pointer;
> + if (obj->type != ACPI_TYPE_BUFFER ||
> + obj->buffer.length < sizeof(*lapic)) {
> + kfree(buffer.pointer);
> + return -EINVAL;
> + }
> +
> + lapic = (struct acpi_table_lapic *)obj->buffer.pointer;
> +
> + if ((lapic->header.type != ACPI_MADT_LAPIC) ||
> + (!lapic->flags.enabled)) {
> + kfree(buffer.pointer);
> + return -EINVAL;
> + }
> +
> + physid = lapic->id;
> +
> + kfree(buffer.pointer);
> + buffer.length = ACPI_ALLOCATE_BUFFER;
> + buffer.pointer = NULL;
> +
> + tmp_map = cpu_present_map;
> + mp_register_lapic(physid, lapic->flags.enabled);
> +
> + /*
> + * If mp_register_lapic successfully generates a new logical cpu
> + * number, then the following will get us exactly what was mapped
> + */
> + cpus_andnot(new_map, cpu_present_map, tmp_map);
> + if (cpus_empty(new_map)) {
> + printk ("Unable to map lapic to logical cpu number\n");
> + return -EINVAL;
> + }
> +
> + cpu = first_cpu(new_map);
> +
> + *pcpu = cpu;
> + return 0;
> }
>
> EXPORT_SYMBOL(acpi_map_lsapic);
>
> int acpi_unmap_lsapic(int cpu)
> {
> - /* TBD */
> - return -EINVAL;
> + int i;
> +
> + for_each_possible_cpu(i) {
> + if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) {
> + x86_acpiid_to_apicid[i] = -1;
> + break;
> + }
> + }
> + x86_cpu_to_apicid[cpu] = -1;
> + cpu_clear(cpu, cpu_present_map);
> + num_processors--;
> +
> + return (0);
> }
>
> EXPORT_SYMBOL(acpi_unmap_lsapic);
> Index: linux/arch/i386/kernel/mpparse.c
> ===================================================================
> --- linux.orig/arch/i386/kernel/mpparse.c
> +++ linux/arch/i386/kernel/mpparse.c
> @@ -69,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
> /* Processor that is doing the boot up */
> unsigned int boot_cpu_physical_apicid = -1U;
> /* Internal processor count */
> -static unsigned int __devinitdata num_processors;
> +unsigned int __cpuinitdata num_processors;
>
> /* Bitmask of physically existing CPUs */
> physid_mask_t phys_cpu_present_map;
> Index: linux/include/asm-i386/smp.h
> ===================================================================
> --- linux.orig/include/asm-i386/smp.h
> +++ linux/include/asm-i386/smp.h
> @@ -84,6 +84,7 @@ static inline int hard_smp_processor_id(
>
> extern int __cpu_disable(void);
> extern void __cpu_die(unsigned int cpu);
> +extern unsigned int num_processors;
>
> #endif /* !__ASSEMBLY__ */
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
^ permalink raw reply [flat|nested] 199+ messages in thread
* [PATCH for review] [134/145] x86_64: non lazy "sleazy" fpu implementation
[not found] <20060810 935.775038000@suse.de>
` (132 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [133/145] x86_64: Support physical cpu hotplug for x86_64 Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [135/145] x86_64: wire up oops_enter()/oops_exit() Andi Kleen
` (11 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Arjan van de Ven <arjan@linux.intel.com>
Right now the kernel on x86-64 has a 100% lazy fpu behavior: after *every*
context switch a trap is taken for the first FPU use to restore the FPU
context lazily. This is of course great for applications that have very
sporadic or no FPU use (since then you avoid doing the expensive
save/restore all the time). However for very frequent FPU users... you
take an extra trap every context switch.
The patch below adds a simple heuristic to this code: After 5 consecutive
context switches of FPU use, the lazy behavior is disabled and the context
gets restored every context switch. If the app indeed uses the FPU, the
trap is avoided. (The chances of the 6th time slice using the FPU after the
previous 5 have done so are obviously quite high.)
After 256 switches, this is reset and lazy behavior is returned (until
there are 5 consecutive ones again). The reason for this is to give apps
that do longer bursts of FPU use still the lazy behavior back after some
time.
[akpm@osdl.org: place new task_struct field next to jit_keyring to save space]
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
arch/x86_64/kernel/process.c | 10 ++++++++++
arch/x86_64/kernel/traps.c | 1 +
include/asm-x86_64/i387.h | 5 ++++-
include/linux/sched.h | 9 +++++++++
4 files changed, 24 insertions(+), 1 deletion(-)
Index: linux/arch/x86_64/kernel/process.c
===================================================================
--- linux.orig/arch/x86_64/kernel/process.c
+++ linux/arch/x86_64/kernel/process.c
@@ -552,6 +552,10 @@ __switch_to(struct task_struct *prev_p,
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ /* we're going to use this soon, after a few expensive things */
+ if (next_p->fpu_counter>5)
+ prefetch(&next->i387.fxsave);
+
/*
* Reload esp0, LDT and the page table pointer:
*/
@@ -629,6 +633,12 @@ __switch_to(struct task_struct *prev_p,
|| test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
__switch_to_xtra(prev_p, next_p, tss);
+ /* If the task has used fpu the last 5 timeslices, just do a full
+ * restore of the math state immediately to avoid the trap; the
+ * chances of needing FPU soon are obviously high now
+ */
+ if (next_p->fpu_counter>5)
+ math_state_restore();
return prev_p;
}
Index: linux/arch/x86_64/kernel/traps.c
===================================================================
--- linux.orig/arch/x86_64/kernel/traps.c
+++ linux/arch/x86_64/kernel/traps.c
@@ -1134,6 +1134,7 @@ asmlinkage void math_state_restore(void)
init_fpu(me);
restore_fpu_checking(&me->thread.i387.fxsave);
task_thread_info(me)->status |= TS_USEDFPU;
+ me->fpu_counter++;
}
void __init trap_init(void)
Index: linux/include/asm-x86_64/i387.h
===================================================================
--- linux.orig/include/asm-x86_64/i387.h
+++ linux/include/asm-x86_64/i387.h
@@ -24,6 +24,7 @@ extern unsigned int mxcsr_feature_mask;
extern void mxcsr_feature_mask_init(void);
extern void init_fpu(struct task_struct *child);
extern int save_i387(struct _fpstate __user *buf);
+extern asmlinkage void math_state_restore(void);
/*
* FPU lazy state save handling...
@@ -31,7 +32,9 @@ extern int save_i387(struct _fpstate __u
#define unlazy_fpu(tsk) do { \
if (task_thread_info(tsk)->status & TS_USEDFPU) \
- save_init_fpu(tsk); \
+ save_init_fpu(tsk); \
+ else \
+ tsk->fpu_counter = 0; \
} while (0)
/* Ignore delayed exceptions from user space */
Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h
+++ linux/include/linux/sched.h
@@ -865,6 +865,15 @@ struct task_struct {
struct key *thread_keyring; /* keyring private to this thread */
unsigned char jit_keyring; /* default keyring to attach requested keys to */
#endif
+ /*
+ * fpu_counter contains the number of consecutive context switches
+ * that the FPU is used. If this is over a threshold, the lazy fpu
+ * saving becomes unlazy to save the trap. This is an unsigned char
+ * so that after 256 times the counter wraps and the behavior turns
+ * lazy again; this to deal with bursty apps that only use FPU for
+ * a short time
+ */
+ unsigned char fpu_counter;
int oomkilladj; /* OOM kill score adjustment (bit shift). */
char comm[TASK_COMM_LEN]; /* executable name excluding path
- access with [gs]et_task_comm (which lock
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [135/145] x86_64: wire up oops_enter()/oops_exit()
[not found] <20060810 935.775038000@suse.de>
` (133 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [134/145] x86_64: non lazy "sleazy" fpu implementation Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [136/145] x86_64: x86_64 kernel mapping fix Andi Kleen
` (10 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Andrew Morton <akpm@osdl.org>
Implement pause_on_oops() on x86_64.
AK: I redid the patch to do the oops_enter/exit in the existing
oops_begin()/end(). This makes it much shorter.
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/traps.c | 4 ++++
1 files changed, 4 insertions(+)
Index: linux/arch/x86_64/kernel/traps.c
===================================================================
--- linux.orig/arch/x86_64/kernel/traps.c
+++ linux/arch/x86_64/kernel/traps.c
@@ -559,6 +559,8 @@ unsigned __kprobes long oops_begin(void)
int cpu = safe_smp_processor_id();
unsigned long flags;
+ oops_enter();
+
/* racy, but better than risking deadlock. */
local_irq_save(flags);
if (!spin_trylock(&die_lock)) {
@@ -587,6 +589,8 @@ void __kprobes oops_end(unsigned long fl
spin_unlock_irqrestore(&die_lock, flags);
if (panic_on_oops)
panic("Fatal exception: panic_on_oops");
+
+ oops_exit();
}
void __kprobes __die(const char * str, struct pt_regs * regs, long err)
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [136/145] x86_64: x86_64 kernel mapping fix
[not found] <20060810 935.775038000@suse.de>
` (134 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [135/145] x86_64: wire up oops_enter()/oops_exit() Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [137/145] i386: KPROBE_ENTRY ends up putting code into .fixup Andi Kleen
` (9 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Keith Mannthey <kmannth@us.ibm.com>
Fix for the x86_64 kernel mapping code. Without this patch the update path
only inits one pmd_page worth of memory and tramples any entries on it. Now
the calling convention to phys_pmd_init and phys_init is to always pass a
[pmd/pud] page not an offset within a page.
Signed-off-by: Keith Mannthey <kmannth@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
arch/x86_64/mm/init.c | 51 +++++++++++++++++++++++++-------------------------
1 files changed, 26 insertions(+), 25 deletions(-)
Index: linux/arch/x86_64/mm/init.c
===================================================================
--- linux.orig/arch/x86_64/mm/init.c
+++ linux/arch/x86_64/mm/init.c
@@ -250,12 +250,13 @@ __init void early_iounmap(void *addr, un
}
static void __meminit
-phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
+phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
- int i;
+ int i = pmd_index(address);
- for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
+ for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
unsigned long entry;
+ pmd_t *pmd = pmd_page + pmd_index(address);
if (address >= end) {
if (!after_bootmem)
@@ -263,6 +264,10 @@ phys_pmd_init(pmd_t *pmd, unsigned long
set_pmd(pmd, __pmd(0));
break;
}
+
+ if (pmd_val(*pmd))
+ continue;
+
entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
entry &= __supported_pte_mask;
set_pmd(pmd, __pmd(entry));
@@ -272,45 +277,41 @@ phys_pmd_init(pmd_t *pmd, unsigned long
static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
- pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
-
- if (pmd_none(*pmd)) {
- spin_lock(&init_mm.page_table_lock);
- phys_pmd_init(pmd, address, end);
- spin_unlock(&init_mm.page_table_lock);
- __flush_tlb_all();
- }
+ pmd_t *pmd = pmd_offset(pud,0);
+ spin_lock(&init_mm.page_table_lock);
+ phys_pmd_init(pmd, address, end);
+ spin_unlock(&init_mm.page_table_lock);
+ __flush_tlb_all();
}
-static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
- long i = pud_index(address);
-
- pud = pud + i;
+ int i = pud_index(addr);
- if (after_bootmem && pud_val(*pud)) {
- phys_pmd_update(pud, address, end);
- return;
- }
- for (; i < PTRS_PER_PUD; pud++, i++) {
+ for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
int map;
- unsigned long paddr, pmd_phys;
+ unsigned long pmd_phys;
+ pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
- paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
- if (paddr >= end)
+ if (addr >= end)
break;
- if (!after_bootmem && !e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) {
+ if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
set_pud(pud, __pud(0));
continue;
}
+ if (pud_val(*pud)) {
+ phys_pmd_update(pud, addr, end);
+ continue;
+ }
+
pmd = alloc_low_page(&map, &pmd_phys);
spin_lock(&init_mm.page_table_lock);
set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
- phys_pmd_init(pmd, paddr, end);
+ phys_pmd_init(pmd, addr, end);
spin_unlock(&init_mm.page_table_lock);
unmap_low_page(map);
}
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [137/145] i386: KPROBE_ENTRY ends up putting code into .fixup
[not found] <20060810 935.775038000@suse.de>
` (135 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [136/145] x86_64: x86_64 kernel mapping fix Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [138/145] i386: remove redundant generic_identify() calls when identifying cpus Andi Kleen
` (8 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Jeremy Fitzhardinge <jeremy@goop.org>
KPROBE_ENTRY does a .section .kprobes.text, and expects its users to
do a .previous at the end of the function.
Unfortunately, if any code within the function switches sections, for
example .fixup, then the .previous ends up putting all subsequent code
into .fixup. Worse, any subsequent .fixup code gets intermingled with
the code it's supposed to be fixing (which is also in .fixup). It's
surprising this didn't cause more havoc.
The fix is to use .pushsection/.popsection, so this stuff nests
properly. A further cleanup would be to get rid of all
.section/.previous pairs, since they're inherently fragile.
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/entry.S | 8 ++++----
arch/x86_64/kernel/entry.S | 12 ++++++------
include/linux/linkage.h | 2 +-
3 files changed, 11 insertions(+), 11 deletions(-)
Index: linux/arch/i386/kernel/entry.S
===================================================================
--- linux.orig/arch/i386/kernel/entry.S
+++ linux/arch/i386/kernel/entry.S
@@ -639,7 +639,7 @@ error_code:
call *%edi
jmp ret_from_exception
CFI_ENDPROC
-.previous
+.popsection
ENTRY(coprocessor_error)
RING0_INT_FRAME
@@ -715,7 +715,7 @@ debug_stack_correct:
call do_debug
jmp ret_from_exception
CFI_ENDPROC
-.previous
+.popsection
/*
* NMI is doubly nasty. It can happen _while_ we're handling
@@ -812,7 +812,7 @@ KPROBE_ENTRY(int3)
call do_int3
jmp ret_from_exception
CFI_ENDPROC
-.previous
+.popsection
ENTRY(overflow)
RING0_INT_FRAME
@@ -877,7 +877,7 @@ KPROBE_ENTRY(general_protection)
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
-.previous
+.popsection
ENTRY(alignment_check)
RING0_EC_FRAME
Index: linux/arch/x86_64/kernel/entry.S
===================================================================
--- linux.orig/arch/x86_64/kernel/entry.S
+++ linux/arch/x86_64/kernel/entry.S
@@ -904,7 +904,7 @@ error_kernelspace:
je error_swapgs
jmp error_sti
END(error_entry)
- .previous
+ .popsection
/* Reload gs selector with exception handling */
/* edi: new selector */
@@ -1024,7 +1024,7 @@ ENDPROC(execve)
KPROBE_ENTRY(page_fault)
errorentry do_page_fault
END(page_fault)
- .previous
+ .popsection
ENTRY(coprocessor_error)
zeroentry do_coprocessor_error
@@ -1046,7 +1046,7 @@ KPROBE_ENTRY(debug)
paranoidentry do_debug, DEBUG_STACK
paranoidexit
END(debug)
- .previous
+ .popsection
/* runs on exception stack */
KPROBE_ENTRY(nmi)
@@ -1061,7 +1061,7 @@ KPROBE_ENTRY(nmi)
CFI_ENDPROC
#endif
END(nmi)
- .previous
+ .popsection
KPROBE_ENTRY(int3)
INTR_FRAME
@@ -1071,7 +1071,7 @@ KPROBE_ENTRY(int3)
jmp paranoid_exit1
CFI_ENDPROC
END(int3)
- .previous
+ .popsection
ENTRY(overflow)
zeroentry do_overflow
@@ -1120,7 +1120,7 @@ END(stack_segment)
KPROBE_ENTRY(general_protection)
errorentry do_general_protection
END(general_protection)
- .previous
+ .popsection
ENTRY(alignment_check)
errorentry do_alignment_check
Index: linux/include/linux/linkage.h
===================================================================
--- linux.orig/include/linux/linkage.h
+++ linux/include/linux/linkage.h
@@ -35,7 +35,7 @@
#endif
#define KPROBE_ENTRY(name) \
- .section .kprobes.text, "ax"; \
+ .pushsection .kprobes.text, "ax"; \
ENTRY(name)
#ifndef END
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [138/145] i386: remove redundant generic_identify() calls when identifying cpus
[not found] <20060810 935.775038000@suse.de>
` (136 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [137/145] i386: KPROBE_ENTRY ends up putting code into .fixup Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [139/145] x86_64: mark init_amd() as __cpuinit Andi Kleen
` (7 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Magnus Damm <magnus@valinux.co.jp>
cpu_dev->c_identify is only called from arch/i386/kernel/cpu/common.c:identify_cpu(),
and only after generic_identify() has already been called. There is no need to call
this function twice and hook it in c_identify - but I may be wrong, please
double check before applying.
This patch also removes generic_identify() from cpu.h to avoid unnecessary
future nesting.
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/cpu/amd.c | 1 -
arch/i386/kernel/cpu/common.c | 2 +-
arch/i386/kernel/cpu/cpu.h | 2 --
arch/i386/kernel/cpu/cyrix.c | 2 --
arch/i386/kernel/cpu/intel.c | 1 -
arch/i386/kernel/cpu/nexgen.c | 1 -
arch/i386/kernel/cpu/transmeta.c | 1 -
7 files changed, 1 insertion(+), 9 deletions(-)
Index: linux/arch/i386/kernel/cpu/amd.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/amd.c
+++ linux/arch/i386/kernel/cpu/amd.c
@@ -275,7 +275,6 @@ static struct cpu_dev amd_cpu_dev __init
},
},
.c_init = init_amd,
- .c_identify = generic_identify,
.c_size_cache = amd_size_cache,
};
Index: linux/arch/i386/kernel/cpu/common.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/common.c
+++ linux/arch/i386/kernel/cpu/common.c
@@ -265,7 +265,7 @@ static void __init early_cpu_detect(void
}
}
-void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
{
u32 tfms, xlvl;
int ebx;
Index: linux/arch/i386/kernel/cpu/cpu.h
===================================================================
--- linux.orig/arch/i386/kernel/cpu/cpu.h
+++ linux/arch/i386/kernel/cpu/cpu.h
@@ -24,7 +24,5 @@ extern struct cpu_dev * cpu_devs [X86_VE
extern int get_model_name(struct cpuinfo_x86 *c);
extern void display_cacheinfo(struct cpuinfo_x86 *c);
-extern void generic_identify(struct cpuinfo_x86 * c);
-
extern void early_intel_workaround(struct cpuinfo_x86 *c);
Index: linux/arch/i386/kernel/cpu/cyrix.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/cyrix.c
+++ linux/arch/i386/kernel/cpu/cyrix.c
@@ -427,7 +427,6 @@ static void __init cyrix_identify(struct
local_irq_restore(flags);
}
}
- generic_identify(c);
}
static struct cpu_dev cyrix_cpu_dev __initdata = {
@@ -457,7 +456,6 @@ static struct cpu_dev nsc_cpu_dev __init
.c_vendor = "NSC",
.c_ident = { "Geode by NSC" },
.c_init = init_nsc,
- .c_identify = generic_identify,
};
int __init nsc_init_cpu(void)
Index: linux/arch/i386/kernel/cpu/intel.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/intel.c
+++ linux/arch/i386/kernel/cpu/intel.c
@@ -263,7 +263,6 @@ static struct cpu_dev intel_cpu_dev __cp
},
},
.c_init = init_intel,
- .c_identify = generic_identify,
.c_size_cache = intel_size_cache,
};
Index: linux/arch/i386/kernel/cpu/nexgen.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/nexgen.c
+++ linux/arch/i386/kernel/cpu/nexgen.c
@@ -38,7 +38,6 @@ static void __init nexgen_identify(struc
if ( deep_magic_nexgen_probe() ) {
strcpy(c->x86_vendor_id, "NexGenDriven");
}
- generic_identify(c);
}
static struct cpu_dev nexgen_cpu_dev __initdata = {
Index: linux/arch/i386/kernel/cpu/transmeta.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/transmeta.c
+++ linux/arch/i386/kernel/cpu/transmeta.c
@@ -88,7 +88,6 @@ static void __init init_transmeta(struct
static void __init transmeta_identify(struct cpuinfo_x86 * c)
{
u32 xlvl;
- generic_identify(c);
/* Transmeta-defined flags: level 0x80860001 */
xlvl = cpuid_eax(0x80860000);
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [139/145] x86_64: mark init_amd() as __cpuinit
[not found] <20060810 935.775038000@suse.de>
` (137 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [138/145] i386: remove redundant generic_identify() calls when identifying cpus Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [140/145] i386: mark cpu_dev structures as __cpuinitdata Andi Kleen
` (6 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Magnus Damm <magnus@valinux.co.jp>
The init_amd() function is only called from identify_cpu() which is already
marked as __cpuinit. So let's mark it as __cpuinit.
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86_64/kernel/setup.c | 2 +-
1 files changed, 1 insertion(+), 1 deletion(-)
Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -674,7 +674,7 @@ static void __init amd_detect_cmp(struct
#endif
}
-static void __init init_amd(struct cpuinfo_x86 *c)
+static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
unsigned level;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [140/145] i386: mark cpu_dev structures as __cpuinitdata
[not found] <20060810 935.775038000@suse.de>
` (138 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [139/145] x86_64: mark init_amd() as __cpuinit Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [141/145] i386: mark cpu init functions as __cpuinit, data " Andi Kleen
` (5 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Magnus Damm <magnus@valinux.co.jp>
The different cpu_dev structures are all used from __cpuinit callers, as far
as I can tell. So mark them as __cpuinitdata instead of __initdata. I am a
little bit unsure about arch/i386/kernel/cpu/common.c:default_cpu, especially
when it comes to the purpose of this_cpu.
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/cpu/amd.c | 2 +-
arch/i386/kernel/cpu/centaur.c | 2 +-
arch/i386/kernel/cpu/common.c | 2 +-
arch/i386/kernel/cpu/cyrix.c | 4 ++--
arch/i386/kernel/cpu/nexgen.c | 2 +-
arch/i386/kernel/cpu/rise.c | 2 +-
arch/i386/kernel/cpu/transmeta.c | 2 +-
arch/i386/kernel/cpu/umc.c | 2 +-
8 files changed, 9 insertions(+), 9 deletions(-)
Index: linux/arch/i386/kernel/cpu/amd.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/amd.c
+++ linux/arch/i386/kernel/cpu/amd.c
@@ -259,7 +259,7 @@ static unsigned int amd_size_cache(struc
return size;
}
-static struct cpu_dev amd_cpu_dev __initdata = {
+static struct cpu_dev amd_cpu_dev __cpuinitdata = {
.c_vendor = "AMD",
.c_ident = { "AuthenticAMD" },
.c_models = {
Index: linux/arch/i386/kernel/cpu/centaur.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/centaur.c
+++ linux/arch/i386/kernel/cpu/centaur.c
@@ -457,7 +457,7 @@ static unsigned int centaur_size_cache(s
return size;
}
-static struct cpu_dev centaur_cpu_dev __initdata = {
+static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
.c_vendor = "Centaur",
.c_ident = { "CentaurHauls" },
.c_init = init_centaur,
Index: linux/arch/i386/kernel/cpu/common.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/common.c
+++ linux/arch/i386/kernel/cpu/common.c
@@ -49,7 +49,7 @@ static void default_init(struct cpuinfo_
}
}
-static struct cpu_dev default_cpu = {
+static struct cpu_dev __cpuinitdata default_cpu = {
.c_init = default_init,
.c_vendor = "Unknown",
};
Index: linux/arch/i386/kernel/cpu/cyrix.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/cyrix.c
+++ linux/arch/i386/kernel/cpu/cyrix.c
@@ -429,7 +429,7 @@ static void __init cyrix_identify(struct
}
}
-static struct cpu_dev cyrix_cpu_dev __initdata = {
+static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
.c_vendor = "Cyrix",
.c_ident = { "CyrixInstead" },
.c_init = init_cyrix,
@@ -452,7 +452,7 @@ static int __init cyrix_exit_cpu(void)
late_initcall(cyrix_exit_cpu);
-static struct cpu_dev nsc_cpu_dev __initdata = {
+static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
.c_vendor = "NSC",
.c_ident = { "Geode by NSC" },
.c_init = init_nsc,
Index: linux/arch/i386/kernel/cpu/nexgen.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/nexgen.c
+++ linux/arch/i386/kernel/cpu/nexgen.c
@@ -40,7 +40,7 @@ static void __init nexgen_identify(struc
}
}
-static struct cpu_dev nexgen_cpu_dev __initdata = {
+static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {
.c_vendor = "Nexgen",
.c_ident = { "NexGenDriven" },
.c_models = {
Index: linux/arch/i386/kernel/cpu/rise.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/rise.c
+++ linux/arch/i386/kernel/cpu/rise.c
@@ -28,7 +28,7 @@ static void __init init_rise(struct cpui
set_bit(X86_FEATURE_CX8, c->x86_capability);
}
-static struct cpu_dev rise_cpu_dev __initdata = {
+static struct cpu_dev rise_cpu_dev __cpuinitdata = {
.c_vendor = "Rise",
.c_ident = { "RiseRiseRise" },
.c_models = {
Index: linux/arch/i386/kernel/cpu/transmeta.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/transmeta.c
+++ linux/arch/i386/kernel/cpu/transmeta.c
@@ -97,7 +97,7 @@ static void __init transmeta_identify(st
}
}
-static struct cpu_dev transmeta_cpu_dev __initdata = {
+static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
.c_vendor = "Transmeta",
.c_ident = { "GenuineTMx86", "TransmetaCPU" },
.c_init = init_transmeta,
Index: linux/arch/i386/kernel/cpu/umc.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/umc.c
+++ linux/arch/i386/kernel/cpu/umc.c
@@ -10,7 +10,7 @@ static void __init init_umc(struct cpuin
}
-static struct cpu_dev umc_cpu_dev __initdata = {
+static struct cpu_dev umc_cpu_dev __cpuinitdata = {
.c_vendor = "UMC",
.c_ident = { "UMC UMC UMC" },
.c_models = {
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [141/145] i386: mark cpu init functions as __cpuinit, data as __cpuinitdata
[not found] <20060810 935.775038000@suse.de>
` (139 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [140/145] i386: mark cpu_dev structures as __cpuinitdata Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [142/145] i386: mark cpu identify functions as __cpuinit Andi Kleen
` (4 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Magnus Damm <magnus@valinux.co.jp>
Mark i386-specific cpu init functions as __cpuinit. They are all only called
from arch/i386/kernel/cpu/common.c:identify_cpu(), which is already marked as
__cpuinit. This patch also removes the empty function init_umc().
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/cpu/amd.c | 2 +-
arch/i386/kernel/cpu/centaur.c | 20 ++++++++++----------
arch/i386/kernel/cpu/common.c | 2 +-
arch/i386/kernel/cpu/cyrix.c | 32 ++++++++++++++++----------------
arch/i386/kernel/cpu/nexgen.c | 2 +-
arch/i386/kernel/cpu/rise.c | 2 +-
arch/i386/kernel/cpu/transmeta.c | 2 +-
arch/i386/kernel/cpu/umc.c | 5 -----
8 files changed, 31 insertions(+), 36 deletions(-)
Index: linux/arch/i386/kernel/cpu/amd.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/amd.c
+++ linux/arch/i386/kernel/cpu/amd.c
@@ -22,7 +22,7 @@
extern void vide(void);
__asm__(".align 4\nvide: ret");
-static void __init init_amd(struct cpuinfo_x86 *c)
+static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
u32 l, h;
int mbytes = num_physpages >> (20-PAGE_SHIFT);
Index: linux/arch/i386/kernel/cpu/centaur.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/centaur.c
+++ linux/arch/i386/kernel/cpu/centaur.c
@@ -9,7 +9,7 @@
#ifdef CONFIG_X86_OOSTORE
-static u32 __init power2(u32 x)
+static u32 __cpuinit power2(u32 x)
{
u32 s=1;
while(s<=x)
@@ -22,7 +22,7 @@ static u32 __init power2(u32 x)
* Set up an actual MCR
*/
-static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key)
+static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
{
u32 lo, hi;
@@ -40,7 +40,7 @@ static void __init centaur_mcr_insert(in
* Shortcut: We know you can't put 4Gig of RAM on a winchip
*/
-static u32 __init ramtop(void) /* 16388 */
+static u32 __cpuinit ramtop(void) /* 16388 */
{
int i;
u32 top = 0;
@@ -91,7 +91,7 @@ static u32 __init ramtop(void) /* 16388
* Compute a set of MCR's to give maximum coverage
*/
-static int __init centaur_mcr_compute(int nr, int key)
+static int __cpuinit centaur_mcr_compute(int nr, int key)
{
u32 mem = ramtop();
u32 root = power2(mem);
@@ -166,7 +166,7 @@ static int __init centaur_mcr_compute(in
return ct;
}
-static void __init centaur_create_optimal_mcr(void)
+static void __cpuinit centaur_create_optimal_mcr(void)
{
int i;
/*
@@ -189,7 +189,7 @@ static void __init centaur_create_optima
wrmsr(MSR_IDT_MCR0+i, 0, 0);
}
-static void __init winchip2_create_optimal_mcr(void)
+static void __cpuinit winchip2_create_optimal_mcr(void)
{
u32 lo, hi;
int i;
@@ -227,7 +227,7 @@ static void __init winchip2_create_optim
* Handle the MCR key on the Winchip 2.
*/
-static void __init winchip2_unprotect_mcr(void)
+static void __cpuinit winchip2_unprotect_mcr(void)
{
u32 lo, hi;
u32 key;
@@ -239,7 +239,7 @@ static void __init winchip2_unprotect_mc
wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
}
-static void __init winchip2_protect_mcr(void)
+static void __cpuinit winchip2_protect_mcr(void)
{
u32 lo, hi;
@@ -257,7 +257,7 @@ static void __init winchip2_protect_mcr(
#define RNG_ENABLED (1 << 3)
#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
-static void __init init_c3(struct cpuinfo_x86 *c)
+static void __cpuinit init_c3(struct cpuinfo_x86 *c)
{
u32 lo, hi;
@@ -303,7 +303,7 @@ static void __init init_c3(struct cpuinf
display_cacheinfo(c);
}
-static void __init init_centaur(struct cpuinfo_x86 *c)
+static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
enum {
ECX8=1<<1,
Index: linux/arch/i386/kernel/cpu/common.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/common.c
+++ linux/arch/i386/kernel/cpu/common.c
@@ -36,7 +36,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM
extern int disable_pse;
-static void default_init(struct cpuinfo_x86 * c)
+static void __cpuinit default_init(struct cpuinfo_x86 * c)
{
/* Not much we can do here... */
/* Check if at least it has cpuid */
Index: linux/arch/i386/kernel/cpu/cyrix.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/cyrix.c
+++ linux/arch/i386/kernel/cpu/cyrix.c
@@ -52,25 +52,25 @@ static void __init do_cyrix_devid(unsign
* Actually since bugs.h doesn't even reference this perhaps someone should
* fix the documentation ???
*/
-static unsigned char Cx86_dir0_msb __initdata = 0;
+static unsigned char Cx86_dir0_msb __cpuinitdata = 0;
-static char Cx86_model[][9] __initdata = {
+static char Cx86_model[][9] __cpuinitdata = {
"Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
"M II ", "Unknown"
};
-static char Cx486_name[][5] __initdata = {
+static char Cx486_name[][5] __cpuinitdata = {
"SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
"SRx2", "DRx2"
};
-static char Cx486S_name[][4] __initdata = {
+static char Cx486S_name[][4] __cpuinitdata = {
"S", "S2", "Se", "S2e"
};
-static char Cx486D_name[][4] __initdata = {
+static char Cx486D_name[][4] __cpuinitdata = {
"DX", "DX2", "?", "?", "?", "DX4"
};
-static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock";
-static char cyrix_model_mult1[] __initdata = "12??43";
-static char cyrix_model_mult2[] __initdata = "12233445";
+static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock";
+static char cyrix_model_mult1[] __cpuinitdata = "12??43";
+static char cyrix_model_mult2[] __cpuinitdata = "12233445";
/*
* Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
@@ -82,7 +82,7 @@ static char cyrix_model_mult2[] __initda
extern void calibrate_delay(void) __init;
-static void __init check_cx686_slop(struct cpuinfo_x86 *c)
+static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
{
unsigned long flags;
@@ -107,7 +107,7 @@ static void __init check_cx686_slop(stru
}
-static void __init set_cx86_reorder(void)
+static void __cpuinit set_cx86_reorder(void)
{
u8 ccr3;
@@ -122,7 +122,7 @@ static void __init set_cx86_reorder(void
setCx86(CX86_CCR3, ccr3);
}
-static void __init set_cx86_memwb(void)
+static void __cpuinit set_cx86_memwb(void)
{
u32 cr0;
@@ -137,7 +137,7 @@ static void __init set_cx86_memwb(void)
setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
}
-static void __init set_cx86_inc(void)
+static void __cpuinit set_cx86_inc(void)
{
unsigned char ccr3;
@@ -158,7 +158,7 @@ static void __init set_cx86_inc(void)
* Configure later MediaGX and/or Geode processor.
*/
-static void __init geode_configure(void)
+static void __cpuinit geode_configure(void)
{
unsigned long flags;
u8 ccr3, ccr4;
@@ -184,14 +184,14 @@ static void __init geode_configure(void)
#ifdef CONFIG_PCI
-static struct pci_device_id __initdata cyrix_55x0[] = {
+static struct pci_device_id __cpuinitdata cyrix_55x0[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) },
{ PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) },
{ },
};
#endif
-static void __init init_cyrix(struct cpuinfo_x86 *c)
+static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
{
unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
char *buf = c->x86_model_id;
@@ -346,7 +346,7 @@ static void __init init_cyrix(struct cpu
/*
* Handle National Semiconductor branded processors
*/
-static void __init init_nsc(struct cpuinfo_x86 *c)
+static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
{
/* There may be GX1 processors in the wild that are branded
* NSC and not Cyrix.
Index: linux/arch/i386/kernel/cpu/nexgen.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/nexgen.c
+++ linux/arch/i386/kernel/cpu/nexgen.c
@@ -27,7 +27,7 @@ static int __init deep_magic_nexgen_prob
return ret;
}
-static void __init init_nexgen(struct cpuinfo_x86 * c)
+static void __cpuinit init_nexgen(struct cpuinfo_x86 * c)
{
c->x86_cache_size = 256; /* A few had 1 MB... */
}
Index: linux/arch/i386/kernel/cpu/rise.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/rise.c
+++ linux/arch/i386/kernel/cpu/rise.c
@@ -5,7 +5,7 @@
#include "cpu.h"
-static void __init init_rise(struct cpuinfo_x86 *c)
+static void __cpuinit init_rise(struct cpuinfo_x86 *c)
{
printk("CPU: Rise iDragon");
if (c->x86_model > 2)
Index: linux/arch/i386/kernel/cpu/transmeta.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/transmeta.c
+++ linux/arch/i386/kernel/cpu/transmeta.c
@@ -5,7 +5,7 @@
#include <asm/msr.h>
#include "cpu.h"
-static void __init init_transmeta(struct cpuinfo_x86 *c)
+static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
{
unsigned int cap_mask, uk, max, dummy;
unsigned int cms_rev1, cms_rev2;
Index: linux/arch/i386/kernel/cpu/umc.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/umc.c
+++ linux/arch/i386/kernel/cpu/umc.c
@@ -5,10 +5,6 @@
/* UMC chips appear to be only either 386 or 486, so no special init takes place.
*/
-static void __init init_umc(struct cpuinfo_x86 * c)
-{
-
-}
static struct cpu_dev umc_cpu_dev __cpuinitdata = {
.c_vendor = "UMC",
@@ -21,7 +17,6 @@ static struct cpu_dev umc_cpu_dev __cpui
}
},
},
- .c_init = init_umc,
};
int __init umc_init_cpu(void)
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [142/145] i386: mark cpu identify functions as __cpuinit
[not found] <20060810 935.775038000@suse.de>
` (140 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [141/145] i386: mark cpu init functions as __cpuinit, data " Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [143/145] i386: mark cpu cache " Andi Kleen
` (3 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Magnus Damm <magnus@valinux.co.jp>
Mark i386-specific cpu identification functions as __cpuinit. They are all only
called from arch/i386/kernel/cpu/common.c:identify_cpu(), which is already
marked as __cpuinit.
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/cpu/cyrix.c | 4 ++--
arch/i386/kernel/cpu/nexgen.c | 4 ++--
arch/i386/kernel/cpu/transmeta.c | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
Index: linux/arch/i386/kernel/cpu/cyrix.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/cyrix.c
+++ linux/arch/i386/kernel/cpu/cyrix.c
@@ -12,7 +12,7 @@
/*
* Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
*/
-static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
{
unsigned char ccr2, ccr3;
unsigned long flags;
@@ -394,7 +394,7 @@ static inline int test_cyrix_52div(void)
return (unsigned char) (test >> 8) == 0x02;
}
-static void __init cyrix_identify(struct cpuinfo_x86 * c)
+static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c)
{
/* Detect Cyrix with disabled CPUID */
if ( c->x86 == 4 && test_cyrix_52div() ) {
Index: linux/arch/i386/kernel/cpu/nexgen.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/nexgen.c
+++ linux/arch/i386/kernel/cpu/nexgen.c
@@ -10,7 +10,7 @@
* to have CPUID. (Thanks to Herbert Oppmann)
*/
-static int __init deep_magic_nexgen_probe(void)
+static int __cpuinit deep_magic_nexgen_probe(void)
{
int ret;
@@ -32,7 +32,7 @@ static void __cpuinit init_nexgen(struct
c->x86_cache_size = 256; /* A few had 1 MB... */
}
-static void __init nexgen_identify(struct cpuinfo_x86 * c)
+static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c)
{
/* Detect NexGen with old hypercode */
if ( deep_magic_nexgen_probe() ) {
Index: linux/arch/i386/kernel/cpu/transmeta.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/transmeta.c
+++ linux/arch/i386/kernel/cpu/transmeta.c
@@ -85,7 +85,7 @@ static void __cpuinit init_transmeta(str
#endif
}
-static void __init transmeta_identify(struct cpuinfo_x86 * c)
+static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c)
{
u32 xlvl;
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [143/145] i386: mark cpu cache functions as __cpuinit
[not found] <20060810 935.775038000@suse.de>
` (141 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [142/145] i386: mark cpu identify functions as __cpuinit Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [144/145] i386: Disallow kprobes on NMI handlers Andi Kleen
` (2 subsequent siblings)
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Magnus Damm <magnus@valinux.co.jp>
Mark i386-specific cpu cache functions as __cpuinit. They are all only called
from arch/i386/kernel/cpu/common.c:display_cacheinfo(), which is already
marked as __cpuinit.
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/cpu/amd.c | 2 +-
arch/i386/kernel/cpu/centaur.c | 2 +-
arch/i386/kernel/cpu/intel.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
Index: linux/arch/i386/kernel/cpu/amd.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/amd.c
+++ linux/arch/i386/kernel/cpu/amd.c
@@ -246,7 +246,7 @@ static void __cpuinit init_amd(struct cp
num_cache_leaves = 3;
}
-static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
{
/* AMD errata T13 (order #21922) */
if ((c->x86 == 6)) {
Index: linux/arch/i386/kernel/cpu/centaur.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/centaur.c
+++ linux/arch/i386/kernel/cpu/centaur.c
@@ -442,7 +442,7 @@ static void __cpuinit init_centaur(struc
}
}
-static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
{
/* VIA C3 CPUs (670-68F) need further shifting. */
if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
Index: linux/arch/i386/kernel/cpu/intel.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/intel.c
+++ linux/arch/i386/kernel/cpu/intel.c
@@ -198,7 +198,7 @@ static void __cpuinit init_intel(struct
}
-static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
{
/* Intel PIII Tualatin. This comes in two flavours.
* One has 256kb of cache, the other 512. We have no way
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [144/145] i386: Disallow kprobes on NMI handlers
[not found] <20060810 935.775038000@suse.de>
` (142 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [143/145] i386: mark cpu cache " Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
2006-08-10 19:37 ` [PATCH for review] [145/145] " Andi Kleen
[not found] ` <20060810193518.394E413B90__40006.6926530146$1155241071$gmane$org@wotan.suse.de>
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Fernando Luis =?ISO-8859-1?Q?V=E1zquez?= Cao <fernando@oss.ntt.co.jp>
A kprobe executes IRET early and that could cause NMI recursion and stack
corruption.
Note: This problem was originally spotted by Andi Kleen. This patch
adds fixes not included in his original patch.
[AK: Jan Beulich originally discovered these classes of bugs]
Signed-off-by: Fernando Vazquez <fernando@intellilink.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
---
---
arch/i386/kernel/mca.c | 8 +++++---
1 files changed, 5 insertions(+), 3 deletions(-)
Index: linux/arch/i386/kernel/mca.c
===================================================================
--- linux.orig/arch/i386/kernel/mca.c
+++ linux/arch/i386/kernel/mca.c
@@ -42,6 +42,7 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mca.h>
+#include <linux/kprobes.h>
#include <asm/system.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
@@ -414,7 +415,8 @@ subsys_initcall(mca_init);
/*--------------------------------------------------------------------*/
-static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
+static __kprobes void
+mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
{
int slot = mca_dev->slot;
@@ -444,7 +446,7 @@ static void mca_handle_nmi_device(struct
/*--------------------------------------------------------------------*/
-static int mca_handle_nmi_callback(struct device *dev, void *data)
+static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data)
{
struct mca_device *mca_dev = to_mca_device(dev);
unsigned char pos5;
@@ -462,7 +464,7 @@ static int mca_handle_nmi_callback(struc
return 0;
}
-void mca_handle_nmi(void)
+void __kprobes mca_handle_nmi(void)
{
/* First try - scan the various adapters and see if a specific
* adapter was responsible for the error.
^ permalink raw reply [flat|nested] 199+ messages in thread* [PATCH for review] [145/145] i386: Disallow kprobes on NMI handlers
[not found] <20060810 935.775038000@suse.de>
` (143 preceding siblings ...)
2006-08-10 19:37 ` [PATCH for review] [144/145] i386: Disallow kprobes on NMI handlers Andi Kleen
@ 2006-08-10 19:37 ` Andi Kleen
[not found] ` <20060810193518.394E413B90__40006.6926530146$1155241071$gmane$org@wotan.suse.de>
145 siblings, 0 replies; 199+ messages in thread
From: Andi Kleen @ 2006-08-10 19:37 UTC (permalink / raw)
r
From: Fernando Luis =?ISO-8859-1?Q?V=E1zquez?= Cao <fernando@oss.ntt.co.jp>
A kprobe executes IRET early and that could cause NMI recursion and stack
corruption.
Note: This problem was originally spotted and solved by Andi Kleen in the
x86_64 architecture. This patch is an adaption of his patch for i386.
AK: Merged with current code which was a bit different.
AK: Removed printk in nmi handler that shouldn't be there in the first place
AK: Added missing include.
Signed-off-by: Fernando Vazquez <fernando@intellilink.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
---
---
arch/i386/kernel/entry.S | 2 +-
arch/i386/kernel/nmi.c | 6 +++---
arch/i386/kernel/traps.c | 15 +++++++++------
3 files changed, 13 insertions(+), 10 deletions(-)
Index: linux/arch/i386/kernel/entry.S
===================================================================
--- linux.orig/arch/i386/kernel/entry.S
+++ linux/arch/i386/kernel/entry.S
@@ -725,7 +725,7 @@ debug_stack_correct:
* check whether we got an NMI on the debug path where the debug
* fault happened on the sysenter path.
*/
-ENTRY(nmi)
+KPROBE_ENTRY(nmi)
RING0_INT_FRAME
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -22,6 +22,7 @@
#include <linux/sysctl.h>
#include <linux/percpu.h>
#include <linux/dmi.h>
+#include <linux/kprobes.h>
#include <asm/smp.h>
#include <asm/nmi.h>
@@ -882,7 +883,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
extern void die_nmi(struct pt_regs *, const char *msg);
-int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
+__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{
/*
@@ -962,8 +963,7 @@ int nmi_watchdog_tick (struct pt_regs *
* This matches the old behaviour.
*/
rc = 1;
- } else
- printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
+ }
}
done:
return rc;
Index: linux/arch/i386/kernel/traps.c
===================================================================
--- linux.orig/arch/i386/kernel/traps.c
+++ linux/arch/i386/kernel/traps.c
@@ -680,7 +680,8 @@ gp_in_kernel:
}
}
-static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
+static __kprobes void
+mem_parity_error(unsigned char reason, struct pt_regs * regs)
{
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
"CPU %d.\n", reason, smp_processor_id());
@@ -695,7 +696,8 @@ static void mem_parity_error(unsigned ch
clear_mem_error(reason);
}
-static void io_check_error(unsigned char reason, struct pt_regs * regs)
+static __kprobes void
+io_check_error(unsigned char reason, struct pt_regs * regs)
{
unsigned long i;
@@ -711,7 +713,8 @@ static void io_check_error(unsigned char
outb(reason, 0x61);
}
-static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+static __kprobes void
+unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
{
#ifdef CONFIG_MCA
/* Might actually be able to figure out what the guilty party
@@ -732,7 +735,7 @@ static void unknown_nmi_error(unsigned c
static DEFINE_SPINLOCK(nmi_print_lock);
-void die_nmi (struct pt_regs *regs, const char *msg)
+void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
{
if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
NOTIFY_STOP)
@@ -764,7 +767,7 @@ void die_nmi (struct pt_regs *regs, cons
do_exit(SIGSEGV);
}
-static void default_do_nmi(struct pt_regs * regs)
+static __kprobes void default_do_nmi(struct pt_regs * regs)
{
unsigned char reason = 0;
@@ -802,7 +805,7 @@ static void default_do_nmi(struct pt_reg
reassert_nmi();
}
-fastcall void do_nmi(struct pt_regs * regs, long error_code)
+fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
{
int cpu;
^ permalink raw reply [flat|nested] 199+ messages in thread[parent not found: <20060810193518.394E413B90__40006.6926530146$1155241071$gmane$org@wotan.suse.de>]
* Re: [PATCH for review] [6/145] x86_64: Utilize performance counter reservation framework in oprofile
[not found] ` <20060810193518.394E413B90__40006.6926530146$1155241071$gmane$org@wotan.suse.de>
@ 2006-08-10 22:42 ` Oleg Verych
0 siblings, 0 replies; 199+ messages in thread
From: Oleg Verych @ 2006-08-10 22:42 UTC (permalink / raw)
To: linux-kernel
Andi Kleen:
> Index: linux/arch/i386/oprofile/nmi_int.c
> ===================================================================
> --- linux.orig/arch/i386/oprofile/nmi_int.c
> +++ linux/arch/i386/oprofile/nmi_int.c
> @@ -98,15 +98,19 @@ static void nmi_cpu_save_registers(struc
> unsigned int i;
>
> for (i = 0; i < nr_ctrs; ++i) {
> - rdmsr(counters[i].addr,
> - counters[i].saved.low,
> - counters[i].saved.high);
> + if (counters[i].addr){
needs a space between ")" and "{", i.e. ") {"
> + rdmsr(counters[i].addr,
> + counters[i].saved.low,
> + counters[i].saved.high);
> + }
> }
>
> for (i = 0; i < nr_ctrls; ++i) {
> - rdmsr(controls[i].addr,
> - controls[i].saved.low,
> - controls[i].saved.high);
> + if (controls[i].addr){
likewise
> + rdmsr(controls[i].addr,
> + controls[i].saved.low,
> + controls[i].saved.high);
> + }
> }
> }
>
> @@ -205,15 +209,19 @@ static void nmi_restore_registers(struct
> unsigned int i;
>
> for (i = 0; i < nr_ctrls; ++i) {
> - wrmsr(controls[i].addr,
> - controls[i].saved.low,
> - controls[i].saved.high);
> + if (controls[i].addr){
likewise
> + wrmsr(controls[i].addr,
> + controls[i].saved.low,
> + controls[i].saved.high);
> + }
> }
>
> for (i = 0; i < nr_ctrs; ++i) {
> - wrmsr(counters[i].addr,
> - counters[i].saved.low,
> - counters[i].saved.high);
> + if (counters[i].addr){
likewise
> + wrmsr(counters[i].addr,
> + counters[i].saved.low,
> + counters[i].saved.high);
> + }
> }
> }
>
> @@ -234,6 +242,7 @@ static void nmi_cpu_shutdown(void * dumm
> apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
> apic_write(APIC_LVTERR, v);
> nmi_restore_registers(msrs);
> + model->shutdown(msrs);
> }
>
>
> @@ -284,6 +293,14 @@ static int nmi_create_files(struct super
> struct dentry * dir;
> char buf[4];
>
> + /* quick little hack to _not_ expose a counter if it is not
> + * available for use. This should protect userspace app.
> + * NOTE: assumes 1:1 mapping here (that counters are organized
> + * sequentially in their struct assignment).
> + */
> + if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
> + continue;
> +
> snprintf(buf, sizeof(buf), "%d", i);
> dir = oprofilefs_mkdir(sb, root, buf);
> oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
> Index: linux/arch/i386/oprofile/op_model_athlon.c
> ===================================================================
> --- linux.orig/arch/i386/oprofile/op_model_athlon.c
> +++ linux/arch/i386/oprofile/op_model_athlon.c
> @@ -21,10 +21,12 @@
> #define NUM_COUNTERS 4
> #define NUM_CONTROLS 4
>
> +#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
> #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
> #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
> #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
>
> +#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
> #define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
> #define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
> #define CTRL_SET_ACTIVE(n) (n |= (1<<22))
> @@ -40,15 +42,21 @@ static unsigned long reset_value[NUM_COU
>
> static void athlon_fill_in_addresses(struct op_msrs * const msrs)
> {
> - msrs->counters[0].addr = MSR_K7_PERFCTR0;
> - msrs->counters[1].addr = MSR_K7_PERFCTR1;
> - msrs->counters[2].addr = MSR_K7_PERFCTR2;
> - msrs->counters[3].addr = MSR_K7_PERFCTR3;
> -
> - msrs->controls[0].addr = MSR_K7_EVNTSEL0;
> - msrs->controls[1].addr = MSR_K7_EVNTSEL1;
> - msrs->controls[2].addr = MSR_K7_EVNTSEL2;
> - msrs->controls[3].addr = MSR_K7_EVNTSEL3;
> + int i;
> +
> + for (i=0; i < NUM_COUNTERS; i++) {
> + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
> + msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
> + else
> + msrs->counters[i].addr = 0;
> + }
> +
> + for (i=0; i < NUM_CONTROLS; i++) {
> + if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
> + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
> + else
> + msrs->controls[i].addr = 0;
> + }
> }
>
>
> @@ -59,19 +67,23 @@ static void athlon_setup_ctrs(struct op_
>
> /* clear all counters */
> for (i = 0 ; i < NUM_CONTROLS; ++i) {
> + if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
> + continue;
> CTRL_READ(low, high, msrs, i);
> CTRL_CLEAR(low);
> CTRL_WRITE(low, high, msrs, i);
> }
> -
> +
> /* avoid a false detection of ctr overflows in NMI handler */
> for (i = 0; i < NUM_COUNTERS; ++i) {
> + if (unlikely(!CTR_IS_RESERVED(msrs,i)))
> + continue;
> CTR_WRITE(1, msrs, i);
> }
>
> /* enable active counters */
> for (i = 0; i < NUM_COUNTERS; ++i) {
> - if (counter_config[i].enabled) {
> + if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
> reset_value[i] = counter_config[i].count;
>
> CTR_WRITE(counter_config[i].count, msrs, i);
> @@ -98,6 +110,8 @@ static int athlon_check_ctrs(struct pt_r
> int i;
>
> for (i = 0 ; i < NUM_COUNTERS; ++i) {
> + if (!reset_value[i])
> + continue;
> CTR_READ(low, high, msrs, i);
> if (CTR_OVERFLOWED(low)) {
> oprofile_add_sample(regs, i);
> @@ -132,12 +146,27 @@ static void athlon_stop(struct op_msrs c
> /* Subtle: stop on all counters to avoid race with
> * setting our pm callback */
> for (i = 0 ; i < NUM_COUNTERS ; ++i) {
> + if (!reset_value[i])
> + continue;
> CTRL_READ(low, high, msrs, i);
> CTRL_SET_INACTIVE(low);
> CTRL_WRITE(low, high, msrs, i);
> }
> }
>
> +static void athlon_shutdown(struct op_msrs const * const msrs)
> +{
> + int i;
> +
> + for (i = 0 ; i < NUM_COUNTERS ; ++i) {
> + if (CTR_IS_RESERVED(msrs,i))
> + release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
> + }
> + for (i = 0 ; i < NUM_CONTROLS ; ++i) {
> + if (CTRL_IS_RESERVED(msrs,i))
> + release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
> + }
> +}
>
> struct op_x86_model_spec const op_athlon_spec = {
> .num_counters = NUM_COUNTERS,
> @@ -146,5 +175,6 @@ struct op_x86_model_spec const op_athlon
> .setup_ctrs = &athlon_setup_ctrs,
> .check_ctrs = &athlon_check_ctrs,
> .start = &athlon_start,
> - .stop = &athlon_stop
> + .stop = &athlon_stop,
> + .shutdown = &athlon_shutdown
> };
> Index: linux/arch/i386/oprofile/op_model_p4.c
> ===================================================================
> --- linux.orig/arch/i386/oprofile/op_model_p4.c
> +++ linux/arch/i386/oprofile/op_model_p4.c
> @@ -32,7 +32,7 @@
> #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
>
> static unsigned int num_counters = NUM_COUNTERS_NON_HT;
> -
> +static unsigned int num_controls = NUM_CONTROLS_NON_HT;
>
> /* this has to be checked dynamically since the
> hyper-threadedness of a chip is discovered at
> @@ -40,8 +40,10 @@ static unsigned int num_counters = NUM_C
> static inline void setup_num_counters(void)
> {
> #ifdef CONFIG_SMP
> - if (smp_num_siblings == 2)
> + if (smp_num_siblings == 2){
likewise
> num_counters = NUM_COUNTERS_HT2;
> + num_controls = NUM_CONTROLS_HT2;
> + }
> #endif
> }
>
> @@ -97,15 +99,6 @@ static struct p4_counter_binding p4_coun
>
> #define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
maybe parenthesizing the expansion, i.e. (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT), would be better
>
> -/* All cccr we don't use. */
> -static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
> - MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3,
> - MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3,
> - MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3,
> - MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1,
> - MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3
> -};
> -
> /* p4 event codes in libop/op_event.h are indices into this table. */
>
> static struct p4_event_binding p4_events[NUM_EVENTS] = {
> @@ -372,6 +365,8 @@ static struct p4_event_binding p4_events
> #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
> #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
>
> +#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
> +#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
> #define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
> #define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
> #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
> @@ -401,29 +396,34 @@ static unsigned long reset_value[NUM_COU
> static void p4_fill_in_addresses(struct op_msrs * const msrs)
> {
> unsigned int i;
> - unsigned int addr, stag;
> + unsigned int addr, cccraddr, stag;
>
> setup_num_counters();
> stag = get_stagger();
>
> - /* the counter registers we pay attention to */
> + /* initialize some registers */
> for (i = 0; i < num_counters; ++i) {
> - msrs->counters[i].addr =
> - p4_counters[VIRT_CTR(stag, i)].counter_address;
> + msrs->counters[i].addr = 0;
> }
> -
> - /* FIXME: bad feeling, we don't save the 10 counters we don't use. */
> -
> - /* 18 CCCR registers */
> - for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
> - addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
> - msrs->controls[i].addr = addr;
> + for (i = 0; i < num_controls; ++i) {
> + msrs->controls[i].addr = 0;
> }
>
> + /* the counter & cccr registers we pay attention to */
> + for (i = 0; i < num_counters; ++i) {
> + addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
> + cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
> + if (reserve_perfctr_nmi(addr)){
"){" needs a space between ")" and "{", i.e. ") {"
> + msrs->counters[i].addr = addr;
> + msrs->controls[i].addr = cccraddr;
> + }
> + }
> +
> /* 43 ESCR registers in three or four discontiguous group */
> for (addr = MSR_P4_BSU_ESCR0 + stag;
> addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
> - msrs->controls[i].addr = addr;
> + if (reserve_evntsel_nmi(addr))
> + msrs->controls[i].addr = addr;
> }
>
> /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
> @@ -431,47 +431,57 @@ static void p4_fill_in_addresses(struct
> if (boot_cpu_data.x86_model >= 0x3) {
> for (addr = MSR_P4_BSU_ESCR0 + stag;
> addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
> - msrs->controls[i].addr = addr;
> + if (reserve_evntsel_nmi(addr))
> + msrs->controls[i].addr = addr;
anyways 'if' opens braces, maybe move init of 'addr' before 'for'
> }
> } else {
> for (addr = MSR_P4_IQ_ESCR0 + stag;
> addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
> - msrs->controls[i].addr = addr;
> + if (reserve_evntsel_nmi(addr))
> + msrs->controls[i].addr = addr;
> }
> }
>
> for (addr = MSR_P4_RAT_ESCR0 + stag;
> addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
> - msrs->controls[i].addr = addr;
> + if (reserve_evntsel_nmi(addr))
> + msrs->controls[i].addr = addr;
> }
>
> for (addr = MSR_P4_MS_ESCR0 + stag;
> addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
> - msrs->controls[i].addr = addr;
> + if (reserve_evntsel_nmi(addr))
> + msrs->controls[i].addr = addr;
> }
>
> for (addr = MSR_P4_IX_ESCR0 + stag;
> addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
> - msrs->controls[i].addr = addr;
> + if (reserve_evntsel_nmi(addr))
> + msrs->controls[i].addr = addr;
> }
>
> /* there are 2 remaining non-contiguously located ESCRs */
>
> if (num_counters == NUM_COUNTERS_NON_HT) {
> /* standard non-HT CPUs handle both remaining ESCRs*/
> - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
> - msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
> + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
> + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
> + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
> + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
>
> } else if (stag == 0) {
if (!stag)
> /* HT CPUs give the first remainder to the even thread, as
> the 32nd control register */
> - msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
> + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
> + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
>
> } else {
> /* and two copies of the second to the odd thread,
> for the 22st and 23nd control registers */
> - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
> - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
> + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
> + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
> + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
> + }
> }
> }
>
> @@ -544,7 +554,6 @@ static void p4_setup_ctrs(struct op_msrs
> {
> unsigned int i;
> unsigned int low, high;
> - unsigned int addr;
> unsigned int stag;
>
> stag = get_stagger();
> @@ -557,59 +566,24 @@ static void p4_setup_ctrs(struct op_msrs
>
> /* clear the cccrs we will use */
> for (i = 0 ; i < num_counters ; i++) {
> + if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
> + continue;
> rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
> CCCR_CLEAR(low);
> CCCR_SET_REQUIRED_BITS(low);
> wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
> }
>
> - /* clear cccrs outside our concern */
> - for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
> - rdmsr(p4_unused_cccr[i], low, high);
> - CCCR_CLEAR(low);
> - CCCR_SET_REQUIRED_BITS(low);
> - wrmsr(p4_unused_cccr[i], low, high);
> - }
> -
> /* clear all escrs (including those outside our concern) */
> - for (addr = MSR_P4_BSU_ESCR0 + stag;
> - addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) {
> - wrmsr(addr, 0, 0);
> - }
> -
> - /* On older models clear also MSR_P4_IQ_ESCR0/1 */
> - if (boot_cpu_data.x86_model < 0x3) {
> - wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
> - wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
> - }
> -
> - for (addr = MSR_P4_RAT_ESCR0 + stag;
> - addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
> - wrmsr(addr, 0, 0);
> - }
> -
> - for (addr = MSR_P4_MS_ESCR0 + stag;
> - addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){
> - wrmsr(addr, 0, 0);
> - }
> -
> - for (addr = MSR_P4_IX_ESCR0 + stag;
> - addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){
> - wrmsr(addr, 0, 0);
> + for (i = num_counters; i < num_controls; i++) {
> + if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
> + continue;
> + wrmsr(msrs->controls[i].addr, 0, 0);
> }
>
> - if (num_counters == NUM_COUNTERS_NON_HT) {
> - wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
> - wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
> - } else if (stag == 0) {
> - wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
> - } else {
> - wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
> - }
> -
> /* setup all counters */
> for (i = 0 ; i < num_counters ; ++i) {
> - if (counter_config[i].enabled) {
> + if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
> reset_value[i] = counter_config[i].count;
> pmc_setup_one_p4_counter(i);
> CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
> @@ -696,12 +670,32 @@ static void p4_stop(struct op_msrs const
> stag = get_stagger();
>
> for (i = 0; i < num_counters; ++i) {
> + if (!reset_value[i])
> + continue;
> CCCR_READ(low, high, VIRT_CTR(stag, i));
> CCCR_SET_DISABLE(low);
> CCCR_WRITE(low, high, VIRT_CTR(stag, i));
> }
> }
>
> +static void p4_shutdown(struct op_msrs const * const msrs)
> +{
> + int i;
> +
> + for (i = 0 ; i < num_counters ; ++i) {
> + if (CTR_IS_RESERVED(msrs,i))
> + release_perfctr_nmi(msrs->counters[i].addr);
> + }
> + /* some of the control registers are specially reserved in
> + * conjunction with the counter registers (hence the starting offset).
> + * This saves a few bits.
> + */
> + for (i = num_counters ; i < num_controls ; ++i) {
> + if (CTRL_IS_RESERVED(msrs,i))
> + release_evntsel_nmi(msrs->controls[i].addr);
> + }
> +}
> +
>
> #ifdef CONFIG_SMP
> struct op_x86_model_spec const op_p4_ht2_spec = {
> @@ -711,7 +705,8 @@ struct op_x86_model_spec const op_p4_ht2
> .setup_ctrs = &p4_setup_ctrs,
> .check_ctrs = &p4_check_ctrs,
> .start = &p4_start,
> - .stop = &p4_stop
> + .stop = &p4_stop,
> + .shutdown = &p4_shutdown
> };
> #endif
>
> @@ -722,5 +717,6 @@ struct op_x86_model_spec const op_p4_spe
> .setup_ctrs = &p4_setup_ctrs,
> .check_ctrs = &p4_check_ctrs,
> .start = &p4_start,
> - .stop = &p4_stop
> + .stop = &p4_stop,
> + .shutdown = &p4_shutdown
> };
> Index: linux/arch/i386/oprofile/op_model_ppro.c
> ===================================================================
> --- linux.orig/arch/i386/oprofile/op_model_ppro.c
> +++ linux/arch/i386/oprofile/op_model_ppro.c
> @@ -22,10 +22,12 @@
> #define NUM_COUNTERS 2
> #define NUM_CONTROLS 2
>
> +#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
> #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
> #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
> #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
>
> +#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
> #define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
> #define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
> #define CTRL_SET_ACTIVE(n) (n |= (1<<22))
> @@ -41,11 +43,21 @@ static unsigned long reset_value[NUM_COU
>
> static void ppro_fill_in_addresses(struct op_msrs * const msrs)
> {
> - msrs->counters[0].addr = MSR_P6_PERFCTR0;
> - msrs->counters[1].addr = MSR_P6_PERFCTR1;
> + int i;
> +
> + for (i=0; i < NUM_COUNTERS; i++) {
> + if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
> + msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
> + else
> + msrs->counters[i].addr = 0;
> + }
>
> - msrs->controls[0].addr = MSR_P6_EVNTSEL0;
> - msrs->controls[1].addr = MSR_P6_EVNTSEL1;
> + for (i=0; i < NUM_CONTROLS; i++) {
> + if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
> + msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
> + else
> + msrs->controls[i].addr = 0;
> + }
> }
>
>
> @@ -56,6 +68,8 @@ static void ppro_setup_ctrs(struct op_ms
>
> /* clear all counters */
> for (i = 0 ; i < NUM_CONTROLS; ++i) {
> + if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
> + continue;
> CTRL_READ(low, high, msrs, i);
> CTRL_CLEAR(low);
> CTRL_WRITE(low, high, msrs, i);
> @@ -63,12 +77,14 @@ static void ppro_setup_ctrs(struct op_ms
>
> /* avoid a false detection of ctr overflows in NMI handler */
> for (i = 0; i < NUM_COUNTERS; ++i) {
> + if (unlikely(!CTR_IS_RESERVED(msrs,i)))
> + continue;
> CTR_WRITE(1, msrs, i);
> }
>
> /* enable active counters */
> for (i = 0; i < NUM_COUNTERS; ++i) {
> - if (counter_config[i].enabled) {
> + if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
> reset_value[i] = counter_config[i].count;
>
> CTR_WRITE(counter_config[i].count, msrs, i);
> @@ -81,6 +97,8 @@ static void ppro_setup_ctrs(struct op_ms
> CTRL_SET_UM(low, counter_config[i].unit_mask);
> CTRL_SET_EVENT(low, counter_config[i].event);
> CTRL_WRITE(low, high, msrs, i);
> + } else {
> + reset_value[i] = 0;
> }
> }
> }
> @@ -93,6 +111,8 @@ static int ppro_check_ctrs(struct pt_reg
> int i;
>
> for (i = 0 ; i < NUM_COUNTERS; ++i) {
> + if (!reset_value[i])
> + continue;
> CTR_READ(low, high, msrs, i);
> if (CTR_OVERFLOWED(low)) {
> oprofile_add_sample(regs, i);
> @@ -118,18 +138,38 @@ static int ppro_check_ctrs(struct pt_reg
> static void ppro_start(struct op_msrs const * const msrs)
> {
> unsigned int low,high;
> - CTRL_READ(low, high, msrs, 0);
> - CTRL_SET_ACTIVE(low);
> - CTRL_WRITE(low, high, msrs, 0);
> +
> + if (reset_value[0]) {
> + CTRL_READ(low, high, msrs, 0);
> + CTRL_SET_ACTIVE(low);
> + CTRL_WRITE(low, high, msrs, 0);
> + }
> }
>
>
> static void ppro_stop(struct op_msrs const * const msrs)
> {
> unsigned int low,high;
> - CTRL_READ(low, high, msrs, 0);
> - CTRL_SET_INACTIVE(low);
> - CTRL_WRITE(low, high, msrs, 0);
> +
> + if (reset_value[0]) {
> + CTRL_READ(low, high, msrs, 0);
> + CTRL_SET_INACTIVE(low);
> + CTRL_WRITE(low, high, msrs, 0);
> + }
> +}
> +
> +static void ppro_shutdown(struct op_msrs const * const msrs)
> +{
> + int i;
> +
> + for (i = 0 ; i < NUM_COUNTERS ; ++i) {
> + if (CTR_IS_RESERVED(msrs,i))
> + release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
> + }
> + for (i = 0 ; i < NUM_CONTROLS ; ++i) {
> + if (CTRL_IS_RESERVED(msrs,i))
> + release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
> + }
> }
>
>
> @@ -140,5 +180,6 @@ struct op_x86_model_spec const op_ppro_s
> .setup_ctrs = &ppro_setup_ctrs,
> .check_ctrs = &ppro_check_ctrs,
> .start = &ppro_start,
> - .stop = &ppro_stop
> + .stop = &ppro_stop,
> + .shutdown = &ppro_shutdown
> };
> Index: linux/arch/i386/oprofile/op_x86_model.h
> ===================================================================
> --- linux.orig/arch/i386/oprofile/op_x86_model.h
> +++ linux/arch/i386/oprofile/op_x86_model.h
> @@ -40,6 +40,7 @@ struct op_x86_model_spec {
> struct op_msrs const * const msrs);
> void (*start)(struct op_msrs const * const msrs);
> void (*stop)(struct op_msrs const * const msrs);
> + void (*shutdown)(struct op_msrs const * const msrs);
> };
>
> extern struct op_x86_model_spec const op_ppro_spec;
I hope the code works the way it should ;D
---
-o--=O`C
#oo'L O
<___=E M
^ permalink raw reply [flat|nested] 199+ messages in thread