ASoC: tas2764: Random patches from the Asahi Linux
Merge series from broonie@kernel.org: This is a random subset of the patches for the tas2764 driver that I found in the Asahi Linux tree which seemed to be clear fixes and improvements that apply easily to mainline without much effort; there's a bunch more work on the driver that should also be applicable. I've only build-tested this.
This commit is contained in: commit 0770b7cc09

.mailmap
@@ -376,6 +376,7 @@ Juha Yrjola <juha.yrjola@solidboot.com>
Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
Iskren Chernev <me@iskren.info> <iskren.chernev@gmail.com>
Kalle Valo <kvalo@kernel.org> <kvalo@codeaurora.org>
Kalle Valo <kvalo@kernel.org> <quic_kvalo@quicinc.com>
Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org>
Karthikeyan Periyasamy <quic_periyasa@quicinc.com> <periyasa@codeaurora.org>
Kathiravan T <quic_kathirav@quicinc.com> <kathirav@codeaurora.org>
CREDITS
@@ -2515,11 +2515,9 @@ D: SLS distribution
D: Initial implementation of VC's, pty's and select()

N: Pavel Machek
E: pavel@ucw.cz
E: pavel@kernel.org
P: 4096R/92DFCE96 4FA7 9EEF FCD4 C44F C585 B8C7 C060 2241 92DF CE96
D: Softcursor for vga, hypertech cdrom support, vcsa bugfix, nbd,
D: sun4/330 port, capabilities for elf, speedup for rm on ext2, USB,
D: work on suspend-to-ram/disk, killing duplicates from ioctl32,
D: NBD, Sun4/330 port, USB, work on suspend-to-ram/disk,
D: Altera SoCFPGA and Nokia N900 support.
S: Czech Republic
@@ -37,7 +37,7 @@ intended to be exhaustive.
  shadow stacks rather than GCS.

* Support for GCS is reported to userspace via HWCAP_GCS in the aux vector
  AT_HWCAP2 entry.
  AT_HWCAP entry.

* GCS is enabled per thread. While there is support for disabling GCS
  at runtime this should be done with great care.
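The fixed line above moves the HWCAP_GCS report from the AT_HWCAP2 auxv entry to AT_HWCAP. As a minimal sketch of how userspace would probe for it - assuming a libc that provides getauxval(); the fallback HWCAP_GCS value is an assumption taken from the arm64 uapi headers, so verify it against asm/hwcap.h:

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP_GCS
    #define HWCAP_GCS (1UL << 32)   /* assumed bit value; check asm/hwcap.h */
    #endif

    int main(void)
    {
        /* Per the corrected documentation: AT_HWCAP, not AT_HWCAP2. */
        unsigned long hwcap = getauxval(AT_HWCAP);

        printf("GCS %s\n", (hwcap & HWCAP_GCS) ? "supported" : "not supported");
        return 0;
    }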
@@ -8,6 +8,7 @@ title: Qualcomm Graphics Clock & Reset Controller

maintainers:
  - Taniya Das <quic_tdas@quicinc.com>
  - Imran Shaik <quic_imrashai@quicinc.com>

description: |
  Qualcomm graphics clock control module provides the clocks, resets and power
@@ -23,10 +24,12 @@ description: |
    include/dt-bindings/clock/qcom,gpucc-sm8150.h
    include/dt-bindings/clock/qcom,gpucc-sm8250.h
    include/dt-bindings/clock/qcom,gpucc-sm8350.h
    include/dt-bindings/clock/qcom,qcs8300-gpucc.h

properties:
  compatible:
    enum:
      - qcom,qcs8300-gpucc
      - qcom,sdm845-gpucc
      - qcom,sa8775p-gpucc
      - qcom,sc7180-gpucc
@@ -8,16 +8,20 @@ title: Qualcomm Camera Clock & Reset Controller on SA8775P

maintainers:
  - Taniya Das <quic_tdas@quicinc.com>
  - Imran Shaik <quic_imrashai@quicinc.com>

description: |
  Qualcomm camera clock control module provides the clocks, resets and power
  domains on SA8775p.

  See also: include/dt-bindings/clock/qcom,sa8775p-camcc.h
  See also:
    include/dt-bindings/clock/qcom,qcs8300-camcc.h
    include/dt-bindings/clock/qcom,sa8775p-camcc.h

properties:
  compatible:
    enum:
      - qcom,qcs8300-camcc
      - qcom,sa8775p-camcc

  clocks:
@@ -18,6 +18,7 @@ description: |
properties:
  compatible:
    enum:
      - qcom,qcs8300-videocc
      - qcom,sa8775p-videocc

  clocks:
@@ -0,0 +1,29 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/powertip,hx8238a.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#

title: Powertip Electronic Technology Co. 320 x 240 LCD panel

maintainers:
  - Lukasz Majewski <lukma@denx.de>

allOf:
  - $ref: panel-dpi.yaml#

properties:
  compatible:
    items:
      - const: powertip,hx8238a
      - {} # panel-dpi, but not listed here to avoid false select

  height-mm: true
  panel-timing: true
  port: true
  power-supply: true
  width-mm: true

additionalProperties: false

...
@@ -0,0 +1,29 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/powertip,st7272.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#

title: Powertip Electronic Technology Co. 320 x 240 LCD panel

maintainers:
  - Lukasz Majewski <lukma@denx.de>

allOf:
  - $ref: panel-dpi.yaml#

properties:
  compatible:
    items:
      - const: powertip,st7272
      - {} # panel-dpi, but not listed here to avoid false select

  height-mm: true
  panel-timing: true
  port: true
  power-supply: true
  width-mm: true

additionalProperties: false

...
@@ -23,7 +23,7 @@ properties:
  compatible:
    enum:
      - ti,am625-dss
      - ti,am62a7,dss
      - ti,am62a7-dss
      - ti,am65x-dss

  reg:
@@ -14,9 +14,8 @@ allOf:

description: |
  The Microchip LAN966x outband interrupt controller (OIC) maps the internal
  interrupt sources of the LAN966x device to an external interrupt.
  When the LAN966x device is used as a PCI device, the external interrupt is
  routed to the PCI interrupt.
  interrupt sources of the LAN966x device to a PCI interrupt when the LAN966x
  device is used as a PCI device.

properties:
  compatible:
@@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Qualcomm Technologies ath10k wireless devices

maintainers:
  - Kalle Valo <kvalo@kernel.org>
  - Jeff Johnson <jjohnson@kernel.org>

description:
@@ -8,7 +8,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Qualcomm Technologies ath11k wireless devices (PCIe)

maintainers:
  - Kalle Valo <kvalo@kernel.org>
  - Jeff Johnson <jjohnson@kernel.org>

description: |
@@ -8,7 +8,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Qualcomm Technologies ath11k wireless devices

maintainers:
  - Kalle Valo <kvalo@kernel.org>
  - Jeff Johnson <jjohnson@kernel.org>

description: |
@@ -9,7 +9,6 @@ title: Qualcomm Technologies ath12k wireless devices (PCIe) with WSI interface

maintainers:
  - Jeff Johnson <jjohnson@kernel.org>
  - Kalle Valo <kvalo@kernel.org>

description: |
  Qualcomm Technologies IEEE 802.11be PCIe devices with WSI interface.
@@ -9,7 +9,6 @@ title: Qualcomm Technologies ath12k wireless devices (PCIe)

maintainers:
  - Jeff Johnson <quic_jjohnson@quicinc.com>
  - Kalle Valo <kvalo@kernel.org>

description:
  Qualcomm Technologies IEEE 802.11be PCIe devices.
@@ -36,6 +36,7 @@ properties:
      - qcom,qcs404-qfprom
      - qcom,qcs615-qfprom
      - qcom,qcs8300-qfprom
      - qcom,sar2130p-qfprom
      - qcom,sc7180-qfprom
      - qcom,sc7280-qfprom
      - qcom,sc8280xp-qfprom
@@ -22,7 +22,7 @@ description:
  Each sub-node is identified using the node's name, with valid values listed
  for each of the pmics below.

  For mp5496, s1, s2
  For mp5496, s1, s2, l2, l5

  For pm2250, s1, s2, s3, s4, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11,
  l12, l13, l14, l15, l16, l17, l18, l19, l20, l21, l22
@@ -41,6 +41,12 @@ Device Drivers Base
.. kernel-doc:: drivers/base/class.c
   :export:

.. kernel-doc:: include/linux/device/faux.h
   :internal:

.. kernel-doc:: drivers/base/faux.c
   :export:

.. kernel-doc:: drivers/base/node.c
   :internal:

Documentation/filesystems/bcachefs/SubmittingPatches.rst (new file)
@@ -0,0 +1,98 @@
Submitting patches to bcachefs:
===============================

Patches must be tested before being submitted, either with the xfstests suite
[0], or the full bcachefs test suite in ktest [1], depending on what's being
touched. Note that ktest wraps xfstests and will be an easier way to run it
for most users; it includes single-command wrappers for all the mainstream
in-kernel local filesystems.

Patches will undergo more testing after being merged (including
lockdep/kasan/preempt/etc. variants); these are not generally required to be
run by the submitter - but do put some thought into what you're changing and
which tests might be relevant: e.g. tricky memory layout work suggests kasan,
and locking work suggests lockdep; ktest includes single-command variants for
the debug build types you'll most likely need.

The exception to this rule is incomplete WIP/RFC patches: if you're working on
something nontrivial, it's encouraged to send out a WIP patch to let people
know what you're doing and make sure you're on the right track. Just make sure
it includes a brief note as to what's done and what's incomplete, to avoid
confusion.

Rigorous checkpatch.pl adherence is not required (many of its warnings are
considered out of date), but try not to deviate too much without reason.

Focus on writing code that reads well and is organized well; code should be
aesthetically pleasing.

CI:
===

When running the full test suite, instead of running your tests locally it's
preferable to let a server farm do it in parallel, and then have the results
in a nice test dashboard (which can tell you which failures are new, and
presents results in a git log view, avoiding the need for most bisecting).

That exists [2], and community members may request an account. If you work for
a big tech company, you'll need to help out with server costs to get access -
but the CI is not restricted to running bcachefs tests: it runs any ktest test
(which generally makes it easy to wrap other tests that can run in qemu).

Other things to think about:
============================

- How will we debug this code? Is there sufficient introspection to diagnose
  when something starts acting wonky on a user machine?

  We don't necessarily need every single field of every data structure visible
  with introspection, but having the important fields of all the core data
  types wired up makes debugging drastically easier - a bit of thoughtful
  foresight greatly reduces the need to have people build custom kernels with
  debug patches.

  More broadly, think about all the debug tooling that might be needed.

- Does it make the codebase more or less of a mess? Can we also try to do some
  organizing, too?

- Do new tests need to be written? New assertions? How do we know and verify
  that the code is correct, and what happens if something goes wrong?

  We don't yet have automated code coverage analysis or easy fault injection -
  but for now, pretend we did and ask what they might tell us.

  Assertions are hugely important, given that we don't yet have a systems
  language that can do ergonomic embedded correctness proofs. Hitting an assert
  in testing is much better than wandering off into undefined behaviour la-la
  land - use them. Use them judiciously, and not as a replacement for proper
  error handling, but use them.

- Does it need to be performance tested? Should we add new performance counters?

  bcachefs has a set of persistent runtime counters which can be viewed with
  the 'bcachefs fs top' command; this should give users a basic idea of what
  their filesystem is currently doing. If you're doing a new feature or looking
  at old code, think about whether anything should be added.

- If it's a new on-disk format feature - have upgrades and downgrades been
  tested? (Automated tests exist but aren't in the CI, due to the hassle of
  disk image management; coordinate to have them run.)

Mailing list, IRC:
==================

Patches should hit the list [3], but much discussion and code review happens on
IRC as well [4]; many people appreciate the more conversational approach and
quicker feedback.

Additionally, we have a lively user community doing excellent QA work, which
exists primarily on IRC. Please make use of that resource; user feedback is
important for any nontrivial feature, and documenting it in commit messages
would be a good idea.

[0]: git://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git
[1]: https://evilpiepirate.org/git/ktest.git/
[2]: https://evilpiepirate.org/~testdashboard/ci/
[3]: linux-bcachefs@vger.kernel.org
[4]: irc.oftc.net#bcache, #bcachefs-dev
@@ -9,4 +9,5 @@ bcachefs Documentation
   :numbered:

   CodingStyle
   SubmittingPatches
   errorcodes
@@ -1524,7 +1524,8 @@ attribute-sets:
        nested-attributes: bitset
      -
        name: hwtstamp-flags
        type: u32
        type: nest
        nested-attributes: bitset

operations:
  enum-model: directional
@@ -369,8 +369,8 @@ to their default.

    addr.can_family = AF_CAN;
    addr.can_ifindex = if_nametoindex("can0");
    addr.tp.tx_id = 0x18DA42F1 | CAN_EFF_FLAG;
    addr.tp.rx_id = 0x18DAF142 | CAN_EFF_FLAG;
    addr.can_addr.tp.tx_id = 0x18DA42F1 | CAN_EFF_FLAG;
    addr.can_addr.tp.rx_id = 0x18DAF142 | CAN_EFF_FLAG;

    ret = bind(s, (struct sockaddr *)&addr, sizeof(addr));
    if (ret < 0)
@@ -1419,7 +1419,7 @@ fetch) is injected in the guest.
S390:
^^^^^

Returns -EINVAL if the VM has the KVM_VM_S390_UCONTROL flag set.
Returns -EINVAL or -EEXIST if the VM has the KVM_VM_S390_UCONTROL flag set.
Returns -EINVAL if called on a protected VM.

4.36 KVM_SET_TSS_ADDR
MAINTAINERS
@@ -2209,7 +2209,6 @@ F: sound/soc/codecs/cs42l84.*
F:	sound/soc/codecs/ssm3515.c

ARM/APPLE MACHINE SUPPORT
M:	Hector Martin <marcan@marcan.st>
M:	Sven Peter <sven@svenpeter.dev>
R:	Alyssa Rosenzweig <alyssa@rosenzweig.io>
L:	asahi@lists.linux.dev
@@ -3655,7 +3654,6 @@ F: Documentation/devicetree/bindings/phy/phy-ath79-usb.txt
F:	drivers/phy/qualcomm/phy-ath79-usb.c

ATHEROS ATH GENERIC UTILITIES
M:	Kalle Valo <kvalo@kernel.org>
M:	Jeff Johnson <jjohnson@kernel.org>
L:	linux-wireless@vger.kernel.org
S:	Supported
@@ -3860,13 +3858,6 @@ W: https://ez.analog.com/linux-software-drivers
F:	Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml
F:	drivers/pwm/pwm-axi-pwmgen.c

AXXIA I2C CONTROLLER
M:	Krzysztof Adamski <krzysztof.adamski@nokia.com>
L:	linux-i2c@vger.kernel.org
S:	Maintained
F:	Documentation/devicetree/bindings/i2c/i2c-axxia.txt
F:	drivers/i2c/busses/i2c-axxia.c

AZ6007 DVB DRIVER
M:	Mauro Carvalho Chehab <mchehab@kernel.org>
L:	linux-media@vger.kernel.org
@@ -3955,6 +3946,7 @@ M: Kent Overstreet <kent.overstreet@linux.dev>
L:	linux-bcachefs@vger.kernel.org
S:	Supported
C:	irc://irc.oftc.net/bcache
P:	Documentation/filesystems/bcachefs/SubmittingPatches.rst
T:	git https://evilpiepirate.org/git/bcachefs.git
F:	fs/bcachefs/
F:	Documentation/filesystems/bcachefs/
@@ -7116,8 +7108,10 @@ F: rust/kernel/device.rs
F:	rust/kernel/device_id.rs
F:	rust/kernel/devres.rs
F:	rust/kernel/driver.rs
F:	rust/kernel/faux.rs
F:	rust/kernel/platform.rs
F:	samples/rust/rust_driver_platform.rs
F:	samples/rust/rust_driver_faux.rs

DRIVERS FOR OMAP ADAPTIVE VOLTAGE SCALING (AVS)
M:	Nishanth Menon <nm@ti.com>
@@ -9418,7 +9412,7 @@ F: fs/freevxfs/

FREEZER
M:	"Rafael J. Wysocki" <rafael@kernel.org>
M:	Pavel Machek <pavel@ucw.cz>
M:	Pavel Machek <pavel@kernel.org>
L:	linux-pm@vger.kernel.org
S:	Supported
F:	Documentation/power/freezing-of-tasks.rst
@@ -9878,7 +9872,7 @@ S: Maintained
F:	drivers/staging/gpib/

GPIO ACPI SUPPORT
M:	Mika Westerberg <mika.westerberg@linux.intel.com>
M:	Mika Westerberg <westeri@kernel.org>
M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
L:	linux-gpio@vger.kernel.org
L:	linux-acpi@vger.kernel.org
@@ -10253,7 +10247,7 @@ F: drivers/video/fbdev/hgafb.c

HIBERNATION (aka Software Suspend, aka swsusp)
M:	"Rafael J. Wysocki" <rafael@kernel.org>
M:	Pavel Machek <pavel@ucw.cz>
M:	Pavel Machek <pavel@kernel.org>
L:	linux-pm@vger.kernel.org
S:	Supported
B:	https://bugzilla.kernel.org
@@ -10822,7 +10816,7 @@ S: Odd Fixes
F:	drivers/tty/hvc/

I2C ACPI SUPPORT
M:	Mika Westerberg <mika.westerberg@linux.intel.com>
M:	Mika Westerberg <westeri@kernel.org>
L:	linux-i2c@vger.kernel.org
L:	linux-acpi@vger.kernel.org
S:	Maintained
@@ -13124,8 +13118,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har
F:	scripts/leaking_addresses.pl

LED SUBSYSTEM
M:	Pavel Machek <pavel@ucw.cz>
M:	Lee Jones <lee@kernel.org>
M:	Pavel Machek <pavel@kernel.org>
L:	linux-leds@vger.kernel.org
S:	Maintained
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lee/leds.git
@@ -16438,7 +16432,7 @@ X: drivers/net/can/
X:	drivers/net/wireless/

NETWORKING DRIVERS (WIRELESS)
M:	Kalle Valo <kvalo@kernel.org>
M:	Johannes Berg <johannes@sipsolutions.net>
L:	linux-wireless@vger.kernel.org
S:	Maintained
W:	https://wireless.wiki.kernel.org/
@@ -16462,6 +16456,22 @@ F: include/net/dsa.h
F:	net/dsa/
F:	tools/testing/selftests/drivers/net/dsa/

NETWORKING [ETHTOOL]
M:	Andrew Lunn <andrew@lunn.ch>
M:	Jakub Kicinski <kuba@kernel.org>
F:	Documentation/netlink/specs/ethtool.yaml
F:	Documentation/networking/ethtool-netlink.rst
F:	include/linux/ethtool*
F:	include/uapi/linux/ethtool*
F:	net/ethtool/
F:	tools/testing/selftests/drivers/net/*/ethtool*

NETWORKING [ETHTOOL CABLE TEST]
M:	Andrew Lunn <andrew@lunn.ch>
F:	net/ethtool/cabletest.c
F:	tools/testing/selftests/drivers/net/*/ethtool*
K:	cable_test

NETWORKING [GENERAL]
M:	"David S. Miller" <davem@davemloft.net>
M:	Eric Dumazet <edumazet@google.com>
@@ -16493,6 +16503,7 @@ F: include/linux/netdev*
F:	include/linux/netlink.h
F:	include/linux/netpoll.h
F:	include/linux/rtnetlink.h
F:	include/linux/sctp.h
F:	include/linux/seq_file_net.h
F:	include/linux/skbuff*
F:	include/net/
@@ -16509,6 +16520,7 @@ F: include/uapi/linux/netdev*
F:	include/uapi/linux/netlink.h
F:	include/uapi/linux/netlink_diag.h
F:	include/uapi/linux/rtnetlink.h
F:	include/uapi/linux/sctp.h
F:	lib/net_utils.c
F:	lib/random32.c
F:	net/
@@ -16621,6 +16633,7 @@ F: tools/testing/selftests/net/mptcp/
NETWORKING [TCP]
M:	Eric Dumazet <edumazet@google.com>
M:	Neal Cardwell <ncardwell@google.com>
R:	Kuniyuki Iwashima <kuniyu@amazon.com>
L:	netdev@vger.kernel.org
S:	Maintained
F:	Documentation/networking/net_cachelines/tcp_sock.rst
@@ -16648,6 +16661,31 @@ F: include/net/tls.h
F:	include/uapi/linux/tls.h
F:	net/tls/*

NETWORKING [SOCKETS]
M:	Eric Dumazet <edumazet@google.com>
M:	Kuniyuki Iwashima <kuniyu@amazon.com>
M:	Paolo Abeni <pabeni@redhat.com>
M:	Willem de Bruijn <willemb@google.com>
S:	Maintained
F:	include/linux/sock_diag.h
F:	include/linux/socket.h
F:	include/linux/sockptr.h
F:	include/net/sock.h
F:	include/net/sock_reuseport.h
F:	include/uapi/linux/socket.h
F:	net/core/*sock*
F:	net/core/scm.c
F:	net/socket.c

NETWORKING [UNIX SOCKETS]
M:	Kuniyuki Iwashima <kuniyu@amazon.com>
S:	Maintained
F:	include/net/af_unix.h
F:	include/net/netns/unix.h
F:	include/uapi/linux/unix_diag.h
F:	net/unix/
F:	tools/testing/selftests/net/af_unix/

NETXEN (1/10) GbE SUPPORT
M:	Manish Chopra <manishc@marvell.com>
M:	Rahul Verma <rahulv@marvell.com>
@@ -16781,7 +16819,7 @@ F: include/linux/tick.h
F:	kernel/time/tick*.*

NOKIA N900 CAMERA SUPPORT (ET8EK8 SENSOR, AD5820 FOCUS)
M:	Pavel Machek <pavel@ucw.cz>
M:	Pavel Machek <pavel@kernel.org>
M:	Sakari Ailus <sakari.ailus@iki.fi>
L:	linux-media@vger.kernel.org
S:	Maintained
@@ -17713,6 +17751,7 @@ L: netdev@vger.kernel.org
L:	dev@openvswitch.org
S:	Maintained
W:	http://openvswitch.org
F:	Documentation/networking/openvswitch.rst
F:	include/uapi/linux/openvswitch.h
F:	net/openvswitch/
F:	tools/testing/selftests/net/openvswitch/
@@ -19313,7 +19352,6 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/
F:	drivers/media/tuners/qt1010*

QUALCOMM ATH12K WIRELESS DRIVER
M:	Kalle Valo <kvalo@kernel.org>
M:	Jeff Johnson <jjohnson@kernel.org>
L:	ath12k@lists.infradead.org
S:	Supported
@@ -19323,7 +19361,6 @@ F: drivers/net/wireless/ath/ath12k/
N:	ath12k

QUALCOMM ATHEROS ATH10K WIRELESS DRIVER
M:	Kalle Valo <kvalo@kernel.org>
M:	Jeff Johnson <jjohnson@kernel.org>
L:	ath10k@lists.infradead.org
S:	Supported
@@ -19333,7 +19370,6 @@ F: drivers/net/wireless/ath/ath10k/
N:	ath10k

QUALCOMM ATHEROS ATH11K WIRELESS DRIVER
M:	Kalle Valo <kvalo@kernel.org>
M:	Jeff Johnson <jjohnson@kernel.org>
L:	ath11k@lists.infradead.org
S:	Supported
@@ -19468,6 +19504,15 @@ L: dmaengine@vger.kernel.org
S:	Supported
F:	drivers/dma/qcom/hidma*

QUALCOMM I2C QCOM GENI DRIVER
M:	Mukesh Kumar Savaliya <quic_msavaliy@quicinc.com>
M:	Viken Dadhaniya <quic_vdadhani@quicinc.com>
L:	linux-i2c@vger.kernel.org
L:	linux-arm-msm@vger.kernel.org
S:	Maintained
F:	Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml
F:	drivers/i2c/busses/i2c-qcom-geni.c

QUALCOMM I2C CCI DRIVER
M:	Loic Poulain <loic.poulain@linaro.org>
M:	Robert Foss <rfoss@kernel.org>
@@ -22807,7 +22852,7 @@ F: drivers/sh/

SUSPEND TO RAM
M:	"Rafael J. Wysocki" <rafael@kernel.org>
M:	Len Brown <len.brown@intel.com>
M:	Pavel Machek <pavel@ucw.cz>
M:	Pavel Machek <pavel@kernel.org>
L:	linux-pm@vger.kernel.org
S:	Supported
B:	https://bugzilla.kernel.org
Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 14
SUBLEVEL = 0
EXTRAVERSION = -rc1
EXTRAVERSION = -rc3
NAME = Baby Opossum Posse

# *DOCUMENTATION*
@@ -1120,8 +1120,8 @@ LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
endif

# Align the bit size of userspace programs with the kernel
KBUILD_USERCFLAGS  += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS))
KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS))
KBUILD_USERCFLAGS  += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS))
KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS))

# make the checker run with the right architecture
CHECKFLAGS += --arch=$(ARCH)
@@ -1421,18 +1421,13 @@ ifneq ($(wildcard $(resolve_btfids_O)),)
	$(Q)$(MAKE) -sC $(srctree)/tools/bpf/resolve_btfids O=$(resolve_btfids_O) clean
endif

# Clear a bunch of variables before executing the submake
ifeq ($(quiet),silent_)
tools_silent=s
endif

tools/: FORCE
	$(Q)mkdir -p $(objtree)/tools
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(tools_silent) $(filter --j% -j,$(MAKEFLAGS))" O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/
	$(Q)$(MAKE) LDFLAGS= O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/

tools/%: FORCE
	$(Q)mkdir -p $(objtree)/tools
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(tools_silent) $(filter --j% -j,$(MAKEFLAGS))" O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ $*
	$(Q)$(MAKE) LDFLAGS= O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ $*

# ---------------------------------------------------------------------------
# Kernel selftest
@@ -74,7 +74,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
/*
 * This is used to ensure we don't load something for the wrong architecture.
 */
#define elf_check_arch(x) ((x)->e_machine == EM_ALPHA)
#define elf_check_arch(x) (((x)->e_machine == EM_ALPHA) && !((x)->e_flags & EF_ALPHA_32BIT))

/*
 * These are used to set parameters in the core dumps.
@@ -137,10 +137,6 @@ extern int dump_elf_task(elf_greg_t *dest, struct task_struct *task);
	: amask (AMASK_CIX) ? "ev6" : "ev67");	\
})

#define SET_PERSONALITY(EX)					\
	set_personality(((EX).e_flags & EF_ALPHA_32BIT)		\
		? PER_LINUX_32BIT : PER_LINUX)

extern int alpha_l1i_cacheshape;
extern int alpha_l1d_cacheshape;
extern int alpha_l2_cacheshape;
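These two alpha elf.h hunks retire 32-bit-personality support: elf_check_arch() now also refuses binaries that have EF_ALPHA_32BIT set, and SET_PERSONALITY no longer selects PER_LINUX_32BIT. For illustration, a minimal userspace sketch of the same ELF-header test, using the standard <elf.h> constants (the inspected path is just an example):

    #include <elf.h>
    #include <stdio.h>
    #include <string.h>

    /* Mirrors the new kernel check: right machine, not a 32-bit-address binary. */
    static int alpha_loadable(const Elf64_Ehdr *e)
    {
        return e->e_machine == EM_ALPHA && !(e->e_flags & EF_ALPHA_32BIT);
    }

    int main(void)
    {
        Elf64_Ehdr ehdr;
        FILE *f = fopen("/bin/true", "rb");  /* illustrative path */

        if (!f || fread(&ehdr, sizeof(ehdr), 1, f) != 1)
            return 1;
        if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0)
            return 1;
        printf("loadable on Alpha: %s\n", alpha_loadable(&ehdr) ? "yes" : "no");
        fclose(f);
        return 0;
    }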
@@ -135,7 +135,7 @@ struct crb_struct {
	/* virtual->physical map */
	unsigned long map_entries;
	unsigned long map_pages;
	struct vf_map_struct map[1];
	struct vf_map_struct map[];
};

struct memclust_struct {
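Replacing map[1] with map[] turns the trailing array into a C99 flexible array member, so sizeof(struct crb_struct) no longer includes a bogus one-element array. A generic sketch of the allocation idiom for such structures (all names below are illustrative, not kernel code):

    #include <stdio.h>
    #include <stdlib.h>

    struct map_entry { unsigned long va, pa; };

    struct table {
        unsigned long nentries;
        struct map_entry map[];  /* flexible array member: contributes 0 to sizeof */
    };

    int main(void)
    {
        unsigned long n = 4;
        /* One allocation covers the header plus n trailing entries. */
        struct table *t = malloc(sizeof(*t) + n * sizeof(t->map[0]));

        if (!t)
            return 1;
        t->nentries = n;
        printf("header %zu bytes, total %zu bytes\n",
               sizeof(*t), sizeof(*t) + n * sizeof(t->map[0]));
        free(t);
        return 0;
    }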
@@ -360,7 +360,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)

extern void paging_init(void);

/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */
/* We have our own get_unmapped_area */
#define HAVE_ARCH_UNMAPPED_AREA

#endif /* _ALPHA_PGTABLE_H */
@@ -8,23 +8,19 @@
#ifndef __ASM_ALPHA_PROCESSOR_H
#define __ASM_ALPHA_PROCESSOR_H

#include <linux/personality.h>	/* for ADDR_LIMIT_32BIT */

/*
 * We have a 42-bit user address space: 4TB user VM...
 */
#define TASK_SIZE (0x40000000000UL)

#define STACK_TOP \
	(current->personality & ADDR_LIMIT_32BIT ? 0x80000000 : 0x00120000000UL)
#define STACK_TOP (0x00120000000UL)

#define STACK_TOP_MAX	0x00120000000UL

/* This decides where the kernel will search for a free chunk of vm
 * space during mmap's.
 */
#define TASK_UNMAPPED_BASE \
	((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : TASK_SIZE / 2)
#define TASK_UNMAPPED_BASE (TASK_SIZE / 2)

/* This is dead.  Everything has been moved to thread_info. */
struct thread_struct { };
@@ -42,6 +42,8 @@ struct pt_regs {
	unsigned long trap_a0;
	unsigned long trap_a1;
	unsigned long trap_a2;
	/* This makes the stack 16-byte aligned as GCC expects */
	unsigned long __pad0;
	/* These are saved by PAL-code: */
	unsigned long ps;
	unsigned long pc;
@@ -19,9 +19,13 @@ static void __used foo(void)
	DEFINE(TI_STATUS, offsetof(struct thread_info, status));
	BLANK();

	DEFINE(SP_OFF, offsetof(struct pt_regs, ps));
	DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs));
	BLANK();

	DEFINE(SWITCH_STACK_SIZE, sizeof(struct switch_stack));
	BLANK();

	DEFINE(HAE_CACHE, offsetof(struct alpha_machine_vector, hae_cache));
	DEFINE(HAE_REG, offsetof(struct alpha_machine_vector, hae_register));
}
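The new DEFINE()s let assembly pick up SP_OFF and SIZEOF_PT_REGS from the compiler instead of the hand-coded values that entry.S previously carried (dropped in the next hunks). A standalone sketch of the asm-offsets technique - the DEFINE macro is modeled on the kernel's, the struct is made up, and the file is compiled with -S so the magic comments can be grepped out of the generated assembly:

    #include <stddef.h>

    struct pt_regs_like {
        unsigned long r0, r1, r2;
        unsigned long ps, pc;
    };

    /* Emit a marker into the .s output; kbuild turns these into #defines. */
    #define DEFINE(sym, val) \
        asm volatile("\n.ascii \"->" #sym " %0\"" : : "i" (val))

    void foo(void)
    {
        DEFINE(SP_OFF, offsetof(struct pt_regs_like, ps));
        DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs_like));
    }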
@@ -15,10 +15,6 @@
	.set noat
	.cfi_sections	.debug_frame

/* Stack offsets.  */
#define SP_OFF			184
#define SWITCH_STACK_SIZE	64

.macro	CFI_START_OSF_FRAME	func
	.align	4
	.globl	\func
@@ -198,8 +194,8 @@ CFI_END_OSF_FRAME entArith
CFI_START_OSF_FRAME entMM
	SAVE_ALL
/* save $9 - $15 so the inline exception code can manipulate them.  */
	subq	$sp, 56, $sp
	.cfi_adjust_cfa_offset	56
	subq	$sp, 64, $sp
	.cfi_adjust_cfa_offset	64
	stq	$9, 0($sp)
	stq	$10, 8($sp)
	stq	$11, 16($sp)
@@ -214,7 +210,7 @@ CFI_START_OSF_FRAME entMM
	.cfi_rel_offset	$13, 32
	.cfi_rel_offset	$14, 40
	.cfi_rel_offset	$15, 48
	addq	$sp, 56, $19
	addq	$sp, 64, $19
/* handle the fault */
	lda	$8, 0x3fff
	bic	$sp, $8, $8
@@ -227,7 +223,7 @@ CFI_START_OSF_FRAME entMM
	ldq	$13, 32($sp)
	ldq	$14, 40($sp)
	ldq	$15, 48($sp)
	addq	$sp, 56, $sp
	addq	$sp, 64, $sp
	.cfi_restore	$9
	.cfi_restore	$10
	.cfi_restore	$11
@@ -235,7 +231,7 @@ CFI_START_OSF_FRAME entMM
	.cfi_restore	$13
	.cfi_restore	$14
	.cfi_restore	$15
	.cfi_adjust_cfa_offset	-56
	.cfi_adjust_cfa_offset	-64
/* finish up the syscall as normal.  */
	br	ret_from_sys_call
CFI_END_OSF_FRAME entMM
@@ -382,8 +378,8 @@ entUnaUser:
	.cfi_restore	$0
	.cfi_adjust_cfa_offset	-256
	SAVE_ALL		/* setup normal kernel stack */
	lda	$sp, -56($sp)
	.cfi_adjust_cfa_offset	56
	lda	$sp, -64($sp)
	.cfi_adjust_cfa_offset	64
	stq	$9, 0($sp)
	stq	$10, 8($sp)
	stq	$11, 16($sp)
@@ -399,7 +395,7 @@ entUnaUser:
	.cfi_rel_offset	$14, 40
	.cfi_rel_offset	$15, 48
	lda	$8, 0x3fff
	addq	$sp, 56, $19
	addq	$sp, 64, $19
	bic	$sp, $8, $8
	jsr	$26, do_entUnaUser
	ldq	$9, 0($sp)
@@ -409,7 +405,7 @@ entUnaUser:
	ldq	$13, 32($sp)
	ldq	$14, 40($sp)
	ldq	$15, 48($sp)
	lda	$sp, 56($sp)
	lda	$sp, 64($sp)
	.cfi_restore	$9
	.cfi_restore	$10
	.cfi_restore	$11
@@ -417,7 +413,7 @@ entUnaUser:
	.cfi_restore	$13
	.cfi_restore	$14
	.cfi_restore	$15
	.cfi_adjust_cfa_offset	-56
	.cfi_adjust_cfa_offset	-64
	br	ret_from_sys_call
CFI_END_OSF_FRAME entUna

@@ -1210,8 +1210,7 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p)
	return ret;
}

/* Get an address range which is currently unmapped.  Similar to the
   generic version except that we know how to honor ADDR_LIMIT_32BIT.  */
/* Get an address range which is currently unmapped. */

static unsigned long
arch_get_unmapped_area_1(unsigned long addr, unsigned long len,
@@ -1230,13 +1229,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
		       unsigned long len, unsigned long pgoff,
		       unsigned long flags, vm_flags_t vm_flags)
{
	unsigned long limit;

	/* "32 bit" actually means 31 bit, since pointers sign extend.  */
	if (current->personality & ADDR_LIMIT_32BIT)
		limit = 0x80000000;
	else
		limit = TASK_SIZE;
	unsigned long limit = TASK_SIZE;

	if (len > limit)
		return -ENOMEM;
@@ -13,6 +13,7 @@
#include <linux/log2.h>
#include <linux/dma-map-ops.h>
#include <linux/iommu-helper.h>
#include <linux/string_choices.h>

#include <asm/io.h>
#include <asm/hwrpb.h>
@@ -212,7 +213,7 @@ static int pci_dac_dma_supported(struct pci_dev *dev, u64 mask)

	/* If both conditions above are met, we are fine. */
	DBGA("pci_dac_dma_supported %s from %ps\n",
	     ok ? "yes" : "no", __builtin_return_address(0));
	     str_yes_no(ok), __builtin_return_address(0));

	return ok;
}
@@ -649,7 +649,7 @@ s_reg_to_mem (unsigned long s_reg)
static int unauser_reg_offsets[32] = {
	R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8),
	/* r9 ... r15 are stored in front of regs.  */
	-56, -48, -40, -32, -24, -16, -8,
	-64, -56, -48, -40, -32, -24, -16, /* padding at -8 */
	R(r16), R(r17), R(r18),
	R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26),
	R(r27), R(r28), R(gp),
@@ -78,8 +78,8 @@ __load_new_mm_context(struct mm_struct *next_mm)

/* Macro for exception fixup code to access integer registers.  */
#define dpf_reg(r)							\
	(((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 :	\
				 (r) <= 18 ? (r)+10 : (r)-10])
	(((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-17 :	\
				 (r) <= 18 ? (r)+11 : (r)-10])

asmlinkage void
do_page_fault(unsigned long address, unsigned long mmcsr,
@@ -225,7 +225,6 @@ config ARM64
	select HAVE_FUNCTION_ERROR_INJECTION
	select HAVE_FUNCTION_GRAPH_FREGS
	select HAVE_FUNCTION_GRAPH_TRACER
	select HAVE_FUNCTION_GRAPH_RETVAL
	select HAVE_GCC_PLUGINS
	select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \
		HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI
@@ -48,7 +48,11 @@ KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) \
KBUILD_CFLAGS	+= $(call cc-disable-warning, psabi)
KBUILD_AFLAGS	+= $(compat_vdso)

ifeq ($(call test-ge, $(CONFIG_RUSTC_VERSION), 108500),y)
KBUILD_RUSTFLAGS += --target=aarch64-unknown-none-softfloat
else
KBUILD_RUSTFLAGS += --target=aarch64-unknown-none -Ctarget-feature="-neon"
endif

KBUILD_CFLAGS	+= $(call cc-option,-mabi=lp64)
KBUILD_AFLAGS	+= $(call cc-option,-mabi=lp64)
@@ -605,48 +605,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
						__cpacr_to_cptr_set(clr, set));\
	} while (0)

static __always_inline void kvm_write_cptr_el2(u64 val)
{
	if (has_vhe() || has_hvhe())
		write_sysreg(val, cpacr_el1);
	else
		write_sysreg(val, cptr_el2);
}

/* Resets the value of cptr_el2 when returning to the host. */
static __always_inline void __kvm_reset_cptr_el2(struct kvm *kvm)
{
	u64 val;

	if (has_vhe()) {
		val = (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN);
		if (cpus_have_final_cap(ARM64_SME))
			val |= CPACR_EL1_SMEN_EL1EN;
	} else if (has_hvhe()) {
		val = CPACR_EL1_FPEN;

		if (!kvm_has_sve(kvm) || !guest_owns_fp_regs())
			val |= CPACR_EL1_ZEN;
		if (cpus_have_final_cap(ARM64_SME))
			val |= CPACR_EL1_SMEN;
	} else {
		val = CPTR_NVHE_EL2_RES1;

		if (kvm_has_sve(kvm) && guest_owns_fp_regs())
			val |= CPTR_EL2_TZ;
		if (!cpus_have_final_cap(ARM64_SME))
			val |= CPTR_EL2_TSM;
	}

	kvm_write_cptr_el2(val);
}

#ifdef __KVM_NVHE_HYPERVISOR__
#define kvm_reset_cptr_el2(v)	__kvm_reset_cptr_el2(kern_hyp_va((v)->kvm))
#else
#define kvm_reset_cptr_el2(v)	__kvm_reset_cptr_el2((v)->kvm)
#endif

/*
 * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
 * format if E2H isn't set.
@@ -100,7 +100,7 @@ static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
				     void *(*to_va)(phys_addr_t phys))
{
	phys_addr_t *p = to_va(mc->head);
	phys_addr_t *p = to_va(mc->head & PAGE_MASK);

	if (!mc->nr_pages)
		return NULL;
@@ -615,8 +615,6 @@ struct cpu_sve_state {
struct kvm_host_data {
#define KVM_HOST_DATA_FLAG_HAS_SPE			0
#define KVM_HOST_DATA_FLAG_HAS_TRBE			1
#define KVM_HOST_DATA_FLAG_HOST_SVE_ENABLED		2
#define KVM_HOST_DATA_FLAG_HOST_SME_ENABLED		3
#define KVM_HOST_DATA_FLAG_TRBE_ENABLED			4
#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED	5
	unsigned long flags;
@@ -624,23 +622,13 @@ struct kvm_host_data {
	struct kvm_cpu_context host_ctxt;

	/*
	 * All pointers in this union are hyp VA.
	 * Hyp VA.
	 * sve_state is only used in pKVM and if system_supports_sve().
	 */
	union {
		struct user_fpsimd_state *fpsimd_state;
		struct cpu_sve_state *sve_state;
	};
	struct cpu_sve_state *sve_state;

	union {
		/* HYP VA pointer to the host storage for FPMR */
		u64	*fpmr_ptr;
		/*
		 * Used by pKVM only, as it needs to provide storage
		 * for the host
		 */
		u64	fpmr;
	};
	/* Used by pKVM only. */
	u64	fpmr;

	/* Ownership of the FP regs */
	enum {
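The pop_hyp_memcache() change masks the head field with PAGE_MASK before translating it to a VA, which suggests the low bits of that page-aligned value can carry metadata (an assumption on my part; the hunk itself only shows the masking). A generic illustration of the pack-metadata-in-low-bits idiom, with made-up values:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    int main(void)
    {
        uint64_t page = 0x40021000;          /* page-aligned address */
        uint64_t order = 3;                  /* metadata to stash */
        uint64_t head = page | order;        /* low 12 bits are free for metadata */

        printf("address = 0x%llx\n", (unsigned long long)(head & PAGE_MASK));
        printf("order   = %llu\n", (unsigned long long)(head & ~PAGE_MASK));
        return 0;
    }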
@@ -101,16 +101,18 @@ int populate_cache_leaves(unsigned int cpu)
	unsigned int level, idx;
	enum cache_type type;
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
	struct cacheinfo *infos = this_cpu_ci->info_list;

	for (idx = 0, level = 1; level <= this_cpu_ci->num_levels &&
	     idx < this_cpu_ci->num_leaves; idx++, level++) {
	     idx < this_cpu_ci->num_leaves; level++) {
		type = get_cache_type(level);
		if (type == CACHE_TYPE_SEPARATE) {
			ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
			ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
			if (idx + 1 >= this_cpu_ci->num_leaves)
				break;
			ci_leaf_init(&infos[idx++], CACHE_TYPE_DATA, level);
			ci_leaf_init(&infos[idx++], CACHE_TYPE_INST, level);
		} else {
			ci_leaf_init(this_leaf++, type, level);
			ci_leaf_init(&infos[idx++], type, level);
		}
	}
	return 0;
@@ -3091,6 +3091,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
	HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
	HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
	HWCAP_CAP(ID_AA64ISAR0_EL1, RNDR, IMP, CAP_HWCAP, KERNEL_HWCAP_RNG),
	HWCAP_CAP(ID_AA64ISAR3_EL1, FPRCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_FPRCVT),
	HWCAP_CAP(ID_AA64PFR0_EL1, FP, IMP, CAP_HWCAP, KERNEL_HWCAP_FP),
	HWCAP_CAP(ID_AA64PFR0_EL1, FP, FP16, CAP_HWCAP, KERNEL_HWCAP_FPHP),
	HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
@@ -3180,8 +3181,6 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
	HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
	HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
	HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
	HWCAP_CAP(ID_AA64SMFR0_EL1, SF8MM8, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8MM8),
	HWCAP_CAP(ID_AA64SMFR0_EL1, SF8MM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8MM4),
	HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM),
	HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES),
	HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA),
@@ -3192,6 +3191,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
	HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA),
	HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP4),
	HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2),
	HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM8, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM8),
	HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM4),
	HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3),
	HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2),
#ifdef CONFIG_ARM64_POE
@@ -1694,31 +1694,6 @@ void fpsimd_signal_preserve_current_state(void)
	sve_to_fpsimd(current);
}

/*
 * Called by KVM when entering the guest.
 */
void fpsimd_kvm_prepare(void)
{
	if (!system_supports_sve())
		return;

	/*
	 * KVM does not save host SVE state since we can only enter
	 * the guest from a syscall so the ABI means that only the
	 * non-saved SVE state needs to be saved.  If we have left
	 * SVE enabled for performance reasons then update the task
	 * state to be FPSIMD only.
	 */
	get_cpu_fpsimd_context();

	if (test_and_clear_thread_flag(TIF_SVE)) {
		sve_to_fpsimd(current);
		current->thread.fp_type = FP_STATE_FPSIMD;
	}

	put_cpu_fpsimd_context();
}

/*
 * Associate current's FPSIMD context with this cpu
 * The caller must have ownership of the cpu FPSIMD context before calling
@@ -194,12 +194,19 @@ static void amu_fie_setup(const struct cpumask *cpus)
	int cpu;

	/* We are already set since the last insmod of cpufreq driver */
	if (unlikely(cpumask_subset(cpus, amu_fie_cpus)))
	if (cpumask_available(amu_fie_cpus) &&
	    unlikely(cpumask_subset(cpus, amu_fie_cpus)))
		return;

	for_each_cpu(cpu, cpus) {
	for_each_cpu(cpu, cpus)
		if (!freq_counters_valid(cpu))
			return;

	if (!cpumask_available(amu_fie_cpus) &&
	    !zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
		WARN_ONCE(1, "Failed to allocate FIE cpumask for CPUs[%*pbl]\n",
			  cpumask_pr_args(cpus));
		return;
	}

	cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);
@@ -237,17 +244,8 @@ static struct notifier_block init_amu_fie_notifier = {

static int __init init_amu_fie(void)
{
	int ret;

	if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL))
		return -ENOMEM;

	ret = cpufreq_register_notifier(&init_amu_fie_notifier,
	return cpufreq_register_notifier(&init_amu_fie_notifier,
					CPUFREQ_POLICY_NOTIFIER);
	if (ret)
		free_cpumask_var(amu_fie_cpus);

	return ret;
}
core_initcall(init_amu_fie);

@@ -41,6 +41,7 @@ SECTIONS
	 */
	/DISCARD/	: {
		*(.note.GNU-stack .note.gnu.property)
		*(.ARM.attributes)
	}
	.note		: { *(.note.*) }		:text	:note

@@ -162,6 +162,7 @@ SECTIONS
	/DISCARD/ : {
		*(.interp .dynamic)
		*(.dynsym .dynstr .hash .gnu.hash)
		*(.ARM.attributes)
	}

	. = KIMAGE_VADDR;
@@ -447,21 +447,19 @@ static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	int ret;

	kvm_timer_update_status(timer_ctx, new_level);

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	if (!userspace_irqchip(vcpu->kvm)) {
		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
					  timer_irq(timer_ctx),
					  timer_ctx->irq.level,
					  timer_ctx);
		WARN_ON(ret);
	}
	if (userspace_irqchip(vcpu->kvm))
		return;

	kvm_vgic_inject_irq(vcpu->kvm, vcpu,
			    timer_irq(timer_ctx),
			    timer_ctx->irq.level,
			    timer_ctx);
}

/* Only called for a fully emulated timer */
@@ -471,10 +469,8 @@ static void timer_emulate(struct arch_timer_context *ctx)

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level) {
	if (should_fire != ctx->irq.level)
		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
		return;
	}

	kvm_timer_update_status(ctx, should_fire);

@@ -761,21 +757,6 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
					    timer_irq(map->direct_ptimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);

		/*
		 * The virtual offset behaviour is "interesting", as it
		 * always applies when HCR_EL2.E2H==0, but only when
		 * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
		 * track E2H when putting the HV timer in "direct" mode.
		 */
		if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
			struct arch_timer_offset *offs = &map->direct_vtimer->offset;

			if (vcpu_el2_e2h_is_set(vcpu))
				offs->vcpu_offset = NULL;
			else
				offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
		}
	}
}

@@ -976,31 +957,21 @@ void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
	 * which allows trapping of the timer registers even with NV2.
	 * Still, this is still worse than FEAT_NV on its own. Meh.
	 */
	if (!vcpu_el2_e2h_is_set(vcpu)) {
		if (cpus_have_final_cap(ARM64_HAS_ECV))
			return;

		/*
		 * A non-VHE guest hypervisor doesn't have any direct access
		 * to its timers: the EL2 registers trap (and the HW is
		 * fully emulated), while the EL0 registers access memory
		 * despite the access being notionally direct. Boo.
		 *
		 * We update the hardware timer registers with the
		 * latest value written by the guest to the VNCR page
		 * and let the hardware take care of the rest.
		 */
		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0),  SYS_CNTV_CTL);
		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL);
		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0),  SYS_CNTP_CTL);
		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL);
	} else {
	if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
		/*
		 * For a VHE guest hypervisor, the EL2 state is directly
		 * stored in the host EL1 timers, while the emulated EL0
		 * stored in the host EL1 timers, while the emulated EL1
		 * state is stored in the VNCR page. The latter could have
		 * been updated behind our back, and we must reset the
		 * emulation of the timers.
		 *
		 * A non-VHE guest hypervisor doesn't have any direct access
		 * to its timers: the EL2 registers trap despite being
		 * notionally direct (we use the EL1 HW, as for VHE), while
		 * the EL1 registers access memory.
		 *
		 * In both cases, process the emulated timers on each guest
		 * exit. Boo.
		 */
		struct timer_map map;
		get_timer_map(vcpu, &map);
@@ -2290,6 +2290,19 @@ static int __init init_subsystems(void)
		break;
	case -ENODEV:
	case -ENXIO:
		/*
		 * No VGIC? No pKVM for you.
		 *
		 * Protected mode assumes that VGICv3 is present, so no point
		 * in trying to hobble along if vgic initialization fails.
		 */
		if (is_protected_kvm_enabled())
			goto out;

		/*
		 * Otherwise, userspace could choose to implement a GIC for its
		 * guest on non-cooperative hardware.
		 */
		vgic_present = false;
		err = 0;
		break;
@@ -2400,6 +2413,13 @@ static void kvm_hyp_init_symbols(void)
	kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1);
	kvm_nvhe_sym(__icache_flags) = __icache_flags;
	kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;

	/*
	 * Flush entire BSS since part of its data containing init symbols is read
	 * while the MMU is off.
	 */
	kvm_flush_dcache_to_poc(kvm_ksym_ref(__hyp_bss_start),
				kvm_ksym_ref(__hyp_bss_end) - kvm_ksym_ref(__hyp_bss_start));
}

static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
@@ -2461,14 +2481,6 @@ static void finalize_init_hyp_mode(void)
			per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
				kern_hyp_va(sve_state);
		}
	} else {
		for_each_possible_cpu(cpu) {
			struct user_fpsimd_state *fpsimd_state;

			fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
			per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
				kern_hyp_va(fpsimd_state);
		}
	}
}
@@ -54,50 +54,18 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
	if (!system_supports_fpsimd())
		return;

	fpsimd_kvm_prepare();

	/*
	 * We will check TIF_FOREIGN_FPSTATE just before entering the
	 * guest in kvm_arch_vcpu_ctxflush_fp() and override this to
	 * FP_STATE_FREE if the flag set.
	 * Ensure that any host FPSIMD/SVE/SME state is saved and unbound such
	 * that the host kernel is responsible for restoring this state upon
	 * return to userspace, and the hyp code doesn't need to save anything.
	 *
	 * When the host may use SME, fpsimd_save_and_flush_cpu_state() ensures
	 * that PSTATE.{SM,ZA} == {0,0}.
	 */
	*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
	*host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);
	*host_data_ptr(fpmr_ptr) = kern_hyp_va(&current->thread.uw.fpmr);
	fpsimd_save_and_flush_cpu_state();
	*host_data_ptr(fp_owner) = FP_STATE_FREE;

	host_data_clear_flag(HOST_SVE_ENABLED);
	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
		host_data_set_flag(HOST_SVE_ENABLED);

	if (system_supports_sme()) {
		host_data_clear_flag(HOST_SME_ENABLED);
		if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
			host_data_set_flag(HOST_SME_ENABLED);

		/*
		 * If PSTATE.SM is enabled then save any pending FP
		 * state and disable PSTATE.SM. If we leave PSTATE.SM
		 * enabled and the guest does not enable SME via
		 * CPACR_EL1.SMEN then operations that should be valid
		 * may generate SME traps from EL1 to EL1 which we
		 * can't intercept and which would confuse the guest.
		 *
		 * Do the same for PSTATE.ZA in the case where there
		 * is state in the registers which has not already
		 * been saved, this is very unlikely to happen.
		 */
		if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
			*host_data_ptr(fp_owner) = FP_STATE_FREE;
			fpsimd_save_and_flush_cpu_state();
		}
	}

	/*
	 * If normal guests gain SME support, maintain this behavior for pKVM
	 * guests, which don't support SME.
	 */
	WARN_ON(is_protected_kvm_enabled() && system_supports_sme() &&
		read_sysreg_s(SYS_SVCR));
	WARN_ON_ONCE(system_supports_sme() && read_sysreg_s(SYS_SVCR));
}

/*
@@ -162,52 +130,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)

	local_irq_save(flags);

	/*
	 * If we have VHE then the Hyp code will reset CPACR_EL1 to
	 * the default value and we need to reenable SME.
	 */
	if (has_vhe() && system_supports_sme()) {
		/* Also restore EL0 state seen on entry */
		if (host_data_test_flag(HOST_SME_ENABLED))
			sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_SMEN);
		else
			sysreg_clear_set(CPACR_EL1,
					 CPACR_EL1_SMEN_EL0EN,
					 CPACR_EL1_SMEN_EL1EN);
		isb();
	}

	if (guest_owns_fp_regs()) {
		if (vcpu_has_sve(vcpu)) {
			u64 zcr = read_sysreg_el1(SYS_ZCR);

			/*
			 * If the vCPU is in the hyp context then ZCR_EL1 is
			 * loaded with its vEL2 counterpart.
			 */
			__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr;

			/*
			 * Restore the VL that was saved when bound to the CPU,
			 * which is the maximum VL for the guest. Because the
			 * layout of the data when saving the sve state depends
			 * on the VL, we need to use a consistent (i.e., the
			 * maximum) VL.
			 * Note that this means that at guest exit ZCR_EL1 is
			 * not necessarily the same as on guest entry.
			 *
			 * ZCR_EL2 holds the guest hypervisor's VL when running
			 * a nested guest, which could be smaller than the
			 * max for the vCPU. Similar to above, we first need to
			 * switch to a VL consistent with the layout of the
			 * vCPU's SVE state. KVM support for NV implies VHE, so
			 * using the ZCR_EL1 alias is safe.
			 */
			if (!has_vhe() || (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)))
				sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1,
						       SYS_ZCR_EL1);
		}

		/*
		 * Flush (save and invalidate) the fpsimd/sve state so that if
		 * the host tries to use fpsimd/sve, it's not using stale data
@@ -219,18 +142,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
		 * when needed.
		 */
		fpsimd_save_and_flush_cpu_state();
	} else if (has_vhe() && system_supports_sve()) {
		/*
		 * The FPSIMD/SVE state in the CPU has not been touched, and we
		 * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
		 * reset by kvm_reset_cptr_el2() in the Hyp code, disabling SVE
		 * for EL0. To avoid spurious traps, restore the trap state
		 * seen by kvm_arch_vcpu_load_fp():
		 */
		if (host_data_test_flag(HOST_SVE_ENABLED))
			sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
		else
			sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
	}

	local_irq_restore(flags);
@@ -44,6 +44,11 @@ alternative_if ARM64_HAS_RAS_EXTN
alternative_else_nop_endif
	mrs	x1, isr_el1
	cbz	x1,  1f

	// Ensure that __guest_enter() always provides a context
	// synchronization event so that callers don't need ISBs for anything
	// that would usually be synchronized by the ERET.
	isb
	mov	x0, #ARM_EXCEPTION_IRQ
	ret

@@ -326,7 +326,7 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
	return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}

static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
static inline bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
	arm64_mops_reset_regs(vcpu_gp_regs(vcpu), vcpu->arch.fault.esr_el2);
@ -375,7 +375,87 @@ static inline void __hyp_sve_save_host(void)
|
||||
true);
|
||||
}
|
||||
|
||||
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
|
||||
static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 zcr_el1, zcr_el2;
|
||||
|
||||
if (!guest_owns_fp_regs())
|
||||
return;
|
||||
|
||||
if (vcpu_has_sve(vcpu)) {
|
||||
/* A guest hypervisor may restrict the effective max VL. */
|
||||
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))
|
||||
zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2);
|
||||
else
|
||||
zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
|
||||
|
||||
write_sysreg_el2(zcr_el2, SYS_ZCR);
|
||||
|
||||
zcr_el1 = __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu));
|
||||
write_sysreg_el1(zcr_el1, SYS_ZCR);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 zcr_el1, zcr_el2;
|
||||
|
||||
if (!guest_owns_fp_regs())
|
||||
return;
|
||||
|
||||
/*
|
||||
* When the guest owns the FP regs, we know that guest+hyp traps for
|
||||
* any FPSIMD/SVE/SME features exposed to the guest have been disabled
|
||||
* by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd()
|
||||
* prior to __guest_entry(). As __guest_entry() guarantees a context
|
||||
* synchronization event, we don't need an ISB here to avoid taking
|
||||
* traps for anything that was exposed to the guest.
|
||||
*/
|
||||
if (vcpu_has_sve(vcpu)) {
|
||||
zcr_el1 = read_sysreg_el1(SYS_ZCR);
|
||||
__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1;
|
||||
|
||||
/*
|
||||
* The guest's state is always saved using the guest's max VL.
|
||||
* Ensure that the host has the guest's max VL active such that
|
||||
* the host can save the guest's state lazily, but don't
|
||||
* artificially restrict the host to the guest's max VL.
|
||||
*/
|
||||
if (has_vhe()) {
|
||||
zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
|
||||
write_sysreg_el2(zcr_el2, SYS_ZCR);
|
||||
} else {
|
||||
zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1;
|
||||
write_sysreg_el2(zcr_el2, SYS_ZCR);
|
||||
|
||||
zcr_el1 = vcpu_sve_max_vq(vcpu) - 1;
|
||||
write_sysreg_el1(zcr_el1, SYS_ZCR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* Non-protected kvm relies on the host restoring its sve state.
|
||||
* Protected kvm restores the host's sve state as not to reveal that
|
||||
* fpsimd was used by a guest nor leak upper sve bits.
|
||||
*/
|
||||
if (system_supports_sve()) {
|
||||
__hyp_sve_save_host();
|
||||
|
||||
/* Re-enable SVE traps if not supported for the guest vcpu. */
|
||||
if (!vcpu_has_sve(vcpu))
|
||||
cpacr_clear_set(CPACR_EL1_ZEN, 0);
|
||||
|
||||
} else {
|
||||
__fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs));
|
||||
}
|
||||
|
||||
if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
|
||||
*host_data_ptr(fpmr) = read_sysreg_s(SYS_FPMR);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* We trap the first access to the FP/SIMD to save the host context and
|
||||
@ -383,7 +463,7 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
|
||||
* If FP/SIMD is not implemented, handle the trap and inject an undefined
|
||||
* instruction exception to the guest. Similarly for trapped SVE accesses.
|
||||
*/
|
||||
static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
bool sve_guest;
|
||||
u8 esr_ec;
|
||||
@ -425,7 +505,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
isb();
|
||||
|
||||
/* Write out the host state if it's in the registers */
|
||||
if (host_owns_fp_regs())
|
||||
if (is_protected_kvm_enabled() && host_owns_fp_regs())
|
||||
kvm_hyp_save_fpsimd_host(vcpu);
|
||||
|
||||
/* Restore the guest state */
|
||||
@ -501,9 +581,22 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Open-coded version of timer_get_offset() to allow for kern_hyp_va() */
|
||||
static inline u64 hyp_timer_get_offset(struct arch_timer_context *ctxt)
|
||||
{
|
||||
u64 offset = 0;
|
||||
|
||||
if (ctxt->offset.vm_offset)
|
||||
offset += *kern_hyp_va(ctxt->offset.vm_offset);
|
||||
if (ctxt->offset.vcpu_offset)
|
||||
offset += *kern_hyp_va(ctxt->offset.vcpu_offset);
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
static inline u64 compute_counter_value(struct arch_timer_context *ctxt)
|
||||
{
|
||||
return arch_timer_read_cntpct_el0() - timer_get_offset(ctxt);
|
||||
return arch_timer_read_cntpct_el0() - hyp_timer_get_offset(ctxt);
|
||||
}
|
||||
|
||||
static bool kvm_handle_cntxct(struct kvm_vcpu *vcpu)
@ -587,7 +680,7 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu)
return true;
}

static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
static inline bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
handle_tx2_tvm(vcpu))
@ -607,7 +700,7 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}

static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
static inline bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
__vgic_v3_perform_cpuif_access(vcpu) == 1)
@ -616,19 +709,18 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}

static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
static inline bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu,
u64 *exit_code)
{
if (!__populate_fault_info(vcpu))
return true;

return false;
}
static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
__alias(kvm_hyp_handle_memory_fault);
static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
__alias(kvm_hyp_handle_memory_fault);
#define kvm_hyp_handle_iabt_low kvm_hyp_handle_memory_fault
#define kvm_hyp_handle_watchpt_low kvm_hyp_handle_memory_fault

static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
static inline bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
return true;
@ -658,23 +750,16 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)

typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);

static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);

static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);

/*
* Allow the hypervisor to handle the exit with an exit handler if it has one.
*
* Returns true if the hypervisor handled the exit, and control should go back
* to the guest, or false if it hasn't.
*/
static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
const exit_handler_fn *handlers)
{
const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
exit_handler_fn fn;

fn = handlers[kvm_vcpu_trap_get_class(vcpu)];

exit_handler_fn fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
if (fn)
return fn(vcpu, exit_code);

@ -704,20 +789,9 @@ static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code
* the guest, false when we should restore the host state and return to the
* main run loop.
*/
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
static inline bool __fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
const exit_handler_fn *handlers)
{
/*
* Save PSTATE early so that we can evaluate the vcpu mode
* early on.
*/
synchronize_vcpu_pstate(vcpu, exit_code);

/*
* Check whether we want to repaint the state one way or
* another.
*/
early_exit_filter(vcpu, exit_code);

if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);

@ -747,7 +821,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
goto exit;

/* Check if there's an exit handler and allow it to handle the exit. */
if (kvm_hyp_handle_exit(vcpu, exit_code))
if (kvm_hyp_handle_exit(vcpu, exit_code, handlers))
goto guest;
exit:
/* Return to the host kernel and handle the exit */

@ -5,6 +5,7 @@
*/

#include <hyp/adjust_pc.h>
#include <hyp/switch.h>

#include <asm/pgtable-types.h>
#include <asm/kvm_asm.h>
@ -83,7 +84,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
if (system_supports_sve())
__hyp_sve_restore_host();
else
__fpsimd_restore_state(*host_data_ptr(fpsimd_state));
__fpsimd_restore_state(host_data_ptr(host_ctxt.fp_regs));

if (has_fpmr)
write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR);
@ -91,11 +92,34 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
}

static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;

hyp_vcpu->vcpu.arch.debug_owner = host_vcpu->arch.debug_owner;

if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
hyp_vcpu->vcpu.arch.vcpu_debug_state = host_vcpu->arch.vcpu_debug_state;
else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
hyp_vcpu->vcpu.arch.external_debug_state = host_vcpu->arch.external_debug_state;
}

static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;

if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
host_vcpu->arch.vcpu_debug_state = hyp_vcpu->vcpu.arch.vcpu_debug_state;
else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
host_vcpu->arch.external_debug_state = hyp_vcpu->vcpu.arch.external_debug_state;
}

static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;

fpsimd_sve_flush();
flush_debug_state(hyp_vcpu);

hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;

@ -123,6 +147,7 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
unsigned int i;

fpsimd_sve_sync(&hyp_vcpu->vcpu);
sync_debug_state(hyp_vcpu);

host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;

@ -200,8 +225,12 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)

sync_hyp_vcpu(hyp_vcpu);
} else {
struct kvm_vcpu *vcpu = kern_hyp_va(host_vcpu);

/* The host is fully trusted, run its vCPU directly. */
ret = __kvm_vcpu_run(kern_hyp_va(host_vcpu));
fpsimd_lazy_switch_to_guest(vcpu);
ret = __kvm_vcpu_run(vcpu);
fpsimd_lazy_switch_to_host(vcpu);
}
out:
cpu_reg(host_ctxt, 1) = ret;
@ -651,12 +680,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
case ESR_ELx_EC_SMC64:
handle_host_smc(host_ctxt);
break;
case ESR_ELx_EC_SVE:
cpacr_clear_set(0, CPACR_EL1_ZEN);
isb();
sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
SYS_ZCR_EL2);
break;
case ESR_ELx_EC_IABT_LOW:
case ESR_ELx_EC_DABT_LOW:
handle_host_mem_abort(host_ctxt);

@ -943,10 +943,10 @@ static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ip
ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
if (ret)
return ret;
if (level != KVM_PGTABLE_LAST_LEVEL)
return -E2BIG;
if (!kvm_pte_valid(pte))
return -ENOENT;
if (level != KVM_PGTABLE_LAST_LEVEL)
return -E2BIG;

state = guest_get_page_state(pte, ipa);
if (state != PKVM_PAGE_SHARED_BORROWED)
@ -998,44 +998,57 @@ unlock:
return ret;
}

int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa)
{
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
u64 ipa = hyp_pfn_to_phys(gfn);
u64 phys;
int ret;

if (prot & ~KVM_PGTABLE_PROT_RWX)
return -EINVAL;
if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
return;

host_lock_component();
guest_lock_component(vm);

ret = __check_host_shared_guest(vm, &phys, ipa);
if (!ret)
ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);

guest_unlock_component(vm);
host_unlock_component();

WARN_ON(ret && ret != -ENOENT);
}

int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
u64 ipa = hyp_pfn_to_phys(gfn);
int ret;

if (pkvm_hyp_vm_is_protected(vm))
return -EPERM;

if (prot & ~KVM_PGTABLE_PROT_RWX)
return -EINVAL;

assert_host_shared_guest(vm, ipa);
guest_lock_component(vm);
ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
guest_unlock_component(vm);

return ret;
}

int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
u64 ipa = hyp_pfn_to_phys(gfn);
u64 phys;
int ret;

host_lock_component();
if (pkvm_hyp_vm_is_protected(vm))
return -EPERM;

assert_host_shared_guest(vm, ipa);
guest_lock_component(vm);

ret = __check_host_shared_guest(vm, &phys, ipa);
if (!ret)
ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);

ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);
guest_unlock_component(vm);
host_unlock_component();

return ret;
}
@ -1043,18 +1056,15 @@ int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
{
u64 ipa = hyp_pfn_to_phys(gfn);
u64 phys;
int ret;

host_lock_component();
if (pkvm_hyp_vm_is_protected(vm))
return -EPERM;

assert_host_shared_guest(vm, ipa);
guest_lock_component(vm);

ret = __check_host_shared_guest(vm, &phys, ipa);
if (!ret)
ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);

ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);
guest_unlock_component(vm);
host_unlock_component();

return ret;
}
@ -1063,18 +1073,14 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
u64 ipa = hyp_pfn_to_phys(gfn);
u64 phys;
int ret;

host_lock_component();
if (pkvm_hyp_vm_is_protected(vm))
return -EPERM;

assert_host_shared_guest(vm, ipa);
guest_lock_component(vm);

ret = __check_host_shared_guest(vm, &phys, ipa);
if (!ret)
kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);

kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
guest_unlock_component(vm);
host_unlock_component();

return ret;
return 0;
}

@ -39,6 +39,9 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
{
u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */

if (!guest_owns_fp_regs())
__activate_traps_fpsimd32(vcpu);

if (has_hvhe()) {
val |= CPACR_EL1_TTA;

@ -47,6 +50,8 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
if (vcpu_has_sve(vcpu))
val |= CPACR_EL1_ZEN;
}

write_sysreg(val, cpacr_el1);
} else {
val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;

@ -61,12 +66,32 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu)

if (!guest_owns_fp_regs())
val |= CPTR_EL2_TFP;

write_sysreg(val, cptr_el2);
}
}

if (!guest_owns_fp_regs())
__activate_traps_fpsimd32(vcpu);
static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
{
if (has_hvhe()) {
u64 val = CPACR_EL1_FPEN;

kvm_write_cptr_el2(val);
if (cpus_have_final_cap(ARM64_SVE))
val |= CPACR_EL1_ZEN;
if (cpus_have_final_cap(ARM64_SME))
val |= CPACR_EL1_SMEN;

write_sysreg(val, cpacr_el1);
} else {
u64 val = CPTR_NVHE_EL2_RES1;

if (!cpus_have_final_cap(ARM64_SVE))
val |= CPTR_EL2_TZ;
if (!cpus_have_final_cap(ARM64_SME))
val |= CPTR_EL2_TSM;

write_sysreg(val, cptr_el2);
}
}

static void __activate_traps(struct kvm_vcpu *vcpu)
@ -119,7 +144,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)

write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);

kvm_reset_cptr_el2(vcpu);
__deactivate_cptr_traps(vcpu);
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
}

@ -192,34 +217,6 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
kvm_handle_pvm_sysreg(vcpu, exit_code));
}

static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
{
/*
* Non-protected kvm relies on the host restoring its sve state.
* Protected kvm restores the host's sve state as not to reveal that
* fpsimd was used by a guest nor leak upper sve bits.
*/
if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
__hyp_sve_save_host();

/* Re-enable SVE traps if not supported for the guest vcpu. */
if (!vcpu_has_sve(vcpu))
cpacr_clear_set(CPACR_EL1_ZEN, 0);

} else {
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
}

if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) {
u64 val = read_sysreg_s(SYS_FPMR);

if (unlikely(is_protected_kvm_enabled()))
*host_data_ptr(fpmr) = val;
else
**host_data_ptr(fpmr_ptr) = val;
}
}

static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
@ -251,19 +248,21 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
return hyp_exit_handlers;
}

/*
* Some guests (e.g., protected VMs) are not allowed to run in AArch32.
* The ARMv8 architecture does not give the hypervisor a mechanism to prevent a
* guest from dropping to AArch32 EL0 if implemented by the CPU. If the
* hypervisor spots a guest in such a state ensure it is handled, and don't
* trust the host to spot or fix it. The check below is based on the one in
* kvm_arch_vcpu_ioctl_run().
*
* Returns false if the guest ran in AArch32 when it shouldn't have, and
* thus should exit to the host, or true if the guest run loop can continue.
*/
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);

synchronize_vcpu_pstate(vcpu, exit_code);

/*
* Some guests (e.g., protected VMs) are not allowed to run in
* AArch32. The ARMv8 architecture does not give the hypervisor a
* mechanism to prevent a guest from dropping to AArch32 EL0 if
* implemented by the CPU. If the hypervisor spots a guest in such a
* state ensure it is handled, and don't trust the host to spot or fix
* it. The check below is based on the one in
* kvm_arch_vcpu_ioctl_run().
*/
if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) {
/*
* As we have caught the guest red-handed, decide that it isn't
@ -276,6 +275,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
*exit_code |= ARM_EXCEPTION_IL;
}

return __fixup_guest_exit(vcpu, exit_code, handlers);
}

/* Switch to the guest for legacy non-VHE systems */

@ -136,6 +136,16 @@ write:
write_sysreg(val, cpacr_el1);
}

static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
{
u64 val = CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN;

if (cpus_have_final_cap(ARM64_SME))
val |= CPACR_EL1_SMEN_EL1EN;

write_sysreg(val, cpacr_el1);
}

static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
@ -207,7 +217,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
*/
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

kvm_reset_cptr_el2(vcpu);
__deactivate_cptr_traps(vcpu);

if (!arm64_kernel_unmapped_at_el0())
host_vectors = __this_cpu_read(this_cpu_vector);
@ -413,14 +423,6 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
return true;
}

static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
{
__fpsimd_save_state(*host_data_ptr(fpsimd_state));

if (kvm_has_fpmr(vcpu->kvm))
**host_data_ptr(fpmr_ptr) = read_sysreg_s(SYS_FPMR);
}

static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
{
int ret = -EINVAL;
@ -538,13 +540,10 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};

static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
return hyp_exit_handlers;
}
synchronize_vcpu_pstate(vcpu, exit_code);

static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
/*
* If we were in HYP context on entry, adjust the PSTATE view
* so that the usual helpers work correctly.
@ -564,6 +563,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
*vcpu_cpsr(vcpu) &= ~(PSR_MODE_MASK | PSR_MODE32_BIT);
*vcpu_cpsr(vcpu) |= mode;
}

return __fixup_guest_exit(vcpu, exit_code, hyp_exit_handlers);
}

/* Switch to the guest for VHE systems running in EL2 */
@ -578,6 +579,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)

sysreg_save_host_state_vhe(host_ctxt);

fpsimd_lazy_switch_to_guest(vcpu);

/*
* Note that ARM erratum 1165522 requires us to configure both stage 1
* and stage 2 translation for the guest context before we clear
@ -602,6 +605,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)

__deactivate_traps(vcpu);

fpsimd_lazy_switch_to_host(vcpu);

sysreg_restore_host_state_vhe(host_ctxt);

if (guest_owns_fp_regs())

@ -67,26 +67,27 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
if (!tmp)
return -ENOMEM;

swap(kvm->arch.nested_mmus, tmp);

/*
* If we went through a reallocation, adjust the MMU back-pointers in
* the previously initialised kvm_pgtable structures.
*/
if (kvm->arch.nested_mmus != tmp)
for (int i = 0; i < kvm->arch.nested_mmus_size; i++)
tmp[i].pgt->mmu = &tmp[i];
kvm->arch.nested_mmus[i].pgt->mmu = &kvm->arch.nested_mmus[i];

for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++)
ret = init_nested_s2_mmu(kvm, &tmp[i]);
ret = init_nested_s2_mmu(kvm, &kvm->arch.nested_mmus[i]);

if (ret) {
for (int i = kvm->arch.nested_mmus_size; i < num_mmus; i++)
kvm_free_stage2_pgd(&tmp[i]);
kvm_free_stage2_pgd(&kvm->arch.nested_mmus[i]);

return ret;
}

kvm->arch.nested_mmus_size = num_mmus;
kvm->arch.nested_mmus = tmp;

return 0;
}

@ -1452,6 +1452,16 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
return true;
}

static bool access_hv_timer(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (!vcpu_el2_e2h_is_set(vcpu))
return undef_access(vcpu, p, r);

return access_arch_timer(vcpu, p, r);
}

static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp,
s64 new, s64 cur)
{
@ -3103,9 +3113,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0),
EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0),

{ SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer },
EL2_REG(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0),
EL2_REG(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0),
{ SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer },
EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0),
EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0),

{ SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 },

@ -34,9 +34,9 @@
*
* CPU Interface:
*
* - kvm_vgic_vcpu_init(): initialization of static data that
* doesn't depend on any sizing information or emulation type. No
* allocation is allowed there.
* - kvm_vgic_vcpu_init(): initialization of static data that doesn't depend
* on any sizing information. Private interrupts are allocated if not
* already allocated at vgic-creation time.
*/

/* EARLY INIT */
@ -58,6 +58,8 @@ void kvm_vgic_early_init(struct kvm *kvm)

/* CREATION */

static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type);

/**
* kvm_vgic_create: triggered by the instantiation of the VGIC device by
* user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only)
@ -112,6 +114,22 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
goto out_unlock;
}

kvm_for_each_vcpu(i, vcpu, kvm) {
ret = vgic_allocate_private_irqs_locked(vcpu, type);
if (ret)
break;
}

if (ret) {
kvm_for_each_vcpu(i, vcpu, kvm) {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
kfree(vgic_cpu->private_irqs);
vgic_cpu->private_irqs = NULL;
}

goto out_unlock;
}

kvm->arch.vgic.in_kernel = true;
kvm->arch.vgic.vgic_model = type;

@ -180,7 +198,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
return 0;
}

static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu)
static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
int i;
@ -218,17 +236,28 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu)
/* PPIs */
irq->config = VGIC_CONFIG_LEVEL;
}

switch (type) {
case KVM_DEV_TYPE_ARM_VGIC_V3:
irq->group = 1;
irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
break;
case KVM_DEV_TYPE_ARM_VGIC_V2:
irq->group = 0;
irq->targets = BIT(vcpu->vcpu_id);
break;
}
}

return 0;
}

static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu)
static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu, u32 type)
{
int ret;

mutex_lock(&vcpu->kvm->arch.config_lock);
ret = vgic_allocate_private_irqs_locked(vcpu);
ret = vgic_allocate_private_irqs_locked(vcpu, type);
mutex_unlock(&vcpu->kvm->arch.config_lock);

return ret;
@ -258,7 +287,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
if (!irqchip_in_kernel(vcpu->kvm))
return 0;

ret = vgic_allocate_private_irqs(vcpu);
ret = vgic_allocate_private_irqs(vcpu, dist->vgic_model);
if (ret)
return ret;

@ -295,7 +324,7 @@ int vgic_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
int ret = 0, i;
int ret = 0;
unsigned long idx;

lockdep_assert_held(&kvm->arch.config_lock);
@ -315,35 +344,6 @@ int vgic_init(struct kvm *kvm)
if (ret)
goto out;

/* Initialize groups on CPUs created before the VGIC type was known */
kvm_for_each_vcpu(idx, vcpu, kvm) {
ret = vgic_allocate_private_irqs_locked(vcpu);
if (ret)
goto out;

for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i);

switch (dist->vgic_model) {
case KVM_DEV_TYPE_ARM_VGIC_V3:
irq->group = 1;
irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
break;
case KVM_DEV_TYPE_ARM_VGIC_V2:
irq->group = 0;
irq->targets = 1U << idx;
break;
default:
ret = -EINVAL;
}

vgic_put_irq(kvm, irq);

if (ret)
goto out;
}
}

/*
* If we have GICv4.1 enabled, unconditionally request enable the
* v4 support so that we get HW-accelerated vSGIs. Otherwise, only

@ -162,6 +162,13 @@ static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp,
unsigned long next;
unsigned long addr = start;

if (pgd_none(READ_ONCE(*dst_pgdp))) {
dst_p4dp = trans_alloc(info);
if (!dst_p4dp)
return -ENOMEM;
pgd_populate(NULL, dst_pgdp, dst_p4dp);
}

dst_p4dp = p4d_offset(dst_pgdp, start);
src_p4dp = p4d_offset(src_pgdp, start);
do {

@ -76,27 +76,6 @@ extern const char *__cpu_full_name[];
#define cpu_family_string() __cpu_family[raw_smp_processor_id()]
#define cpu_full_name_string() __cpu_full_name[raw_smp_processor_id()]

struct seq_file;
struct notifier_block;

extern int register_proc_cpuinfo_notifier(struct notifier_block *nb);
extern int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v);

#define proc_cpuinfo_notifier(fn, pri) \
({ \
static struct notifier_block fn##_nb = { \
.notifier_call = fn, \
.priority = pri \
}; \
\
register_proc_cpuinfo_notifier(&fn##_nb); \
})

struct proc_cpuinfo_notifier_args {
struct seq_file *m;
unsigned long n;
};

static inline bool cpus_are_siblings(int cpua, int cpub)
{
struct cpuinfo_loongarch *infoa = &cpu_data[cpua];

@ -77,6 +77,8 @@ extern int __cpu_logical_map[NR_CPUS];
#define SMP_IRQ_WORK BIT(ACTION_IRQ_WORK)
#define SMP_CLEAR_VECTOR BIT(ACTION_CLEAR_VECTOR)

struct seq_file;

struct secondary_data {
unsigned long stack;
unsigned long thread_info;

@ -18,16 +18,19 @@

.align 5
SYM_FUNC_START(__arch_cpu_idle)
/* start of rollback region */
LONG_L t0, tp, TI_FLAGS
nop
andi t0, t0, _TIF_NEED_RESCHED
bnez t0, 1f
nop
nop
nop
/* start of idle interrupt region */
ori t0, zero, CSR_CRMD_IE
/* idle instruction needs irq enabled */
csrxchg t0, t0, LOONGARCH_CSR_CRMD
/*
* If an interrupt lands here; between enabling interrupts above and
* going idle on the next instruction, we must *NOT* go idle since the
* interrupt could have set TIF_NEED_RESCHED or caused a timer to need
* reprogramming. Fall through -- see handle_vint() below -- and have
* the idle loop take care of things.
*/
idle 0
/* end of rollback region */
/* end of idle interrupt region */
1: jr ra
SYM_FUNC_END(__arch_cpu_idle)

@ -35,11 +38,10 @@ SYM_CODE_START(handle_vint)
UNWIND_HINT_UNDEFINED
BACKUP_T0T1
SAVE_ALL
la_abs t1, __arch_cpu_idle
la_abs t1, 1b
LONG_L t0, sp, PT_ERA
/* 32 byte rollback region */
ori t0, t0, 0x1f
xori t0, t0, 0x1f
/* 3 instructions idle interrupt region */
ori t0, t0, 0b1100
bne t0, t1, 1f
LONG_S t0, sp, PT_ERA
1: move a0, sp

@ -11,7 +11,6 @@

void __cpuidle arch_cpu_idle(void)
{
raw_local_irq_enable();
__arch_cpu_idle(); /* idle instruction needs irq enabled */
__arch_cpu_idle();
raw_local_irq_disable();
}

@ -13,28 +13,12 @@
#include <asm/processor.h>
#include <asm/time.h>

/*
* No lock; only written during early bootup by CPU 0.
*/
static RAW_NOTIFIER_HEAD(proc_cpuinfo_chain);

int __ref register_proc_cpuinfo_notifier(struct notifier_block *nb)
{
return raw_notifier_chain_register(&proc_cpuinfo_chain, nb);
}

int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v)
{
return raw_notifier_call_chain(&proc_cpuinfo_chain, val, v);
}

static int show_cpuinfo(struct seq_file *m, void *v)
{
unsigned long n = (unsigned long) v - 1;
unsigned int isa = cpu_data[n].isa_level;
unsigned int version = cpu_data[n].processor_id & 0xff;
unsigned int fp_version = cpu_data[n].fpu_vers;
struct proc_cpuinfo_notifier_args proc_cpuinfo_notifier_args;

#ifdef CONFIG_SMP
if (!cpu_online(n))
@ -91,20 +75,13 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (cpu_has_lbt_mips) seq_printf(m, " lbt_mips");
seq_printf(m, "\n");

seq_printf(m, "Hardware Watchpoint\t: %s",
cpu_has_watch ? "yes, " : "no\n");
seq_printf(m, "Hardware Watchpoint\t: %s", str_yes_no(cpu_has_watch));
if (cpu_has_watch) {
seq_printf(m, "iwatch count: %d, dwatch count: %d\n",
seq_printf(m, ", iwatch count: %d, dwatch count: %d",
cpu_data[n].watch_ireg_count, cpu_data[n].watch_dreg_count);
}

proc_cpuinfo_notifier_args.m = m;
proc_cpuinfo_notifier_args.n = n;

raw_notifier_call_chain(&proc_cpuinfo_chain, 0,
&proc_cpuinfo_notifier_args);

seq_printf(m, "\n");
seq_printf(m, "\n\n");

return 0;
}

@ -33,7 +33,7 @@ void machine_halt(void)
console_flush_on_panic(CONSOLE_FLUSH_PENDING);

while (true) {
__arch_cpu_idle();
__asm__ __volatile__("idle 0" : : : "memory");
}
}

@ -53,7 +53,7 @@ void machine_power_off(void)
#endif

while (true) {
__arch_cpu_idle();
__asm__ __volatile__("idle 0" : : : "memory");
}
}

@ -74,6 +74,6 @@ void machine_restart(char *command)
acpi_reboot();

while (true) {
__arch_cpu_idle();
__asm__ __volatile__("idle 0" : : : "memory");
}
}

@ -303,9 +303,9 @@ int kvm_arch_enable_virtualization_cpu(void)
* TOE=0: Trap on Exception.
* TIT=0: Trap on Timer.
*/
if (env & CSR_GCFG_GCIP_ALL)
if (env & CSR_GCFG_GCIP_SECURE)
gcfg |= CSR_GCFG_GCI_SECURE;
if (env & CSR_GCFG_MATC_ROOT)
if (env & CSR_GCFG_MATP_ROOT)
gcfg |= CSR_GCFG_MATC_ROOT;

write_csr_gcfg(gcfg);

@ -85,7 +85,7 @@
* Guest CRMD comes from separate GCSR_CRMD register
*/
ori t0, zero, CSR_PRMD_PIE
csrxchg t0, t0, LOONGARCH_CSR_PRMD
csrwr t0, LOONGARCH_CSR_PRMD

/* Set PVM bit to setup ertn to guest context */
ori t0, zero, CSR_GSTAT_PVM

@ -1548,9 +1548,6 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

/* Restore timer state regardless */
kvm_restore_timer(vcpu);

/* Control guest page CCA attribute */
change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT);
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);

/* Restore hardware PMU CSRs */

@ -25,7 +25,7 @@ unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
const u64 *ptr;
u64 data, sum64 = 0;

if (unlikely(len == 0))
if (unlikely(len <= 0))
return 0;

offset = (unsigned long)buff & 7;

@ -3,6 +3,7 @@
* Copyright (C) 2024 Loongson Technology Corporation Limited
*/

#include <linux/memblock.h>
#include <linux/pagewalk.h>
#include <linux/pgtable.h>
#include <asm/set_memory.h>
@ -167,7 +168,7 @@ bool kernel_page_present(struct page *page)
unsigned long addr = (unsigned long)page_address(page);

if (addr < vm_map_base)
return true;
return memblock_is_memory(__pa(addr));

pgd = pgd_offset_k(addr);
if (pgd_none(pgdp_get(pgd)))

@ -27,8 +27,8 @@
*/
struct pt_regs {
#ifdef CONFIG_32BIT
/* Pad bytes for argument save space on the stack. */
unsigned long pad0[8];
/* Saved syscall stack arguments; entries 0-3 unused. */
unsigned long args[8];
#endif

/* Saved main processor registers. */

@ -57,37 +57,21 @@ static inline void mips_syscall_update_nr(struct task_struct *task,
static inline void mips_get_syscall_arg(unsigned long *arg,
struct task_struct *task, struct pt_regs *regs, unsigned int n)
{
unsigned long usp __maybe_unused = regs->regs[29];

#ifdef CONFIG_32BIT
switch (n) {
case 0: case 1: case 2: case 3:
*arg = regs->regs[4 + n];

return;

#ifdef CONFIG_32BIT
case 4: case 5: case 6: case 7:
get_user(*arg, (int *)usp + n);
*arg = regs->args[n];
return;
#endif

#ifdef CONFIG_64BIT
case 4: case 5: case 6: case 7:
#ifdef CONFIG_MIPS32_O32
if (test_tsk_thread_flag(task, TIF_32BIT_REGS))
get_user(*arg, (int *)usp + n);
else
#endif
*arg = regs->regs[4 + n];

return;
#endif

default:
BUG();
}

unreachable();
#else
*arg = regs->regs[4 + n];
if ((IS_ENABLED(CONFIG_MIPS32_O32) &&
test_tsk_thread_flag(task, TIF_32BIT_REGS)))
*arg = (unsigned int)*arg;
#endif
}

static inline long syscall_get_error(struct task_struct *task,

|
||||
void output_ptreg_defines(void)
|
||||
{
|
||||
COMMENT("MIPS pt_regs offsets.");
|
||||
#ifdef CONFIG_32BIT
|
||||
OFFSET(PT_ARG4, pt_regs, args[4]);
|
||||
OFFSET(PT_ARG5, pt_regs, args[5]);
|
||||
OFFSET(PT_ARG6, pt_regs, args[6]);
|
||||
OFFSET(PT_ARG7, pt_regs, args[7]);
|
||||
#endif
|
||||
OFFSET(PT_R0, pt_regs, regs[0]);
|
||||
OFFSET(PT_R1, pt_regs, regs[1]);
|
||||
OFFSET(PT_R2, pt_regs, regs[2]);
|
||||
|
@ -64,10 +64,10 @@ load_a6: user_lw(t7, 24(t0)) # argument #7 from usp
|
||||
load_a7: user_lw(t8, 28(t0)) # argument #8 from usp
|
||||
loads_done:
|
||||
|
||||
sw t5, 16(sp) # argument #5 to ksp
|
||||
sw t6, 20(sp) # argument #6 to ksp
|
||||
sw t7, 24(sp) # argument #7 to ksp
|
||||
sw t8, 28(sp) # argument #8 to ksp
|
||||
sw t5, PT_ARG4(sp) # argument #5 to ksp
|
||||
sw t6, PT_ARG5(sp) # argument #6 to ksp
|
||||
sw t7, PT_ARG6(sp) # argument #7 to ksp
|
||||
sw t8, PT_ARG7(sp) # argument #8 to ksp
|
||||
.set pop
|
||||
|
||||
.section __ex_table,"a"
|
||||
|
@ -75,7 +75,7 @@ static void fsl_msi_print_chip(struct irq_data *irqd, struct seq_file *p)
|
||||
srs = (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK;
|
||||
cascade_virq = msi_data->cascade_array[srs]->virq;
|
||||
|
||||
seq_printf(p, " fsl-msi-%d", cascade_virq);
|
||||
seq_printf(p, "fsl-msi-%d", cascade_virq);
|
||||
}
|
||||
|
||||
|
||||
|
@ -740,7 +740,6 @@ CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_USER=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set

@ -725,7 +725,6 @@ CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_USER=m

@ -62,7 +62,6 @@ CONFIG_ZFCP=y
# CONFIG_INOTIFY_USER is not set
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_LSM="yama,loadpin,safesetid,integrity"
# CONFIG_ZLIB_DFLTCC is not set
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_PRINTK_TIME=y

@ -53,7 +53,11 @@ static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsig
unsigned long mask;
int cc;

if (__builtin_constant_p(nr)) {
/*
* With CONFIG_PROFILE_ALL_BRANCHES enabled gcc fails to
* handle __builtin_constant_p() in some cases.
*/
if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && __builtin_constant_p(nr)) {
addr = (const volatile unsigned char *)ptr;
addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE;
mask = 1UL << (nr & (BITS_PER_BYTE - 1));

@ -23,7 +23,6 @@
/**
* struct gmap_struct - guest address space
* @list: list head for the mm->context gmap list
* @crst_list: list of all crst tables used in the guest address space
* @mm: pointer to the parent mm_struct
* @guest_to_host: radix tree with guest to host address translation
* @host_to_guest: radix tree with pointer to segment table entries
@ -35,7 +34,6 @@
* @guest_handle: protected virtual machine handle for the ultravisor
* @host_to_rmap: radix tree with gmap_rmap lists
* @children: list of shadow gmap structures
* @pt_list: list of all page tables used in the shadow guest address space
* @shadow_lock: spinlock to protect the shadow gmap list
* @parent: pointer to the parent gmap for shadow guest address spaces
* @orig_asce: ASCE for which the shadow page table has been created
@ -45,7 +43,6 @@
*/
struct gmap {
struct list_head list;
struct list_head crst_list;
struct mm_struct *mm;
struct radix_tree_root guest_to_host;
struct radix_tree_root host_to_guest;
@ -61,7 +58,6 @@ struct gmap {
/* Additional data for shadow guest address spaces */
struct radix_tree_root host_to_rmap;
struct list_head children;
struct list_head pt_list;
spinlock_t shadow_lock;
struct gmap *parent;
unsigned long orig_asce;
@ -106,23 +102,21 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit);
void gmap_remove(struct gmap *gmap);
struct gmap *gmap_get(struct gmap *gmap);
void gmap_put(struct gmap *gmap);
void gmap_free(struct gmap *gmap);
struct gmap *gmap_alloc(unsigned long limit);

int gmap_map_segment(struct gmap *gmap, unsigned long from,
unsigned long to, unsigned long len);
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
void __gmap_zap(struct gmap *, unsigned long gaddr);
void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);

int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val);

struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
int edat_level);
int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
void gmap_unshadow(struct gmap *sg);
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
int fake);
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
@ -131,24 +125,22 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
int fake);
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
int fake);
int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
unsigned long *pgt, int *dat_protection, int *fake);
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);

void gmap_register_pte_notifier(struct gmap_notifier *);
void gmap_unregister_pte_notifier(struct gmap_notifier *);

int gmap_mprotect_notify(struct gmap *, unsigned long start,
unsigned long len, int prot);
int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits);

void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
int s390_disable_cow_sharing(void);
void s390_unlist_old_asce(struct gmap *gmap);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
unsigned long end, bool interruptible);
int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split);
unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level);

/**
* s390_uv_destroy_range - Destroy a range of pages in the given mm.

@ -30,6 +30,8 @@
#define KVM_S390_ESCA_CPU_SLOTS 248
#define KVM_MAX_VCPUS 255

#define KVM_INTERNAL_MEM_SLOTS 1

/*
* These seem to be used for allocating ->chip in the routing table, which we
* don't use. 1 is as small as we can get to reduce the needed memory. If we
@ -931,12 +933,14 @@ struct sie_page2 {
u8 reserved928[0x1000 - 0x928]; /* 0x0928 */
};

struct vsie_page;

struct kvm_s390_vsie {
struct mutex mutex;
struct radix_tree_root addr_to_page;
int page_count;
int next;
struct page *pages[KVM_MAX_VCPUS];
struct vsie_page *pages[KVM_MAX_VCPUS];
};

struct kvm_s390_gisa_iam {

@ -420,9 +420,10 @@ void setup_protection_map(void);
#define PGSTE_HC_BIT 0x0020000000000000UL
#define PGSTE_GR_BIT 0x0004000000000000UL
#define PGSTE_GC_BIT 0x0002000000000000UL
#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */
#define PGSTE_ST2_MASK 0x0000ffff00000000UL
#define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */
#define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */
#define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */

/* Guest Page State used for virtualization */
#define _PGSTE_GPS_ZERO 0x0000000080000000UL
@ -2007,4 +2008,18 @@ extern void s390_reset_cmma(struct mm_struct *mm);
#define pmd_pgtable(pmd) \
((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))

static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt)
{
unsigned long *pgstes, res;

pgstes = pgt + _PAGE_ENTRIES;

res = (pgstes[0] & PGSTE_ST2_MASK) << 16;
res |= pgstes[1] & PGSTE_ST2_MASK;
res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16;
res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32;

return res;
}

#endif /* _S390_PAGE_H */

@ -628,12 +628,12 @@ static inline int is_prot_virt_host(void)
}

int uv_pin_shared(unsigned long paddr);
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
int uv_destroy_folio(struct folio *folio);
int uv_destroy_pte(pte_t pte);
int uv_convert_from_secure_pte(pte_t pte);
int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb);
int uv_convert_from_secure(unsigned long paddr);
int uv_convert_from_secure_folio(struct folio *folio);

void setup_uv(void);

@ -19,19 +19,6 @@
#include <asm/sections.h>
#include <asm/uv.h>

#if !IS_ENABLED(CONFIG_KVM)
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
return 0;
}

int gmap_fault(struct gmap *gmap, unsigned long gaddr,
unsigned int fault_flags)
{
return 0;
}
#endif

/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
int __bootdata_preserved(prot_virt_guest);
EXPORT_SYMBOL(prot_virt_guest);
@ -159,6 +146,7 @@ int uv_destroy_folio(struct folio *folio)
folio_put(folio);
return rc;
}
EXPORT_SYMBOL(uv_destroy_folio);

/*
* The present PTE still indirectly holds a folio reference through the mapping.
@ -175,7 +163,7 @@ int uv_destroy_pte(pte_t pte)
*
* @paddr: Absolute host address of page to be exported
*/
static int uv_convert_from_secure(unsigned long paddr)
int uv_convert_from_secure(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
@ -187,11 +175,12 @@ static int uv_convert_from_secure(unsigned long paddr)
return -EINVAL;
return 0;
}
EXPORT_SYMBOL_GPL(uv_convert_from_secure);

/*
* The caller must already hold a reference to the folio.
*/
static int uv_convert_from_secure_folio(struct folio *folio)
int uv_convert_from_secure_folio(struct folio *folio)
{
int rc;

@ -206,6 +195,7 @@ static int uv_convert_from_secure_folio(struct folio *folio)
folio_put(folio);
return rc;
}
EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio);

/*
* The present PTE still indirectly holds a folio reference through the mapping.
@ -237,13 +227,33 @@ static int expected_folio_refs(struct folio *folio)
return res;
}

static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
/**
* make_folio_secure() - make a folio secure
* @folio: the folio to make secure
* @uvcb: the uvcb that describes the UVC to be used
*
* The folio @folio will be made secure if possible, @uvcb will be passed
* as-is to the UVC.
*
* Return: 0 on success;
* -EBUSY if the folio is in writeback or has too many references;
* -E2BIG if the folio is large;
* -EAGAIN if the UVC needs to be attempted again;
* -ENXIO if the address is not mapped;
* -EINVAL if the UVC failed for other reasons.
*
* Context: The caller must hold exactly one extra reference on the folio
* (it's the same logic as split_folio())
*/
int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
int expected, cc = 0;

if (folio_test_large(folio))
return -E2BIG;
if (folio_test_writeback(folio))
return -EAGAIN;
expected = expected_folio_refs(folio);
return -EBUSY;
expected = expected_folio_refs(folio) + 1;
if (!folio_ref_freeze(folio, expected))
return -EBUSY;
set_bit(PG_arch_1, &folio->flags);
@ -267,251 +277,7 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
return -EAGAIN;
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}

/**
|
||||
* should_export_before_import - Determine whether an export is needed
|
||||
* before an import-like operation
|
||||
* @uvcb: the Ultravisor control block of the UVC to be performed
|
||||
* @mm: the mm of the process
|
||||
*
|
||||
* Returns whether an export is needed before every import-like operation.
|
||||
* This is needed for shared pages, which don't trigger a secure storage
|
||||
* exception when accessed from a different guest.
|
||||
*
|
||||
* Although considered as one, the Unpin Page UVC is not an actual import,
|
||||
* so it is not affected.
|
||||
*
|
||||
* No export is needed also when there is only one protected VM, because the
|
||||
* page cannot belong to the wrong VM in that case (there is no "other VM"
|
||||
* it can belong to).
|
||||
*
|
||||
* Return: true if an export is needed before every import, otherwise false.
|
||||
*/
|
||||
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
|
||||
{
|
||||
/*
|
||||
* The misc feature indicates, among other things, that importing a
|
||||
* shared page from a different protected VM will automatically also
|
||||
* transfer its ownership.
|
||||
*/
|
||||
if (uv_has_feature(BIT_UV_FEAT_MISC))
|
||||
return false;
|
||||
if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
|
||||
return false;
|
||||
return atomic_read(&mm->context.protected_count) > 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Drain LRU caches: the local one on first invocation and the ones of all
|
||||
* CPUs on successive invocations. Returns "true" on the first invocation.
|
||||
*/
|
||||
static bool drain_lru(bool *drain_lru_called)
|
||||
{
|
||||
/*
|
||||
* If we have tried a local drain and the folio refcount
|
||||
* still does not match our expected safe value, try with a
|
||||
* system wide drain. This is needed if the pagevecs holding
|
||||
* the page are on a different CPU.
|
||||
*/
|
||||
if (*drain_lru_called) {
|
||||
lru_add_drain_all();
|
||||
/* We give up here, don't retry immediately. */
|
||||
return false;
|
||||
}
|
||||
/*
|
||||
* We are here if the folio refcount does not match the
|
||||
* expected safe value. The main culprits are usually
|
||||
* pagevecs. With lru_add_drain() we drain the pagevecs
|
||||
* on the local CPU so that hopefully the refcount will
|
||||
* reach the expected safe value.
|
||||
*/
|
||||
lru_add_drain();
|
||||
*drain_lru_called = true;
|
||||
/* The caller should try again immediately */
|
||||
return true;
|
||||
}
|
||||
/*
 * Requests the Ultravisor to make a page accessible to a guest.
 * If it's brought in the first time, it will be cleared. If
 * it has been exported before, it will be decrypted and integrity
 * checked.
 */
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
{
	struct vm_area_struct *vma;
	bool drain_lru_called = false;
	spinlock_t *ptelock;
	unsigned long uaddr;
	struct folio *folio;
	pte_t *ptep;
	int rc;

again:
	rc = -EFAULT;
	mmap_read_lock(gmap->mm);

	uaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(uaddr))
		goto out;
	vma = vma_lookup(gmap->mm, uaddr);
	if (!vma)
		goto out;
	/*
	 * Secure pages cannot be huge and userspace should not combine both.
	 * In case userspace does it anyway this will result in an -EFAULT for
	 * the unpack. The guest is thus never reaching secure mode. If
	 * userspace is playing dirty tricks with mapping huge pages later
	 * on this will result in a segmentation fault.
	 */
	if (is_vm_hugetlb_page(vma))
		goto out;

	rc = -ENXIO;
	ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
	if (!ptep)
		goto out;
	if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
		folio = page_folio(pte_page(*ptep));
		rc = -EAGAIN;
		if (folio_test_large(folio)) {
			rc = -E2BIG;
		} else if (folio_trylock(folio)) {
			if (should_export_before_import(uvcb, gmap->mm))
				uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
			rc = make_folio_secure(folio, uvcb);
			folio_unlock(folio);
		}

		/*
		 * Once we drop the PTL, the folio may get unmapped and
		 * freed immediately. We need a temporary reference.
		 */
		if (rc == -EAGAIN || rc == -E2BIG)
			folio_get(folio);
	}
	pte_unmap_unlock(ptep, ptelock);
out:
	mmap_read_unlock(gmap->mm);

	switch (rc) {
	case -E2BIG:
		folio_lock(folio);
		rc = split_folio(folio);
		folio_unlock(folio);
		folio_put(folio);

		switch (rc) {
		case 0:
			/* Splitting succeeded, try again immediately. */
			goto again;
		case -EAGAIN:
			/* Additional folio references. */
			if (drain_lru(&drain_lru_called))
				goto again;
			return -EAGAIN;
		case -EBUSY:
			/* Unexpected race. */
			return -EAGAIN;
		}
		WARN_ON_ONCE(1);
		return -ENXIO;
	case -EAGAIN:
		/*
		 * If we are here because the UVC returned busy or partial
		 * completion, this is just a useless check, but it is safe.
		 */
		folio_wait_writeback(folio);
		folio_put(folio);
		return -EAGAIN;
	case -EBUSY:
		/* Additional folio references. */
		if (drain_lru(&drain_lru_called))
			goto again;
		return -EAGAIN;
	case -ENXIO:
		if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
			return -EFAULT;
		return -EAGAIN;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_make_secure);

int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
{
	struct uv_cb_cts uvcb = {
		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
		.header.len = sizeof(uvcb),
		.guest_handle = gmap->guest_handle,
		.gaddr = gaddr,
	};

	return gmap_make_secure(gmap, gaddr, &uvcb);
}
EXPORT_SYMBOL_GPL(gmap_convert_to_secure);

/**
 * gmap_destroy_page - Destroy a guest page.
 * @gmap: the gmap of the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 */
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
{
	struct vm_area_struct *vma;
	struct folio_walk fw;
	unsigned long uaddr;
	struct folio *folio;
	int rc;

	rc = -EFAULT;
	mmap_read_lock(gmap->mm);

	uaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(uaddr))
		goto out;
	vma = vma_lookup(gmap->mm, uaddr);
	if (!vma)
		goto out;
	/*
	 * Huge pages should not be able to become secure
	 */
	if (is_vm_hugetlb_page(vma))
		goto out;

	rc = 0;
	folio = folio_walk_start(&fw, vma, uaddr, 0);
	if (!folio)
		goto out;
	/*
	 * See gmap_make_secure(): large folios cannot be secure. Small
	 * folio implies FW_LEVEL_PTE.
	 */
	if (folio_test_large(folio) || !pte_write(fw.pte))
		goto out_walk_end;
	rc = uv_destroy_folio(folio);
	/*
	 * Fault handlers can race; it is possible that two CPUs will fault
	 * on the same secure page. One CPU can destroy the page, reboot,
	 * re-enter secure mode and import it, while the second CPU was
	 * stuck at the beginning of the handler. At some point the second
	 * CPU will be able to progress, and it will not be able to destroy
	 * the page. In that case we do not want to terminate the process,
	 * we instead try to export the page.
	 */
	if (rc)
		rc = uv_convert_from_secure_folio(folio);
out_walk_end:
	folio_walk_end(&fw, vma);
out:
	mmap_read_unlock(gmap->mm);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_destroy_page);
EXPORT_SYMBOL_GPL(make_folio_secure);

/*
 * To be called with the folio locked or with an extra reference! This will
@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm

kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o

kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o
@ -16,6 +16,7 @@
#include <asm/gmap.h>
#include <asm/dat-bits.h>
#include "kvm-s390.h"
#include "gmap.h"
#include "gaccess.h"

/*
@ -1392,6 +1393,44 @@ shadow_pgt:
	return 0;
}

/**
 * shadow_pgt_lookup() - find a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: the address in the shadow guest address space
 * @pgt: parent gmap address of the page table to get shadowed
 * @dat_protection: if the pgtable is marked as protected by dat
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if the shadow page table was found and -EAGAIN if the page
 * table was not found.
 *
 * Called with sg->mm->mmap_lock in read.
 */
static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt,
			     int *dat_protection, int *fake)
{
	unsigned long pt_index;
	unsigned long *table;
	struct page *page;
	int rc;

	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
		/* Shadow page tables are full pages (pte+pgste) */
		page = pfn_to_page(*table >> PAGE_SHIFT);
		pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page));
		*pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE;
		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
		*fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE);
		rc = 0;
	} else {
		rc = -EAGAIN;
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
}
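The masking with GMAP_SHADOW_FAKE_TABLE above is the usual trick of hiding a flag in the low bit of an aligned address: one word carries both the table origin and the "fake" marker. A self-contained sketch of the encode/decode step (addresses made up for illustration):

#include <assert.h>
#include <stdint.h>

#define FAKE_TABLE 1ULL	/* plays the role of GMAP_SHADOW_FAKE_TABLE */

int main(void)
{
	uint64_t pgt = 0x123000;		/* aligned table origin */
	uint64_t pt_index = pgt | FAKE_TABLE;	/* tag it as "fake" */

	assert((pt_index & ~FAKE_TABLE) == pgt);	/* recover the address */
	assert((pt_index & FAKE_TABLE) != 0);		/* recover the flag */
	return 0;
}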
/**
 * kvm_s390_shadow_fault - handle fault on a shadow page table
 * @vcpu: virtual cpu
@ -1415,6 +1454,9 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
	int dat_protection, fake;
	int rc;

	if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm))
		return -EFAULT;

	mmap_read_lock(sg->mm);
	/*
	 * We don't want any guest-2 tables to change - so the parent
@ -1423,7 +1465,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
	 */
	ipte_lock(vcpu->kvm);

	rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
	rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
	if (rc)
		rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
					    &fake);
142
arch/s390/kvm/gmap-vsie.c
Normal file
@ -0,0 +1,142 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Guest memory management for KVM/s390 nested VMs.
 *
 * Copyright IBM Corp. 2008, 2020, 2024
 *
 * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
 *            Martin Schwidefsky <schwidefsky@de.ibm.com>
 *            David Hildenbrand <david@redhat.com>
 *            Janosch Frank <frankja@linux.vnet.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/pgtable.h>
#include <linux/pagemap.h>
#include <linux/mman.h>

#include <asm/lowcore.h>
#include <asm/gmap.h>
#include <asm/uv.h>

#include "kvm-s390.h"
#include "gmap.h"

/**
 * gmap_find_shadow - find a specific asce in the list of shadow tables
 * @parent: pointer to the parent gmap
 * @asce: ASCE for which the shadow table is created
 * @edat_level: edat level to be used for the shadow translation
 *
 * Returns the pointer to a gmap if a shadow table with the given asce is
 * already available, ERR_PTR(-EAGAIN) if another one is just being created,
 * otherwise NULL
 *
 * Context: Called with parent->shadow_lock held
 */
static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level)
{
	struct gmap *sg;

	lockdep_assert_held(&parent->shadow_lock);
	list_for_each_entry(sg, &parent->children, list) {
		if (!gmap_shadow_valid(sg, asce, edat_level))
			continue;
		if (!sg->initialized)
			return ERR_PTR(-EAGAIN);
		refcount_inc(&sg->ref_count);
		return sg;
	}
	return NULL;
}

/**
 * gmap_shadow - create/find a shadow guest address space
 * @parent: pointer to the parent gmap
 * @asce: ASCE for which the shadow table is created
 * @edat_level: edat level to be used for the shadow translation
 *
 * The pages of the top level page table referred by the asce parameter
 * will be set to read-only and marked in the PGSTEs of the kvm process.
 * The shadow table will be removed automatically on any change to the
 * PTE mapping for the source table.
 *
 * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
 * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
 * parent gmap table could not be protected.
 */
struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level)
{
	struct gmap *sg, *new;
	unsigned long limit;
	int rc;

	if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) ||
	    KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private))
		return ERR_PTR(-EFAULT);
	spin_lock(&parent->shadow_lock);
	sg = gmap_find_shadow(parent, asce, edat_level);
	spin_unlock(&parent->shadow_lock);
	if (sg)
		return sg;
	/* Create a new shadow gmap */
	limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
	if (asce & _ASCE_REAL_SPACE)
		limit = -1UL;
	new = gmap_alloc(limit);
	if (!new)
		return ERR_PTR(-ENOMEM);
	new->mm = parent->mm;
	new->parent = gmap_get(parent);
	new->private = parent->private;
	new->orig_asce = asce;
	new->edat_level = edat_level;
	new->initialized = false;
	spin_lock(&parent->shadow_lock);
	/* Recheck if another CPU created the same shadow */
	sg = gmap_find_shadow(parent, asce, edat_level);
	if (sg) {
		spin_unlock(&parent->shadow_lock);
		gmap_free(new);
		return sg;
	}
	if (asce & _ASCE_REAL_SPACE) {
		/* only allow one real-space gmap shadow */
		list_for_each_entry(sg, &parent->children, list) {
			if (sg->orig_asce & _ASCE_REAL_SPACE) {
				spin_lock(&sg->guest_table_lock);
				gmap_unshadow(sg);
				spin_unlock(&sg->guest_table_lock);
				list_del(&sg->list);
				gmap_put(sg);
				break;
			}
		}
	}
	refcount_set(&new->ref_count, 2);
	list_add(&new->list, &parent->children);
	if (asce & _ASCE_REAL_SPACE) {
		/* nothing to protect, return right away */
		new->initialized = true;
		spin_unlock(&parent->shadow_lock);
		return new;
	}
	spin_unlock(&parent->shadow_lock);
	/* protect after insertion, so it will get properly invalidated */
	mmap_read_lock(parent->mm);
	rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN,
				      ((asce & _ASCE_TABLE_LENGTH) + 1),
				      PROT_READ, GMAP_NOTIFY_SHADOW);
	mmap_read_unlock(parent->mm);
	spin_lock(&parent->shadow_lock);
	new->initialized = true;
	if (rc) {
		list_del(&new->list);
		gmap_free(new);
		new = ERR_PTR(rc);
	}
	spin_unlock(&parent->shadow_lock);
	return new;
}
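The limit computation in gmap_shadow() is compact: bits 60-61 of the ASCE select the top-level DAT table type, and each additional table level covers 11 more address bits on top of the 31-bit segment-table space. A sketch that tabulates the formula, assuming _ASCE_TYPE_MASK is 0x0c as on s390 and a 64-bit unsigned long:

#include <stdio.h>

int main(void)
{
	static const char *type[] = {
		"segment", "region-3", "region-2", "region-1"
	};
	unsigned int t;

	for (t = 0; t < 4; t++) {	/* t == (asce & _ASCE_TYPE_MASK) >> 2 */
		unsigned long limit = -1UL >> (33 - t * 11);

		printf("%-8s table: limit 0x%016lx (%u-bit address space)\n",
		       type[t], limit, 31 + t * 11);
	}
	return 0;
}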
212
arch/s390/kvm/gmap.c
Normal file
@ -0,0 +1,212 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Guest memory management for KVM/s390
 *
 * Copyright IBM Corp. 2008, 2020, 2024
 *
 * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
 *            Martin Schwidefsky <schwidefsky@de.ibm.com>
 *            David Hildenbrand <david@redhat.com>
 *            Janosch Frank <frankja@linux.vnet.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/pgtable.h>
#include <linux/pagemap.h>

#include <asm/lowcore.h>
#include <asm/gmap.h>
#include <asm/uv.h>

#include "gmap.h"

/**
 * should_export_before_import - Determine whether an export is needed
 * before an import-like operation
 * @uvcb: the Ultravisor control block of the UVC to be performed
 * @mm: the mm of the process
 *
 * Returns whether an export is needed before every import-like operation.
 * This is needed for shared pages, which don't trigger a secure storage
 * exception when accessed from a different guest.
 *
 * Although considered as one, the Unpin Page UVC is not an actual import,
 * so it is not affected.
 *
 * No export is needed either when there is only one protected VM, because the
 * page cannot belong to the wrong VM in that case (there is no "other VM"
 * it can belong to).
 *
 * Return: true if an export is needed before every import, otherwise false.
 */
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
	/*
	 * The misc feature indicates, among other things, that importing a
	 * shared page from a different protected VM will automatically also
	 * transfer its ownership.
	 */
	if (uv_has_feature(BIT_UV_FEAT_MISC))
		return false;
	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
		return false;
	return atomic_read(&mm->context.protected_count) > 1;
}

static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb)
{
	struct folio *folio = page_folio(page);
	int rc;

	/*
	 * Secure pages cannot be huge and userspace should not combine both.
	 * In case userspace does it anyway this will result in an -EFAULT for
	 * the unpack. The guest is thus never reaching secure mode.
	 * If userspace plays dirty tricks and decides to map huge pages at a
	 * later point in time, it will receive a segmentation fault or
	 * KVM_RUN will return -EFAULT.
	 */
	if (folio_test_hugetlb(folio))
		return -EFAULT;
	if (folio_test_large(folio)) {
		mmap_read_unlock(gmap->mm);
		rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true);
		mmap_read_lock(gmap->mm);
		if (rc)
			return rc;
		folio = page_folio(page);
	}

	if (!folio_trylock(folio))
		return -EAGAIN;
	if (should_export_before_import(uvcb, gmap->mm))
		uv_convert_from_secure(folio_to_phys(folio));
	rc = make_folio_secure(folio, uvcb);
	folio_unlock(folio);

	/*
	 * In theory a race is possible and the folio might have become
	 * large again before the folio_trylock() above. In that case, no
	 * action is performed and -EAGAIN is returned; the callers will
	 * have to try again later.
	 * In most cases this implies running the VM again, getting the same
	 * exception again, and making another attempt in this function.
	 * This is expected to happen extremely rarely.
	 */
	if (rc == -E2BIG)
		return -EAGAIN;
	/* The folio has too many references, try to shake some off */
	if (rc == -EBUSY) {
		mmap_read_unlock(gmap->mm);
		kvm_s390_wiggle_split_folio(gmap->mm, folio, false);
		mmap_read_lock(gmap->mm);
		return -EAGAIN;
	}

	return rc;
}

/**
 * gmap_make_secure() - make one guest page secure
 * @gmap: the guest gmap
 * @gaddr: the guest address that needs to be made secure
 * @uvcb: the UVCB specifying which operation needs to be performed
 *
 * Context: needs to be called with kvm->srcu held.
 * Return: 0 on success, < 0 in case of error (see __gmap_make_secure()).
 */
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
{
	struct kvm *kvm = gmap->private;
	struct page *page;
	int rc = 0;

	lockdep_assert_held(&kvm->srcu);

	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
	mmap_read_lock(gmap->mm);
	if (page)
		rc = __gmap_make_secure(gmap, page, uvcb);
	kvm_release_page_clean(page);
	mmap_read_unlock(gmap->mm);

	return rc;
}

int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
{
	struct uv_cb_cts uvcb = {
		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
		.header.len = sizeof(uvcb),
		.guest_handle = gmap->guest_handle,
		.gaddr = gaddr,
	};

	return gmap_make_secure(gmap, gaddr, &uvcb);
}

/**
 * __gmap_destroy_page() - Destroy a guest page.
 * @gmap: the gmap of the guest
 * @page: the page to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 *
 * Context: must be called holding the mm lock for gmap->mm
 */
static int __gmap_destroy_page(struct gmap *gmap, struct page *page)
{
	struct folio *folio = page_folio(page);
	int rc;

	/*
	 * See gmap_make_secure(): large folios cannot be secure. Small
	 * folio implies FW_LEVEL_PTE.
	 */
	if (folio_test_large(folio))
		return -EFAULT;

	rc = uv_destroy_folio(folio);
	/*
	 * Fault handlers can race; it is possible that two CPUs will fault
	 * on the same secure page. One CPU can destroy the page, reboot,
	 * re-enter secure mode and import it, while the second CPU was
	 * stuck at the beginning of the handler. At some point the second
	 * CPU will be able to progress, and it will not be able to destroy
	 * the page. In that case we do not want to terminate the process,
	 * we instead try to export the page.
	 */
	if (rc)
		rc = uv_convert_from_secure_folio(folio);

	return rc;
}

/**
 * gmap_destroy_page() - Destroy a guest page.
 * @gmap: the gmap of the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 *
 * Context: may sleep.
 */
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
{
	struct page *page;
	int rc = 0;

	mmap_read_lock(gmap->mm);
	page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr));
	if (page)
		rc = __gmap_destroy_page(gmap, page);
	kvm_release_page_clean(page);
	mmap_read_unlock(gmap->mm);
	return rc;
}
39
arch/s390/kvm/gmap.h
Normal file
@ -0,0 +1,39 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2016, 2025
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 *            Claudio Imbrenda <imbrenda@linux.ibm.com>
 */

#ifndef ARCH_KVM_S390_GMAP_H
#define ARCH_KVM_S390_GMAP_H

#define GMAP_SHADOW_FAKE_TABLE 1ULL

int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);

/**
 * gmap_shadow_valid - check if a shadow guest address space matches the
 *                     given properties and is still valid
 * @sg: pointer to the shadow guest address space structure
 * @asce: ASCE for which the shadow table is requested
 * @edat_level: edat level to be used for the shadow translation
 *
 * Returns 1 if the gmap shadow is still valid and matches the given
 * properties, the caller can continue using it. Returns 0 otherwise, the
 * caller has to request a new shadow gmap in this case.
 */
static inline int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
{
	if (sg->removed)
		return 0;
	return sg->orig_asce == asce && sg->edat_level == edat_level;
}

#endif
@ -21,6 +21,7 @@
#include "gaccess.h"
#include "trace.h"
#include "trace-s390.h"
#include "gmap.h"

u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
{
@ -367,7 +368,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
			    reg2, &srcaddr, GACC_FETCH, 0);
	if (rc)
		return kvm_s390_inject_prog_cond(vcpu, rc);
	rc = gmap_fault(vcpu->arch.gmap, srcaddr, 0);
	rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0);
	if (rc != 0)
		return rc;

@ -376,7 +377,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
			    reg1, &dstaddr, GACC_STORE, 0);
	if (rc)
		return kvm_s390_inject_prog_cond(vcpu, rc);
	rc = gmap_fault(vcpu->arch.gmap, dstaddr, FAULT_FLAG_WRITE);
	rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE);
	if (rc != 0)
		return rc;

@ -549,7 +550,7 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
	 * If the unpin did not succeed, the guest will exit again for the UVC
	 * and we will retry the unpin.
	 */
	if (rc == -EINVAL)
	if (rc == -EINVAL || rc == -ENXIO)
		return 0;
	/*
	 * If we got -EAGAIN here, we simply return it. It will eventually
@ -2893,7 +2893,8 @@ int kvm_set_routing_entry(struct kvm *kvm,
			  struct kvm_kernel_irq_routing_entry *e,
			  const struct kvm_irq_routing_entry *ue)
{
	u64 uaddr;
	u64 uaddr_s, uaddr_i;
	int idx;

	switch (ue->type) {
	/* we store the userspace addresses instead of the guest addresses */
@ -2901,14 +2902,16 @@ int kvm_set_routing_entry(struct kvm *kvm,
		if (kvm_is_ucontrol(kvm))
			return -EINVAL;
		e->set = set_adapter_int;
		uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
		if (uaddr == -EFAULT)

		idx = srcu_read_lock(&kvm->srcu);
		uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr);
		uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr);
		srcu_read_unlock(&kvm->srcu, idx);

		if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i))
			return -EFAULT;
		e->adapter.summary_addr = uaddr;
		uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
		if (uaddr == -EFAULT)
			return -EFAULT;
		e->adapter.ind_addr = uaddr;
		e->adapter.summary_addr = uaddr_s;
		e->adapter.ind_addr = uaddr_i;
		e->adapter.summary_offset = ue->u.adapter.summary_offset;
		e->adapter.ind_offset = ue->u.adapter.ind_offset;
		e->adapter.adapter_id = ue->u.adapter.adapter_id;
@ -50,6 +50,7 @@
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"
#include "gmap.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
@ -3428,8 +3429,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		struct kvm_userspace_memory_region2 fake_memslot = {
			.slot = KVM_S390_UCONTROL_MEMSLOT,
			.guest_phys_addr = 0,
			.userspace_addr = 0,
			.memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE),
			.flags = 0,
		};

		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
		/* one flat fake memslot covering the whole address-space */
		mutex_lock(&kvm->slots_lock);
		KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm);
		mutex_unlock(&kvm->slots_lock);
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
@ -4498,6 +4511,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}

static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags)
{
	struct kvm *kvm = gmap->private;
	gfn_t gfn = gpa_to_gfn(gaddr);
	bool unlocked;
	hva_t vmaddr;
	gpa_t tmp;
	int rc;

	if (kvm_is_ucontrol(kvm)) {
		tmp = __gmap_translate(gmap, gaddr);
		gfn = gpa_to_gfn(tmp);
	}

	vmaddr = gfn_to_hva(kvm, gfn);
	rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
	if (!rc)
		rc = __gmap_link(gmap, gaddr, vmaddr);
	return rc;
}

/**
 * __kvm_s390_mprotect_many() - Apply specified protection to guest pages
 * @gmap: the gmap of the guest
 * @gpa: the starting guest address
 * @npages: how many pages to protect
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: pgste notification bits to set
 *
 * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one()
 *
 * Context: kvm->srcu and gmap->mm need to be held in read mode
 */
int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
			     unsigned long bits)
{
	unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
	gpa_t end = gpa + npages * PAGE_SIZE;
	int rc;

	for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) {
		rc = gmap_protect_one(gmap, gpa, prot, bits);
		if (rc == -EAGAIN) {
			__kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag);
			rc = gmap_protect_one(gmap, gpa, prot, bits);
		}
		if (rc < 0)
			return rc;
	}

	return 0;
}
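The loop increment `gpa = ALIGN(gpa + 1, rc)` is the subtle part: it only works if gmap_protect_one() returns, on success, the size of the range it just handled, so that protecting a whole 1M segment advances the cursor to the next segment boundary instead of stepping page by page. That reading is inferred from the loop shape, not stated in the diff. A small sketch of the stepping arithmetic:

#include <stdio.h>
#include <stdint.h>

/* Round x up to the next multiple of the power-of-two a. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t gpa = 0x7000;		/* somewhere inside the first 1M segment */
	uint64_t handled = 1UL << 20;	/* the protect step reported a 1M range */

	/* Prints 0x100000: the cursor jumps to the next segment boundary. */
	printf("next gpa: 0x%llx\n",
	       (unsigned long long)ALIGN(gpa + 1, handled));
	return 0;
}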
static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu)
{
	gpa_t gaddr = kvm_s390_get_prefix(vcpu);
	int idx, rc;

	idx = srcu_read_lock(&vcpu->kvm->srcu);
	mmap_read_lock(vcpu->arch.gmap->mm);

	rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT);

	mmap_read_unlock(vcpu->arch.gmap->mm);
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	return rc;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
@ -4513,9 +4595,8 @@ retry:
	 */
	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);

		rc = kvm_s390_mprotect_notify_prefix(vcpu);
		if (rc) {
			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
			return rc;
@ -4766,11 +4847,111 @@ static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
{
	KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
		"Unexpected program interrupt 0x%x, TEID 0x%016lx",
		current->thread.gmap_int_code, current->thread.gmap_teid.val);
}

/*
 * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu
 * @vcpu: the vCPU whose gmap is to be fixed up
 * @gfn: the guest frame number used for memslots (including fake memslots)
 * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
 * @flags: FOLL_* flags
 *
 * Return: 0 on success, < 0 in case of error.
 * Context: The mm lock must not be held before calling. May sleep.
 */
int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
{
	struct kvm_memory_slot *slot;
	unsigned int fault_flags;
	bool writable, unlocked;
	unsigned long vmaddr;
	struct page *page;
	kvm_pfn_t pfn;
	int rc;

	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
		return vcpu_post_run_addressing_exception(vcpu);

	fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
	if (vcpu->arch.gmap->pfault_enabled)
		flags |= FOLL_NOWAIT;
	vmaddr = __gfn_to_hva_memslot(slot, gfn);

try_again:
	pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);

	/* Access outside memory, inject addressing exception */
	if (is_noslot_pfn(pfn))
		return vcpu_post_run_addressing_exception(vcpu);
	/* Signal pending: try again */
	if (pfn == KVM_PFN_ERR_SIGPENDING)
		return -EAGAIN;

	/* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */
	if (pfn == KVM_PFN_ERR_NEEDS_IO) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		vcpu->stat.pfault_sync++;
		/* Could not setup async pfault, try again synchronously */
		flags &= ~FOLL_NOWAIT;
		goto try_again;
	}
	/* Any other error */
	if (is_error_pfn(pfn))
		return -EFAULT;

	/* Success */
	mmap_read_lock(vcpu->arch.gmap->mm);
	/* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
	rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked);
	if (!rc)
		rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
	scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {
		kvm_release_faultin_page(vcpu->kvm, page, false, writable);
	}
	mmap_read_unlock(vcpu->arch.gmap->mm);
	return rc;
}

static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
{
	unsigned long gaddr_tmp;
	gfn_t gfn;

	gfn = gpa_to_gfn(gaddr);
	if (kvm_is_ucontrol(vcpu->kvm)) {
		/*
		 * This translates the per-vCPU guest address into a
		 * fake guest address, which can then be used with the
		 * fake memslots that are identity mapping userspace.
		 * This allows ucontrol VMs to use the normal fault
		 * resolution path, like normal VMs.
		 */
		mmap_read_lock(vcpu->arch.gmap->mm);
		gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
		mmap_read_unlock(vcpu->arch.gmap->mm);
		if (gaddr_tmp == -EFAULT) {
			vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
			vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
			vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION;
			return -EREMOTE;
		}
		gfn = gpa_to_gfn(gaddr_tmp);
	}
	return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);
}

static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
{
	unsigned int flags = 0;
	unsigned long gaddr;
	int rc = 0;

	gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
	if (kvm_s390_cur_gmap_fault_is_write())
@ -4781,9 +4962,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
		vcpu->stat.exit_null++;
		break;
	case PGM_NON_SECURE_STORAGE_ACCESS:
		KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
			"Unexpected program interrupt 0x%x, TEID 0x%016lx",
			current->thread.gmap_int_code, current->thread.gmap_teid.val);
		kvm_s390_assert_primary_as(vcpu);
		/*
		 * This is normal operation; a page belonging to a protected
		 * guest has not been imported yet. Try to import the page into
@ -4794,9 +4973,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
		break;
	case PGM_SECURE_STORAGE_ACCESS:
	case PGM_SECURE_STORAGE_VIOLATION:
		KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
			"Unexpected program interrupt 0x%x, TEID 0x%016lx",
			current->thread.gmap_int_code, current->thread.gmap_teid.val);
		kvm_s390_assert_primary_as(vcpu);
		/*
		 * This can happen after a reboot with asynchronous teardown;
		 * the new guest (normal or protected) will run on top of the
@ -4825,40 +5002,15 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
	case PGM_REGION_FIRST_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_THIRD_TRANS:
		KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
			"Unexpected program interrupt 0x%x, TEID 0x%016lx",
			current->thread.gmap_int_code, current->thread.gmap_teid.val);
		if (vcpu->arch.gmap->pfault_enabled) {
			rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT);
			if (rc == -EFAULT)
				return vcpu_post_run_addressing_exception(vcpu);
			if (rc == -EAGAIN) {
				trace_kvm_s390_major_guest_pfault(vcpu);
				if (kvm_arch_setup_async_pf(vcpu))
					return 0;
				vcpu->stat.pfault_sync++;
			} else {
				return rc;
			}
		}
		rc = gmap_fault(vcpu->arch.gmap, gaddr, flags);
		if (rc == -EFAULT) {
			if (kvm_is_ucontrol(vcpu->kvm)) {
				vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
				vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
				vcpu->run->s390_ucontrol.pgm_code = 0x10;
				return -EREMOTE;
			}
			return vcpu_post_run_addressing_exception(vcpu);
		}
		break;
		kvm_s390_assert_primary_as(vcpu);
		return vcpu_dat_fault_handler(vcpu, gaddr, flags);
	default:
		KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
			current->thread.gmap_int_code, current->thread.gmap_teid.val);
		send_sig(SIGSEGV, current, 0);
		break;
	}
	return rc;
	return 0;
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
@ -5737,7 +5889,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = vcpu_dat_fault_handler(vcpu, arg, 0);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	}
	case KVM_ENABLE_CAP:
@ -5853,7 +6007,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
	gpa_t size;

	if (kvm_is_ucontrol(kvm))
	if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
		return -EINVAL;

	/* When we are protected, we should not change the memory slots */
@ -5905,6 +6059,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
	int rc = 0;

	if (kvm_is_ucontrol(kvm))
		return;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
@ -20,6 +20,8 @@
#include <asm/processor.h>
#include <asm/sclp.h>

#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0)

static inline void kvm_s390_fpu_store(struct kvm_run *run)
{
	fpu_stfpc(&run->s.regs.fpc);
@ -279,6 +281,15 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
	return gd;
}

static inline hva_t gpa_to_hva(struct kvm *kvm, gpa_t gpa)
{
	hva_t hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));

	if (!kvm_is_error_hva(hva))
		hva |= offset_in_page(gpa);
	return hva;
}
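gpa_to_hva() works because gfn_to_hva() returns the host address of the start of the page, and the byte offset within a page-aligned mapping is the same on both sides; OR-ing in offset_in_page(gpa) therefore restores the exact byte address. A tiny standalone check of that arithmetic (addresses made up for illustration):

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
	uint64_t gpa = 0xabc123;		/* guest physical address */
	uint64_t hva_page = 0x7f1200000000;	/* host address of its page */
	uint64_t hva = hva_page | (gpa & (PAGE_SIZE - 1));

	assert(hva == 0x7f1200000123);		/* page base + in-page offset */
	return 0;
}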
/* implemented in pv.c */
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
@ -408,6 +419,14 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags);
int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
			     unsigned long bits);

static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags)
{
	return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags);
}

/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
@ -17,6 +17,7 @@
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
#include "gmap.h"

bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
@ -638,10 +639,28 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
		.tweak[1] = offset,
	};
	int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
	unsigned long vmaddr;
	bool unlocked;

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

	if (ret == -ENXIO) {
		mmap_read_lock(kvm->mm);
		vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
		if (kvm_is_error_hva(vmaddr)) {
			ret = -EFAULT;
		} else {
			ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
			if (!ret)
				ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
		}
		mmap_read_unlock(kvm->mm);
		if (!ret)
			return -EAGAIN;
		return ret;
	}

	if (ret && ret != -EAGAIN)
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
			     uvcb.gaddr, *rc, *rrc);
@ -660,6 +679,8 @@ int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
		     addr, size);

	guard(srcu)(&kvm->srcu);

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (ret == -EAGAIN) {
@ -13,6 +13,7 @@
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/io.h>
#include <linux/mman.h>

#include <asm/gmap.h>
#include <asm/mmu_context.h>
@ -22,6 +23,11 @@
#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "gmap.h"

enum vsie_page_flags {
	VSIE_PAGE_IN_USE = 0,
};

struct vsie_page {
	struct kvm_s390_sie_block scb_s;	/* 0x0000 */
@ -46,7 +52,18 @@ struct vsie_page {
	gpa_t gvrd_gpa;				/* 0x0240 */
	gpa_t riccbd_gpa;			/* 0x0248 */
	gpa_t sdnx_gpa;				/* 0x0250 */
	__u8 reserved[0x0700 - 0x0258];		/* 0x0258 */
	/*
	 * guest address of the original SCB. Remains set for free vsie
	 * pages, so we can properly look them up in our addr_to_page
	 * radix tree.
	 */
	gpa_t scb_gpa;				/* 0x0258 */
	/*
	 * Flags: must be set/cleared atomically after the vsie page can be
	 * looked up by other CPUs.
	 */
	unsigned long flags;			/* 0x0260 */
	__u8 reserved[0x0700 - 0x0268];		/* 0x0268 */
	struct kvm_s390_crypto_cb crycb;	/* 0x0700 */
	__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */
};
@ -584,7 +601,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
	struct kvm *kvm = gmap->private;
	struct vsie_page *cur;
	unsigned long prefix;
	struct page *page;
	int i;

	if (!gmap_is_shadow(gmap))
@ -594,10 +610,9 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
	 * therefore we can safely reference them all the time.
	 */
	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
		page = READ_ONCE(kvm->arch.vsie.pages[i]);
		if (!page)
		cur = READ_ONCE(kvm->arch.vsie.pages[i]);
		if (!cur)
			continue;
		cur = page_to_virt(page);
		if (READ_ONCE(cur->gmap) != gmap)
			continue;
		prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
@ -1345,6 +1360,20 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
	return rc;
}

/* Try getting a given vsie page, returning "true" on success. */
static inline bool try_get_vsie_page(struct vsie_page *vsie_page)
{
	if (test_bit(VSIE_PAGE_IN_USE, &vsie_page->flags))
		return false;
	return !test_and_set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
}

/* Put a vsie page acquired through get_vsie_page / try_get_vsie_page. */
static void put_vsie_page(struct vsie_page *vsie_page)
{
	clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
}
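try_get_vsie_page() is a classic non-blocking claim: a cheap plain read first, so contended callers do not bounce the cache line, then an atomic test-and-set as the actual trylock. The same shape in portable C11 atomics (a userspace sketch, not the kernel bitops API):

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic bool in_use;

static bool try_get(void)
{
	if (atomic_load(&in_use))			/* like test_bit() */
		return false;
	return !atomic_exchange(&in_use, true);		/* like test_and_set_bit() */
}

static void put(void)
{
	atomic_store(&in_use, false);			/* like clear_bit() */
}

int main(void)
{
	if (try_get()) {
		/* ... exclusive use of the object ... */
		put();
	}
	return 0;
}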
/*
 * Get or create a vsie page for a scb address.
 *
@ -1355,16 +1384,21 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
	struct vsie_page *vsie_page;
	struct page *page;
	int nr_vcpus;

	rcu_read_lock();
	page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
	vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
	rcu_read_unlock();
	if (page) {
		if (page_ref_inc_return(page) == 2)
			return page_to_virt(page);
		page_ref_dec(page);
	if (vsie_page) {
		if (try_get_vsie_page(vsie_page)) {
			if (vsie_page->scb_gpa == addr)
				return vsie_page;
			/*
			 * We raced with someone reusing + putting this vsie
			 * page before we grabbed it.
			 */
			put_vsie_page(vsie_page);
		}
	}

	/*
@ -1375,36 +1409,40 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)

	mutex_lock(&kvm->arch.vsie.mutex);
	if (kvm->arch.vsie.page_count < nr_vcpus) {
		page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
		if (!page) {
		vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
		if (!vsie_page) {
			mutex_unlock(&kvm->arch.vsie.mutex);
			return ERR_PTR(-ENOMEM);
		}
		page_ref_inc(page);
		kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
		__set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
		kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page;
		kvm->arch.vsie.page_count++;
	} else {
		/* reuse an existing entry that belongs to nobody */
		while (true) {
			page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
			if (page_ref_inc_return(page) == 2)
			vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
			if (try_get_vsie_page(vsie_page))
				break;
			page_ref_dec(page);
			kvm->arch.vsie.next++;
			kvm->arch.vsie.next %= nr_vcpus;
		}
		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
		if (vsie_page->scb_gpa != ULONG_MAX)
			radix_tree_delete(&kvm->arch.vsie.addr_to_page,
					  vsie_page->scb_gpa >> 9);
	}
	page->index = addr;
	/* double use of the same address */
	if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
		page_ref_dec(page);
	/* Mark it as invalid until it resides in the tree. */
	vsie_page->scb_gpa = ULONG_MAX;

	/* Double use of the same address or allocation failure. */
	if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9,
			      vsie_page)) {
		put_vsie_page(vsie_page);
		mutex_unlock(&kvm->arch.vsie.mutex);
		return NULL;
	}
	vsie_page->scb_gpa = addr;
	mutex_unlock(&kvm->arch.vsie.mutex);

	vsie_page = page_to_virt(page);
	memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
	release_gmap_shadow(vsie_page);
	vsie_page->fault_addr = 0;
@ -1412,14 +1450,6 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
	return vsie_page;
}

/* put a vsie page acquired via get_vsie_page */
static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
{
	struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);

	page_ref_dec(page);
}

int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
	struct vsie_page *vsie_page;
@ -1470,7 +1500,7 @@ out_unshadow:
out_unpin_scb:
	unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
	put_vsie_page(vcpu->kvm, vsie_page);
	put_vsie_page(vsie_page);

	return rc < 0 ? rc : 0;
}
@ -1486,18 +1516,18 @@ void kvm_s390_vsie_init(struct kvm *kvm)
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
	struct vsie_page *vsie_page;
	struct page *page;
	int i;

	mutex_lock(&kvm->arch.vsie.mutex);
	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
		page = kvm->arch.vsie.pages[i];
		vsie_page = kvm->arch.vsie.pages[i];
		kvm->arch.vsie.pages[i] = NULL;
		vsie_page = page_to_virt(page);
		release_gmap_shadow(vsie_page);
		/* free the radix tree entry */
		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
		__free_page(page);
		if (vsie_page->scb_gpa != ULONG_MAX)
			radix_tree_delete(&kvm->arch.vsie.addr_to_page,
					  vsie_page->scb_gpa >> 9);
		free_page((unsigned long)vsie_page);
	}
	kvm->arch.vsie.page_count = 0;
	mutex_unlock(&kvm->arch.vsie.mutex);
File diff suppressed because it is too large
@ -176,8 +176,6 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
	}
	table = ptdesc_to_virt(ptdesc);
	__arch_set_page_dat(table, 1);
	/* pt_list is used by gmap only */
	INIT_LIST_HEAD(&ptdesc->pt_list);
	memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
	memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
	return table;
@ -331,6 +331,17 @@ error:
	return rc;
}

static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
	struct pci_dev *pdev;

	pdev = zpci_iov_find_parent_pf(zbus, zdev);
	if (!pdev)
		return true;
	pci_dev_put(pdev);
	return false;
}

int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
{
	bool topo_is_tid = zdev->tid_avail;
@ -345,6 +356,15 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)

	topo = topo_is_tid ? zdev->tid : zdev->pchid;
	zbus = zpci_bus_get(topo, topo_is_tid);
	/*
	 * An isolated VF gets its own domain/bus even if there exists
	 * a matching domain/bus already
	 */
	if (zbus && zpci_bus_is_isolated_vf(zbus, zdev)) {
		zpci_bus_put(zbus);
		zbus = NULL;
	}

	if (!zbus) {
		zbus = zpci_bus_alloc(topo, topo_is_tid);
		if (!zbus)
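zpci_bus_is_isolated_vf() only cares whether a parent exists, so the reference returned by the lookup is dropped again immediately with pci_dev_put(). The shape of that get/put contract, in a userspace sketch with a plain refcount (all names illustrative):

#include <stdbool.h>
#include <stddef.h>

struct obj { int refcount; };

/* Lookups hand back the object with an extra reference, or NULL. */
static struct obj *find_parent(struct obj *candidate)
{
	if (candidate)
		candidate->refcount++;
	return candidate;
}

static void put_obj(struct obj *o)
{
	if (o)
		o->refcount--;
}

/* Pure existence check: take the reference, then drop it right away. */
static bool is_isolated(struct obj *candidate)
{
	struct obj *parent = find_parent(candidate);

	if (!parent)
		return true;
	put_obj(parent);
	return false;
}

int main(void)
{
	struct obj parent = { .refcount = 1 };

	return is_isolated(&parent) ? 1 : 0;	/* 0: parent found, ref balanced */
}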
@ -60,18 +60,35 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in
	return 0;
}

int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
/**
 * zpci_iov_find_parent_pf - Find the parent PF, if any, of the given function
 * @zbus: The bus that the PCI function is on, or would be added on
 * @zdev: The PCI function
 *
 * Finds the parent PF, if it exists and is configured, of the given PCI function
 * and increments its refcount. The PF is searched for on the provided bus so the
 * caller has to ensure that this is the correct bus to search. This function may
 * be used before adding the PCI function to a zbus.
 *
 * Return: Pointer to the struct pci_dev of the parent PF or NULL if it is not
 * found. If the function is not a VF or has no RequesterID information,
 * NULL is returned as well.
 */
struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
	int i, cand_devfn;
	struct zpci_dev *zdev;
	int i, vfid, devfn, cand_devfn;
	struct pci_dev *pdev;
	int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1 */
	int rc = 0;

	if (!zbus->multifunction)
		return 0;

	/* If the parent PF for the given VF is also configured in the
		return NULL;
	/* Non-VFs and VFs without RID available don't have a parent */
	if (!zdev->vfn || !zdev->rid_available)
		return NULL;
	/* Linux vfid starts at 0 vfn at 1 */
	vfid = zdev->vfn - 1;
	devfn = zdev->rid & ZPCI_RID_MASK_DEVFN;
	/*
	 * If the parent PF for the given VF is also configured in the
	 * instance, it must be on the same zbus.
	 * We can then identify the parent PF by checking what
	 * devfn the VF would have if it belonged to that PF using the PF's
@ -85,15 +102,26 @@ int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn
			if (!pdev)
				continue;
			cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
			if (cand_devfn == virtfn->devfn) {
				rc = zpci_iov_link_virtfn(pdev, virtfn, vfid);
				/* balance pci_get_slot() */
				pci_dev_put(pdev);
				break;
			}
			if (cand_devfn == devfn)
				return pdev;
			/* balance pci_get_slot() */
			pci_dev_put(pdev);
		}
	}
	return NULL;
}

int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
{
	struct zpci_dev *zdev = to_zpci(virtfn);
	struct pci_dev *pdev_pf;
	int rc = 0;

	pdev_pf = zpci_iov_find_parent_pf(zbus, zdev);
	if (pdev_pf) {
		/* Linux' vfids start at 0 while zdev->vfn starts at 1 */
		rc = zpci_iov_link_virtfn(pdev_pf, virtfn, zdev->vfn - 1);
		pci_dev_put(pdev_pf);
	}
	return rc;
}
@ -19,6 +19,8 @@ void zpci_iov_map_resources(struct pci_dev *pdev);

int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn);

struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev);

#else /* CONFIG_PCI_IOV */
static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {}

@ -28,5 +30,10 @@ static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *v
{
	return 0;
}

static inline struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
	return NULL;
}
#endif /* CONFIG_PCI_IOV */
#endif /* __S390_PCI_IOV_h */
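The header above uses the standard config-stub idiom: with CONFIG_PCI_IOV enabled callers get the real declaration, otherwise a static inline no-op with the same signature, so call sites need no #ifdef of their own. A minimal compilable sketch of the idiom, with HAVE_FEATURE as a stand-in for the config option:

#include <stdio.h>

#ifndef HAVE_FEATURE
/* Compiled-out variant: an inline stub keeps callers building unchanged. */
static inline int feature_op(int x)
{
	(void)x;
	return 0;
}
#else
int feature_op(int x);	/* real implementation lives elsewhere */
#endif

int main(void)
{
	printf("feature_op(5) = %d\n", feature_op(5));
	return 0;
}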