Adam Radford <aradford@gmail.com>
Adriana Reus <adi.reus@gmail.com> <adriana.reus@intel.com>
Adrian Bunk <bunk@stusta.de>
+Ajay Kaher <ajay.kaher@broadcom.com> <akaher@vmware.com>
Akhil P Oommen <quic_akhilpo@quicinc.com> <akhilpo@codeaurora.org>
Alan Cox <alan@lxorguk.ukuu.org.uk>
Alan Cox <root@hraefn.swansea.linux.org.uk>
Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>
+Alexey Makhalov <alexey.amakhalov@broadcom.com> <amakhalov@vmware.com>
+Alex Elder <elder@kernel.org>
+Alex Elder <elder@kernel.org> <aelder@sgi.com>
+Alex Elder <elder@kernel.org> <alex.elder@linaro.org>
+Alex Elder <elder@kernel.org> <alex.elder@linary.org>
+Alex Elder <elder@kernel.org> <elder@dreamhost.com>
+Alex Elder <elder@kernel.org> <elder@dreawmhost.com>
+Alex Elder <elder@kernel.org> <elder@ieee.org>
+Alex Elder <elder@kernel.org> <elder@inktank.com>
+Alex Elder <elder@kernel.org> <elder@linaro.org>
+Alex Elder <elder@kernel.org> <elder@newdream.net>
Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com>
Alex Shi <alexs@kernel.org> <alex.shi@intel.com>
Alex Shi <alexs@kernel.org> <alex.shi@linaro.org>
Ben Widawsky <bwidawsk@kernel.org> <ben.widawsky@intel.com>
Ben Widawsky <bwidawsk@kernel.org> <benjamin.widawsky@intel.com>
Benjamin Poirier <benjamin.poirier@gmail.com> <bpoirier@suse.de>
+Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@gmail.com>
+Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@redhat.com>
Bjorn Andersson <andersson@kernel.org> <bjorn@kryo.se>
Bjorn Andersson <andersson@kernel.org> <bjorn.andersson@linaro.org>
Bjorn Andersson <andersson@kernel.org> <bjorn.andersson@sonymobile.com>
Brian Avery <b.avery@hp.com>
Brian King <brking@us.ibm.com>
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
+Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com>
Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org>
Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org>
Lee Jones <lee@kernel.org> <lee.jones@canonical.com>
Lee Jones <lee@kernel.org> <lee.jones@linaro.org>
Lee Jones <lee@kernel.org> <lee@ubuntu.com>
-Leonard Crestez <leonard.crestez@nxp.com> Leonard Crestez <cdleonard@gmail.com>
+Leonard Crestez <cdleonard@gmail.com> <leonard.crestez@nxp.com>
+Leonard Crestez <cdleonard@gmail.com> <leonard.crestez@intel.com>
Leonardo Bras <leobras.c@gmail.com> <leonardo@linux.ibm.com>
Leonard Göhrs <l.goehrs@pengutronix.de>
Leonid I Ananiev <leonid.i.ananiev@intel.com>
Nadav Amit <nadav.amit@gmail.com> <namit@vmware.com>
Nadav Amit <nadav.amit@gmail.com> <namit@cs.technion.ac.il>
Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
-Naoya Horiguchi <naoya.horiguchi@nec.com> <n-horiguchi@ah.jp.nec.com>
+Naoya Horiguchi <nao.horiguchi@gmail.com> <n-horiguchi@ah.jp.nec.com>
+Naoya Horiguchi <nao.horiguchi@gmail.com> <naoya.horiguchi@nec.com>
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
Neeraj Upadhyay <quic_neeraju@quicinc.com> <neeraju@codeaurora.org>
Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com>
Qais Yousef <qyousef@layalina.io> <qais.yousef@imgtec.com>
Qais Yousef <qyousef@layalina.io> <qais.yousef@arm.com>
-Quentin Monnet <quentin@isovalent.com> <quentin.monnet@netronome.com>
+Quentin Monnet <qmo@kernel.org> <quentin.monnet@netronome.com>
+Quentin Monnet <qmo@kernel.org> <quentin@isovalent.com>
Quentin Perret <qperret@qperret.net> <quentin.perret@arm.com>
Rafael J. Wysocki <rjw@rjwysocki.net> <rjw@sisk.pl>
Rajeev Nandan <quic_rajeevny@quicinc.com> <rajeevny@codeaurora.org>
Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com>
Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org>
Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com>
+Richard Genoud <richard.genoud@bootlin.com> <richard.genoud@gmail.com>
Richard Leitner <richard.leitner@linux.dev> <dev@g0hl1n.net>
Richard Leitner <richard.leitner@linux.dev> <me@g0hl1n.net>
Richard Leitner <richard.leitner@linux.dev> <richard.leitner@skidata.com>
Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com>
Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com>
Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru>
+Ronak Doshi <ronak.doshi@broadcom.com> <doshir@vmware.com>
Muchun Song <muchun.song@linux.dev> <songmuchun@bytedance.com>
Muchun Song <muchun.song@linux.dev> <smuchun@gmail.com>
Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com>
Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com>
Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.org>
Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.com>
+Vishnu Dasa <vishnu.dasa@broadcom.com> <vdasa@vmware.com>
Vivek Aknurwar <quic_viveka@quicinc.com> <viveka@codeaurora.org>
Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com>
Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
S: 13353 Berlin
S: Germany
+N: Gustavo Pimental
+E: gustavo.pimentel@synopsys.com
+D: PCI driver for Synopsys DesignWare
+
N: Emanuel Pirker
E: epirker@edu.uni-klu.ac.at
D: AIC5800 IEEE 1394, RAW I/O on 1394
the BHB might be shared across privilege levels even in the presence of
Enhanced IBRS.
-Currently the only known real-world BHB attack vector is via
-unprivileged eBPF. Therefore, it's highly recommended to not enable
-unprivileged eBPF, especially when eIBRS is used (without retpolines).
-For a full mitigation against BHB attacks, it's recommended to use
-retpolines (or eIBRS combined with retpolines).
+Previously the only known real-world BHB attack vector was via unprivileged
+eBPF. Further research has found attacks that don't require unprivileged eBPF.
+For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
+use the BHB clearing sequence.
Attack scenarios
----------------
'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
=========================== =======================================================
+ - Branch History Injection (BHI) protection status:
+
+.. list-table::
+
+ * - BHI: Not affected
+ - System is not affected
+ * - BHI: Retpoline
+ - System is protected by retpoline
+ * - BHI: BHI_DIS_S
+ - System is protected by BHI_DIS_S
+ * - BHI: SW loop, KVM SW loop
+ - System is protected by software clearing sequence
+ * - BHI: Vulnerable
+ - System is vulnerable to BHI
+ * - BHI: Vulnerable, KVM: SW loop
+ - System is vulnerable; KVM is protected by software clearing sequence
+
Full mitigation might require a microcode update from the CPU
vendor. When the necessary microcode is not available, the kernel will
report vulnerability.
Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
boot, by setting the IBRS bit, and they're automatically protected against
- Spectre v2 variant attacks.
+ some Spectre v2 variant attacks. The BHB can still influence the choice of
+ indirect branch predictor entry, and although branch predictor entries are
+ isolated between modes when eIBRS is enabled, the BHB itself is not isolated
+ between modes. Systems which support BHI_DIS_S will set it to protect against
+ BHI attacks.
On Intel's enhanced IBRS systems, this includes cross-thread branch target
injections on SMT systems (STIBP). In other words, Intel eIBRS enables
spectre_v2=off. Spectre variant 1 mitigations
cannot be disabled.
+ spectre_bhi=
+
+ [X86] Control mitigation of Branch History Injection
+ (BHI) vulnerability. This setting affects the deployment
+ of the HW BHI control and the SW BHB clearing sequence.
+
+ on
+ (default) Enable the HW or SW mitigation as
+ needed.
+ off
+ Disable the mitigation.
+
For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
Mitigation selection guide
retbleed=off [X86]
spec_rstack_overflow=off [X86]
spec_store_bypass_disable=off [X86,PPC]
+ spectre_bhi=off [X86]
spectre_v2_user=off [X86]
srbds=off [X86,INTEL]
ssbd=force-off [ARM64]
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/admin-guide/laptops/sonypi.rst
+ spectre_bhi= [X86] Control mitigation of Branch History Injection
+ (BHI) vulnerability. This setting affects the
+ deployment of the HW BHI control and the SW BHB
+ clearing sequence.
+
+ on - (default) Enable the HW or SW mitigation
+ as needed.
+ off - Disable the mitigation.
+
spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
The default operation protects the kernel from
To turn off having tracepoints sent to printk,
echo 0 > /proc/sys/kernel/tracepoint_printk
Note, echoing 1 into this file without the
- tracepoint_printk kernel cmdline option has no effect.
+ tp_printk kernel cmdline option has no effect.
The tp_printk_stop_on_boot (see below) can also be used
to stop the printing of events to console at
Some users cannot tolerate the swapping that comes with zswap store failures
and zswap writebacks. Swapping can be disabled entirely (without disabling
-zswap itself) on a cgroup-basis as follows:
+zswap itself) on a cgroup-basis as follows::
echo 0 > /sys/fs/cgroup/<cgroup-name>/memory.zswap.writeback
When there is a sizable amount of cold memory residing in the zswap pool, it
can be advantageous to proactively write these cold pages to swap and reclaim
the memory for other use cases. By default, the zswap shrinker is disabled.
-User can enable it as follows:
+User can enable it as follows::
echo Y > /sys/module/zswap/parameters/shrinker_enabled
========================================
*[If you are new to building or bisecting Linux, ignore this section and head
-over to the* ":ref:`step-by-step guide<introguide_bissbs>`" *below. It utilizes
+over to the* ':ref:`step-by-step guide <introguide_bissbs>`' *below. It utilizes
the same commands as this section while describing them in brief fashion. The
steps are nevertheless easy to follow and together with accompanying entries
in a reference section mention many alternatives, pitfalls, and additional
**In case you want to check if a bug is present in code currently supported by
developers**, execute just the *preparations* and *segment 1*; while doing so,
consider the newest Linux kernel you regularly use to be the 'working' kernel.
-In the following example that's assumed to be 6.0.13, which is why the sources
-of 6.0 will be used to prepare the .config file.
+In the following example that's assumed to be 6.0, which is why its sources
+will be used to prepare the .config file.
**In case you face a regression**, follow the steps at least till the end of
*segment 2*. Then you can submit a preliminary report -- or continue with
cd ~/linux/
git remote add -t master stable \
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
- git checkout --detach v6.0
+ git switch --detach v6.0
# * Hint: if you used an existing clone, ensure no stale .config is around.
make olddefconfig
# * Ensure the former command picked the .config of the 'working' kernel.
a) Checking out latest mainline code::
cd ~/linux/
- git checkout --force --detach mainline/master
+ git switch --discard-changes --detach mainline/master
b) Build, install, and boot a kernel::
a) Start by checking out the sources of the 'good' version::
cd ~/linux/
- git checkout --force --detach v6.0
+ git switch --discard-changes --detach v6.0
b) Build, install, and boot a kernel as described earlier in *segment 1,
section b* -- just feel free to skip the 'du' commands, as you have a rough
* **Segment 3**: perform and validate the bisection.
- a) In case your 'broken' version is a stable/longterm release, add the Git
- branch holding it::
+ a) Retrieve the sources for your 'bad' version::
git remote set-branches --add stable linux-6.1.y
git fetch stable
works with the newly built kernel. If it does, tell Git by executing
``git bisect good``; if it does not, run ``git bisect bad`` instead.
- All three commands will make Git checkout another commit; then re-execute
+ All three commands will make Git check out another commit; then re-execute
this step (e.g. build, install, boot, and test a kernel to then tell Git
the outcome). Do so again and again until Git shows which commit broke
things. If you run short of disk space during this process, check the
- "Supplementary tasks" section below.
+ section 'Complementary tasks: cleanup during and after the process'
+ below.
d) Once your finished the bisection, put a few things away::
e) Try to verify the bisection result::
- git checkout --force --detach mainline/master
+ git switch --discard-changes --detach mainline/master
git revert --no-edit cafec0cacaca0
+ cp ~/kernel-config-working .config
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted'
This is optional, as some commits are impossible to revert. But if the
second command worked flawlessly, build, install, and boot one more kernel
- kernel, which should not show the regression.
+ kernel; just this time skip the first command copying the base .config file
+ over, as that already has been taken care off.
-* **Supplementary tasks**: cleanup during and after the process.
+* **Complementary tasks**: cleanup during and after the process.
a) To avoid running out of disk space during a bisection, you might need to
remove some kernels you built earlier. You most likely want to keep those
the kernels you built earlier and later you might want to keep around for
a week or two.
+* **Optional task**: test a debug patch or a proposed fix later::
+
+ git fetch mainline
+ git switch --discard-changes --detach mainline/master
+ git apply /tmp/foobars-proposed-fix-v1.patch
+ cp ~/kernel-config-working .config
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-foobars-fix-v1'
+
+ Build, install, and boot a kernel as described in *segment 1, section b* --
+ but this time omit the first command copying the build configuration over,
+ as that has been taken care of already.
+
.. _introguide_bissbs:
Step-by-step guide on how to verify bugs and bisect regressions
===============================================================
This guide describes how to set up your own Linux kernels for investigating bugs
-or regressions you intent to report. How far you want to follow the instructions
+or regressions you intend to report. How far you want to follow the instructions
depends on your issue:
Execute all steps till the end of *segment 1* to **verify if your kernel problem
*segment 3* to **perform a bisection** for a full-fledged regression report
developers are obliged to act upon.
- :ref:`Preparations: set up everything to build your own kernels.<introprep_bissbs>`
+ :ref:`Preparations: set up everything to build your own kernels <introprep_bissbs>`.
- :ref:`Segment 1: try to reproduce the problem with the latest codebase.<introlatestcheck_bissbs>`
+ :ref:`Segment 1: try to reproduce the problem with the latest codebase <introlatestcheck_bissbs>`.
- :ref:`Segment 2: check if the kernels you build work fine.<introworkingcheck_bissbs>`
+ :ref:`Segment 2: check if the kernels you build work fine <introworkingcheck_bissbs>`.
- :ref:`Segment 3: perform a bisection and validate the result.<introbisect_bissbs>`
+ :ref:`Segment 3: perform a bisection and validate the result <introbisect_bissbs>`.
- :ref:`Supplementary tasks: cleanup during and after following this guide.<introclosure_bissbs>`
+ :ref:`Complementary tasks: cleanup during and after following this guide <introclosure_bissbs>`.
+
+ :ref:`Optional tasks: test reverts, patches, or later versions <introoptional_bissbs>`.
The steps in each segment illustrate the important aspects of the process, while
a comprehensive reference section holds additional details for almost all of the
For further details on how to report Linux kernel issues or regressions check
out Documentation/admin-guide/reporting-issues.rst, which works in conjunction
with this document. It among others explains why you need to verify bugs with
-the latest 'mainline' kernel, even if you face a problem with a kernel from a
-'stable/longterm' series; for users facing a regression it also explains that
-sending a preliminary report after finishing segment 2 might be wise, as the
-regression and its culprit might be known already. For further details on
-what actually qualifies as a regression check out
-Documentation/admin-guide/reporting-regressions.rst.
+the latest 'mainline' kernel (e.g. versions like 6.0, 6.1-rc1, or 6.1-rc6),
+even if you face a problem with a kernel from a 'stable/longterm' series
+(say 6.0.13).
+
+For users facing a regression that document also explains why sending a
+preliminary report after segment 2 might be wise, as the regression and its
+culprit might be known already. For further details on what actually qualifies
+as a regression check out Documentation/admin-guide/reporting-regressions.rst.
+
+If you run into any problems while following this guide or have ideas how to
+improve it, :ref:`please let the kernel developers know <submit_improvements>`.
.. _introprep_bissbs:
Preparations: set up everything to build your own kernels
---------------------------------------------------------
+The following steps lay the groundwork for all further tasks.
+
+Note: the instructions assume you are building and testing on the same
+machine; if you want to compile the kernel on another system, check
+:ref:`Build kernels on a different machine <buildhost_bis>` below.
+
.. _backup_bissbs:
* Create a fresh backup and put system repair and restore tools at hand, just
to be prepared for the unlikely case of something going sideways.
- [:ref:`details<backup_bisref>`]
+ [:ref:`details <backup_bisref>`]
.. _vanilla_bissbs:
builds them automatically. That includes but is not limited to DKMS, openZFS,
VirtualBox, and Nvidia's graphics drivers (including the GPLed kernel module).
- [:ref:`details<vanilla_bisref>`]
+ [:ref:`details <vanilla_bisref>`]
.. _secureboot_bissbs:
their restrictions through a process initiated by
``mokutil --disable-validation``.
- [:ref:`details<secureboot_bisref>`]
+ [:ref:`details <secureboot_bisref>`]
.. _rangecheck_bissbs:
* Determine the kernel versions considered 'good' and 'bad' throughout this
- guide.
+ guide:
- Do you follow this guide to verify if a bug is present in the code developers
- care for? Then consider the mainline release your 'working' kernel (the newest
- one you regularly use) is based on to be the 'good' version; if your 'working'
- kernel for example is 6.0.11, then your 'good' kernel is 6.0.
+ * Do you follow this guide to verify if a bug is present in the code the
+ primary developers care for? Then consider the version of the newest kernel
+ you regularly use currently as 'good' (e.g. 6.0, 6.0.13, or 6.1-rc2).
- In case you face a regression, it depends on the version range where the
- regression was introduced:
+ * Do you face a regression, e.g. something broke or works worse after
+ switching to a newer kernel version? In that case it depends on the version
+ range during which the problem appeared:
- * Something which used to work in Linux 6.0 broke when switching to Linux
- 6.1-rc1? Then henceforth regard 6.0 as the last known 'good' version
- and 6.1-rc1 as the first 'bad' one.
+ * Something regressed when updating from a stable/longterm release
+ (say 6.0.13) to a newer mainline series (like 6.1-rc7 or 6.1) or a
+ stable/longterm version based on one (say 6.1.5)? Then consider the
+ mainline release your working kernel is based on to be the 'good'
+ version (e.g. 6.0) and the first version to be broken as the 'bad' one
+ (e.g. 6.1-rc7, 6.1, or 6.1.5). Note, at this point it is merely assumed
+ that 6.0 is fine; this hypothesis will be checked in segment 2.
- * Some function stopped working when updating from 6.0.11 to 6.1.4? Then for
- the time being consider 6.0 as the last 'good' version and 6.1.4 as
- the 'bad' one. Note, at this point it is merely assumed that 6.0 is fine;
- this assumption will be checked in segment 2.
+ * Something regressed when switching from one mainline version (say 6.0) to
+ a later one (like 6.1-rc1) or a stable/longterm release based on it
+ (say 6.1.5)? Then regard the last working version (e.g. 6.0) as 'good' and
+ the first broken (e.g. 6.1-rc1 or 6.1.5) as 'bad'.
- * A feature you used in 6.0.11 does not work at all or worse in 6.1.13? In
- that case you want to bisect within a stable/longterm series: consider
- 6.0.11 as the last known 'good' version and 6.0.13 as the first 'bad'
- one. Note, in this case you still want to compile and test a mainline kernel
- as explained in segment 1: the outcome will determine if you need to report
- your issue to the regular developers or the stable team.
+ * Something regressed when updating within a stable/longterm series (say
+ from 6.0.13 to 6.0.15)? Then consider those versions as 'good' and 'bad'
+ (e.g. 6.0.13 and 6.0.15), as you need to bisect within that series.
*Note, do not confuse 'good' version with 'working' kernel; the latter term
throughout this guide will refer to the last kernel that has been working
fine.*
- [:ref:`details<rangecheck_bisref>`]
+ [:ref:`details <rangecheck_bisref>`]
.. _bootworking_bissbs:
* Boot into the 'working' kernel and briefly use the apparently broken feature.
- [:ref:`details<bootworking_bisref>`]
+ [:ref:`details <bootworking_bisref>`]
.. _diskspace_bissbs:
debug symbols: both explain approaches reducing the amount of space, which
should allow you to master these tasks with about 4 Gigabytes free space.
- [:ref:`details<diskspace_bisref>`]
+ [:ref:`details <diskspace_bisref>`]
.. _buildrequires_bissbs:
reference section shows how to quickly install those on various popular Linux
distributions.
- [:ref:`details<buildrequires_bisref>`]
+ [:ref:`details <buildrequires_bisref>`]
.. _sources_bissbs:
git remote add -t master stable \
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
- [:ref:`details<sources_bisref>`]
+ [:ref:`details <sources_bisref>`]
+
+.. _stablesources_bissbs:
+
+* Is one of the versions you earlier established as 'good' or 'bad' a stable or
+ longterm release (say 6.1.5)? Then download the code for the series it belongs
+ to ('linux-6.1.y' in this example)::
+
+ git remote set-branches --add stable linux-6.1.y
+ git fetch stable
.. _oldconfig_bissbs:
* Start preparing a kernel build configuration (the '.config' file).
Before doing so, ensure you are still running the 'working' kernel an earlier
- step told you to boot; if you are unsure, check the current kernel release
+ step told you to boot; if you are unsure, check the current kernelrelease
identifier using ``uname -r``.
Afterwards check out the source code for the version earlier established as
the version number in this and all later Git commands needs to be prefixed
with a 'v'::
- git checkout --detach v6.0
+ git switch --discard-changes --detach v6.0
Now create a build configuration file::
'make olddefconfig' again and check if it now picked up the right config file
as base.
- [:ref:`details<oldconfig_bisref>`]
+ [:ref:`details <oldconfig_bisref>`]
.. _localmodconfig_bissbs:
spending much effort on, as long as it boots and allows to properly test the
feature that causes trouble.
- [:ref:`details<localmodconfig_bisref>`]
+ [:ref:`details <localmodconfig_bisref>`]
.. _tagging_bissbs:
./scripts/config --set-str CONFIG_LOCALVERSION '-local'
./scripts/config -e CONFIG_LOCALVERSION_AUTO
- [:ref:`details<tagging_bisref>`]
+ [:ref:`details <tagging_bisref>`]
.. _debugsymbols_bissbs:
./scripts/config -d DEBUG_INFO -d DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT \
-d DEBUG_INFO_DWARF4 -d DEBUG_INFO_DWARF5 -e CONFIG_DEBUG_INFO_NONE
- [:ref:`details<debugsymbols_bisref>`]
+ [:ref:`details <debugsymbols_bisref>`]
.. _configmods_bissbs:
* Are you running Debian? Then you want to avoid known problems by performing
additional adjustments explained in the reference section.
- [:ref:`details<configmods_distros_bisref>`].
+ [:ref:`details <configmods_distros_bisref>`].
* If you want to influence other aspects of the configuration, do so now using
your preferred tool. Note, to use make targets like 'menuconfig' or
'nconfig', you will need to install the development files of ncurses; for
'xconfig' you likewise need the Qt5 or Qt6 headers.
- [:ref:`details<configmods_individual_bisref>`].
+ [:ref:`details <configmods_individual_bisref>`].
.. _saveconfig_bissbs:
make olddefconfig
cp .config ~/kernel-config-working
- [:ref:`details<saveconfig_bisref>`]
+ [:ref:`details <saveconfig_bisref>`]
.. _introlatestcheck_bissbs:
The following steps verify if the problem occurs with the code currently
supported by developers. In case you face a regression, it also checks that the
problem is not caused by some .config change, as reporting the issue then would
-be a waste of time. [:ref:`details<introlatestcheck_bisref>`]
+be a waste of time. [:ref:`details <introlatestcheck_bisref>`]
.. _checkoutmaster_bissbs:
-* Check out the latest Linux codebase::
+* Check out the latest Linux codebase.
- cd ~/linux/
- git checkout --force --detach mainline/master
+ * Are your 'good' and 'bad' versions from the same stable or longterm series?
+ Then check the `front page of kernel.org <https://kernel.org/>`_: if it
+ lists a release from that series without an '[EOL]' tag, checkout the series
+ latest version ('linux-6.1.y' in the following example)::
+
+ cd ~/linux/
+ git switch --discard-changes --detach stable/linux-6.1.y
+
+ Your series is unsupported, if is not listed or carrying a 'end of life'
+ tag. In that case you might want to check if a successor series (say
+ linux-6.2.y) or mainline (see next point) fix the bug.
- [:ref:`details<checkoutmaster_bisref>`]
+ * In all other cases, run::
+
+ cd ~/linux/
+ git switch --discard-changes --detach mainline/master
+
+ [:ref:`details <checkoutmaster_bisref>`]
.. _build_bissbs:
reference section for alternatives, which obviously will require other
steps to install as well.
- [:ref:`details<build_bisref>`]
+ [:ref:`details <build_bisref>`]
.. _install_bissbs:
down: if you will build more kernels as described in segment 2 and 3, you will
have to perform those again after executing ``command -v installkernel [...]``.
- [:ref:`details<install_bisref>`]
+ [:ref:`details <install_bisref>`]
.. _storagespace_bissbs:
Write down or remember those two values for later: they enable you to prevent
running out of disk space accidentally during a bisection.
- [:ref:`details<storagespace_bisref>`]
+ [:ref:`details <storagespace_bisref>`]
.. _kernelrelease_bissbs:
If that command does not return '0', check the reference section, as the cause
for this might interfere with your testing.
- [:ref:`details<tainted_bisref>`]
+ [:ref:`details <tainted_bisref>`]
.. _recheckbroken_bissbs:
out the instructions in the reference section to ensure nothing went sideways
during your tests.
- [:ref:`details<recheckbroken_bisref>`]
+ [:ref:`details <recheckbroken_bisref>`]
.. _recheckstablebroken_bissbs:
-* Are you facing a problem within a stable/longterm series, but failed to
- reproduce it with the mainline kernel you just built? One that according to
- the `front page of kernel.org <https://kernel.org/>`_ is still supported? Then
- check if the latest codebase for the particular series might already fix the
- problem. To do so, add the stable series Git branch for your 'good' kernel
- (again, this here is assumed to be 6.0) and check out the latest version::
+* Did you just built a stable or longterm kernel? And were you able to reproduce
+ the regression with it? Then you should test the latest mainline codebase as
+ well, because the result determines which developers the bug must be submitted
+ to.
+
+ To prepare that test, check out current mainline::
cd ~/linux/
- git remote set-branches --add stable linux-6.0.y
- git fetch stable
- git checkout --force --detach linux-6.0.y
+ git switch --discard-changes --detach mainline/master
Now use the checked out code to build and install another kernel using the
commands the earlier steps already described in more detail::
uname -r
cat /proc/sys/kernel/tainted
- Now verify if this kernel is showing the problem.
+ Now verify if this kernel is showing the problem. If it does, then you need
+ to report the bug to the primary developers; if it does not, report it to the
+ stable team. See Documentation/admin-guide/reporting-issues.rst for details.
- [:ref:`details<recheckstablebroken_bisref>`]
+ [:ref:`details <recheckstablebroken_bisref>`]
Do you follow this guide to verify if a problem is present in the code
currently supported by Linux kernel developers? Then you are done at this
point. If you later want to remove the kernel you just built, check out
-:ref:`Supplementary tasks: cleanup during and after following this guide<introclosure_bissbs>`.
+:ref:`Complementary tasks: cleanup during and after following this guide <introclosure_bissbs>`.
In case you face a regression, move on and execute at least the next segment
as well.
In case of a regression, you now want to ensure the trimmed configuration file
you created earlier works as expected; a bisection with the .config file
-otherwise would be a waste of time. [:ref:`details<introworkingcheck_bisref>`]
+otherwise would be a waste of time. [:ref:`details <introworkingcheck_bisref>`]
.. _recheckworking_bissbs:
'good' (once again assumed to be 6.0 here)::
cd ~/linux/
- git checkout --detach v6.0
+ git switch --discard-changes --detach v6.0
Now use the checked out code to configure, build, and install another kernel
using the commands the previous subsection explained in more detail::
Now check if this kernel works as expected; if not, consult the reference
section for further instructions.
- [:ref:`details<recheckworking_bisref>`]
+ [:ref:`details <recheckworking_bisref>`]
.. _introbisect_bissbs:
With all the preparations and precaution builds taken care of, you are now ready
to begin the bisection. This will make you build quite a few kernels -- usually
about 15 in case you encountered a regression when updating to a newer series
-(say from 6.0.11 to 6.1.3). But do not worry, due to the trimmed build
+(say from 6.0.13 to 6.1.5). But do not worry, due to the trimmed build
configuration created earlier this works a lot faster than many people assume:
overall on average it will often just take about 10 to 15 minutes to compile
each kernel on commodity x86 machines.
-* In case your 'bad' version is a stable/longterm release (say 6.1.5), add its
- stable branch, unless you already did so earlier::
-
- cd ~/linux/
- git remote set-branches --add stable linux-6.1.y
- git fetch stable
-
.. _bisectstart_bissbs:
* Start the bisection and tell Git about the versions earlier established as
git bisect good v6.0
git bisect bad v6.1.5
- [:ref:`details<bisectstart_bisref>`]
+ [:ref:`details <bisectstart_bisref>`]
.. _bisectbuild_bissbs:
If compilation fails for some reason, run ``git bisect skip`` and restart
executing the stack of commands from the beginning.
- In case you skipped the "test latest codebase" step in the guide, check its
+ In case you skipped the 'test latest codebase' step in the guide, check its
description as for why the 'df [...]' and 'make -s kernelrelease [...]'
commands are here.
totally normal to see release identifiers like '6.0-rc1-local-gcafec0cacaca0'
if you bisect between versions 6.1 and 6.2 for example.
- [:ref:`details<bisectbuild_bisref>`]
+ [:ref:`details <bisectbuild_bisref>`]
.. _bisecttest_bissbs:
might need to scroll up to see the message mentioning the culprit;
alternatively, run ``git bisect log > ~/bisection-log``.
- [:ref:`details<bisecttest_bisref>`]
+ [:ref:`details <bisecttest_bisref>`]
.. _bisectlog_bissbs:
cp .config ~/bisection-config-culprit
git bisect reset
- [:ref:`details<bisectlog_bisref>`]
+ [:ref:`details <bisectlog_bisref>`]
.. _revert_bissbs:
Begin by checking out the latest codebase depending on the range you bisected:
* Did you face a regression within a stable/longterm series (say between
- 6.0.11 and 6.0.13) that does not happen in mainline? Then check out the
+ 6.0.13 and 6.0.15) that does not happen in mainline? Then check out the
latest codebase for the affected series like this::
git fetch stable
- git checkout --force --detach linux-6.0.y
+ git switch --discard-changes --detach linux-6.0.y
* In all other cases check out latest mainline::
git fetch mainline
- git checkout --force --detach mainline/master
+ git switch --discard-changes --detach mainline/master
If you bisected a regression within a stable/longterm series that also
happens in mainline, there is one more thing to do: look up the mainline
git revert --no-edit cafec0cacaca0
- If that fails, give up trying and move on to the next step. But if it works,
- build a kernel again using the familiar command sequence::
+ If that fails, give up trying and move on to the next step; if it works,
+ adjust the tag to facilitate the identification and prevent accidentally
+ overwriting another kernel::
cp ~/kernel-config-working .config
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted'
+
+ Build a kernel using the familiar command sequence, just without copying the
+ the base .config over::
+
make olddefconfig &&
- make -j $(nproc --all) &&
+ make -j $(nproc --all)
# * Check if the free space suffices holding another kernel:
df -h /boot/ /lib/modules/
sudo make modules_install
command -v installkernel && sudo make install
- Make -s kernelrelease | tee -a ~/kernels-built
+ make -s kernelrelease | tee -a ~/kernels-built
reboot
- Now check one last time if the feature that made you perform a bisection work
- with that kernel.
+ Now check one last time if the feature that made you perform a bisection works
+ with that kernel: if everything went well, it should not show the regression.
- [:ref:`details<revert_bisref>`]
+ [:ref:`details <revert_bisref>`]
.. _introclosure_bissbs:
-Supplementary tasks: cleanup during and after the bisection
+Complementary tasks: cleanup during and after the bisection
-----------------------------------------------------------
During and after following this guide you might want or need to remove some of
kernel image and related files behind; in that case remove them as described
in the reference section.
- [:ref:`details<makeroom_bisref>`]
+ [:ref:`details <makeroom_bisref>`]
.. _finishingtouch_bissbs:
the version considered 'good', and the last three or four you compiled
during the actual bisection process.
- [:ref:`details<finishingtouch_bisref>`]
+ [:ref:`details <finishingtouch_bisref>`]
+
+.. _introoptional_bissbs:
+
+Optional: test reverts, patches, or later versions
+--------------------------------------------------
+
+While or after reporting a bug, you might want or potentially will be asked to
+test reverts, debug patches, proposed fixes, or other versions. In that case
+follow these instructions.
+
+* Update your Git clone and check out the latest code.
+
+ * In case you want to test mainline, fetch its latest changes before checking
+ its code out::
+
+ git fetch mainline
+ git switch --discard-changes --detach mainline/master
+
+ * In case you want to test a stable or longterm kernel, first add the branch
+ holding the series you are interested in (6.2 in the example), unless you
+ already did so earlier::
+
+ git remote set-branches --add stable linux-6.2.y
+
+ Then fetch the latest changes and check out the latest version from the
+ series::
+
+ git fetch stable
+ git switch --discard-changes --detach stable/linux-6.2.y
+
+* Copy your kernel build configuration over::
+
+ cp ~/kernel-config-working .config
+
+* Your next step depends on what you want to do:
+
+ * In case you just want to test the latest codebase, head to the next step,
+ you are already all set.
+
+ * In case you want to test if a revert fixes an issue, revert one or multiple
+ changes by specifying their commit ids::
+
+ git revert --no-edit cafec0cacaca0
+
+ Now give that kernel a special tag to facilitates its identification and
+ prevent accidentally overwriting another kernel::
+
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted'
+
+ * In case you want to test a patch, store the patch in a file like
+ '/tmp/foobars-proposed-fix-v1.patch' and apply it like this::
+
+ git apply /tmp/foobars-proposed-fix-v1.patch
+
+ In case of multiple patches, repeat this step with the others.
+
+ Now give that kernel a special tag to facilitates its identification and
+ prevent accidentally overwriting another kernel::
+
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-foobars-fix-v1'
+
+* Build a kernel using the familiar commands, just without copying the kernel
+ build configuration over, as that has been taken care of already::
+
+ make olddefconfig &&
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+* Now verify you booted the newly built kernel and check it.
+
+[:ref:`details <introoptional_bisref>`]
.. _submit_improvements:
-This concludes the step-by-step guide.
+Conclusion
+----------
+
+You have reached the end of the step-by-step guide.
Did you run into trouble following any of the above steps not cleared up by the
reference section below? Did you spot errors? Or do you have ideas how to
-improve the guide? Then please take a moment and let the maintainer of this
+improve the guide?
+
+If any of that applies, please take a moment and let the maintainer of this
document know by email (Thorsten Leemhuis <linux@leemhuis.info>), ideally while
CCing the Linux docs mailing list (linux-doc@vger.kernel.org). Such feedback is
-vital to improve this document further, which is in everybody's interest, as it
+vital to improve this text further, which is in everybody's interest, as it
will enable more people to master the task described here -- and hopefully also
improve similar guides inspired by this one.
This section holds additional information for almost all the items in the above
step-by-step guide.
+Preparations for building your own kernels
+------------------------------------------
+
+ *The steps in this section lay the groundwork for all further tests.*
+ [:ref:`... <introprep_bissbs>`]
+
+The steps in all later sections of this guide depend on those described here.
+
+[:ref:`back to step-by-step guide <introprep_bissbs>`].
+
.. _backup_bisref:
Prepare for emergencies
------------------------
+~~~~~~~~~~~~~~~~~~~~~~~
*Create a fresh backup and put system repair and restore tools at hand.*
[:ref:`... <backup_bissbs>`]
.. _vanilla_bisref:
Remove anything related to externally maintained kernel modules
----------------------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Remove all software that depends on externally developed kernel drivers or
builds them automatically.* [:ref:`...<vanilla_bissbs>`]
.. _secureboot_bisref:
Deal with techniques like Secure Boot
--------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*On platforms with 'Secure Boot' or similar techniques, prepare everything to
ensure the system will permit your self-compiled kernel to boot later.*
.. _bootworking_bisref:
Boot the last kernel that was working
--------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Boot into the last working kernel and briefly recheck if the feature that
regressed really works.* [:ref:`...<bootworking_bissbs>`]
.. _diskspace_bisref:
Space requirements
-------------------
+~~~~~~~~~~~~~~~~~~
*Ensure to have enough free space for building Linux.*
[:ref:`... <diskspace_bissbs>`]
.. _rangecheck_bisref:
Bisection range
----------------
+~~~~~~~~~~~~~~~
*Determine the kernel versions considered 'good' and 'bad' throughout this
guide.* [:ref:`...<rangecheck_bissbs>`]
Establishing the range of commits to be checked is mostly straightforward,
except when a regression occurred when switching from a release of one stable
-series to a release of a later series (e.g. from 6.0.11 to 6.1.4). In that case
+series to a release of a later series (e.g. from 6.0.13 to 6.1.5). In that case
Git will need some hand holding, as there is no straight line of descent.
That's because with the release of 6.0 mainline carried on to 6.1 while the
stable series 6.0.y branched to the side. It's therefore theoretically possible
-that the issue you face with 6.1.4 only worked in 6.0.11, as it was fixed by a
+that the issue you face with 6.1.5 only worked in 6.0.13, as it was fixed by a
commit that went into one of the 6.0.y releases, but never hit mainline or the
6.1.y series. Thankfully that normally should not happen due to the way the
stable/longterm maintainers maintain the code. It's thus pretty safe to assume
6.0 as a 'good' kernel. That assumption will be tested anyway, as that kernel
will be built and tested in the segment '2' of this guide; Git would force you
-to do this as well, if you tried bisecting between 6.0.11 and 6.1.13.
+to do this as well, if you tried bisecting between 6.0.13 and 6.1.15.
[:ref:`back to step-by-step guide <rangecheck_bissbs>`]
.. _buildrequires_bisref:
Install build requirements
---------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~
*Install all software required to build a Linux kernel.*
[:ref:`...<buildrequires_bissbs>`]
for example might want to skip installing the development headers for ncurses,
which you will only need in case you later might want to adjust the kernel build
configuration using make the targets 'menuconfig' or 'nconfig'; likewise omit
-the headers of Qt6 is you do not plan to adjust the .config using 'xconfig'.
+the headers of Qt6 if you do not plan to adjust the .config using 'xconfig'.
You furthermore might need additional libraries and their development headers
for tasks not covered in this guide -- for example when building utilities from
.. _sources_bisref:
Download the sources using Git
-------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Retrieve the Linux mainline sources.*
[:ref:`...<sources_bissbs>`]
.. _sources_bundle_bisref:
Downloading Linux mainline sources using a bundle
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""""""""""""""""""""""""""""""""""""""""""""""""
Use the following commands to retrieve the Linux mainline sources using a
bundle::
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Now deepen your clone's history to the second predecessor of the mainline
-release of your 'good' version. In case the latter are 6.0 or 6.0.11, 5.19 would
+release of your 'good' version. In case the latter are 6.0 or 6.0.13, 5.19 would
be the first predecessor and 5.18 the second -- hence deepen the history up to
that version::
.. _oldconfig_bisref:
Start defining the build configuration for your kernel
-------------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Start preparing a kernel build configuration (the '.config' file).*
[:ref:`... <oldconfig_bissbs>`]
.. _localmodconfig_bisref:
Trim the build configuration for your kernel
---------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Disable any kernel modules apparently superfluous for your setup.*
[:ref:`... <localmodconfig_bissbs>`]
.. _tagging_bisref:
Tag the kernels about to be build
----------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Ensure all the kernels you will build are clearly identifiable using a
special tag and a unique version identifier.* [:ref:`... <tagging_bissbs>`]
.. _debugsymbols_bisref:
Decide to enable or disable debug symbols
------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Decide how to handle debug symbols.* [:ref:`... <debugsymbols_bissbs>`]
.. _configmods_bisref:
Adjust build configuration
---------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~
*Check if you may want or need to adjust some other kernel configuration
options:*
.. _configmods_distros_bisref:
Distro specific adjustments
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""""""""""""""""""""""""""
*Are you running* [:ref:`... <configmods_bissbs>`]
.. _configmods_individual_bisref:
Individual adjustments
-~~~~~~~~~~~~~~~~~~~~~~
+""""""""""""""""""""""
*If you want to influence the other aspects of the configuration, do so
now.* [:ref:`... <configmods_bissbs>`]
.. _saveconfig_bisref:
Put the .config file aside
---------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~
*Reprocess the .config after the latest changes and store it in a safe place.*
[:ref:`... <saveconfig_bissbs>`]
Put the .config you prepared aside, as you want to copy it back to the build
-directory every time during this guide before you start building another
+directory every time during this guide before you start building another
kernel. That's because going back and forth between different versions can alter
.config files in odd ways; those occasionally cause side effects that could
confuse testing or in some cases render the result of your bisection
.. _introlatestcheck_bisref:
-Try to reproduce the regression
------------------------------------------
+Try to reproduce the problem with the latest codebase
+-----------------------------------------------------
*Verify the regression is not caused by some .config change and check if it
still occurs with the latest codebase.* [:ref:`... <introlatestcheck_bissbs>`]
Your report might be ignored if you send it to the wrong party -- and even
when you get a reply there is a decent chance that developers tell you to
- evaluate which of the two cases it is before they take a closer look.
+ evaluate which of the two cases it is before they take a closer look.
[:ref:`back to step-by-step guide <introlatestcheck_bissbs>`]
.. _checkoutmaster_bisref:
Check out the latest Linux codebase
------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Check out the latest Linux codebase.*
- [:ref:`... <introlatestcheck_bissbs>`]
+ [:ref:`... <checkoutmaster_bissbs>`]
In case you later want to recheck if an ever newer codebase might fix the
problem, remember to run that ``git fetch --shallow-exclude [...]`` command
again mentioned earlier to update your local Git repository.
-[:ref:`back to step-by-step guide <introlatestcheck_bissbs>`]
+[:ref:`back to step-by-step guide <checkoutmaster_bissbs>`]
.. _build_bisref:
Build your kernel
------------------
+~~~~~~~~~~~~~~~~~
*Build the image and the modules of your first kernel using the config file
you prepared.* [:ref:`... <build_bissbs>`]
deb, rpm or tar file.
Dealing with build errors
-~~~~~~~~~~~~~~~~~~~~~~~~~
+"""""""""""""""""""""""""
When a build error occurs, it might be caused by some aspect of your machine's
setup that often can be fixed quickly; other times though the problem lies in
In the end, most issues you run into have likely been encountered and
reported by others already. That includes issues where the cause is not your
-system, but lies in the code. If you run into one of those, you might thus find a
-solution (e.g. a patch) or workaround for your issue, too.
+system, but lies in the code. If you run into one of those, you might thus find
+a solution (e.g. a patch) or workaround for your issue, too.
Package your kernel up
-~~~~~~~~~~~~~~~~~~~~~~
+""""""""""""""""""""""
The step-by-step guide uses the default make targets (e.g. 'bzImage' and
'modules' on x86) to build the image and the modules of your kernel, which later
.. _install_bisref:
Put the kernel in place
------------------------
+~~~~~~~~~~~~~~~~~~~~~~~
*Install the kernel you just built.* [:ref:`... <install_bissbs>`]
.. _storagespace_bisref:
Storage requirements per kernel
--------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Check how much storage space the kernel, its modules, and other related files
like the initramfs consume.* [:ref:`... <storagespace_bissbs>`]
.. _tainted_bisref:
Check if your newly built kernel considers itself 'tainted'
------------------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Check if the kernel marked itself as 'tainted'.*
[:ref:`... <tainted_bissbs>`]
.. _recheckbroken_bisref:
Check the kernel built from a recent mainline codebase
-------------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Verify if your bug occurs with the newly built kernel.*
[:ref:`... <recheckbroken_bissbs>`]
.. _recheckstablebroken_bisref:
Check the kernel built from the latest stable/longterm codebase
----------------------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Are you facing a regression within a stable/longterm release, but failed to
reproduce it with the kernel you just built using the latest mainline sources?
.. _recheckworking_bisref:
Build your own version of the 'good' kernel
--------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Build your own variant of the working kernel and check if the feature that
regressed works as expected with it.* [:ref:`... <recheckworking_bissbs>`]
Note, if you found and fixed problems with the .config file, you want to use it
to build another kernel from the latest codebase, as your earlier tests with
-mainline and the latest version from an affected stable/longterm series were most
-likely flawed.
+mainline and the latest version from an affected stable/longterm series were
+most likely flawed.
[:ref:`back to step-by-step guide <recheckworking_bissbs>`]
+Perform a bisection and validate the result
+-------------------------------------------
+
+ *With all the preparations and precaution builds taken care of, you are now
+ ready to begin the bisection.* [:ref:`... <introbisect_bissbs>`]
+
+The steps in this segment perform and validate the bisection.
+
+[:ref:`back to step-by-step guide <introbisect_bissbs>`].
+
.. _bisectstart_bisref:
Start the bisection
--------------------
+~~~~~~~~~~~~~~~~~~~
*Start the bisection and tell Git about the versions earlier established as
'good' and 'bad'.* [:ref:`... <bisectstart_bissbs>`]
.. _bisectbuild_bisref:
Build a kernel from the bisection point
----------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Build, install, and boot a kernel from the code Git checked out using the
same commands you used earlier.* [:ref:`... <bisectbuild_bissbs>`]
.. _bisecttest_bisref:
Bisection checkpoint
---------------------
+~~~~~~~~~~~~~~~~~~~~
*Check if the feature that regressed works in the kernel you just built.*
[:ref:`... <bisecttest_bissbs>`]
.. _bisectlog_bisref:
Put the bisection log away
---------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~
*Store Git's bisection log and the current .config file in a safe place.*
[:ref:`... <bisectlog_bissbs>`]
.. _revert_bisref:
Try reverting the culprit
--------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~
*Try reverting the culprit on top of the latest codebase to see if this fixes
your regression.* [:ref:`... <revert_bissbs>`]
[:ref:`back to step-by-step guide <revert_bissbs>`]
+Cleanup steps during and after following this guide
+---------------------------------------------------
-Supplementary tasks: cleanup during and after the bisection
------------------------------------------------------------
+ *During and after following this guide you might want or need to remove some
+ of the kernels you installed.* [:ref:`... <introclosure_bissbs>`]
+
+The steps in this section describe clean-up procedures.
+
+[:ref:`back to step-by-step guide <introclosure_bissbs>`].
.. _makeroom_bisref:
Cleaning up during the bisection
---------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*To remove one of the kernels you installed, look up its 'kernelrelease'
identifier.* [:ref:`... <makeroom_bissbs>`]
the steps to do that vary quite a bit between Linux distributions.
Note, be careful with wildcards like '*' when deleting files or directories
-for kernels manually: you might accidentally remove files of a 6.0.11 kernel
+for kernels manually: you might accidentally remove files of a 6.0.13 kernel
when all you want is to remove 6.0 or 6.0.1.
[:ref:`back to step-by-step guide <makeroom_bissbs>`]
Cleaning up after the bisection
--------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. _finishingtouch_bisref:
(~/linux/.git/) behind -- a simple ``git reset --hard`` thus will bring the
sources back.
-Removing the repository as well would likely be unwise at this point: there is a
-decent chance developers will ask you to build another kernel to perform
-additional tests. This is often required to debug an issue or check proposed
-fixes. Before doing so you want to run the ``git fetch mainline`` command again
-followed by ``git checkout mainline/master`` to bring your clone up to date and
-checkout the latest codebase. Then apply the patch using ``git apply
-<filename>`` or ``git am <filename>`` and build yet another kernel using the
-familiar commands.
+Removing the repository as well would likely be unwise at this point: there
+is a decent chance developers will ask you to build another kernel to
+perform additional tests -- like testing a debug patch or a proposed fix.
+Details on how to perform those can be found in the section :ref:`Optional
+tasks: test reverts, patches, or later versions <introoptional_bissbs>`.
Additional tests are also the reason why you want to keep the
~/kernel-config-working file around for a few weeks.
[:ref:`back to step-by-step guide <finishingtouch_bissbs>`]
+.. _introoptional_bisref:
-Additional reading material
-===========================
+Test reverts, patches, or later versions
+----------------------------------------
+
+ *While or after reporting a bug, you might want or potentially will be asked
+ to test reverts, patches, proposed fixes, or other versions.*
+ [:ref:`... <introoptional_bissbs>`]
+
+All the commands used in this section should be pretty straight forward, so
+there is not much to add except one thing: when setting a kernel tag as
+instructed, ensure it is not much longer than the one used in the example, as
+problems will arise if the kernelrelease identifier exceeds 63 characters.
+
+[:ref:`back to step-by-step guide <introoptional_bissbs>`].
+
+
+Additional information
+======================
+
+.. _buildhost_bis:
+
+Build kernels on a different machine
+------------------------------------
+
+To compile kernels on another system, slightly alter the step-by-step guide's
+instructions:
+
+* Start following the guide on the machine where you want to install and test
+ the kernels later.
+
+* After executing ':ref:`Boot into the working kernel and briefly use the
+ apparently broken feature <bootworking_bissbs>`', save the list of loaded
+ modules to a file using ``lsmod > ~/test-machine-lsmod``. Then locate the
+ build configuration for the running kernel (see ':ref:`Start defining the
+ build configuration for your kernel <oldconfig_bisref>`' for hints on where
+ to find it) and store it as '~/test-machine-config-working'. Transfer both
+ files to the home directory of your build host.
+
+* Continue the guide on the build host (e.g. with ':ref:`Ensure to have enough
+ free space for building [...] <diskspace_bissbs>`').
+
+* When you reach ':ref:`Start preparing a kernel build configuration[...]
+ <oldconfig_bissbs>`': before running ``make olddefconfig`` for the first time,
+ execute the following command to base your configuration on the one from the
+ test machine's 'working' kernel::
+
+ cp ~/test-machine-config-working ~/linux/.config
+
+* During the next step to ':ref:`disable any apparently superfluous kernel
+ modules <localmodconfig_bissbs>`' use the following command instead::
-Further sources
----------------
+ yes '' | make localmodconfig LSMOD=~/lsmod_foo-machine localmodconfig
+
+* Continue the guide, but ignore the instructions outlining how to compile,
+ install, and reboot into a kernel every time they come up. Instead build
+ like this::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig &&
+ make -j $(nproc --all) targz-pkg
+
+ This will generate a gzipped tar file whose name is printed in the last
+ line shown; for example, a kernel with the kernelrelease identifier
+ '6.0.0-rc1-local-g928a87efa423' built for x86 machines usually will
+ be stored as '~/linux/linux-6.0.0-rc1-local-g928a87efa423-x86.tar.gz'.
+
+ Copy that file to your test machine's home directory.
+
+* Switch to the test machine to check if you have enough space to hold another
+ kernel. Then extract the file you transferred::
+
+ sudo tar -xvzf ~/linux-6.0.0-rc1-local-g928a87efa423-x86.tar.gz -C /
+
+ Afterwards :ref:`generate the initramfs and add the kernel to your boot
+ loader's configuration <install_bisref>`; on some distributions the following
+ command will take care of both these tasks::
+
+ sudo /sbin/installkernel 6.0.0-rc1-local-g928a87efa423 /boot/vmlinuz-6.0.0-rc1-local-g928a87efa423
+
+ Now reboot and ensure you started the intended kernel.
+
+This approach even works when building for another architecture: just install
+cross-compilers and add the appropriate parameters to every invocation of make
+(e.g. ``make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- [...]``).
+
+Additional reading material
+---------------------------
* The `man page for 'git bisect' <https://git-scm.com/docs/git-bisect>`_ and
`fighting regressions with 'git bisect' <https://git-scm.com/docs/git-bisect-lk2009.html>`_
MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;...
Memory bandwidth Allocation specified in MiBps
----------------------------------------------
+----------------------------------------------
Memory bandwidth domain is L3 cache.
::
KASAN and can be used in production. See Documentation/dev-tools/kfence.rst
* lockdep is a locking correctness validator. See
Documentation/locking/lockdep-design.rst
+* Runtime Verification (RV) supports checking specific behaviours for a given
+ subsystem. See Documentation/trace/rv/runtime-verification.rst
* There are several other pieces of debug instrumentation in the kernel, many
of which can be found in lib/Kconfig.debug
-Status: Unstable - ABI compatibility may be broken in the future
-
Binding for Keystone gate control driver which uses PSC controller IP.
This binding uses the common clock binding[1].
-Status: Unstable - ABI compatibility may be broken in the future
-
Binding for keystone PLLs. The main PLL IP typically has a multiplier,
a divider and a post divider. The additional PLL IPs like ARMPLL, DDRPLL
and PAPLL are controlled by the memory mapped register where as the Main
Binding for Texas Instruments ADPLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped ADPLL with two to three selectable input clocks
and three to four children.
Binding for Texas Instruments APLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped APLL with usually two selectable input clocks
(reference clock and bypass clock), with analog phase locked
Binding for Texas Instruments autoidle clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a register mapped
clock which can be put to idle automatically by hardware based on the usage
and a configuration bit setting. Autoidle clock is never an individual
Binding for Texas Instruments clockdomain.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1] in consumer role.
Every clock on TI SoC belongs to one clockdomain, but software
only needs this information for specific clocks which require
Binding for TI composite clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped composite clock with multiple different sub-types;
Binding for TI divider clock
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped adjustable clock rate divider that does not gate and has
only one input clock or parent. By default the value programmed into
Binding for Texas Instruments DPLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped DPLL with usually two selectable input clocks
(reference clock and bypass clock), with digital phase locked
Binding for Texas Instruments FAPLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped FAPLL with usually two selectable input clocks
(reference clock and bypass clock), and one or more child
Binding for TI fixed factor rate clock sources.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1], and also uses the autoidle
support from TI autoidle clock [2].
Binding for Texas Instruments gate clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. This clock is
quite much similar to the basic gate-clock [2], however,
it supports a number of additional features. If no register
Binding for Texas Instruments interface clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. This clock is
quite much similar to the basic gate-clock [2], however,
it supports a number of additional features, including
Binding for TI mux clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped multiplexer with multiple input clock signals or
parents, one of which can be selected as output. This clock does not
compatible:
const: qcom,sm8150-dpu
+ "^displayport-controller@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ contains:
+ const: qcom,sm8150-dp
+
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
#dma-cells = <1>;
clocks = <&clock_controller 0>, <&clock_controller 1>;
clock-names = "bus", "host";
+ #address-cells = <1>;
+ #size-cells = <1>;
vendor,custom-property = <2>;
status = "disabled";
local-bd-address: true
+ qcom,local-bd-address-broken:
+ type: boolean
+ description:
+ boot firmware is incorrectly passing the address in big-endian order
required:
- compatible
- const: main
- const: mm
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
TI Davinci DSP devices
=======================
-Binding status: Unstable - Subject to changes for DT representation of clocks
- and resets
-
The TI Davinci family of SoCs usually contains a TI DSP Core sub-system that
is used to offload some of the processor-intensive tasks or algorithms, for
achieving various system level goals.
title: Atmel Universal Synchronous Asynchronous Receiver/Transmitter (USART)
maintainers:
- - Richard Genoud <richard.genoud@gmail.com>
+ - Richard Genoud <richard.genoud@bootlin.com>
properties:
compatible:
ranges: true
patternProperties:
- "^clock-controller@[0-9a-z]+$":
+ "^clock-controller@[0-9a-f]+$":
$ref: /schemas/clock/fsl,flexspi-clock.yaml#
required:
ranges: true
patternProperties:
- "^interrupt-controller@[a-z0-9]+$":
+ "^interrupt-controller@[a-f0-9]+$":
$ref: /schemas/interrupt-controller/fsl,ls-extirq.yaml#
required:
unevaluatedProperties: false
pcie-phy:
+ type: object
description:
Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt
be implemented in an always-on power domain."
patternProperties:
- '^frame@[0-9a-z]*$':
+ '^frame@[0-9a-f]+$':
type: object
additionalProperties: false
description: A timer node has up to 8 frame sub-nodes, each with the following properties.
- qcom,msm8996-ufshc
- qcom,msm8998-ufshc
- qcom,sa8775p-ufshc
+ - qcom,sc7180-ufshc
- qcom,sc7280-ufshc
+ - qcom,sc8180x-ufshc
- qcom,sc8280xp-ufshc
- qcom,sdm845-ufshc
- qcom,sm6115-ufshc
+ - qcom,sm6125-ufshc
- qcom,sm6350-ufshc
- qcom,sm8150-ufshc
- qcom,sm8250-ufshc
- const: jedec,ufs-2.0
clocks:
- minItems: 8
+ minItems: 7
maxItems: 11
clock-names:
- minItems: 8
+ minItems: 7
maxItems: 11
dma-coherent: true
allOf:
- $ref: ufs-common.yaml
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sc7180-ufshc
+ then:
+ properties:
+ clocks:
+ minItems: 7
+ maxItems: 7
+ clock-names:
+ items:
+ - const: core_clk
+ - const: bus_aggr_clk
+ - const: iface_clk
+ - const: core_clk_unipro
+ - const: ref_clk
+ - const: tx_lane0_sync_clk
+ - const: rx_lane0_sync_clk
+ reg:
+ maxItems: 1
+ reg-names:
+ maxItems: 1
+
- if:
properties:
compatible:
- qcom,msm8998-ufshc
- qcom,sa8775p-ufshc
- qcom,sc7280-ufshc
+ - qcom,sc8180x-ufshc
- qcom,sc8280xp-ufshc
- qcom,sm8250-ufshc
- qcom,sm8350-ufshc
contains:
enum:
- qcom,sm6115-ufshc
+ - qcom,sm6125-ufshc
then:
properties:
clocks:
reg:
maxItems: 1
clocks:
- minItems: 8
+ minItems: 7
maxItems: 8
else:
properties:
minItems: 1
maxItems: 2
clocks:
- minItems: 8
+ minItems: 7
maxItems: 11
unevaluatedProperties: false
static struct virtio_driver virtio_dummy_driver = {
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtio_dummy_probe,
.remove = virtio_dummy_remove,
--- /dev/null
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+bcachefs Documentation
+======================
+
+.. toctree::
+ :maxdepth: 2
+ :numbered:
+
+ errorcodes
afs
autofs
autofs-mount-control
+ bcachefs/index
befs
bfs
btrfs
- ``LLVM=1``
* - s390
- Maintained
- - ``CC=clang``
+ - ``LLVM=1`` (LLVM >= 18.1.0), ``CC=clang`` (LLVM < 18.1.0)
* - um (User Mode)
- Maintained
- ``LLVM=1``
each page. It is already implemented and activated if page owner is
enabled. Other usages are more than welcome.
-It can also be used to show all the stacks and their outstanding
-allocations, which gives us a quick overview of where the memory is going
-without the need to screen through all the pages and match the allocation
-and free operation.
+It can also be used to show all the stacks and their current number of
+allocated base pages, which gives us a quick overview of where the memory
+is going without the need to screen through all the pages and match the
+allocation and free operation.
page owner is disabled by default. So, if you'd like to use it, you need
to add "page_owner=on" to your boot cmdline. If the kernel is built
cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt
cat stacks.txt
- prep_new_page+0xa9/0x120
- get_page_from_freelist+0x7e6/0x2140
- __alloc_pages+0x18a/0x370
- new_slab+0xc8/0x580
- ___slab_alloc+0x1f2/0xaf0
- __slab_alloc.isra.86+0x22/0x40
- kmem_cache_alloc+0x31b/0x350
- __khugepaged_enter+0x39/0x100
- dup_mmap+0x1c7/0x5ce
- copy_process+0x1afe/0x1c90
- kernel_clone+0x9a/0x3c0
- __do_sys_clone+0x66/0x90
- do_syscall_64+0x7f/0x160
- entry_SYSCALL_64_after_hwframe+0x6c/0x74
- stack_count: 234
+ post_alloc_hook+0x177/0x1a0
+ get_page_from_freelist+0xd01/0xd80
+ __alloc_pages+0x39e/0x7e0
+ allocate_slab+0xbc/0x3f0
+ ___slab_alloc+0x528/0x8a0
+ kmem_cache_alloc+0x224/0x3b0
+ sk_prot_alloc+0x58/0x1a0
+ sk_alloc+0x32/0x4f0
+ inet_create+0x427/0xb50
+ __sock_create+0x2e4/0x650
+ inet_ctl_sock_create+0x30/0x180
+ igmp_net_init+0xc1/0x130
+ ops_init+0x167/0x410
+ setup_net+0x304/0xa60
+ copy_net_ns+0x29b/0x4a0
+ create_new_namespaces+0x4a1/0x820
+ nr_base_pages: 16
...
...
echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold
cat /sys/kernel/debug/page_owner_stacks/show_stacks> stacks_7000.txt
cat stacks_7000.txt
- prep_new_page+0xa9/0x120
- get_page_from_freelist+0x7e6/0x2140
- __alloc_pages+0x18a/0x370
- alloc_pages_mpol+0xdf/0x1e0
- folio_alloc+0x14/0x50
- filemap_alloc_folio+0xb0/0x100
- page_cache_ra_unbounded+0x97/0x180
- filemap_fault+0x4b4/0x1200
- __do_fault+0x2d/0x110
- do_pte_missing+0x4b0/0xa30
- __handle_mm_fault+0x7fa/0xb70
- handle_mm_fault+0x125/0x300
- do_user_addr_fault+0x3c9/0x840
- exc_page_fault+0x68/0x150
- asm_exc_page_fault+0x22/0x30
- stack_count: 8248
+ post_alloc_hook+0x177/0x1a0
+ get_page_from_freelist+0xd01/0xd80
+ __alloc_pages+0x39e/0x7e0
+ alloc_pages_mpol+0x22e/0x490
+ folio_alloc+0xd5/0x110
+ filemap_alloc_folio+0x78/0x230
+ page_cache_ra_order+0x287/0x6f0
+ filemap_get_pages+0x517/0x1160
+ filemap_read+0x304/0x9f0
+ xfs_file_buffered_read+0xe6/0x1d0 [xfs]
+ xfs_file_read_iter+0x1f0/0x380 [xfs]
+ __kernel_read+0x3b9/0x730
+ kernel_read_file+0x309/0x4d0
+ __do_sys_finit_module+0x381/0x730
+ do_syscall_64+0x8d/0x150
+ entry_SYSCALL_64_after_hwframe+0x62/0x6a
+ nr_base_pages: 20824
...
cat /sys/kernel/debug/page_owner > page_owner_full.txt
--- /dev/null
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Devlink E-Switch Attribute
+==========================
+
+Devlink E-Switch supports two modes of operation: legacy and switchdev.
+Legacy mode operates based on traditional MAC/VLAN steering rules. Switching
+decisions are made based on MAC addresses, VLANs, etc. There is limited ability
+to offload switching rules to hardware.
+
+On the other hand, switchdev mode allows for more advanced offloading
+capabilities of the E-Switch to hardware. In switchdev mode, more switching
+rules and logic can be offloaded to the hardware switch ASIC. It enables
+representor netdevices that represent the slow path of virtual functions (VFs)
+or scalable-functions (SFs) of the device. See more information about
+:ref:`Documentation/networking/switchdev.rst <switchdev>` and
+:ref:`Documentation/networking/representors.rst <representors>`.
+
+In addition, the devlink E-Switch also comes with other attributes listed
+in the following section.
+
+Attributes Description
+======================
+
+The following is a list of E-Switch attributes.
+
+.. list-table:: E-Switch attributes
+ :widths: 8 5 45
+
+ * - Name
+ - Type
+ - Description
+ * - ``mode``
+ - enum
+ - The mode of the device. The mode can be one of the following:
+
+ * ``legacy`` operates based on traditional MAC/VLAN steering
+ rules.
+ * ``switchdev`` allows for more advanced offloading capabilities of
+ the E-Switch to hardware.
+ * - ``inline-mode``
+ - enum
+ - Some HWs need the VF driver to put part of the packet
+ headers on the TX descriptor so the e-switch can do proper
+ matching and steering. Support for both switchdev mode and legacy mode.
+
+ * ``none`` none.
+ * ``link`` L2 mode.
+ * ``network`` L3 mode.
+ * ``transport`` L4 mode.
+ * - ``encap-mode``
+ - enum
+ - The encapsulation mode of the device. Support for both switchdev mode
+ and legacy mode. The mode can be one of the following:
+
+ * ``none`` Disable encapsulation support.
+ * ``basic`` Enable encapsulation support.
+
+Example Usage
+=============
+
+.. code:: shell
+
+ # enable switchdev mode
+ $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev
+
+ # set inline-mode and encap-mode
+ $ devlink dev eswitch set pci/0000:08:00.0 inline-mode none encap-mode basic
+
+ # display devlink device eswitch attributes
+ $ devlink dev eswitch show pci/0000:08:00.0
+ pci/0000:08:00.0: mode switchdev inline-mode none encap-mode basic
+
+ # enable encap-mode with legacy mode
+ $ devlink dev eswitch set pci/0000:08:00.0 mode legacy inline-mode none encap-mode basic
devlink-selftests
devlink-trap
devlink-linecard
+ devlink-eswitch-attr
Driver-specific documentation
-----------------------------
.. SPDX-License-Identifier: GPL-2.0
+.. _representors:
=============================
Network Function Representors
AMD Tom Lendacky <thomas.lendacky@amd.com>
Ampere Darren Hart <darren@os.amperecomputing.com>
ARM Catalin Marinas <catalin.marinas@arm.com>
- IBM Power Anton Blanchard <anton@linux.ibm.com>
+ IBM Power Michael Ellerman <ellerman@au.ibm.com>
IBM Z Christian Borntraeger <borntraeger@de.ibm.com>
Intel Tony Luck <tony.luck@intel.com>
Qualcomm Trilok Soni <quic_tsoni@quicinc.com>
Hence, the ASID for the SEV-enabled guests must be from 1 to a maximum value
defined in the CPUID 0x8000001f[ecx] field.
-SEV Key Management
-==================
+The KVM_MEMORY_ENCRYPT_OP ioctl
+===============================
-The SEV guest key management is handled by a separate processor called the AMD
-Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure
-key management interface to perform common hypervisor activities such as
-encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
-information, see the SEV Key Management spec [api-spec]_
-
-The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP. If the argument
-to KVM_MEMORY_ENCRYPT_OP is NULL, the ioctl returns 0 if SEV is enabled
-and ``ENOTTY`` if it is disabled (on some older versions of Linux,
-the ioctl runs normally even with a NULL argument, and therefore will
-likely return ``EFAULT``). If non-NULL, the argument to KVM_MEMORY_ENCRYPT_OP
-must be a struct kvm_sev_cmd::
+The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP, which operates on
+the VM file descriptor. If the argument to KVM_MEMORY_ENCRYPT_OP is NULL,
+the ioctl returns 0 if SEV is enabled and ``ENOTTY`` if it is disabled
+(on some older versions of Linux, the ioctl tries to run normally even
+with a NULL argument, and therefore will likely return ``EFAULT`` instead
+of zero if SEV is enabled). If non-NULL, the argument to
+KVM_MEMORY_ENCRYPT_OP must be a struct kvm_sev_cmd::
struct kvm_sev_cmd {
__u32 id;
The KVM_SEV_INIT command is used by the hypervisor to initialize the SEV platform
context. In a typical workflow, this command should be the first command issued.
-The firmware can be initialized either by using its own non-volatile storage or
-the OS can manage the NV storage for the firmware using the module parameter
-``init_ex_path``. If the file specified by ``init_ex_path`` does not exist or
-is invalid, the OS will create or override the file with output from PSP.
Returns: 0 on success, -negative on error
Returns: 0 on success, -negative on error
+Firmware Management
+===================
+
+The SEV guest key management is handled by a separate processor called the AMD
+Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure
+key management interface to perform common hypervisor activities such as
+encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
+information, see the SEV Key Management spec [api-spec]_
+
+The AMD-SP firmware can be initialized either by using its own non-volatile
+storage or the OS can manage the NV storage for the firmware using
+parameter ``init_ex_path`` of the ``ccp`` module. If the file specified
+by ``init_ex_path`` does not exist or is invalid, the OS will create or
+override the file with PSP non-volatile storage.
+
References
==========
Asynchronous page fault (APF) control MSR.
Bits 63-6 hold 64-byte aligned physical address of a 64 byte memory area
- which must be in guest RAM and must be zeroed. This memory is expected
- to hold a copy of the following structure::
+ which must be in guest RAM. This memory is expected to hold the
+ following structure::
struct kvm_vcpu_pv_apf_data {
/* Used for 'page not present' events delivered via #PF */
__u32 token;
__u8 pad[56];
- __u32 enabled;
};
Bits 5-4 of the MSR are reserved and should be zero. Bit 0 is set to 1
as regular page fault, guest must reset 'flags' to '0' before it does
something that can generate normal page fault.
- Bytes 5-7 of 64 byte memory location ('token') will be written to by the
+ Bytes 4-7 of 64 byte memory location ('token') will be written to by the
hypervisor at the time of APF 'page ready' event injection. The content
- of these bytes is a token which was previously delivered as 'page not
- present' event. The event indicates the page in now available. Guest is
- supposed to write '0' to 'token' when it is done handling 'page ready'
- event and to write 1' to MSR_KVM_ASYNC_PF_ACK after clearing the location;
- writing to the MSR forces KVM to re-scan its queue and deliver the next
- pending notification.
+ of these bytes is a token which was previously delivered in CR2 as
+ 'page not present' event. The event indicates the page is now available.
+ Guest is supposed to write '0' to 'token' when it is done handling
+ 'page ready' event and to write '1' to MSR_KVM_ASYNC_PF_ACK after
+ clearing the location; writing to the MSR forces KVM to re-scan its
+ queue and deliver the next pending notification.
Note, MSR_KVM_ASYNC_PF_INT MSR specifying the interrupt vector for 'page
ready' APF delivery needs to be written to before enabling APF mechanism
ARM/FREESCALE LAYERSCAPE ARM ARCHITECTURE
M: Shawn Guo <shawnguo@kernel.org>
-M: Li Yang <leoyang.li@nxp.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git
N: rockchip
ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
R: Alim Akhtar <alim.akhtar@samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-samsung-soc@vger.kernel.org
C: irc://irc.oftc.net/bcache
T: git https://evilpiepirate.org/git/bcachefs.git
F: fs/bcachefs/
+F: Documentation/filesystems/bcachefs/
BDISP ST MEDIA DRIVER
M: Fabien Dessenne <fabien.dessenne@foss.st.com>
BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF)
M: KP Singh <kpsingh@kernel.org>
-R: Florent Revest <revest@chromium.org>
-R: Brendan Jackman <jackmanb@chromium.org>
+R: Matt Bobrowski <mattbobrowski@google.com>
L: bpf@vger.kernel.org
S: Maintained
F: Documentation/bpf/prog_lsm.rst
F: kernel/bpf/cgroup.c
BPF [TOOLING] (bpftool)
-M: Quentin Monnet <quentin@isovalent.com>
+M: Quentin Monnet <qmo@kernel.org>
L: bpf@vger.kernel.org
S: Maintained
F: kernel/bpf/disasm.*
CEPH COMMON CODE (LIBCEPH)
M: Ilya Dryomov <idryomov@gmail.com>
M: Xiubo Li <xiubli@redhat.com>
-R: Jeff Layton <jlayton@kernel.org>
L: ceph-devel@vger.kernel.org
S: Supported
W: http://ceph.com/
CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
M: Xiubo Li <xiubli@redhat.com>
M: Ilya Dryomov <idryomov@gmail.com>
-R: Jeff Layton <jlayton@kernel.org>
L: ceph-devel@vger.kernel.org
S: Supported
W: http://ceph.com/
CPUIDLE DRIVER - ARM EXYNOS
M: Daniel Lezcano <daniel.lezcano@linaro.org>
M: Kukjin Kim <kgene@kernel.org>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
M: Alasdair Kergon <agk@redhat.com>
M: Mike Snitzer <snitzer@kernel.org>
M: Mikulas Patocka <mpatocka@redhat.com>
-M: dm-devel@lists.linux.dev
L: dm-devel@lists.linux.dev
S: Maintained
Q: http://patchwork.kernel.org/project/dm-devel/list/
DEVICE-MAPPER VDO TARGET
M: Matthew Sakai <msakai@redhat.com>
-M: dm-devel@lists.linux.dev
L: dm-devel@lists.linux.dev
S: Maintained
F: Documentation/admin-guide/device-mapper/vdo*.rst
F: fs/efs/
EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER
-M: Douglas Miller <dougmill@linux.ibm.com>
L: netdev@vger.kernel.org
-S: Maintained
+S: Orphan
F: drivers/net/ethernet/ibm/ehea/
ELM327 CAN NETWORK DRIVER
M: Chao Yu <chao@kernel.org>
R: Yue Hu <huyue2@coolpad.com>
R: Jeffle Xu <jefflexu@linux.alibaba.com>
+R: Sandeep Dhavale <dhavale@google.com>
L: linux-erofs@lists.ozlabs.org
S: Maintained
W: https://erofs.docs.kernel.org
F: drivers/video/fbdev/fsl-diu-fb.*
FREESCALE DMA DRIVER
-M: Li Yang <leoyang.li@nxp.com>
M: Zhang Wei <zw@zh-kernel.org>
L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: include/dt-bindings/soc/cpm1-fsl,tsa.h
FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
-M: Li Yang <leoyang.li@nxp.com>
L: netdev@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
-S: Maintained
+S: Orphan
F: drivers/net/ethernet/freescale/ucc_geth*
FREESCALE QUICC ENGINE UCC HDLC DRIVER
F: drivers/tty/serial/ucc_uart.c
FREESCALE SOC DRIVERS
-M: Li Yang <leoyang.li@nxp.com>
L: linuxppc-dev@lists.ozlabs.org
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S: Maintained
+S: Orphan
F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
F: Documentation/devicetree/bindings/soc/fsl/
F: drivers/soc/fsl/
F: sound/soc/fsl/fsl_qmc_audio.c
FREESCALE USB PERIPHERAL DRIVERS
-M: Li Yang <leoyang.li@nxp.com>
L: linux-usb@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
-S: Maintained
+S: Orphan
F: drivers/usb/gadget/udc/fsl*
FREESCALE USB PHY DRIVER
-M: Ran Wang <ran.wang_1@nxp.com>
L: linux-usb@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
-S: Maintained
+S: Orphan
F: drivers/usb/phy/phy-fsl-usb*
FREEVXFS FILESYSTEM
F: include/linux/platform_data/i2c-mux-gpio.h
GENERIC GPIO RESET DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
S: Maintained
F: drivers/reset/reset-gpio.c
HID CORE LAYER
M: Jiri Kosina <jikos@kernel.org>
-M: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+M: Benjamin Tissoires <bentiss@kernel.org>
L: linux-input@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
S: Maintained
F: drivers/hid/hid-logitech-hidpp.c
-HIGH-RESOLUTION TIMERS, CLOCKEVENTS
+HIGH-RESOLUTION TIMERS, TIMER WHEEL, CLOCKEVENTS
+M: Anna-Maria Behnsen <anna-maria@linutronix.de>
+M: Frederic Weisbecker <frederic@kernel.org>
M: Thomas Gleixner <tglx@linutronix.de>
L: linux-kernel@vger.kernel.org
S: Maintained
F: Documentation/timers/
F: include/linux/clockchips.h
F: include/linux/hrtimer.h
+F: include/linux/timer.h
F: kernel/time/clockevents.c
F: kernel/time/hrtimer.c
-F: kernel/time/timer_*.c
+F: kernel/time/timer.c
+F: kernel/time/timer_list.c
+F: kernel/time/timer_migration.*
+F: tools/testing/selftests/timers/
HIGH-SPEED SCC DRIVER FOR AX.25
L: linux-hams@vger.kernel.org
HWPOISON MEMORY FAILURE HANDLING
M: Miaohe Lin <linmiaohe@huawei.com>
-R: Naoya Horiguchi <naoya.horiguchi@nec.com>
+R: Naoya Horiguchi <nao.horiguchi@gmail.com>
L: linux-mm@kvack.org
S: Maintained
F: mm/hwpoison-inject.c
F: security/keys/encrypted-keys/
KEYS-TRUSTED
-M: James Bottomley <jejb@linux.ibm.com>
+M: James Bottomley <James.Bottomley@HansenPartnership.com>
M: Jarkko Sakkinen <jarkko@kernel.org>
M: Mimi Zohar <zohar@linux.ibm.com>
L: linux-integrity@vger.kernel.org
MARVELL MWIFIEX WIRELESS DRIVER
M: Brian Norris <briannorris@chromium.org>
+R: Francesco Dolcini <francesco@dolcini.it>
L: linux-wireless@vger.kernel.org
S: Odd Fixes
F: drivers/net/wireless/marvell/mwifiex/
MAXIM MAX17040 FAMILY FUEL GAUGE DRIVERS
R: Iskren Chernev <iskren.chernev@gmail.com>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R: Krzysztof Kozlowski <krzk@kernel.org>
R: Marek Szyprowski <m.szyprowski@samsung.com>
R: Matheus Castello <matheus@castello.eng.br>
L: linux-pm@vger.kernel.org
MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS
R: Hans de Goede <hdegoede@redhat.com>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R: Krzysztof Kozlowski <krzk@kernel.org>
R: Marek Szyprowski <m.szyprowski@samsung.com>
R: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
R: Purism Kernel Team <kernel@puri.sm>
F: drivers/power/supply/max77976_charger.c
MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
S: Maintained
B: mailto:linux-samsung-soc@vger.kernel.org
MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
M: Chanwoo Choi <cw00.choi@samsung.com>
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
B: mailto:linux-samsung-soc@vger.kernel.org
MELLANOX ETHERNET DRIVER (mlx5e)
M: Saeed Mahameed <saeedm@nvidia.com>
+M: Tariq Toukan <tariqt@nvidia.com>
L: netdev@vger.kernel.org
S: Supported
W: http://www.mellanox.com
MELLANOX MLX5 core VPI driver
M: Saeed Mahameed <saeedm@nvidia.com>
M: Leon Romanovsky <leonro@nvidia.com>
+M: Tariq Toukan <tariqt@nvidia.com>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
S: Supported
F: tools/testing/memblock/
MEMORY CONTROLLER DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
B: mailto:krzysztof.kozlowski@linaro.org
F: include/dt-bindings/dma/at91.h
MICROCHIP AT91 SERIAL DRIVER
-M: Richard Genoud <richard.genoud@gmail.com>
+M: Richard Genoud <richard.genoud@bootlin.com>
S: Maintained
F: Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
F: drivers/tty/serial/atmel_serial.c
F: net/ipv4/nexthop.c
NFC SUBSYSTEM
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/nfc/
F: include/uapi/linux/nsm.h
NOHZ, DYNTICKS SUPPORT
+M: Anna-Maria Behnsen <anna-maria@linutronix.de>
M: Frederic Weisbecker <frederic@kernel.org>
-M: Thomas Gleixner <tglx@linutronix.de>
M: Ingo Molnar <mingo@kernel.org>
+M: Thomas Gleixner <tglx@linutronix.de>
L: linux-kernel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/nohz
F: drivers/regulator/pf8x00-regulator.c
NXP PTN5150A CC LOGIC AND EXTCON DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml
OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
M: Rob Herring <robh@kernel.org>
-M: Krzysztof Kozlowski <krzysztof.kozlowski+dt@linaro.org>
+M: Krzysztof Kozlowski <krzk+dt@kernel.org>
M: Conor Dooley <conor+dt@kernel.org>
L: devicetree@vger.kernel.org
S: Maintained
PARAVIRT_OPS INTERFACE
M: Juergen Gross <jgross@suse.com>
-R: Ajay Kaher <akaher@vmware.com>
-R: Alexey Makhalov <amakhalov@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+R: Ajay Kaher <ajay.kaher@broadcom.com>
+R: Alexey Makhalov <alexey.amakhalov@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: virtualization@lists.linux.dev
L: x86@kernel.org
S: Supported
PCI DRIVER FOR SYNOPSYS DESIGNWARE
M: Jingoo Han <jingoohan1@gmail.com>
-M: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
M: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
L: linux-pci@vger.kernel.org
S: Maintained
F: drivers/pinctrl/renesas/
PIN CONTROLLER - SAMSUNG
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
R: Alim Akhtar <alim.akhtar@samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
F: include/linux/pnp.h
POSIX CLOCKS and TIMERS
+M: Anna-Maria Behnsen <anna-maria@linutronix.de>
+M: Frederic Weisbecker <frederic@kernel.org>
M: Thomas Gleixner <tglx@linutronix.de>
L: linux-kernel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
F: fs/timerfd.c
F: include/linux/time_namespace.h
-F: include/linux/timer*
+F: include/linux/timerfd.h
+F: include/uapi/linux/time.h
+F: include/uapi/linux/timerfd.h
F: include/trace/events/timer*
-F: kernel/time/*timer*
+F: kernel/time/itimer.c
+F: kernel/time/posix-*
F: kernel/time/namespace.c
POWER MANAGEMENT CORE
F: drivers/media/rc/pwm-ir-tx.c
PWM SUBSYSTEM
-M: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+M: Uwe Kleine-König <ukleinek@kernel.org>
L: linux-pwm@vger.kernel.org
S: Maintained
Q: https://patchwork.ozlabs.org/project/linux-pwm/list/
M: Ping-Ke Shih <pkshih@realtek.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtlwifi/
REALTEK WIRELESS DRIVER (rtw88)
M: Ping-Ke Shih <pkshih@realtek.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtw88/
REALTEK WIRELESS DRIVER (rtw89)
M: Ping-Ke Shih <pkshih@realtek.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtw89/
REDPINE WIRELESS DRIVER
F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml
F: drivers/i2c/busses/i2c-emev2.c
-RENESAS ETHERNET DRIVERS
+RENESAS ETHERNET AVB DRIVER
R: Sergey Shtylyov <s.shtylyov@omp.ru>
L: netdev@vger.kernel.org
L: linux-renesas-soc@vger.kernel.org
-F: Documentation/devicetree/bindings/net/renesas,*.yaml
-F: drivers/net/ethernet/renesas/
-F: include/linux/sh_eth.h
+F: Documentation/devicetree/bindings/net/renesas,etheravb.yaml
+F: drivers/net/ethernet/renesas/Kconfig
+F: drivers/net/ethernet/renesas/Makefile
+F: drivers/net/ethernet/renesas/ravb*
+
+RENESAS ETHERNET SWITCH DRIVER
+R: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+L: netdev@vger.kernel.org
+L: linux-renesas-soc@vger.kernel.org
+F: Documentation/devicetree/bindings/net/renesas,*ether-switch.yaml
+F: drivers/net/ethernet/renesas/Kconfig
+F: drivers/net/ethernet/renesas/Makefile
+F: drivers/net/ethernet/renesas/rcar_gen4*
+F: drivers/net/ethernet/renesas/rswitch*
RENESAS IDT821034 ASoC CODEC
M: Herve Codina <herve.codina@bootlin.com>
F: Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml
F: drivers/i2c/busses/i2c-rzv2m.c
+RENESAS SUPERH ETHERNET DRIVER
+R: Sergey Shtylyov <s.shtylyov@omp.ru>
+L: netdev@vger.kernel.org
+L: linux-renesas-soc@vger.kernel.org
+F: Documentation/devicetree/bindings/net/renesas,ether.yaml
+F: drivers/net/ethernet/renesas/Kconfig
+F: drivers/net/ethernet/renesas/Makefile
+F: drivers/net/ethernet/renesas/sh_eth*
+F: include/linux/sh_eth.h
+
RENESAS USB PHY DRIVER
M: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
L: linux-renesas-soc@vger.kernel.org
M: Larry Finger <Larry.Finger@lwfinger.net>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtl818x/rtl8187/
RTL8XXXU WIRELESS DRIVER (rtl8xxxu)
M: Jes Sorensen <Jes.Sorensen@gmail.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtl8xxxu/
RTRS TRANSPORT DRIVERS
F: sound/soc/samsung/
SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-crypto@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
F: drivers/platform/x86/samsung-laptop.c
SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
F: include/media/drv-intf/s3c_camif.h
SAMSUNG S3FWRN5 NFC DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
S: Maintained
F: Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
F: drivers/nfc/s3fwrn5
F: drivers/media/i2c/s5k5baf.c
SAMSUNG S5P Security SubSystem (SSS) DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Vladimir Zapolskiy <vz@mleia.com>
L: linux-crypto@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
F: drivers/media/platform/samsung/exynos4-is/
SAMSUNG SOC CLOCK DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
M: Chanwoo Choi <cw00.choi@samsung.com>
R: Alim Akhtar <alim.akhtar@samsung.com>
SAMSUNG THERMAL DRIVER
M: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
F: include/scsi/sg.h
SCSI SUBSYSTEM
-M: "James E.J. Bottomley" <jejb@linux.ibm.com>
+M: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
M: "Martin K. Petersen" <martin.petersen@oracle.com>
L: linux-scsi@vger.kernel.org
S: Maintained
SIOX
M: Thorsten Scherer <t.scherer@eckelmann.de>
-M: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
R: Pengutronix Kernel Team <kernel@pengutronix.de>
S: Supported
F: drivers/gpio/gpio-siox.c
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
F: include/linux/clocksource.h
F: include/linux/time.h
+F: include/linux/timekeeper_internal.h
+F: include/linux/timekeeping.h
F: include/linux/timex.h
F: include/uapi/linux/time.h
F: include/uapi/linux/timex.h
F: kernel/time/alarmtimer.c
-F: kernel/time/clocksource.c
-F: kernel/time/ntp.c
-F: kernel/time/time*.c
+F: kernel/time/clocksource*
+F: kernel/time/ntp*
+F: kernel/time/time.c
+F: kernel/time/timeconst.bc
+F: kernel/time/timeconv.c
+F: kernel/time/timecounter.c
+F: kernel/time/timekeeping*
+F: kernel/time/time_test.c
F: tools/testing/selftests/timers/
TIPC NETWORK LAYER
W: https://kernsec.org/wiki/index.php/Linux_Kernel_Integrity
Q: https://patchwork.kernel.org/project/linux-integrity/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git
+F: Documentation/devicetree/bindings/tpm/
F: drivers/char/tpm/
TPS546D24 DRIVER
B: https://bugzilla.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat
F: tools/power/x86/turbostat/
+F: tools/testing/selftests/turbostat/
TW5864 VIDEO4LINUX DRIVER
M: Bluecherry Maintainers <maintainers@bluecherrydvr.com>
USB HID/HIDBP DRIVERS (USB KEYBOARDS, MICE, REMOTE CONTROLS, ...)
M: Jiri Kosina <jikos@kernel.org>
-M: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+M: Benjamin Tissoires <bentiss@kernel.org>
L: linux-usb@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
F: drivers/misc/vmw_balloon.c
VMWARE HYPERVISOR INTERFACE
-M: Ajay Kaher <akaher@vmware.com>
-M: Alexey Makhalov <amakhalov@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Ajay Kaher <ajay.kaher@broadcom.com>
+M: Alexey Makhalov <alexey.amakhalov@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: virtualization@lists.linux.dev
L: x86@kernel.org
S: Supported
F: arch/x86/kernel/cpu/vmware.c
VMWARE PVRDMA DRIVER
-M: Bryan Tan <bryantan@vmware.com>
-M: Vishnu Dasa <vdasa@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Bryan Tan <bryan-bt.tan@broadcom.com>
+M: Vishnu Dasa <vishnu.dasa@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/vmw_pvrdma/
VMWARE PVSCSI DRIVER
-M: Vishal Bhakta <vbhakta@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Vishal Bhakta <vishal.bhakta@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-scsi@vger.kernel.org
S: Supported
F: drivers/scsi/vmw_pvscsi.c
F: drivers/scsi/vmw_pvscsi.h
VMWARE VIRTUAL PTP CLOCK DRIVER
-M: Jeff Sipek <jsipek@vmware.com>
-R: Ajay Kaher <akaher@vmware.com>
-R: Alexey Makhalov <amakhalov@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Nick Shi <nick.shi@broadcom.com>
+R: Ajay Kaher <ajay.kaher@broadcom.com>
+R: Alexey Makhalov <alexey.amakhalov@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/ptp/ptp_vmw.c
VMWARE VMCI DRIVER
-M: Bryan Tan <bryantan@vmware.com>
-M: Vishnu Dasa <vdasa@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Bryan Tan <bryan-bt.tan@broadcom.com>
+M: Vishnu Dasa <vishnu.dasa@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-kernel@vger.kernel.org
S: Supported
F: drivers/misc/vmw_vmci/
F: drivers/input/mouse/vmmouse.h
VMWARE VMXNET3 ETHERNET DRIVER
-M: Ronak Doshi <doshir@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Ronak Doshi <ronak.doshi@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/vmxnet3/
VMWARE VSOCK VMCI TRANSPORT DRIVER
-M: Bryan Tan <bryantan@vmware.com>
-M: Vishnu Dasa <vdasa@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Bryan Tan <bryan-bt.tan@broadcom.com>
+M: Vishnu Dasa <vishnu.dasa@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-kernel@vger.kernel.org
S: Supported
F: net/vmw_vsock/vmci_transport*
F: drivers/mmc/host/vub300.c
W1 DALLAS'S 1-WIRE BUS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
S: Maintained
F: Documentation/devicetree/bindings/w1/
F: Documentation/w1/
VERSION = 6
PATCHLEVEL = 9
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc5
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
config PAGE_SHIFT
int
- default 12 if PAGE_SIZE_4KB
- default 13 if PAGE_SIZE_8KB
- default 14 if PAGE_SIZE_16KB
- default 15 if PAGE_SIZE_32KB
- default 16 if PAGE_SIZE_64KB
- default 18 if PAGE_SIZE_256KB
+ default 12 if PAGE_SIZE_4KB
+ default 13 if PAGE_SIZE_8KB
+ default 14 if PAGE_SIZE_16KB
+ default 15 if PAGE_SIZE_32KB
+ default 16 if PAGE_SIZE_64KB
+ default 18 if PAGE_SIZE_256KB
# This allows to use a set of generic functions to determine mmap base
# address by giving priority to top-down scheme only if the process
config ARC
def_bool y
select ARC_TIMERS
- select ARCH_HAS_CPU_CACHE_ALIASING
select ARCH_HAS_CACHE_LINE_SIZE
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DMA_PREP_COHERENT
# SPDX-License-Identifier: GPL-2.0
-# uImage build relies on mkimage being availble on your host for ARC target
+# uImage build relies on mkimage being available on your host for ARC target
# You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage
-# and make sure it's reacable from your PATH
+# and make sure it's reachable from your PATH
OBJCOPYFLAGS= -O binary -R .note -R .note.gnu.build-id -R .comment -S
/*
* The DW APB ICTL intc on MB is connected to CPU intc via a
* DT "invisible" DW APB GPIO block, configured to simply pass thru
- * interrupts - setup accordinly in platform init (plat-axs10x/ax10x.c)
+ * interrupts - setup accordingly in platform init (plat-axs10x/ax10x.c)
*
- * So here we mimic a direct connection betwen them, ignoring the
+ * So here we mimic a direct connection between them, ignoring the
* ABPG GPIO. Thus set "interrupts = <24>" (DW APB GPIO to core)
* instead of "interrupts = <12>" (DW APB ICTL to DW APB GPIO)
*
};
gmac: ethernet@8000 {
- #interrupt-cells = <1>;
compatible = "snps,dwmac";
reg = <0x8000 0x2000>;
interrupts = <10>;
/*
* Embedded Vision subsystem UIO mappings; only relevant for EV VDK
*
- * This node is intentionally put outside of MB above becase
+ * This node is intentionally put outside of MB above because
* it maps areas outside of MB's 0xez-0xfz.
*/
uio_ev: uio@d0000000 {
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_ARC_CACHETYPE_H
-#define __ASM_ARC_CACHETYPE_H
-
-#include <linux/types.h>
-
-#define cpu_dcache_is_aliasing() true
-
-#endif
/*
* DSP-related saved registers - need to be saved only when you are
* scheduled out.
- * structure fields name must correspond to aux register defenitions for
+ * structure fields name must correspond to aux register definitions for
* automatic offset calculation in DSP_AUX_SAVE_RESTORE macros
*/
struct dsp_callee_regs {
* Stack switching code can no longer reliably rely on the fact that
* if we are NOT in user mode, stack is switched to kernel mode.
* e.g. L2 IRQ interrupted a L1 ISR which had not yet completed
- * it's prologue including stack switching from user mode
+ * its prologue including stack switching from user mode
*
* Vineetg: Aug 28th 2008: Bug #94984
* -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
* 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode)
* 3. But before it could switch SP from USER to KERNEL stack
* a L2 IRQ "Interrupts" L1
- * Thay way although L2 IRQ happened in Kernel mode, stack is still
+ * That way although L2 IRQ happened in Kernel mode, stack is still
* not switched.
* To handle this, we may need to switch stack even if in kernel mode
* provided SP has values in range of USER mode stack ( < 0x7000_0000 )
GET_CURR_TASK_ON_CPU r9
- /* With current tsk in r9, get it's kernel mode stack base */
+ /* With current tsk in r9, get its kernel mode stack base */
GET_TSK_STACK_BASE r9, r9
/* save U mode SP @ pt_regs->sp */
* NOTE:
*
* It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
- * for memory load operations. If used in that way interrupts are deffered
+ * for memory load operations. If used in that way interrupts are deferred
* by hardware and that is not good.
*-------------------------------------------------------------*/
.macro EXCEPTION_EPILOGUE
* NOTE:
*
* It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
- * for memory load operations. If used in that way interrupts are deffered
+ * for memory load operations. If used in that way interrupts are deferred
* by hardware and that is not good.
*-------------------------------------------------------------*/
.macro INTERRUPT_EPILOGUE LVL
#ifndef __ASM_ARC_ENTRY_H
#define __ASM_ARC_ENTRY_H
-#include <asm/unistd.h> /* For NR_syscalls defination */
+#include <asm/unistd.h> /* For NR_syscalls definition */
#include <asm/arcregs.h>
#include <asm/ptrace.h>
#include <asm/processor.h> /* For VMALLOC_START */
.endm
/*-------------------------------------------------------------
- * given a tsk struct, get to the base of it's kernel mode stack
+ * given a tsk struct, get to the base of its kernel mode stack
* tsk->thread_info is really a PAGE, whose bottom hoists stack
* which grows upwards towards thread_info
*------------------------------------------------------------*/
* ARCv2 can support 240 interrupts in the core interrupts controllers and
* 128 interrupts in IDU. Thus 512 virtual IRQs must be enough for most
* configurations of boards.
- * This doesnt affect ARCompact, but we change it to same value
+ * This doesn't affect ARCompact, but we change it to same value
*/
#define NR_IRQS 512
* IRQ Control Macros
*
* All of them have "memory" clobber (compiler barrier) which is needed to
- * ensure that LD/ST requiring irq safetly (R-M-W when LLSC is not available)
+ * ensure that LD/ST requiring irq safety (R-M-W when LLSC is not available)
* are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
*
* Noted at the time of Abilis Timer List corruption
* for retiring-mm. However destroy_context( ) still needs to do that because
* between mm_release( ) = >deactive_mm( ) and
* mmput => .. => __mmdrop( ) => destroy_context( )
- * there is a good chance that task gets sched-out/in, making it's ASID valid
+ * there is a good chance that task gets sched-out/in, making its ASID valid
* again (this teased me for a whole day).
*/
* Other rules which cause the divergence from 1:1 mapping
*
* 1. Although ARC700 can do exclusive execute/write protection (meaning R
- * can be tracked independet of X/W unlike some other CPUs), still to
+ * can be tracked independently of X/W unlike some other CPUs), still to
* keep things consistent with other archs:
* -Write implies Read: W => R
* -Execute implies Read: X => R
return *(unsigned long *)((unsigned long)regs + offset);
}
-extern int syscall_trace_entry(struct pt_regs *);
+extern int syscall_trace_enter(struct pt_regs *);
extern void syscall_trace_exit(struct pt_regs *);
#endif /* !__ASSEMBLY__ */
#ifndef __ARC_ASM_SHMPARAM_H
#define __ARC_ASM_SHMPARAM_H
-/* Handle upto 2 cache bins */
+/* Handle up to 2 cache bins */
#define SHMLBA (2 * PAGE_SIZE)
/* Enforce SHMLBA in shmat */
/*
* ARC700 doesn't support atomic Read-Modify-Write ops.
- * Originally Interrupts had to be disabled around code to gaurantee atomicity.
+ * Originally Interrupts had to be disabled around code to guarantee atomicity.
* The LLOCK/SCOND insns allow writing interrupt-hassle-free based atomic ops
* based on retry-if-irq-in-atomic (with hardware assist).
* However despite these, we provide the IRQ disabling variant
* support needed.
*
* (2) In a SMP setup, the LLOCK/SCOND atomicity across CPUs needs to be
- * gaurantted by the platform (not something which core handles).
+ * guaranteed by the platform (not something which core handles).
* Assuming a platform won't, SMP Linux needs to use spinlocks + local IRQ
* disabling for atomicity.
*
struct thread_info {
unsigned long flags; /* low level flags */
unsigned long ksp; /* kernel mode stack top in __switch_to */
- int preempt_count; /* 0 => preemptable, <0 => BUG */
+ int preempt_count; /* 0 => preemptible, <0 => BUG */
int cpu; /* current CPU */
unsigned long thr_ptr; /* TLS ptr */
struct task_struct *task; /* main task structure */
* 8051fdc4: st r2,[r1,20] ; Mem op : save result back to mem
*
* Joern suggested a better "C" algorithm which is great since
- * (1) It is portable to any architecure
+ * (1) It is portable to any architecture
* (2) At the same time it takes advantage of ARC ISA (rotate intrns)
*/
* Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
*/
-#include <linux/linkage.h> /* ARC_{EXTRY,EXIT} */
+#include <linux/linkage.h> /* ARC_{ENTRY,EXIT} */
#include <asm/entry.h> /* SAVE_ALL_{INT1,INT2,TRAP...} */
#include <asm/errno.h>
#include <asm/arcregs.h>
VECTOR mem_service ; Mem exception
VECTOR instr_service ; Instrn Error
VECTOR EV_MachineCheck ; Fatal Machine check
-VECTOR EV_TLBMissI ; Intruction TLB miss
+VECTOR EV_TLBMissI ; Instruction TLB miss
VECTOR EV_TLBMissD ; Data TLB miss
VECTOR EV_TLBProtV ; Protection Violation
VECTOR EV_PrivilegeV ; Privilege Violation
# query in hard ISR path would return false (since .IE is set) which would
# trips genirq interrupt handling asserts.
#
- # So do a "soft" disable of interrutps here.
+ # So do a "soft" disable of interrupts here.
#
# Note this disable is only for consistent book-keeping as further interrupts
# will be disabled anyways even w/o this. Hardware tracks active interrupts
- # seperately in AUX_IRQ_ACT.active and will not take new interrupts
+ # separately in AUX_IRQ_ACT.active and will not take new interrupts
# unless this one returns (or higher prio becomes pending in 2-prio scheme)
IRQ_DISABLE
lr r0, [efa]
mov r1, sp
- ; MC excpetions disable MMU
+ ; MC exceptions disable MMU
ARC_MMU_REENABLE r3
lsr r3, r10, 8
; ---------------------------------------------
; syscall TRAP
-; ABI: (r0-r7) upto 8 args, (r8) syscall number
+; ABI: (r0-r7) up to 8 args, (r8) syscall number
; ---------------------------------------------
ENTRY(EV_Trap)
; setup stack (fp, sp)
mov fp, 0
- ; set it's stack base to tsk->thread_info bottom
+ ; set its stack base to tsk->thread_info bottom
GET_TSK_STACK_BASE r0, sp
j start_kernel_secondary
WRITE_AUX(AUX_IRQ_CTRL, ictrl);
/*
- * ARCv2 core intc provides multiple interrupt priorities (upto 16).
+ * ARCv2 core intc provides multiple interrupt priorities (up to 16).
* Typical builds though have only two levels (0-high, 1-low)
* Linux by default uses lower prio 1 for most irqs, reserving 0 for
* NMI style interrupts in future (say perf)
}
}
-int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs)
+static int
+__kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs)
{
struct kprobe *p;
struct kprobe_ctlblk *kcb;
return 0;
}
-static int __kprobes arc_post_kprobe_handler(unsigned long addr,
- struct pt_regs *regs)
+static int
+__kprobes arc_post_kprobe_handler(unsigned long addr, struct pt_regs *regs)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
* (based on a specific RTL build)
* Below is the static map between perf generic/arc specific event_id and
* h/w condition names.
- * At the time of probe, we loop thru each index and find it's name to
+ * At the time of probe, we loop thru each index and find its name to
* complete the mapping of perf event_id to h/w index as latter is needed
* to program the counter really
*/
#ifdef CONFIG_ARC_HAS_DCCM
/*
* DCCM can be arbit placed in hardware.
- * Make sure it's placement/sz matches what Linux is built with
+ * Make sure its placement/sz matches what Linux is built with
*/
if ((unsigned int)__arc_dccm_base != info->dccm.base)
panic("Linux built with incorrect DCCM Base address\n");
*
* vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
* -do_signal() supports TIF_RESTORE_SIGMASK
- * -do_signal() no loner needs oldset, required by OLD sys_sigsuspend
- * -sys_rt_sigsuspend() now comes from generic code, so discard arch implemen
+ * -do_signal() no longer needs oldset, required by OLD sys_sigsuspend
+ * -sys_rt_sigsuspend() now comes from generic code, so discard arch
+ * implementation
* -sys_sigsuspend() no longer needs to fudge ptregs, hence that arg removed
* -sys_sigsuspend() no longer loops for do_signal(), sets TIF_xxx and leaves
* the job to do_signal()
*
* vineetg: July 2009
* -Modified Code to support the uClibc provided userland sigreturn stub
- * to avoid kernel synthesing it on user stack at runtime, costing TLB
+ * to avoid kernel synthesizing it on user stack at runtime, costing TLB
* probes and Cache line flushes.
*
* vineetg: July 2009
/*
* Entry point for miscll errors such as Nested Exceptions
- * -Duplicate TLB entry is handled seperately though
+ * -Duplicate TLB entry is handled separately though
*/
void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
{
#endif
/*
- * The reason for having a seperate subsection .init.ramfs is to
- * prevent objump from including it in kernel dumps
+ * The reason for having a separate subsection .init.ramfs is to
+ * prevent objdump from including it in kernel dumps
*
* Reason for having .init.ramfs above .init is to make sure that the
* binary blob is tucked away to one side, reducing the displacement
unsigned long flags;
/* If range @start to @end is more than 32 TLB entries deep,
- * its better to move to a new ASID rather than searching for
+ * it's better to move to a new ASID rather than searching for
* individual entries and then shooting them down
*
* The calc above is rough, doesn't account for unaligned parts,
* -More importantly it makes this handler inconsistent with fast-path
* TLB Refill handler which always deals with "current"
*
- * Lets see the use cases when current->mm != vma->mm and we land here
+ * Let's see the use cases when current->mm != vma->mm and we land here
* 1. execve->copy_strings()->__get_user_pages->handle_mm_fault
* Here VM wants to pre-install a TLB entry for user stack while
* current->mm still points to pre-execve mm (hence the condition).
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* Vineetg: April 2011 :
- * -MMU v1: moved out legacy code into a seperate file
+ * -MMU v1: moved out legacy code into a separate file
* -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
* helps avoid a shift when preparing PD0 from PTE
*
* Vineetg: July 2009
- * -For MMU V2, we need not do heuristics at the time of commiting a D-TLB
- * entry, so that it doesn't knock out it's I-TLB entry
+ * -For MMU V2, we need not do heuristics at the time of committing a D-TLB
+ * entry, so that it doesn't knock out its I-TLB entry
* -Some more fine tuning:
* bmsk instead of add, asl.cc instead of branch, delay slot utilise etc
*
* Vineetg: July 2009
* -Practically rewrote the I/D TLB Miss handlers
- * Now 40 and 135 instructions a peice as compared to 131 and 449 resp.
+ * Now 40 and 135 instructions apiece as compared to 131 and 449 resp.
* Hence Leaner by 1.5 K
* Used Conditional arithmetic to replace excessive branching
* Also used short instructions wherever possible
regulator-state-standby {
regulator-on-in-suspend;
- regulator-suspend-voltage = <1150000>;
+ regulator-suspend-microvolt = <1150000>;
regulator-mode = <4>;
};
regulator-state-standby {
regulator-on-in-suspend;
- regulator-suspend-voltage = <1050000>;
+ regulator-suspend-microvolt = <1050000>;
regulator-mode = <4>;
};
regulator-always-on;
regulator-state-standby {
- regulator-suspend-voltage = <1800000>;
+ regulator-suspend-microvolt = <1800000>;
regulator-on-in-suspend;
};
regulator-always-on;
regulator-state-standby {
- regulator-suspend-voltage = <3300000>;
+ regulator-suspend-microvolt = <3300000>;
regulator-on-in-suspend;
};
regulator-state-standby {
regulator-on-in-suspend;
- regulator-suspend-voltage = <1150000>;
+ regulator-suspend-microvolt = <1150000>;
regulator-mode = <4>;
};
regulator-state-standby {
regulator-on-in-suspend;
- regulator-suspend-voltage = <1050000>;
+ regulator-suspend-microvolt = <1050000>;
regulator-mode = <4>;
};
regulator-always-on;
regulator-state-standby {
- regulator-suspend-voltage = <1800000>;
+ regulator-suspend-microvolt = <1800000>;
regulator-on-in-suspend;
};
regulator-max-microvolt = <3700000>;
regulator-state-standby {
- regulator-suspend-voltage = <1800000>;
+ regulator-suspend-microvolt = <1800000>;
regulator-on-in-suspend;
};
&pinctrl_usb_pwr>;
dr_mode = "host";
power-active-high;
+ over-current-active-low;
disable-over-current;
status = "okay";
};
bus-width = <4>;
no-1-8-v;
no-sdio;
- no-emmc;
+ no-mmc;
status = "okay";
};
remote-endpoint = <&mipi_from_sensor>;
clock-lanes = <0>;
data-lanes = <1>;
+ link-frequencies = /bits/ 64 <330000000>;
};
};
};
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#include <asm/system_info.h>
+#include <uapi/asm/mman.h>
+
+static inline bool arch_memory_deny_write_exec_supported(void)
+{
+ return cpu_architecture() >= CPU_ARCH_ARMv6;
+}
+#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported
+
+#endif /* __ASM_MMAN_H__ */
static struct gpiod_lookup_table tusb_gpio_table = {
.dev_id = "musb-tusb",
.table = {
- GPIO_LOOKUP("gpio-0-15", 0, "enable",
- GPIO_ACTIVE_HIGH),
- GPIO_LOOKUP("gpio-48-63", 10, "int",
- GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-0-31", 0, "enable", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-32-63", 26, "int", GPIO_ACTIVE_HIGH),
{ }
},
};
static int slot2_cover_open;
static struct device *mmc_device;
-static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
+static struct gpiod_lookup_table nokia800_mmc_gpio_table = {
.dev_id = "mmci-omap.0",
.table = {
/* Slot switch, GPIO 96 */
- GPIO_LOOKUP("gpio-80-111", 16,
- "switch", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
{ }
},
};
static struct gpiod_lookup_table nokia810_mmc_gpio_table = {
.dev_id = "mmci-omap.0",
.table = {
+ /* Slot switch, GPIO 96 */
+ GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
/* Slot index 1, VSD power, GPIO 23 */
- GPIO_LOOKUP_IDX("gpio-16-31", 7,
- "vsd", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH),
/* Slot index 1, VIO power, GPIO 9 */
- GPIO_LOOKUP_IDX("gpio-0-15", 9,
- "vio", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio-0-31", 9, "vio", 1, GPIO_ACTIVE_HIGH),
{ }
},
};
static void __init n8x0_mmc_init(void)
{
- gpiod_add_lookup_table(&nokia8xx_mmc_gpio_table);
-
if (board_is_n810()) {
mmc1_data.slots[0].name = "external";
mmc1_data.slots[1].name = "internal";
mmc1_data.slots[1].ban_openended = 1;
gpiod_add_lookup_table(&nokia810_mmc_gpio_table);
+ } else {
+ gpiod_add_lookup_table(&nokia800_mmc_gpio_table);
}
mmc1_data.nr_slots = 2;
interrupts = <GIC_SPI 267 IRQ_TYPE_LEVEL_HIGH>;
fsl,usbphy = <&usbphy1>;
fsl,usbmisc = <&usbmisc1 0>;
- clocks = <&usb2_lpcg 0>;
+ clocks = <&usb2_lpcg IMX_LPCG_CLK_6>;
ahb-burst-config = <0x0>;
tx-burst-size-dword = <0x10>;
rx-burst-size-dword = <0x10>;
usbphy1: usbphy@5b100000 {
compatible = "fsl,imx7ulp-usbphy";
reg = <0x5b100000 0x1000>;
- clocks = <&usb2_lpcg 1>;
+ clocks = <&usb2_lpcg IMX_LPCG_CLK_7>;
power-domains = <&pd IMX_SC_R_USB_0_PHY>;
status = "disabled";
};
interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b010000 0x10000>;
clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>,
- <&sdhc0_lpcg IMX_LPCG_CLK_0>,
- <&sdhc0_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc0_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_0>;
status = "disabled";
interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b020000 0x10000>;
clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>,
- <&sdhc1_lpcg IMX_LPCG_CLK_0>,
- <&sdhc1_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc1_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc1_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_1>;
fsl,tuning-start-tap = <20>;
interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b030000 0x10000>;
clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>,
- <&sdhc2_lpcg IMX_LPCG_CLK_0>,
- <&sdhc2_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc2_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc2_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_2>;
status = "disabled";
#size-cells = <0>;
interrupts = <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi0_lpcg 0>,
- <&spi0_lpcg 1>;
+ clocks = <&spi0_lpcg IMX_LPCG_CLK_0>,
+ <&spi0_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
#size-cells = <0>;
interrupts = <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi1_lpcg 0>,
- <&spi1_lpcg 1>;
+ clocks = <&spi1_lpcg IMX_LPCG_CLK_0>,
+ <&spi1_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
#size-cells = <0>;
interrupts = <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi2_lpcg 0>,
- <&spi2_lpcg 1>;
+ clocks = <&spi2_lpcg IMX_LPCG_CLK_0>,
+ <&spi2_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
#size-cells = <0>;
interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi3_lpcg 0>,
- <&spi3_lpcg 1>;
+ clocks = <&spi3_lpcg IMX_LPCG_CLK_0>,
+ <&spi3_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
compatible = "fsl,imx8qxp-pwm", "fsl,imx27-pwm";
reg = <0x5a190000 0x1000>;
interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&adma_pwm_lpcg 1>,
- <&adma_pwm_lpcg 0>;
+ clocks = <&adma_pwm_lpcg IMX_LPCG_CLK_4>,
+ <&adma_pwm_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
reg = <0x5a880000 0x10000>;
interrupts = <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&adc0_lpcg 0>,
- <&adc0_lpcg 1>;
+ clocks = <&adc0_lpcg IMX_LPCG_CLK_0>,
+ <&adc0_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_ADC_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
reg = <0x5a890000 0x10000>;
interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&adc1_lpcg 0>,
- <&adc1_lpcg 1>;
+ clocks = <&adc1_lpcg IMX_LPCG_CLK_0>,
+ <&adc1_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_ADC_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
reg = <0x5a8d0000 0x10000>;
interrupts = <GIC_SPI 235 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
* CAN1 shares CAN0's clock and to enable CAN0's clock it
* has to be powered on.
*/
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
* CAN2 shares CAN0's clock and to enable CAN0's clock it
* has to be powered on.
*/
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
compatible = "fsl,imx27-pwm";
reg = <0x5d000000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm0_lpcg 4>,
- <&pwm0_lpcg 1>;
+ clocks = <&pwm0_lpcg IMX_LPCG_CLK_6>,
+ <&pwm0_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
compatible = "fsl,imx27-pwm";
reg = <0x5d010000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm1_lpcg 4>,
- <&pwm1_lpcg 1>;
+ clocks = <&pwm1_lpcg IMX_LPCG_CLK_6>,
+ <&pwm1_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
compatible = "fsl,imx27-pwm";
reg = <0x5d020000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm2_lpcg 4>,
- <&pwm2_lpcg 1>;
+ clocks = <&pwm2_lpcg IMX_LPCG_CLK_6>,
+ <&pwm2_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
compatible = "fsl,imx27-pwm";
reg = <0x5d030000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm3_lpcg 4>,
- <&pwm3_lpcg 1>;
+ clocks = <&pwm3_lpcg IMX_LPCG_CLK_6>,
+ <&pwm3_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
pinctrl-0 = <&pinctrl_usbcon1>;
type = "micro";
label = "otg";
+ vbus-supply = <®_usb1_vbus>;
id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
port {
};
&usb3_phy0 {
- vbus-supply = <®_usb1_vbus>;
status = "okay";
};
pinctrl-0 = <&pinctrl_usbcon1>;
type = "micro";
label = "otg";
+ vbus-supply = <®_usb1_vbus>;
id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
port {
};
&usb3_phy0 {
- vbus-supply = <®_usb1_vbus>;
status = "okay";
};
<&clk IMX8MP_CLK_MEDIA_MIPI_PHY1_REF_ROOT>,
<&clk IMX8MP_CLK_MEDIA_AXI_ROOT>;
clock-names = "pclk", "wrap", "phy", "axi";
- assigned-clocks = <&clk IMX8MP_CLK_MEDIA_CAM1_PIX>,
+ assigned-clocks = <&clk IMX8MP_CLK_MEDIA_CAM2_PIX>,
<&clk IMX8MP_CLK_MEDIA_MIPI_PHY1_REF>;
assigned-clock-parents = <&clk IMX8MP_SYS_PLL2_1000M>,
<&clk IMX8MP_CLK_24M>;
};
&flexcan2 {
- clocks = <&can1_lpcg 1>,
- <&can1_lpcg 0>;
+ clocks = <&can1_lpcg IMX_LPCG_CLK_4>,
+ <&can1_lpcg IMX_LPCG_CLK_0>;
assigned-clocks = <&clk IMX_SC_R_CAN_1 IMX_SC_PM_CLK_PER>;
fsl,clk-source = /bits/ 8 <1>;
};
&flexcan3 {
- clocks = <&can2_lpcg 1>,
- <&can2_lpcg 0>;
+ clocks = <&can2_lpcg IMX_LPCG_CLK_4>,
+ <&can2_lpcg IMX_LPCG_CLK_0>;
assigned-clocks = <&clk IMX_SC_R_CAN_2 IMX_SC_PM_CLK_PER>;
fsl,clk-source = /bits/ 8 <1>;
};
};
&pio {
- eth_default: eth_default {
+ eth_default: eth-default-pins {
tx_pins {
pinmux = <MT2712_PIN_71_GBE_TXD3__FUNC_GBE_TXD3>,
<MT2712_PIN_72_GBE_TXD2__FUNC_GBE_TXD2>,
};
};
- eth_sleep: eth_sleep {
+ eth_sleep: eth-sleep-pins {
tx_pins {
pinmux = <MT2712_PIN_71_GBE_TXD3__FUNC_GPIO71>,
<MT2712_PIN_72_GBE_TXD2__FUNC_GPIO72>,
};
};
- usb0_id_pins_float: usb0_iddig {
+ usb0_id_pins_float: usb0-iddig-pins {
pins_iddig {
pinmux = <MT2712_PIN_12_IDDIG_P0__FUNC_IDDIG_A>;
bias-pull-up;
};
};
- usb1_id_pins_float: usb1_iddig {
+ usb1_id_pins_float: usb1-iddig-pins {
pins_iddig {
pinmux = <MT2712_PIN_14_IDDIG_P1__FUNC_IDDIG_B>;
bias-pull-up;
#clock-cells = <1>;
};
- infracfg: syscon@10001000 {
+ infracfg: clock-controller@10001000 {
compatible = "mediatek,mt2712-infracfg", "syscon";
reg = <0 0x10001000 0 0x1000>;
#clock-cells = <1>;
+ #reset-cells = <1>;
};
pericfg: syscon@10003000 {
clock-names = "hif_sel";
};
- cir: cir@10009000 {
+ cir: ir-receiver@10009000 {
compatible = "mediatek,mt7622-cir";
reg = <0 0x10009000 0 0x1000>;
interrupts = <GIC_SPI 175 IRQ_TYPE_LEVEL_LOW>;
};
};
- apmixedsys: apmixedsys@10209000 {
- compatible = "mediatek,mt7622-apmixedsys",
- "syscon";
+ apmixedsys: clock-controller@10209000 {
+ compatible = "mediatek,mt7622-apmixedsys";
reg = <0 0x10209000 0 0x1000>;
#clock-cells = <1>;
};
- topckgen: topckgen@10210000 {
- compatible = "mediatek,mt7622-topckgen",
- "syscon";
+ topckgen: clock-controller@10210000 {
+ compatible = "mediatek,mt7622-topckgen";
reg = <0 0x10210000 0 0x1000>;
#clock-cells = <1>;
};
<&pericfg CLK_PERI_AUXADC_PD>;
clock-names = "therm", "auxadc";
resets = <&pericfg MT7622_PERI_THERM_SW_RST>;
- reset-names = "therm";
mediatek,auxadc = <&auxadc>;
mediatek,apmixedsys = <&apmixedsys>;
nvmem-cells = <&thermal_calibration>;
power-domains = <&scpsys MT7622_POWER_DOMAIN_WB>;
};
- ssusbsys: ssusbsys@1a000000 {
- compatible = "mediatek,mt7622-ssusbsys",
- "syscon";
+ ssusbsys: clock-controller@1a000000 {
+ compatible = "mediatek,mt7622-ssusbsys";
reg = <0 0x1a000000 0 0x1000>;
#clock-cells = <1>;
#reset-cells = <1>;
};
};
- pciesys: pciesys@1a100800 {
- compatible = "mediatek,mt7622-pciesys",
- "syscon";
+ pciesys: clock-controller@1a100800 {
+ compatible = "mediatek,mt7622-pciesys";
reg = <0 0x1a100800 0 0x1000>;
#clock-cells = <1>;
#reset-cells = <1>;
};
};
- hifsys: syscon@1af00000 {
- compatible = "mediatek,mt7622-hifsys", "syscon";
+ hifsys: clock-controller@1af00000 {
+ compatible = "mediatek,mt7622-hifsys";
reg = <0 0x1af00000 0 0x70>;
+ #clock-cells = <1>;
};
- ethsys: syscon@1b000000 {
+ ethsys: clock-controller@1b000000 {
compatible = "mediatek,mt7622-ethsys",
"syscon";
reg = <0 0x1b000000 0 0x1000>;
};
eth: ethernet@1b100000 {
- compatible = "mediatek,mt7622-eth",
- "mediatek,mt2701-eth",
- "syscon";
+ compatible = "mediatek,mt7622-eth";
reg = <0 0x1b100000 0 0x20000>;
interrupts = <GIC_SPI 223 IRQ_TYPE_LEVEL_LOW>,
<GIC_SPI 224 IRQ_TYPE_LEVEL_LOW>,
&cpu_thermal {
cooling-maps {
- cpu-active-high {
+ map-cpu-active-high {
/* active: set fan to cooling level 2 */
cooling-device = <&fan 2 2>;
trip = <&cpu_trip_active_high>;
};
- cpu-active-med {
+ map-cpu-active-med {
/* active: set fan to cooling level 1 */
cooling-device = <&fan 1 1>;
trip = <&cpu_trip_active_med>;
};
- cpu-active-low {
+ map-cpu-active-low {
/* active: set fan to cooling level 0 */
cooling-device = <&fan 0 0>;
trip = <&cpu_trip_active_low>;
reg = <0 0x1100c800 0 0x800>;
interrupts = <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&infracfg CLK_INFRA_THERM_CK>,
- <&infracfg CLK_INFRA_ADC_26M_CK>,
- <&infracfg CLK_INFRA_ADC_FRC_CK>;
- clock-names = "therm", "auxadc", "adc_32k";
+ <&infracfg CLK_INFRA_ADC_26M_CK>;
+ clock-names = "therm", "auxadc";
nvmem-cells = <&thermal_calibration>;
nvmem-cell-names = "calibration-data";
#thermal-sensor-cells = <1>;
compatible = "mediatek,mt7986-ethsys",
"syscon";
reg = <0 0x15000000 0 0x1000>;
- #address-cells = <1>;
- #size-cells = <1>;
#clock-cells = <1>;
#reset-cells = <1>;
};
<&topckgen CLK_TOP_SGM_325M_SEL>;
assigned-clock-parents = <&apmixedsys CLK_APMIXED_NET2PLL>,
<&apmixedsys CLK_APMIXED_SGMPLL>;
- #reset-cells = <1>;
#address-cells = <1>;
#size-cells = <0>;
mediatek,ethsys = <ðsys>;
};
&mt6358_vgpu_reg {
- regulator-min-microvolt = <625000>;
regulator-max-microvolt = <900000>;
regulator-coupled-with = <&mt6358_vsram_gpu_reg>;
compatible = "mediatek,mt8183-mfgcfg", "syscon";
reg = <0 0x13000000 0 0x1000>;
#clock-cells = <1>;
+ power-domains = <&spm MT8183_POWER_DOMAIN_MFG_ASYNC>;
};
gpu: gpu@13040000 {
* regulator coupling requirements.
*/
regulator-name = "ppvar_dvdd_vgpu";
- regulator-min-microvolt = <600000>;
+ regulator-min-microvolt = <500000>;
regulator-max-microvolt = <950000>;
regulator-ramp-delay = <6250>;
regulator-enable-ramp-delay = <200>;
mt6315_6_vbuck1: vbuck1 {
regulator-compatible = "vbuck1";
regulator-name = "Vbcpu";
- regulator-min-microvolt = <300000>;
+ regulator-min-microvolt = <400000>;
regulator-max-microvolt = <1193750>;
regulator-enable-ramp-delay = <256>;
regulator-allowed-modes = <0 1 2>;
mt6315_6_vbuck3: vbuck3 {
regulator-compatible = "vbuck3";
regulator-name = "Vlcpu";
- regulator-min-microvolt = <300000>;
+ regulator-min-microvolt = <400000>;
regulator-max-microvolt = <1193750>;
regulator-enable-ramp-delay = <256>;
regulator-allowed-modes = <0 1 2>;
mt6315_7_vbuck1: vbuck1 {
regulator-compatible = "vbuck1";
regulator-name = "Vgpu";
- regulator-min-microvolt = <606250>;
+ regulator-min-microvolt = <400000>;
regulator-max-microvolt = <800000>;
regulator-enable-ramp-delay = <256>;
regulator-allowed-modes = <0 1 2>;
reg = <0 0x14001000 0 0x1000>;
interrupts = <GIC_SPI 252 IRQ_TYPE_LEVEL_HIGH 0>;
clocks = <&mmsys CLK_MM_DISP_MUTEX0>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0x1000 0x1000>;
mediatek,gce-events = <CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_0>,
<CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_1>;
power-domains = <&spm MT8192_POWER_DOMAIN_DISP>;
status = "okay";
};
+&cpu0 {
+ cpu-supply = <&mt6359_vcore_buck_reg>;
+};
+
+&cpu1 {
+ cpu-supply = <&mt6359_vcore_buck_reg>;
+};
+
+&cpu2 {
+ cpu-supply = <&mt6359_vcore_buck_reg>;
+};
+
+&cpu3 {
+ cpu-supply = <&mt6359_vcore_buck_reg>;
+};
+
+&cpu4 {
+ cpu-supply = <&mt6315_6_vbuck1>;
+};
+
+&cpu5 {
+ cpu-supply = <&mt6315_6_vbuck1>;
+};
+
+&cpu6 {
+ cpu-supply = <&mt6315_6_vbuck1>;
+};
+
+&cpu7 {
+ cpu-supply = <&mt6315_6_vbuck1>;
+};
+
&dp_intf0 {
status = "okay";
mt6315_6_vbuck1: vbuck1 {
regulator-compatible = "vbuck1";
regulator-name = "Vbcpu";
- regulator-min-microvolt = <300000>;
+ regulator-min-microvolt = <400000>;
regulator-max-microvolt = <1193750>;
regulator-enable-ramp-delay = <256>;
regulator-ramp-delay = <6250>;
mt6315_7_vbuck1: vbuck1 {
regulator-compatible = "vbuck1";
regulator-name = "Vgpu";
- regulator-min-microvolt = <625000>;
+ regulator-min-microvolt = <400000>;
regulator-max-microvolt = <1193750>;
regulator-enable-ramp-delay = <256>;
regulator-ramp-delay = <6250>;
compatible = "mediatek,mt8195-vppsys0", "syscon";
reg = <0 0x14000000 0 0x1000>;
#clock-cells = <1>;
+ mediatek,gce-client-reg = <&gce1 SUBSYS_1400XXXX 0 0x1000>;
};
dma-controller@14001000 {
compatible = "mediatek,mt8195-vppsys1", "syscon";
reg = <0 0x14f00000 0 0x1000>;
#clock-cells = <1>;
+ mediatek,gce-client-reg = <&gce1 SUBSYS_14f0XXXX 0 0x1000>;
};
mutex@14f01000 {
reg = <0 0x1c01a000 0 0x1000>;
mboxes = <&gce0 0 CMDQ_THR_PRIO_4>;
#clock-cells = <1>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c01XXXX 0xa000 0x1000>;
};
interrupts = <GIC_SPI 658 IRQ_TYPE_LEVEL_HIGH 0>;
power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS0>;
clocks = <&vdosys0 CLK_VDO0_DISP_MUTEX0>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c01XXXX 0x6000 0x1000>;
mediatek,gce-events = <CMDQ_EVENT_VDO0_DISP_STREAM_DONE_0>;
};
power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
clocks = <&vdosys1 CLK_VDO1_DISP_MUTEX>;
clock-names = "vdo1_mutex";
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x1000 0x1000>;
mediatek,gce-events = <CMDQ_EVENT_VDO1_STREAM_DONE_ENG_0>;
};
vddrf-supply = <&pp1300_l2c>;
vddch0-supply = <&pp3300_l10c>;
max-speed = <3200000>;
+
+ qcom,local-bd-address-broken;
};
};
compatible = "qcom,sc7280-adsp-pas";
reg = <0 0x03700000 0 0x100>;
- interrupts-extended = <&pdc 6 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>,
<&adsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
<&adsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>,
<&adsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>,
compatible = "qcom,sc7280-cdsp-pas";
reg = <0 0x0a300000 0 0x10000>;
- interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>,
<&cdsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
<&cdsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>,
<&cdsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>,
resets = <&gcc GCC_USB30_SEC_BCR>;
power-domains = <&gcc USB30_SEC_GDSC>;
interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 7 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 40 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 10 IRQ_TYPE_EDGE_BOTH>,
<&pdc 11 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
reset-names = "pci";
power-domains = <&gcc PCIE_4_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
phys = <&pcie4_phy>;
phy-names = "pciephy";
reset-names = "pci";
power-domains = <&gcc PCIE_3B_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
phys = <&pcie3b_phy>;
phy-names = "pciephy";
reset-names = "pci";
power-domains = <&gcc PCIE_3A_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
phys = <&pcie3a_phy>;
phy-names = "pciephy";
reset-names = "pci";
power-domains = <&gcc PCIE_2B_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
phys = <&pcie2b_phy>;
phy-names = "pciephy";
reset-names = "pci";
power-domains = <&gcc PCIE_2A_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
phys = <&pcie2a_phy>;
phy-names = "pciephy";
compatible = "qcom,sc8280xp-adsp-pas";
reg = <0 0x03000000 0 0x100>;
- interrupts-extended = <&intc GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 162 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>,
compatible = "qcom,sc8280xp-nsp0-pas";
reg = <0 0x1b300000 0 0x100>;
- interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>,
<&smp2p_nsp0_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_nsp0_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_nsp0_in 2 IRQ_TYPE_EDGE_RISING>,
compatible = "qcom,sc8280xp-nsp1-pas";
reg = <0 0x21300000 0 0x100>;
- interrupts-extended = <&intc GIC_SPI 887 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 887 IRQ_TYPE_EDGE_RISING>,
<&smp2p_nsp1_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_nsp1_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_nsp1_in 2 IRQ_TYPE_EDGE_RISING>,
compatible = "qcom,sm6350-adsp-pas";
reg = <0 0x03000000 0 0x100>;
- interrupts-extended = <&pdc 6 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>,
compatible = "qcom,sm6350-cdsp-pas";
reg = <0 0x08300000 0 0x10000>;
- interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>,
<&smp2p_cdsp_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_cdsp_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_cdsp_in 2 IRQ_TYPE_EDGE_RISING>,
compatible = "qcom,sm6375-adsp-pas";
reg = <0 0x0a400000 0 0x100>;
- interrupts-extended = <&intc GIC_SPI 282 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 282 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>,
compatible = "qcom,sm8250-slpi-pas";
reg = <0 0x05c00000 0 0x4000>;
- interrupts-extended = <&pdc 9 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&pdc 9 IRQ_TYPE_EDGE_RISING>,
<&smp2p_slpi_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_slpi_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_slpi_in 2 IRQ_TYPE_EDGE_RISING>,
compatible = "qcom,sm8250-cdsp-pas";
reg = <0 0x08300000 0 0x10000>;
- interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>,
<&smp2p_cdsp_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_cdsp_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_cdsp_in 2 IRQ_TYPE_EDGE_RISING>,
compatible = "qcom,sm8250-adsp-pas";
reg = <0 0x17300000 0 0x100>;
- interrupts-extended = <&pdc 6 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>,
ranges = <0x01000000 0x0 0x00000000 0x0 0x60200000 0x0 0x100000>,
<0x02000000 0x0 0x60300000 0x0 0x60300000 0x0 0x3d00000>;
- /*
- * MSIs for BDF (1:0.0) only works with Device ID 0x5980.
- * Hence, the IDs are swapped.
- */
- msi-map = <0x0 &gic_its 0x5981 0x1>,
- <0x100 &gic_its 0x5980 0x1>;
+ msi-map = <0x0 &gic_its 0x5980 0x1>,
+ <0x100 &gic_its 0x5981 0x1>;
msi-map-mask = <0xff00>;
interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>,
<0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>;
- /*
- * MSIs for BDF (1:0.0) only works with Device ID 0x5a00.
- * Hence, the IDs are swapped.
- */
- msi-map = <0x0 &gic_its 0x5a01 0x1>,
- <0x100 &gic_its 0x5a00 0x1>;
+ msi-map = <0x0 &gic_its 0x5a00 0x1>,
+ <0x100 &gic_its 0x5a01 0x1>;
msi-map-mask = <0xff00>;
interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 308 IRQ_TYPE_LEVEL_HIGH>,
<&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_0 0>;
interconnect-names = "pcie-mem", "cpu-pcie";
- /* Entries are reversed due to the unusual ITS DeviceID encoding */
- msi-map = <0x0 &gic_its 0x1401 0x1>,
- <0x100 &gic_its 0x1400 0x1>;
+ msi-map = <0x0 &gic_its 0x1400 0x1>,
+ <0x100 &gic_its 0x1401 0x1>;
iommu-map = <0x0 &apps_smmu 0x1400 0x1>,
<0x100 &apps_smmu 0x1401 0x1>;
<&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_1 0>;
interconnect-names = "pcie-mem", "cpu-pcie";
- /* Entries are reversed due to the unusual ITS DeviceID encoding */
- msi-map = <0x0 &gic_its 0x1481 0x1>,
- <0x100 &gic_its 0x1480 0x1>;
+ msi-map = <0x0 &gic_its 0x1480 0x1>,
+ <0x100 &gic_its 0x1481 0x1>;
iommu-map = <0x0 &apps_smmu 0x1480 0x1>,
<0x100 &apps_smmu 0x1481 0x1>;
interrupt-map-mask = <0 0 0 0x7>;
#interrupt-cells = <1>;
- /* Entries are reversed due to the unusual ITS DeviceID encoding */
- msi-map = <0x0 &gic_its 0x1401 0x1>,
- <0x100 &gic_its 0x1400 0x1>;
+ msi-map = <0x0 &gic_its 0x1400 0x1>,
+ <0x100 &gic_its 0x1401 0x1>;
msi-map-mask = <0xff00>;
linux,pci-domain = <0>;
interrupt-map-mask = <0 0 0 0x7>;
#interrupt-cells = <1>;
- /* Entries are reversed due to the unusual ITS DeviceID encoding */
- msi-map = <0x0 &gic_its 0x1481 0x1>,
- <0x100 &gic_its 0x1480 0x1>;
+ msi-map = <0x0 &gic_its 0x1480 0x1>,
+ <0x100 &gic_its 0x1481 0x1>;
msi-map-mask = <0xff00>;
linux,pci-domain = <1>;
domain-idle-states {
CLUSTER_CL4: cluster-sleep-0 {
- compatible = "arm,idle-state";
+ compatible = "domain-idle-state";
idle-state-name = "l2-ret";
arm,psci-suspend-param = <0x01000044>;
entry-latency-us = <350>;
};
CLUSTER_CL5: cluster-sleep-1 {
- compatible = "arm,idle-state";
+ compatible = "domain-idle-state";
idle-state-name = "ret-pll-off";
arm,psci-suspend-param = <0x01000054>;
entry-latency-us = <2200>;
port@1 {
reg = <1>;
- mipi1_in_panel: endpoint@1 {
+ mipi1_in_panel: endpoint {
remote-endpoint = <&mipi1_out_panel>;
};
};
ep-gpios = <&gpio0 3 GPIO_ACTIVE_HIGH>;
/* PERST# asserted in S3 */
- pcie-reset-suspend = <1>;
vpcie3v3-supply = <&wlan_3v3>;
vpcie1v8-supply = <&pp1800_pcie>;
#size-cells = <0>;
interface@0 { /* interface 0 of configuration 1 */
- compatible = "usbbda,8156.config1.0";
+ compatible = "usbifbda,8156.config1.0";
reg = <0 1>;
};
};
};
&pcie0 {
- bus-scan-delay-ms = <1000>;
ep-gpios = <&gpio2 RK_PD4 GPIO_ACTIVE_HIGH>;
num-lanes = <4>;
pinctrl-names = "default";
num-lanes = <4>;
pinctrl-names = "default";
pinctrl-0 = <&pcie_clkreqn_cpm>;
+ vpcie3v3-supply = <&vcc3v3_baseboard>;
+ vpcie12v-supply = <&dc_12v>;
status = "okay";
};
regulator-max-microvolt = <5000000>;
};
+ vcca_0v9: vcca-0v9-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcca_0v9";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <900000>;
+ vin-supply = <&vcc_1v8>;
+ };
+
+ vcca_1v8: vcca-1v8-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcca_1v8";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ vin-supply = <&vcc3v3_sys>;
+ };
+
vdd_log: vdd-log {
compatible = "pwm-regulator";
pwms = <&pwm2 0 25000 1>;
gpio1830-supply = <&vcc_1v8>;
};
-&pmu_io_domains {
- status = "okay";
- pmu1830-supply = <&vcc_1v8>;
+&pcie0 {
+ /* PCIe PHY supplies */
+ vpcie0v9-supply = <&vcca_0v9>;
+ vpcie1v8-supply = <&vcca_1v8>;
};
-&pwm2 {
- status = "okay";
+&pcie_clkreqn_cpm {
+ rockchip,pins =
+ <2 RK_PD2 RK_FUNC_GPIO &pcfg_pull_up>;
};
&pinctrl {
+ pinctrl-names = "default";
+ pinctrl-0 = <&q7_thermal_pin>;
+
+ gpios {
+ q7_thermal_pin: q7-thermal-pin {
+ rockchip,pins =
+ <0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+ };
+
i2c8 {
i2c8_xfer_a: i2c8-xfer {
rockchip,pins =
usb3 {
usb3_id: usb3-id {
rockchip,pins =
- <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>;
+ <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
};
+&pmu_io_domains {
+ status = "okay";
+ pmu1830-supply = <&vcc_1v8>;
+};
+
+&pwm2 {
+ status = "okay";
+};
+
&sdhci {
/*
* Signal integrity isn't great at 200MHz but 100MHz has proven stable
&pcie2x1 {
reset-gpios = <&gpio0 RK_PB6 GPIO_ACTIVE_HIGH>;
- disable-gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>;
vpcie3v3-supply = <&vcc3v3_pcie>;
status = "okay";
};
vccio_sd: LDO_REG5 {
regulator-name = "vccio_sd";
+ regulator-always-on;
+ regulator-boot-on;
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
#address-cells = <1>;
#size-cells = <0>;
- switch@0 {
+ switch@1f {
compatible = "mediatek,mt7531";
- reg = <0>;
+ reg = <0x1f>;
ports {
#address-cells = <1>;
&pcie2x1 {
reset-gpios = <&gpio3 RK_PC1 GPIO_ACTIVE_HIGH>;
- disable-gpios = <&gpio3 RK_PC2 GPIO_ACTIVE_HIGH>;
vpcie3v3-supply = <&vcc3v3_mini_pcie>;
status = "okay";
};
pinctrl-0 = <&i2c7m0_xfer>;
status = "okay";
- es8316: audio-codec@11 {
+ es8316: audio-codec@10 {
compatible = "everest,es8316";
- reg = <0x11>;
+ reg = <0x10>;
assigned-clocks = <&cru I2S0_8CH_MCLKOUT>;
assigned-clock-rates = <12288000>;
clocks = <&cru I2S0_8CH_MCLKOUT>;
pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>,
<&rk806_dvs2_null>, <&rk806_dvs3_null>;
spi-max-frequency = <1000000>;
+ system-power-controller;
vcc1-supply = <&vcc5v0_sys>;
vcc2-supply = <&vcc5v0_sys>;
#gpio-cells = <2>;
rk806_dvs1_null: dvs1-null-pins {
- pins = "gpio_pwrctrl2";
+ pins = "gpio_pwrctrl1";
function = "pin_fun0";
};
<&rk806_dvs2_null>, <&rk806_dvs3_null>;
pinctrl-names = "default";
spi-max-frequency = <1000000>;
+ system-power-controller;
vcc1-supply = <&vcc4v0_sys>;
vcc2-supply = <&vcc4v0_sys>;
#define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3)
/*
- * Generate 'num' values from -1 to 30 with -1 rejected by the
- * __flush_tlb_range() loop below.
+ * Generate 'num' values from -1 to 31 with -1 rejected by the
+ * __flush_tlb_range() loop below. Its return value is only
+ * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If
+ * 'pages' is more than that, you must iterate over the overall
+ * range.
*/
-#define TLBI_RANGE_MASK GENMASK_ULL(4, 0)
-#define __TLBI_RANGE_NUM(pages, scale) \
- ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
+#define __TLBI_RANGE_NUM(pages, scale) \
+ ({ \
+ int __pages = min((pages), \
+ __TLBI_RANGE_PAGES(31, (scale))); \
+ (__pages >> (5 * (scale) + 1)) - 1; \
+ })
/*
* TLB Invalidation
* 3. If there is 1 page remaining, flush it through non-range operations. Range
* operations can only span an even number of pages. We save this for last to
* ensure 64KB start alignment is maintained for the LPA2 case.
- *
- * Note that certain ranges can be represented by either num = 31 and
- * scale or num = 0 and scale + 1. The loop below favours the latter
- * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
#define __flush_tlb_range_op(op, start, pages, stride, \
asid, tlb_level, tlbi_user, lpa2) \
adr_l x1, __hyp_text_end
adr_l x2, dcache_clean_poc
blr x2
+
+ mov_q x0, INIT_SCTLR_EL2_MMU_OFF
+ pre_disable_mmu_workaround
+ msr sctlr_el2, x0
+ isb
0:
mov_q x0, HCR_HOST_NVHE_FLAGS
+
+ /*
+ * Compliant CPUs advertise their VHE-onlyness with
+ * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
+ * RES1 in that case. Publish the E2H bit early so that
+ * it can be picked up by the init_el2_state macro.
+ *
+ * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
+ * don't advertise it (they predate this relaxation).
+ */
+ mrs_s x1, SYS_ID_AA64MMFR4_EL1
+ tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
+
+ orr x0, x0, #HCR_E2H
+1:
msr hcr_el2, x0
isb
mov_q x1, INIT_SCTLR_EL1_MMU_OFF
- /*
- * Compliant CPUs advertise their VHE-onlyness with
- * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
- * RES1 in that case.
- *
- * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but
- * don't advertise it (they predate this relaxation).
- */
- mrs_s x0, SYS_ID_AA64MMFR4_EL1
- ubfx x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
- tbnz x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
-
mrs x0, hcr_el2
and x0, x0, #HCR_E2H
cbz x0, 2f
-1:
+
/* Set a sane SCTLR_EL1, the VHE way */
- pre_disable_mmu_workaround
msr_s SYS_SCTLR_EL12, x1
mov x2, #BOOT_CPU_FLAG_E2H
b 3f
2:
- pre_disable_mmu_workaround
msr sctlr_el1, x1
mov x2, xzr
3:
{
unsigned int vq;
bool active;
- bool fpsimd_only;
enum vec_type task_type;
memset(header, 0, sizeof(*header));
case ARM64_VEC_SVE:
if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE);
break;
case ARM64_VEC_SME:
if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = false;
break;
default:
WARN_ON_ONCE(1);
}
if (active) {
- if (fpsimd_only) {
+ if (target->thread.fp_type == FP_STATE_FPSIMD) {
header->flags |= SVE_PT_REGS_FPSIMD;
} else {
header->flags |= SVE_PT_REGS_SVE;
if (err)
goto out_hyp;
- if (is_protected_kvm_enabled()) {
- kvm_info("Protected nVHE mode initialized successfully\n");
- } else if (in_hyp_mode) {
- kvm_info("VHE mode initialized successfully\n");
- } else {
- char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n';
- kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode);
- }
+ kvm_info("%s%sVHE mode initialized successfully\n",
+ in_hyp_mode ? "" : (is_protected_kvm_enabled() ?
+ "Protected " : "Hyp "),
+ in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ?
+ "h" : "n"));
/*
* FIXME: Do something reasonable if kvm_init() fails after pKVM
/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt, false);
- __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+ TLBI_TTL_UNKNOWN);
dsb(ish);
__tlbi(vmalle1is);
kvm_clear_pte(ctx->ptep);
dsb(ishst);
- __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
+ __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN);
} else {
if (ctx->end - ctx->addr < granule)
return -EINVAL;
* Perform the appropriate TLB invalidation based on the
* evicted pte value (if any).
*/
- if (kvm_pte_table(ctx->old, ctx->level))
- kvm_tlb_flush_vmid_range(mmu, ctx->addr,
- kvm_granule_size(ctx->level));
- else if (kvm_pte_valid(ctx->old))
+ if (kvm_pte_table(ctx->old, ctx->level)) {
+ u64 size = kvm_granule_size(ctx->level);
+ u64 addr = ALIGN_DOWN(ctx->addr, size);
+
+ kvm_tlb_flush_vmid_range(mmu, addr, size);
+ } else if (kvm_pte_valid(ctx->old)) {
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
+ }
}
if (stage2_pte_is_counted(ctx->old))
if (kvm_pte_valid(ctx->old)) {
kvm_clear_pte(ctx->ptep);
- if (!stage2_unmap_defer_tlb_flush(pgt))
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
- ctx->addr, ctx->level);
+ if (kvm_pte_table(ctx->old, ctx->level)) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+ TLBI_TTL_UNKNOWN);
+ } else if (!stage2_unmap_defer_tlb_flush(pgt)) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+ ctx->level);
+ }
}
mm_ops->put_page(ctx->ptep);
/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt);
- __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+ TLBI_TTL_UNKNOWN);
dsb(ish);
__tlbi(vmalle1is);
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
- if (esr_fsc_is_permission_fault(esr)) {
+ if (esr_fsc_is_translation_fault(esr)) {
/* Beyond sanitised PARange (which is the IPA limit) */
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
kvm_inject_size_fault(vcpu);
pte_t *ptep = NULL;
pgdp = pgd_offset(mm, addr);
- p4dp = p4d_offset(pgdp, addr);
+ p4dp = p4d_alloc(mm, pgdp, addr);
+ if (!p4dp)
+ return NULL;
+
pudp = pud_alloc(mm, p4dp, addr);
if (!pudp)
return NULL;
pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
- if (!can_set_direct_map())
- return true;
-
pgdp = pgd_offset_k(addr);
if (pgd_none(READ_ONCE(*pgdp)))
return false;
emit(A64_UXTH(is64, dst, dst), ctx);
break;
case 32:
- emit(A64_REV32(is64, dst, dst), ctx);
+ emit(A64_REV32(0, dst, dst), ctx);
/* upper 32 bits already cleared */
break;
case 64:
} else {
emit_a64_mov_i(1, tmp, off, ctx);
if (sign_extend)
- emit(A64_LDRSW(dst, src_adj, off_adj), ctx);
+ emit(A64_LDRSW(dst, src, tmp), ctx);
else
emit(A64_LDR32(dst, src, tmp), ctx);
}
STABS_DEBUG
DWARF_DEBUG
ELF_DETAILS
+ .hexagon.attributes 0 : { *(.hexagon.attributes) }
DISCARDS
}
select RELOCATABLE
config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
- def_bool CRASH_CORE
+ def_bool CRASH_RESERVE
config RELOCATABLE
bool "Relocatable kernel"
#size-cells = <2>;
dma-coherent;
+ isa@18000000 {
+ compatible = "isa";
+ #size-cells = <1>;
+ #address-cells = <2>;
+ ranges = <1 0x0 0x0 0x18000000 0x4000>;
+ };
+
liointc0: interrupt-controller@1fe01400 {
compatible = "loongson,liointc-2.0";
reg = <0x0 0x1fe01400 0x0 0x40>,
&gmac0 {
status = "okay";
+
+ phy-mode = "gmii";
+ phy-handle = <&phy0>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@0 {
+ reg = <2>;
+ };
+ };
};
&gmac1 {
status = "okay";
+
+ phy-mode = "gmii";
+ phy-handle = <&phy1>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy1: ethernet-phy@1 {
+ reg = <2>;
+ };
+ };
};
&gmac2 {
status = "okay";
+
+ phy-mode = "rgmii";
+ phy-handle = <&phy2>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy2: ethernet-phy@2 {
+ reg = <0>;
+ };
+ };
};
#address-cells = <2>;
#size-cells = <2>;
+ isa@18400000 {
+ compatible = "isa";
+ #size-cells = <1>;
+ #address-cells = <2>;
+ ranges = <1 0x0 0x0 0x18400000 0x4000>;
+ };
+
pmc: power-management@100d0000 {
compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon";
reg = <0x0 0x100d0000 0x0 0x58>;
msi: msi-controller@1fe01140 {
compatible = "loongson,pch-msi-1.0";
reg = <0x0 0x1fe01140 0x0 0x8>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
msi-controller;
loongson,msi-base-vec = <64>;
loongson,msi-num-vecs = <192>;
#address-cells = <3>;
#size-cells = <2>;
device_type = "pci";
+ msi-parent = <&msi>;
bus-range = <0x0 0xff>;
- ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>,
+ ranges = <0x01000000 0x0 0x00008000 0x0 0x18408000 0x0 0x00008000>,
<0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>;
gmac0: ethernet@3,0 {
reg = <0x1800 0x0 0x0 0x0 0x0>;
- interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
+ <13 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
gmac1: ethernet@3,1 {
reg = <0x1900 0x0 0x0 0x0 0x0>;
- interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
+ <15 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
gmac2: ethernet@3,2 {
reg = <0x1a00 0x0 0x0 0x0 0x0>;
- interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <17 IRQ_TYPE_LEVEL_HIGH>,
+ <18 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
#define _ASM_ADDRSPACE_H
#include <linux/const.h>
+#include <linux/sizes.h>
#include <asm/loongarch.h>
/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _LOONGARCH_CRASH_CORE_H
-#define _LOONGARCH_CRASH_CORE_H
+#ifndef _LOONGARCH_CRASH_RESERVE_H
+#define _LOONGARCH_CRASH_RESERVE_H
#define CRASH_ALIGN SZ_2M
#include <asm/pgtable-bits.h>
#include <asm/string.h>
-/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size);
extern void __init early_iounmap(void __iomem *addr, unsigned long size);
#define __io_aw() mmiowb()
+#ifdef CONFIG_KFENCE
+#define virt_to_phys(kaddr) \
+({ \
+ (likely((unsigned long)kaddr < vm_map_base)) ? __pa((unsigned long)kaddr) : \
+ page_to_phys(tlb_virt_to_page((unsigned long)kaddr)) + offset_in_page((unsigned long)kaddr);\
+})
+
+#define phys_to_virt(paddr) \
+({ \
+ extern char *__kfence_pool; \
+ (unlikely(__kfence_pool == NULL)) ? __va((unsigned long)paddr) : \
+ page_address(phys_to_page((unsigned long)paddr)) + offset_in_page((unsigned long)paddr);\
+})
+#endif
+
#include <asm-generic/io.h>
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
static inline bool arch_kfence_init_pool(void)
{
int err;
+ char *kaddr, *vaddr;
char *kfence_pool = __kfence_pool;
struct vm_struct *area;
return false;
}
+ kaddr = kfence_pool;
+ vaddr = __kfence_pool;
+ while (kaddr < kfence_pool + KFENCE_POOL_SIZE) {
+ set_page_address(virt_to_page(kaddr), vaddr);
+ kaddr += PAGE_SIZE;
+ vaddr += PAGE_SIZE;
+ }
+
return true;
}
struct page *dmw_virt_to_page(unsigned long kaddr);
struct page *tlb_virt_to_page(unsigned long kaddr);
-#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr))
+#define pfn_to_phys(pfn) __pfn_to_phys(pfn)
+#define phys_to_pfn(paddr) __phys_to_pfn(paddr)
+
+#define page_to_phys(page) pfn_to_phys(page_to_pfn(page))
+#define phys_to_page(paddr) pfn_to_page(phys_to_pfn(paddr))
+
+#ifndef CONFIG_KFENCE
+
+#define page_to_virt(page) __va(page_to_phys(page))
+#define virt_to_page(kaddr) phys_to_page(__pa(kaddr))
+
+#else
+
+#define WANT_PAGE_VIRTUAL
+
+#define page_to_virt(page) \
+({ \
+ extern char *__kfence_pool; \
+ (__kfence_pool == NULL) ? __va(page_to_phys(page)) : page_address(page); \
+})
#define virt_to_page(kaddr) \
({ \
dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
})
+#endif
+
+#define pfn_to_virt(pfn) page_to_virt(pfn_to_page(pfn))
+#define virt_to_pfn(kaddr) page_to_pfn(virt_to_page(kaddr))
+
extern int __virt_addr_valid(volatile void *kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
#ifndef __LOONGARCH_PERF_EVENT_H__
#define __LOONGARCH_PERF_EVENT_H__
+#include <asm/ptrace.h>
+
#define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->csr_era = (__ip); \
+ (regs)->regs[3] = current_stack_pointer; \
+ (regs)->regs[22] = (unsigned long) __builtin_frame_address(0); \
+}
+
#endif /* __LOONGARCH_PERF_EVENT_H__ */
);
}
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
static void tlb_flush(struct mmu_gather *tlb);
#define tlb_flush tlb_flush
return 0;
}
-early_initcall(init_hw_perf_events);
+pure_initcall(init_hw_perf_events);
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
} else {
- if (!(vma->vm_flags & VM_READ) && address != exception_era(regs))
- goto bad_area;
if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs))
goto bad_area;
+ if (!(vma->vm_flags & (VM_READ | VM_WRITE)) && address != exception_era(regs))
+ goto bad_area;
}
/*
*/
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/kfence.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mman.h>
{
unsigned long vaddr = (unsigned long)kaddr;
+ if (is_kfence_address((void *)kaddr))
+ return 1;
+
if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base))
return 0;
struct page *dmw_virt_to_page(unsigned long kaddr)
{
- return pfn_to_page(virt_to_pfn(kaddr));
+ return phys_to_page(__pa(kaddr));
}
EXPORT_SYMBOL(dmw_virt_to_page);
struct page *tlb_virt_to_page(unsigned long kaddr)
{
- return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+ return phys_to_page(pfn_to_phys(pte_pfn(*virt_to_kpte(kaddr))));
}
EXPORT_SYMBOL(tlb_virt_to_page);
bool
-config FIT_IMAGE_FDT_EPM5
- bool "Include FDT for Mobileye EyeQ5 development platforms"
- depends on MACH_EYEQ5
- default n
- help
- Enable this to include the FDT for the EyeQ5 development platforms
- from Mobileye in the FIT kernel image.
- This requires u-boot on the platform.
-
config MACH_NINTENDO64
bool "Nintendo 64 console"
select CEVT_R4K
endchoice
+config FIT_IMAGE_FDT_EPM5
+ bool "Include FDT for Mobileye EyeQ5 development platforms"
+ depends on MACH_EYEQ5
+ default n
+ help
+ Enable this to include the FDT for the EyeQ5 development platforms
+ from Mobileye in the FIT kernel image.
+ This requires u-boot on the platform.
+
source "arch/mips/alchemy/Kconfig"
source "arch/mips/ath25/Kconfig"
source "arch/mips/ath79/Kconfig"
#define exception_ip(regs) exception_ip(regs)
#define profile_pc(regs) instruction_pointer(regs)
-extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall);
+extern asmlinkage long syscall_trace_enter(struct pt_regs *regs);
extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
extern void die(const char *, struct pt_regs *) __noreturn;
OFFSET(TI_CPU, thread_info, cpu);
OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
OFFSET(TI_REGS, thread_info, regs);
+ OFFSET(TI_SYSCALL, thread_info, syscall);
DEFINE(_THREAD_SIZE, THREAD_SIZE);
DEFINE(_THREAD_MASK, THREAD_MASK);
DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);
* Notification of system call entry/exit
* - triggered by current->work.syscall_trace
*/
-asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
+asmlinkage long syscall_trace_enter(struct pt_regs *regs)
{
user_exit();
- current_thread_info()->syscall = syscall;
-
if (test_thread_flag(TIF_SYSCALL_TRACE)) {
if (ptrace_report_syscall_entry(regs))
return -1;
- syscall = current_thread_info()->syscall;
}
#ifdef CONFIG_SECCOMP
struct seccomp_data sd;
unsigned long args[6];
- sd.nr = syscall;
+ sd.nr = current_thread_info()->syscall;
sd.arch = syscall_get_arch(current);
syscall_get_arguments(current, regs, args);
for (i = 0; i < 6; i++)
ret = __secure_computing(&sd);
if (ret == -1)
return ret;
- syscall = current_thread_info()->syscall;
}
#endif
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->regs[2]);
- audit_syscall_entry(syscall, regs->regs[4], regs->regs[5],
+ audit_syscall_entry(current_thread_info()->syscall,
+ regs->regs[4], regs->regs[5],
regs->regs[6], regs->regs[7]);
/*
* Negative syscall numbers are mistaken for rejected syscalls, but
* won't have had the return value set appropriately, so we do so now.
*/
- if (syscall < 0)
+ if (current_thread_info()->syscall < 0)
syscall_set_return_value(current, regs, -ENOSYS, 0);
- return syscall;
+ return current_thread_info()->syscall;
}
/*
PTR_WD load_a7, bad_stack_a7
.previous
+ /*
+ * syscall number is in v0 unless we called syscall(__NR_###)
+ * where the real syscall number is in a0
+ */
+ subu t2, v0, __NR_O32_Linux
+ bnez t2, 1f /* __NR_syscall at offset 0 */
+ LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
+ b 2f
+1:
+ LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
+2:
+
lw t0, TI_FLAGS($28) # syscall tracing enabled?
li t1, _TIF_WORK_SYSCALL_ENTRY
and t0, t1
SAVE_STATIC
move a0, sp
- /*
- * syscall number is in v0 unless we called syscall(__NR_###)
- * where the real syscall number is in a0
- */
- move a1, v0
- subu t2, v0, __NR_O32_Linux
- bnez t2, 1f /* __NR_syscall at offset 0 */
- lw a1, PT_R4(sp)
-
-1: jal syscall_trace_enter
+ jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
sd a3, PT_R26(sp) # save a3 for syscall restarting
+ LONG_S v0, TI_SYSCALL($28) # Store syscall number
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
n32_syscall_trace_entry:
SAVE_STATIC
move a0, sp
- move a1, v0
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
sd a3, PT_R26(sp) # save a3 for syscall restarting
+ LONG_S v0, TI_SYSCALL($28) # Store syscall number
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
syscall_trace_entry:
SAVE_STATIC
move a0, sp
- move a1, v0
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
PTR_WD load_a7, bad_stack_a7
.previous
+ /*
+ * absolute syscall number is in v0 unless we called syscall(__NR_###)
+ * where the real syscall number is in a0
+ * note: NR_syscall is the first O32 syscall but the macro is
+ * only defined when compiling with -mabi=32 (CONFIG_32BIT)
+ * therefore __NR_O32_Linux is used (4000)
+ */
+
+ subu t2, v0, __NR_O32_Linux
+ bnez t2, 1f /* __NR_syscall at offset 0 */
+ LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
+ b 2f
+1:
+ LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
+2:
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
sd a7, PT_R11(sp) # For indirect syscalls
move a0, sp
- /*
- * absolute syscall number is in v0 unless we called syscall(__NR_###)
- * where the real syscall number is in a0
- * note: NR_syscall is the first O32 syscall but the macro is
- * only defined when compiling with -mabi=32 (CONFIG_32BIT)
- * therefore __NR_O32_Linux is used (4000)
- */
- .set push
- .set reorder
- subu t1, v0, __NR_O32_Linux
- move a1, v0
- bnez t1, 1f /* __NR_syscall at offset 0 */
- ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
- .set pop
-
-1: jal syscall_trace_enter
+ jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
void __init early_init_devtree(void *params)
{
- __be32 *dtb = (u32 *)__dtb_start;
+ __be32 __maybe_unused *dtb = (u32 *)__dtb_start;
+
#if defined(CONFIG_NIOS2_DTB_AT_PHYS_ADDR)
if (be32_to_cpup((__be32 *)CONFIG_NIOS2_DTB_PHYS_ADDR) ==
OF_DT_HEADER) {
return;
}
#endif
+
+#ifdef CONFIG_NIOS2_DTB_SOURCE_BOOL
if (be32_to_cpu((__be32) *dtb) == OF_DT_HEADER)
params = (void *)__dtb_start;
+#endif
early_init_dt_scan(params);
}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#include <uapi/asm/mman.h>
+
+/* PARISC cannot allow mdwe as it needs writable stacks */
+static inline bool arch_memory_deny_write_exec_supported(void)
+{
+ return false;
+}
+#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported
+
+#endif /* __ASM_MMAN_H__ */
static int __init chacha_p10_init(void)
{
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ return 0;
+
static_branch_enable(&have_p10);
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
static void __exit chacha_p10_exit(void)
{
+ if (!static_branch_likely(&have_p10))
+ return;
+
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
-module_cpu_feature_match(PPC_MODULE_FEATURE_P10, chacha_p10_init);
+module_init(chacha_p10_init);
module_exit(chacha_p10_exit);
MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)");
#ifndef __ASSEMBLY__
-#include <asm/page.h>
#include <asm/vdso/timebase.h>
#include <asm/barrier.h>
#include <asm/unistd.h>
static __always_inline
const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
{
- return (void *)vd + PAGE_SIZE;
+ return (void *)vd + (1U << CONFIG_PAGE_SHIFT);
}
#endif
struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct iommu_group *grp = iommu_group_get(dev);
struct iommu_table_group *table_group;
+ struct iommu_group *grp;
/* At first attach the ownership is already set */
- if (!domain) {
- iommu_group_put(grp);
+ if (!domain)
return 0;
- }
+ grp = iommu_group_get(dev);
table_group = iommu_group_get_iommudata(grp);
/*
* The domain being set to PLATFORM from earlier
endif
vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg
-vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg:../compat_vdso/compat_vdso.so
+vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg
ifneq ($(CONFIG_XIP_KERNEL),y)
ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy)
return ptep_test_and_clear_young(vma, address, ptep);
}
+#define pgprot_nx pgprot_nx
+static inline pgprot_t pgprot_nx(pgprot_t _prot)
+{
+ return __pgprot(pgprot_val(_prot) & ~_PAGE_EXEC);
+}
+
#define pgprot_noncached pgprot_noncached
static inline pgprot_t pgprot_noncached(pgprot_t _prot)
{
ulong) \
__attribute__((alias(__stringify(___se_##prefix##name)))); \
__diag_pop(); \
- static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
+ __used; \
static long ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__))
#define SC_RISCV_REGS_TO_ARGS(x, ...) \
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
- long __kr_err; \
+ long __kr_err = 0; \
\
__get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
if (unlikely(__kr_err)) \
#define __put_kernel_nofault(dst, src, type, err_label) \
do { \
- long __kr_err; \
+ long __kr_err = 0; \
\
__put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \
if (unlikely(__kr_err)) \
#define AT_L3_CACHEGEOMETRY 47
/* entries in ARCH_DLINFO */
-#define AT_VECTOR_SIZE_ARCH 9
+#define AT_VECTOR_SIZE_ARCH 10
#define AT_MINSIGSTKSZ 51
#endif /* _UAPI_ASM_RISCV_AUXVEC_H */
rm $@.tmp
# actual build commands
-quiet_cmd_compat_vdsoas = VDSOAS $@
+quiet_cmd_compat_vdsoas = VDSOAS $@
cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $<
*/
lockdep_assert_held(&text_mutex);
+ preempt_disable();
+
if (across_pages)
patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
if (across_pages)
patch_unmap(FIX_TEXT_POKE1);
+ preempt_enable();
+
return 0;
}
NOKPROBE_SYMBOL(__patch_insn_set);
if (!riscv_patch_in_stop_machine)
lockdep_assert_held(&text_mutex);
+ preempt_disable();
+
if (across_pages)
patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
if (across_pages)
patch_unmap(FIX_TEXT_POKE1);
+ preempt_enable();
+
return ret;
}
NOKPROBE_SYMBOL(__patch_insn_write);
#include <asm/vector.h>
#include <asm/cpufeature.h>
-register unsigned long gp_in_global __asm__("gp");
-
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
extern asmlinkage void ret_from_fork(void);
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
{
cpu_do_idle();
}
if (unlikely(args->fn)) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->gp = gp_in_global;
/* Supervisor/Machine, irqs on: */
childregs->status = SR_PP | SR_PIE;
struct __sc_riscv_v_state __user *state = sc_vec;
void __user *datap;
+ /*
+ * Mark the vstate as clean prior performing the actual copy,
+ * to avoid getting the vstate incorrectly clobbered by the
+ * discarded vector state.
+ */
+ riscv_v_vstate_set_restore(current, regs);
+
/* Copy everything of __sc_riscv_v_state except datap. */
err = __copy_from_user(¤t->thread.vstate, &state->v_state,
offsetof(struct __riscv_v_ext_state, datap));
* Copy the whole vector content from user space datap. Use
* copy_from_user to prevent information leak.
*/
- err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
- if (unlikely(err))
- return err;
-
- riscv_v_vstate_set_restore(current, regs);
-
- return err;
+ return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
}
#else
#define save_v_state(task, regs) (0)
print_vma_addr(KERN_CONT " in ", instruction_pointer(regs));
pr_cont("\n");
__show_regs(regs);
- dump_instr(KERN_EMERG, regs);
+ dump_instr(KERN_INFO, regs);
}
force_sig_fault(signo, code, (void __user *)addr);
# Disable -pg to prevent insert call site
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
+CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
# Disable profiling and instrumentation for VDSO code
GCOV_PROFILE := n
raw_spin_lock_irqsave(&irqd->lock, flags);
sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
- if (!pending &&
- ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
- (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
goto skip_write_pending;
+ if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH ||
+ sm == APLIC_SOURCECFG_SM_LEVEL_LOW) {
+ if (!pending)
+ goto skip_write_pending;
+ if ((irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ sm == APLIC_SOURCECFG_SM_LEVEL_LOW)
+ goto skip_write_pending;
+ if (!(irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ sm == APLIC_SOURCECFG_SM_LEVEL_HIGH)
+ goto skip_write_pending;
+ }
+
if (pending)
irqd->state |= APLIC_IRQ_STATE_PENDING;
else
static bool aplic_read_input(struct aplic *aplic, u32 irq)
{
- bool ret;
- unsigned long flags;
+ u32 sourcecfg, sm, raw_input, irq_inverted;
struct aplic_irq *irqd;
+ unsigned long flags;
+ bool ret = false;
if (!irq || aplic->nr_irqs <= irq)
return false;
irqd = &aplic->irqs[irq];
raw_spin_lock_irqsave(&irqd->lock, flags);
- ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
+
+ sourcecfg = irqd->sourcecfg;
+ if (sourcecfg & APLIC_SOURCECFG_D)
+ goto skip;
+
+ sm = sourcecfg & APLIC_SOURCECFG_SM_MASK;
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
+ goto skip;
+
+ raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0;
+ irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW ||
+ sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0;
+ ret = !!(raw_input ^ irq_inverted);
+
+skip:
raw_spin_unlock_irqrestore(&irqd->lock, flags);
return ret;
static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
{
- return copy_isa_ext_reg_indices(vcpu, NULL);;
+ return copy_isa_ext_reg_indices(vcpu, NULL);
}
static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid);
}
-static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid,
+static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid,
unsigned long start, unsigned long size,
unsigned long stride)
{
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- __flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID,
+ __flush_tlb_range(cpu_online_mask, FLUSH_TLB_NO_ASID,
start, end - start, PAGE_SIZE);
}
if (ret < 0)
return ret;
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ const struct btf_func_model *fm;
+ int idx;
+
+ fm = bpf_jit_find_kfunc_model(ctx->prog, insn);
+ if (!fm)
+ return -EINVAL;
+
+ for (idx = 0; idx < fm->nr_args; idx++) {
+ u8 reg = bpf_to_rv_reg(BPF_REG_1 + idx, ctx);
+
+ if (fm->arg_size[idx] == sizeof(int))
+ emit_sextw(reg, reg, ctx);
+ }
+ }
+
ret = emit_call(addr, fixed_addr, ctx);
if (ret)
return ret;
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
-static inline int arch_atomic_read(const atomic_t *v)
+static __always_inline int arch_atomic_read(const atomic_t *v)
{
return __atomic_read(v);
}
#define arch_atomic_read arch_atomic_read
-static inline void arch_atomic_set(atomic_t *v, int i)
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
__atomic_set(v, i);
}
#define arch_atomic_set arch_atomic_set
-static inline int arch_atomic_add_return(int i, atomic_t *v)
+static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
{
return __atomic_add_barrier(i, &v->counter) + i;
}
#define arch_atomic_add_return arch_atomic_add_return
-static inline int arch_atomic_fetch_add(int i, atomic_t *v)
+static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
return __atomic_add_barrier(i, &v->counter);
}
#define arch_atomic_fetch_add arch_atomic_fetch_add
-static inline void arch_atomic_add(int i, atomic_t *v)
+static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
__atomic_add(i, &v->counter);
}
#define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v)
#define ATOMIC_OPS(op) \
-static inline void arch_atomic_##op(int i, atomic_t *v) \
+static __always_inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
__atomic_##op(i, &v->counter); \
} \
-static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
+static __always_inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
{ \
return __atomic_##op##_barrier(i, &v->counter); \
}
#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
+static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
return __atomic_cmpxchg(&v->counter, old, new);
}
#define ATOMIC64_INIT(i) { (i) }
-static inline s64 arch_atomic64_read(const atomic64_t *v)
+static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
return __atomic64_read(v);
}
#define arch_atomic64_read arch_atomic64_read
-static inline void arch_atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
__atomic64_set(v, i);
}
#define arch_atomic64_set arch_atomic64_set
-static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
return __atomic64_add_barrier(i, (long *)&v->counter) + i;
}
#define arch_atomic64_add_return arch_atomic64_add_return
-static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
return __atomic64_add_barrier(i, (long *)&v->counter);
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
-static inline void arch_atomic64_add(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
__atomic64_add(i, (long *)&v->counter);
}
#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
-static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
return __atomic64_cmpxchg((long *)&v->counter, old, new);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
-#define ATOMIC64_OPS(op) \
-static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
-{ \
- __atomic64_##op(i, (long *)&v->counter); \
-} \
-static inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \
-{ \
- return __atomic64_##op##_barrier(i, (long *)&v->counter); \
+#define ATOMIC64_OPS(op) \
+static __always_inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
+{ \
+ __atomic64_##op(i, (long *)&v->counter); \
+} \
+static __always_inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \
+{ \
+ return __atomic64_##op##_barrier(i, (long *)&v->counter); \
}
ATOMIC64_OPS(and)
#ifndef __ARCH_S390_ATOMIC_OPS__
#define __ARCH_S390_ATOMIC_OPS__
-static inline int __atomic_read(const atomic_t *v)
+static __always_inline int __atomic_read(const atomic_t *v)
{
int c;
return c;
}
-static inline void __atomic_set(atomic_t *v, int i)
+static __always_inline void __atomic_set(atomic_t *v, int i)
{
asm volatile(
" st %1,%0\n"
: "=R" (v->counter) : "d" (i));
}
-static inline s64 __atomic64_read(const atomic64_t *v)
+static __always_inline s64 __atomic64_read(const atomic64_t *v)
{
s64 c;
return c;
}
-static inline void __atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void __atomic64_set(atomic64_t *v, s64 i)
{
asm volatile(
" stg %1,%0\n"
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
#define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \
-static inline op_type op_name(op_type val, op_type *ptr) \
+static __always_inline op_type op_name(op_type val, op_type *ptr) \
{ \
op_type old; \
\
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
#define __ATOMIC_OP(op_name, op_string) \
-static inline int op_name(int val, int *ptr) \
+static __always_inline int op_name(int val, int *ptr) \
{ \
int old, new; \
\
#undef __ATOMIC_OPS
#define __ATOMIC64_OP(op_name, op_string) \
-static inline long op_name(long val, long *ptr) \
+static __always_inline long op_name(long val, long *ptr) \
{ \
long old, new; \
\
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-static inline int __atomic_cmpxchg(int *ptr, int old, int new)
+static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new)
{
asm volatile(
" cs %[old],%[new],%[ptr]"
return old;
}
-static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
+static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
{
int old_expected = old;
return old == old_expected;
}
-static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
+static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new)
{
asm volatile(
" csg %[old],%[new],%[ptr]"
return old;
}
-static inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
+static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
{
long old_expected = old;
#define PREEMPT_NEED_RESCHED 0x80000000
#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED)
-static inline int preempt_count(void)
+static __always_inline int preempt_count(void)
{
return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
}
-static inline void preempt_count_set(int pc)
+static __always_inline void preempt_count_set(int pc)
{
int old, new;
old, new) != old);
}
-static inline void set_preempt_need_resched(void)
+static __always_inline void set_preempt_need_resched(void)
{
__atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
}
-static inline void clear_preempt_need_resched(void)
+static __always_inline void clear_preempt_need_resched(void)
{
__atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
}
-static inline bool test_preempt_need_resched(void)
+static __always_inline bool test_preempt_need_resched(void)
{
return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED);
}
-static inline void __preempt_count_add(int val)
+static __always_inline void __preempt_count_add(int val)
{
/*
* With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES
__atomic_add(val, &S390_lowcore.preempt_count);
}
-static inline void __preempt_count_sub(int val)
+static __always_inline void __preempt_count_sub(int val)
{
__preempt_count_add(-val);
}
-static inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool __preempt_count_dec_and_test(void)
{
return __atomic_add(-1, &S390_lowcore.preempt_count) == 1;
}
-static inline bool should_resched(int preempt_offset)
+static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(READ_ONCE(S390_lowcore.preempt_count) ==
preempt_offset);
#define PREEMPT_ENABLED (0)
-static inline int preempt_count(void)
+static __always_inline int preempt_count(void)
{
return READ_ONCE(S390_lowcore.preempt_count);
}
-static inline void preempt_count_set(int pc)
+static __always_inline void preempt_count_set(int pc)
{
S390_lowcore.preempt_count = pc;
}
-static inline void set_preempt_need_resched(void)
+static __always_inline void set_preempt_need_resched(void)
{
}
-static inline void clear_preempt_need_resched(void)
+static __always_inline void clear_preempt_need_resched(void)
{
}
-static inline bool test_preempt_need_resched(void)
+static __always_inline bool test_preempt_need_resched(void)
{
return false;
}
-static inline void __preempt_count_add(int val)
+static __always_inline void __preempt_count_add(int val)
{
S390_lowcore.preempt_count += val;
}
-static inline void __preempt_count_sub(int val)
+static __always_inline void __preempt_count_sub(int val)
{
S390_lowcore.preempt_count -= val;
}
-static inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool __preempt_count_dec_and_test(void)
{
return !--S390_lowcore.preempt_count && tif_need_resched();
}
-static inline bool should_resched(int preempt_offset)
+static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(preempt_count() == preempt_offset &&
tif_need_resched());
mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
stctg %c1,%c1,__PT_CR1(%r11)
#if IS_ENABLED(CONFIG_KVM)
- lg %r12,__LC_GMAP
+ ltg %r12,__LC_GMAP
+ jz 5f
clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11)
jne 5f
BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
SYM_DATA_END(daton_psw)
.section .rodata, "a"
+ .balign 8
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
SYM_DATA_START(sys_call_table)
#include "asm/syscall_table.h"
event->cpu);
struct paicrypt_map *cpump = mp->mapptr;
- cpump->event = NULL;
static_branch_dec(&pai_key);
mutex_lock(&pai_reserve_mutex);
debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
static void paicrypt_stop(struct perf_event *event, int flags)
{
- if (!event->attr.sample_period) /* Counting */
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
paicrypt_read(event);
- else /* Sampling */
+ } else { /* Sampling */
perf_sched_cb_dec(event->pmu);
+ cpump->event = NULL;
+ }
event->hw.state = PERF_HES_STOPPED;
}
free_page(PAI_SAVE_AREA(event));
mutex_lock(&paiext_reserve_mutex);
- cpump->event = NULL;
if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
paiext_free(mp);
paiext_root_free();
static void paiext_stop(struct perf_event *event, int flags)
{
- if (!event->attr.sample_period) /* Counting */
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
paiext_read(event);
- else /* Sampling */
+ } else { /* Sampling */
perf_sched_cb_dec(event->pmu);
+ cpump->event = NULL;
+ }
event->hw.state = PERF_HES_STOPPED;
}
if (!IS_ENABLED(CONFIG_PGSTE))
return KERNEL_FAULT;
gmap = (struct gmap *)S390_lowcore.gmap;
- if (regs->cr1 == gmap->asce)
+ if (gmap && gmap->asce == regs->cr1)
return GMAP_FAULT;
return KERNEL_FAULT;
}
* PLT for hotpatchable calls. The calling convention is the same as for the
* ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
*/
-extern const char bpf_plt[];
-extern const char bpf_plt_ret[];
-extern const char bpf_plt_target[];
-extern const char bpf_plt_end[];
-#define BPF_PLT_SIZE 32
+struct bpf_plt {
+ char code[16];
+ void *ret;
+ void *target;
+} __packed;
+extern const struct bpf_plt bpf_plt;
asm(
".pushsection .rodata\n"
" .balign 8\n"
" .balign 8\n"
"bpf_plt_ret: .quad 0\n"
"bpf_plt_target: .quad 0\n"
- "bpf_plt_end:\n"
" .popsection\n"
);
-static void bpf_jit_plt(void *plt, void *ret, void *target)
+static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
{
- memcpy(plt, bpf_plt, BPF_PLT_SIZE);
- *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
- *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret;
+ memcpy(plt, &bpf_plt, sizeof(*plt));
+ plt->ret = ret;
+ plt->target = target;
}
/*
jit->prg = ALIGN(jit->prg, 8);
jit->prologue_plt = jit->prg;
if (jit->prg_buf)
- bpf_jit_plt(jit->prg_buf + jit->prg,
+ bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg),
jit->prg_buf + jit->prologue_plt_ret, NULL);
- jit->prg += BPF_PLT_SIZE;
+ jit->prg += sizeof(struct bpf_plt);
}
static int get_probe_mem_regno(const u8 *insn)
struct bpf_jit jit;
int pass;
- if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
- return orig_fp;
-
if (!fp->jit_requested)
return orig_fp;
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *old_addr, void *new_addr)
{
+ struct bpf_plt expected_plt, current_plt, new_plt, *plt;
struct {
u16 opc;
s32 disp;
} __packed insn;
- char expected_plt[BPF_PLT_SIZE];
- char current_plt[BPF_PLT_SIZE];
- char new_plt[BPF_PLT_SIZE];
- char *plt;
char *ret;
int err;
*/
} else {
/* Verify the PLT. */
- plt = (char *)ip + (insn.disp << 1);
- err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
+ plt = ip + (insn.disp << 1);
+ err = copy_from_kernel_nofault(¤t_plt, plt,
+ sizeof(current_plt));
if (err < 0)
return err;
ret = (char *)ip + 6;
- bpf_jit_plt(expected_plt, ret, old_addr);
- if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
+ bpf_jit_plt(&expected_plt, ret, old_addr);
+ if (memcmp(¤t_plt, &expected_plt, sizeof(current_plt)))
return -EINVAL;
/* Adjust the call address. */
- bpf_jit_plt(new_plt, ret, new_addr);
- s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
- new_plt + (bpf_plt_target - bpf_plt),
+ bpf_jit_plt(&new_plt, ret, new_addr);
+ s390_kernel_write(&plt->target, &new_plt.target,
sizeof(void *));
}
obj-$(CONFIG_KEXEC_FILE) += purgatory/
-obj-y += virt/svm/
+obj-y += virt/
# for cleaning
subdir- += boot tools
# with named address spaces - see GCC PR sanitizer/111736.
#
depends on !KASAN
+ # -fsanitize=thread (KCSAN) is also incompatible.
+ depends on !KCSAN
config CC_HAS_SLS
def_bool $(cc-option,-mharden-sls=all)
stored in floating point, vector and integer registers.
See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
+config MITIGATION_SPECTRE_BHI
+ bool "Mitigate Spectre-BHB (Branch History Injection)"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
+ where the branch history buffer is poisoned to speculatively steer
+ indirect branches.
+ See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+
endif
config ARCH_HAS_ADD_PAGES
libs-y += arch/x86/lib/
-core-y += arch/x86/virt/
-
# drivers-y are linked after core-y
drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
drivers-$(CONFIG_PCI) += arch/x86/pci/
*/
#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/page_types.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
+#include <asm/setup.h>
.code64
.text
SYM_FUNC_START(efi32_stub_entry)
call 1f
1: popl %ecx
+ leal (efi32_boot_args - 1b)(%ecx), %ebx
/* Clear BSS */
xorl %eax, %eax
popl %ecx
popl %edx
popl %esi
+ movl %esi, 8(%ebx)
jmp efi32_entry
SYM_FUNC_END(efi32_stub_entry)
#endif
*
* Arguments: %ecx image handle
* %edx EFI system table pointer
- * %esi struct bootparams pointer (or NULL when not using
- * the EFI handover protocol)
*
* Since this is the point of no return for ordinary execution, no registers
* are considered live except for the function parameters. [Note that the EFI
leal (efi32_boot_args - 1b)(%ebx), %ebx
movl %ecx, 0(%ebx)
movl %edx, 4(%ebx)
- movl %esi, 8(%ebx)
movb $0x0, 12(%ebx) // efi_is64
+ /*
+ * Allocate some memory for a temporary struct boot_params, which only
+ * needs the minimal pieces that startup_32() relies on.
+ */
+ subl $PARAM_SIZE, %esp
+ movl %esp, %esi
+ movl $PAGE_SIZE, BP_kernel_alignment(%esi)
+ movl $_end - 1b, BP_init_size(%esi)
+ subl $startup_32 - 1b, BP_init_size(%esi)
+
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
movl 8(%ebp), %ecx // image_handle
movl 12(%ebp), %edx // sys_table
- xorl %esi, %esi
- jmp efi32_entry // pass %ecx, %edx, %esi
+ jmp efi32_entry // pass %ecx, %edx
// no other registers remain live
2: popl %edi // restore callee-save registers
* Confidential Computing Platform Capability checks
*
* Copyright (C) 2021 Advanced Micro Devices, Inc.
+ * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
*/
#include <linux/export.h>
#include <linux/cc_platform.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <asm/archrandom.h>
#include <asm/coco.h>
#include <asm/processor.h>
enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE;
u64 cc_mask __ro_after_init;
+static struct cc_attr_flags {
+ __u64 host_sev_snp : 1,
+ __resv : 63;
+} cc_flags;
+
static bool noinstr intel_cc_platform_has(enum cc_attr attr)
{
switch (attr) {
case CC_ATTR_GUEST_SEV_SNP:
return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
+ case CC_ATTR_HOST_SEV_SNP:
+ return cc_flags.host_sev_snp;
+
default:
return false;
}
}
}
EXPORT_SYMBOL_GPL(cc_mkdec);
+
+static void amd_cc_platform_clear(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_HOST_SEV_SNP:
+ cc_flags.host_sev_snp = 0;
+ break;
+ default:
+ break;
+ }
+}
+
+void cc_platform_clear(enum cc_attr attr)
+{
+ switch (cc_vendor) {
+ case CC_VENDOR_AMD:
+ amd_cc_platform_clear(attr);
+ break;
+ default:
+ break;
+ }
+}
+
+static void amd_cc_platform_set(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_HOST_SEV_SNP:
+ cc_flags.host_sev_snp = 1;
+ break;
+ default:
+ break;
+ }
+}
+
+void cc_platform_set(enum cc_attr attr)
+{
+ switch (cc_vendor) {
+ case CC_VENDOR_AMD:
+ amd_cc_platform_set(attr);
+ break;
+ default:
+ break;
+ }
+}
+
+__init void cc_random_init(void)
+{
+ /*
+ * The seed is 32 bytes (in units of longs), which is 256 bits, which
+ * is the security level that the RNG is targeting.
+ */
+ unsigned long rng_seed[32 / sizeof(long)];
+ size_t i, longs;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ return;
+
+ /*
+ * Since the CoCo threat model includes the host, the only reliable
+ * source of entropy that can be neither observed nor manipulated is
+ * RDRAND. Usually, RDRAND failure is considered tolerable, but since
+ * CoCo guests have no other unobservable source of entropy, it's
+ * important to at least ensure the RNG gets some initial random seeds.
+ */
+ for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) {
+ longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i);
+
+ /*
+ * A zero return value means that the guest doesn't have RDRAND
+ * or the CPU is physically broken, and in both cases that
+ * means most crypto inside of the CoCo instance will be
+ * broken, defeating the purpose of CoCo in the first place. So
+ * just panic here because it's absolutely unsafe to continue
+ * executing.
+ */
+ if (longs == 0)
+ panic("RDRAND is defective.");
+ }
+ add_device_randomness(rng_seed, sizeof(rng_seed));
+ memzero_explicit(rng_seed, sizeof(rng_seed));
+}
if (likely(unr < NR_syscalls)) {
unr = array_index_nospec(unr, NR_syscalls);
- regs->ax = sys_call_table[unr](regs);
+ regs->ax = x64_sys_call(regs, unr);
return true;
}
return false;
if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
xnr = array_index_nospec(xnr, X32_NR_syscalls);
- regs->ax = x32_sys_call_table[xnr](regs);
+ regs->ax = x32_sys_call(regs, xnr);
return true;
}
return false;
if (likely(unr < IA32_NR_syscalls)) {
unr = array_index_nospec(unr, IA32_NR_syscalls);
- regs->ax = ia32_sys_call_table[unr](regs);
+ regs->ax = ia32_sys_call(regs, unr);
} else if (nr != -1) {
regs->ax = __ia32_sys_ni_syscall(regs);
}
}
/**
- * int80_emulation - 32-bit legacy syscall entry
+ * do_int80_emulation - 32-bit legacy syscall C entry from asm
*
* This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in
* eax: system call number
* ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
*/
-DEFINE_IDTENTRY_RAW(int80_emulation)
+__visible noinstr void do_int80_emulation(struct pt_regs *regs)
{
int nr;
instrumentation_end();
syscall_exit_to_user_mode(regs);
}
+
+#ifdef CONFIG_X86_FRED
+/*
+ * A FRED-specific INT80 handler is warranted for the follwing reasons:
+ *
+ * 1) As INT instructions and hardware interrupts are separate event
+ * types, FRED does not preclude the use of vector 0x80 for external
+ * interrupts. As a result, the FRED setup code does not reserve
+ * vector 0x80 and calling int80_is_external() is not merely
+ * suboptimal but actively incorrect: it could cause a system call
+ * to be incorrectly ignored.
+ *
+ * 2) It is called only for handling vector 0x80 of event type
+ * EVENT_TYPE_SWINT and will never be called to handle any external
+ * interrupt (event type EVENT_TYPE_EXTINT).
+ *
+ * 3) FRED has separate entry flows depending on if the event came from
+ * user space or kernel space, and because the kernel does not use
+ * INT insns, the FRED kernel entry handler fred_entry_from_kernel()
+ * falls through to fred_bad_type() if the event type is
+ * EVENT_TYPE_SWINT, i.e., INT insns. So if the kernel is handling
+ * an INT insn, it can only be from a user level.
+ *
+ * 4) int80_emulation() does a CLEAR_BRANCH_HISTORY. While FRED will
+ * likely take a different approach if it is ever needed: it
+ * probably belongs in either fred_intx()/ fred_other() or
+ * asm_fred_entrypoint_user(), depending on if this ought to be done
+ * for all entries from userspace or only system
+ * calls.
+ *
+ * 5) INT $0x80 is the fast path for 32-bit system calls under FRED.
+ */
+DEFINE_FREDENTRY_RAW(int80_emulation)
+{
+ int nr;
+
+ enter_from_user_mode(regs);
+
+ instrumentation_begin();
+ add_random_kstack_offset();
+
+ /*
+ * FRED pushed 0 into regs::orig_ax and regs::ax contains the
+ * syscall number.
+ *
+ * User tracing code (ptrace or signal handlers) might assume
+ * that the regs::orig_ax contains a 32-bit number on invoking
+ * a 32-bit syscall.
+ *
+ * Establish the syscall convention by saving the 32bit truncated
+ * syscall number in regs::orig_ax and by invalidating regs::ax.
+ */
+ regs->orig_ax = regs->ax & GENMASK(31, 0);
+ regs->ax = -ENOSYS;
+
+ nr = syscall_32_enter(regs);
+
+ local_irq_enable();
+ nr = syscall_enter_from_user_mode_work(regs, nr);
+ do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+}
+#endif
#else /* CONFIG_IA32_EMULATION */
/* Handles int $0x80 on a 32bit kernel */
/* clobbers %rax, make sure it is after saving the syscall nr */
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
call do_syscall_64 /* returns with IRQs disabled */
call make_task_dead
SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
+ * and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone. Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+SYM_FUNC_START(clear_bhb_loop)
+ push %rbp
+ mov %rsp, %rbp
+ movl $5, %ecx
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call 1f
+ jmp 5f
+ .align 64, 0xcc
+ ANNOTATE_INTRA_FUNCTION_CALL
+1: call 2f
+ RET
+ .align 64, 0xcc
+2: movl $5, %eax
+3: jmp 4f
+ nop
+4: sub $1, %eax
+ jnz 3b
+ sub $1, %ecx
+ jnz 1b
+ RET
+5: lfence
+ pop %rbp
+ RET
+SYM_FUNC_END(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
+STACK_FRAME_NON_STANDARD(clear_bhb_loop)
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
movq %rsp, %rdi
call do_fast_syscall_32
ANNOTATE_NOENDBR
int3
SYM_CODE_END(entry_SYSCALL_compat)
+
+/*
+ * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
+ * point to C routines, however since this is a system call interface the branch
+ * history needs to be scrubbed to protect against BHI attacks, and that
+ * scrubbing needs to take place in assembly code prior to entering any C
+ * routines.
+ */
+SYM_CODE_START(int80_emulation)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ CLEAR_BRANCH_HISTORY
+ jmp do_int80_emulation
+SYM_CODE_END(int80_emulation)
if (regs->fred_cs.sl > 0) {
pr_emerg("PANIC: invalid or fatal FRED event; event type %u "
"vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n",
- regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax,
+ regs->fred_ss.type, regs->fred_ss.vector, error_code,
fred_event_data(regs), regs->cs, regs->ip);
- die("invalid or fatal FRED event", regs, regs->orig_ax);
+ die("invalid or fatal FRED event", regs, error_code);
panic("invalid or fatal FRED event");
} else {
unsigned long flags = oops_begin();
pr_alert("BUG: invalid or fatal FRED event; event type %u "
"vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n",
- regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax,
+ regs->fred_ss.type, regs->fred_ss.vector, error_code,
fred_event_data(regs), regs->cs, regs->ip);
- if (__die("Invalid or fatal FRED event", regs, regs->orig_ax))
+ if (__die("Invalid or fatal FRED event", regs, error_code))
sig = 0;
oops_end(flags, regs, sig);
/* INT80 */
case IA32_SYSCALL_VECTOR:
if (ia32_enabled())
- return int80_emulation(regs);
+ return fred_int80_emulation(regs);
fallthrough;
#endif
#include <asm/syscalls_32.h>
#undef __SYSCALL
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
+#ifdef CONFIG_X86_32
#define __SYSCALL(nr, sym) __ia32_##sym,
-
-__visible const sys_call_ptr_t ia32_sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_32.h>
};
+#undef __SYSCALL
+#endif
+
+#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
+
+long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_32.h>
+ default: return __ia32_sys_ni_syscall(regs);
+ }
+};
#include <asm/syscalls_64.h>
#undef __SYSCALL
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
#define __SYSCALL(nr, sym) __x64_##sym,
-
-asmlinkage const sys_call_ptr_t sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
+#undef __SYSCALL
+
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+
+long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_64.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
+};
#include <asm/syscalls_x32.h>
#undef __SYSCALL
-#define __SYSCALL(nr, sym) __x64_##sym,
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
-#include <asm/syscalls_x32.h>
+long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_x32.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
};
obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o
OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n
+OBJECT_FILES_NON_STANDARD_vdso-image-x32.o := n
OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n
OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
+static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x100000120,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a)
+ return amd_zen4_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
if (!status)
goto done;
- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
return;
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
}
__init int amd_pmu_lbr_init(void)
while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ cpuc->assign[i-1] = cpuc->assign[i];
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
struct pmu *pmu = event->pmu;
/*
- * Make sure we get updated with the first PEBS
- * event. It will trigger also during removal, but
- * that does not hurt:
+ * Make sure we get updated with the first PEBS event.
+ * During removal, ->pebs_data_cfg is still valid for
+ * the last PEBS event. Don't clear it.
*/
- if (cpuc->n_pebs == 1)
+ if ((cpuc->n_pebs == 1) && add)
cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
if (needed_cb != pebs_needs_sched_cb(cpuc)) {
lbr->from = x86_pmu.lbr_from;
lbr->to = x86_pmu.lbr_to;
lbr->info = x86_pmu.lbr_info;
+ lbr->has_callstack = x86_pmu_has_lbr_callstack();
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
* IPI implementation on Hyper-V.
*/
static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
- bool exclude_self)
+ bool exclude_self)
{
struct hv_send_ipi_ex *ipi_arg;
unsigned long flags;
if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) {
ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask,
- exclude_self ? cpu_is_self : NULL);
+ nr_bank = cpumask_to_vpset_skip(&ipi_arg->vp_set, mask,
+ exclude_self ? cpu_is_self : NULL);
/*
* 'nr_bank <= 0' means some CPUs in cpumask can't be
}
status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
- ipi_arg, NULL);
+ ipi_arg, NULL);
ipi_mask_ex_done:
local_irq_restore(flags);
}
static bool __send_ipi_mask(const struct cpumask *mask, int vector,
- bool exclude_self)
+ bool exclude_self)
{
int cur_cpu, vcpu, this_cpu = smp_processor_id();
struct hv_send_ipi ipi_arg;
return false;
}
- if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
return false;
/*
}
status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
- ipi_arg.cpu_mask);
+ ipi_arg.cpu_mask);
return hv_result_success(status);
do_ex_hypercall:
return false;
}
- if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
return false;
if (vp >= 64)
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/clockchips.h>
-#include <linux/acpi.h>
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
{
- struct hv_add_logical_processor_in *input;
- struct hv_add_logical_processor_out *output;
+ struct hv_input_add_logical_processor *input;
+ struct hv_output_add_logical_processor *output;
u64 status;
unsigned long flags;
int ret = HV_STATUS_SUCCESS;
- int pxm = node_to_pxm(node);
/*
* When adding a logical processor, the hypervisor may return
input->lp_index = lp_index;
input->apic_id = apic_id;
- input->flags = 0;
- input->proximity_domain_info.domain_id = pxm;
- input->proximity_domain_info.flags.reserved = 0;
- input->proximity_domain_info.flags.proximity_info_valid = 1;
- input->proximity_domain_info.flags.proximity_preferred = 1;
+ input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR,
input, output);
local_irq_restore(flags);
u64 status;
unsigned long irq_flags;
int ret = HV_STATUS_SUCCESS;
- int pxm = node_to_pxm(node);
/* Root VPs don't seem to need pages deposited */
if (partition_id != hv_current_partition_id) {
input->vp_index = vp_index;
input->flags = flags;
input->subnode_type = HvSubnodeAny;
- if (node != NUMA_NO_NODE) {
- input->proximity_domain_info.domain_id = pxm;
- input->proximity_domain_info.flags.reserved = 0;
- input->proximity_domain_info.flags.proximity_info_valid = 1;
- input->proximity_domain_info.flags.proximity_preferred = 1;
- } else {
- input->proximity_domain_info.as_uint64 = 0;
- }
+ input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL);
local_irq_restore(irq_flags);
extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
struct module *mod);
extern void *callthunks_translate_call_dest(void *dest);
-extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
+extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
#else
static __always_inline void callthunks_patch_builtin_calls(void) {}
static __always_inline void
return dest;
}
static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
- void *func)
+ void *func, void *ip)
{
return 0;
}
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/hardirq.h>
+#include <asm/io.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
static inline u32 native_apic_mem_read(u32 reg)
{
- return *((volatile u32 *)(APIC_BASE + reg));
+ return readl((void __iomem *)(APIC_BASE + reg));
}
static inline void native_apic_mem_eoi(void)
#include <asm/asm.h>
#include <asm/fred.h>
#include <asm/gsseg.h>
+#include <asm/nospec-branch.h>
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
#define __smp_mb__before_atomic() do { } while (0)
#define __smp_mb__after_atomic() do { } while (0)
+/* Writing to CR3 provides a full memory barrier in switch_mm(). */
+#define smp_mb__after_switch_mm() do { } while (0)
+
#include <asm-generic/barrier.h>
#endif /* _ASM_X86_BARRIER_H */
u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
+void cc_random_init(void);
#else
#define cc_vendor (CC_VENDOR_NONE)
{
return val;
}
+static inline void cc_random_init(void) { }
#endif
#endif /* _ASM_X86_COCO_H */
CPUID_7_EDX,
CPUID_8000_001F_EAX,
CPUID_8000_0021_EAX,
+ CPUID_LNX_5,
+ NR_CPUID_WORDS,
};
#define X86_CAP_FMT_NUM "%d:%d"
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 21 /* N 32-bit words worth of info */
+#define NCAPINTS 22 /* N 32-bit words worth of info */
#define NBUGINTS 2 /* N 32-bit bug flags */
/*
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc and Linux defined features.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
+
/*
* BUG word(s)
*/
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
#endif /* _ASM_X86_CPUFEATURES_H */
#endif
}
+#define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
+
#endif /* _X86_CRASH_RESERVE_H */
#define DISABLED_MASK18 (DISABLE_IBT)
#define DISABLED_MASK19 (DISABLE_SEV_SNP)
#define DISABLED_MASK20 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define DISABLED_MASK21 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
struct kvm_hypervisor_cpuid kvm_cpuid;
+ bool is_amd_compatible;
/*
* FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
/* A mask for bits which the kernel toggles when controlling mitigations */
#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
- | SPEC_CTRL_RRSBA_DIS_S)
+ | SPEC_CTRL_RRSBA_DIS_S \
+ | SPEC_CTRL_BHI_DIS_S)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
* are restricted to targets in
* kernel.
*/
+#define ARCH_CAP_BHI_NO BIT(20) /*
+ * CPU is not affected by Branch
+ * History Injection.
+ */
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
.Lskip_rsb_\@:
.endm
+/*
+ * The CALL to srso_alias_untrain_ret() must be patched in directly at
+ * the spot where untraining must be done, ie., srso_alias_untrain_ret()
+ * must be the target of a CALL instruction instead of indirectly
+ * jumping to a wrapper which then calls it. Therefore, this macro is
+ * called outside of __UNTRAIN_RET below, for the time being, before the
+ * kernel can support nested alternatives with arbitrary nesting.
+ */
+.macro CALL_UNTRAIN_RET
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
-#define CALL_UNTRAIN_RET "call entry_untrain_ret"
-#else
-#define CALL_UNTRAIN_RET ""
+ ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
+ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
#endif
+.endm
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY)
VALIDATE_UNRET_END
- ALTERNATIVE_3 "", \
- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ CALL_UNTRAIN_RET
+ ALTERNATIVE_2 "", \
"call entry_ibpb", \ibpb_feature, \
__stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
#endif
ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
.endm
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
static inline void retbleed_return_thunk(void) {}
#endif
+extern void srso_alias_untrain_ret(void);
+
#ifdef CONFIG_MITIGATION_SRSO
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
extern void entry_untrain_ret(void);
extern void entry_ibpb(void);
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
extern void (*x86_return_thunk)(void);
extern void __warn_thunk(void);
unsigned int from;
unsigned int to;
unsigned int info;
+ bool has_callstack;
};
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
#define REQUIRED_MASK20 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define REQUIRED_MASK21 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
unsigned long npages);
void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
void __noreturn snp_abort(void);
+void snp_dmi_setup(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
-void kdump_sev_callback(void);
void sev_show_status(void);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
static inline void __init
early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
-static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
static inline void snp_abort(void) { }
+static inline void snp_dmi_setup(void) { }
static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
{
return -ENOTTY;
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
-static inline void kdump_sev_callback(void) { }
static inline void sev_show_status(void) { }
#endif
int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
int rmp_make_shared(u64 pfn, enum pg_level level);
void snp_leak_pages(u64 pfn, unsigned int npages);
+void kdump_sev_callback(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
}
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
+static inline void kdump_sev_callback(void) { }
#endif
#endif
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
+/* This is used purely for kernel/trace/trace_syscalls.c */
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#else
/*
* These may not exist, but still put the prototypes in so we
* can use IS_ENABLED().
*/
-extern const sys_call_ptr_t ia32_sys_call_table[];
-extern const sys_call_ptr_t x32_sys_call_table[];
-#endif
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
}
bool do_syscall_64(struct pt_regs *regs, int nr);
+void do_int80_emulation(struct pt_regs *regs);
#endif /* CONFIG_X86_32 */
* @reserve_resources: reserve the standard resources for the
* platform
* @memory_setup: platform specific memory setup
- *
+ * @dmi_setup: platform specific DMI setup
*/
struct x86_init_resources {
void (*probe_roms)(void);
void (*reserve_resources)(void);
char *(*memory_setup)(void);
+ void (*dmi_setup)(void);
};
/**
struct kvm_sev_cmd {
__u32 id;
+ __u32 pad0;
__u64 data;
__u32 error;
__u32 sev_fd;
__u32 policy;
__u64 dh_uaddr;
__u32 dh_len;
+ __u32 pad0;
__u64 session_uaddr;
__u32 session_len;
+ __u32 pad1;
};
struct kvm_sev_launch_update_data {
__u64 uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_launch_secret {
__u64 hdr_uaddr;
__u32 hdr_len;
+ __u32 pad0;
__u64 guest_uaddr;
__u32 guest_len;
+ __u32 pad1;
__u64 trans_uaddr;
__u32 trans_len;
+ __u32 pad2;
};
struct kvm_sev_launch_measure {
__u64 uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_guest_status {
__u64 src_uaddr;
__u64 dst_uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_attestation_report {
__u8 mnonce[16];
__u64 uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_send_start {
__u32 policy;
+ __u32 pad0;
__u64 pdh_cert_uaddr;
__u32 pdh_cert_len;
+ __u32 pad1;
__u64 plat_certs_uaddr;
__u32 plat_certs_len;
+ __u32 pad2;
__u64 amd_certs_uaddr;
__u32 amd_certs_len;
+ __u32 pad3;
__u64 session_uaddr;
__u32 session_len;
+ __u32 pad4;
};
struct kvm_sev_send_update_data {
__u64 hdr_uaddr;
__u32 hdr_len;
+ __u32 pad0;
__u64 guest_uaddr;
__u32 guest_len;
+ __u32 pad1;
__u64 trans_uaddr;
__u32 trans_len;
+ __u32 pad2;
};
struct kvm_sev_receive_start {
__u32 policy;
__u64 pdh_uaddr;
__u32 pdh_len;
+ __u32 pad0;
__u64 session_uaddr;
__u32 session_len;
+ __u32 pad1;
};
struct kvm_sev_receive_update_data {
__u64 hdr_uaddr;
__u32 hdr_len;
+ __u32 pad0;
__u64 guest_uaddr;
__u32 guest_len;
+ __u32 pad1;
__u64 trans_uaddr;
__u32 trans_len;
+ __u32 pad2;
};
#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
__u32 token;
__u8 pad[56];
- __u32 enabled;
};
#define KVM_PV_EOI_BIT 0
static bool x2apic_hw_locked(void)
{
- u64 ia32_cap;
+ u64 x86_arch_cap_msr;
u64 msr;
- ia32_cap = x86_read_arch_cap_msr();
- if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) {
+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+ if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
return (msr & LEGACY_XAPIC_DISABLED);
}
return !bcmp(pad, insn_buff, tmpl_size);
}
-int x86_call_depth_emit_accounting(u8 **pprog, void *func)
+int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
u8 insn_buff[MAX_PATCH_LEN];
return 0;
memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
- apply_relocation(insn_buff, tmpl_size, *pprog,
+ apply_relocation(insn_buff, tmpl_size, ip,
skl_call_thunk_template, tmpl_size);
memcpy(*pprog, insn_buff, tmpl_size);
#endif
}
+static void bsp_determine_snp(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+ cc_vendor = CC_VENDOR_AMD;
+
+ if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
+ /*
+ * RMP table entry format is not architectural and is defined by the
+ * per-processor PPR. Restrict SNP support on the known CPU models
+ * for which the RMP table entry format is currently defined for.
+ */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
+ c->x86 >= 0x19 && snp_probe_rmptable_info()) {
+ cc_platform_set(CC_ATTR_HOST_SEV_SNP);
+ } else {
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
+ }
+ }
+#endif
+}
+
static void bsp_init_amd(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
break;
}
- if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
- /*
- * RMP table entry format is not architectural and it can vary by processor
- * and is defined by the per-processor PPR. Restrict SNP support on the
- * known CPU model and family for which the RMP table entry format is
- * currently defined for.
- */
- if (!boot_cpu_has(X86_FEATURE_ZEN3) &&
- !boot_cpu_has(X86_FEATURE_ZEN4) &&
- !boot_cpu_has(X86_FEATURE_ZEN5))
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
- else if (!snp_probe_rmptable_info())
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
- }
-
+ bsp_determine_snp(c);
return;
warn:
static void early_init_amd(struct cpuinfo_x86 *c)
{
- u64 value;
u32 dummy;
if (c->x86 >= 0xf)
early_detect_mem_encrypt(c);
- /* Re-enable TopologyExtensions if switched off by BIOS */
- if (c->x86 == 0x15 &&
- (c->x86_model >= 0x10 && c->x86_model <= 0x6f) &&
- !cpu_has(c, X86_FEATURE_TOPOEXT)) {
-
- if (msr_set_bit(0xc0011005, 54) > 0) {
- rdmsrl(0xc0011005, value);
- if (value & BIT_64(54)) {
- set_cpu_cap(c, X86_FEATURE_TOPOEXT);
- pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
- }
- }
- }
-
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) {
if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB))
setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
EXPORT_SYMBOL_GPL(x86_pred_cmd);
+static u64 __ro_after_init x86_arch_cap_msr;
+
static DEFINE_MUTEX(spec_ctrl_mutex);
void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
}
+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
static void __init taa_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_TAA)) {
taa_mitigation = TAA_MITIGATION_OFF;
return;
* On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
* update is required.
*/
- ia32_cap = x86_read_arch_cap_msr();
- if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
- !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
+ if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))
taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
/*
static void __init mmio_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
cpu_mitigations_off()) {
if (mmio_mitigation == MMIO_MITIGATION_OFF)
return;
- ia32_cap = x86_read_arch_cap_msr();
-
/*
* Enable CPU buffer clear mitigation for host and VMM, if also affected
* by MDS or TAA. Otherwise, enable mitigation for VMM only.
* be propagated to uncore buffers, clearing the Fill buffers on idle
* is required irrespective of SMT state.
*/
- if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
+ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
static_branch_enable(&mds_idle_clear);
/*
* FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
* affected systems.
*/
- if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
+ if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
(boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
- !(ia32_cap & ARCH_CAP_MDS_NO)))
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)))
mmio_mitigation = MMIO_MITIGATION_VERW;
else
mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
if (rfds_mitigation == RFDS_MITIGATION_OFF)
return;
- if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
else
rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
static void __init srbds_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
return;
* are only exposed to SRBDS when TSX is enabled or when CPU is affected
* by Processor MMIO Stale Data vulnerability.
*/
- ia32_cap = x86_read_arch_cap_msr();
- if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
+ if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
/* Will verify below that mitigation _can_ be disabled */
/* No microcode */
- if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+ if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) {
if (gds_mitigation == GDS_MITIGATION_FORCE) {
/*
* This only needs to be done on the boot CPU so do it
return SPECTRE_V2_RETPOLINE;
}
+static bool __ro_after_init rrsba_disabled;
+
/* Disable in-kernel use of non-RSB RET predictors */
static void __init spec_ctrl_disable_kernel_rrsba(void)
{
- u64 ia32_cap;
+ if (rrsba_disabled)
+ return;
- if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) {
+ rrsba_disabled = true;
return;
+ }
- ia32_cap = x86_read_arch_cap_msr();
+ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ return;
- if (ia32_cap & ARCH_CAP_RRSBA) {
- x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
- update_spec_ctrl(x86_spec_ctrl_base);
- }
+ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ rrsba_disabled = true;
}
static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
dump_stack();
}
+/*
+ * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by
+ * branch history in userspace. Not needed if BHI_NO is set.
+ */
+static bool __init spec_ctrl_bhi_dis(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
+ return false;
+
+ x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
+
+ return true;
+}
+
+enum bhi_mitigations {
+ BHI_MITIGATION_OFF,
+ BHI_MITIGATION_ON,
+};
+
+static enum bhi_mitigations bhi_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF;
+
+static int __init spectre_bhi_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ bhi_mitigation = BHI_MITIGATION_OFF;
+ else if (!strcmp(str, "on"))
+ bhi_mitigation = BHI_MITIGATION_ON;
+ else
+ pr_err("Ignoring unknown spectre_bhi option (%s)", str);
+
+ return 0;
+}
+early_param("spectre_bhi", spectre_bhi_parse_cmdline);
+
+static void __init bhi_select_mitigation(void)
+{
+ if (bhi_mitigation == BHI_MITIGATION_OFF)
+ return;
+
+ /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
+ if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
+ !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) {
+ spec_ctrl_disable_kernel_rrsba();
+ if (rrsba_disabled)
+ return;
+ }
+
+ if (spec_ctrl_bhi_dis())
+ return;
+
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ /* Mitigate KVM by default */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
+
+ /* Mitigate syscalls when the mitigation is forced =on */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
mode == SPECTRE_V2_RETPOLINE)
spec_ctrl_disable_kernel_rrsba();
+ if (boot_cpu_has(X86_BUG_BHI))
+ bhi_select_mitigation();
+
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
/* Update the static key controlling the MDS CPU buffer clear in idle */
static void update_mds_branch_idle(void)
{
- u64 ia32_cap = x86_read_arch_cap_msr();
-
/*
* Enable the idle clearing if SMT is active on CPUs which are
* affected only by MSBDS and not any other MDS variant.
if (sched_smt_active()) {
static_branch_enable(&mds_idle_clear);
} else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
- (ia32_cap & ARCH_CAP_FBSDP_NO)) {
+ (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
static_branch_disable(&mds_idle_clear);
}
}
switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
- return ", STIBP: disabled";
+ return "; STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
- return ", STIBP: forced";
+ return "; STIBP: forced";
case SPECTRE_V2_USER_STRICT_PREFERRED:
- return ", STIBP: always-on";
+ return "; STIBP: always-on";
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (static_key_enabled(&switch_to_cond_stibp))
- return ", STIBP: conditional";
+ return "; STIBP: conditional";
}
return "";
}
{
if (boot_cpu_has(X86_FEATURE_IBPB)) {
if (static_key_enabled(&switch_mm_always_ibpb))
- return ", IBPB: always-on";
+ return "; IBPB: always-on";
if (static_key_enabled(&switch_mm_cond_ibpb))
- return ", IBPB: conditional";
- return ", IBPB: disabled";
+ return "; IBPB: conditional";
+ return "; IBPB: disabled";
}
return "";
}
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
- return ", PBRSB-eIBRS: SW sequence";
+ return "; PBRSB-eIBRS: SW sequence";
else
- return ", PBRSB-eIBRS: Vulnerable";
+ return "; PBRSB-eIBRS: Vulnerable";
} else {
- return ", PBRSB-eIBRS: Not affected";
+ return "; PBRSB-eIBRS: Not affected";
}
}
+static const char *spectre_bhi_state(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_BHI))
+ return "; BHI: Not affected";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
+ return "; BHI: BHI_DIS_S";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
+ return "; BHI: SW loop, KVM: SW loop";
+ else if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
+ !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) &&
+ rrsba_disabled)
+ return "; BHI: Retpoline";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
+ return "; BHI: Vulnerable, KVM: SW loop";
+
+ return "; BHI: Vulnerable";
+}
+
static ssize_t spectre_v2_show_state(char *buf)
{
if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
- return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
+ return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
stibp_state(),
- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
pbrsb_eibrs_state(),
+ spectre_bhi_state(),
+ /* this should always be at the end */
spectre_v2_module_string());
}
#define NO_SPECTRE_V2 BIT(8)
#define NO_MMIO BIT(9)
#define NO_EIBRS_PBRSB BIT(10)
+#define NO_BHI BIT(11)
#define VULNWL(vendor, family, model, whitelist) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
/* Zhaoxin Family 7 */
- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
+ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
{}
};
u64 x86_read_arch_cap_msr(void)
{
- u64 ia32_cap = 0;
+ u64 x86_arch_cap_msr = 0;
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
- return ia32_cap;
+ return x86_arch_cap_msr;
}
-static bool arch_cap_mmio_immune(u64 ia32_cap)
+static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr)
{
- return (ia32_cap & ARCH_CAP_FBSDP_NO &&
- ia32_cap & ARCH_CAP_PSDP_NO &&
- ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+ return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO &&
+ x86_arch_cap_msr & ARCH_CAP_PSDP_NO &&
+ x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO);
}
-static bool __init vulnerable_to_rfds(u64 ia32_cap)
+static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
{
/* The "immunity" bit trumps everything else: */
- if (ia32_cap & ARCH_CAP_RFDS_NO)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO)
return false;
/*
* indicate that mitigation is needed because guest is running on a
* vulnerable hardware or may migrate to such hardware:
*/
- if (ia32_cap & ARCH_CAP_RFDS_CLEAR)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
return true;
/* Only consult the blacklist when there is no enumeration: */
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
- u64 ia32_cap = x86_read_arch_cap_msr();
+ u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
- !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+ !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO))
setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
- !(ia32_cap & ARCH_CAP_SSB_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
* Don't use AutoIBRS when SNP is enabled because it degrades host
* userspace indirect branch performance.
*/
- if ((ia32_cap & ARCH_CAP_IBRS_ALL) ||
+ if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) ||
(cpu_has(c, X86_FEATURE_AUTOIBRS) &&
!cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
- !(ia32_cap & ARCH_CAP_PBRSB_NO))
+ !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
}
if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
- !(ia32_cap & ARCH_CAP_MDS_NO)) {
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) {
setup_force_cpu_bug(X86_BUG_MDS);
if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
* TSX_CTRL check alone is not sufficient for cases when the microcode
* update is not present or running as guest that don't get TSX_CTRL.
*/
- if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
+ if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) &&
(cpu_has(c, X86_FEATURE_RTM) ||
- (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
+ (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)))
setup_force_cpu_bug(X86_BUG_TAA);
/*
* Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
* nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
*/
- if (!arch_cap_mmio_immune(ia32_cap)) {
+ if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
if (cpu_matches(cpu_vuln_blacklist, MMIO))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
}
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
- if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA))
setup_force_cpu_bug(X86_BUG_RETBLEED);
}
* disabling AVX2. The only way to do this in HW is to clear XCR0[2],
* which means that AVX will be disabled.
*/
- if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+ if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) &&
boot_cpu_has(X86_FEATURE_AVX))
setup_force_cpu_bug(X86_BUG_GDS);
- if (vulnerable_to_rfds(ia32_cap))
+ if (vulnerable_to_rfds(x86_arch_cap_msr))
setup_force_cpu_bug(X86_BUG_RFDS);
+ /* When virtualized, eIBRS could be hidden, assume vulnerable */
+ if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
+ !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+ (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
+ boot_cpu_has(X86_FEATURE_HYPERVISOR)))
+ setup_force_cpu_bug(X86_BUG_BHI);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
/* Rogue Data Cache Load? No! */
- if (ia32_cap & ARCH_CAP_RDCL_NO)
+ if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO)
return;
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
{ X86_FEATURE_F16C, X86_FEATURE_XMM2, },
{ X86_FEATURE_AES, X86_FEATURE_XMM2 },
{ X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
+ { X86_FEATURE_GFNI, X86_FEATURE_XMM2 },
{ X86_FEATURE_FMA, X86_FEATURE_AVX },
+ { X86_FEATURE_VAES, X86_FEATURE_AVX },
+ { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX },
{ X86_FEATURE_AVX2, X86_FEATURE_AVX, },
{ X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
{ X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
- { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
- { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
- { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
return -EINVAL;
b = &per_cpu(mce_banks_array, s->id)[bank];
-
if (!b->init)
return -ENODEV;
b->ctl = new;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
return size;
}
(boot_cpu_data.x86 >= 0x0f)))
return;
- if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return;
rdmsr(MSR_AMD64_SYSCFG, lo, hi);
else
cpu = cpumask_any_but(mask, exclude_cpu);
- if (!IS_ENABLED(CONFIG_NO_HZ_FULL))
+ /* Only continue if tick_nohz_full_mask has been initialized. */
+ if (!tick_nohz_full_enabled())
return cpu;
/* If the CPU picked isn't marked nohz_full nothing more needs doing. */
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
+ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};
early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
#endif
- set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
}
topo_info.nr_disabled_cpus++;
}
- /* Register present and possible CPUs in the domain maps */
+ /*
+ * Register present and possible CPUs in the domain
+ * maps. cpu_possible_map will be updated in
+ * topology_init_possible_cpus() after enumeration is done.
+ */
for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
}
if (!sft)
sft = get_count_order(ecx.cpu_nthreads + 1);
- topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1);
+ /*
+ * cpu_nthreads describes the number of threads in the package
+ * sft is the number of APIC ID bits per package
+ *
+ * As the number of actual threads per core is not described in
+ * this leaf, just set the CORE domain shift and let the later
+ * parsers set SMT shift. Assume one thread per core by default
+ * which is correct if there are no other CPUID leafs to parse.
+ */
+ topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1);
+ topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1);
return true;
}
-static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id)
+static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id)
{
/*
* Starting with Fam 17h the DIE domain could probably be used to
tscan->c->topo.initial_apicid = leaf.ext_apic_id;
/*
- * If leaf 0xb is available, then SMT shift is set already. If not
- * take it from ecx.threads_per_core and use topo_update_dom() -
- * topology_set_dom() would propagate and overwrite the already
- * propagated CORE level.
+ * If leaf 0xb is available, then the domain shifts are set
+ * already and nothing to do here.
*/
if (!has_0xb) {
+ /*
+ * Leaf 0x80000008 set the CORE domain shift already.
+ * Update the SMT domain, but do not propagate it.
+ */
unsigned int nthreads = leaf.core_nthreads + 1;
topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads);
static bool parse_fam10h_node_id(struct topo_scan *tscan)
{
- struct {
- union {
+ union {
+ struct {
u64 node_id : 3,
nodes_per_pkg : 3,
unused : 58;
- u64 msr;
};
+ u64 msr;
} nid;
if (!boot_cpu_has(X86_FEATURE_NODEID_MSR))
tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN];
}
+static void topoext_fixup(struct topo_scan *tscan)
+{
+ struct cpuinfo_x86 *c = tscan->c;
+ u64 msrval;
+
+ /* Try to re-enable TopologyExtensions if switched off by BIOS */
+ if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD ||
+ c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f)
+ return;
+
+ if (msr_set_bit(0xc0011005, 54) <= 0)
+ return;
+
+ rdmsrl(0xc0011005, msrval);
+ if (msrval & BIT_64(54)) {
+ set_cpu_cap(c, X86_FEATURE_TOPOEXT);
+ pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
+ }
+}
+
static void parse_topology_amd(struct topo_scan *tscan)
{
bool has_0xb = false;
void cpu_parse_topology_amd(struct topo_scan *tscan)
{
tscan->amd_nodes_per_pkg = 1;
+ topoext_fixup(tscan);
parse_topology_amd(tscan);
if (tscan->amd_nodes_per_pkg > 1)
/*
* EISA specific code
*/
+#include <linux/cc_platform.h>
#include <linux/ioport.h>
#include <linux/eisa.h>
#include <linux/io.h>
{
void __iomem *p;
- if (xen_pv_domain() && !xen_initial_domain())
+ if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return 0;
p = ioremap(0x0FFFD9, 4);
early_param("no-steal-acc", parse_no_stealacc);
+static DEFINE_PER_CPU_READ_MOSTLY(bool, async_pf_enabled);
static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
static int has_steal_clock = 0;
{
u32 flags = 0;
- if (__this_cpu_read(apf_reason.enabled)) {
+ if (__this_cpu_read(async_pf_enabled)) {
flags = __this_cpu_read(apf_reason.flags);
__this_cpu_write(apf_reason.flags, 0);
}
inc_irq_stat(irq_hv_callback_count);
- if (__this_cpu_read(apf_reason.enabled)) {
+ if (__this_cpu_read(async_pf_enabled)) {
token = __this_cpu_read(apf_reason.token);
kvm_async_pf_task_wake(token);
__this_cpu_write(apf_reason.token, 0);
wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
- __this_cpu_write(apf_reason.enabled, 1);
+ __this_cpu_write(async_pf_enabled, true);
pr_debug("setup async PF for cpu %d\n", smp_processor_id());
}
static void kvm_pv_disable_apf(void)
{
- if (!__this_cpu_read(apf_reason.enabled))
+ if (!__this_cpu_read(async_pf_enabled))
return;
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
- __this_cpu_write(apf_reason.enabled, 0);
+ __this_cpu_write(async_pf_enabled, false);
pr_debug("disable async PF for cpu %d\n", smp_processor_id());
}
static char *nmi_check_stall_msg[] = {
/* */
-/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */
+/* +--------- nmi_seq & 0x1: CPU is currently in NMI handler. */
/* | +------ cpu_is_offline(cpu) */
/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */
/* | | | NMI handler has been invoked. */
nmi_seq = READ_ONCE(nsp->idt_nmi_seq);
if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) {
msgp = "CPU entered NMI handler function, but has not exited";
- } else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) {
- msgp = "CPU is handling NMIs";
- } else {
- idx = ((nsp->idt_seq_snap & 0x1) << 2) |
+ } else if (nsp->idt_nmi_seq_snap == nmi_seq ||
+ nsp->idt_nmi_seq_snap + 1 == nmi_seq) {
+ idx = ((nmi_seq & 0x1) << 2) |
(cpu_is_offline(cpu) << 1) |
(nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
msgp = nmi_check_stall_msg[idx];
if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
modp = ", but OK because ignore_nmis was set";
- if (nmi_seq & 0x1)
- msghp = " (CPU currently in NMI handler function)";
- else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
+ if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
msghp = " (CPU exited one NMI handler function)";
+ else if (nmi_seq & 0x1)
+ msghp = " (CPU currently in NMI handler function)";
+ else
+ msghp = " (CPU was never in an NMI handler function)";
+ } else {
+ msgp = "CPU is handling NMIs";
}
- pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n",
- __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies));
+ pr_alert("%s: CPU %d: %s%s%s\n", __func__, cpu, msgp, modp, msghp);
+ pr_alert("%s: last activity: %lu jiffies ago.\n",
+ __func__, j - READ_ONCE(nsp->recv_jiffies));
}
}
unsigned char c;
int i;
- /*
- * The ROM memory range is not part of the e820 table and is therefore not
- * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted
- * memory, and SNP requires encrypted memory to be validated before access.
- * Do that here.
- */
- snp_prep_memory(video_rom_resource.start,
- ((system_rom_resource.end + 1) - video_rom_resource.start),
- SNP_PAGE_STATE_PRIVATE);
-
/* video rom */
upper = adapter_rom_resources[0].start;
for (start = video_rom_resource.start; start < upper; start += 2048) {
#include <linux/console.h>
#include <linux/crash_dump.h>
#include <linux/dma-map-ops.h>
-#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/ima.h>
#include <linux/init_ohci1394_dma.h>
#include <asm/bios_ebda.h>
#include <asm/bugs.h>
#include <asm/cacheinfo.h>
+#include <asm/coco.h>
#include <asm/cpu.h>
#include <asm/efi.h>
#include <asm/gart.h>
efi_init();
reserve_ibft_region();
- dmi_setup();
+ x86_init.resources.dmi_setup();
/*
* VMware detection requires dmi to be available, so this
* memory size.
*/
mem_encrypt_setup_arch();
+ cc_random_init();
efi_fake_memmap();
efi_find_mirror();
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/psp-sev.h>
+#include <linux/dmi.h>
#include <uapi/linux/sev-guest.h>
#include <asm/init.h>
early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
}
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op)
-{
- unsigned long vaddr, npages;
-
- vaddr = (unsigned long)__va(paddr);
- npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
-
- if (op == SNP_PAGE_STATE_PRIVATE)
- early_snp_set_memory_private(vaddr, paddr, npages);
- else if (op == SNP_PAGE_STATE_SHARED)
- early_snp_set_memory_shared(vaddr, paddr, npages);
- else
- WARN(1, "invalid memory op %d\n", op);
-}
-
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
unsigned long vaddr_end, int op)
{
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
+/*
+ * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
+ * enabled, as the alternative (fallback) logic for DMI probing in the legacy
+ * ROM region can cause a crash since this region is not pre-validated.
+ */
+void __init snp_dmi_setup(void)
+{
+ if (efi_enabled(EFI_CONFIG_TABLES))
+ dmi_setup();
+}
+
static void dump_cpuid_table(void)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
}
device_initcall(snp_init_platform_device);
-void kdump_sev_callback(void)
-{
- /*
- * Do wbinvd() on remote CPUs when SNP is enabled in order to
- * safely do SNP_SHUTDOWN on the local CPU.
- */
- if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
- wbinvd();
-}
-
void sev_show_status(void)
{
int i;
*
* For licencing details see kernel-base/COPYING
*/
+#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/export.h>
.probe_roms = probe_roms,
.reserve_resources = reserve_standard_io_resources,
.memory_setup = e820__memory_setup_default,
+ .dmi_setup = dmi_setup,
},
.mpparse = {
default y
depends on KVM_AMD && X86_64
depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
+ select ARCH_HAS_CC_PLATFORM
help
Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
with Encrypted State (SEV-ES) on AMD processors.
ccflags-y += -I $(srctree)/arch/x86/kvm
ccflags-$(CONFIG_KVM_WERROR) += -Werror
-ifeq ($(CONFIG_FRAME_POINTER),y)
-OBJECT_FILES_NON_STANDARD_vmx/vmenter.o := y
-OBJECT_FILES_NON_STANDARD_svm/vmenter.o := y
-endif
-
include $(srctree)/virt/kvm/Makefile.kvm
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
return 0;
}
-static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
- const char *sig)
+static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_entry2 *entries,
+ int nent, const char *sig)
{
struct kvm_hypervisor_cpuid cpuid = {};
struct kvm_cpuid_entry2 *entry;
u32 base;
for_each_possible_hypervisor_cpuid_base(base) {
- entry = kvm_find_cpuid_entry(vcpu, base);
+ entry = cpuid_entry2_find(entries, nent, base, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (entry) {
u32 signature[3];
return cpuid;
}
-static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu,
- struct kvm_cpuid_entry2 *entries, int nent)
+static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
+ const char *sig)
{
- u32 base = vcpu->arch.kvm_cpuid.base;
-
- if (!base)
- return NULL;
+ return __kvm_get_hypervisor_cpuid(vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent, sig);
+}
- return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES,
+static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_cpuid_entry2 *entries,
+ int nent, u32 kvm_cpuid_base)
+{
+ return cpuid_entry2_find(entries, nent, kvm_cpuid_base | KVM_CPUID_FEATURES,
KVM_CPUID_INDEX_NOT_SIGNIFICANT);
}
static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
{
- return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent);
+ u32 base = vcpu->arch.kvm_cpuid.base;
+
+ if (!base)
+ return NULL;
+
+ return __kvm_find_kvm_cpuid_features(vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent, base);
}
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
int nent)
{
struct kvm_cpuid_entry2 *best;
+ struct kvm_hypervisor_cpuid kvm_cpuid;
best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (best) {
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent);
- if (kvm_hlt_in_guest(vcpu->kvm) && best &&
- (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
- best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+ kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE);
+ if (kvm_cpuid.base) {
+ best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base);
+ if (kvm_hlt_in_guest(vcpu->kvm) && best)
+ best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+ }
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
kvm_update_pv_runtime(vcpu);
+ vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx);
}
+static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.is_amd_compatible;
+}
+
+static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu)
+{
+ return !guest_cpuid_is_amd_compatible(vcpu);
+}
+
static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
- if (r && lvt_type == APIC_LVTPC)
+ if (r && lvt_type == APIC_LVTPC &&
+ guest_cpuid_is_intel_compatible(apic->vcpu))
kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
return r;
}
context->cpu_role.base.level, is_efer_nx(context),
guest_can_use(vcpu, X86_FEATURE_GBPAGES),
is_cr4_pse(context),
- guest_cpuid_is_amd_or_hygon(vcpu));
+ guest_cpuid_is_amd_compatible(vcpu));
}
static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
* that problem is swept under the rug; KVM's CPUID API is horrific and
* it's all but impossible to solve it without introducing a new API.
*/
- vcpu->arch.root_mmu.root_role.word = 0;
- vcpu->arch.guest_mmu.root_role.word = 0;
- vcpu->arch.nested_mmu.root_role.word = 0;
+ vcpu->arch.root_mmu.root_role.invalid = 1;
+ vcpu->arch.guest_mmu.root_role.invalid = 1;
+ vcpu->arch.nested_mmu.root_role.invalid = 1;
vcpu->arch.root_mmu.cpu_role.ext.valid = 0;
vcpu->arch.guest_mmu.cpu_role.ext.valid = 0;
vcpu->arch.nested_mmu.cpu_role.ext.valid = 0;
* by the memslot, KVM can't use a hugepage due to the
* misaligned address regardless of memory attributes.
*/
- if (gfn >= slot->base_gfn) {
+ if (gfn >= slot->base_gfn &&
+ gfn + nr_pages <= slot->base_gfn + slot->npages) {
if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
hugepage_clear_mixed(slot, gfn, level);
else
}
}
-/*
- * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
- * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
- * If AD bits are not enabled, this will require clearing the writable bit on
- * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
- * be flushed.
- */
+static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp)
+{
+ /*
+ * All TDP MMU shadow pages share the same role as their root, aside
+ * from level, so it is valid to key off any shadow page to determine if
+ * write protection is needed for an entire tree.
+ */
+ return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled();
+}
+
static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t start, gfn_t end)
{
- u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK;
+ const u64 dbit = tdp_mmu_need_write_protect(root) ? PT_WRITABLE_MASK :
+ shadow_dirty_mask;
struct tdp_iter iter;
bool spte_set = false;
if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
continue;
- KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+ KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
spte_ad_need_write_protect(iter.old_spte));
if (!(iter.old_spte & dbit))
}
/*
- * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
- * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
- * If AD bits are not enabled, this will require clearing the writable bit on
- * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
- * be flushed.
+ * Clear the dirty status (D-bit or W-bit) of all the SPTEs mapping GFNs in the
+ * memslot. Returns true if an SPTE has been changed and the TLBs need to be
+ * flushed.
*/
bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
const struct kvm_memory_slot *slot)
return spte_set;
}
-/*
- * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
- * set in mask, starting at gfn. The given memslot is expected to contain all
- * the GFNs represented by set bits in the mask. If AD bits are enabled,
- * clearing the dirty status will involve clearing the dirty bit on each SPTE
- * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
- */
static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t gfn, unsigned long mask, bool wrprot)
{
- u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK :
- shadow_dirty_mask;
+ const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root)) ? PT_WRITABLE_MASK :
+ shadow_dirty_mask;
struct tdp_iter iter;
lockdep_assert_held_write(&kvm->mmu_lock);
if (!mask)
break;
- KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+ KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
spte_ad_need_write_protect(iter.old_spte));
if (iter.level > PG_LEVEL_4K ||
}
/*
- * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
- * set in mask, starting at gfn. The given memslot is expected to contain all
- * the GFNs represented by set bits in the mask. If AD bits are enabled,
- * clearing the dirty status will involve clearing the dirty bit on each SPTE
- * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
+ * Clear the dirty status (D-bit or W-bit) of all the 4k SPTEs mapping GFNs for
+ * which a bit is set in mask, starting at gfn. The given memslot is expected to
+ * contain all the GFNs represented by set bits in the mask.
*/
void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
pmu->pebs_data_cfg_mask = ~0ull;
bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
- if (vcpu->kvm->arch.enable_pmu)
- static_call(kvm_x86_pmu_refresh)(vcpu);
+ if (!vcpu->kvm->arch.enable_pmu)
+ return;
+
+ static_call(kvm_x86_pmu_refresh)(vcpu);
+
+ /*
+ * At RESET, both Intel and AMD CPUs set all enable bits for general
+ * purpose counters in IA32_PERF_GLOBAL_CTRL (so that software that
+ * was written for v1 PMUs don't unknowingly leave GP counters disabled
+ * in the global controls). Emulate that behavior when refreshing the
+ * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL.
+ */
+ if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters)
+ pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
}
void kvm_pmu_init(struct kvm_vcpu *vcpu)
#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
-#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
+#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
/* CPUID level 0x80000007 (EDX). */
*/
static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
{
+ BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_1);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_2);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_3);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_4);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_5);
BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
}
KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
+ KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
default:
return x86_feature;
}
};
/* Called with the sev_bitmap_lock held, or on shutdown */
-static int sev_flush_asids(int min_asid, int max_asid)
+static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
{
- int ret, asid, error = 0;
+ int ret, error = 0;
+ unsigned int asid;
/* Check if there are any ASIDs to reclaim before performing a flush */
asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
}
/* Must be called with the sev_bitmap_lock held */
-static bool __sev_recycle_asids(int min_asid, int max_asid)
+static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid)
{
if (sev_flush_asids(min_asid, max_asid))
return false;
static int sev_asid_new(struct kvm_sev_info *sev)
{
- int asid, min_asid, max_asid, ret;
+ /*
+ * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
+ * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
+ * Note: min ASID can end up larger than the max if basic SEV support is
+ * effectively disabled by disallowing use of ASIDs for SEV guests.
+ */
+ unsigned int min_asid = sev->es_active ? 1 : min_sev_asid;
+ unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
+ unsigned int asid;
bool retry = true;
+ int ret;
+
+ if (min_asid > max_asid)
+ return -ENOTTY;
WARN_ON(sev->misc_cg);
sev->misc_cg = get_current_misc_cg();
mutex_lock(&sev_bitmap_lock);
- /*
- * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
- * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
- */
- min_asid = sev->es_active ? 1 : min_sev_asid;
- max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
if (asid > max_asid) {
mutex_unlock(&sev_bitmap_lock);
- return asid;
+ sev->asid = asid;
+ return 0;
e_uncharge:
sev_misc_cg_uncharge(sev);
put_misc_cg(sev->misc_cg);
return ret;
}
-static int sev_get_asid(struct kvm *kvm)
+static unsigned int sev_get_asid(struct kvm *kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_platform_init_args init_args = {0};
- int asid, ret;
+ int ret;
if (kvm->created_vcpus)
return -EINVAL;
- ret = -EBUSY;
if (unlikely(sev->active))
- return ret;
+ return -EINVAL;
sev->active = true;
sev->es_active = argp->id == KVM_SEV_ES_INIT;
- asid = sev_asid_new(sev);
- if (asid < 0)
+ ret = sev_asid_new(sev);
+ if (ret)
goto e_no_asid;
- sev->asid = asid;
init_args.probe = false;
ret = sev_platform_init(&init_args);
static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
+ unsigned int asid = sev_get_asid(kvm);
struct sev_data_activate activate;
- int asid = sev_get_asid(kvm);
int ret;
/* activate ASID on the given handle */
/* Avoid using vmalloc for smaller buffers. */
size = npages * sizeof(struct page *);
if (size > PAGE_SIZE)
- pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ pages = __vmalloc(size, GFP_KERNEL_ACCOUNT);
else
pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
goto out;
}
- sev_asid_count = max_sev_asid - min_sev_asid + 1;
- WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
+ if (min_sev_asid <= max_sev_asid) {
+ sev_asid_count = max_sev_asid - min_sev_asid + 1;
+ WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
+ }
sev_supported = true;
/* SEV-ES support requested? */
out:
if (boot_cpu_has(X86_FEATURE_SEV))
pr_info("SEV %s (ASIDs %u - %u)\n",
- sev_supported ? "enabled" : "disabled",
+ sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
+ "unusable" :
+ "disabled",
min_sev_asid, max_sev_asid);
if (boot_cpu_has(X86_FEATURE_SEV_ES))
pr_info("SEV-ES %s (ASIDs %u - %u)\n",
*/
static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
{
- int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
+ unsigned int asid = sev_get_asid(vcpu->kvm);
/*
* Note! The address must be a kernel address, as regular page walk
void pre_sev_run(struct vcpu_svm *svm, int cpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
- int asid = sev_get_asid(svm->vcpu.kvm);
+ unsigned int asid = sev_get_asid(svm->vcpu.kvm);
/* Assign the asid allocated with this SEV guest */
svm->asid = asid;
unsigned long pfn;
struct page *p;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
/*
__free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
}
+static struct sev_es_save_area *sev_es_host_save_area(struct svm_cpu_data *sd)
+{
+ return page_address(sd->save_area) + 0x400;
+}
+
static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
* or subsequent vmload of host save area.
*/
vmsave(sd->save_area_pa);
- if (sev_es_guest(vcpu->kvm)) {
- struct sev_es_save_area *hostsa;
- hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
-
- sev_es_prepare_switch_to_guest(hostsa);
- }
+ if (sev_es_guest(vcpu->kvm))
+ sev_es_prepare_switch_to_guest(sev_es_host_save_area(sd));
if (tsc_scaling)
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
{
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
struct vcpu_svm *svm = to_svm(vcpu);
guest_state_enter_irqoff();
amd_clear_divider();
if (sev_es_guest(vcpu->kvm))
- __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
+ __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted,
+ sev_es_host_save_area(sd));
else
__svm_vcpu_run(svm, spec_ctrl_intercepted);
/* vmenter.S */
-void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
+void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted,
+ struct sev_es_save_area *hostsa);
void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
#define DEFINE_KVM_GHCB_ACCESSORS(field) \
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
+#include <asm/frame.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include "kvm-asm-offsets.h"
"", X86_FEATURE_V_SPEC_CTRL
901:
.endm
-.macro RESTORE_HOST_SPEC_CTRL_BODY
+.macro RESTORE_HOST_SPEC_CTRL_BODY spec_ctrl_intercepted:req
900:
/* Same for after vmexit. */
mov $MSR_IA32_SPEC_CTRL, %ecx
* Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
* if it was not intercepted during guest execution.
*/
- cmpb $0, (%_ASM_SP)
+ cmpb $0, \spec_ctrl_intercepted
jnz 998f
rdmsr
movl %eax, SVM_spec_ctrl(%_ASM_DI)
*/
SYM_FUNC_START(__svm_vcpu_run)
push %_ASM_BP
+ mov %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
push %r15
push %r14
RET
RESTORE_GUEST_SPEC_CTRL_BODY
- RESTORE_HOST_SPEC_CTRL_BODY
+ RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP)
10: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 2b
SYM_FUNC_END(__svm_vcpu_run)
+#ifdef CONFIG_KVM_AMD_SEV
+
+
+#ifdef CONFIG_X86_64
+#define SEV_ES_GPRS_BASE 0x300
+#define SEV_ES_RBX (SEV_ES_GPRS_BASE + __VCPU_REGS_RBX * WORD_SIZE)
+#define SEV_ES_RBP (SEV_ES_GPRS_BASE + __VCPU_REGS_RBP * WORD_SIZE)
+#define SEV_ES_RSI (SEV_ES_GPRS_BASE + __VCPU_REGS_RSI * WORD_SIZE)
+#define SEV_ES_RDI (SEV_ES_GPRS_BASE + __VCPU_REGS_RDI * WORD_SIZE)
+#define SEV_ES_R12 (SEV_ES_GPRS_BASE + __VCPU_REGS_R12 * WORD_SIZE)
+#define SEV_ES_R13 (SEV_ES_GPRS_BASE + __VCPU_REGS_R13 * WORD_SIZE)
+#define SEV_ES_R14 (SEV_ES_GPRS_BASE + __VCPU_REGS_R14 * WORD_SIZE)
+#define SEV_ES_R15 (SEV_ES_GPRS_BASE + __VCPU_REGS_R15 * WORD_SIZE)
+#endif
+
/**
* __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
* @svm: struct vcpu_svm *
* @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_sev_es_vcpu_run)
- push %_ASM_BP
-#ifdef CONFIG_X86_64
- push %r15
- push %r14
- push %r13
- push %r12
-#else
- push %edi
- push %esi
-#endif
- push %_ASM_BX
+ FRAME_BEGIN
/*
- * Save variables needed after vmexit on the stack, in inverse
- * order compared to when they are needed.
+ * Save non-volatile (callee-saved) registers to the host save area.
+ * Except for RAX and RSP, all GPRs are restored on #VMEXIT, but not
+ * saved on VMRUN.
*/
+ mov %rbp, SEV_ES_RBP (%rdx)
+ mov %r15, SEV_ES_R15 (%rdx)
+ mov %r14, SEV_ES_R14 (%rdx)
+ mov %r13, SEV_ES_R13 (%rdx)
+ mov %r12, SEV_ES_R12 (%rdx)
+ mov %rbx, SEV_ES_RBX (%rdx)
- /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */
- push %_ASM_ARG2
-
- /* Save @svm. */
- push %_ASM_ARG1
-
-.ifnc _ASM_ARG1, _ASM_DI
/*
- * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX
- * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL.
+ * Save volatile registers that hold arguments that are needed after
+ * #VMEXIT (RDI=@svm and RSI=@spec_ctrl_intercepted).
*/
- mov %_ASM_ARG1, %_ASM_DI
-.endif
+ mov %rdi, SEV_ES_RDI (%rdx)
+ mov %rsi, SEV_ES_RSI (%rdx)
- /* Clobbers RAX, RCX, RDX. */
+ /* Clobbers RAX, RCX, RDX (@hostsa). */
RESTORE_GUEST_SPEC_CTRL
/* Get svm->current_vmcb->pa into RAX. */
- mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
- mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
+ mov SVM_current_vmcb(%rdi), %rax
+ mov KVM_VMCB_pa(%rax), %rax
/* Enter guest mode */
sti
-1: vmrun %_ASM_AX
+1: vmrun %rax
2: cli
- /* Pop @svm to RDI, guest registers have been saved already. */
- pop %_ASM_DI
-
#ifdef CONFIG_MITIGATION_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
- /* Clobbers RAX, RCX, RDX. */
+ /* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */
RESTORE_HOST_SPEC_CTRL
/*
*/
UNTRAIN_RET_VM
- /* "Pop" @spec_ctrl_intercepted. */
- pop %_ASM_BX
-
- pop %_ASM_BX
-
-#ifdef CONFIG_X86_64
- pop %r12
- pop %r13
- pop %r14
- pop %r15
-#else
- pop %esi
- pop %edi
-#endif
- pop %_ASM_BP
+ FRAME_END
RET
RESTORE_GUEST_SPEC_CTRL_BODY
- RESTORE_HOST_SPEC_CTRL_BODY
+ RESTORE_HOST_SPEC_CTRL_BODY %sil
-3: cmpb $0, _ASM_RIP(kvm_rebooting)
+3: cmpb $0, kvm_rebooting(%rip)
jne 2b
ud2
_ASM_EXTABLE(1b, 3b)
SYM_FUNC_END(__svm_sev_es_vcpu_run)
+#endif /* CONFIG_KVM_AMD_SEV */
* Tracepoint for nested #vmexit because of interrupt pending
*/
TRACE_EVENT(kvm_invlpga,
- TP_PROTO(__u64 rip, int asid, u64 address),
+ TP_PROTO(__u64 rip, unsigned int asid, u64 address),
TP_ARGS(rip, asid, address),
TP_STRUCT__entry(
- __field( __u64, rip )
- __field( int, asid )
- __field( __u64, address )
+ __field( __u64, rip )
+ __field( unsigned int, asid )
+ __field( __u64, address )
),
TP_fast_assign(
__entry->address = address;
),
- TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx",
+ TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx",
__entry->rip, __entry->asid, __entry->address)
);
perf_capabilities = vcpu_get_perf_capabilities(vcpu);
if (cpuid_model_is_consistent(vcpu) &&
(perf_capabilities & PMU_CAP_LBR_FMT))
- x86_perf_get_lbr(&lbr_desc->records);
+ memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps));
else
lbr_desc->records.nr = 0;
call vmx_spec_ctrl_restore_host
+ CLEAR_BRANCH_HISTORY_VMEXIT
+
/* Put return value in AX */
mov %_ASM_BX, %_ASM_AX
int __read_mostly pt_mode = PT_MODE_SYSTEM;
module_param(pt_mode, int, S_IRUGO);
+struct x86_pmu_lbr __ro_after_init vmx_lbr_caps;
+
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
static DEFINE_MUTEX(vmx_l1d_flush_mutex);
vmx_update_exception_bitmap(vcpu);
}
-static u64 vmx_get_perf_capabilities(void)
+static __init u64 vmx_get_perf_capabilities(void)
{
u64 perf_cap = PMU_CAP_FW_WRITES;
- struct x86_pmu_lbr lbr;
u64 host_perf_cap = 0;
if (!enable_pmu)
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
- x86_perf_get_lbr(&lbr);
- if (lbr.nr)
+ x86_perf_get_lbr(&vmx_lbr_caps);
+
+ /*
+ * KVM requires LBR callstack support, as the overhead due to
+ * context switching LBRs without said support is too high.
+ * See intel_pmu_create_guest_lbr_event() for more info.
+ */
+ if (!vmx_lbr_caps.has_callstack)
+ memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps));
+ else if (vmx_lbr_caps.nr)
perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
}
if (vmx_pebs_supported()) {
perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
- if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4)
- perf_cap &= ~PERF_CAP_PEBS_BASELINE;
+
+ /*
+ * Disallow adaptive PEBS as it is functionally broken, can be
+ * used by the guest to read *host* LBRs, and can be used to
+ * bypass userspace event filters. To correctly and safely
+ * support adaptive PEBS, KVM needs to:
+ *
+ * 1. Account for the ADAPTIVE flag when (re)programming fixed
+ * counters.
+ *
+ * 2. Gain support from perf (or take direct control of counter
+ * programming) to support events without adaptive PEBS
+ * enabled for the hardware counter.
+ *
+ * 3. Ensure LBR MSRs cannot hold host data on VM-Entry with
+ * adaptive PEBS enabled and MSR_PEBS_DATA_CFG.LBRS=1.
+ *
+ * 4. Document which PMU events are effectively exposed to the
+ * guest via adaptive PEBS, and make adaptive PEBS mutually
+ * exclusive with KVM_SET_PMU_EVENT_FILTER if necessary.
+ */
+ perf_cap &= ~PERF_CAP_PEBS_BASELINE;
}
return perf_cap;
#include "vmx_ops.h"
#include "../cpuid.h"
#include "run_flags.h"
+#include "../mmu.h"
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
bool msr_passthrough;
};
+extern struct x86_pmu_lbr vmx_lbr_caps;
+
/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
if (!enable_ept)
return true;
- return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
+ return allow_smaller_maxphyaddr &&
+ cpuid_maxphyaddr(vcpu) < kvm_get_shadow_phys_bits();
}
static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
static u64 kvm_get_arch_capabilities(void)
{
static bool can_set_mci_status(struct kvm_vcpu *vcpu)
{
/* McStatusWrEn enabled? */
- if (guest_cpuid_is_amd_or_hygon(vcpu))
+ if (guest_cpuid_is_amd_compatible(vcpu))
return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
return false;
lfence
jmp srso_alias_return_thunk
SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
.popsection
.pushsection .text..__x86.rethunk_safe
SYM_CODE_END(srso_return_thunk)
#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret"
-#define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret"
#else /* !CONFIG_MITIGATION_SRSO */
+/* Dummy for the alternative in CALL_UNTRAIN_RET. */
+SYM_CODE_START(srso_alias_untrain_ret)
+ ANNOTATE_UNRET_SAFE
+ ANNOTATE_NOENDBR
+ ret
+ int3
+SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
#define JMP_SRSO_UNTRAIN_RET "ud2"
-#define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2"
#endif /* CONFIG_MITIGATION_SRSO */
#ifdef CONFIG_MITIGATION_UNRET_ENTRY
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
SYM_FUNC_START(entry_untrain_ret)
- ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \
- JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO, \
- JMP_SRSO_ALIAS_UNTRAIN_RET, X86_FEATURE_SRSO_ALIAS
+ ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)
SYM_CODE_START(__x86_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
+#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || \
+ defined(CONFIG_MITIGATION_SRSO) || \
+ defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING)
ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \
"jmp warn_thunk_thunk", X86_FEATURE_ALWAYS
+#else
+ ANNOTATE_UNRET_SAFE
+ ret
+#endif
int3
SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
- bool use_gbpage;
next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;
- /* if this is already a gbpage, this portion is already mapped */
- if (pud_leaf(*pud))
- continue;
-
- /* Is using a gbpage allowed? */
- use_gbpage = info->direct_gbpages;
-
- /* Don't use gbpage if it maps more than the requested region. */
- /* at the begining: */
- use_gbpage &= ((addr & ~PUD_MASK) == 0);
- /* ... or at the end: */
- use_gbpage &= ((next & ~PUD_MASK) == 0);
-
- /* Never overwrite existing mappings */
- use_gbpage &= !pud_present(*pud);
-
- if (use_gbpage) {
+ if (info->direct_gbpages) {
pud_t pudval;
+ if (pud_present(*pud))
+ continue;
+
+ addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
*/
if (sev_status & MSR_AMD64_SEV_ENABLED)
ia32_disable();
+
+ /*
+ * Override init functions that scan the ROM region in SEV-SNP guests,
+ * as this memory is not pre-validated and would thus cause a crash.
+ */
+ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.pci.init_irq = x86_init_noop;
+ x86_init.resources.probe_roms = x86_init_noop;
+
+ /*
+ * DMI setup behavior for SEV-SNP guests depends on
+ * efi_enabled(EFI_CONFIG_TABLES), which hasn't been
+ * parsed yet. snp_dmi_setup() will run after that
+ * parsing has happened.
+ */
+ x86_init.resources.dmi_setup = snp_dmi_setup;
+ }
}
void __init mem_encrypt_free_decrypted_mem(void)
#include <linux/memblock.h>
#include <linux/init.h>
+#include <asm/pgtable_areas.h>
#include "numa_internal.h"
memtype_free(paddr, paddr + size);
}
+static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
+ pgprot_t *pgprot)
+{
+ unsigned long prot;
+
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
+
+ /*
+ * We need the starting PFN and cachemode used for track_pfn_remap()
+ * that covered the whole VMA. For most mappings, we can obtain that
+ * information from the page tables. For COW mappings, we might now
+ * suddenly have anon folios mapped and follow_phys() will fail.
+ *
+ * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
+ * detect the PFN. If we need the cachemode as well, we're out of luck
+ * for now and have to fail fork().
+ */
+ if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) {
+ if (pgprot)
+ *pgprot = __pgprot(prot);
+ return 0;
+ }
+ if (is_cow_mapping(vma->vm_flags)) {
+ if (pgprot)
+ return -EINVAL;
+ *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+ return 0;
+ }
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+
/*
* track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
int track_pfn_copy(struct vm_area_struct *vma)
{
resource_size_t paddr;
- unsigned long prot;
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
if (vma->vm_flags & VM_PAT) {
- /*
- * reserve the whole chunk covered by vma. We need the
- * starting address and protection from pte.
- */
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, &pgprot))
return -EINVAL;
- }
- pgprot = __pgprot(prot);
+ /* reserve the whole chunk covered by vma. */
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
}
unsigned long size, bool mm_wr_locked)
{
resource_size_t paddr;
- unsigned long prot;
if (vma && !(vma->vm_flags & VM_PAT))
return;
/* free the chunk starting from pfn or the whole chunk */
paddr = (resource_size_t)pfn << PAGE_SHIFT;
if (!paddr && !size) {
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, NULL))
return;
- }
-
size = vma->vm_end - vma->vm_start;
}
free_pfn_range(paddr, size);
static int emit_rsb_call(u8 **pprog, void *func, void *ip)
{
OPTIMIZER_HIDE_VAR(func);
- x86_call_depth_emit_accounting(pprog, func);
+ ip += x86_call_depth_emit_accounting(pprog, func, ip);
return emit_patch(pprog, func, ip, 0xE8);
}
/* call */
case BPF_JMP | BPF_CALL: {
- int offs;
+ u8 *ip = image + addrs[i - 1];
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
- if (!imm32)
- return -EINVAL;
- offs = 7 + x86_call_depth_emit_accounting(&prog, func);
- } else {
- if (!imm32)
- return -EINVAL;
- offs = x86_call_depth_emit_accounting(&prog, func);
+ ip += 7;
}
- if (emit_call(&prog, func, image + addrs[i - 1] + offs))
+ if (!imm32)
+ return -EINVAL;
+ ip += x86_call_depth_emit_accounting(&prog, func, ip);
+ if (emit_call(&prog, func, ip))
return -EINVAL;
break;
}
* Direct-call fentry stub, as such it needs accounting for the
* __fentry__ call.
*/
- x86_call_depth_emit_accounting(&prog, NULL);
+ x86_call_depth_emit_accounting(&prog, NULL, image);
}
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += vmx/
+obj-y += svm/ vmx/
{
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
rdmsrl(MSR_AMD64_SYSCFG, val);
{
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
rdmsrl(MSR_AMD64_SYSCFG, val);
u64 rmptable_size;
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
if (!amd_iommu_snp_en)
- return 0;
+ goto nosnp;
if (!probed_rmp_size)
goto nosnp;
return 0;
nosnp:
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
return -ENOSYS;
}
{
struct rmpentry *large_entry, *entry;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return ERR_PTR(-ENODEV);
entry = get_rmpentry(pfn);
unsigned long paddr = pfn << PAGE_SHIFT;
int ret;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
if (!pfn_valid(pfn))
unsigned long paddr = pfn << PAGE_SHIFT;
int ret, level;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
level = RMP_TO_PG_LEVEL(state->pagesize);
spin_unlock(&snp_leaked_pages_list_lock);
}
EXPORT_SYMBOL_GPL(snp_leak_pages);
+
+void kdump_sev_callback(void)
+{
+ /*
+ * Do wbinvd() on remote CPUs when SNP is enabled in order to
+ * safely do SNP_SHUTDOWN on the local CPU.
+ */
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ wbinvd();
+}
mutex_unlock(&bdev->bd_holder_lock);
bd_clear_claiming(whole, holder);
mutex_unlock(&bdev_lock);
-
- if (hops && hops->get_holder)
- hops->get_holder(holder);
}
/**
static void bd_end_claim(struct block_device *bdev, void *holder)
{
struct block_device *whole = bdev_whole(bdev);
- const struct blk_holder_ops *hops = bdev->bd_holder_ops;
bool unblock = false;
/*
whole->bd_holder = NULL;
mutex_unlock(&bdev_lock);
- if (hops && hops->put_holder)
- hops->put_holder(holder);
-
/*
* If this was the last claim, remove holder link and unblock evpoll if
* it was a write holder.
bdev_write_inode(bdev);
}
+static void blkdev_put_whole(struct block_device *bdev)
+{
+ if (atomic_dec_and_test(&bdev->bd_openers))
+ blkdev_flush_mapping(bdev);
+ if (bdev->bd_disk->fops->release)
+ bdev->bd_disk->fops->release(bdev->bd_disk);
+}
+
static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
if (!atomic_read(&bdev->bd_openers))
set_init_blocksize(bdev);
- if (test_bit(GD_NEED_PART_SCAN, &disk->state))
- bdev_disk_changed(disk, false);
atomic_inc(&bdev->bd_openers);
+ if (test_bit(GD_NEED_PART_SCAN, &disk->state)) {
+ /*
+ * Only return scanning errors if we are called from contexts
+ * that explicitly want them, e.g. the BLKRRPART ioctl.
+ */
+ ret = bdev_disk_changed(disk, false);
+ if (ret && (mode & BLK_OPEN_STRICT_SCAN)) {
+ blkdev_put_whole(bdev);
+ return ret;
+ }
+ }
return 0;
}
-static void blkdev_put_whole(struct block_device *bdev)
-{
- if (atomic_dec_and_test(&bdev->bd_openers))
- blkdev_flush_mapping(bdev);
- if (bdev->bd_disk->fops->release)
- bdev->bd_disk->fops->release(bdev->bd_disk);
-}
-
static int blkdev_get_part(struct block_device *part, blk_mode_t mode)
{
struct gendisk *disk = part->bd_disk;
static bool bdev_writes_blocked(struct block_device *bdev)
{
- return bdev->bd_writers == -1;
+ return bdev->bd_writers < 0;
}
static void bdev_block_writes(struct block_device *bdev)
{
- bdev->bd_writers = -1;
+ bdev->bd_writers--;
}
static void bdev_unblock_writes(struct block_device *bdev)
{
- bdev->bd_writers = 0;
+ bdev->bd_writers++;
}
static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode)
bdev->bd_writers++;
}
+static inline bool bdev_unclaimed(const struct file *bdev_file)
+{
+ return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host);
+}
+
static void bdev_yield_write_access(struct file *bdev_file)
{
struct block_device *bdev;
if (bdev_allow_write_mounted)
return;
+ if (bdev_unclaimed(bdev_file))
+ return;
+
bdev = file_bdev(bdev_file);
- /* Yield exclusive or shared write access. */
- if (bdev_file->f_mode & FMODE_WRITE) {
- if (bdev_writes_blocked(bdev))
- bdev_unblock_writes(bdev);
- else
- bdev->bd_writers--;
- }
+
+ if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED)
+ bdev_unblock_writes(bdev);
+ else if (bdev_file->f_mode & FMODE_WRITE)
+ bdev->bd_writers--;
}
/**
goto abort_claiming;
ret = -EBUSY;
if (!bdev_may_open(bdev, mode))
- goto abort_claiming;
+ goto put_module;
if (bdev_is_partition(bdev))
ret = blkdev_get_part(bdev, mode);
else
bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
if (bdev_nowait(bdev))
bdev_file->f_mode |= FMODE_NOWAIT;
+ if (mode & BLK_OPEN_RESTRICT_WRITES)
+ bdev_file->f_mode |= FMODE_WRITE_RESTRICTED;
bdev_file->f_mapping = bdev->bd_inode->i_mapping;
bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
bdev_file->private_data = holder;
}
EXPORT_SYMBOL(bdev_file_open_by_path);
+static inline void bd_yield_claim(struct file *bdev_file)
+{
+ struct block_device *bdev = file_bdev(bdev_file);
+ void *holder = bdev_file->private_data;
+
+ lockdep_assert_held(&bdev->bd_disk->open_mutex);
+
+ if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder)))
+ return;
+
+ if (!bdev_unclaimed(bdev_file))
+ bd_end_claim(bdev, holder);
+}
+
void bdev_release(struct file *bdev_file)
{
struct block_device *bdev = file_bdev(bdev_file);
bdev_yield_write_access(bdev_file);
if (holder)
- bd_end_claim(bdev, holder);
+ bd_yield_claim(bdev_file);
/*
* Trigger event checking and tell drivers to flush MEDIA_CHANGE
blkdev_put_no_open(bdev);
}
+/**
+ * bdev_fput - yield claim to the block device and put the file
+ * @bdev_file: open block device
+ *
+ * Yield claim on the block device and put the file. Ensure that the
+ * block device can be reclaimed before the file is closed which is a
+ * deferred operation.
+ */
+void bdev_fput(struct file *bdev_file)
+{
+ if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops))
+ return;
+
+ if (bdev_file->private_data) {
+ struct block_device *bdev = file_bdev(bdev_file);
+ struct gendisk *disk = bdev->bd_disk;
+
+ mutex_lock(&disk->open_mutex);
+ bdev_yield_write_access(bdev_file);
+ bd_yield_claim(bdev_file);
+ /*
+ * Tell release we already gave up our hold on the
+ * device and if write restrictions are available that
+ * we already gave up write access to the device.
+ */
+ bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host);
+ mutex_unlock(&disk->open_mutex);
+ }
+
+ fput(bdev_file);
+}
+EXPORT_SYMBOL(bdev_fput);
+
/**
* lookup_bdev() - Look up a struct block_device by name.
* @pathname: Name of the block device in the filesystem.
return 0;
}
+void blkg_init_queue(struct request_queue *q)
+{
+ INIT_LIST_HEAD(&q->blkg_list);
+ mutex_init(&q->blkcg_mutex);
+}
+
int blkcg_init_disk(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
bool preloaded;
int ret;
- INIT_LIST_HEAD(&q->blkg_list);
- mutex_init(&q->blkcg_mutex);
-
new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
if (!new_blkg)
return -ENOMEM;
extern struct blkcg blkcg_root;
extern bool blkcg_debug_stats;
+void blkg_init_queue(struct request_queue *q);
int blkcg_init_disk(struct gendisk *disk);
void blkcg_exit_disk(struct gendisk *disk);
};
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline void blkg_init_queue(struct request_queue *q) { }
static inline int blkcg_init_disk(struct gendisk *disk) { return 0; }
static inline void blkcg_exit_disk(struct gendisk *disk) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
init_waitqueue_head(&q->mq_freeze_wq);
mutex_init(&q->mq_freeze_lock);
+ blkg_init_queue(q);
+
/*
* Init percpu_ref in atomic mode so that it's faster to shutdown.
* See blk_register_queue() for details.
if (unlikely(!rq_list_empty(plug->cached_rq)))
blk_mq_free_plug_rqs(plug);
+ plug->cur_ktime = 0;
current->flags &= ~PF_BLOCK_TS;
}
{
struct ioc *ioc = iocg->ioc;
struct blkcg_gq *blkg = iocg_to_blkg(iocg);
- u64 tdelta, delay, new_delay;
+ u64 tdelta, delay, new_delay, shift;
s64 vover, vover_pct;
u32 hwa;
/* calculate the current delay in effect - 1/2 every second */
tdelta = now->now - iocg->delay_at;
- if (iocg->delay)
- delay = iocg->delay >> div64_u64(tdelta, USEC_PER_SEC);
+ shift = div64_u64(tdelta, USEC_PER_SEC);
+ if (iocg->delay && shift < BITS_PER_LONG)
+ delay = iocg->delay >> shift;
else
delay = 0;
lockdep_assert_held(&iocg->ioc->lock);
lockdep_assert_held(&iocg->waitq.lock);
- /* make sure that nobody messed with @iocg */
- WARN_ON_ONCE(list_empty(&iocg->active_list));
+ /*
+ * make sure that nobody messed with @iocg. Check iocg->pd.online
+ * to avoid warn when removing blkcg or disk.
+ */
+ WARN_ON_ONCE(list_empty(&iocg->active_list) && iocg->pd.online);
WARN_ON_ONCE(iocg->inuse > 1);
iocg->abs_vdebt -= min(abs_vpay, iocg->abs_vdebt);
* which can be mixed are set in each bio and mark @rq as mixed
* merged.
*/
-void blk_rq_set_mixed_merge(struct request *rq)
+static void blk_rq_set_mixed_merge(struct request *rq)
{
blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
struct bio *bio;
/*
* Partial zone append completions cannot be supported as the
* BIO fragments may end up not being written sequentially.
- * For such case, force the completed nbytes to be equal to
- * the BIO size so that bio_advance() sets the BIO remaining
- * size to 0 and we end up calling bio_endio() before returning.
*/
- if (bio->bi_iter.bi_size != nbytes) {
+ if (bio->bi_iter.bi_size != nbytes)
bio->bi_status = BLK_STS_IOERR;
- nbytes = bio->bi_iter.bi_size;
- } else {
+ else
bio->bi_iter.bi_sector = rq->__sector;
- }
}
bio_advance(bio, nbytes);
max_hw_sectors = min_not_zero(lim->max_hw_sectors,
lim->max_dev_sectors);
if (lim->max_user_sectors) {
- if (lim->max_user_sectors > max_hw_sectors ||
- lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
+ if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
return -EINVAL;
lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
} else {
return -EINVAL;
/*
- * Devices that require a virtual boundary do not support scatter/gather
- * I/O natively, but instead require a descriptor list entry for each
- * page (which might not be identical to the Linux PAGE_SIZE). Because
- * of that they are not limited by our notion of "segment size".
+ * Stacking device may have both virtual boundary and max segment
+ * size limit, so allow this setting now, and long-term the two
+ * might need to move out of stacking limits since we have immutable
+ * bvec and lower layer bio splitting is supposed to handle the two
+ * correctly.
*/
- if (lim->virt_boundary_mask) {
- if (WARN_ON_ONCE(lim->max_segment_size &&
- lim->max_segment_size != UINT_MAX))
- return -EINVAL;
- lim->max_segment_size = UINT_MAX;
- } else {
+ if (!lim->virt_boundary_mask) {
/*
* The maximum segment size has an odd historic 64k default that
* drivers probably should override. Just like the I/O size we
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next);
unsigned int blk_recalc_rq_segments(struct request *rq);
-void blk_rq_set_mixed_merge(struct request *rq);
bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);
unsigned long arg)
{
uint64_t range[2];
- uint64_t start, len;
+ uint64_t start, len, end;
struct inode *inode = bdev->bd_inode;
int err;
if (len & 511)
return -EINVAL;
- if (start + len > bdev_nr_bytes(bdev))
+ if (check_add_overflow(start, len, &end) ||
+ end > bdev_nr_bytes(bdev))
return -EINVAL;
filemap_invalidate_lock(inode->i_mapping);
return -EACCES;
if (bdev_is_partition(bdev))
return -EINVAL;
- return disk_scan_partitions(bdev->bd_disk, mode);
+ return disk_scan_partitions(bdev->bd_disk,
+ mode | BLK_OPEN_STRICT_SCAN);
case BLKTRACESTART:
case BLKTRACESTOP:
case BLKTRACETEARDOWN:
oid = look_up_OID(value, vlen);
switch (oid) {
+ case OID_sha1:
+ ctx->digest_algo = "sha1";
+ break;
case OID_sha256:
ctx->digest_algo = "sha256";
break;
struct pkcs7_parse_context *ctx = context;
switch (ctx->last_oid) {
+ case OID_sha1:
+ ctx->sinfo->sig->hash_algo = "sha1";
+ break;
case OID_sha256:
ctx->sinfo->sig->hash_algo = "sha256";
break;
ctx->sinfo->sig->pkey_algo = "rsa";
ctx->sinfo->sig->encoding = "pkcs1";
break;
+ case OID_id_ecdsa_with_sha1:
case OID_id_ecdsa_with_sha224:
case OID_id_ecdsa_with_sha256:
case OID_id_ecdsa_with_sha384:
*/
if (!hash_algo)
return -EINVAL;
- if (strcmp(hash_algo, "sha224") != 0 &&
+ if (strcmp(hash_algo, "sha1") != 0 &&
+ strcmp(hash_algo, "sha224") != 0 &&
strcmp(hash_algo, "sha256") != 0 &&
strcmp(hash_algo, "sha384") != 0 &&
strcmp(hash_algo, "sha512") != 0 &&
* Sign the specified data blob using the private key specified by params->key.
* The signature is wrapped in an encoding if params->encoding is specified
* (eg. "pkcs1"). If the encoding needs to know the digest type, this can be
- * passed through params->hash_algo (eg. "sha512").
+ * passed through params->hash_algo (eg. "sha1").
*
* Returns the length of the data placed in the signature buffer or an error.
*/
default:
return -ENOPKG; /* Unsupported combination */
+ case OID_sha1WithRSAEncryption:
+ ctx->cert->sig->hash_algo = "sha1";
+ goto rsa_pkcs1;
+
case OID_sha256WithRSAEncryption:
ctx->cert->sig->hash_algo = "sha256";
goto rsa_pkcs1;
ctx->cert->sig->hash_algo = "sha224";
goto rsa_pkcs1;
+ case OID_id_ecdsa_with_sha1:
+ ctx->cert->sig->hash_algo = "sha1";
+ goto ecdsa;
+
case OID_id_rsassa_pkcs1_v1_5_with_sha3_256:
ctx->cert->sig->hash_algo = "sha3-256";
goto rsa_pkcs1;
static const struct akcipher_testvec ecdsa_nist_p192_tv_template[] = {
{
.key =
+ "\x04\xf7\x46\xf8\x2f\x15\xf6\x22\x8e\xd7\x57\x4f\xcc\xe7\xbb\xc1"
+ "\xd4\x09\x73\xcf\xea\xd0\x15\x07\x3d\xa5\x8a\x8a\x95\x43\xe4\x68"
+ "\xea\xc6\x25\xc1\xc1\x01\x25\x4c\x7e\xc3\x3c\xa6\x04\x0a\xe7\x08"
+ "\x98",
+ .key_len = 49,
+ .params =
+ "\x30\x13\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x08\x2a\x86\x48"
+ "\xce\x3d\x03\x01\x01",
+ .param_len = 21,
+ .m =
+ "\xcd\xb9\xd2\x1c\xb7\x6f\xcd\x44\xb3\xfd\x63\xea\xa3\x66\x7f\xae"
+ "\x63\x85\xe7\x82",
+ .m_size = 20,
+ .algo = OID_id_ecdsa_with_sha1,
+ .c =
+ "\x30\x35\x02\x19\x00\xba\xe5\x93\x83\x6e\xb6\x3b\x63\xa0\x27\x91"
+ "\xc6\xf6\x7f\xc3\x09\xad\x59\xad\x88\x27\xd6\x92\x6b\x02\x18\x10"
+ "\x68\x01\x9d\xba\xce\x83\x08\xef\x95\x52\x7b\xa0\x0f\xe4\x18\x86"
+ "\x80\x6f\xa5\x79\x77\xda\xd0",
+ .c_size = 55,
+ .public_key_vec = true,
+ .siggen_sigver_test = true,
+ }, {
+ .key =
"\x04\xb6\x4b\xb1\xd1\xac\xba\x24\x8f\x65\xb2\x60\x00\x90\xbf\xbd"
"\x78\x05\x73\xe9\x79\x1d\x6f\x7c\x0b\xd2\xc3\x93\xa7\x28\xe1\x75"
"\xf7\xd5\x95\x1d\x28\x10\xc0\x75\x50\x5c\x1a\x4f\x3f\x8f\xa5\xee"
static const struct akcipher_testvec ecdsa_nist_p256_tv_template[] = {
{
.key =
+ "\x04\xb9\x7b\xbb\xd7\x17\x64\xd2\x7e\xfc\x81\x5d\x87\x06\x83\x41"
+ "\x22\xd6\x9a\xaa\x87\x17\xec\x4f\x63\x55\x2f\x94\xba\xdd\x83\xe9"
+ "\x34\x4b\xf3\xe9\x91\x13\x50\xb6\xcb\xca\x62\x08\xe7\x3b\x09\xdc"
+ "\xc3\x63\x4b\x2d\xb9\x73\x53\xe4\x45\xe6\x7c\xad\xe7\x6b\xb0\xe8"
+ "\xaf",
+ .key_len = 65,
+ .params =
+ "\x30\x13\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x08\x2a\x86\x48"
+ "\xce\x3d\x03\x01\x07",
+ .param_len = 21,
+ .m =
+ "\xc2\x2b\x5f\x91\x78\x34\x26\x09\x42\x8d\x6f\x51\xb2\xc5\xaf\x4c"
+ "\x0b\xde\x6a\x42",
+ .m_size = 20,
+ .algo = OID_id_ecdsa_with_sha1,
+ .c =
+ "\x30\x46\x02\x21\x00\xf9\x25\xce\x9f\x3a\xa6\x35\x81\xcf\xd4\xe7"
+ "\xb7\xf0\x82\x56\x41\xf7\xd4\xad\x8d\x94\x5a\x69\x89\xee\xca\x6a"
+ "\x52\x0e\x48\x4d\xcc\x02\x21\x00\xd7\xe4\xef\x52\x66\xd3\x5b\x9d"
+ "\x8a\xfa\x54\x93\x29\xa7\x70\x86\xf1\x03\x03\xf3\x3b\xe2\x73\xf7"
+ "\xfb\x9d\x8b\xde\xd4\x8d\x6f\xad",
+ .c_size = 72,
+ .public_key_vec = true,
+ .siggen_sigver_test = true,
+ }, {
+ .key =
"\x04\x8b\x6d\xc0\x33\x8e\x2d\x8b\x67\xf5\xeb\xc4\x7f\xa0\xf5\xd9"
"\x7b\x03\xa5\x78\x9a\xb5\xea\x14\xe4\x23\xd0\xaf\xd7\x0e\x2e\xa0"
"\xc9\x8b\xdb\x95\xf8\xb3\xaf\xac\x00\x2c\x2c\x1f\x7a\xfd\x95\x88"
static const struct akcipher_testvec ecdsa_nist_p384_tv_template[] = {
{
+ .key = /* secp384r1(sha1) */
+ "\x04\x89\x25\xf3\x97\x88\xcb\xb0\x78\xc5\x72\x9a\x14\x6e\x7a\xb1"
+ "\x5a\xa5\x24\xf1\x95\x06\x9e\x28\xfb\xc4\xb9\xbe\x5a\x0d\xd9\x9f"
+ "\xf3\xd1\x4d\x2d\x07\x99\xbd\xda\xa7\x66\xec\xbb\xea\xba\x79\x42"
+ "\xc9\x34\x89\x6a\xe7\x0b\xc3\xf2\xfe\x32\x30\xbe\xba\xf9\xdf\x7e"
+ "\x4b\x6a\x07\x8e\x26\x66\x3f\x1d\xec\xa2\x57\x91\x51\xdd\x17\x0e"
+ "\x0b\x25\xd6\x80\x5c\x3b\xe6\x1a\x98\x48\x91\x45\x7a\x73\xb0\xc3"
+ "\xf1",
+ .key_len = 97,
+ .params =
+ "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04"
+ "\x00\x22",
+ .param_len = 18,
+ .m =
+ "\x12\x55\x28\xf0\x77\xd5\xb6\x21\x71\x32\x48\xcd\x28\xa8\x25\x22"
+ "\x3a\x69\xc1\x93",
+ .m_size = 20,
+ .algo = OID_id_ecdsa_with_sha1,
+ .c =
+ "\x30\x66\x02\x31\x00\xf5\x0f\x24\x4c\x07\x93\x6f\x21\x57\x55\x07"
+ "\x20\x43\x30\xde\xa0\x8d\x26\x8e\xae\x63\x3f\xbc\x20\x3a\xc6\xf1"
+ "\x32\x3c\xce\x70\x2b\x78\xf1\x4c\x26\xe6\x5b\x86\xcf\xec\x7c\x7e"
+ "\xd0\x87\xd7\xd7\x6e\x02\x31\x00\xcd\xbb\x7e\x81\x5d\x8f\x63\xc0"
+ "\x5f\x63\xb1\xbe\x5e\x4c\x0e\xa1\xdf\x28\x8c\x1b\xfa\xf9\x95\x88"
+ "\x74\xa0\x0f\xbf\xaf\xc3\x36\x76\x4a\xa1\x59\xf1\x1c\xa4\x58\x26"
+ "\x79\x12\x2a\xb7\xc5\x15\x92\xc5",
+ .c_size = 104,
+ .public_key_vec = true,
+ .siggen_sigver_test = true,
+ }, {
.key = /* secp384r1(sha224) */
"\x04\x69\x6c\xcf\x62\xee\xd0\x0d\xe5\xb5\x2f\x70\x54\xcf\x26\xa0"
"\xd9\x98\x8d\x92\x2a\xab\x9b\x11\xcb\x48\x18\xa1\xa9\x0d\xd5\x18"
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/firmware.h>
return 0;
}
-static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate)
-{
- int ret;
-
- ret = ivpu_rpm_get_if_active(vdev);
- if (ret < 0)
- return ret;
-
- *clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0;
-
- if (ret)
- ivpu_rpm_put(vdev);
-
- return 0;
-}
-
static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
- ret = ivpu_get_core_clock_rate(vdev, &args->value);
+ args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
{
int ret;
- ivpu_prepare_for_reset(vdev);
+ /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
+ pci_save_state(to_pci_dev(vdev->drm.dev));
ret = ivpu_hw_power_down(vdev);
if (ret)
ivpu_warn(vdev, "Failed to power down HW: %d\n", ret);
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+
return ret;
}
vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
atomic64_set(&vdev->unique_id_counter, 0);
- xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
+ xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
/* Power up early so the rest of init code can access VPU registers */
ret = ivpu_hw_power_up(vdev);
if (ret)
- goto err_power_down;
+ goto err_shutdown;
ret = ivpu_mmu_global_context_init(vdev);
if (ret)
- goto err_power_down;
+ goto err_shutdown;
ret = ivpu_mmu_init(vdev);
if (ret)
ivpu_mmu_reserved_context_fini(vdev);
err_mmu_gctx_fini:
ivpu_mmu_global_context_fini(vdev);
-err_power_down:
- ivpu_hw_power_down(vdev);
- if (IVPU_WA(d3hot_after_power_off))
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+err_shutdown:
+ ivpu_shutdown(vdev);
err_xa_destroy:
xa_destroy(&vdev->db_xa);
xa_destroy(&vdev->submitted_jobs_xa);
static void ivpu_dev_fini(struct ivpu_device *vdev)
{
ivpu_pm_disable(vdev);
+ ivpu_prepare_for_reset(vdev);
ivpu_shutdown(vdev);
- if (IVPU_WA(d3hot_after_power_off))
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
ivpu_jobs_abort_all(vdev);
ivpu_job_done_consumer_fini(vdev);
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#ifndef __IVPU_DRV_H__
struct ivpu_wa_table {
bool punit_disabled;
bool clear_runtime_mem;
- bool d3hot_after_power_off;
bool interrupt_clear_with_0;
bool disable_clock_relinquish;
bool disable_d0i3_msg;
u32 (*profiling_freq_get)(struct ivpu_device *vdev);
void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable);
u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
+ u32 (*ratio_to_freq)(struct ivpu_device *vdev, u32 ratio);
u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev);
return vdev->hw->ops->reg_pll_freq_get(vdev);
};
+static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ return vdev->hw->ops->ratio_to_freq(vdev, ratio);
+}
+
static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev)
{
return vdev->hw->ops->reg_telemetry_offset_get(vdev);
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include "ivpu_drv.h"
{
vdev->wa.punit_disabled = false;
vdev->wa.clear_runtime_mem = false;
- vdev->wa.d3hot_after_power_off = true;
REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK);
if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) {
IVPU_PRINT_WA(punit_disabled);
IVPU_PRINT_WA(clear_runtime_mem);
- IVPU_PRINT_WA(d3hot_after_power_off);
IVPU_PRINT_WA(interrupt_clear_with_0);
}
/* Profiling freq - is a debug feature. Unavailable on VPU 37XX. */
}
-static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
+static u32 ivpu_hw_37xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
{
u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
u32 cpu_clock;
- if ((config & 0xff) == PLL_RATIO_4_3)
+ if ((vdev->hw->config & 0xff) == PLL_RATIO_4_3)
cpu_clock = pll_clock * 2 / 4;
else
cpu_clock = pll_clock * 2 / 5;
if (!ivpu_is_silicon(vdev))
return PLL_SIMULATION_FREQ;
- return ivpu_hw_37xx_pll_to_freq(pll_curr_ratio, vdev->hw->config);
+ return ivpu_hw_37xx_ratio_to_freq(vdev, pll_curr_ratio);
}
static u32 ivpu_hw_37xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
.profiling_freq_get = ivpu_hw_37xx_profiling_freq_get,
.profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get,
+ .ratio_to_freq = ivpu_hw_37xx_ratio_to_freq,
.reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get,
.reg_telemetry_enable_get = ivpu_hw_37xx_reg_telemetry_enable_get,
return PLL_RATIO_TO_FREQ(pll_curr_ratio);
}
+static u32 ivpu_hw_40xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ return PLL_RATIO_TO_FREQ(ratio);
+}
+
static u32 ivpu_hw_40xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
{
return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET);
.profiling_freq_get = ivpu_hw_40xx_profiling_freq_get,
.profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get,
+ .ratio_to_freq = ivpu_hw_40xx_ratio_to_freq,
.reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get,
.reg_telemetry_enable_get = ivpu_hw_40xx_reg_telemetry_enable_get,
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/genalloc.h>
spin_lock_init(&ipc->cons_lock);
INIT_LIST_HEAD(&ipc->cons_list);
INIT_LIST_HEAD(&ipc->cb_msg_list);
- drmm_mutex_init(&vdev->drm, &ipc->lock);
+ ret = drmm_mutex_init(&vdev->drm, &ipc->lock);
+ if (ret) {
+ ivpu_err(vdev, "Failed to initialize ipc->lock, ret %d\n", ret);
+ goto err_free_rx;
+ }
ivpu_ipc_reset(vdev);
return 0;
case IVPU_MMU_EVT_F_VMS_FETCH:
return "Fetch of VMS caused external abort";
default:
- return "Unknown CMDQ command";
+ return "Unknown event";
}
}
{
switch (err) {
case IVPU_MMU_CERROR_NONE:
- return "No CMDQ Error";
+ return "No error";
case IVPU_MMU_CERROR_ILL:
return "Illegal command";
case IVPU_MMU_CERROR_ABT:
- return "External abort on CMDQ read";
+ return "External abort on command queue read";
case IVPU_MMU_CERROR_ATC_INV_SYNC:
return "Sync failed to complete ATS invalidation";
default:
- return "Unknown CMDQ Error";
+ return "Unknown error";
}
}
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/highmem.h>
{
int ret;
- /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
- pci_save_state(to_pci_dev(vdev->drm.dev));
+ ivpu_prepare_for_reset(vdev);
ret = ivpu_shutdown(vdev);
if (ret)
- ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
-
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+ ivpu_err(vdev, "Failed to shutdown NPU: %d\n", ret);
return ret;
}
{
int ret;
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
+retry:
pci_restore_state(to_pci_dev(vdev->drm.dev));
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
-retry:
ret = ivpu_hw_power_up(vdev);
if (ret) {
ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
ivpu_mmu_disable(vdev);
err_power_down:
ivpu_hw_power_down(vdev);
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
if (!ivpu_fw_is_cold_boot(vdev)) {
ivpu_pm_prepare_cold_boot(vdev);
}
attr_ch = get_char(vc, (u_short *)tmp_pos, &spk_attr);
buf[cnt++] = attr_ch;
- while (tmpx < vc->vc_cols - 1) {
+ while (tmpx < vc->vc_cols - 1 && cnt < sizeof(buf) - 1) {
tmp_pos += 2;
tmpx++;
ch = get_char(vc, (u_short *)tmp_pos, &temp);
ACPI_FREE(buffer.pointer);
buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
- acpi_evaluate_object(obj_handle, NULL, NULL, &buffer);
-
+ status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer);
+ if (ACPI_FAILURE(status)) {
+ acpi_os_printf("Could Not evaluate object %p\n",
+ obj_handle);
+ return (AE_OK);
+ }
/*
* Since this is a field unit, surround the output in braces
*/
return rc;
}
-static void __exit einj_remove(struct platform_device *pdev)
+static void einj_remove(struct platform_device *pdev)
{
struct apei_exec_context ctx;
#define GET_BIT_WIDTH(reg) ((reg)->access_width ? (8 << ((reg)->access_width - 1)) : (reg)->bit_width)
/* Shift and apply the mask for CPC reads/writes */
-#define MASK_VAL(reg, val) ((val) >> ((reg)->bit_offset & \
- GENMASK(((reg)->bit_width), 0)))
+#define MASK_VAL(reg, val) (((val) >> (reg)->bit_offset) & \
+ GENMASK(((reg)->bit_width) - 1, 0))
static ssize_t show_feedback_ctrs(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
}
*val = 0;
+ size = GET_BIT_WIDTH(reg);
if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
- u32 width = GET_BIT_WIDTH(reg);
u32 val_u32;
acpi_status status;
status = acpi_os_read_port((acpi_io_address)reg->address,
- &val_u32, width);
+ &val_u32, size);
if (ACPI_FAILURE(status)) {
pr_debug("Error: Failed to read SystemIO port %llx\n",
reg->address);
*val = val_u32;
return 0;
- } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0)
+ } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) {
+ /*
+ * For registers in PCC space, the register size is determined
+ * by the bit width field; the access size is used to indicate
+ * the PCC subspace id.
+ */
+ size = reg->bit_width;
vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id);
+ }
else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
vaddr = reg_res->sys_mem_vaddr;
else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE)
return cpc_read_ffh(cpu, reg, val);
else
return acpi_os_read_memory((acpi_physical_address)reg->address,
- val, reg->bit_width);
-
- size = GET_BIT_WIDTH(reg);
+ val, size);
switch (size) {
case 8:
*val = readq_relaxed(vaddr);
break;
default:
- pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n",
- reg->bit_width, pcc_ss_id);
+ if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+ pr_debug("Error: Cannot read %u bit width from system memory: 0x%llx\n",
+ size, reg->address);
+ } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
+ pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n",
+ size, pcc_ss_id);
+ }
return -EFAULT;
}
int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
struct cpc_reg *reg = ®_res->cpc_entry.reg;
+ size = GET_BIT_WIDTH(reg);
+
if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
- u32 width = GET_BIT_WIDTH(reg);
acpi_status status;
status = acpi_os_write_port((acpi_io_address)reg->address,
- (u32)val, width);
+ (u32)val, size);
if (ACPI_FAILURE(status)) {
pr_debug("Error: Failed to write SystemIO port %llx\n",
reg->address);
}
return 0;
- } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0)
+ } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) {
+ /*
+ * For registers in PCC space, the register size is determined
+ * by the bit width field; the access size is used to indicate
+ * the PCC subspace id.
+ */
+ size = reg->bit_width;
vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id);
+ }
else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
vaddr = reg_res->sys_mem_vaddr;
else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE)
return cpc_write_ffh(cpu, reg, val);
else
return acpi_os_write_memory((acpi_physical_address)reg->address,
- val, reg->bit_width);
-
- size = GET_BIT_WIDTH(reg);
+ val, size);
if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
val = MASK_VAL(reg, val);
writeq_relaxed(val, vaddr);
break;
default:
- pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n",
- reg->bit_width, pcc_ss_id);
+ if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+ pr_debug("Error: Cannot write %u bit width to system memory: 0x%llx\n",
+ size, reg->address);
+ } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
+ pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n",
+ size, pcc_ss_id);
+ }
ret_val = -EFAULT;
break;
}
if (dep->honor_dep)
adev->flags.honor_deps = 1;
- adev->dep_unmet++;
+ if (!dep->met)
+ adev->dep_unmet++;
}
}
}
{
int result;
- tz->thermal_zone = thermal_zone_device_register_with_trips("acpitz",
- trip_table,
- trip_count,
- tz,
- &acpi_thermal_zone_ops,
- NULL,
- passive_delay,
- tz->polling_frequency * 100);
+ if (trip_count)
+ tz->thermal_zone = thermal_zone_device_register_with_trips(
+ "acpitz", trip_table, trip_count, tz,
+ &acpi_thermal_zone_ops, NULL, passive_delay,
+ tz->polling_frequency * 100);
+ else
+ tz->thermal_zone = thermal_tripless_zone_device_register(
+ "acpitz", tz, &acpi_thermal_zone_ops, NULL);
+
if (IS_ERR(tz->thermal_zone))
return PTR_ERR(tz->thermal_zone);
trip++;
}
- if (trip == trip_table) {
+ if (trip == trip_table)
pr_warn(FW_BUG "No valid trip points!\n");
- result = -ENODEV;
- goto free_memory;
- }
result = acpi_thermal_register_thermal_zone(tz, trip_table,
trip - trip_table,
unsigned int func_mask;
/*
- * Avoid evaluating the same _DSM function for two
- * different UUIDs and prioritize the MSFT one.
+ * Log a message if the _DSM function sets for two
+ * different UUIDs overlap.
*/
func_mask = lps0_dsm_func_mask & lps0_dsm_func_mask_microsoft;
- if (func_mask) {
+ if (func_mask)
acpi_handle_info(adev->handle,
"Duplicate LPS0 _DSM functions (mask: 0x%x)\n",
func_mask);
- lps0_dsm_func_mask &= ~func_mask;
- }
}
}
size_t object_size = 0;
read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset);
- if (offset > buffer->data_size || read_size < sizeof(*hdr))
+ if (offset > buffer->data_size || read_size < sizeof(*hdr) ||
+ !IS_ALIGNED(offset, sizeof(u32)))
return 0;
+
if (u) {
if (copy_from_user(object, u + offset, read_size))
return 0;
module_param(mobile_lpm_policy, int, 0644);
MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets");
+static char *ahci_mask_port_map;
+module_param_named(mask_port_map, ahci_mask_port_map, charp, 0444);
+MODULE_PARM_DESC(mask_port_map,
+ "32-bits port map masks to ignore controllers ports. "
+ "Valid values are: "
+ "\"<mask>\" to apply the same mask to all AHCI controller "
+ "devices, and \"<pci_dev>=<mask>,<pci_dev>=<mask>,...\" to "
+ "specify different masks for the controllers specified, "
+ "where <pci_dev> is the PCI ID of an AHCI controller in the "
+ "form \"domain:bus:dev.func\"");
+
+static void ahci_apply_port_map_mask(struct device *dev,
+ struct ahci_host_priv *hpriv, char *mask_s)
+{
+ unsigned int mask;
+
+ if (kstrtouint(mask_s, 0, &mask)) {
+ dev_err(dev, "Invalid port map mask\n");
+ return;
+ }
+
+ hpriv->mask_port_map = mask;
+}
+
+static void ahci_get_port_map_mask(struct device *dev,
+ struct ahci_host_priv *hpriv)
+{
+ char *param, *end, *str, *mask_s;
+ char *name;
+
+ if (!strlen(ahci_mask_port_map))
+ return;
+
+ str = kstrdup(ahci_mask_port_map, GFP_KERNEL);
+ if (!str)
+ return;
+
+ /* Handle single mask case */
+ if (!strchr(str, '=')) {
+ ahci_apply_port_map_mask(dev, hpriv, str);
+ goto free;
+ }
+
+ /*
+ * Mask list case: parse the parameter to apply the mask only if
+ * the device name matches.
+ */
+ param = str;
+ end = param + strlen(param);
+ while (param && param < end && *param) {
+ name = param;
+ param = strchr(name, '=');
+ if (!param)
+ break;
+
+ *param = '\0';
+ param++;
+ if (param >= end)
+ break;
+
+ if (strcmp(dev_name(dev), name) != 0) {
+ param = strchr(param, ',');
+ if (param)
+ param++;
+ continue;
+ }
+
+ mask_s = param;
+ param = strchr(mask_s, ',');
+ if (param) {
+ *param = '\0';
+ param++;
+ }
+
+ ahci_apply_port_map_mask(dev, hpriv, mask_s);
+ }
+
+free:
+ kfree(str);
+}
+
static void ahci_pci_save_initial_config(struct pci_dev *pdev,
struct ahci_host_priv *hpriv)
{
"Disabling your PATA port. Use the boot option 'ahci.marvell_enable=0' to avoid this.\n");
}
+ /* Handle port map masks passed as module parameter. */
+ if (ahci_mask_port_map)
+ ahci_get_port_map_mask(&pdev->dev, hpriv);
+
ahci_save_initial_config(&pdev->dev, hpriv);
}
#define ST_AHCI_OOBR_CIMAX_SHIFT 0
struct st_ahci_drv_data {
- struct platform_device *ahci;
struct reset_control *pwr;
struct reset_control *sw_rst;
struct reset_control *pwr_rst;
bool cdl_enabled;
u64 val;
- if (ata_id_major_version(dev->id) < 12)
+ if (ata_id_major_version(dev->id) < 11)
goto not_supported;
if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE) ||
ehc->saved_ncq_enabled |= 1 << devno;
/* If we are resuming, wake up the device */
- if (ap->pflags & ATA_PFLAG_RESUMING)
+ if (ap->pflags & ATA_PFLAG_RESUMING) {
+ dev->flags |= ATA_DFLAG_RESUMING;
ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
+ }
}
}
return 0;
err:
+ dev->flags &= ~ATA_DFLAG_RESUMING;
*r_failed_dev = dev;
return rc;
}
struct ata_link *link;
struct ata_device *dev;
unsigned long flags;
+ bool do_resume;
int ret = 0;
mutex_lock(&ap->scsi_scan_mutex);
* bail out.
*/
if (ap->pflags & ATA_PFLAG_SUSPENDED)
- goto unlock;
+ goto unlock_ap;
if (!sdev)
continue;
if (scsi_device_get(sdev))
continue;
+ do_resume = dev->flags & ATA_DFLAG_RESUMING;
+
spin_unlock_irqrestore(ap->lock, flags);
+ if (do_resume) {
+ ret = scsi_resume_device(sdev);
+ if (ret == -EWOULDBLOCK)
+ goto unlock_scan;
+ dev->flags &= ~ATA_DFLAG_RESUMING;
+ }
ret = scsi_rescan_device(sdev);
scsi_device_put(sdev);
spin_lock_irqsave(ap->lock, flags);
if (ret)
- goto unlock;
+ goto unlock_ap;
}
}
-unlock:
+unlock_ap:
spin_unlock_irqrestore(ap->lock, flags);
+unlock_scan:
mutex_unlock(&ap->scsi_scan_mutex);
/* Reschedule with a delay if scsi_rescan_device() returned an error */
.suspend = pata_macio_pci_suspend,
.resume = pata_macio_pci_resume,
#endif
- .driver = {
- .owner = THIS_MODULE,
- },
};
MODULE_DEVICE_TABLE(pci, pata_macio_pci_match);
pclk = sg->sata0_pclk;
else
pclk = sg->sata1_pclk;
- clk_enable(pclk);
+ ret = clk_enable(pclk);
+ if (ret)
+ return ret;
+
msleep(10);
/* Do not keep clocking a bridge that is not online */
},
};
-static const struct pci_device_id mv_pci_tbl[] = {
- { PCI_VDEVICE(MARVELL, 0x5040), chip_504x },
- { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
- { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
- { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
- /* RocketRAID 1720/174x have different identifiers */
- { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
- { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
- { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
-
- { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
- { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
- { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 },
- { PCI_VDEVICE(MARVELL, 0x6080), chip_608x },
- { PCI_VDEVICE(MARVELL, 0x6081), chip_608x },
-
- { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x },
-
- /* Adaptec 1430SA */
- { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 },
-
- /* Marvell 7042 support */
- { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 },
-
- /* Highpoint RocketRAID PCIe series */
- { PCI_VDEVICE(TTI, 0x2300), chip_7042 },
- { PCI_VDEVICE(TTI, 0x2310), chip_7042 },
-
- { } /* terminate list */
-};
-
static const struct mv_hw_ops mv5xxx_ops = {
.phy_errata = mv5_phy_errata,
.enable_leds = mv5_enable_leds,
static int mv_pci_device_resume(struct pci_dev *pdev);
#endif
+static const struct pci_device_id mv_pci_tbl[] = {
+ { PCI_VDEVICE(MARVELL, 0x5040), chip_504x },
+ { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
+ { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
+ { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
+ /* RocketRAID 1720/174x have different identifiers */
+ { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
+ { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
+ { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
+
+ { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
+ { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
+ { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 },
+ { PCI_VDEVICE(MARVELL, 0x6080), chip_608x },
+ { PCI_VDEVICE(MARVELL, 0x6081), chip_608x },
+
+ { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x },
+
+ /* Adaptec 1430SA */
+ { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 },
+
+ /* Marvell 7042 support */
+ { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 },
+
+ /* Highpoint RocketRAID PCIe series */
+ { PCI_VDEVICE(TTI, 0x2300), chip_7042 },
+ { PCI_VDEVICE(TTI, 0x2310), chip_7042 },
+
+ { } /* terminate list */
+};
static struct pci_driver mv_pci_driver = {
.name = DRV_NAME,
#endif
};
+MODULE_DEVICE_TABLE(pci, mv_pci_tbl);
/**
* mv_print_info - Dump key info to kernel log for perusal.
MODULE_AUTHOR("Brett Russ");
MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers");
MODULE_LICENSE("GPL v2");
-MODULE_DEVICE_TABLE(pci, mv_pci_tbl);
MODULE_VERSION(DRV_VERSION);
MODULE_ALIAS("platform:" DRV_NAME);
offset -= (idx * window_size);
idx++;
- dist = ((long) (window_size - (offset + size))) >= 0 ? size :
- (long) (window_size - offset);
+ dist = min(size, window_size - offset);
memcpy_fromio(psource, dimm_mmio + offset / 4, dist);
psource += dist;
readl(mmio + PDC_DIMM_WINDOW_CTLR);
offset -= (idx * window_size);
idx++;
- dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size :
- (long) (window_size - offset);
+ dist = min(size, window_size - offset);
memcpy_toio(dimm_mmio + offset / 4, psource, dist);
writel(0x01, mmio + PDC_GENERAL_CTLR);
readl(mmio + PDC_GENERAL_CTLR);
static void __fw_devlink_link_to_consumers(struct device *dev);
static bool fw_devlink_drv_reg_done;
static bool fw_devlink_best_effort;
+static struct workqueue_struct *device_link_wq;
/**
* __fwnode_link_add - Create a link between two fwnode_handles.
/*
* It may take a while to complete this work because of the SRCU
* synchronization in device_link_release_fn() and if the consumer or
- * supplier devices get deleted when it runs, so put it into the "long"
- * workqueue.
+ * supplier devices get deleted when it runs, so put it into the
+ * dedicated workqueue.
*/
- queue_work(system_long_wq, &link->rm_work);
+ queue_work(device_link_wq, &link->rm_work);
}
+/**
+ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate
+ */
+void device_link_wait_removal(void)
+{
+ /*
+ * devlink removal jobs are queued in the dedicated work queue.
+ * To be sure that all removal jobs are terminated, ensure that any
+ * scheduled work has run to completion.
+ */
+ flush_workqueue(device_link_wq);
+}
+EXPORT_SYMBOL_GPL(device_link_wait_removal);
+
static struct class devlink_class = {
.name = "devlink",
.dev_groups = devlink_groups,
sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
if (!sysfs_dev_char_kobj)
goto char_kobj_err;
+ device_link_wq = alloc_workqueue("device_link_wq", 0, 0);
+ if (!device_link_wq)
+ goto wq_err;
return 0;
+ wq_err:
+ kobject_put(sysfs_dev_char_kobj);
char_kobj_err:
kobject_put(sysfs_dev_block_kobj);
block_kobj_err:
unsigned long *entry, *lower, *upper;
unsigned long lower_index, lower_last;
unsigned long upper_index, upper_last;
- int ret;
+ int ret = 0;
lower = NULL;
upper = NULL;
upper_index = max + 1;
upper_last = mas.last;
- upper = kmemdup(&entry[max + 1],
+ upper = kmemdup(&entry[max - mas.index + 1],
((mas.last - max) *
sizeof(unsigned long)),
map->alloc_flags);
unsigned long lmin = min;
unsigned long lmax = max;
unsigned int r, v, sync_start;
- int ret;
+ int ret = 0;
bool sync_needed = false;
map->cache_bypass = true;
out_ida_free:
ida_free(&nullb_indexes, nullb->index);
-out_cleanup_zone:
- null_free_zoned_dev(dev);
out_cleanup_disk:
put_disk(nullb->disk);
+out_cleanup_zone:
+ null_free_zoned_dev(dev);
out_cleanup_tags:
if (nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
switch (data->cd_info.state) {
case HCI_DEVCOREDUMP_IDLE:
err = hci_devcd_init(hdev, MTK_COREDUMP_SIZE);
- if (err < 0)
+ if (err < 0) {
+ kfree_skb(skb);
break;
+ }
data->cd_info.cnt = 0;
/* It is supposed coredump can be done within 5 seconds */
break;
}
- if (err < 0)
- kfree_skb(skb);
-
return err;
}
EXPORT_SYMBOL_GPL(btmtk_process_coredump);
#define VERSION "0.1"
+#define QCA_BDADDR_DEFAULT (&(bdaddr_t) {{ 0xad, 0x5a, 0x00, 0x00, 0x00, 0x00 }})
+
int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver,
enum qca_btsoc_type soc_type)
{
}
EXPORT_SYMBOL_GPL(qca_set_bdaddr_rome);
+static int qca_check_bdaddr(struct hci_dev *hdev)
+{
+ struct hci_rp_read_bd_addr *bda;
+ struct sk_buff *skb;
+ int err;
+
+ if (bacmp(&hdev->public_addr, BDADDR_ANY))
+ return 0;
+
+ skb = __hci_cmd_sync(hdev, HCI_OP_READ_BD_ADDR, 0, NULL,
+ HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ bt_dev_err(hdev, "Failed to read device address (%d)", err);
+ return err;
+ }
+
+ if (skb->len != sizeof(*bda)) {
+ bt_dev_err(hdev, "Device address length mismatch");
+ kfree_skb(skb);
+ return -EIO;
+ }
+
+ bda = (struct hci_rp_read_bd_addr *)skb->data;
+ if (!bacmp(&bda->bdaddr, QCA_BDADDR_DEFAULT))
+ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+
+ kfree_skb(skb);
+
+ return 0;
+}
+
static void qca_generate_hsp_nvm_name(char *fwname, size_t max_size,
struct qca_btsoc_version ver, u8 rom_ver, u16 bid)
{
break;
}
+ err = qca_check_bdaddr(hdev);
+ if (err)
+ return err;
+
bt_dev_info(hdev, "QCA setup on UART is completed");
return 0;
int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
{
+ bdaddr_t bdaddr_swapped;
struct sk_buff *skb;
int err;
- skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr,
- HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
+ baswap(&bdaddr_swapped, bdaddr);
+
+ skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6,
+ &bdaddr_swapped, HCI_EV_VENDOR,
+ HCI_INIT_TIMEOUT);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err);
/* Realtek 8852BE Bluetooth devices */
{ USB_DEVICE(0x0cb8, 0xc559), .driver_info = BTUSB_REALTEK |
BTUSB_WIDEBAND_SPEECH },
+ { USB_DEVICE(0x0bda, 0x4853), .driver_info = BTUSB_REALTEK |
+ BTUSB_WIDEBAND_SPEECH },
{ USB_DEVICE(0x0bda, 0x887b), .driver_info = BTUSB_REALTEK |
BTUSB_WIDEBAND_SPEECH },
{ USB_DEVICE(0x0bda, 0xb85b), .driver_info = BTUSB_REALTEK |
static void btusb_coredump_qca(struct hci_dev *hdev)
{
+ int err;
static const u8 param[] = { 0x26 };
- struct sk_buff *skb;
- skb = __hci_cmd_sync(hdev, 0xfc0c, 1, param, HCI_CMD_TIMEOUT);
- if (IS_ERR(skb))
- bt_dev_err(hdev, "%s: triggle crash failed (%ld)", __func__, PTR_ERR(skb));
- kfree_skb(skb);
+ err = __hci_cmd_send(hdev, 0xfc0c, 1, param);
+ if (err < 0)
+ bt_dev_err(hdev, "%s: triggle crash failed (%d)", __func__, err);
}
/*
*
* Copyright (C) 2007 Texas Instruments, Inc.
* Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved.
- * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*
* Acknowledgements:
* This file is based on hci_ll.c, which was...
struct qca_power *bt_power;
u32 init_speed;
u32 oper_speed;
+ bool bdaddr_property_broken;
const char *firmware_name;
};
struct hci_uart *hu = hci_get_drvdata(hdev);
bool wakeup;
+ if (!hu->serdev)
+ return true;
+
/* BT SoC attached through the serial bus is handled by the serdev driver.
* So we need to use the device handle of the serdev driver to get the
* status of device may wakeup.
const char *firmware_name = qca_get_firmware_name(hu);
int ret;
struct qca_btsoc_version ver;
+ struct qca_serdev *qcadev;
const char *soc_name;
ret = qca_check_speeds(hu);
case QCA_WCN6750:
case QCA_WCN6855:
case QCA_WCN7850:
-
- /* Set BDA quirk bit for reading BDA value from fwnode property
- * only if that property exist in DT.
- */
- if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) {
- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
- bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later");
- } else {
- bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA");
- }
+ qcadev = serdev_device_get_drvdata(hu->serdev);
+ if (qcadev->bdaddr_property_broken)
+ set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks);
hci_set_aosp_capable(hdev);
qca_debugfs_init(hdev);
hu->hdev->hw_error = qca_hw_error;
hu->hdev->cmd_timeout = qca_cmd_timeout;
- if (device_can_wakeup(hu->serdev->ctrl->dev.parent))
- hu->hdev->wakeup = qca_wakeup;
+ if (hu->serdev) {
+ if (device_can_wakeup(hu->serdev->ctrl->dev.parent))
+ hu->hdev->wakeup = qca_wakeup;
+ }
} else if (ret == -ENOENT) {
/* No patch/nvm-config found, run with original fw/config */
set_bit(QCA_ROM_FW, &qca->flags);
if (!qcadev->oper_speed)
BT_DBG("UART will pick default operating speed");
+ qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev,
+ "qcom,local-bd-address-broken");
+
if (data)
qcadev->btsoc_type = data->soc_type;
else
(data->soc_type == QCA_WCN6750 ||
data->soc_type == QCA_WCN6855)) {
dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n");
- power_ctrl_enabled = false;
+ return PTR_ERR(qcadev->bt_en);
}
+ if (!qcadev->bt_en)
+ power_ctrl_enabled = false;
+
qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl",
GPIOD_IN);
if (IS_ERR(qcadev->sw_ctrl) &&
(data->soc_type == QCA_WCN6750 ||
data->soc_type == QCA_WCN6855 ||
- data->soc_type == QCA_WCN7850))
- dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n");
+ data->soc_type == QCA_WCN7850)) {
+ dev_err(&serdev->dev, "failed to acquire SW_CTRL gpio\n");
+ return PTR_ERR(qcadev->sw_ctrl);
+ }
qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL);
if (IS_ERR(qcadev->susclk)) {
qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
GPIOD_OUT_LOW);
if (IS_ERR(qcadev->bt_en)) {
- dev_warn(&serdev->dev, "failed to acquire enable gpio\n");
- power_ctrl_enabled = false;
+ dev_err(&serdev->dev, "failed to acquire enable gpio\n");
+ return PTR_ERR(qcadev->bt_en);
}
+ if (!qcadev->bt_en)
+ power_ctrl_enabled = false;
+
qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL);
if (IS_ERR(qcadev->susclk)) {
dev_warn(&serdev->dev, "failed to acquire clk\n");
#include <linux/of_address.h>
#include <linux/device.h>
#include <linux/bitfield.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
#include <asm/cacheflush.h>
#include <asm/cacheinfo.h>
#include <asm/dma-noncoherent.h>
return IRQ_HANDLED;
}
+static int sifive_ccache_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ unsigned long quirks;
+ int intr_num, rc;
+
+ quirks = (unsigned long)device_get_match_data(dev);
+
+ intr_num = platform_irq_count(pdev);
+ if (!intr_num)
+ return dev_err_probe(dev, -ENODEV, "No interrupts property\n");
+
+ for (int i = 0; i < intr_num; i++) {
+ if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR))
+ continue;
+
+ g_irq[i] = platform_get_irq(pdev, i);
+ if (g_irq[i] < 0)
+ return g_irq[i];
+
+ rc = devm_request_irq(dev, g_irq[i], ccache_int_handler, 0, "ccache_ecc", NULL);
+ if (rc)
+ return dev_err_probe(dev, rc, "Could not request IRQ %d\n", g_irq[i]);
+ }
+
+ return 0;
+}
+
+static struct platform_driver sifive_ccache_driver = {
+ .probe = sifive_ccache_probe,
+ .driver = {
+ .name = "sifive_ccache",
+ .of_match_table = sifive_ccache_ids,
+ },
+};
+
static int __init sifive_ccache_init(void)
{
struct device_node *np;
struct resource res;
- int i, rc, intr_num;
const struct of_device_id *match;
unsigned long quirks;
+ int rc;
np = of_find_matching_node_and_match(NULL, sifive_ccache_ids, &match);
if (!np)
goto err_unmap;
}
- intr_num = of_property_count_u32_elems(np, "interrupts");
- if (!intr_num) {
- pr_err("No interrupts property\n");
- rc = -ENODEV;
- goto err_unmap;
- }
-
- for (i = 0; i < intr_num; i++) {
- g_irq[i] = irq_of_parse_and_map(np, i);
-
- if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR))
- continue;
-
- rc = request_irq(g_irq[i], ccache_int_handler, 0, "ccache_ecc",
- NULL);
- if (rc) {
- pr_err("Could not request IRQ %d\n", g_irq[i]);
- goto err_free_irq;
- }
- }
- of_node_put(np);
-
#ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS
if (quirks & QUIRK_NONSTANDARD_CACHE_OPS) {
riscv_cbom_block_size = SIFIVE_CCACHE_LINE_SIZE;
#ifdef CONFIG_DEBUG_FS
setup_sifive_debug();
#endif
+
+ rc = platform_driver_register(&sifive_ccache_driver);
+ if (rc)
+ goto err_unmap;
+
+ of_node_put(np);
+
return 0;
-err_free_irq:
- while (--i >= 0)
- free_irq(g_irq[i], NULL);
err_unmap:
iounmap(ccache_base);
err_node_put:
static void __cold _credit_init_bits(size_t bits)
{
- static struct execute_work set_ready;
+ static DECLARE_WORK(set_ready, crng_set_ready);
unsigned int new, orig, add;
unsigned long flags;
if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) {
crng_reseed(NULL); /* Sets crng_init to CRNG_READY under base_crng.lock. */
- if (static_key_initialized)
- execute_in_process_context(crng_set_ready, &set_ready);
+ if (static_key_initialized && system_unbound_wq)
+ queue_work(system_unbound_wq, &set_ready);
atomic_notifier_call_chain(&random_ready_notifier, 0, NULL);
wake_up_interruptible(&crng_init_wait);
kill_fasync(&fasync, SIGIO, POLL_IN);
/*
* If we were initialized by the cpu or bootloader before jump labels
- * are initialized, then we should enable the static branch here, where
- * it's guaranteed that jump labels have been initialized.
+ * or workqueues are initialized, then we should enable the static
+ * branch here, where it's guaranteed that these have been initialized.
*/
if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY)
crng_set_ready(NULL);
static HLIST_HEAD(clk_orphan_list);
static LIST_HEAD(clk_notifier_list);
+/* List of registered clks that use runtime PM */
+static HLIST_HEAD(clk_rpm_list);
+static DEFINE_MUTEX(clk_rpm_list_lock);
+
static const struct hlist_head *all_lists[] = {
&clk_root_list,
&clk_orphan_list,
struct clk_hw *hw;
struct module *owner;
struct device *dev;
+ struct hlist_node rpm_node;
struct device_node *of_node;
struct clk_core *parent;
struct clk_parent_map *parents;
pm_runtime_put_sync(core->dev);
}
+/**
+ * clk_pm_runtime_get_all() - Runtime "get" all clk provider devices
+ *
+ * Call clk_pm_runtime_get() on all runtime PM enabled clks in the clk tree so
+ * that disabling unused clks avoids a deadlock where a device is runtime PM
+ * resuming/suspending and the runtime PM callback is trying to grab the
+ * prepare_lock for something like clk_prepare_enable() while
+ * clk_disable_unused_subtree() holds the prepare_lock and is trying to runtime
+ * PM resume/suspend the device as well.
+ *
+ * Context: Acquires the 'clk_rpm_list_lock' and returns with the lock held on
+ * success. Otherwise the lock is released on failure.
+ *
+ * Return: 0 on success, negative errno otherwise.
+ */
+static int clk_pm_runtime_get_all(void)
+{
+ int ret;
+ struct clk_core *core, *failed;
+
+ /*
+ * Grab the list lock to prevent any new clks from being registered
+ * or unregistered until clk_pm_runtime_put_all().
+ */
+ mutex_lock(&clk_rpm_list_lock);
+
+ /*
+ * Runtime PM "get" all the devices that are needed for the clks
+ * currently registered. Do this without holding the prepare_lock, to
+ * avoid the deadlock.
+ */
+ hlist_for_each_entry(core, &clk_rpm_list, rpm_node) {
+ ret = clk_pm_runtime_get(core);
+ if (ret) {
+ failed = core;
+ pr_err("clk: Failed to runtime PM get '%s' for clk '%s'\n",
+ dev_name(failed->dev), failed->name);
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ hlist_for_each_entry(core, &clk_rpm_list, rpm_node) {
+ if (core == failed)
+ break;
+
+ clk_pm_runtime_put(core);
+ }
+ mutex_unlock(&clk_rpm_list_lock);
+
+ return ret;
+}
+
+/**
+ * clk_pm_runtime_put_all() - Runtime "put" all clk provider devices
+ *
+ * Put the runtime PM references taken in clk_pm_runtime_get_all() and release
+ * the 'clk_rpm_list_lock'.
+ */
+static void clk_pm_runtime_put_all(void)
+{
+ struct clk_core *core;
+
+ hlist_for_each_entry(core, &clk_rpm_list, rpm_node)
+ clk_pm_runtime_put(core);
+ mutex_unlock(&clk_rpm_list_lock);
+}
+
+static void clk_pm_runtime_init(struct clk_core *core)
+{
+ struct device *dev = core->dev;
+
+ if (dev && pm_runtime_enabled(dev)) {
+ core->rpm_enabled = true;
+
+ mutex_lock(&clk_rpm_list_lock);
+ hlist_add_head(&core->rpm_node, &clk_rpm_list);
+ mutex_unlock(&clk_rpm_list_lock);
+ }
+}
+
/*** locking ***/
static void clk_prepare_lock(void)
{
if (core->flags & CLK_IGNORE_UNUSED)
return;
- if (clk_pm_runtime_get(core))
- return;
-
if (clk_core_is_prepared(core)) {
trace_clk_unprepare(core);
if (core->ops->unprepare_unused)
core->ops->unprepare(core->hw);
trace_clk_unprepare_complete(core);
}
-
- clk_pm_runtime_put(core);
}
static void __init clk_disable_unused_subtree(struct clk_core *core)
if (core->flags & CLK_OPS_PARENT_ENABLE)
clk_core_prepare_enable(core->parent);
- if (clk_pm_runtime_get(core))
- goto unprepare_out;
-
flags = clk_enable_lock();
if (core->enable_count)
unlock_out:
clk_enable_unlock(flags);
- clk_pm_runtime_put(core);
-unprepare_out:
if (core->flags & CLK_OPS_PARENT_ENABLE)
clk_core_disable_unprepare(core->parent);
}
static int __init clk_disable_unused(void)
{
struct clk_core *core;
+ int ret;
if (clk_ignore_unused) {
pr_warn("clk: Not disabling unused clocks\n");
pr_info("clk: Disabling unused clocks\n");
+ ret = clk_pm_runtime_get_all();
+ if (ret)
+ return ret;
+ /*
+ * Grab the prepare lock to keep the clk topology stable while iterating
+ * over clks.
+ */
clk_prepare_lock();
hlist_for_each_entry(core, &clk_root_list, child_node)
clk_prepare_unlock();
+ clk_pm_runtime_put_all();
+
return 0;
}
late_initcall_sync(clk_disable_unused);
{
struct clk_core *child;
- clk_pm_runtime_get(c);
clk_summary_show_one(s, c, level);
- clk_pm_runtime_put(c);
hlist_for_each_entry(child, &c->children, child_node)
clk_summary_show_subtree(s, child, level + 1);
{
struct clk_core *c;
struct hlist_head **lists = s->private;
+ int ret;
seq_puts(s, " enable prepare protect duty hardware connection\n");
seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n");
seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
+ ret = clk_pm_runtime_get_all();
+ if (ret)
+ return ret;
clk_prepare_lock();
clk_summary_show_subtree(s, c, 0);
clk_prepare_unlock();
+ clk_pm_runtime_put_all();
return 0;
}
struct clk_core *c;
bool first_node = true;
struct hlist_head **lists = s->private;
+ int ret;
+
+ ret = clk_pm_runtime_get_all();
+ if (ret)
+ return ret;
seq_putc(s, '{');
+
clk_prepare_lock();
for (; *lists; lists++) {
}
clk_prepare_unlock();
+ clk_pm_runtime_put_all();
seq_puts(s, "}\n");
return 0;
}
clk_core_reparent_orphans_nolock();
-
- kref_init(&core->ref);
out:
clk_pm_runtime_put(core);
unlock:
kfree(core->parents);
}
+/* Free memory allocated for a struct clk_core */
+static void __clk_release(struct kref *ref)
+{
+ struct clk_core *core = container_of(ref, struct clk_core, ref);
+
+ if (core->rpm_enabled) {
+ mutex_lock(&clk_rpm_list_lock);
+ hlist_del(&core->rpm_node);
+ mutex_unlock(&clk_rpm_list_lock);
+ }
+
+ clk_core_free_parent_map(core);
+ kfree_const(core->name);
+ kfree(core);
+}
+
static struct clk *
__clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
{
goto fail_out;
}
+ kref_init(&core->ref);
+
core->name = kstrdup_const(init->name, GFP_KERNEL);
if (!core->name) {
ret = -ENOMEM;
}
core->ops = init->ops;
- if (dev && pm_runtime_enabled(dev))
- core->rpm_enabled = true;
core->dev = dev;
+ clk_pm_runtime_init(core);
core->of_node = np;
if (dev && dev->driver)
core->owner = dev->driver->owner;
hw->clk = NULL;
fail_create_clk:
- clk_core_free_parent_map(core);
fail_parents:
fail_ops:
- kfree_const(core->name);
fail_name:
- kfree(core);
+ kref_put(&core->ref, __clk_release);
fail_out:
return ERR_PTR(ret);
}
}
EXPORT_SYMBOL_GPL(of_clk_hw_register);
-/* Free memory allocated for a clock. */
-static void __clk_release(struct kref *ref)
-{
- struct clk_core *core = container_of(ref, struct clk_core, ref);
-
- lockdep_assert_held(&prepare_lock);
-
- clk_core_free_parent_map(core);
- kfree_const(core->name);
- kfree(core);
-}
-
/*
* Empty clk_ops for unregistered clocks. These are used temporarily
* after clk_unregister() was called on a clock and until last clock
if (ops == &clk_nodrv_ops) {
pr_err("%s: unregistered clock: %s\n", __func__,
clk->core->name);
- goto unlock;
+ clk_prepare_unlock();
+ return;
}
/*
* Assign empty clock ops for consumers that might still hold
if (clk->core->protect_count)
pr_warn("%s: unregistering protected clock: %s\n",
__func__, clk->core->name);
+ clk_prepare_unlock();
kref_put(&clk->core->ref, __clk_release);
free_clk(clk);
-unlock:
- clk_prepare_unlock();
}
EXPORT_SYMBOL_GPL(clk_unregister);
if (clk->min_rate > 0 || clk->max_rate < ULONG_MAX)
clk_set_rate_range_nolock(clk, 0, ULONG_MAX);
- owner = clk->core->owner;
- kref_put(&clk->core->ref, __clk_release);
-
clk_prepare_unlock();
+ owner = clk->core->owner;
+ kref_put(&clk->core->ref, __clk_release);
module_put(owner);
-
free_clk(clk);
}
GATE_INFRA0(CLK_INFRA_PCIE_PERI_26M_CK_P1, "infra_pcie_peri_ck_26m_ck_p1",
"csw_infra_f26m_sel", 8),
GATE_INFRA0(CLK_INFRA_PCIE_PERI_26M_CK_P2, "infra_pcie_peri_ck_26m_ck_p2",
- "csw_infra_f26m_sel", 9),
+ "infra_pcie_peri_ck_26m_ck_p3", 9),
GATE_INFRA0(CLK_INFRA_PCIE_PERI_26M_CK_P3, "infra_pcie_peri_ck_26m_ck_p3",
"csw_infra_f26m_sel", 10),
/* INFRA1 */
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include "clk-mtk.h"
return IS_ERR(base) ? PTR_ERR(base) : -ENOMEM;
}
+
+ devm_pm_runtime_enable(&pdev->dev);
+ /*
+ * Do a pm_runtime_resume_and_get() to workaround a possible
+ * deadlock between clk_register() and the genpd framework.
+ */
+ r = pm_runtime_resume_and_get(&pdev->dev);
+ if (r)
+ return r;
+
/* Calculate how many clk_hw_onecell_data entries to allocate */
num_clks = mcd->num_clks + mcd->num_composite_clks;
num_clks += mcd->num_fixed_clks + mcd->num_factor_clks;
goto unregister_clks;
}
+ pm_runtime_put(&pdev->dev);
+
return r;
unregister_clks:
free_base:
if (mcd->shared_io && base)
iounmap(base);
+
+ pm_runtime_put(&pdev->dev);
return r;
}
struct vmk80xx_private *devpriv = dev->private;
struct usb_interface *intf = comedi_to_usb_interface(dev);
struct usb_host_interface *iface_desc = intf->cur_altsetting;
- struct usb_endpoint_descriptor *ep_desc;
- int i;
-
- if (iface_desc->desc.bNumEndpoints != 2)
- return -ENODEV;
-
- for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) {
- ep_desc = &iface_desc->endpoint[i].desc;
-
- if (usb_endpoint_is_int_in(ep_desc) ||
- usb_endpoint_is_bulk_in(ep_desc)) {
- if (!devpriv->ep_rx)
- devpriv->ep_rx = ep_desc;
- continue;
- }
+ struct usb_endpoint_descriptor *ep_rx_desc, *ep_tx_desc;
+ int ret;
- if (usb_endpoint_is_int_out(ep_desc) ||
- usb_endpoint_is_bulk_out(ep_desc)) {
- if (!devpriv->ep_tx)
- devpriv->ep_tx = ep_desc;
- continue;
- }
- }
+ if (devpriv->model == VMK8061_MODEL)
+ ret = usb_find_common_endpoints(iface_desc, &ep_rx_desc,
+ &ep_tx_desc, NULL, NULL);
+ else
+ ret = usb_find_common_endpoints(iface_desc, NULL, NULL,
+ &ep_rx_desc, &ep_tx_desc);
- if (!devpriv->ep_rx || !devpriv->ep_tx)
+ if (ret)
return -ENODEV;
+ devpriv->ep_rx = ep_rx_desc;
+ devpriv->ep_tx = ep_tx_desc;
+
if (!usb_endpoint_maxp(devpriv->ep_rx) || !usb_endpoint_maxp(devpriv->ep_tx))
return -EINVAL;
void *arg = &data;
int cmd, rc = 0;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
sev = psp->sev_data;
return -EINVAL;
cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
+ if (!cpus_per_iaa)
+ cpus_per_iaa = 1;
out:
return 0;
}
}
}
- if (nr_iaa)
+ if (nr_iaa) {
cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
- else
- cpus_per_iaa = 0;
+ if (!cpus_per_iaa)
+ cpus_per_iaa = 1;
+ } else
+ cpus_per_iaa = 1;
}
static int wq_table_add_wqs(int iaa, int cpu)
If unsure, or if this kernel is meant for production environments,
say N.
-config CXL_PMU
- tristate "CXL Performance Monitoring Unit"
- default CXL_BUS
- depends on PERF_EVENTS
- help
- Support performance monitoring as defined in CXL rev 3.0
- section 13.2: Performance Monitoring. CXL components may have
- one or more CXL Performance Monitoring Units (CPMUs).
-
- Say 'y/m' to enable a driver that will attach to performance
- monitoring units and provide standard perf based interfaces.
-
- If unsure say 'm'.
endif
{
struct acpi_device *hb = to_cxl_host_bridge(NULL, dev);
u32 uid;
- int rc;
if (kstrtou32(acpi_device_uid(hb), 0, &uid))
return -EINVAL;
- rc = acpi_get_genport_coordinates(uid, dport->hb_coord);
- if (rc < 0)
- return rc;
-
- /* Adjust back to picoseconds from nanoseconds */
- for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
- dport->hb_coord[i].read_latency *= 1000;
- dport->hb_coord[i].write_latency *= 1000;
- }
-
- return 0;
+ return acpi_get_genport_coordinates(uid, dport->coord);
}
static int add_host_bridge_dport(struct device *match, void *arg)
struct dsmas_entry {
struct range dpa_range;
u8 handle;
- struct access_coordinate coord;
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
int entries;
int qos_class;
};
+static u32 cdat_normalize(u16 entry, u64 base, u8 type)
+{
+ u32 value;
+
+ /*
+ * Check for invalid and overflow values
+ */
+ if (entry == 0xffff || !entry)
+ return 0;
+ else if (base > (UINT_MAX / (entry)))
+ return 0;
+
+ /*
+ * CDAT fields follow the format of HMAT fields. See table 5 Device
+ * Scoped Latency and Bandwidth Information Structure in Coherent Device
+ * Attribute Table (CDAT) Specification v1.01.
+ */
+ value = entry * base;
+ switch (type) {
+ case ACPI_HMAT_ACCESS_LATENCY:
+ case ACPI_HMAT_READ_LATENCY:
+ case ACPI_HMAT_WRITE_LATENCY:
+ value = DIV_ROUND_UP(value, 1000);
+ break;
+ default:
+ break;
+ }
+ return value;
+}
+
static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
{
return 0;
}
-static void cxl_access_coordinate_set(struct access_coordinate *coord,
- int access, unsigned int val)
+static void __cxl_access_coordinate_set(struct access_coordinate *coord,
+ int access, unsigned int val)
{
switch (access) {
case ACPI_HMAT_ACCESS_LATENCY:
}
}
+static void cxl_access_coordinate_set(struct access_coordinate *coord,
+ int access, unsigned int val)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ __cxl_access_coordinate_set(&coord[i], access, val);
+}
+
static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
{
__le16 le_val;
u64 val;
u16 len;
- int rc;
len = le16_to_cpu((__force __le16)hdr->length);
if (len != size || (unsigned long)hdr + len > end) {
le_base = (__force __le64)dslbis->entry_base_unit;
le_val = (__force __le16)dslbis->entry[0];
- rc = check_mul_overflow(le64_to_cpu(le_base),
- le16_to_cpu(le_val), &val);
- if (rc)
- pr_warn("DSLBIS value overflowed.\n");
+ val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+ dslbis->data_type);
- cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val);
+ cxl_access_coordinate_set(dent->coord, dslbis->data_type, val);
return 0;
}
static int cxl_port_perf_data_calculate(struct cxl_port *port,
struct xarray *dsmas_xa)
{
- struct access_coordinate ep_c;
- struct access_coordinate coord[ACCESS_COORDINATE_MAX];
+ struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
struct dsmas_entry *dent;
int valid_entries = 0;
unsigned long index;
int rc;
- rc = cxl_endpoint_get_perf_coordinates(port, &ep_c);
+ rc = cxl_endpoint_get_perf_coordinates(port, ep_c);
if (rc) {
dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n");
return rc;
}
- rc = cxl_hb_get_perf_coordinates(port, coord);
- if (rc) {
- dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
- return rc;
- }
-
struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
if (!cxl_root)
xa_for_each(dsmas_xa, index, dent) {
int qos_class;
- cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c);
- /*
- * Keeping the host bridge coordinates separate from the dsmas
- * coordinates in order to allow calculation of access class
- * 0 and 1 for region later.
- */
- cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU],
- &coord[ACCESS_COORDINATE_CPU],
- &dent->coord);
+ cxl_coordinates_combine(dent->coord, dent->coord, ep_c);
dent->entries = 1;
rc = cxl_root->ops->qos_class(cxl_root,
- &coord[ACCESS_COORDINATE_CPU],
+ &dent->coord[ACCESS_COORDINATE_CPU],
1, &qos_class);
if (rc != 1)
continue;
static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
struct cxl_dpa_perf *dpa_perf)
{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ dpa_perf->coord[i] = dent->coord[i];
dpa_perf->dpa_range = dent->dpa_range;
- dpa_perf->coord = dent->coord;
dpa_perf->qos_class = dent->qos_class;
dev_dbg(dev,
"DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
dent->dpa_range.start, dpa_perf->qos_class,
- dent->coord.read_bandwidth, dent->coord.write_bandwidth,
- dent->coord.read_latency, dent->coord.write_latency);
+ dent->coord[ACCESS_COORDINATE_CPU].read_bandwidth,
+ dent->coord[ACCESS_COORDINATE_CPU].write_bandwidth,
+ dent->coord[ACCESS_COORDINATE_CPU].read_latency,
+ dent->coord[ACCESS_COORDINATE_CPU].write_latency);
}
static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
le_base = (__force __le64)tbl->sslbis_header.entry_base_unit;
le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth;
-
- if (check_mul_overflow(le64_to_cpu(le_base),
- le16_to_cpu(le_val), &val))
- dev_warn(dev, "SSLBIS value overflowed!\n");
+ val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+ sslbis->data_type);
xa_for_each(&port->dports, index, dport) {
if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
- dsp_id == dport->port_id)
- cxl_access_coordinate_set(&dport->sw_coord,
+ dsp_id == dport->port_id) {
+ cxl_access_coordinate_set(dport->coord,
sslbis->data_type,
val);
+ }
}
}
}
EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL);
+static void __cxl_coordinates_combine(struct access_coordinate *out,
+ struct access_coordinate *c1,
+ struct access_coordinate *c2)
+{
+ if (c1->write_bandwidth && c2->write_bandwidth)
+ out->write_bandwidth = min(c1->write_bandwidth,
+ c2->write_bandwidth);
+ out->write_latency = c1->write_latency + c2->write_latency;
+
+ if (c1->read_bandwidth && c2->read_bandwidth)
+ out->read_bandwidth = min(c1->read_bandwidth,
+ c2->read_bandwidth);
+ out->read_latency = c1->read_latency + c2->read_latency;
+}
+
/**
* cxl_coordinates_combine - Combine the two input coordinates
*
struct access_coordinate *c1,
struct access_coordinate *c2)
{
- if (c1->write_bandwidth && c2->write_bandwidth)
- out->write_bandwidth = min(c1->write_bandwidth,
- c2->write_bandwidth);
- out->write_latency = c1->write_latency + c2->write_latency;
-
- if (c1->read_bandwidth && c2->read_bandwidth)
- out->read_bandwidth = min(c1->read_bandwidth,
- c2->read_bandwidth);
- out->read_latency = c1->read_latency + c2->read_latency;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ __cxl_coordinates_combine(&out[i], &c1[i], &c2[i]);
}
MODULE_IMPORT_NS(CXL);
struct cxl_endpoint_decoder *cxled)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_port *port = cxlmd->endpoint;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
- struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
- struct access_coordinate coord;
struct range dpa = {
.start = cxled->dpa_res->start,
.end = cxled->dpa_res->end,
};
struct cxl_dpa_perf *perf;
- int rc;
switch (cxlr->mode) {
case CXL_DECODER_RAM:
if (!range_contains(&perf->dpa_range, &dpa))
return;
- rc = cxl_hb_get_perf_coordinates(port, hb_coord);
- if (rc) {
- dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
- return;
- }
-
for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
- /* Pickup the host bridge coords */
- cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord);
-
/* Get total bandwidth and the worst latency for the cxl region */
cxlr->coord[i].read_latency = max_t(unsigned int,
cxlr->coord[i].read_latency,
- coord.read_latency);
+ perf->coord[i].read_latency);
cxlr->coord[i].write_latency = max_t(unsigned int,
cxlr->coord[i].write_latency,
- coord.write_latency);
- cxlr->coord[i].read_bandwidth += coord.read_bandwidth;
- cxlr->coord[i].write_bandwidth += coord.write_bandwidth;
-
- /*
- * Convert latency to nanosec from picosec to be consistent
- * with the resulting latency coordinates computed by the
- * HMAT_REPORTING code.
- */
- cxlr->coord[i].read_latency =
- DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000);
- cxlr->coord[i].write_latency =
- DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000);
+ perf->coord[i].write_latency);
+ cxlr->coord[i].read_bandwidth += perf->coord[i].read_bandwidth;
+ cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth;
}
}
payload->handles[i++] = gen->hdr.handle;
dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
- le16_to_cpu(payload->handles[i]));
+ le16_to_cpu(payload->handles[i - 1]));
if (i == max_handles) {
payload->nr_recs = i;
struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
struct device *dev = mds->cxlds.dev;
struct cxl_get_event_payload *payload;
- struct cxl_mbox_cmd mbox_cmd;
u8 log_type = type;
u16 nr_rec;
mutex_lock(&mds->event.log_lock);
payload = mds->event.buf;
- mbox_cmd = (struct cxl_mbox_cmd) {
- .opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
- .payload_in = &log_type,
- .size_in = sizeof(log_type),
- .payload_out = payload,
- .size_out = mds->payload_size,
- .min_out = struct_size(payload, records, 0),
- };
-
do {
int rc, i;
+ struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
+ .payload_in = &log_type,
+ .size_in = sizeof(log_type),
+ .payload_out = payload,
+ .size_out = mds->payload_size,
+ .min_out = struct_size(payload, records, 0),
+ };
rc = cxl_internal_send_cmd(mds, &mbox_cmd);
if (rc) {
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
struct cxl_mbox_poison_out *po;
struct cxl_mbox_poison_in pi;
- struct cxl_mbox_cmd mbox_cmd;
int nr_records = 0;
int rc;
pi.offset = cpu_to_le64(offset);
pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT);
- mbox_cmd = (struct cxl_mbox_cmd) {
- .opcode = CXL_MBOX_OP_GET_POISON,
- .size_in = sizeof(pi),
- .payload_in = &pi,
- .size_out = mds->payload_size,
- .payload_out = po,
- .min_out = struct_size(po, record, 0),
- };
-
do {
+ struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd){
+ .opcode = CXL_MBOX_OP_GET_POISON,
+ .size_in = sizeof(pi),
+ .payload_in = &pi,
+ .size_out = mds->payload_size,
+ .payload_out = po,
+ .min_out = struct_size(po, record, 0),
+ };
+
rc = cxl_internal_send_cmd(mds, &mbox_cmd);
if (rc)
break;
}
EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
-/**
- * cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator
- * and host bridge
- *
- * @port: endpoint cxl_port
- * @coord: output access coordinates
- *
- * Return: errno on failure, 0 on success.
- */
-int cxl_hb_get_perf_coordinates(struct cxl_port *port,
- struct access_coordinate *coord)
+static void add_latency(struct access_coordinate *c, long latency)
{
- struct cxl_port *iter = port;
- struct cxl_dport *dport;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ c[i].write_latency += latency;
+ c[i].read_latency += latency;
+ }
+}
- if (!is_cxl_endpoint(port))
- return -EINVAL;
+static bool coordinates_valid(struct access_coordinate *c)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ if (c[i].read_bandwidth && c[i].write_bandwidth &&
+ c[i].read_latency && c[i].write_latency)
+ continue;
+ return false;
+ }
- dport = iter->parent_dport;
- while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
- iter = to_cxl_port(iter->dev.parent);
- dport = iter->parent_dport;
+ return true;
+}
+
+static void set_min_bandwidth(struct access_coordinate *c, unsigned int bw)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ c[i].write_bandwidth = min(c[i].write_bandwidth, bw);
+ c[i].read_bandwidth = min(c[i].read_bandwidth, bw);
}
+}
- coord[ACCESS_COORDINATE_LOCAL] =
- dport->hb_coord[ACCESS_COORDINATE_LOCAL];
- coord[ACCESS_COORDINATE_CPU] =
- dport->hb_coord[ACCESS_COORDINATE_CPU];
+static void set_access_coordinates(struct access_coordinate *out,
+ struct access_coordinate *in)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ out[i] = in[i];
+}
- return 0;
+static bool parent_port_is_cxl_root(struct cxl_port *port)
+{
+ return is_cxl_root(to_cxl_port(port->dev.parent));
}
/**
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
struct access_coordinate *coord)
{
- struct access_coordinate c = {
- .read_bandwidth = UINT_MAX,
- .write_bandwidth = UINT_MAX,
+ struct access_coordinate c[] = {
+ {
+ .read_bandwidth = UINT_MAX,
+ .write_bandwidth = UINT_MAX,
+ },
+ {
+ .read_bandwidth = UINT_MAX,
+ .write_bandwidth = UINT_MAX,
+ },
};
struct cxl_port *iter = port;
struct cxl_dport *dport;
struct pci_dev *pdev;
unsigned int bw;
+ bool is_cxl_root;
if (!is_cxl_endpoint(port))
return -EINVAL;
- dport = iter->parent_dport;
-
/*
- * Exit the loop when the parent port of the current port is cxl root.
- * The iterative loop starts at the endpoint and gathers the
- * latency of the CXL link from the current iter to the next downstream
- * port each iteration. If the parent is cxl root then there is
- * nothing to gather.
+ * Exit the loop when the parent port of the current iter port is cxl
+ * root. The iterative loop starts at the endpoint and gathers the
+ * latency of the CXL link from the current device/port to the connected
+ * downstream port each iteration.
*/
- while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
- cxl_coordinates_combine(&c, &c, &dport->sw_coord);
- c.write_latency += dport->link_latency;
- c.read_latency += dport->link_latency;
-
- iter = to_cxl_port(iter->dev.parent);
+ do {
dport = iter->parent_dport;
- }
+ iter = to_cxl_port(iter->dev.parent);
+ is_cxl_root = parent_port_is_cxl_root(iter);
+
+ /*
+ * There's no valid access_coordinate for a root port since RPs do not
+ * have CDAT and therefore needs to be skipped.
+ */
+ if (!is_cxl_root) {
+ if (!coordinates_valid(dport->coord))
+ return -EINVAL;
+ cxl_coordinates_combine(c, c, dport->coord);
+ }
+ add_latency(c, dport->link_latency);
+ } while (!is_cxl_root);
+
+ dport = iter->parent_dport;
+ /* Retrieve HB coords */
+ if (!coordinates_valid(dport->coord))
+ return -EINVAL;
+ cxl_coordinates_combine(c, c, dport->coord);
/* Get the calculated PCI paths bandwidth */
pdev = to_pci_dev(port->uport_dev->parent);
return -ENXIO;
bw /= BITS_PER_BYTE;
- c.write_bandwidth = min(c.write_bandwidth, bw);
- c.read_bandwidth = min(c.read_bandwidth, bw);
-
- *coord = c;
+ set_min_bandwidth(c, bw);
+ set_access_coordinates(coord, c);
return 0;
}
static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
struct cxl_register_map *map)
{
+ u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
u64 offset = ((u64)reg_hi << 32) |
(reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
if (offset > pci_resource_len(pdev, bar)) {
dev_warn(&pdev->dev,
"BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar,
- &pdev->resource[bar], &offset, map->reg_type);
+ &pdev->resource[bar], &offset, reg_type);
return false;
}
- map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
+ map->reg_type = reg_type;
map->resource = pci_resource_start(pdev, bar) + offset;
map->max_size = pci_resource_len(pdev, bar) - offset;
return true;
* @rch: Indicate whether this dport was enumerated in RCH or VH mode
* @port: reference to cxl_port that contains this downstream port
* @regs: Dport parsed register blocks
- * @sw_coord: access coordinates (performance) for switch from CDAT
- * @hb_coord: access coordinates (performance) from ACPI generic port (host bridge)
+ * @coord: access coordinates (bandwidth and latency performance attributes)
* @link_latency: calculated PCIe downstream latency
*/
struct cxl_dport {
bool rch;
struct cxl_port *port;
struct cxl_regs regs;
- struct access_coordinate sw_coord;
- struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
long link_latency;
};
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
struct access_coordinate *coord);
-int cxl_hb_get_perf_coordinates(struct cxl_port *port,
- struct access_coordinate *coord);
void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled);
*/
struct cxl_dpa_perf {
struct range dpa_range;
- struct access_coordinate coord;
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
int qos_class;
};
return -ENOMEM;
chain = mock_chain(NULL, f, 1);
- if (!chain)
+ if (chain)
+ dma_fence_enable_sw_signaling(chain);
+ else
err = -ENOMEM;
- dma_fence_enable_sw_signaling(chain);
-
dma_fence_signal(f);
dma_fence_put(f);
#
config DPLL
- bool
+ bool
struct list_head list;
const struct dpll_pin_ops *ops;
void *priv;
+ void *cookie;
};
struct dpll_device *dpll_device_get_by_id(int id)
static struct dpll_pin_registration *
dpll_pin_registration_find(struct dpll_pin_ref *ref,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv,
+ void *cookie)
{
struct dpll_pin_registration *reg;
list_for_each_entry(reg, &ref->registration_list, list) {
- if (reg->ops == ops && reg->priv == priv)
+ if (reg->ops == ops && reg->priv == priv &&
+ reg->cookie == cookie)
return reg;
}
return NULL;
static int
dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv,
+ void *cookie)
{
struct dpll_pin_registration *reg;
struct dpll_pin_ref *ref;
xa_for_each(xa_pins, i, ref) {
if (ref->pin != pin)
continue;
- reg = dpll_pin_registration_find(ref, ops, priv);
+ reg = dpll_pin_registration_find(ref, ops, priv, cookie);
if (reg) {
refcount_inc(&ref->refcount);
return 0;
}
reg->ops = ops;
reg->priv = priv;
+ reg->cookie = cookie;
if (ref_exists)
refcount_inc(&ref->refcount);
list_add_tail(®->list, &ref->registration_list);
}
static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv,
+ void *cookie)
{
struct dpll_pin_registration *reg;
struct dpll_pin_ref *ref;
xa_for_each(xa_pins, i, ref) {
if (ref->pin != pin)
continue;
- reg = dpll_pin_registration_find(ref, ops, priv);
+ reg = dpll_pin_registration_find(ref, ops, priv, cookie);
if (WARN_ON(!reg))
return -EINVAL;
list_del(®->list);
static int
dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv, void *cookie)
{
struct dpll_pin_registration *reg;
struct dpll_pin_ref *ref;
xa_for_each(xa_dplls, i, ref) {
if (ref->dpll != dpll)
continue;
- reg = dpll_pin_registration_find(ref, ops, priv);
+ reg = dpll_pin_registration_find(ref, ops, priv, cookie);
if (reg) {
refcount_inc(&ref->refcount);
return 0;
}
reg->ops = ops;
reg->priv = priv;
+ reg->cookie = cookie;
if (ref_exists)
refcount_inc(&ref->refcount);
list_add_tail(®->list, &ref->registration_list);
static void
dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv, void *cookie)
{
struct dpll_pin_registration *reg;
struct dpll_pin_ref *ref;
xa_for_each(xa_dplls, i, ref) {
if (ref->dpll != dpll)
continue;
- reg = dpll_pin_registration_find(ref, ops, priv);
+ reg = dpll_pin_registration_find(ref, ops, priv, cookie);
if (WARN_ON(!reg))
return;
list_del(®->list);
static int
__dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv, void *cookie)
{
int ret;
- ret = dpll_xa_ref_pin_add(&dpll->pin_refs, pin, ops, priv);
+ ret = dpll_xa_ref_pin_add(&dpll->pin_refs, pin, ops, priv, cookie);
if (ret)
return ret;
- ret = dpll_xa_ref_dpll_add(&pin->dpll_refs, dpll, ops, priv);
+ ret = dpll_xa_ref_dpll_add(&pin->dpll_refs, dpll, ops, priv, cookie);
if (ret)
goto ref_pin_del;
xa_set_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED);
return ret;
ref_pin_del:
- dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv);
+ dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv, cookie);
return ret;
}
dpll->clock_id == pin->clock_id)))
ret = -EINVAL;
else
- ret = __dpll_pin_register(dpll, pin, ops, priv);
+ ret = __dpll_pin_register(dpll, pin, ops, priv, NULL);
mutex_unlock(&dpll_lock);
return ret;
static void
__dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv, void *cookie)
{
ASSERT_DPLL_PIN_REGISTERED(pin);
- dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv);
- dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv);
+ dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv, cookie);
+ dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv, cookie);
if (xa_empty(&pin->dpll_refs))
xa_clear_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED);
}
mutex_lock(&dpll_lock);
dpll_pin_delete_ntf(pin);
- __dpll_pin_unregister(dpll, pin, ops, priv);
+ __dpll_pin_unregister(dpll, pin, ops, priv, NULL);
mutex_unlock(&dpll_lock);
}
EXPORT_SYMBOL_GPL(dpll_pin_unregister);
return -EINVAL;
mutex_lock(&dpll_lock);
- ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv);
+ ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv, pin);
if (ret)
goto unlock;
refcount_inc(&pin->refcount);
xa_for_each(&parent->dpll_refs, i, ref) {
- ret = __dpll_pin_register(ref->dpll, pin, ops, priv);
+ ret = __dpll_pin_register(ref->dpll, pin, ops, priv, parent);
if (ret) {
stop = i;
goto dpll_unregister;
dpll_unregister:
xa_for_each(&parent->dpll_refs, i, ref)
if (i < stop) {
- __dpll_pin_unregister(ref->dpll, pin, ops, priv);
+ __dpll_pin_unregister(ref->dpll, pin, ops, priv,
+ parent);
dpll_pin_delete_ntf(pin);
}
refcount_dec(&pin->refcount);
- dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv);
+ dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv, pin);
unlock:
mutex_unlock(&dpll_lock);
return ret;
mutex_lock(&dpll_lock);
dpll_pin_delete_ntf(pin);
- dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv);
+ dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv, pin);
refcount_dec(&pin->refcount);
xa_for_each(&pin->dpll_refs, i, ref)
- __dpll_pin_unregister(ref->dpll, pin, ops, priv);
+ __dpll_pin_unregister(ref->dpll, pin, ops, priv, parent);
mutex_unlock(&dpll_lock);
}
EXPORT_SYMBOL_GPL(dpll_pin_on_pin_unregister);
ohci->generation = generation;
reg_write(ohci, OHCI1394_IntEventClear, OHCI1394_busReset);
+ if (param_debug & OHCI_PARAM_DEBUG_BUSRESETS)
+ reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset);
if (ohci->quirks & QUIRK_RESET_PACKET)
ohci->request_generation = generation;
return IRQ_NONE;
/*
- * busReset and postedWriteErr must not be cleared yet
+ * busReset and postedWriteErr events must not be cleared yet
* (OHCI 1.1 clauses 7.2.3.2 and 13.2.8.1)
*/
reg_write(ohci, OHCI1394_IntEventClear,
event & ~(OHCI1394_busReset | OHCI1394_postedWriteErr));
log_irqs(ohci, event);
+ if (event & OHCI1394_busReset)
+ reg_write(ohci, OHCI1394_IntMaskClear, OHCI1394_busReset);
if (event & OHCI1394_selfIDComplete)
queue_work(selfid_workqueue, &ohci->bus_reset_work);
part_id = packed_id_list[ids_processed++];
- if (!ids_count[list]) { /* Global Notification */
+ if (ids_count[list] == 1) { /* Global Notification */
__do_sched_recv_cb(part_id, 0, false);
continue;
}
ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL,
POWERCAP_PAI_GET, 4, domain,
&fc[POWERCAP_FC_PAI].get_addr, NULL,
- &fc[POWERCAP_PAI_GET].rate_limit);
+ &fc[POWERCAP_FC_PAI].rate_limit);
*p_fc = fc;
}
rd->raw = raw;
filp->private_data = rd;
- return 0;
+ return nonseekable_open(inode, filp);
}
static int scmi_dbg_raw_mode_release(struct inode *inode, struct file *filp)
.open = scmi_dbg_raw_mode_open,
.release = scmi_dbg_raw_mode_release,
.write = scmi_dbg_raw_mode_reset_write,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
.read = scmi_dbg_raw_mode_message_read,
.write = scmi_dbg_raw_mode_message_write,
.poll = scmi_dbg_raw_mode_message_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
.read = scmi_dbg_raw_mode_message_read,
.write = scmi_dbg_raw_mode_message_async_write,
.poll = scmi_dbg_raw_mode_message_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
.release = scmi_dbg_raw_mode_release,
.read = scmi_test_dbg_raw_mode_notif_read,
.poll = scmi_test_dbg_raw_mode_notif_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
.release = scmi_dbg_raw_mode_release,
.read = scmi_test_dbg_raw_mode_errors_read,
.poll = scmi_test_dbg_raw_mode_errors_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
continue;
}
- target = round_up(max(md->phys_addr, alloc_min), align) + target_slot * align;
+ target = round_up(max_t(u64, md->phys_addr, alloc_min), align) + target_slot * align;
pages = size / EFI_PAGE_SIZE;
status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
hdr->vid_mode = 0xffff;
hdr->type_of_loader = 0x21;
+ hdr->initrd_addr_max = INT_MAX;
/* Convert unicode cmdline to ascii */
cmdline_ptr = efi_convert_cmdline(image, &options_size);
* alignment of 8 bytes (64 bits) for GUIDs. Our definition of efi_guid_t,
* however, has an alignment of 4 byte (32 bits). So far, this seems to work
* fine here. See also the comment on the typedef of efi_guid_t.
+ *
+ * Note: It looks like uefisecapp is quite picky about how the memory passed to
+ * it is structured and aligned. In particular the request/response setup used
+ * for QSEE_CMD_UEFI_GET_VARIABLE. While qcom_qseecom_app_send(), in theory,
+ * accepts separate buffers/addresses for the request and response parts, in
+ * practice, however, it seems to expect them to be both part of a larger
+ * contiguous block. We initially allocated separate buffers for the request
+ * and response but this caused the QSEE_CMD_UEFI_GET_VARIABLE command to
+ * either not write any response to the response buffer or outright crash the
+ * device. Therefore, we now allocate a single contiguous block of DMA memory
+ * for both and properly align the data using the macros below. In particular,
+ * request and response structs are aligned at 8 byte (via __reqdata_offs()),
+ * following the driver that this has been reverse-engineered from.
*/
#define qcuefi_buf_align_fields(fields...) \
({ \
#define __array_offs(type, count, offset) \
__field_impl(sizeof(type) * (count), __alignof__(type), offset)
+#define __array_offs_aligned(type, count, align, offset) \
+ __field_impl(sizeof(type) * (count), align, offset)
+
+#define __reqdata_offs(size, offset) \
+ __array_offs_aligned(u8, size, 8, offset)
+
#define __array(type, count) __array_offs(type, count, NULL)
#define __field_offs(type, offset) __array_offs(type, 1, offset)
#define __field(type) __array_offs(type, 1, NULL)
unsigned long buffer_size = *data_size;
efi_status_t efi_status = EFI_SUCCESS;
unsigned long name_length;
+ dma_addr_t cmd_buf_dma;
+ size_t cmd_buf_size;
+ void *cmd_buf;
size_t guid_offs;
size_t name_offs;
size_t req_size;
size_t rsp_size;
+ size_t req_offs;
+ size_t rsp_offs;
ssize_t status;
if (!name || !guid)
__array(u8, buffer_size)
);
- req_data = kzalloc(req_size, GFP_KERNEL);
- if (!req_data) {
+ cmd_buf_size = qcuefi_buf_align_fields(
+ __reqdata_offs(req_size, &req_offs)
+ __reqdata_offs(rsp_size, &rsp_offs)
+ );
+
+ cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL);
+ if (!cmd_buf) {
efi_status = EFI_OUT_OF_RESOURCES;
goto out;
}
- rsp_data = kzalloc(rsp_size, GFP_KERNEL);
- if (!rsp_data) {
- efi_status = EFI_OUT_OF_RESOURCES;
- goto out_free_req;
- }
+ req_data = cmd_buf + req_offs;
+ rsp_data = cmd_buf + rsp_offs;
req_data->command_id = QSEE_CMD_UEFI_GET_VARIABLE;
req_data->data_size = buffer_size;
memcpy(((void *)req_data) + req_data->guid_offset, guid, req_data->guid_size);
- status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, rsp_size);
+ status = qcom_qseecom_app_send(qcuefi->client,
+ cmd_buf_dma + req_offs, req_size,
+ cmd_buf_dma + rsp_offs, rsp_size);
if (status) {
efi_status = EFI_DEVICE_ERROR;
goto out_free;
memcpy(data, ((void *)rsp_data) + rsp_data->data_offset, rsp_data->data_size);
out_free:
- kfree(rsp_data);
-out_free_req:
- kfree(req_data);
+ qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma);
out:
return efi_status;
}
struct qsee_rsp_uefi_set_variable *rsp_data;
efi_status_t efi_status = EFI_SUCCESS;
unsigned long name_length;
+ dma_addr_t cmd_buf_dma;
+ size_t cmd_buf_size;
+ void *cmd_buf;
size_t name_offs;
size_t guid_offs;
size_t data_offs;
size_t req_size;
+ size_t req_offs;
+ size_t rsp_offs;
ssize_t status;
if (!name || !guid)
__array_offs(u8, data_size, &data_offs)
);
- req_data = kzalloc(req_size, GFP_KERNEL);
- if (!req_data) {
+ cmd_buf_size = qcuefi_buf_align_fields(
+ __reqdata_offs(req_size, &req_offs)
+ __reqdata_offs(sizeof(*rsp_data), &rsp_offs)
+ );
+
+ cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL);
+ if (!cmd_buf) {
efi_status = EFI_OUT_OF_RESOURCES;
goto out;
}
- rsp_data = kzalloc(sizeof(*rsp_data), GFP_KERNEL);
- if (!rsp_data) {
- efi_status = EFI_OUT_OF_RESOURCES;
- goto out_free_req;
- }
+ req_data = cmd_buf + req_offs;
+ rsp_data = cmd_buf + rsp_offs;
req_data->command_id = QSEE_CMD_UEFI_SET_VARIABLE;
req_data->attributes = attributes;
if (data_size)
memcpy(((void *)req_data) + req_data->data_offset, data, req_data->data_size);
- status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data,
- sizeof(*rsp_data));
+ status = qcom_qseecom_app_send(qcuefi->client,
+ cmd_buf_dma + req_offs, req_size,
+ cmd_buf_dma + rsp_offs, sizeof(*rsp_data));
if (status) {
efi_status = EFI_DEVICE_ERROR;
goto out_free;
}
out_free:
- kfree(rsp_data);
-out_free_req:
- kfree(req_data);
+ qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma);
out:
return efi_status;
}
struct qsee_req_uefi_get_next_variable *req_data;
struct qsee_rsp_uefi_get_next_variable *rsp_data;
efi_status_t efi_status = EFI_SUCCESS;
+ dma_addr_t cmd_buf_dma;
+ size_t cmd_buf_size;
+ void *cmd_buf;
size_t guid_offs;
size_t name_offs;
size_t req_size;
size_t rsp_size;
+ size_t req_offs;
+ size_t rsp_offs;
ssize_t status;
if (!name_size || !name || !guid)
__array(*name, *name_size / sizeof(*name))
);
- req_data = kzalloc(req_size, GFP_KERNEL);
- if (!req_data) {
+ cmd_buf_size = qcuefi_buf_align_fields(
+ __reqdata_offs(req_size, &req_offs)
+ __reqdata_offs(rsp_size, &rsp_offs)
+ );
+
+ cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL);
+ if (!cmd_buf) {
efi_status = EFI_OUT_OF_RESOURCES;
goto out;
}
- rsp_data = kzalloc(rsp_size, GFP_KERNEL);
- if (!rsp_data) {
- efi_status = EFI_OUT_OF_RESOURCES;
- goto out_free_req;
- }
+ req_data = cmd_buf + req_offs;
+ rsp_data = cmd_buf + rsp_offs;
req_data->command_id = QSEE_CMD_UEFI_GET_NEXT_VARIABLE;
req_data->guid_offset = guid_offs;
goto out_free;
}
- status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, rsp_size);
+ status = qcom_qseecom_app_send(qcuefi->client,
+ cmd_buf_dma + req_offs, req_size,
+ cmd_buf_dma + rsp_offs, rsp_size);
if (status) {
efi_status = EFI_DEVICE_ERROR;
goto out_free;
}
out_free:
- kfree(rsp_data);
-out_free_req:
- kfree(req_data);
+ qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma);
out:
return efi_status;
}
struct qsee_req_uefi_query_variable_info *req_data;
struct qsee_rsp_uefi_query_variable_info *rsp_data;
efi_status_t efi_status = EFI_SUCCESS;
+ dma_addr_t cmd_buf_dma;
+ size_t cmd_buf_size;
+ void *cmd_buf;
+ size_t req_offs;
+ size_t rsp_offs;
int status;
- req_data = kzalloc(sizeof(*req_data), GFP_KERNEL);
- if (!req_data) {
+ cmd_buf_size = qcuefi_buf_align_fields(
+ __reqdata_offs(sizeof(*req_data), &req_offs)
+ __reqdata_offs(sizeof(*rsp_data), &rsp_offs)
+ );
+
+ cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL);
+ if (!cmd_buf) {
efi_status = EFI_OUT_OF_RESOURCES;
goto out;
}
- rsp_data = kzalloc(sizeof(*rsp_data), GFP_KERNEL);
- if (!rsp_data) {
- efi_status = EFI_OUT_OF_RESOURCES;
- goto out_free_req;
- }
+ req_data = cmd_buf + req_offs;
+ rsp_data = cmd_buf + rsp_offs;
req_data->command_id = QSEE_CMD_UEFI_QUERY_VARIABLE_INFO;
req_data->attributes = attr;
req_data->length = sizeof(*req_data);
- status = qcom_qseecom_app_send(qcuefi->client, req_data, sizeof(*req_data), rsp_data,
- sizeof(*rsp_data));
+ status = qcom_qseecom_app_send(qcuefi->client,
+ cmd_buf_dma + req_offs, sizeof(*req_data),
+ cmd_buf_dma + rsp_offs, sizeof(*rsp_data));
if (status) {
efi_status = EFI_DEVICE_ERROR;
goto out_free;
*max_variable_size = rsp_data->max_variable_size;
out_free:
- kfree(rsp_data);
-out_free_req:
- kfree(req_data);
+ qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma);
out:
return efi_status;
}
/**
* qcom_scm_qseecom_app_send() - Send to and receive data from a given QSEE app.
* @app_id: The ID of the target app.
- * @req: Request buffer sent to the app (must be DMA-mappable).
+ * @req: DMA address of the request buffer sent to the app.
* @req_size: Size of the request buffer.
- * @rsp: Response buffer, written to by the app (must be DMA-mappable).
+ * @rsp: DMA address of the response buffer, written to by the app.
* @rsp_size: Size of the response buffer.
*
* Sends a request to the QSEE app associated with the given ID and read back
*
* Return: Zero on success, nonzero on failure.
*/
-int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp,
- size_t rsp_size)
+int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size,
+ dma_addr_t rsp, size_t rsp_size)
{
struct qcom_scm_qseecom_resp res = {};
struct qcom_scm_desc desc = {};
- dma_addr_t req_phys;
- dma_addr_t rsp_phys;
int status;
- /* Map request buffer */
- req_phys = dma_map_single(__scm->dev, req, req_size, DMA_TO_DEVICE);
- status = dma_mapping_error(__scm->dev, req_phys);
- if (status) {
- dev_err(__scm->dev, "qseecom: failed to map request buffer\n");
- return status;
- }
-
- /* Map response buffer */
- rsp_phys = dma_map_single(__scm->dev, rsp, rsp_size, DMA_FROM_DEVICE);
- status = dma_mapping_error(__scm->dev, rsp_phys);
- if (status) {
- dma_unmap_single(__scm->dev, req_phys, req_size, DMA_TO_DEVICE);
- dev_err(__scm->dev, "qseecom: failed to map response buffer\n");
- return status;
- }
-
- /* Set up SCM call data */
desc.owner = QSEECOM_TZ_OWNER_TZ_APPS;
desc.svc = QSEECOM_TZ_SVC_APP_ID_PLACEHOLDER;
desc.cmd = QSEECOM_TZ_CMD_APP_SEND;
QCOM_SCM_RW, QCOM_SCM_VAL,
QCOM_SCM_RW, QCOM_SCM_VAL);
desc.args[0] = app_id;
- desc.args[1] = req_phys;
+ desc.args[1] = req;
desc.args[2] = req_size;
- desc.args[3] = rsp_phys;
+ desc.args[3] = rsp;
desc.args[4] = rsp_size;
- /* Perform call */
status = qcom_scm_qseecom_call(&desc, &res);
- /* Unmap buffers */
- dma_unmap_single(__scm->dev, rsp_phys, rsp_size, DMA_FROM_DEVICE);
- dma_unmap_single(__scm->dev, req_phys, req_size, DMA_TO_DEVICE);
-
if (status)
return status;
case 0x5e:
return GPIOPANELCTL;
default:
- return -EOPNOTSUPP;
+ return -ENOTSUPP;
}
}
{ .compatible = "nxp,lpc3220-gpio", },
{ },
};
+MODULE_DEVICE_TABLE(of, lpc32xx_gpio_of_match);
static struct platform_driver lpc32xx_gpio_driver = {
.driver = {
static void tng_irq_ack(struct irq_data *d)
{
- struct tng_gpio *priv = irq_data_get_irq_chip_data(d);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct tng_gpio *priv = gpiochip_get_data(gc);
irq_hw_number_t gpio = irqd_to_hwirq(d);
void __iomem *gisr;
u8 shift;
static void tng_irq_mask(struct irq_data *d)
{
- struct tng_gpio *priv = irq_data_get_irq_chip_data(d);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct tng_gpio *priv = gpiochip_get_data(gc);
irq_hw_number_t gpio = irqd_to_hwirq(d);
tng_irq_unmask_mask(priv, gpio, false);
static void tng_irq_unmask(struct irq_data *d)
{
- struct tng_gpio *priv = irq_data_get_irq_chip_data(d);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct tng_gpio *priv = gpiochip_get_data(gc);
irq_hw_number_t gpio = irqd_to_hwirq(d);
gpiochip_enable_irq(&priv->chip, gpio);
#define TEGRA186_GPIO_SCR_SEC_REN BIT(27)
#define TEGRA186_GPIO_SCR_SEC_G1W BIT(9)
#define TEGRA186_GPIO_SCR_SEC_G1R BIT(1)
-#define TEGRA186_GPIO_FULL_ACCESS (TEGRA186_GPIO_SCR_SEC_WEN | \
- TEGRA186_GPIO_SCR_SEC_REN | \
- TEGRA186_GPIO_SCR_SEC_G1R | \
- TEGRA186_GPIO_SCR_SEC_G1W)
-#define TEGRA186_GPIO_SCR_SEC_ENABLE (TEGRA186_GPIO_SCR_SEC_WEN | \
- TEGRA186_GPIO_SCR_SEC_REN)
/* control registers */
#define TEGRA186_GPIO_ENABLE_CONFIG 0x00
value = __raw_readl(secure + TEGRA186_GPIO_SCR);
- if ((value & TEGRA186_GPIO_SCR_SEC_ENABLE) == 0)
- return true;
+ /*
+ * When SCR_SEC_[R|W]EN is unset, then we have full read/write access to all the
+ * registers for given GPIO pin.
+ * When SCR_SEC[R|W]EN is set, then there is need to further check the accompanying
+ * SCR_SEC_G1[R|W] bit to determine read/write access to all the registers for given
+ * GPIO pin.
+ */
- if ((value & TEGRA186_GPIO_FULL_ACCESS) == TEGRA186_GPIO_FULL_ACCESS)
+ if (((value & TEGRA186_GPIO_SCR_SEC_REN) == 0 ||
+ ((value & TEGRA186_GPIO_SCR_SEC_REN) && (value & TEGRA186_GPIO_SCR_SEC_G1R))) &&
+ ((value & TEGRA186_GPIO_SCR_SEC_WEN) == 0 ||
+ ((value & TEGRA186_GPIO_SCR_SEC_WEN) && (value & TEGRA186_GPIO_SCR_SEC_G1W))))
return true;
return false;
unsigned int reg = type == CTRL_IN ? GPIO_IN_CTRL_BASE : GPIO_OUT_CTRL_BASE;
if (gpio >= WCOVE_GPIO_NUM)
- return -EOPNOTSUPP;
+ return -ENOTSUPP;
return reg + gpio;
}
GPIO_V2_LINE_EVENT_FALLING_EDGE;
}
+static inline char *make_irq_label(const char *orig)
+{
+ char *new;
+
+ if (!orig)
+ return NULL;
+
+ new = kstrdup_and_replace(orig, '/', ':', GFP_KERNEL);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ return new;
+}
+
+static inline void free_irq_label(const char *label)
+{
+ kfree(label);
+}
+
#ifdef CONFIG_HTE
static enum hte_return process_hw_ts_thread(void *p)
{
unsigned long irqflags;
int ret, level, irq;
+ char *label;
/* try hardware */
ret = gpiod_set_debounce(line->desc, debounce_period_us);
if (irq < 0)
return -ENXIO;
+ label = make_irq_label(line->req->label);
+ if (IS_ERR(label))
+ return -ENOMEM;
+
irqflags = IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING;
ret = request_irq(irq, debounce_irq_handler, irqflags,
- line->req->label, line);
- if (ret)
+ label, line);
+ if (ret) {
+ free_irq_label(label);
return ret;
+ }
line->irq = irq;
} else {
ret = hte_edge_setup(line, GPIO_V2_LINE_FLAG_EDGE_BOTH);
static void edge_detector_stop(struct line *line)
{
if (line->irq) {
- free_irq(line->irq, line);
+ free_irq_label(free_irq(line->irq, line));
line->irq = 0;
}
unsigned long irqflags = 0;
u64 eflags;
int irq, ret;
+ char *label;
eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS;
if (eflags && !kfifo_initialized(&line->req->events)) {
IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING;
irqflags |= IRQF_ONESHOT;
+ label = make_irq_label(line->req->label);
+ if (IS_ERR(label))
+ return PTR_ERR(label);
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread,
- irqflags, line->req->label, line);
- if (ret)
+ irqflags, label, line);
+ if (ret) {
+ free_irq_label(label);
return ret;
+ }
line->irq = irq;
return 0;
blocking_notifier_chain_unregister(&le->gdev->device_notifier,
&le->device_unregistered_nb);
if (le->irq)
- free_irq(le->irq, le);
+ free_irq_label(free_irq(le->irq, le));
if (le->desc)
gpiod_free(le->desc);
kfree(le->label);
int fd;
int ret;
int irq, irqflags = 0;
+ char *label;
if (copy_from_user(&eventreq, ip, sizeof(eventreq)))
return -EFAULT;
if (ret)
goto out_free_le;
+ label = make_irq_label(le->label);
+ if (IS_ERR(label)) {
+ ret = PTR_ERR(label);
+ goto out_free_le;
+ }
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq,
lineevent_irq_handler,
lineevent_irq_thread,
irqflags,
- le->label,
+ label,
le);
- if (ret)
+ if (ret) {
+ free_irq_label(label);
goto out_free_le;
+ }
le->irq = irq;
list_for_each_entry_srcu(gdev, &gpio_devices, list,
srcu_read_lock_held(&gpio_devices_srcu)) {
+ if (!device_is_registered(&gdev->dev))
+ continue;
+
guard(srcu)(&gdev->srcu);
gc = srcu_dereference(gdev->chip, &gdev->srcu);
}
EXPORT_SYMBOL_GPL(gpiochip_dup_line_label);
+static inline const char *function_name_or_default(const char *con_id)
+{
+ return con_id ?: "(default)";
+}
+
/**
* gpiochip_request_own_desc - Allow GPIO chip to request its own descriptor
* @gc: GPIO chip
enum gpiod_flags dflags)
{
struct gpio_desc *desc = gpiochip_get_desc(gc, hwnum);
+ const char *name = function_name_or_default(label);
int ret;
if (IS_ERR(desc)) {
- chip_err(gc, "failed to get GPIO descriptor\n");
+ chip_err(gc, "failed to get GPIO %s descriptor\n", name);
return desc;
}
ret = gpiod_configure_flags(desc, label, lflags, dflags);
if (ret) {
- chip_err(gc, "setup of own GPIO %s failed\n", label);
gpiod_free_commit(desc);
+ chip_err(gc, "setup of own GPIO %s failed\n", name);
return ERR_PTR(ret);
}
enum gpiod_flags *flags,
unsigned long *lookupflags)
{
+ const char *name = function_name_or_default(con_id);
struct gpio_desc *desc = ERR_PTR(-ENOENT);
if (is_of_node(fwnode)) {
- dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n",
- fwnode, con_id);
+ dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n", fwnode, name);
desc = of_find_gpio(to_of_node(fwnode), con_id, idx, lookupflags);
} else if (is_acpi_node(fwnode)) {
- dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n",
- fwnode, con_id);
+ dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n", fwnode, name);
desc = acpi_find_gpio(fwnode, con_id, idx, flags, lookupflags);
} else if (is_software_node(fwnode)) {
- dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n",
- fwnode, con_id);
+ dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n", fwnode, name);
desc = swnode_find_gpio(fwnode, con_id, idx, lookupflags);
}
bool platform_lookup_allowed)
{
unsigned long lookupflags = GPIO_LOOKUP_FLAGS_DEFAULT;
+ const char *name = function_name_or_default(con_id);
/*
* scoped_guard() is implemented as a for loop, meaning static
* analyzers will complain about these two not being initialized.
}
if (IS_ERR(desc)) {
- dev_dbg(consumer, "No GPIO consumer %s found\n",
- con_id);
+ dev_dbg(consumer, "No GPIO consumer %s found\n", name);
return desc;
}
*
* FIXME: Make this more sane and safe.
*/
- dev_info(consumer,
- "nonexclusive access to GPIO for %s\n", con_id);
+ dev_info(consumer, "nonexclusive access to GPIO for %s\n", name);
return desc;
}
ret = gpiod_configure_flags(desc, con_id, lookupflags, flags);
if (ret < 0) {
- dev_dbg(consumer, "setup of GPIO %s failed\n", con_id);
gpiod_put(desc);
+ dev_dbg(consumer, "setup of GPIO %s failed\n", name);
return ERR_PTR(ret);
}
int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
unsigned long lflags, enum gpiod_flags dflags)
{
+ const char *name = function_name_or_default(con_id);
int ret;
if (lflags & GPIO_ACTIVE_LOW)
/* No particular flag request, return here... */
if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) {
- gpiod_dbg(desc, "no flags found for %s\n", con_id);
+ gpiod_dbg(desc, "no flags found for GPIO %s\n", name);
return 0;
}
extern int amdgpu_mcbp;
extern int amdgpu_discovery;
extern int amdgpu_mes;
+extern int amdgpu_mes_log_enable;
extern int amdgpu_mes_kiq;
extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
err_bo_create:
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
err_reserve_limit:
+ amdgpu_sync_free(&(*mem)->sync);
mutex_destroy(&(*mem)->lock);
if (gobj)
drm_gem_object_put(gobj);
amdgpu_sync_create(&sync_obj);
- /* Validate BOs and map them to GPUVM (update VM page tables). */
+ /* Validate BOs managed by KFD */
list_for_each_entry(mem, &process_info->kfd_bo_list,
validate_list) {
struct amdgpu_bo *bo = mem->bo;
uint32_t domain = mem->domain;
- struct kfd_mem_attachment *attachment;
struct dma_resv_iter cursor;
struct dma_fence *fence;
goto validate_map_fail;
}
}
+ }
+
+ if (failed_size)
+ pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+
+ /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
+ * validations above would invalidate DMABuf imports again.
+ */
+ ret = process_validate_vms(process_info, &exec.ticket);
+ if (ret) {
+ pr_debug("Validating VMs failed, ret: %d\n", ret);
+ goto validate_map_fail;
+ }
+
+ /* Update mappings managed by KFD. */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+ struct kfd_mem_attachment *attachment;
+
list_for_each_entry(attachment, &mem->attachments, list) {
if (!attachment->is_mapped)
continue;
}
}
- if (failed_size)
- pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
-
- /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
- * validations above would invalidate DMABuf imports again.
- */
- ret = process_validate_vms(process_info, &exec.ticket);
- if (ret) {
- pr_debug("Validating VMs failed, ret: %d\n", ret);
- goto validate_map_fail;
- }
-
/* Update mappings not managed by KFD */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
p->bytes_moved += ctx.bytes_moved;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- amdgpu_bo_in_cpu_visible_vram(bo))
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
adev->ip_blocks[i].status.hw = true;
}
}
+ } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+ !amdgpu_device_has_display_hardware(adev)) {
+ r = psp_gpu_reset(adev);
} else {
- tmp = amdgpu_reset_method;
- /* It should do a default reset when loading or reloading the driver,
- * regardless of the module parameter reset_method.
- */
- amdgpu_reset_method = AMD_RESET_METHOD_NONE;
- r = amdgpu_asic_reset(adev);
- amdgpu_reset_method = tmp;
- if (r) {
- dev_err(adev->dev, "asic reset on init failed\n");
- goto failed;
- }
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
+ }
+
+ if (r) {
+ dev_err(adev->dev, "asic reset on init failed\n");
+ goto failed;
}
}
if (r)
goto unprepare;
+ flush_delayed_work(&adev->gfx.gfx_off_delay_work);
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
break;
case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
break;
default:
{
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
if (amdgpu_umsch_mm & 0x1) {
amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block);
adev->enable_umsch_mm = true;
int amdgpu_mcbp = -1;
int amdgpu_discovery = -1;
int amdgpu_mes;
+int amdgpu_mes_log_enable = 0;
int amdgpu_mes_kiq;
int amdgpu_noretry = -1;
int amdgpu_force_asic_type = -1;
"Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");
module_param_named(mes, amdgpu_mes, int, 0444);
+/**
+ * DOC: mes_log_enable (int)
+ * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_log_enable,
+ "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444);
+
/**
* DOC: mes_kiq (int)
* Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
dma_fence_set_error(finished, -ECANCELED);
if (finished->error < 0) {
- DRM_INFO("Skip scheduling IBs!\n");
+ dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)",
+ ring->name);
} else {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
&fence);
if (r)
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ dev_err(adev->dev,
+ "Error scheduling IBs (%d) in ring(%s)", r,
+ ring->name);
}
job->job_run_counter++;
{
int r;
- r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ if (!amdgpu_mes_log_enable)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&adev->mes.event_log_gpu_obj,
&adev->mes.event_log_gpu_addr,
return;
amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id);
+ del_timer_sync(&ring->fence_drv.fallback_timer);
amdgpu_ring_fini(ring);
kfree(ring);
}
uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
- mem, PAGE_SIZE, false);
+ mem, AMDGPU_MES_LOG_BUFFER_SIZE, false);
return 0;
}
-
DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
#endif
#if defined(CONFIG_DEBUG_FS)
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct dentry *root = minor->debugfs_root;
- if (adev->enable_mes)
+ if (adev->enable_mes && amdgpu_mes_log_enable)
debugfs_create_file("amdgpu_mes_event_log", 0444, root,
adev, &amdgpu_debugfs_mes_event_log_fops);
#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
+#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */
struct amdgpu_mes_funcs;
else
amdgpu_bo_placement_from_domain(bo, bp->domain);
if (bp->type == ttm_bo_type_kernel)
+ bo->tbo.priority = 2;
+ else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE))
bo->tbo.priority = 1;
if (!bp->destroy)
return r;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- bo->tbo.resource->mem_type == TTM_PL_VRAM &&
- amdgpu_bo_in_cpu_visible_vram(bo))
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
ctx.bytes_moved);
else
void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
struct amdgpu_mem_stats *stats)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_resource *res = bo->tbo.resource;
uint64_t size = amdgpu_bo_size(bo);
struct drm_gem_object *obj;
unsigned int domain;
bool shared;
/* Abort if the BO doesn't currently have a backing store */
- if (!bo->tbo.resource)
+ if (!res)
return;
obj = &bo->tbo.base;
shared = drm_gem_object_is_shared_for_memory_stats(obj);
- domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
+ domain = amdgpu_mem_type_to_domain(res->mem_type);
switch (domain) {
case AMDGPU_GEM_DOMAIN_VRAM:
stats->vram += size;
- if (amdgpu_bo_in_cpu_visible_vram(bo))
+ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
stats->visible_vram += size;
if (shared)
stats->vram_shared += size;
/* Remember that this BO was accessed by the CPU */
abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- if (bo->resource->mem_type != TTM_PL_VRAM)
- return 0;
-
- if (amdgpu_bo_in_cpu_visible_vram(abo))
+ if (amdgpu_res_cpu_visible(adev, bo->resource))
return 0;
/* Can't move a pinned BO to visible VRAM */
/* this should never happen */
if (bo->resource->mem_type == TTM_PL_VRAM &&
- !amdgpu_bo_in_cpu_visible_vram(abo))
+ !amdgpu_res_cpu_visible(adev, bo->resource))
return VM_FAULT_SIGBUS;
ttm_bo_move_to_lru_tail_unlocked(bo);
*/
u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct dma_buf_attachment *attachment;
struct dma_buf *dma_buf;
const char *placement;
if (dma_resv_trylock(bo->tbo.base.resv)) {
unsigned int domain;
+
domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
switch (domain) {
case AMDGPU_GEM_DOMAIN_VRAM:
- if (amdgpu_bo_in_cpu_visible_vram(bo))
+ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
placement = "VRAM VISIBLE";
else
placement = "VRAM";
return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);
}
-/**
- * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
- */
-static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_res_cursor cursor;
-
- if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM)
- return false;
-
- amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
- while (cursor.remaining) {
- if (cursor.start < adev->gmc.visible_vram_size)
- return true;
-
- amdgpu_res_next(&cursor, cursor.size);
- }
-
- return false;
-}
-
/**
* amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
*/
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
volatile u32 *mqd;
- int r;
+ u32 *kbuf;
+ int r, i;
uint32_t value, result;
if (*pos & 3 || size & 3)
return -EINVAL;
- result = 0;
+ kbuf = kmalloc(ring->mqd_size, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
- return r;
+ goto err_free;
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
- if (r) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
+ if (r)
+ goto err_unreserve;
+ /*
+ * Copy to local buffer to avoid put_user(), which might fault
+ * and acquire mmap_sem, under reservation_ww_class_mutex.
+ */
+ for (i = 0; i < ring->mqd_size/sizeof(u32); i++)
+ kbuf[i] = mqd[i];
+
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ amdgpu_bo_unreserve(ring->mqd_obj);
+
+ result = 0;
while (size) {
if (*pos >= ring->mqd_size)
- goto done;
+ break;
- value = mqd[*pos/4];
+ value = kbuf[*pos/4];
r = put_user(value, (uint32_t *)buf);
if (r)
- goto done;
+ goto err_free;
buf += 4;
result += 4;
size -= 4;
*pos += 4;
}
-done:
- amdgpu_bo_kunmap(ring->mqd_obj);
- mqd = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- if (r)
- return r;
-
+ kfree(kbuf);
return result;
+
+err_unreserve:
+ amdgpu_bo_unreserve(ring->mqd_obj);
+err_free:
+ kfree(kbuf);
+ return r;
}
static const struct file_operations amdgpu_debugfs_mqd_fops = {
} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
- amdgpu_bo_in_cpu_visible_vram(abo)) {
+ amdgpu_res_cpu_visible(adev, bo->resource)) {
/* Try evicting to the CPU inaccessible part of VRAM
* first, but only set GTT as busy placement, so this
return r;
}
-/*
- * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+/**
+ * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
+ * @adev: amdgpu device
+ * @res: the resource to check
*
- * Called by amdgpu_bo_move()
+ * Returns: true if the full resource is CPU visible, false otherwise.
*/
-static bool amdgpu_mem_visible(struct amdgpu_device *adev,
- struct ttm_resource *mem)
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res)
{
- u64 mem_size = (u64)mem->size;
struct amdgpu_res_cursor cursor;
- u64 end;
- if (mem->mem_type == TTM_PL_SYSTEM ||
- mem->mem_type == TTM_PL_TT)
+ if (!res)
+ return false;
+
+ if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
+ res->mem_type == AMDGPU_PL_PREEMPT)
return true;
- if (mem->mem_type != TTM_PL_VRAM)
+
+ if (res->mem_type != TTM_PL_VRAM)
return false;
- amdgpu_res_first(mem, 0, mem_size, &cursor);
- end = cursor.start + cursor.size;
+ amdgpu_res_first(res, 0, res->size, &cursor);
while (cursor.remaining) {
+ if ((cursor.start + cursor.size) >= adev->gmc.visible_vram_size)
+ return false;
amdgpu_res_next(&cursor, cursor.size);
+ }
- if (!cursor.remaining)
- break;
+ return true;
+}
- /* ttm_resource_ioremap only supports contiguous memory */
- if (end != cursor.start)
- return false;
+/*
+ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_res_copyable(struct amdgpu_device *adev,
+ struct ttm_resource *mem)
+{
+ if (!amdgpu_res_cpu_visible(adev, mem))
+ return false;
- end = cursor.start + cursor.size;
- }
+ /* ttm_resource_ioremap only supports contiguous memory */
+ if (mem->mem_type == TTM_PL_VRAM &&
+ !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
+ return false;
- return end <= adev->gmc.visible_vram_size;
+ return true;
}
/*
if (r) {
/* Check that all memory is CPU accessible */
- if (!amdgpu_mem_visible(adev, old_mem) ||
- !amdgpu_mem_visible(adev, new_mem)) {
+ if (!amdgpu_res_copyable(adev, old_mem) ||
+ !amdgpu_res_copyable(adev, new_mem)) {
pr_err("Move buffer fallback to memcpy unavailable\n");
return r;
}
struct ttm_resource *mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- size_t bus_size = (size_t)mem->size;
switch (mem->mem_type) {
case TTM_PL_SYSTEM:
break;
case TTM_PL_VRAM:
mem->bus.offset = mem->start << PAGE_SHIFT;
- /* check if it's visible */
- if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
- return -EINVAL;
if (adev->mman.aper_base_kaddr &&
mem->placement & TTM_PL_FLAG_CONTIGUOUS)
int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start);
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res);
+
int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
mqd->rptr_val = 0;
mqd->unmapped = 1;
+ if (adev->vpe.collaborate_mode)
+ memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO));
+
qinfo->mqd_addr = test->mqd_data_gpu_addr;
qinfo->csa_addr = test->ctx_data_gpu_addr +
offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa);
- qinfo->doorbell_offset_0 = (adev->doorbell_index.vpe_ring + 1) << 1;
+ qinfo->doorbell_offset_0 = 0;
qinfo->doorbell_offset_1 = 0;
}
ring[5] = 0;
mqd->wptr_val = (6 << 2);
- // WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val);
+ if (adev->vpe.collaborate_mode)
+ (++mqd)->wptr_val = (6 << 2);
+
+ WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val);
for (i = 0; i < adev->usec_timeout; i++) {
if (*fence == test_pattern)
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
fw_name = "amdgpu/umsch_mm_4_0_0.bin";
break;
default:
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
umsch_mm_v4_0_set_funcs(&adev->umsch_mm);
break;
default:
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend)
+ return 0;
+
return umsch_mm_test(adev);
}
UMSCH_SWIP_ENGINE_TYPE_MAX
};
-enum UMSCH_SWIP_AFFINITY_TYPE {
- UMSCH_SWIP_AFFINITY_TYPE_ANY = 0,
- UMSCH_SWIP_AFFINITY_TYPE_VCN0 = 1,
- UMSCH_SWIP_AFFINITY_TYPE_VCN1 = 2,
- UMSCH_SWIP_AFFINITY_TYPE_MAX
-};
-
enum UMSCH_CONTEXT_PRIORITY_LEVEL {
CONTEXT_PRIORITY_LEVEL_IDLE = 0,
CONTEXT_PRIORITY_LEVEL_NORMAL = 1,
struct umsch_mm_set_resource_input {
uint32_t vmid_mask_mm_vcn;
uint32_t vmid_mask_mm_vpe;
+ uint32_t collaboration_mask_vpe;
uint32_t logging_vmid;
uint32_t engine_mask;
union {
struct {
uint32_t disable_reset : 1;
uint32_t disable_umsch_mm_log : 1;
- uint32_t reserved : 30;
+ uint32_t use_rs64mem_for_proc_ctx_csa : 1;
+ uint32_t reserved : 29;
};
uint32_t uint32_all;
};
uint32_t doorbell_offset_1;
enum UMSCH_SWIP_ENGINE_TYPE engine_type;
uint32_t affinity;
- enum UMSCH_SWIP_AFFINITY_TYPE affinity_type;
uint64_t mqd_addr;
uint64_t h_context;
uint64_t h_queue;
uint32_t vm_context_cntl;
+ uint32_t process_csa_array_index;
+ uint32_t context_csa_array_index;
+
struct {
uint32_t is_context_suspended : 1;
- uint32_t reserved : 31;
+ uint32_t collaboration_mode : 1;
+ uint32_t reserved : 30;
};
};
uint32_t doorbell_offset_0;
uint32_t doorbell_offset_1;
uint64_t context_csa_addr;
+ uint32_t context_csa_array_index;
};
struct MQD_INFO {
uint32_t wptr_val;
uint32_t rptr_val;
uint32_t unmapped;
+ uint32_t vmid;
};
struct amdgpu_umsch_mm;
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
+/* Validate operation parameters to prevent potential abuse */
+static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo,
+ uint64_t saddr,
+ uint64_t offset,
+ uint64_t size)
+{
+ uint64_t tmp, lpfn;
+
+ if (saddr & AMDGPU_GPU_PAGE_MASK
+ || offset & AMDGPU_GPU_PAGE_MASK
+ || size & AMDGPU_GPU_PAGE_MASK)
+ return -EINVAL;
+
+ if (check_add_overflow(saddr, size, &tmp)
+ || check_add_overflow(offset, size, &tmp)
+ || size == 0 /* which also leads to end < begin */)
+ return -EINVAL;
+
+ /* make sure object fit at this offset */
+ if (bo && offset + size > amdgpu_bo_size(bo))
+ return -EINVAL;
+
+ /* Ensure last pfn not exceed max_pfn */
+ lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT;
+ if (lpfn >= adev->vm_manager.max_pfn)
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* amdgpu_vm_bo_map - map bo inside a vm
*
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
uint64_t eaddr;
+ int r;
- /* validate the parameters */
- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
- return -EINVAL;
- if (saddr + size <= saddr || offset + size <= offset)
- return -EINVAL;
-
- /* make sure object fit at this offset */
- eaddr = saddr + size - 1;
- if ((bo && offset + size > amdgpu_bo_size(bo)) ||
- (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
- return -EINVAL;
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
if (tmp) {
uint64_t eaddr;
int r;
- /* validate the parameters */
- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
- return -EINVAL;
- if (saddr + size <= saddr || offset + size <= offset)
- return -EINVAL;
-
- /* make sure object fit at this offset */
- eaddr = saddr + size - 1;
- if ((bo && offset + size > amdgpu_bo_size(bo)) ||
- (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
- return -EINVAL;
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
/* Allocate all the needed memory */
mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
}
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
mapping->start = saddr;
mapping->last = eaddr;
struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
LIST_HEAD(removed);
uint64_t eaddr;
+ int r;
+
+ r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size);
+ if (r)
+ return r;
- eaddr = saddr + size - 1;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
/* Allocate all the needed memory */
before = kzalloc(sizeof(*before), GFP_KERNEL);
dpm_ctl &= 0xfffffffe; /* Disable DPM */
WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__);
- return 0;
+ return -EINVAL;
}
int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev)
struct amdgpu_vpe *vpe = &adev->vpe;
int ret;
+ /* Power on VPE */
+ ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+ AMD_PG_STATE_UNGATE);
+ if (ret)
+ return ret;
+
ret = vpe_load_microcode(vpe);
if (ret)
return ret;
adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
}
+static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev)
+{
+ return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst);
+}
+
static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
uint32_t inst_idx, struct amdgpu_ring *ring)
{
case AMDGPU_RING_TYPE_VCN_ENC:
case AMDGPU_RING_TYPE_VCN_JPEG:
ip_blk = AMDGPU_XCP_VCN;
- if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+ if (aqua_vanjaram_xcp_vcn_shared(adev))
inst_mask = 1 << (inst_idx * 2);
break;
default:
aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
- /* VCN is shared by two partitions under CPX MODE */
+ /* VCN may be shared by two partitions under CPX MODE in certain
+ * configs.
+ */
if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
- ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
- adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
+ aqua_vanjaram_xcp_vcn_shared(adev))
aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
}
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* VM_FLUSH */
+ 4 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
7 + /* gfx_v10_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
}
- active_rb_bitmap |= global_active_rb_bitmap;
+ active_rb_bitmap &= global_active_rb_bitmap;
adev->gfx.config.backend_enable_mask = active_rb_bitmap;
adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
}
/* Make sure that we can't skip the SET_Q_MODE packets when the VM
* changed in any way.
*/
+ ring->set_q_mode_offs = 0;
ring->set_q_mode_ptr = NULL;
}
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* VM_FLUSH */
+ 4 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
5 + /* COND_EXEC */
7 + /* gfx_v11_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v11_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
.emit_ib = gfx_v11_0_ring_emit_ib_compute,
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7 + /* gfx_v9_0_emit_mem_sync */
5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
mes_set_hw_res_pkt.oversubscription_timer = 50;
- mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
- mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
+ if (amdgpu_mes_log_enable) {
+ mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+ mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
+ mes->event_log_gpu_addr;
+ }
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
u32 ref_and_mask = 0;
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0
+ << (ring->me % adev->sdma.num_inst_per_aid);
sdma_v4_4_2_wait_reg_mem(ring, 0, 1,
adev->nbio.funcs->get_hdp_flush_done_offset(adev),
u32 sdma_cntl;
sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL,
- DRAM_ECC_INT_ENABLE, 0);
- WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
- break;
- /* sdma ecc interrupt is enabled by default
- * driver doesn't need to do anything to
- * enable the interrupt */
- case AMDGPU_IRQ_STATE_ENABLE:
- default:
- break;
- }
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
return 0;
}
u32 ref_and_mask = 0;
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
-
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
- SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
- SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
- amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
- amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
- amdgpu_ring_write(ring, ref_and_mask); /* reference */
- amdgpu_ring_write(ring, ref_and_mask); /* mask */
- amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
- SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+ if (ring->me > 1) {
+ amdgpu_asic_flush_hdp(adev, ring);
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+ }
}
/**
{
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
- return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC);
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
- return false;
default:
return true;
}
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_GFX_PG;
- adev->external_rev_id = adev->rev_id + 0x1;
+ if (adev->rev_id == 0)
+ adev->external_rev_id = 0x1;
+ else
+ adev->external_rev_id = adev->rev_id + 0x10;
break;
case IP_VERSION(11, 5, 1):
adev->cg_flags =
return soc21_common_hw_fini(adev);
}
+static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
+{
+ u32 sol_reg1, sol_reg2;
+
+ /* Will reset for the following suspend abort cases.
+ * 1) Only reset dGPU side.
+ * 2) S3 suspend got aborted and TOS is active.
+ */
+ if (!(adev->flags & AMD_IS_APU) && adev->in_s3 &&
+ !adev->suspend_complete) {
+ sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+ msleep(100);
+ sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+ return (sol_reg1 != sol_reg2);
+ }
+
+ return false;
+}
+
static int soc21_common_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (soc21_need_reset_on_resume(adev)) {
+ dev_info(adev->dev, "S3 suspend aborted, resetting...");
+ soc21_asic_reset(adev);
+ }
+
return soc21_common_hw_init(adev);
}
umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr;
- if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) {
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) {
WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG,
1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT);
SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS,
WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size);
+ ring->wptr = 0;
+
data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE);
data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK);
WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data);
data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0);
WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data);
- if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) {
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) {
WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG,
2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT);
SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS,
set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn;
set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe;
+ set_hw_resources.collaboration_mask_vpe =
+ adev->vpe.collaborate_mode ? 0x3 : 0x0;
set_hw_resources.engine_mask = umsch->engine_mask;
set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask;
add_queue.h_queue = input_ptr->h_queue;
add_queue.vm_context_cntl = input_ptr->vm_context_cntl;
add_queue.is_context_suspended = input_ptr->is_context_suspended;
+ add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0;
add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr;
add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq;
WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret);
}
+ /* setup collaborate mode */
+ vpe_v6_1_set_collaborate_mode(vpe, true);
+ /* setup DPM */
+ if (amdgpu_vpe_configure_dpm(vpe))
+ dev_warn(adev->dev, "VPE failed to enable DPM\n");
+
/*
* For VPE 6.1.1, still only need to add master's offset, and psp will apply it to slave as well.
* Here use instance 0 as master.
adev->vpe.cmdbuf_cpu_addr[0] = f32_offset;
adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl;
- amdgpu_vpe_psp_update_sram(adev);
- vpe_v6_1_set_collaborate_mode(vpe, true);
- amdgpu_vpe_configure_dpm(vpe);
-
- return 0;
+ return amdgpu_vpe_psp_update_sram(adev);
}
vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data;
}
vpe_v6_1_halt(vpe, false);
- vpe_v6_1_set_collaborate_mode(vpe, true);
- amdgpu_vpe_configure_dpm(vpe);
return 0;
}
* nodes, but not more than args->num_of_nodes as that is
* the amount of memory allocated by user
*/
- pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
- args->num_of_nodes), GFP_KERNEL);
+ pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),
+ GFP_KERNEL);
if (!pa)
return -ENOMEM;
/* Find a KFD GPU device that supports the get_dmabuf_info query */
for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
- if (dev)
+ if (dev && !kfd_devcgroup_check_permission(dev))
break;
if (!dev)
return -EINVAL;
if (xcp_id >= 0)
args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;
else
- args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id;
+ args->gpu_id = dev->id;
args->flags = flags;
/* Copy metadata buffer to user mode */
{
struct kfd_node *node;
int i;
- int count;
if (!kfd->init_complete)
return;
/* for runtime suspend, skip locking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = ++kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
/* For first KFD device suspend all the KFD processes */
- if (count == 1)
+ if (++kfd_locked == 1)
kfd_suspend_all_processes();
+ mutex_unlock(&kfd_processes_mutex);
}
for (i = 0; i < kfd->num_nodes; i++) {
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
- int ret, count, i;
+ int ret, i;
if (!kfd->init_complete)
return 0;
/* for runtime resume, skip unlocking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = --kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
- WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
- if (count == 0)
+ if (--kfd_locked == 0)
ret = kfd_resume_all_processes();
+ WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
+ mutex_unlock(&kfd_processes_mutex);
}
return ret;
dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
while (halt_if_hws_hang)
schedule();
+ kfd_hws_hang(dqm);
return -ETIME;
}
break;
}
kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23);
- } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
+ } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
/* CP */
if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
kfd_signal_event_interrupt(pasid, context_id0, 32);
- else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
+ else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)))
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_CTXID0_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)),
break;
}
kfd_signal_event_interrupt(pasid, sq_int_data, 24);
- } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
+ } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
start = start_mgr << PAGE_SHIFT;
end = (last_mgr + 1) << PAGE_SHIFT;
+ r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
+ prange->npages * PAGE_SIZE,
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+ node->xcp ? node->xcp->id : 0);
+ if (r) {
+ dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r);
+ return -ENOSPC;
+ }
+
r = svm_range_vram_node_new(node, prange, true);
if (r) {
dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
- return r;
+ goto out;
}
ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT;
svm_range_vram_node_free(prange);
}
+out:
+ amdgpu_amdkfd_unreserve_mem_limit(node->adev,
+ prange->npages * PAGE_SIZE,
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+ node->xcp ? node->xcp->id : 0);
return r < 0 ? r : 0;
}
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
{
- return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) ||
+ return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) ||
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
}
mutex_lock(&kfd_processes_mutex);
if (kfd_is_locked()) {
- mutex_unlock(&kfd_processes_mutex);
pr_debug("KFD is locked! Cannot create process");
- return ERR_PTR(-EINVAL);
+ process = ERR_PTR(-EINVAL);
+ goto out;
}
/* A prior open of /dev/kfd could have already created the process. */
rcu_read_lock();
ef = dma_fence_get_rcu_safe(&p->ef);
rcu_read_unlock();
+ if (!ef)
+ return -EINVAL;
ret = dma_fence_signal(ef);
dma_fence_put(ef);
* they are responsible stopping the queues and scheduling
* the restore work.
*/
- if (!signal_eviction_fence(p))
- queue_delayed_work(kfd_restore_wq, &p->restore_work,
- msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
- else
+ if (signal_eviction_fence(p) ||
+ mod_delayed_work(kfd_restore_wq, &p->restore_work,
+ msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
kfd_process_restore_queues(p);
pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
if (ret) {
pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
p->pasid, PROCESS_BACK_OFF_TIME_MS);
- ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
- msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
- WARN(!ret, "reschedule restore work failed\n");
+ if (mod_delayed_work(kfd_restore_wq, &p->restore_work,
+ msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
+ kfd_process_restore_queues(p);
}
}
mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r;
- return r;
+ return 0;
}
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
#define FIRMWARE_DCN_35_DMUB "amdgpu/dcn_3_5_dmcub.bin"
MODULE_FIRMWARE(FIRMWARE_DCN_35_DMUB);
+#define FIRMWARE_DCN_351_DMUB "amdgpu/dcn_3_5_1_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_351_DMUB);
+
/* Number of bytes in PSP header for firmware. */
#define PSP_HEADER_BYTES 0x100
dc_stream_release(dm_new_crtc_state->stream);
dm_new_crtc_state->stream = NULL;
}
+ dm_new_crtc_state->base.color_mgmt_changed = true;
}
for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) {
/* Do mst topology probing after resuming cached state*/
drm_connector_list_iter_begin(ddev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type != dc_connection_mst_branch ||
aconnector->mst_root)
fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
break;
case IP_VERSION(3, 5, 0):
- case IP_VERSION(3, 5, 1):
fw_name_dmub = FIRMWARE_DCN_35_DMUB;
break;
+ case IP_VERSION(3, 5, 1):
+ fw_name_dmub = FIRMWARE_DCN_351_DMUB;
+ break;
default:
/* ASIC doesn't support DMUB. */
return 0;
&aconnector->base.probed_modes :
&aconnector->base.modes;
+ if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ return NULL;
+
if (aconnector->freesync_vid_base.clock != 0)
return &aconnector->freesync_vid_base;
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket);
- else if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
- stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
- stream->signal == SIGNAL_TYPE_EDP) {
+
+ if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+ stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+ stream->signal == SIGNAL_TYPE_EDP) {
//
// should decide stream support vsc sdp colorimetry capability
// before building vsc info packet
//
- stream->use_vsc_sdp_for_colorimetry = false;
- if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- stream->use_vsc_sdp_for_colorimetry =
- aconnector->dc_sink->is_vsc_sdp_colorimetry_supported;
- } else {
- if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
- stream->use_vsc_sdp_for_colorimetry = true;
- }
+ stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
+ stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED;
+
if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
tf = TRANSFER_FUNC_GAMMA_22;
mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
+ aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
- if (stream->link->psr_settings.psr_feature_enabled)
- aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
}
finish:
dc_sink_release(sink);
if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
continue;
+notify:
if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
continue;
-notify:
aconnector = to_amdgpu_dm_connector(connector);
mutex_lock(&adev->dm.audio_lock);
* amdgpu_dm_psr_enable() - enable psr f/w
* @stream: stream state
*
- * Return: true if success
*/
-bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
+void amdgpu_dm_psr_enable(struct dc_stream_state *stream)
{
struct dc_link *link = stream->link;
unsigned int vsync_rate_hz = 0;
if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1)
power_opt |= psr_power_opt_z10_static_screen;
- return dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt);
+ dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt);
+
+ if (link->ctx->dc->caps.ips_support)
+ dc_allow_idle_optimizations(link->ctx->dc, true);
}
/*
#define AMDGPU_DM_PSR_ENTRY_DELAY 5
void amdgpu_dm_set_psr_caps(struct dc_link *link);
-bool amdgpu_dm_psr_enable(struct dc_stream_state *stream);
+void amdgpu_dm_psr_enable(struct dc_stream_state *stream);
bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream);
bool amdgpu_dm_psr_disable(struct dc_stream_state *stream);
bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm);
static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector)
{
- struct drm_device *dev = connector->dev;
-
- return drm_add_modes_noedid(connector, dev->mode_config.max_width,
- dev->mode_config.max_height);
+ /* Maximum resolution supported by DWB */
+ return drm_add_modes_noedid(connector, 3840, 2160);
}
static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector,
return display_count;
}
-static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+ bool safe_to_lower, bool disable)
{
struct dc *dc = clk_mgr_base->ctx->dc;
int i;
for (i = 0; i < dc->res_pool->pipe_count; ++i) {
- struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe = safe_to_lower
+ ? &context->res_ctx.pipe_ctx[i]
+ : &dc->current_state->res_ctx.pipe_ctx[i];
if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
- if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
- dc_is_virtual_signal(pipe->stream->signal))) {
+ if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
+ !pipe->stream->link_enc)) {
if (disable) {
- pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+ if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+ pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+
reset_sync_context_for_pipe(dc, context, i);
} else
pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
}
if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
- dcn316_disable_otg_wa(clk_mgr_base, context, true);
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
- dcn316_disable_otg_wa(clk_mgr_base, context, false);
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
update_dispclk = true;
}
#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L
+#define regCLK5_0_CLK5_spll_field_8 0x464b
+#define regCLK5_0_CLK5_spll_field_8_BASE_IDX 0
+
+#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT 0xd
+#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
+
+#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0
+
#define REG(reg_name) \
(ctx->clk_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
{
}
+static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dc_context *ctx = clk_mgr->base.ctx;
+ uint32_t ssc_enable;
+
+ REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable);
+
+ return ssc_enable == 1;
+}
+
static void init_clk_states(struct clk_mgr *clk_mgr)
{
+ struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
+ if (clk_mgr_int->smu_ver >= SMU_VER_THRESHOLD)
+ clk_mgr->clks.dtbclk_en = true; // request DTBCLK disable on first commit
clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk
clk_mgr->clks.p_state_change_support = true;
clk_mgr->clks.prev_p_state_change_support = true;
void dcn35_init_clocks(struct clk_mgr *clk_mgr)
{
+ struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
init_clk_states(clk_mgr);
+
+ // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk
+ if (dcn35_is_spll_ssc_enabled(clk_mgr))
+ clk_mgr->dp_dto_source_clock_in_khz =
+ dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz);
+ else
+ clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+
}
static struct clk_bw_params dcn35_bw_params = {
.vram_type = Ddr4MemType,
static struct dcn35_watermarks dummy_wms = { 0 };
+static struct dcn35_ss_info_table ss_info_table = {
+ .ss_divider = 1000,
+ .ss_percentage = {0, 0, 375, 375, 375}
+};
+
+static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+ struct dc_context *ctx = clk_mgr->base.ctx;
+ uint32_t clock_source;
+
+ REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
+ // If it's DFS mode, clock_source is 0.
+ if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) {
+ clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
+
+ if (clk_mgr->dprefclk_ss_percentage != 0) {
+ clk_mgr->ss_on_dprefclk = true;
+ clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+ }
+ }
+}
+
static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table)
{
int i, num_valid_sets;
clock_table->NumFclkLevelsEnabled;
max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk);
- num_dcfclk = (clock_table->NumFclkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS :
+ num_dcfclk = (clock_table->NumDcfClkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS :
clock_table->NumDcfClkLevelsEnabled;
for (i = 0; i < num_dcfclk; i++) {
int j;
dce_clock_read_ss_info(&clk_mgr->base);
/*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/
+ dcn35_read_ss_info_from_lut(&clk_mgr->base);
+
clk_mgr->base.base.bw_params = &dcn35_bw_params;
if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
scratch->blend_tf[i] = *status->plane_states[i]->blend_tf;
}
scratch->stream_state = *stream;
- scratch->out_transfer_func = *stream->out_transfer_func;
+ if (stream->out_transfer_func)
+ scratch->out_transfer_func = *stream->out_transfer_func;
}
static void restore_planes_and_stream_state(
*status->plane_states[i]->blend_tf = scratch->blend_tf[i];
}
*stream = scratch->stream_state;
- *stream->out_transfer_func = scratch->out_transfer_func;
+ if (stream->out_transfer_func)
+ *stream->out_transfer_func = scratch->out_transfer_func;
}
static bool update_planes_and_stream_state(struct dc *dc,
goto out;
}
+ if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm)
+ /* ODM combine could prevent us from supporting more planes
+ * we will reset ODM slice count back to 1 when all planes have
+ * been removed to maximize the amount of planes supported when
+ * new planes are added.
+ */
+ resource_update_pipes_for_stream_with_slice_count(
+ state, dc->current_state, dc->res_pool, stream, 1);
+
otg_master_pipe = resource_get_otg_master_for_stream(
&state->res_ctx, stream);
if (otg_master_pipe)
struct bp_pixel_clock_parameters bp_pc_params = {0};
enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
- if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
+ // Apply ssed(spread spectrum) dpref clock for edp only.
+ if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0
+ && pix_clk_params->signal_type == SIGNAL_TYPE_EDP
+ && encoding == DP_8b_10b_ENCODING)
dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
// For these signal types Driver to program DP_DTO without calling VBIOS Command table
if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) {
unsigned int modulo_hz = 0;
unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
- if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
- dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
-
if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) {
clock_hz = REG_READ(PHASE[inst]);
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = -Wno-override-init
DCE110 = dce110_timing_generator.o \
dce110_compressor.o dce110_opp_regamma_v.o \
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = -Wno-override-init
DCE112 = dce112_compressor.o
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = -Wno-override-init
DCE120 = dce120_timing_generator.o
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = -Wno-override-init
DCE60 = dce60_timing_generator.o dce60_hw_sequencer.o \
dce60_resource.o
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = -Wno-override-init
DCE80 = dce80_timing_generator.o
#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
+void mpc3_mpc_init(struct mpc *mpc)
+{
+ struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+ int opp_id;
+
+ mpc1_mpc_init(mpc);
+
+ for (opp_id = 0; opp_id < MAX_OPP; opp_id++) {
+ if (REG(MUX[opp_id]))
+ /* disable mpc out rate and flow control */
+ REG_UPDATE_2(MUX[opp_id], MPC_OUT_RATE_CONTROL_DISABLE,
+ 1, MPC_OUT_FLOW_CONTROL_COUNT, 0);
+ }
+}
+
+void mpc3_mpc_init_single_inst(struct mpc *mpc, unsigned int mpcc_id)
+{
+ struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+
+ mpc1_mpc_init_single_inst(mpc, mpcc_id);
+
+ /* assuming mpc out mux is connected to opp with the same index at this
+ * point in time (e.g. transitioning from vbios to driver)
+ */
+ if (mpcc_id < MAX_OPP && REG(MUX[mpcc_id]))
+ /* disable mpc out rate and flow control */
+ REG_UPDATE_2(MUX[mpcc_id], MPC_OUT_RATE_CONTROL_DISABLE,
+ 1, MPC_OUT_FLOW_CONTROL_COUNT, 0);
+}
+
bool mpc3_is_dwb_idle(
struct mpc *mpc,
int dwb_id)
MPC_DWB0_MUX, 0xf);
}
-void mpc3_set_out_rate_control(
- struct mpc *mpc,
- int opp_id,
- bool enable,
- bool rate_2x_mode,
- struct mpc_dwb_flow_control *flow_control)
-{
- struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
-
- REG_UPDATE_2(MUX[opp_id],
- MPC_OUT_RATE_CONTROL_DISABLE, !enable,
- MPC_OUT_RATE_CONTROL, rate_2x_mode);
-
- if (flow_control)
- REG_UPDATE_2(MUX[opp_id],
- MPC_OUT_FLOW_CONTROL_MODE, flow_control->flow_ctrl_mode,
- MPC_OUT_FLOW_CONTROL_COUNT, flow_control->flow_ctrl_cnt1);
-}
-
enum dc_lut_mode mpc3_get_ogam_current(struct mpc *mpc, int mpcc_id)
{
/*Contrary to DCN2 and DCN1 wherein a single status register field holds this info;
.read_mpcc_state = mpc3_read_mpcc_state,
.insert_plane = mpc1_insert_plane,
.remove_mpcc = mpc1_remove_mpcc,
- .mpc_init = mpc1_mpc_init,
- .mpc_init_single_inst = mpc1_mpc_init_single_inst,
+ .mpc_init = mpc3_mpc_init,
+ .mpc_init_single_inst = mpc3_mpc_init_single_inst,
.update_blending = mpc2_update_blending,
.cursor_lock = mpc1_cursor_lock,
.get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp,
.set_dwb_mux = mpc3_set_dwb_mux,
.disable_dwb_mux = mpc3_disable_dwb_mux,
.is_dwb_idle = mpc3_is_dwb_idle,
- .set_out_rate_control = mpc3_set_out_rate_control,
.set_gamut_remap = mpc3_set_gamut_remap,
.program_shaper = mpc3_program_shaper,
.acquire_rmu = mpcc3_acquire_rmu,
int num_mpcc,
int num_rmu);
+void mpc3_mpc_init(
+ struct mpc *mpc);
+
+void mpc3_mpc_init_single_inst(
+ struct mpc *mpc,
+ unsigned int mpcc_id);
+
bool mpc3_program_shaper(
struct mpc *mpc,
const struct pwl_params *params,
struct mpc *mpc,
int dwb_id);
-void mpc3_set_out_rate_control(
- struct mpc *mpc,
- int opp_id,
- bool enable,
- bool rate_2x_mode,
- struct mpc_dwb_flow_control *flow_control);
-
void mpc3_power_on_ogam_lut(
struct mpc *mpc, int mpcc_id,
bool power_on);
enc10->base.hpd_source = init_data->hpd_source;
enc10->base.connector = init_data->connector;
- enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
-
- enc10->base.features = *enc_features;
if (enc10->base.connector.id == CONNECTOR_ID_USBC)
enc10->base.features.flags.bits.DP_IS_USB_C = 1;
- if (enc10->base.connector.id == CONNECTOR_ID_USBC)
- enc10->base.features.flags.bits.DP_IS_USB_C = 1;
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+
+ enc10->base.features = *enc_features;
enc10->base.transmitter = init_data->transmitter;
struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
int mpcc_id;
- mpc1_mpc_init(mpc);
+ mpc3_mpc_init(mpc);
if (mpc->ctx->dc->debug.enable_mem_low_power.bits.mpc) {
if (mpc30->mpc_mask->MPCC_MCM_SHAPER_MEM_LOW_PWR_MODE && mpc30->mpc_mask->MPCC_MCM_3DLUT_MEM_LOW_PWR_MODE) {
.insert_plane = mpc1_insert_plane,
.remove_mpcc = mpc1_remove_mpcc,
.mpc_init = mpc32_mpc_init,
- .mpc_init_single_inst = mpc1_mpc_init_single_inst,
+ .mpc_init_single_inst = mpc3_mpc_init_single_inst,
.update_blending = mpc2_update_blending,
.cursor_lock = mpc1_cursor_lock,
.get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp,
.set_dwb_mux = mpc3_set_dwb_mux,
.disable_dwb_mux = mpc3_disable_dwb_mux,
.is_dwb_idle = mpc3_is_dwb_idle,
- .set_out_rate_control = mpc3_set_out_rate_control,
.set_gamut_remap = mpc3_set_gamut_remap,
.program_shaper = mpc32_program_shaper,
.program_3dlut = mpc32_program_3dlut,
enc10->base.hpd_source = init_data->hpd_source;
enc10->base.connector = init_data->connector;
+ if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+ enc10->base.features.flags.bits.DP_IS_USB_C = 1;
enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
}
enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
- if (enc10->base.connector.id == CONNECTOR_ID_USBC)
- enc10->base.features.flags.bits.DP_IS_USB_C = 1;
if (bp_funcs->get_connector_speed_cap_info)
result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
.num_states = 5,
.sr_exit_time_us = 28.0,
.sr_enter_plus_exit_time_us = 30.0,
- .sr_exit_z8_time_us = 210.0,
- .sr_enter_plus_exit_z8_time_us = 320.0,
+ .sr_exit_z8_time_us = 250.0,
+ .sr_enter_plus_exit_z8_time_us = 350.0,
.fclk_change_latency_us = 24.0,
.usr_retraining_latency_us = 2,
.writeback_latency_us = 12.0,
.clock_limits = {
{
.state = 0,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
+ .dcfclk_mhz = 400.0,
+ .fabricclk_mhz = 400.0,
+ .socclk_mhz = 600.0,
+ .dram_speed_mts = 3200.0,
+ .dispclk_mhz = 600.0,
+ .dppclk_mhz = 600.0,
.phyclk_mhz = 600.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 186.0,
+ .dscclk_mhz = 200.0,
.dtbclk_mhz = 600.0,
},
{
.state = 1,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
+ .dcfclk_mhz = 600.0,
+ .fabricclk_mhz = 1000.0,
+ .socclk_mhz = 733.0,
+ .dram_speed_mts = 6400.0,
+ .dispclk_mhz = 800.0,
+ .dppclk_mhz = 800.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
+ .dscclk_mhz = 266.7,
.dtbclk_mhz = 600.0,
},
{
.state = 2,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
+ .dcfclk_mhz = 738.0,
+ .fabricclk_mhz = 1200.0,
+ .socclk_mhz = 880.0,
+ .dram_speed_mts = 7500.0,
+ .dispclk_mhz = 800.0,
+ .dppclk_mhz = 800.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
+ .dscclk_mhz = 266.7,
.dtbclk_mhz = 600.0,
},
{
.state = 3,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
+ .dcfclk_mhz = 800.0,
+ .fabricclk_mhz = 1400.0,
+ .socclk_mhz = 978.0,
+ .dram_speed_mts = 7500.0,
+ .dispclk_mhz = 960.0,
+ .dppclk_mhz = 960.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 371.0,
+ .dscclk_mhz = 320.0,
.dtbclk_mhz = 600.0,
},
{
.state = 4,
+ .dcfclk_mhz = 873.0,
+ .fabricclk_mhz = 1600.0,
+ .socclk_mhz = 1100.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1066.7,
+ .dppclk_mhz = 1066.7,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 355.6,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 5,
+ .dcfclk_mhz = 960.0,
+ .fabricclk_mhz = 1700.0,
+ .socclk_mhz = 1257.0,
+ .dram_speed_mts = 8533.0,
.dispclk_mhz = 1200.0,
.dppclk_mhz = 1200.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
+ .dscclk_mhz = 400.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 6,
+ .dcfclk_mhz = 1067.0,
+ .fabricclk_mhz = 1850.0,
+ .socclk_mhz = 1257.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1371.4,
+ .dppclk_mhz = 1371.4,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 457.1,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 7,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 2000.0,
+ .socclk_mhz = 1467.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1600.0,
+ .dppclk_mhz = 1600.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 533.3,
.dtbclk_mhz = 600.0,
},
},
- .num_states = 5,
+ .num_states = 8,
.sr_exit_time_us = 28.0,
.sr_enter_plus_exit_time_us = 30.0,
- .sr_exit_z8_time_us = 210.0,
- .sr_enter_plus_exit_z8_time_us = 320.0,
+ .sr_exit_z8_time_us = 250.0,
+ .sr_enter_plus_exit_z8_time_us = 350.0,
.fclk_change_latency_us = 24.0,
.usr_retraining_latency_us = 2,
.writeback_latency_us = 12.0,
.do_urgent_latency_adjustment = 0,
.urgent_latency_adjustment_fabric_clock_component_us = 0,
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+ .num_chans = 4,
+ .dram_clock_change_latency_us = 11.72,
+ .dispclk_dppclk_vco_speed_mhz = 2400.0,
};
/*
clock_limits[i].socclk_mhz;
dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+ clock_limits[i].dtbclk_mhz;
dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
clk_table->num_entries;
dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
clk_table->num_entries;
dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+ clk_table->num_entries;
}
}
if (context->res_ctx.pipe_ctx[i].plane_state)
plane_count++;
}
+
/*dcn351 does not support z9/z10*/
if (context->stream_count == 0 || plane_count == 0) {
support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency;
-
/*for psr1/psr-su, we allow z8 and z10 based on latency, for replay with IPS enabled, it will enter ips2*/
- if (is_pwrseq0 && (is_psr || is_replay))
+ if (is_pwrseq0 && (is_psr || is_replay))
support = allow_z8 ? allow_z8 : DCN_ZSTATE_SUPPORT_DISALLOW;
-
}
context->bw_ctx.bw.dcn.clk.zstate_support = support;
}
break;
case dml_project_dcn35:
+ case dml_project_dcn351:
out->num_chans = 4;
out->round_trip_ping_latency_dcfclk_cycles = 106;
out->smn_latency_us = 2;
out->dispclk_dppclk_vco_speed_mhz = 3600;
break;
- case dml_project_dcn351:
- out->num_chans = 16;
- out->round_trip_ping_latency_dcfclk_cycles = 1100;
- out->smn_latency_us = 2;
- break;
}
/* ---Overrides if available--- */
if (dml2->config.bbox_overrides.dram_num_chan)
if (dccg) {
dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst);
- dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+ if (dccg && dccg->funcs->set_dtbclk_dto)
+ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
}
} else if (dccg && dccg->funcs->disable_symclk_se) {
dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst,
#define FN(reg_name, field_name) \
hws->shifts->field_name, hws->masks->field_name
-static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
-{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
- int flow_ctrl_cnt;
-
- if (opp_cnt >= 2)
- hblank_halved = true;
-
- flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable -
- stream->timing.h_border_left -
- stream->timing.h_border_right;
-
- if (hblank_halved)
- flow_ctrl_cnt /= 2;
-
- /* ODM combine 4:1 case */
- if (opp_cnt == 4)
- flow_ctrl_cnt /= 2;
-
- return flow_ctrl_cnt;
-}
-
static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
{
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
struct pipe_ctx *odm_pipe;
int opp_cnt = 0;
int opp_inst[MAX_PIPES] = {0};
- bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing));
- struct mpc_dwb_flow_control flow_control;
- struct mpc *mpc = dc->res_pool->mpc;
- int i;
opp_cnt = get_odm_config(pipe_ctx, opp_inst);
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
- flow_control.flow_ctrl_mode = 0;
- flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt);
- if (mpc->funcs->set_out_rate_control) {
- for (i = 0; i < opp_cnt; ++i) {
- mpc->funcs->set_out_rate_control(
- mpc, opp_inst[i],
- true,
- rate_control_2x_pclk,
- &flow_control);
- }
- }
-
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
odm_pipe->stream_res.opp,
}
}
-static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
-{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
- int flow_ctrl_cnt;
-
- if (opp_cnt >= 2)
- hblank_halved = true;
-
- flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable -
- stream->timing.h_border_left -
- stream->timing.h_border_right;
-
- if (hblank_halved)
- flow_ctrl_cnt /= 2;
-
- /* ODM combine 4:1 case */
- if (opp_cnt == 4)
- flow_ctrl_cnt /= 2;
-
- return flow_ctrl_cnt;
-}
-
static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
{
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
struct pipe_ctx *odm_pipe;
int opp_cnt = 0;
int opp_inst[MAX_PIPES] = {0};
- bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing));
- struct mpc_dwb_flow_control flow_control;
- struct mpc *mpc = dc->res_pool->mpc;
- int i;
opp_cnt = get_odm_config(pipe_ctx, opp_inst);
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
- flow_control.flow_ctrl_mode = 0;
- flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt);
- if (mpc->funcs->set_out_rate_control) {
- for (i = 0; i < opp_cnt; ++i) {
- mpc->funcs->set_out_rate_control(
- mpc, opp_inst[i],
- true,
- rate_control_2x_pclk,
- &flow_control);
- }
- }
-
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
odm_pipe->stream_res.opp,
}
}
-static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
-{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
- int flow_ctrl_cnt;
-
- if (opp_cnt >= 2)
- hblank_halved = true;
-
- flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable -
- stream->timing.h_border_left -
- stream->timing.h_border_right;
-
- if (hblank_halved)
- flow_ctrl_cnt /= 2;
-
- /* ODM combine 4:1 case */
- if (opp_cnt == 4)
- flow_ctrl_cnt /= 2;
-
- return flow_ctrl_cnt;
-}
-
static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
{
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
struct pipe_ctx *odm_pipe;
int opp_cnt = 0;
int opp_inst[MAX_PIPES] = {0};
- bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing));
- struct mpc_dwb_flow_control flow_control;
- struct mpc *mpc = dc->res_pool->mpc;
- int i;
opp_cnt = get_odm_config(pipe_ctx, opp_inst);
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
- flow_control.flow_ctrl_mode = 0;
- flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt);
- if (mpc->funcs->set_out_rate_control) {
- for (i = 0; i < opp_cnt; ++i) {
- mpc->funcs->set_out_rate_control(
- mpc, opp_inst[i],
- true,
- rate_control_2x_pclk,
- &flow_control);
- }
- }
-
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
odm_pipe->stream_res.opp,
.prepare_bandwidth = dcn35_prepare_bandwidth,
.optimize_bandwidth = dcn35_optimize_bandwidth,
.update_bandwidth = dcn20_update_bandwidth,
- .set_drr = dcn10_set_drr,
+ .set_drr = dcn35_set_drr,
.get_position = dcn10_get_position,
.set_static_screen_control = dcn35_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
OTG_V_TOTAL_MAX_SEL, 1,
OTG_FORCE_LOCK_ON_EVENT, 0,
OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */
-
- // Setup manual flow control for EOF via TRIG_A
- optc->funcs->setup_manual_trigger(optc);
}
}
.disable_dcc = DCC_ENABLE,
.disable_dpp_power_gate = true,
.disable_hubp_power_gate = true,
+ .disable_optc_power_gate = true, /*should the same as above two*/
+ .disable_hpo_power_gate = true, /*dmubfw force domain25 on*/
.disable_clock_gate = false,
.disable_dsc_power_gate = true,
.vsr_support = true,
},
.seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT,
.enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/
+ .minimum_z8_residency_time = 2100,
.using_dml2 = true,
.support_eDP1_5 = true,
.enable_hpo_pg_support = false,
.enable_legacy_fast_update = true,
.enable_single_display_2to1_odm_policy = true,
- .disable_idle_power_optimizations = true,
+ .disable_idle_power_optimizations = false,
.dmcub_emulation = false,
.disable_boot_optimizations = false,
.disable_unbounded_requesting = false,
.disable_z10 = true,
.ignore_pg = true,
.psp_disabled_wa = true,
- .ips2_eval_delay_us = 200,
- .ips2_entry_delay_us = 400
+ .ips2_eval_delay_us = 2000,
+ .ips2_entry_delay_us = 800,
+ .disable_dmub_reallow_idle = true,
+ .static_screen_wait_frames = 2,
};
static const struct dc_panel_config panel_config_defaults = {
}
/* VSC packet set to 4 for PSR-SU, or 2 for PSR1 */
- if (stream->link->psr_settings.psr_feature_enabled) {
- if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
- vsc_packet_revision = vsc_packet_rev4;
- else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
- vsc_packet_revision = vsc_packet_rev2;
- }
-
- if (stream->link->replay_settings.config.replay_supported)
+ if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
+ vsc_packet_revision = vsc_packet_rev4;
+ else if (stream->link->replay_settings.config.replay_supported)
vsc_packet_revision = vsc_packet_rev4;
+ else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
+ vsc_packet_revision = vsc_packet_rev2;
/* Update to revision 5 for extended colorimetry support */
if (stream->use_vsc_sdp_for_colorimetry)
uint32_t enable_level_process_quantum_check : 1;
uint32_t is_vcn0_enabled : 1;
uint32_t is_vcn1_enabled : 1;
- uint32_t reserved : 27;
+ uint32_t use_rs64mem_for_proc_ctx_csa : 1;
+ uint32_t reserved : 26;
};
uint32_t uint32_all;
};
struct {
uint32_t is_context_suspended : 1;
- uint32_t reserved : 31;
+ uint32_t collaboration_mode : 1;
+ uint32_t reserved : 30;
};
struct UMSCH_API_STATUS api_status;
+ uint32_t process_csa_array_index;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
uint64_t context_csa_addr;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
uint32_t suspend_fence_value;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
enum UMSCH_ENGINE_TYPE engine_type;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
union UMSCH_AFFINITY affinity;
uint64_t context_csa_addr;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
uint64_t context_quantum;
uint64_t context_csa_addr;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
}
}
+ /*
+ * If gpu_od is the only member in the list, that means gpu_od is an
+ * empty directory, so remove it.
+ */
+ if (list_is_singular(&adev->pm.od_kobj_list))
+ goto err_out;
+
return 0;
err_out:
smu->adev = adev;
smu->pm_enabled = !!amdgpu_dpm;
smu->is_apu = false;
- smu->smu_baco.state = SMU_BACO_STATE_EXIT;
+ smu->smu_baco.state = SMU_BACO_STATE_NONE;
smu->smu_baco.platform_support = false;
smu->user_dpm_profile.fan_mode = -1;
return 0;
}
+static int smu_reset_mp1_state(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+ int ret = 0;
+
+ if ((!adev->in_runpm) && (!adev->in_suspend) &&
+ (!amdgpu_in_reset(adev)) && amdgpu_ip_version(adev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 10) &&
+ !amdgpu_device_has_display_hardware(adev))
+ ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
+
+ return ret;
+}
+
static int smu_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = adev->powerplay.pp_handle;
+ int ret;
if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
return 0;
adev->pm.dpm_enabled = false;
- return smu_smc_hw_cleanup(smu);
+ ret = smu_smc_hw_cleanup(smu);
+ if (ret)
+ return ret;
+
+ ret = smu_reset_mp1_state(smu);
+ if (ret)
+ return ret;
+
+ return 0;
}
static void smu_late_fini(void *handle)
enum smu_baco_state {
SMU_BACO_STATE_ENTER = 0,
SMU_BACO_STATE_EXIT,
+ SMU_BACO_STATE_NONE,
};
struct smu_baco_context {
uint32_t MaxGfxClk;
} DpmClocks_t;
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t VClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t VPEClocks[NUM_VPE_DPM_LEVELS];
+ uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS];
+ uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS];
+
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t Vcn0ClkLevelsEnabled; //Applies to both Vclk0 and Dclk0
+ uint8_t Vcn1ClkLevelsEnabled; //Applies to both Vclk1 and Dclk1
+ uint8_t VpeClkLevelsEnabled;
+ uint8_t NumMemPstatesEnabled;
+ uint8_t NumFclkLevelsEnabled;
+ uint8_t spare;
+
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks_t_v14_0_1;
+
typedef struct {
uint16_t CoreFrequency[16]; //Target core frequency [MHz]
uint16_t CorePower[16]; //CAC calculated core power [mW]
#define TABLE_CUSTOM_DPM 2 // Called by Driver
#define TABLE_BIOS_GPIO_CONFIG 3 // Called by BIOS
#define TABLE_DPMCLOCKS 4 // Called by Driver and VBIOS
-#define TABLE_SPARE0 5 // Unused
+#define TABLE_MOMENTARY_PM 5 // Called by Tools
#define TABLE_MODERN_STDBY 6 // Called by Tools for Modern Standby Log
#define TABLE_SMU_METRICS 7 // Called by Driver and SMF/PMF
#define TABLE_COUNT 8
#define FEATURE_EDC_BIT 7
#define FEATURE_PLL_POWER_DOWN_BIT 8
#define FEATURE_VDDOFF_BIT 9
-#define FEATURE_VCN_DPM_BIT 10
+#define FEATURE_VCN_DPM_BIT 10 /* this is for both VCN0 and VCN1 */
#define FEATURE_DS_MPM_BIT 11
#define FEATURE_FCLK_DPM_BIT 12
#define FEATURE_SOCCLK_DPM_BIT 13
#define FEATURE_DS_GFXCLK_BIT 21
#define FEATURE_DS_SOCCLK_BIT 22
#define FEATURE_DS_LCLK_BIT 23
-#define FEATURE_LOW_POWER_DCNCLKS_BIT 24 // for all DISP clks
+#define FEATURE_LOW_POWER_DCNCLKS_BIT 24
#define FEATURE_DS_SHUBCLK_BIT 25
-#define FEATURE_SPARE0_BIT 26 //SPARE
+#define FEATURE_RESERVED0_BIT 26
#define FEATURE_ZSTATES_BIT 27
#define FEATURE_IOMMUL2_PG_BIT 28
#define FEATURE_DS_FCLK_BIT 29
#define FEATURE_DS_MP1CLK_BIT 31
#define FEATURE_WHISPER_MODE_BIT 32
#define FEATURE_SMU_LOW_POWER_BIT 33
-#define FEATURE_SMART_L3_RINSER_BIT 34
-#define FEATURE_SPARE1_BIT 35 //SPARE
+#define FEATURE_RESERVED1_BIT 34 /* v14_0_0 SMART_L3_RINSER; v14_0_1 RESERVED1 */
+#define FEATURE_GFX_DEM_BIT 35 /* v14_0_0 SPARE; v14_0_1 GFX_DEM */
#define FEATURE_PSI_BIT 36
#define FEATURE_PROCHOT_BIT 37
#define FEATURE_CPUOFF_BIT 38
#define FEATURE_PERF_LIMIT_BIT 42
#define FEATURE_CORE_DLDO_BIT 43
#define FEATURE_DVO_BIT 44
-#define FEATURE_DS_VCN_BIT 45
+#define FEATURE_DS_VCN_BIT 45 /* v14_0_1 this is for both VCN0 and VCN1 */
#define FEATURE_CPPC_BIT 46
#define FEATURE_CPPC_PREFERRED_CORES 47
#define FEATURE_DF_CSTATES_BIT 48
-#define FEATURE_SPARE2_BIT 49 //SPARE
+#define FEATURE_FAST_PSTATE_CLDO_BIT 49 /* v14_0_0 SPARE */
#define FEATURE_ATHUB_PG_BIT 50
#define FEATURE_VDDOFF_ECO_BIT 51
#define FEATURE_ZSTATES_ECO_BIT 52
#define FEATURE_DS_IPUCLK_BIT 58
#define FEATURE_DS_VPECLK_BIT 59
#define FEATURE_VPE_DPM_BIT 60
-#define FEATURE_SPARE_61 61
-#define FEATURE_FP_DIDT 62
+#define FEATURE_SMART_L3_RINSER_BIT 61 /* v14_0_0 SPARE*/
+#define FEATURE_PCC_BIT 62 /* v14_0_0 FP_DIDT v14_0_1 PCC_BIT */
#define NUM_FEATURES 63
// Firmware Header/Footer
// MP1_EXT_SCRATCH7 = RTOS Current Job
} FwStatus_t;
+typedef struct {
+ // MP1_EXT_SCRATCH0
+ uint32_t DpmHandlerID : 8;
+ uint32_t ActivityMonitorID : 8;
+ uint32_t DpmTimerID : 8;
+ uint32_t DpmHubID : 4;
+ uint32_t DpmHubTask : 4;
+ // MP1_EXT_SCRATCH1
+ uint32_t CclkSyncStatus : 8;
+ uint32_t ZstateStatus : 4;
+ uint32_t Cpu1VddOff : 4;
+ uint32_t DstateFun : 4;
+ uint32_t DstateDev : 4;
+ uint32_t GfxOffStatus : 2;
+ uint32_t Cpu0Off : 2;
+ uint32_t Cpu1Off : 2;
+ uint32_t Cpu0VddOff : 2;
+ // MP1_EXT_SCRATCH2
+ uint32_t P2JobHandler :32;
+ // MP1_EXT_SCRATCH3
+ uint32_t PostCode :32;
+ // MP1_EXT_SCRATCH4
+ uint32_t MsgPortBusy :15;
+ uint32_t RsmuPmiP1Pending : 1;
+ uint32_t RsmuPmiP2PendingCnt : 8;
+ uint32_t DfCstateExitPending : 1;
+ uint32_t Pc6EntryPending : 1;
+ uint32_t Pc6ExitPending : 1;
+ uint32_t WarmResetPending : 1;
+ uint32_t Mp0ClkPending : 1;
+ uint32_t InWhisperMode : 1;
+ uint32_t spare2 : 2;
+ // MP1_EXT_SCRATCH5
+ uint32_t IdleMask :32;
+ // MP1_EXT_SCRATCH6 = RTOS threads' status
+ // MP1_EXT_SCRATCH7 = RTOS Current Job
+} FwStatus_t_v14_0_1;
#pragma pack(pop)
#define PPSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team
#define PPSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version
#define PPSMC_MSG_GetDriverIfVersion 0x03 ///< Get PMFW_DRIVER_IF version
-#define PPSMC_MSG_SPARE0 0x04 ///< SPARE
-#define PPSMC_MSG_SPARE1 0x05 ///< SPARE
-#define PPSMC_MSG_PowerDownVcn 0x06 ///< Power down VCN
-#define PPSMC_MSG_PowerUpVcn 0x07 ///< Power up VCN; VCN is power gated by default
-#define PPSMC_MSG_SetHardMinVcn 0x08 ///< For wireless display
+#define PPSMC_MSG_PowerDownVcn1 0x04 ///< Power down VCN1
+#define PPSMC_MSG_PowerUpVcn1 0x05 ///< Power up VCN1; VCN1 is power gated by default
+#define PPSMC_MSG_PowerDownVcn0 0x06 ///< Power down VCN0
+#define PPSMC_MSG_PowerUpVcn0 0x07 ///< Power up VCN0; VCN0 is power gated by default
+#define PPSMC_MSG_SetHardMinVcn0 0x08 ///< For wireless display
#define PPSMC_MSG_SetSoftMinGfxclk 0x09 ///< Set SoftMin for GFXCLK, argument is frequency in MHz
-#define PPSMC_MSG_SPARE2 0x0A ///< SPARE
-#define PPSMC_MSG_SPARE3 0x0B ///< SPARE
+#define PPSMC_MSG_SetHardMinVcn1 0x0A ///< For wireless display
+#define PPSMC_MSG_SetSoftMinVcn1 0x0B ///< Set soft min for VCN1 clocks (VCLK1 and DCLK1)
#define PPSMC_MSG_PrepareMp1ForUnload 0x0C ///< Prepare PMFW for GFX driver unload
#define PPSMC_MSG_SetDriverDramAddrHigh 0x0D ///< Set high 32 bits of DRAM address for Driver table transfer
#define PPSMC_MSG_SetDriverDramAddrLow 0x0E ///< Set low 32 bits of DRAM address for Driver table transfer
#define PPSMC_MSG_GetEnabledSmuFeatures 0x12 ///< Get enabled features in PMFW
#define PPSMC_MSG_SetHardMinSocclkByFreq 0x13 ///< Set hard min for SOC CLK
#define PPSMC_MSG_SetSoftMinFclk 0x14 ///< Set hard min for FCLK
-#define PPSMC_MSG_SetSoftMinVcn 0x15 ///< Set soft min for VCN clocks (VCLK and DCLK)
-
+#define PPSMC_MSG_SetSoftMinVcn0 0x15 ///< Set soft min for VCN0 clocks (VCLK0 and DCLK0)
#define PPSMC_MSG_EnableGfxImu 0x16 ///< Enable GFX IMU
-
-#define PPSMC_MSG_spare_0x17 0x17
-#define PPSMC_MSG_spare_0x18 0x18
+#define PPSMC_MSG_spare_0x17 0x17 ///< Get GFX clock frequency
+#define PPSMC_MSG_spare_0x18 0x18 ///< Get FCLK frequency
#define PPSMC_MSG_AllowGfxOff 0x19 ///< Inform PMFW of allowing GFXOFF entry
#define PPSMC_MSG_DisallowGfxOff 0x1A ///< Inform PMFW of disallowing GFXOFF entry
#define PPSMC_MSG_SetSoftMaxGfxClk 0x1B ///< Set soft max for GFX CLK
#define PPSMC_MSG_SetHardMinGfxClk 0x1C ///< Set hard min for GFX CLK
-
#define PPSMC_MSG_SetSoftMaxSocclkByFreq 0x1D ///< Set soft max for SOC CLK
#define PPSMC_MSG_SetSoftMaxFclkByFreq 0x1E ///< Set soft max for FCLK
-#define PPSMC_MSG_SetSoftMaxVcn 0x1F ///< Set soft max for VCN clocks (VCLK and DCLK)
-#define PPSMC_MSG_spare_0x20 0x20
-#define PPSMC_MSG_PowerDownJpeg 0x21 ///< Power down Jpeg
-#define PPSMC_MSG_PowerUpJpeg 0x22 ///< Power up Jpeg; VCN is power gated by default
-
+#define PPSMC_MSG_SetSoftMaxVcn0 0x1F ///< Set soft max for VCN0 clocks (VCLK0 and DCLK0)
+#define PPSMC_MSG_spare_0x20 0x20 ///< Set power limit percentage
+#define PPSMC_MSG_PowerDownJpeg0 0x21 ///< Power down Jpeg of VCN0
+#define PPSMC_MSG_PowerUpJpeg0 0x22 ///< Power up Jpeg of VCN0; VCN0 is power gated by default
#define PPSMC_MSG_SetHardMinFclkByFreq 0x23 ///< Set hard min for FCLK
#define PPSMC_MSG_SetSoftMinSocclkByFreq 0x24 ///< Set soft min for SOC CLK
#define PPSMC_MSG_AllowZstates 0x25 ///< Inform PMFM of allowing Zstate entry, i.e. no Miracast activity
-#define PPSMC_MSG_Reserved 0x26 ///< Not used
-#define PPSMC_MSG_Reserved1 0x27 ///< Not used, previously PPSMC_MSG_RequestActiveWgp
-#define PPSMC_MSG_Reserved2 0x28 ///< Not used, previously PPSMC_MSG_QueryActiveWgp
+#define PPSMC_MSG_PowerDownJpeg1 0x26 ///< Power down Jpeg of VCN1
+#define PPSMC_MSG_PowerUpJpeg1 0x27 ///< Power up Jpeg of VCN1; VCN1 is power gated by default
+#define PPSMC_MSG_SetSoftMaxVcn1 0x28 ///< Set soft max for VCN1 clocks (VCLK1 and DCLK1)
#define PPSMC_MSG_PowerDownIspByTile 0x29 ///< ISP is power gated by default
#define PPSMC_MSG_PowerUpIspByTile 0x2A ///< This message is used to power up ISP tiles and enable the ISP DPM
#define PPSMC_MSG_SetHardMinIspiclkByFreq 0x2B ///< Set HardMin by frequency for ISPICLK
#define PPSMC_MSG_SetHardMinIspxclkByFreq 0x2C ///< Set HardMin by frequency for ISPXCLK
-#define PPSMC_MSG_PowerDownUmsch 0x2D ///< Power down VCN.UMSCH (aka VSCH) scheduler
-#define PPSMC_MSG_PowerUpUmsch 0x2E ///< Power up VCN.UMSCH (aka VSCH) scheduler
+#define PPSMC_MSG_PowerDownUmsch 0x2D ///< Power down VCN0.UMSCH (aka VSCH) scheduler
+#define PPSMC_MSG_PowerUpUmsch 0x2E ///< Power up VCN0.UMSCH (aka VSCH) scheduler
#define PPSMC_Message_IspStutterOn_MmhubPgDis 0x2F ///< ISP StutterOn mmHub PgDis
#define PPSMC_Message_IspStutterOff_MmhubPgEn 0x30 ///< ISP StufferOff mmHub PgEn
#define PPSMC_MSG_PowerUpVpe 0x31 ///< Power up VPE
#define PPSMC_MSG_DisableLSdma 0x35 ///< Disable LSDMA
#define PPSMC_MSG_SetSoftMaxVpe 0x36 ///<
#define PPSMC_MSG_SetSoftMinVpe 0x37 ///<
-#define PPSMC_Message_Count 0x38 ///< Total number of PPSMC messages
+#define PPSMC_MSG_AllocMALLCache 0x38 ///< Allocating MALL Cache
+#define PPSMC_MSG_ReleaseMALLCache 0x39 ///< Releasing MALL Cache
+#define PPSMC_Message_Count 0x3A ///< Total number of PPSMC messages
/** @}*/
/**
__SMU_DUMMY_MAP(PowerDownVcn), \
__SMU_DUMMY_MAP(PowerUpJpeg), \
__SMU_DUMMY_MAP(PowerDownJpeg), \
+ __SMU_DUMMY_MAP(PowerUpJpeg0), \
+ __SMU_DUMMY_MAP(PowerDownJpeg0), \
+ __SMU_DUMMY_MAP(PowerUpJpeg1), \
+ __SMU_DUMMY_MAP(PowerDownJpeg1), \
__SMU_DUMMY_MAP(BacoAudioD3PME), \
__SMU_DUMMY_MAP(ArmD3), \
__SMU_DUMMY_MAP(RunDcBtc), \
__SMU_DUMMY_MAP(PowerUpSdma), \
__SMU_DUMMY_MAP(SetHardMinIspclkByFreq), \
__SMU_DUMMY_MAP(SetHardMinVcn), \
+ __SMU_DUMMY_MAP(SetHardMinVcn0), \
+ __SMU_DUMMY_MAP(SetHardMinVcn1), \
__SMU_DUMMY_MAP(SetAllowFclkSwitch), \
__SMU_DUMMY_MAP(SetMinVideoGfxclkFreq), \
__SMU_DUMMY_MAP(ActiveProcessNotify), \
__SMU_DUMMY_MAP(SetPhyclkVoltageByFreq), \
__SMU_DUMMY_MAP(SetDppclkVoltageByFreq), \
__SMU_DUMMY_MAP(SetSoftMinVcn), \
+ __SMU_DUMMY_MAP(SetSoftMinVcn0), \
+ __SMU_DUMMY_MAP(SetSoftMinVcn1), \
__SMU_DUMMY_MAP(EnablePostCode), \
__SMU_DUMMY_MAP(GetGfxclkFrequency), \
__SMU_DUMMY_MAP(GetFclkFrequency), \
__SMU_DUMMY_MAP(SetSoftMaxSocclkByFreq), \
__SMU_DUMMY_MAP(SetSoftMaxFclkByFreq), \
__SMU_DUMMY_MAP(SetSoftMaxVcn), \
+ __SMU_DUMMY_MAP(SetSoftMaxVcn0), \
+ __SMU_DUMMY_MAP(SetSoftMaxVcn1), \
__SMU_DUMMY_MAP(PowerGateMmHub), \
__SMU_DUMMY_MAP(UpdatePmeRestore), \
__SMU_DUMMY_MAP(GpuChangeState), \
#define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7
+#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_1 0x6
#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x1
#define FEATURE_MASK(feature) (1ULL << feature)
switch (mp1_state) {
case PP_MP1_STATE_UNLOAD:
- ret = smu_cmn_set_mp1_state(smu, mp1_state);
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_PrepareMp1ForUnload,
+ 0x55, NULL);
+
+ if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
+ ret = smu_v13_0_disable_pmfw_state(smu);
+
break;
default:
/* Ignore others */
struct amdgpu_device *adev = smu->adev;
int ret = 0;
- if (!en && !adev->in_s0ix)
+ if (!en && !adev->in_s0ix) {
+ /* Adds a GFX reset as workaround just before sending the
+ * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
+ * an invalid state.
+ */
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
+ SMU_RESET_MODE_2, NULL);
+ if (ret)
+ return ret;
+
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
+ }
return ret;
}
return sizeof(*gpu_metrics);
}
+static void smu_v13_0_6_restore_pci_config(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ pci_write_config_dword(adev->pdev, i * 4,
+ adev->pdev->saved_config_space[i]);
+ pci_restore_msi_state(adev->pdev);
+}
+
static int smu_v13_0_6_mode2_reset(struct smu_context *smu)
{
int ret = 0, index;
/* Restore the config space saved during init */
amdgpu_device_load_pci_state(adev->pdev);
+ /* Certain platforms have switches which assign virtual BAR values to
+ * devices. OS uses the virtual BAR values and device behind the switch
+ * is assgined another BAR value. When device's config space registers
+ * are queried, switch returns the virtual BAR values. When mode-2 reset
+ * is performed, switch is unaware of it, and will continue to return
+ * the same virtual values to the OS.This affects
+ * pci_restore_config_space() API as it doesn't write the value saved if
+ * the current value read from config space is the same as what is
+ * saved. As a workaround, make sure the config space is restored
+ * always.
+ */
+ if (!(adev->flags & AMD_IS_APU))
+ smu_v13_0_6_restore_pci_config(smu);
+
dev_dbg(smu->adev->dev, "wait for reset ack\n");
do {
ret = smu_cmn_wait_for_response(smu);
smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
break;
case IP_VERSION(14, 0, 1):
- smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
+ smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_1;
break;
default:
if (adev->vcn.harvest_config & (1 << i))
continue;
- ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
- SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn,
- i << 16U, NULL);
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) {
+ if (i == 0)
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0,
+ i << 16U, NULL);
+ else if (i == 1)
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn1 : SMU_MSG_PowerDownVcn1,
+ i << 16U, NULL);
+ } else {
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn,
+ i << 16U, NULL);
+ }
+
if (ret)
return ret;
}
int smu_v14_0_set_jpeg_enable(struct smu_context *smu,
bool enable)
{
- return smu_cmn_send_smc_msg_with_param(smu, enable ?
- SMU_MSG_PowerUpJpeg : SMU_MSG_PowerDownJpeg,
- 0, NULL);
+ struct amdgpu_device *adev = smu->adev;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) {
+ if (i == 0)
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpJpeg0 : SMU_MSG_PowerDownJpeg0,
+ i << 16U, NULL);
+ else if (i == 1 && amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpJpeg1 : SMU_MSG_PowerDownJpeg1,
+ i << 16U, NULL);
+ } else {
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpJpeg : SMU_MSG_PowerDownJpeg,
+ i << 16U, NULL);
+ }
+
+ if (ret)
+ return ret;
+ }
+
+ return ret;
}
int smu_v14_0_run_btc(struct smu_context *smu)
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1),
MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1),
MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1),
- MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1),
- MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn, 1),
- MSG_MAP(SetHardMinVcn, PPSMC_MSG_SetHardMinVcn, 1),
+ MSG_MAP(PowerDownVcn0, PPSMC_MSG_PowerDownVcn0, 1),
+ MSG_MAP(PowerUpVcn0, PPSMC_MSG_PowerUpVcn0, 1),
+ MSG_MAP(SetHardMinVcn0, PPSMC_MSG_SetHardMinVcn0, 1),
+ MSG_MAP(PowerDownVcn1, PPSMC_MSG_PowerDownVcn1, 1),
+ MSG_MAP(PowerUpVcn1, PPSMC_MSG_PowerUpVcn1, 1),
+ MSG_MAP(SetHardMinVcn1, PPSMC_MSG_SetHardMinVcn1, 1),
MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxclk, 1),
MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 1),
MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1),
MSG_MAP(GetEnabledSmuFeatures, PPSMC_MSG_GetEnabledSmuFeatures, 1),
MSG_MAP(SetHardMinSocclkByFreq, PPSMC_MSG_SetHardMinSocclkByFreq, 1),
MSG_MAP(SetSoftMinFclk, PPSMC_MSG_SetSoftMinFclk, 1),
- MSG_MAP(SetSoftMinVcn, PPSMC_MSG_SetSoftMinVcn, 1),
+ MSG_MAP(SetSoftMinVcn0, PPSMC_MSG_SetSoftMinVcn0, 1),
+ MSG_MAP(SetSoftMinVcn1, PPSMC_MSG_SetSoftMinVcn1, 1),
MSG_MAP(EnableGfxImu, PPSMC_MSG_EnableGfxImu, 1),
MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1),
MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1),
MSG_MAP(SetHardMinGfxClk, PPSMC_MSG_SetHardMinGfxClk, 1),
MSG_MAP(SetSoftMaxSocclkByFreq, PPSMC_MSG_SetSoftMaxSocclkByFreq, 1),
MSG_MAP(SetSoftMaxFclkByFreq, PPSMC_MSG_SetSoftMaxFclkByFreq, 1),
- MSG_MAP(SetSoftMaxVcn, PPSMC_MSG_SetSoftMaxVcn, 1),
- MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg, 1),
- MSG_MAP(PowerUpJpeg, PPSMC_MSG_PowerUpJpeg, 1),
+ MSG_MAP(SetSoftMaxVcn0, PPSMC_MSG_SetSoftMaxVcn0, 1),
+ MSG_MAP(SetSoftMaxVcn1, PPSMC_MSG_SetSoftMaxVcn1, 1),
+ MSG_MAP(PowerDownJpeg0, PPSMC_MSG_PowerDownJpeg0, 1),
+ MSG_MAP(PowerUpJpeg0, PPSMC_MSG_PowerUpJpeg0, 1),
+ MSG_MAP(PowerDownJpeg1, PPSMC_MSG_PowerDownJpeg1, 1),
+ MSG_MAP(PowerUpJpeg1, PPSMC_MSG_PowerUpJpeg1, 1),
MSG_MAP(SetHardMinFclkByFreq, PPSMC_MSG_SetHardMinFclkByFreq, 1),
MSG_MAP(SetSoftMinSocclkByFreq, PPSMC_MSG_SetSoftMinSocclkByFreq, 1),
MSG_MAP(PowerDownIspByTile, PPSMC_MSG_PowerDownIspByTile, 1),
SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
- SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, sizeof(DpmClocks_t),
+ SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
goto err0_out;
smu_table->metrics_time = 0;
- smu_table->clocks_table = kzalloc(sizeof(DpmClocks_t), GFP_KERNEL);
+ smu_table->clocks_table = kzalloc(max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)), GFP_KERNEL);
if (!smu_table->clocks_table)
goto err1_out;
return ret;
}
+static int smu_v14_0_1_get_dpm_freq_by_index(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t dpm_level,
+ uint32_t *freq)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+ if (!clk_table || clk_type >= SMU_CLK_COUNT)
+ return -EINVAL;
+
+ switch (clk_type) {
+ case SMU_SOCCLK:
+ if (dpm_level >= clk_table->NumSocClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->SocClocks[dpm_level];
+ break;
+ case SMU_VCLK:
+ if (dpm_level >= clk_table->Vcn0ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->VClocks0[dpm_level];
+ break;
+ case SMU_DCLK:
+ if (dpm_level >= clk_table->Vcn0ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->DClocks0[dpm_level];
+ break;
+ case SMU_VCLK1:
+ if (dpm_level >= clk_table->Vcn1ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->VClocks1[dpm_level];
+ break;
+ case SMU_DCLK1:
+ if (dpm_level >= clk_table->Vcn1ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->DClocks1[dpm_level];
+ break;
+ case SMU_UCLK:
+ case SMU_MCLK:
+ if (dpm_level >= clk_table->NumMemPstatesEnabled)
+ return -EINVAL;
+ *freq = clk_table->MemPstateTable[dpm_level].MemClk;
+ break;
+ case SMU_FCLK:
+ if (dpm_level >= clk_table->NumFclkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->FclkClocks_Freq[dpm_level];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t dpm_level,
return 0;
}
+static int smu_v14_0_common_get_dpm_freq_by_index(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t dpm_level,
+ uint32_t *freq)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
+
+ return 0;
+}
+
static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
enum smu_clk_type clk_type)
{
break;
case SMU_VCLK:
case SMU_DCLK:
+ case SMU_VCLK1:
+ case SMU_DCLK1:
feature_id = SMU_FEATURE_VCN_DPM_BIT;
break;
default:
return smu_cmn_feature_is_enabled(smu, feature_id);
}
+static int smu_v14_0_1_get_dpm_ultimate_freq(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *min,
+ uint32_t *max)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+ uint32_t clock_limit;
+ uint32_t max_dpm_level, min_dpm_level;
+ int ret = 0;
+
+ if (!smu_v14_0_0_clk_dpm_is_enabled(smu, clk_type)) {
+ switch (clk_type) {
+ case SMU_MCLK:
+ case SMU_UCLK:
+ clock_limit = smu->smu_table.boot_values.uclk;
+ break;
+ case SMU_FCLK:
+ clock_limit = smu->smu_table.boot_values.fclk;
+ break;
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ clock_limit = smu->smu_table.boot_values.gfxclk;
+ break;
+ case SMU_SOCCLK:
+ clock_limit = smu->smu_table.boot_values.socclk;
+ break;
+ case SMU_VCLK:
+ case SMU_VCLK1:
+ clock_limit = smu->smu_table.boot_values.vclk;
+ break;
+ case SMU_DCLK:
+ case SMU_DCLK1:
+ clock_limit = smu->smu_table.boot_values.dclk;
+ break;
+ default:
+ clock_limit = 0;
+ break;
+ }
+
+ /* clock in Mhz unit */
+ if (min)
+ *min = clock_limit / 100;
+ if (max)
+ *max = clock_limit / 100;
+
+ return 0;
+ }
+
+ if (max) {
+ switch (clk_type) {
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ *max = clk_table->MaxGfxClk;
+ break;
+ case SMU_MCLK:
+ case SMU_UCLK:
+ case SMU_FCLK:
+ max_dpm_level = 0;
+ break;
+ case SMU_SOCCLK:
+ max_dpm_level = clk_table->NumSocClkLevelsEnabled - 1;
+ break;
+ case SMU_VCLK:
+ case SMU_DCLK:
+ max_dpm_level = clk_table->Vcn0ClkLevelsEnabled - 1;
+ break;
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ max_dpm_level = clk_table->Vcn1ClkLevelsEnabled - 1;
+ break;
+ default:
+ ret = -EINVAL;
+ goto failed;
+ }
+
+ if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
+ if (ret)
+ goto failed;
+ }
+ }
+
+ if (min) {
+ switch (clk_type) {
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ *min = clk_table->MinGfxClk;
+ break;
+ case SMU_MCLK:
+ case SMU_UCLK:
+ min_dpm_level = clk_table->NumMemPstatesEnabled - 1;
+ break;
+ case SMU_FCLK:
+ min_dpm_level = clk_table->NumFclkLevelsEnabled - 1;
+ break;
+ case SMU_SOCCLK:
+ min_dpm_level = 0;
+ break;
+ case SMU_VCLK:
+ case SMU_DCLK:
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ min_dpm_level = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ goto failed;
+ }
+
+ if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
+ if (ret)
+ goto failed;
+ }
+ }
+
+failed:
+ return ret;
+}
+
static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *min,
}
if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
if (ret)
goto failed;
}
}
if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
if (ret)
goto failed;
}
return ret;
}
+static int smu_v14_0_common_get_dpm_ultimate_freq(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *min,
+ uint32_t *max)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_get_dpm_ultimate_freq(smu, clk_type, min, max);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_get_dpm_ultimate_freq(smu, clk_type, min, max);
+
+ return 0;
+}
+
static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *value)
return smu_v14_0_0_get_smu_metrics_data(smu, member_type, value);
}
+static int smu_v14_0_1_get_dpm_level_count(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *count)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+ switch (clk_type) {
+ case SMU_SOCCLK:
+ *count = clk_table->NumSocClkLevelsEnabled;
+ break;
+ case SMU_VCLK:
+ case SMU_DCLK:
+ *count = clk_table->Vcn0ClkLevelsEnabled;
+ break;
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ *count = clk_table->Vcn1ClkLevelsEnabled;
+ break;
+ case SMU_MCLK:
+ *count = clk_table->NumMemPstatesEnabled;
+ break;
+ case SMU_FCLK:
+ *count = clk_table->NumFclkLevelsEnabled;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *count)
return 0;
}
+static int smu_v14_0_common_get_dpm_level_count(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *count)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_get_dpm_level_count(smu, clk_type, count);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_get_dpm_level_count(smu, clk_type, count);
+
+ return 0;
+}
+
static int smu_v14_0_0_print_clk_levels(struct smu_context *smu,
enum smu_clk_type clk_type, char *buf)
{
case SMU_SOCCLK:
case SMU_VCLK:
case SMU_DCLK:
+ case SMU_VCLK1:
+ case SMU_DCLK1:
case SMU_MCLK:
case SMU_FCLK:
ret = smu_v14_0_0_get_current_clk_freq(smu, clk_type, &cur_value);
if (ret)
break;
- ret = smu_v14_0_0_get_dpm_level_count(smu, clk_type, &count);
+ ret = smu_v14_0_common_get_dpm_level_count(smu, clk_type, &count);
if (ret)
break;
for (i = 0; i < count; i++) {
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, i, &value);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, i, &value);
if (ret)
break;
break;
case SMU_VCLK:
case SMU_DCLK:
- msg_set_min = SMU_MSG_SetHardMinVcn;
- msg_set_max = SMU_MSG_SetSoftMaxVcn;
+ msg_set_min = SMU_MSG_SetHardMinVcn0;
+ msg_set_max = SMU_MSG_SetSoftMaxVcn0;
+ break;
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ msg_set_min = SMU_MSG_SetHardMinVcn1;
+ msg_set_max = SMU_MSG_SetSoftMaxVcn1;
break;
default:
return -EINVAL;
case SMU_FCLK:
case SMU_VCLK:
case SMU_DCLK:
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
if (ret)
break;
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq);
if (ret)
break;
switch (level) {
case AMD_DPM_FORCED_LEVEL_HIGH:
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max);
sclk_min = sclk_max;
fclk_min = fclk_max;
socclk_min = socclk_max;
break;
case AMD_DPM_FORCED_LEVEL_LOW:
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL);
sclk_max = sclk_min;
fclk_max = fclk_min;
socclk_max = socclk_min;
break;
case AMD_DPM_FORCED_LEVEL_AUTO:
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max);
break;
case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
return ret;
}
+static int smu_v14_0_1_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+ smu->gfx_default_hard_min_freq = clk_table->MinGfxClk;
+ smu->gfx_default_soft_max_freq = clk_table->MaxGfxClk;
+ smu->gfx_actual_hard_min_freq = 0;
+ smu->gfx_actual_soft_max_freq = 0;
+
+ return 0;
+}
+
static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
{
DpmClocks_t *clk_table = smu->smu_table.clocks_table;
return 0;
}
+static int smu_v14_0_common_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_set_fine_grain_gfx_freq_parameters(smu);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_set_fine_grain_gfx_freq_parameters(smu);
+
+ return 0;
+}
+
static int smu_v14_0_0_set_vpe_enable(struct smu_context *smu,
bool enable)
{
0, NULL);
}
+static int smu_14_0_1_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+ uint8_t idx;
+
+ /* Only the Clock information of SOC and VPE is copied to provide VPE DPM settings for use. */
+ for (idx = 0; idx < NUM_SOCCLK_DPM_LEVELS; idx++) {
+ clock_table->SocClocks[idx].Freq = (idx < clk_table->NumSocClkLevelsEnabled) ? clk_table->SocClocks[idx]:0;
+ clock_table->SocClocks[idx].Vol = 0;
+ }
+
+ for (idx = 0; idx < NUM_VPE_DPM_LEVELS; idx++) {
+ clock_table->VPEClocks[idx].Freq = (idx < clk_table->VpeClkLevelsEnabled) ? clk_table->VPEClocks[idx]:0;
+ clock_table->VPEClocks[idx].Vol = 0;
+ }
+
+ return 0;
+}
+
static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
{
DpmClocks_t *clk_table = smu->smu_table.clocks_table;
return 0;
}
+static int smu_v14_0_common_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_14_0_0_get_dpm_table(smu, clock_table);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_14_0_1_get_dpm_table(smu, clock_table);
+
+ return 0;
+}
+
static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
.check_fw_status = smu_v14_0_check_fw_status,
.check_fw_version = smu_v14_0_check_fw_version,
.set_driver_table_location = smu_v14_0_set_driver_table_location,
.gfx_off_control = smu_v14_0_gfx_off_control,
.mode2_reset = smu_v14_0_0_mode2_reset,
- .get_dpm_ultimate_freq = smu_v14_0_0_get_dpm_ultimate_freq,
+ .get_dpm_ultimate_freq = smu_v14_0_common_get_dpm_ultimate_freq,
.od_edit_dpm_table = smu_v14_0_od_edit_dpm_table,
.print_clk_levels = smu_v14_0_0_print_clk_levels,
.force_clk_levels = smu_v14_0_0_force_clk_levels,
.set_performance_level = smu_v14_0_0_set_performance_level,
- .set_fine_grain_gfx_freq_parameters = smu_v14_0_0_set_fine_grain_gfx_freq_parameters,
+ .set_fine_grain_gfx_freq_parameters = smu_v14_0_common_set_fine_grain_gfx_freq_parameters,
.set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu,
.dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable,
.dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable,
- .get_dpm_clock_table = smu_14_0_0_get_dpm_table,
+ .get_dpm_clock_table = smu_v14_0_common_get_dpm_table,
};
static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu)
{
struct ast_device *ast = to_ast_device(dev);
u8 video_on_off = on;
+ u32 i = 0;
// Video On/Off
ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xE3, (u8) ~AST_DP_VIDEO_ENABLE, on);
ASTDP_MIRROR_VIDEO_ENABLE) != video_on_off) {
// wait 1 ms
mdelay(1);
+ if (++i > 200)
+ break;
}
}
}
* @adapter: I2C adapter for the DDC bus
* @offset: register offset
* @buffer: buffer for return data
- * @size: sizo of the buffer
+ * @size: size of the buffer
*
* Reads @size bytes from the DP dual mode adaptor registers
* starting at @offset.
* @adapter: I2C adapter for the DDC bus
* @offset: register offset
* @buffer: buffer for write data
- * @size: sizo of the buffer
+ * @size: size of the buffer
*
* Writes @size bytes to the DP dual mode adaptor registers
* starting at @offset.
u32 overhead = 1000000;
int symbol_cycles;
+ if (lane_count == 0 || hactive == 0 || bpp_x16 == 0) {
+ DRM_DEBUG_KMS("Invalid BW overhead params: lane_count %d, hactive %d, bpp_x16 %d.%04d\n",
+ lane_count, hactive,
+ bpp_x16 >> 4, (bpp_x16 & 0xf) * 625);
+ return 0;
+ }
+
/*
* DP Standard v2.1 2.6.4.1
* SSC downspread and ref clock variation margin:
unsigned int total_modes_count = 0;
struct drm_client_offset *offsets;
unsigned int connector_count = 0;
+ /* points to modes protected by mode_config.mutex */
struct drm_display_mode **modes;
struct drm_crtc **crtcs;
int i, ret = 0;
drm_client_pick_crtcs(client, connectors, connector_count,
crtcs, modes, 0, width, height);
}
- mutex_unlock(&dev->mode_config.mutex);
drm_client_modeset_release(client);
modeset->y = offset->y;
}
}
+ mutex_unlock(&dev->mode_config.mutex);
mutex_unlock(&client->modeset_mutex);
out:
__drm_atomic_helper_plane_duplicate_state(plane, &new_shadow_plane_state->base);
- drm_format_conv_state_copy(&shadow_plane_state->fmtcnv_state,
- &new_shadow_plane_state->fmtcnv_state);
+ drm_format_conv_state_copy(&new_shadow_plane_state->fmtcnv_state,
+ &shadow_plane_state->fmtcnv_state);
}
EXPORT_SYMBOL(__drm_gem_duplicate_shadow_plane_state);
{
struct drm_gem_object *obj = dma_buf->priv;
- if (!obj->funcs->get_sg_table)
+ /*
+ * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers
+ * that implement their own ->map_dma_buf() do not.
+ */
+ if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf &&
+ !obj->funcs->get_sg_table)
return -ENOSYS;
return drm_gem_pin(obj);
*value = gpu->identity.eco_id;
break;
- case ETNAVIV_PARAM_GPU_NN_CORE_COUNT:
- *value = gpu->identity.nn_core_count;
- break;
-
- case ETNAVIV_PARAM_GPU_NN_MAD_PER_CORE:
- *value = gpu->identity.nn_mad_per_core;
- break;
-
- case ETNAVIV_PARAM_GPU_TP_CORE_COUNT:
- *value = gpu->identity.tp_core_count;
- break;
-
- case ETNAVIV_PARAM_GPU_ON_CHIP_SRAM_SIZE:
- *value = gpu->identity.on_chip_sram_size;
- break;
-
- case ETNAVIV_PARAM_GPU_AXI_SRAM_SIZE:
- *value = gpu->identity.axi_sram_size;
- break;
-
default:
DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
return -EINVAL;
/* Disable TX clock gating on affected core revisions. */
if (etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
- etnaviv_is_model_rev(gpu, GC2000, 0x6202) ||
- etnaviv_is_model_rev(gpu, GC2000, 0x6203))
+ etnaviv_is_model_rev(gpu, GC7000, 0x6202) ||
+ etnaviv_is_model_rev(gpu, GC7000, 0x6203))
pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_TX;
/* Disable SE and RA clock gating on affected core revisions. */
/* Number of Neural Network cores. */
u32 nn_core_count;
- /* Number of MAD units per Neural Network core. */
- u32 nn_mad_per_core;
-
- /* Number of Tensor Processing cores. */
- u32 tp_core_count;
-
- /* Size in bytes of the SRAM inside the NPU. */
- u32 on_chip_sram_size;
-
- /* Size in bytes of the SRAM across the AXI bus. */
- u32 axi_sram_size;
-
/* Size of the vertex cache. */
u32 vertex_cache_size;
.thread_count = 128,
.shader_core_count = 1,
.nn_core_count = 0,
- .nn_mad_per_core = 0,
- .tp_core_count = 0,
- .on_chip_sram_size = 0,
- .axi_sram_size = 0,
.vertex_cache_size = 8,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 1,
.register_max = 64,
.thread_count = 256,
.shader_core_count = 1,
- .nn_core_count = 0,
- .nn_mad_per_core = 0,
- .tp_core_count = 0,
- .on_chip_sram_size = 0,
- .axi_sram_size = 0,
.vertex_cache_size = 8,
.vertex_output_buffer_size = 512,
.pixel_pipes = 1,
.thread_count = 512,
.shader_core_count = 2,
.nn_core_count = 0,
- .nn_mad_per_core = 0,
- .tp_core_count = 0,
- .on_chip_sram_size = 0,
- .axi_sram_size = 0,
.vertex_cache_size = 16,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 1,
.thread_count = 512,
.shader_core_count = 2,
.nn_core_count = 0,
- .nn_mad_per_core = 0,
- .tp_core_count = 0,
- .on_chip_sram_size = 0,
- .axi_sram_size = 0,
.vertex_cache_size = 16,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 1,
.register_max = 64,
.thread_count = 512,
.shader_core_count = 2,
- .nn_core_count = 0,
- .nn_mad_per_core = 0,
- .tp_core_count = 0,
- .on_chip_sram_size = 0,
- .axi_sram_size = 0,
.vertex_cache_size = 16,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 1,
.thread_count = 1024,
.shader_core_count = 4,
.nn_core_count = 0,
- .nn_mad_per_core = 0,
- .tp_core_count = 0,
- .on_chip_sram_size = 0,
- .axi_sram_size = 0,
.vertex_cache_size = 16,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 2,
.thread_count = 256,
.shader_core_count = 1,
.nn_core_count = 8,
- .nn_mad_per_core = 64,
- .tp_core_count = 4,
- .on_chip_sram_size = 524288,
- .axi_sram_size = 1048576,
.vertex_cache_size = 16,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 1,
.thread_count = 256,
.shader_core_count = 1,
.nn_core_count = 6,
- .nn_mad_per_core = 64,
- .tp_core_count = 3,
- .on_chip_sram_size = 262144,
- .axi_sram_size = 0,
.vertex_cache_size = 16,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 1,
psb_intel_lvds.o \
psb_intel_modes.o \
psb_intel_sdvo.o \
- psb_lid.o \
psb_irq.o
gma500_gfx-$(CONFIG_ACPI) += opregion.o
}
psb_intel_lvds_set_brightness(dev, PSB_MAX_BRIGHTNESS);
- /* This must occur after the backlight is properly initialised */
- psb_lid_timer_init(dev_priv);
+
return 0;
}
static void psb_chip_teardown(struct drm_device *dev)
{
- struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
- psb_lid_timer_takedown(dev_priv);
gma_intel_teardown_gmbus(dev);
}
#define PSB_NUM_VBLANKS 2
#define PSB_WATCHDOG_DELAY (HZ * 2)
-#define PSB_LID_DELAY (HZ / 10)
#define PSB_MAX_BRIGHTNESS 100
/* Hotplug handling */
struct work_struct hotplug_work;
- /* LID-Switch */
- spinlock_t lid_lock;
- struct timer_list lid_timer;
struct psb_intel_opregion opregion;
- u32 lid_last_state;
/* Watchdog */
uint32_t apm_reg;
int i2c_bus; /* I2C bus identifier for Moorestown */
};
-/* psb_lid.c */
-extern void psb_lid_timer_init(struct drm_psb_private *dev_priv);
-extern void psb_lid_timer_takedown(struct drm_psb_private *dev_priv);
-
/* modesetting */
extern void psb_modeset_init(struct drm_device *dev);
extern void psb_modeset_cleanup(struct drm_device *dev);
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/**************************************************************************
- * Copyright (c) 2007, Intel Corporation.
- *
- * Authors: Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
- **************************************************************************/
-
-#include <linux/spinlock.h>
-
-#include "psb_drv.h"
-#include "psb_intel_reg.h"
-#include "psb_reg.h"
-
-static void psb_lid_timer_func(struct timer_list *t)
-{
- struct drm_psb_private *dev_priv = from_timer(dev_priv, t, lid_timer);
- struct drm_device *dev = (struct drm_device *)&dev_priv->dev;
- struct timer_list *lid_timer = &dev_priv->lid_timer;
- unsigned long irq_flags;
- u32 __iomem *lid_state = dev_priv->opregion.lid_state;
- u32 pp_status;
-
- if (readl(lid_state) == dev_priv->lid_last_state)
- goto lid_timer_schedule;
-
- if ((readl(lid_state)) & 0x01) {
- /*lid state is open*/
- REG_WRITE(PP_CONTROL, REG_READ(PP_CONTROL) | POWER_TARGET_ON);
- do {
- pp_status = REG_READ(PP_STATUS);
- } while ((pp_status & PP_ON) == 0 &&
- (pp_status & PP_SEQUENCE_MASK) != 0);
-
- if (REG_READ(PP_STATUS) & PP_ON) {
- /*FIXME: should be backlight level before*/
- psb_intel_lvds_set_brightness(dev, 100);
- } else {
- DRM_DEBUG("LVDS panel never powered up");
- return;
- }
- } else {
- psb_intel_lvds_set_brightness(dev, 0);
-
- REG_WRITE(PP_CONTROL, REG_READ(PP_CONTROL) & ~POWER_TARGET_ON);
- do {
- pp_status = REG_READ(PP_STATUS);
- } while ((pp_status & PP_ON) == 0);
- }
- dev_priv->lid_last_state = readl(lid_state);
-
-lid_timer_schedule:
- spin_lock_irqsave(&dev_priv->lid_lock, irq_flags);
- if (!timer_pending(lid_timer)) {
- lid_timer->expires = jiffies + PSB_LID_DELAY;
- add_timer(lid_timer);
- }
- spin_unlock_irqrestore(&dev_priv->lid_lock, irq_flags);
-}
-
-void psb_lid_timer_init(struct drm_psb_private *dev_priv)
-{
- struct timer_list *lid_timer = &dev_priv->lid_timer;
- unsigned long irq_flags;
-
- spin_lock_init(&dev_priv->lid_lock);
- spin_lock_irqsave(&dev_priv->lid_lock, irq_flags);
-
- timer_setup(lid_timer, psb_lid_timer_func, 0);
-
- lid_timer->expires = jiffies + PSB_LID_DELAY;
-
- add_timer(lid_timer);
- spin_unlock_irqrestore(&dev_priv->lid_lock, irq_flags);
-}
-
-void psb_lid_timer_takedown(struct drm_psb_private *dev_priv)
-{
- del_timer_sync(&dev_priv->lid_timer);
-}
-
subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror
# Fine grained warnings disable
-CFLAGS_i915_pci.o = $(call cc-disable-warning, override-init)
-CFLAGS_display/intel_display_device.o = $(call cc-disable-warning, override-init)
-CFLAGS_display/intel_fbdev.o = $(call cc-disable-warning, override-init)
+CFLAGS_i915_pci.o = -Wno-override-init
+CFLAGS_display/intel_display_device.o = -Wno-override-init
+CFLAGS_display/intel_fbdev.o = -Wno-override-init
# Support compiling the display code separately for both i915 and xe
# drivers. Define I915 when building i915.
gt/intel_ggtt_fencing.o \
gt/intel_gt.o \
gt/intel_gt_buffer_pool.o \
+ gt/intel_gt_ccs_mode.o \
gt/intel_gt_clock_utils.o \
gt/intel_gt_debugfs.o \
gt/intel_gt_engines_debugfs.o \
{
intel_enable_dp(state, encoder, pipe_config, conn_state);
intel_edp_backlight_on(pipe_config, conn_state);
- encoder->audio_enable(encoder, pipe_config, conn_state);
}
static void vlv_enable_dp(struct intel_atomic_state *state,
const struct drm_connector_state *conn_state)
{
intel_edp_backlight_on(pipe_config, conn_state);
- encoder->audio_enable(encoder, pipe_config, conn_state);
}
static void g4x_pre_enable_dp(struct intel_atomic_state *state,
}
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP);
- intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
/* ensure all panel commands dispatched before enabling transcoder */
wait_for_cmds_dispatched_to_panel(encoder);
/* step6d: enable dsi transcoder */
gen11_dsi_enable_transcoder(encoder);
+ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
+
/* step7: enable backlight */
intel_backlight_enable(crtc_state, conn_state);
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON);
* these devices we split the init OTP sequence into a deassert sequence and
* the actual init OTP part.
*/
-static void fixup_mipi_sequences(struct drm_i915_private *i915,
- struct intel_panel *panel)
+static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
{
u8 *init_otp;
int len;
- /* Limit this to VLV for now. */
- if (!IS_VALLEYVIEW(i915))
- return;
-
/* Limit this to v1 vid-mode sequences */
if (panel->vbt.dsi.config->is_cmd_mode ||
panel->vbt.dsi.seq_version != 1)
panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1;
}
+/*
+ * Some machines (eg. Lenovo 82TQ) appear to have broken
+ * VBT sequences:
+ * - INIT_OTP is not present at all
+ * - what should be in INIT_OTP is in DISPLAY_ON
+ * - what should be in DISPLAY_ON is in BACKLIGHT_ON
+ * (along with the actual backlight stuff)
+ *
+ * To make those work we simply swap DISPLAY_ON and INIT_OTP.
+ *
+ * TODO: Do we need to limit this to specific machines,
+ * or examine the contents of the sequences to
+ * avoid false positives?
+ */
+static void icl_fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
+{
+ if (!panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] &&
+ panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]) {
+ drm_dbg_kms(&i915->drm, "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n");
+
+ swap(panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP],
+ panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]);
+ }
+}
+
+static void fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
+{
+ if (DISPLAY_VER(i915) >= 11)
+ icl_fixup_mipi_sequences(i915, panel);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_fixup_mipi_sequences(i915, panel);
+}
+
static void
parse_mipi_sequence(struct drm_i915_private *i915,
struct intel_panel *panel)
{
const struct child_device_config *child = &devdata->child;
+ if (!devdata)
+ return false;
+
if (!intel_bios_encoder_supports_dp(devdata) ||
!intel_bios_encoder_supports_hdmi(devdata))
return false;
intel_atomic_get_old_cdclk_state(state);
const struct intel_cdclk_state *new_cdclk_state =
intel_atomic_get_new_cdclk_state(state);
- enum pipe pipe = new_cdclk_state->pipe;
+ struct intel_cdclk_config cdclk_config;
+ enum pipe pipe;
if (!intel_cdclk_changed(&old_cdclk_state->actual,
&new_cdclk_state->actual))
if (IS_DG2(i915))
intel_cdclk_pcode_pre_notify(state);
- if (pipe == INVALID_PIPE ||
- old_cdclk_state->actual.cdclk <= new_cdclk_state->actual.cdclk) {
- drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+ if (new_cdclk_state->disable_pipes) {
+ cdclk_config = new_cdclk_state->actual;
+ pipe = INVALID_PIPE;
+ } else {
+ if (new_cdclk_state->actual.cdclk >= old_cdclk_state->actual.cdclk) {
+ cdclk_config = new_cdclk_state->actual;
+ pipe = new_cdclk_state->pipe;
+ } else {
+ cdclk_config = old_cdclk_state->actual;
+ pipe = INVALID_PIPE;
+ }
- intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
+ cdclk_config.voltage_level = max(new_cdclk_state->actual.voltage_level,
+ old_cdclk_state->actual.voltage_level);
}
+
+ drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+
+ intel_set_cdclk(i915, &cdclk_config, pipe);
}
/**
intel_atomic_get_old_cdclk_state(state);
const struct intel_cdclk_state *new_cdclk_state =
intel_atomic_get_new_cdclk_state(state);
- enum pipe pipe = new_cdclk_state->pipe;
+ enum pipe pipe;
if (!intel_cdclk_changed(&old_cdclk_state->actual,
&new_cdclk_state->actual))
if (IS_DG2(i915))
intel_cdclk_pcode_post_notify(state);
- if (pipe != INVALID_PIPE &&
- old_cdclk_state->actual.cdclk > new_cdclk_state->actual.cdclk) {
- drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+ if (!new_cdclk_state->disable_pipes &&
+ new_cdclk_state->actual.cdclk < old_cdclk_state->actual.cdclk)
+ pipe = new_cdclk_state->pipe;
+ else
+ pipe = INVALID_PIPE;
+
+ drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
- intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
- }
+ intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
}
static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state)
return NULL;
cdclk_state->pipe = INVALID_PIPE;
+ cdclk_state->disable_pipes = false;
return &cdclk_state->base;
}
if (ret)
return ret;
+ new_cdclk_state->disable_pipes = true;
+
drm_dbg_kms(&dev_priv->drm,
"Modeset required for cdclk change\n");
}
/* bitmask of active pipes */
u8 active_pipes;
+
+ /* update cdclk with pipes disabled */
+ bool disable_pipes;
};
int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state);
{
struct drm_i915_private *dev_priv =
to_i915(plane_state->uapi.plane->dev);
- const struct drm_framebuffer *fb = plane_state->hw.fb;
- struct drm_i915_gem_object *obj = intel_fb_obj(fb);
u32 base;
if (DISPLAY_INFO(dev_priv)->cursor_needs_physical)
- base = i915_gem_object_get_dma_address(obj, 0);
+ base = plane_state->phys_dma_addr;
else
base = intel_plane_ggtt_offset(plane_state);
static bool crtcs_port_sync_compatible(const struct intel_crtc_state *crtc_state1,
const struct intel_crtc_state *crtc_state2)
{
+ /*
+ * FIXME the modeset sequence is currently wrong and
+ * can't deal with bigjoiner + port sync at the same time.
+ */
return crtc_state1->hw.active && crtc_state2->hw.active &&
+ !crtc_state1->bigjoiner_pipes && !crtc_state2->bigjoiner_pipes &&
crtc_state1->output_types == crtc_state2->output_types &&
crtc_state1->output_format == crtc_state2->output_format &&
crtc_state1->lane_count == crtc_state2->lane_count &&
*/
intel_de_write(dev_priv, PIPESRC(pipe),
PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1));
-
- if (!crtc_state->enable_psr2_su_region_et)
- return;
-
- width = drm_rect_width(&crtc_state->psr2_su_area);
- height = drm_rect_height(&crtc_state->psr2_su_area);
-
- intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(pipe),
- PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1));
}
static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state)
#define HAS_DPT(i915) (DISPLAY_VER(i915) >= 13)
#define HAS_DSB(i915) (DISPLAY_INFO(i915)->has_dsb)
#define HAS_DSC(__i915) (DISPLAY_RUNTIME_INFO(__i915)->has_dsc)
+#define HAS_DSC_MST(__i915) (DISPLAY_VER(__i915) >= 12 && HAS_DSC(__i915))
#define HAS_FBC(i915) (DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0)
#define HAS_FPGA_DBG_UNCLAIMED(i915) (DISPLAY_INFO(i915)->has_fpga_dbg)
#define HAS_FW_BLC(i915) (DISPLAY_VER(i915) >= 3)
#define PLANE_HAS_FENCE BIT(0)
struct intel_fb_view view;
+ u32 phys_dma_addr; /* for cursor_needs_physical */
/* Plane pxp decryption state */
bool decrypt;
u32 psr2_man_track_ctl;
+ u32 pipe_srcsz_early_tpt;
+
struct drm_rect psr2_su_area;
/* Variable Refresh Rate state */
#include "intel_dp_tunnel.h"
#include "intel_dpio_phy.h"
#include "intel_dpll.h"
+#include "intel_drrs.h"
#include "intel_fifo_underrun.h"
#include "intel_hdcp.h"
#include "intel_hdmi.h"
/* The values must be in increasing order */
static const int mtl_rates[] = {
162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000,
- 810000, 1000000, 1350000, 2000000,
+ 810000, 1000000, 2000000,
};
static const int icl_rates[] = {
162000, 216000, 270000, 324000, 432000, 540000, 648000, 810000,
if (DISPLAY_VER(dev_priv) >= 12)
return true;
- if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A)
+ if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A &&
+ !intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST))
return true;
return false;
dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1);
for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp); i++) {
- if (valid_dsc_bpp[i] < dsc_min_bpp ||
- valid_dsc_bpp[i] > dsc_max_bpp)
+ if (valid_dsc_bpp[i] < dsc_min_bpp)
+ continue;
+ if (valid_dsc_bpp[i] > dsc_max_bpp)
break;
ret = dsc_compute_link_config(intel_dp,
intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA);
}
-static bool cpu_transcoder_has_drrs(struct drm_i915_private *i915,
- enum transcoder cpu_transcoder)
-{
- if (HAS_DOUBLE_BUFFERED_M_N(i915))
- return true;
-
- return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder);
-}
-
static bool can_enable_drrs(struct intel_connector *connector,
const struct intel_crtc_state *pipe_config,
const struct drm_display_mode *downclock_mode)
if (pipe_config->has_pch_encoder)
return false;
- if (!cpu_transcoder_has_drrs(i915, pipe_config->cpu_transcoder))
+ if (!intel_cpu_transcoder_has_drrs(i915, pipe_config->cpu_transcoder))
return false;
return downclock_mode &&
intel_panel_downclock_mode(connector, &pipe_config->hw.adjusted_mode);
int pixel_clock;
- if (has_seamless_m_n(connector))
+ /*
+ * FIXME all joined pipes share the same transcoder.
+ * Need to account for that when updating M/N live.
+ */
+ if (has_seamless_m_n(connector) && !pipe_config->bigjoiner_pipes)
pipe_config->update_m_n = true;
if (!can_enable_drrs(connector, pipe_config, downclock_mode)) {
intel_connector->get_hw_state = intel_ddi_connector_get_hw_state;
else
intel_connector->get_hw_state = intel_connector_get_hw_state;
+ intel_connector->sync_state = intel_dp_connector_sync_state;
if (!intel_edp_init_connector(intel_dp, intel_connector)) {
intel_dp_aux_fini(intel_dp);
u8 bcaps;
int ret;
+ *hdcp_capable = false;
+ *hdcp2_capable = false;
if (!intel_encoder_is_mst(connector->encoder))
return -EINVAL;
ret = _intel_dp_hdcp2_get_capability(aux, hdcp2_capable);
if (ret)
- return ret;
+ drm_dbg_kms(&i915->drm,
+ "HDCP2 DPCD capability read failed err: %d\n", ret);
ret = intel_dp_hdcp_read_bcaps(aux, i915, &bcaps);
if (ret)
return 0;
}
- if (DISPLAY_VER(dev_priv) >= 10 &&
+ if (HAS_DSC_MST(dev_priv) &&
drm_dp_sink_supports_dsc(intel_connector->dp.dsc_dpcd)) {
/*
* TBD pass the connector BPC,
static bool
ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915)
{
- return (((IS_ELKHARTLAKE(i915) || IS_JASPERLAKE(i915)) &&
+ return ((IS_ELKHARTLAKE(i915) &&
IS_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) ||
IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) &&
i915->display.dpll.ref_clks.nssc == 38400;
return str[drrs_type];
}
+bool intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915,
+ enum transcoder cpu_transcoder)
+{
+ if (HAS_DOUBLE_BUFFERED_M_N(i915))
+ return true;
+
+ return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder);
+}
+
static void
intel_drrs_set_refresh_rate_pipeconf(struct intel_crtc *crtc,
enum drrs_refresh_rate refresh_rate)
mutex_lock(&crtc->drrs.mutex);
seq_printf(m, "DRRS capable: %s\n",
- str_yes_no(crtc_state->has_drrs ||
- HAS_DOUBLE_BUFFERED_M_N(i915) ||
- intel_cpu_transcoder_has_m2_n2(i915, crtc_state->cpu_transcoder)));
+ str_yes_no(intel_cpu_transcoder_has_drrs(i915,
+ crtc_state->cpu_transcoder)));
seq_printf(m, "DRRS enabled: %s\n",
str_yes_no(crtc_state->has_drrs));
#include <linux/types.h>
enum drrs_type;
+enum transcoder;
struct drm_i915_private;
struct intel_atomic_state;
struct intel_crtc;
struct intel_crtc_state;
struct intel_connector;
+bool intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915,
+ enum transcoder cpu_transcoder);
const char *intel_drrs_type_str(enum drrs_type drrs_type);
bool intel_drrs_is_active(struct intel_crtc *crtc);
void intel_drrs_activate(const struct intel_crtc_state *crtc_state);
return max(0, vblank_start - intel_usecs_to_scanlines(adjusted_mode, latency));
}
+static u32 dsb_chicken(struct intel_crtc *crtc)
+{
+ if (crtc->mode_flags & I915_MODE_FLAG_VRR)
+ return DSB_CTRL_WAIT_SAFE_WINDOW |
+ DSB_CTRL_NO_WAIT_VBLANK |
+ DSB_INST_WAIT_SAFE_WINDOW |
+ DSB_INST_NO_WAIT_VBLANK;
+ else
+ return 0;
+}
+
static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
int dewake_scanline)
{
intel_de_write_fw(dev_priv, DSB_CTRL(pipe, dsb->id),
ctrl | DSB_ENABLE);
+ intel_de_write_fw(dev_priv, DSB_CHICKEN(pipe, dsb->id),
+ dsb_chicken(crtc));
+
intel_de_write_fw(dev_priv, DSB_HEAD(pipe, dsb->id),
intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf));
return PTR_ERR(vma);
plane_state->ggtt_vma = vma;
+
+ /*
+ * Pre-populate the dma address before we enter the vblank
+ * evade critical section as i915_gem_object_get_dma_address()
+ * will trigger might_sleep() even if it won't actually sleep,
+ * which is the case when the fb has already been pinned.
+ */
+ if (phys_cursor)
+ plane_state->phys_dma_addr =
+ i915_gem_object_get_dma_address(intel_fb_obj(fb), 0);
} else {
struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
return;
}
+ /*
+ * FIXME figure out what is wrong with PSR+bigjoiner and
+ * fix it. Presumably something related to the fact that
+ * PSR is a transcoder level feature.
+ */
+ if (crtc_state->bigjoiner_pipes) {
+ drm_dbg_kms(&dev_priv->drm,
+ "PSR disabled due to bigjoiner\n");
+ return;
+ }
+
if (CAN_PANEL_REPLAY(intel_dp))
crtc_state->has_panel_replay = true;
else
void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state)
{
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
struct intel_encoder *encoder;
intel_de_write(dev_priv, PSR2_MAN_TRK_CTL(cpu_transcoder),
crtc_state->psr2_man_track_ctl);
+
+ if (!crtc_state->enable_psr2_su_region_et)
+ return;
+
+ intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(crtc->pipe),
+ crtc_state->pipe_srcsz_early_tpt);
}
static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state,
crtc_state->psr2_man_track_ctl = val;
}
+static u32 psr2_pipe_srcsz_early_tpt_calc(struct intel_crtc_state *crtc_state,
+ bool full_update)
+{
+ int width, height;
+
+ if (!crtc_state->enable_psr2_su_region_et || full_update)
+ return 0;
+
+ width = drm_rect_width(&crtc_state->psr2_su_area);
+ height = drm_rect_height(&crtc_state->psr2_su_area);
+
+ return PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1);
+}
+
static void clip_area_update(struct drm_rect *overlap_damage_area,
struct drm_rect *damage_area,
struct drm_rect *pipe_src)
* cursor fully when cursor is in SU area.
*/
static void
-intel_psr2_sel_fetch_et_alignment(struct intel_crtc_state *crtc_state,
- struct intel_plane_state *cursor_state)
+intel_psr2_sel_fetch_et_alignment(struct intel_atomic_state *state,
+ struct intel_crtc *crtc)
{
- struct drm_rect inter;
+ struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
+ struct intel_plane_state *new_plane_state;
+ struct intel_plane *plane;
+ int i;
- if (!crtc_state->enable_psr2_su_region_et ||
- !cursor_state->uapi.visible)
+ if (!crtc_state->enable_psr2_su_region_et)
return;
- inter = crtc_state->psr2_su_area;
- if (!drm_rect_intersect(&inter, &cursor_state->uapi.dst))
- return;
+ for_each_new_intel_plane_in_state(state, plane, new_plane_state, i) {
+ struct drm_rect inter;
+
+ if (new_plane_state->uapi.crtc != crtc_state->uapi.crtc)
+ continue;
+
+ if (plane->id != PLANE_CURSOR)
+ continue;
+
+ if (!new_plane_state->uapi.visible)
+ continue;
- clip_area_update(&crtc_state->psr2_su_area, &cursor_state->uapi.dst,
- &crtc_state->pipe_src);
+ inter = crtc_state->psr2_su_area;
+ if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst))
+ continue;
+
+ clip_area_update(&crtc_state->psr2_su_area, &new_plane_state->uapi.dst,
+ &crtc_state->pipe_src);
+ }
}
/*
{
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
- struct intel_plane_state *new_plane_state, *old_plane_state,
- *cursor_plane_state = NULL;
+ struct intel_plane_state *new_plane_state, *old_plane_state;
struct intel_plane *plane;
bool full_update = false;
int i, ret;
damaged_area.x2 += new_plane_state->uapi.dst.x1 - src.x1;
clip_area_update(&crtc_state->psr2_su_area, &damaged_area, &crtc_state->pipe_src);
-
- /*
- * Cursor plane new state is stored to adjust su area to cover
- * cursor are fully.
- */
- if (plane->id == PLANE_CURSOR)
- cursor_plane_state = new_plane_state;
}
/*
if (ret)
return ret;
- /* Adjust su area to cover cursor fully as necessary */
- if (cursor_plane_state)
- intel_psr2_sel_fetch_et_alignment(crtc_state, cursor_plane_state);
+ /*
+ * Adjust su area to cover cursor fully as necessary (early
+ * transport). This needs to be done after
+ * drm_atomic_add_affected_planes to ensure visible cursor is added into
+ * affected planes even when cursor is not updated by itself.
+ */
+ intel_psr2_sel_fetch_et_alignment(state, crtc);
intel_psr2_sel_fetch_pipe_alignment(crtc_state);
skip_sel_fetch_set_loop:
psr2_man_trk_ctl_calc(crtc_state, full_update);
+ crtc_state->pipe_srcsz_early_tpt =
+ psr2_pipe_srcsz_early_tpt_calc(crtc_state, full_update);
return 0;
}
struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
u32 temp;
- encoder->audio_disable(encoder, old_crtc_state, conn_state);
-
intel_sdvo_set_active_outputs(intel_sdvo, 0);
if (0)
intel_sdvo_set_encoder_power_state(intel_sdvo,
intel_sdvo_set_encoder_power_state(intel_sdvo,
DRM_MODE_DPMS_ON);
intel_sdvo_set_active_outputs(intel_sdvo, intel_sdvo_connector->output_flag);
-
- encoder->audio_enable(encoder, pipe_config, conn_state);
}
static enum drm_mode_status
const struct drm_display_info *info = &connector->base.display_info;
int vmin, vmax;
+ /*
+ * FIXME all joined pipes share the same transcoder.
+ * Need to account for that during VRR toggle/push/etc.
+ */
+ if (crtc_state->bigjoiner_pipes)
+ return;
+
if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
return;
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
/*
- * TRANS_SET_CONTEXT_LATENCY with VRR enabled
- * requires this chicken bit on ADL/DG2.
+ * This bit seems to have two meanings depending on the platform:
+ * TGL: generate VRR "safe window" for DSB vblank waits
+ * ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR
*/
- if (DISPLAY_VER(dev_priv) == 13)
+ if (IS_DISPLAY_VER(dev_priv, 12, 13))
intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder),
0, PIPE_VBLANK_WITH_DELAY);
if (HAS_4TILE(i915))
caps |= INTEL_PLANE_CAP_TILING_4;
+ if (!IS_ENABLED(I915) && !HAS_FLAT_CCS(i915))
+ return caps;
+
if (skl_plane_has_rc_ccs(i915, pipe, plane_id)) {
caps |= INTEL_PLANE_CAP_CCS_RC;
if (DISPLAY_VER(i915) >= 12)
struct i915_vma *vma;
int ret;
+ if (!intel_gt_needs_wa_16018031267(vm->gt))
+ return 0;
+
/* The memory will be used only by GPU. */
obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
I915_BO_ALLOC_VOLATILE |
info->engine_mask &= ~BIT(GSC0);
}
+ /*
+ * Do not create the command streamer for CCS slices beyond the first.
+ * All the workload submitted to the first engine will be shared among
+ * all the slices.
+ *
+ * Once the user will be allowed to customize the CCS mode, then this
+ * check needs to be removed.
+ */
+ if (IS_DG2(gt->i915)) {
+ u8 first_ccs = __ffs(CCS_MASK(gt));
+
+ /* Mask off all the CCS engine */
+ info->engine_mask &= ~GENMASK(CCS3, CCS0);
+ /* Put back in the first CCS engine */
+ info->engine_mask |= BIT(_CCS(first_ccs));
+ }
+
return info->engine_mask;
}
intel_engine_park_heartbeat(engine);
intel_breadcrumbs_park(engine->breadcrumbs);
- /* Must be reset upon idling, or we may miss the busy wakeup. */
- GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN);
-
if (engine->park)
engine->park(engine);
{
cancel_timer(&engine->execlists.timer);
cancel_timer(&engine->execlists.preempt);
+
+ /* Reset upon idling, or we may delay the busy wakeup. */
+ WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN);
}
static void add_to_engine(struct i915_request *rq)
return I915_MAP_WC;
}
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt)
+{
+ /* Wa_16018031267, Wa_16018063123 */
+ return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71));
+}
+
bool intel_gt_needs_wa_22016122933(struct intel_gt *gt)
{
return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA;
##__VA_ARGS__); \
} while (0)
-#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
- IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \
- engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
-
static inline bool gt_is_root(struct intel_gt *gt)
{
return !gt->info.id;
}
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt);
bool intel_gt_needs_wa_22016122933(struct intel_gt *gt);
+#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
+ intel_gt_needs_wa_16018031267(engine->gt) && \
+ engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
+
static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
{
return container_of(uc, struct intel_gt, uc);
--- /dev/null
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+ int cslice;
+ u32 mode = 0;
+ int first_ccs = __ffs(CCS_MASK(gt));
+
+ if (!IS_DG2(gt->i915))
+ return;
+
+ /* Build the value for the fixed CCS load balancing */
+ for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+ if (CCS_MASK(gt) & BIT(cslice))
+ /*
+ * If available, assign the cslice
+ * to the first available engine...
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+
+ else
+ /*
+ * ... otherwise, mark the cslice as
+ * unavailable if no CCS dispatches here
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice,
+ XEHP_CCS_MODE_CSLICE_MASK);
+ }
+
+ intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
+}
--- /dev/null
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CCS_MODE_H__
+#define __INTEL_GT_CCS_MODE_H__
+
+struct intel_gt;
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt);
+
+#endif /* __INTEL_GT_CCS_MODE_H__ */
#define ECOBITS_PPGTT_CACHE4B (0 << 8)
#define GEN12_RCU_MODE _MMIO(0x14800)
+#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0)
+#define XEHP_CCS_MODE _MMIO(0x14804)
+#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */
+#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1)
+#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH))
+
#define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168)
#define CHV_FGT_DISABLE_SS0 (1 << 10)
#define CHV_FGT_DISABLE_SS1 (1 << 11)
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
* registers belonging to BCS, VCS or VECS should be implemented in
* xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
* engine's MMIO range but that are part of of the common RCS/CCS reset domain
- * should be implemented in general_render_compute_wa_init().
+ * should be implemented in general_render_compute_wa_init(). The settings
+ * about the CCS load balancing should be added in ccs_engine_wa_mode().
*
* - GT workarounds: the list of these WAs is applied whenever these registers
* revert to their default values: on GPU reset, suspend/resume [1]_, etc.
xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
/* Wa_14018575942 / Wa_18018781329 */
+ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
/* Wa_22016670082 */
wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
}
+static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+ struct intel_gt *gt = engine->gt;
+
+ if (!IS_DG2(gt->i915))
+ return;
+
+ /*
+ * Wa_14019159160: This workaround, along with others, leads to
+ * significant challenges in utilizing load balancing among the
+ * CCS slices. Consequently, an architectural decision has been
+ * made to completely disable automatic CCS load balancing.
+ */
+ wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
+
+ /*
+ * After having disabled automatic load balancing we need to
+ * assign all slices to a single CCS. We will call it CCS mode 1
+ */
+ intel_gt_apply_ccs_mode(gt);
+}
+
/*
* The workarounds in this function apply to shared registers in
* the general render reset domain that aren't tied to a
* to a single RCS/CCS engine's workaround list since
* they're reset as part of the general render domain reset.
*/
- if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
+ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
general_render_compute_wa_init(engine, wal);
+ ccs_engine_wa_mode(engine, wal);
+ }
if (engine->class == COMPUTE_CLASS)
ccs_engine_wa_init(engine, wal);
* Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
* every possible call stack is unfeasible. It would be too intrusive to many
* areas that really don't care about the GuC backend. However, there is the
- * 'reset_in_progress' flag available, so just use that.
+ * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked.
+ * So just use those. Note that testing both is required due to the hideously
+ * complex nature of the i915 driver's reset code paths.
*
* And note that in the case of a reset occurring during driver unload
- * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set
- * is fine because there is another cancel in _finish (when the reset flag is
- * not).
+ * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the
+ * reset flag/mutex are set) is fine because there is another explicit cancel in
+ * intel_guc_submission_fini (when the reset flag/mutex are not).
*/
- if (guc_to_gt(guc)->uc.reset_in_progress)
+ if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) ||
+ test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags))
cancel_delayed_work(&guc->timestamp.work);
else
cancel_delayed_work_sync(&guc->timestamp.work);
unsigned long flags;
ktime_t unused;
- guc_cancel_busyness_worker(guc);
-
spin_lock_irqsave(&guc->timestamp.lock, flags);
guc_update_pm_timestamp(guc, &unused);
void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
- /*
- * Ensure the busyness worker gets cancelled even on a fatal wedge.
- * Note that reset_prepare is not allowed to because it confuses lockdep.
- */
- if (guc_submission_initialized(guc))
- guc_cancel_busyness_worker(guc);
-
/* Reset called during driver load or during wedge? */
if (unlikely(!guc_submission_initialized(guc) ||
!intel_guc_is_fw_running(guc) ||
if (!guc->submission_initialized)
return;
+ guc_fini_engine_stats(guc);
guc_flush_destroyed_contexts(guc);
guc_lrc_desc_pool_destroy_v69(guc);
i915_sched_engine_put(guc->sched_engine);
{
struct intel_guc *guc = &uc->guc;
+ /*
+ * NB: The wedge code path results in prepare -> prepare -> finish -> finish.
+ * So this function is sometimes called with the in-progress flag not set.
+ */
uc->reset_in_progress = false;
/* Firmware expected to be running when this function is called */
goto out_cleanup_modeset2;
ret = intel_pxp_init(i915);
- if (ret != -ENODEV)
+ if (ret && ret != -ENODEV)
drm_dbg(&i915->drm, "pxp init failed with %d\n", ret);
ret = intel_display_driver_probe(i915);
struct intel_uncore *uncore = ddat->uncore;
intel_wakeref_t wakeref;
- mutex_lock(&hwmon->hwmon_lock);
+ with_intel_runtime_pm(uncore->rpm, wakeref) {
+ mutex_lock(&hwmon->hwmon_lock);
- with_intel_runtime_pm(uncore->rpm, wakeref)
intel_uncore_rmw(uncore, reg, clear, set);
- mutex_unlock(&hwmon->hwmon_lock);
+ mutex_unlock(&hwmon->hwmon_lock);
+ }
}
/*
else
rgaddr = hwmon->rg.energy_status_all;
- mutex_lock(&hwmon->hwmon_lock);
+ with_intel_runtime_pm(uncore->rpm, wakeref) {
+ mutex_lock(&hwmon->hwmon_lock);
- with_intel_runtime_pm(uncore->rpm, wakeref)
reg_val = intel_uncore_read(uncore, rgaddr);
- if (reg_val >= ei->reg_val_prev)
- ei->accum_energy += reg_val - ei->reg_val_prev;
- else
- ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
- ei->reg_val_prev = reg_val;
+ if (reg_val >= ei->reg_val_prev)
+ ei->accum_energy += reg_val - ei->reg_val_prev;
+ else
+ ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
+ ei->reg_val_prev = reg_val;
- *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
- hwmon->scl_shift_energy);
- mutex_unlock(&hwmon->hwmon_lock);
+ *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
+ hwmon->scl_shift_energy);
+ mutex_unlock(&hwmon->hwmon_lock);
+ }
}
static ssize_t
/* Block waiting for GuC reset to complete when needed */
for (;;) {
+ wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
mutex_lock(&hwmon->hwmon_lock);
prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
}
mutex_unlock(&hwmon->hwmon_lock);
+ intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
schedule();
}
finish_wait(&ddat->waitq, &wait);
if (ret)
- goto unlock;
-
- wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
+ goto exit;
/* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */
if (val == PL1_DISABLE) {
intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
exit:
- intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
-unlock:
mutex_unlock(&hwmon->hwmon_lock);
+ intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
return ret;
}
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/cpufeature.h>
+#include <linux/bug.h>
+#include <linux/build_bug.h>
#include <asm/fpu/api.h>
#include "i915_memcpy.h"
#define MTL_CHICKEN_TRANS(trans) _MMIO_TRANS((trans), \
_MTL_CHICKEN_TRANS_A, \
_MTL_CHICKEN_TRANS_B)
-#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* ADL/DG2 */
+#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* tgl+ */
#define SKL_UNMASK_VBL_TO_PIPE_IN_SRD REG_BIT(30) /* skl+ */
#define HSW_FRAME_START_DELAY_MASK REG_GENMASK(28, 27)
#define HSW_FRAME_START_DELAY(x) REG_FIELD_PREP(HSW_FRAME_START_DELAY_MASK, x)
#include "gt/intel_engine.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_tlb.h"
static int __i915_vma_active(struct i915_active *ref)
{
- return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT;
+ struct i915_vma *vma = active_to_vma(ref);
+
+ if (!i915_vma_tryget(vma))
+ return -ENOENT;
+
+ /*
+ * Exclude global GTT VMA from holding a GT wakeref
+ * while active, otherwise GPU never goes idle.
+ */
+ if (!i915_vma_is_ggtt(vma)) {
+ /*
+ * Since we and our _retire() counterpart can be
+ * called asynchronously, storing a wakeref tracking
+ * handle inside struct i915_vma is not safe, and
+ * there is no other good place for that. Hence,
+ * use untracked variants of intel_gt_pm_get/put().
+ */
+ intel_gt_pm_get_untracked(vma->vm->gt);
+ }
+
+ return 0;
}
static void __i915_vma_retire(struct i915_active *ref)
{
- i915_vma_put(active_to_vma(ref));
+ struct i915_vma *vma = active_to_vma(ref);
+
+ if (!i915_vma_is_ggtt(vma)) {
+ /*
+ * Since we can be called from atomic contexts,
+ * use an async variant of intel_gt_pm_put().
+ */
+ intel_gt_pm_put_async_untracked(vma->vm->gt);
+ }
+
+ i915_vma_put(vma);
}
static struct i915_vma *
struct i915_vma_work *work = NULL;
struct dma_fence *moving = NULL;
struct i915_vma_resource *vma_res = NULL;
- intel_wakeref_t wakeref = 0;
+ intel_wakeref_t wakeref;
unsigned int bound;
int err;
if (err)
return err;
- if (flags & PIN_GLOBAL)
- wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
+ /*
+ * In case of a global GTT, we must hold a runtime-pm wakeref
+ * while global PTEs are updated. In other cases, we hold
+ * the rpm reference while the VMA is active. Since runtime
+ * resume may require allocations, which are forbidden inside
+ * vm->mutex, get the first rpm wakeref outside of the mutex.
+ */
+ wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
if (flags & vma->vm->bind_async_flags) {
/* lock VM */
if (work)
dma_fence_work_commit_imm(&work->base);
err_rpm:
- if (wakeref)
- intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
if (moving)
dma_fence_put(moving);
if (adreno_is_a618(gpu))
gpu->ubwc_config.highest_bank_bit = 14;
+ if (adreno_is_a619(gpu))
+ /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
+ gpu->ubwc_config.highest_bank_bit = 13;
+
if (adreno_is_a619_holi(gpu))
gpu->ubwc_config.highest_bank_bit = 13;
(block->type << 8) | i);
in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
- block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
+ block->size, out);
out += block->size * sizeof(u32);
}
},
};
+/* TODO: INTF 3, 8 and 7 are used for MST, marked as INTF_NONE for now */
static const struct dpu_intf_cfg x1e80100_intf[] = {
{
.name = "intf_0", .id = INTF_0,
.name = "intf_3", .id = INTF_3,
.base = 0x37000, .len = 0x280,
.features = INTF_SC7280_MASK,
- .type = INTF_DP,
- .controller_id = MSM_DP_CONTROLLER_1,
+ .type = INTF_NONE,
+ .controller_id = MSM_DP_CONTROLLER_0, /* pair with intf_0 for DP MST */
.prog_fetch_lines_worst_case = 24,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31),
.base = 0x38000, .len = 0x280,
.features = INTF_SC7280_MASK,
.type = INTF_DP,
- .controller_id = MSM_DP_CONTROLLER_2,
+ .controller_id = MSM_DP_CONTROLLER_1,
.prog_fetch_lines_worst_case = 24,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 20),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 21),
.prog_fetch_lines_worst_case = 24,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 22),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 23),
+ }, {
+ .name = "intf_6", .id = INTF_6,
+ .base = 0x3A000, .len = 0x280,
+ .features = INTF_SC7280_MASK,
+ .type = INTF_DP,
+ .controller_id = MSM_DP_CONTROLLER_2,
+ .prog_fetch_lines_worst_case = 24,
+ .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17),
+ .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16),
+ }, {
+ .name = "intf_7", .id = INTF_7,
+ .base = 0x3b000, .len = 0x280,
+ .features = INTF_SC7280_MASK,
+ .type = INTF_NONE,
+ .controller_id = MSM_DP_CONTROLLER_2, /* pair with intf_6 for DP MST */
+ .prog_fetch_lines_worst_case = 24,
+ .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 18),
+ .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 19),
+ }, {
+ .name = "intf_8", .id = INTF_8,
+ .base = 0x3c000, .len = 0x280,
+ .features = INTF_SC7280_MASK,
+ .type = INTF_NONE,
+ .controller_id = MSM_DP_CONTROLLER_1, /* pair with intf_4 for DP MST */
+ .prog_fetch_lines_worst_case = 24,
+ .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12),
+ .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13),
},
};
&perf->core_clk_rate);
debugfs_create_u32("enable_bw_release", 0600, entry,
(u32 *)&perf->enable_bw_release);
- debugfs_create_u32("threshold_low", 0600, entry,
+ debugfs_create_u32("threshold_low", 0400, entry,
(u32 *)&perf->perf_cfg->max_bw_low);
- debugfs_create_u32("threshold_high", 0600, entry,
+ debugfs_create_u32("threshold_high", 0400, entry,
(u32 *)&perf->perf_cfg->max_bw_high);
- debugfs_create_u32("min_core_ib", 0600, entry,
+ debugfs_create_u32("min_core_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_core_ib);
- debugfs_create_u32("min_llcc_ib", 0600, entry,
+ debugfs_create_u32("min_llcc_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_llcc_ib);
- debugfs_create_u32("min_dram_ib", 0600, entry,
+ debugfs_create_u32("min_dram_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_dram_ib);
debugfs_create_file("perf_mode", 0600, entry,
(u32 *)perf, &dpu_core_perf_mode_fops);
int ret;
if (!irq_cb) {
- DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n",
- DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb);
+ DPU_ERROR("IRQ=[%d, %d] NULL callback\n",
+ DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx));
return -EINVAL;
}
if (!dpu_core_irq_is_valid(irq_idx)) {
- DPU_ERROR("invalid IRQ=[%d, %d]\n",
- DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx));
+ DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n",
+ DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb);
return -EINVAL;
}
}
}
-static int dp_display_handle_port_ststus_changed(struct dp_display_private *dp)
+static int dp_display_handle_port_status_changed(struct dp_display_private *dp)
{
int rc = 0;
drm_dbg_dp(dp->drm_dev, "hpd_state=%d sink_request=%d\n",
dp->hpd_state, sink_request);
if (sink_request & DS_PORT_STATUS_CHANGED)
- rc = dp_display_handle_port_ststus_changed(dp);
+ rc = dp_display_handle_port_status_changed(dp);
else
rc = dp_display_handle_irq_hpd(dp);
}
ret = dp_display_usbpd_configure_cb(&pdev->dev);
if (ret) { /* link train failed */
dp->hpd_state = ST_DISCONNECTED;
+ pm_runtime_put_sync(&pdev->dev);
} else {
dp->hpd_state = ST_MAINLINK_READY;
}
dp_display_host_phy_exit(dp);
dp->hpd_state = ST_DISCONNECTED;
dp_display_notify_disconnect(&dp->dp_display.pdev->dev);
+ pm_runtime_put_sync(&pdev->dev);
mutex_unlock(&dp->event_mutex);
return 0;
}
for (i = 0; i < n; i++) {
ret = msm_gem_get_and_pin_iova(fb->obj[i], aspace, &msm_fb->iova[i]);
- drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)",
+ drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)\n",
fb->base.id, i, msm_fb->iova[i], ret);
if (ret)
return ret;
const struct msm_format *format;
int ret, i, n;
- drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)",
+ drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)\n",
mode_cmd, mode_cmd->width, mode_cmd->height,
(char *)&mode_cmd->pixel_format);
refcount_set(&msm_fb->dirtyfb, 1);
- drm_dbg_state(dev, "create: FB ID: %d (%p)", fb->base.id, fb);
+ drm_dbg_state(dev, "create: FB ID: %d (%p)\n", fb->base.id, fb);
return fb;
struct msm_kms *kms = priv->kms;
if (!kms)
return -ENXIO;
- drm_dbg_vbl(dev, "crtc=%u", crtc->base.id);
+ drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id);
return vblank_ctrl_queue_work(priv, crtc, true);
}
struct msm_kms *kms = priv->kms;
if (!kms)
return;
- drm_dbg_vbl(dev, "crtc=%u", crtc->base.id);
+ drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id);
vblank_ctrl_queue_work(priv, crtc, false);
}
*/
#include "nouveau_drv.h"
+#include "nouveau_bios.h"
#include "nouveau_reg.h"
#include "dispnv04/hw.h"
#include "nouveau_encoder.h"
*/
if (nv_match_device(dev, 0x0201, 0x1462, 0x8851)) {
if (*conn == 0xf2005014 && *conf == 0xffffffff) {
- fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, 1);
+ fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, DCB_OUTPUT_B);
return false;
}
}
#ifdef __powerpc__
/* Apple iMac G4 NV17 */
if (of_machine_is_compatible("PowerMac4,5")) {
- fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, 1);
- fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, 2);
+ fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, DCB_OUTPUT_B);
+ fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, DCB_OUTPUT_C);
return;
}
#endif
/* Make up some sane defaults */
fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG,
- bios->legacy.i2c_indices.crt, 1, 1);
+ bios->legacy.i2c_indices.crt, 1, DCB_OUTPUT_B);
if (nv04_tv_identify(dev, bios->legacy.i2c_indices.tv) >= 0)
fabricate_dcb_output(dcb, DCB_OUTPUT_TV,
bios->legacy.i2c_indices.tv,
- all_heads, 0);
+ all_heads, DCB_OUTPUT_A);
else if (bios->tmds.output0_script_ptr ||
bios->tmds.output1_script_ptr)
fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS,
bios->legacy.i2c_indices.panel,
- all_heads, 1);
+ all_heads, DCB_OUTPUT_B);
}
static int
dma_addr_t *dma_addrs;
struct nouveau_fence *fence;
- src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
- dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
- dma_addrs = kcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL);
+ src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
+ dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);
+ dma_addrs = kvcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL | __GFP_NOFAIL);
migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT,
npages);
migrate_device_pages(src_pfns, dst_pfns, npages);
nouveau_dmem_fence_done(&fence);
migrate_device_finalize(src_pfns, dst_pfns, npages);
- kfree(src_pfns);
- kfree(dst_pfns);
+ kvfree(src_pfns);
+ kvfree(dst_pfns);
for (i = 0; i < npages; i++)
dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
- kfree(dma_addrs);
+ kvfree(dma_addrs);
}
void
u8 *dpcd = nv_encoder->dp.dpcd;
int ret = NOUVEAU_DP_NONE, hpd;
- /* If we've already read the DPCD on an eDP device, we don't need to
- * reread it as it won't change
+ /* eDP ports don't support hotplugging - so there's no point in probing eDP ports unless we
+ * haven't probed them once before.
*/
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP &&
- dpcd[DP_DPCD_REV] != 0)
- return NOUVEAU_DP_SST;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ if (connector->status == connector_status_connected)
+ return NOUVEAU_DP_SST;
+ else if (connector->status == connector_status_disconnected)
+ return NOUVEAU_DP_NONE;
+ }
+
+ // Ensure that the aux bus is enabled for probing
+ drm_dp_dpcd_set_powered(&nv_connector->aux, true);
mutex_lock(&nv_encoder->dp.hpd_irq_lock);
if (mstm) {
if (mstm && !mstm->suspended && ret != NOUVEAU_DP_MST)
nv50_mstm_remove(mstm);
+ /* GSP doesn't like when we try to do aux transactions on a port it considers disconnected,
+ * and since we don't really have a usecase for that anyway - just disable the aux bus here
+ * if we've decided the connector is disconnected
+ */
+ if (ret == NOUVEAU_DP_NONE)
+ drm_dp_dpcd_set_powered(&nv_connector->aux, false);
+
mutex_unlock(&nv_encoder->dp.hpd_irq_lock);
return ret;
}
struct drm_gpuva_op_unmap *u = r->unmap;
struct nouveau_uvma *uvma = uvma_from_va(u->va);
u64 addr = uvma->va.va.addr;
- u64 range = uvma->va.va.range;
+ u64 end = uvma->va.va.addr + uvma->va.va.range;
if (r->prev)
addr = r->prev->va.addr + r->prev->va.range;
if (r->next)
- range = r->next->va.addr - addr;
+ end = r->next->va.addr;
- op_unmap_range(u, addr, range);
+ op_unmap_range(u, addr, end - addr);
}
static int
return ret;
} else {
ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb,
- &args, sizeof(args));;
+ &args, sizeof(args));
if (ret)
return ret;
}
return ERR_PTR(-EINVAL);
}
+static void of_fini(void *p)
+{
+ kfree(p);
+}
+
const struct nvbios_source
nvbios_of = {
.name = "OpenFirmware",
.init = of_init,
- .fini = (void(*)(void *))kfree,
+ .fini = of_fini,
.read = of_read,
.size = of_size,
.rw = false,
#include <subdev/bios.h>
#include <subdev/bios/init.h>
+#include <subdev/gsp.h>
void
gm107_devinit_disable(struct nvkm_devinit *init)
u32 r021c00 = nvkm_rd32(device, 0x021c00);
u32 r021c04 = nvkm_rd32(device, 0x021c04);
- if (r021c00 & 0x00000001)
- nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
- if (r021c00 & 0x00000004)
- nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
+ /* gsp only wants to enable/disable display */
+ if (!nvkm_gsp_rm(device->gsp)) {
+ if (r021c00 & 0x00000001)
+ nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
+ if (r021c00 & 0x00000004)
+ nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
+ }
if (r021c04 & 0x00000001)
nvkm_subdev_disable(device, NVKM_ENGINE_DISP, 0);
}
rm->dtor = r535_devinit_dtor;
rm->post = hw->post;
+ rm->disable = hw->disable;
ret = nv50_devinit_new_(rm, device, type, inst, pdevinit);
if (ret)
rpc->numEntries = NV_GSP_REG_NUM_ENTRIES;
str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]);
- strings = (char *)&rpc->entries[NV_GSP_REG_NUM_ENTRIES];
+ strings = (char *)rpc + str_offset;
for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++) {
int name_len = strlen(r535_registry_entries[i].name) + 1;
void __iomem *map = NULL;
/* Already mapped? */
- if (refcount_inc_not_zero(&iobj->maps))
+ if (refcount_inc_not_zero(&iobj->maps)) {
+ /* read barrier match the wmb on refcount set */
+ smp_rmb();
return iobj->map;
+ }
/* Take the lock, and re-check that another thread hasn't
* already mapped the object in the meantime.
iobj->base.memory.ptrs = &nv50_instobj_fast;
else
iobj->base.memory.ptrs = &nv50_instobj_slow;
+ /* barrier to ensure the ptrs are written before refcount is set */
+ smp_wmb();
refcount_set(&iobj->maps, 1);
}
struct nt36672e_panel *ctx = mipi_dsi_get_drvdata(dsi);
mipi_dsi_detach(ctx->dsi);
- mipi_dsi_device_unregister(ctx->dsi);
-
drm_panel_remove(&ctx->panel);
}
struct visionox_rm69299 *ctx = mipi_dsi_get_drvdata(dsi);
mipi_dsi_detach(ctx->dsi);
- mipi_dsi_device_unregister(ctx->dsi);
-
drm_panel_remove(&ctx->panel);
}
gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present);
ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO,
- val, !val, 1, 1000);
+ val, !val, 1, 2000);
if (ret)
dev_err(pfdev->dev, "shader power transition timeout");
gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present);
ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO,
- val, !val, 1, 1000);
+ val, !val, 1, 2000);
if (ret)
dev_err(pfdev->dev, "tiler power transition timeout");
gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present);
ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO,
- val, !val, 0, 1000);
+ val, !val, 0, 2000);
if (ret)
dev_err(pfdev->dev, "l2 power transition timeout");
}
mapping_set_unevictable(mapping);
for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
+ /* Can happen if the last fault only partially filled this
+ * section of the pages array before failing. In that case
+ * we skip already filled pages.
+ */
+ if (pages[i])
+ continue;
+
pages[i] = shmem_read_mapping_page(mapping, i);
if (IS_ERR(pages[i])) {
ret = PTR_ERR(pages[i]);
pages[i] = NULL;
- goto err_pages;
+ goto err_unlock;
}
}
ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL);
if (ret)
- goto err_pages;
+ goto err_unlock;
ret = dma_map_sgtable(pfdev->dev, sgt, DMA_BIDIRECTIONAL, 0);
if (ret)
err_map:
sg_free_table(sgt);
-err_pages:
- drm_gem_shmem_put_pages(&bo->base);
err_unlock:
dma_resv_unlock(obj->resv);
err_bo:
{
uint32_t handle;
int idr_ret;
- int count = 0;
again:
idr_preload(GFP_ATOMIC);
spin_lock(&qdev->surf_id_idr_lock);
handle = idr_ret;
if (handle >= qdev->rom->n_surfaces) {
- count++;
spin_lock(&qdev->surf_id_idr_lock);
idr_remove(&qdev->surf_id_idr, handle);
spin_unlock(&qdev->surf_id_idr_lock);
struct qxl_release *release;
struct qxl_bo *cmd_bo;
void *fb_cmd;
- int i, ret, num_relocs;
+ int i, ret;
int unwritten;
switch (cmd->type) {
}
/* fill out reloc info structs */
- num_relocs = 0;
for (i = 0; i < cmd->relocs_num; ++i) {
struct drm_qxl_reloc reloc;
struct drm_qxl_reloc __user *u = u64_to_user_ptr(cmd->relocs);
reloc_info[i].dst_bo = cmd_bo;
reloc_info[i].dst_offset = reloc.dst_offset + release->release_offset;
}
- num_relocs++;
/* reserve and validate the reloc dst bo */
if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle) {
signed long timeout)
{
struct qxl_device *qdev;
+ struct qxl_release *release;
+ int count = 0, sc = 0;
+ bool have_drawable_releases;
unsigned long cur, end = jiffies + timeout;
qdev = container_of(fence->lock, struct qxl_device, release_lock);
+ release = container_of(fence, struct qxl_release, base);
+ have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE;
- if (!wait_event_timeout(qdev->release_event,
- (dma_fence_is_signaled(fence) ||
- (qxl_io_notify_oom(qdev), 0)),
- timeout))
- return 0;
+retry:
+ sc++;
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+
+ qxl_io_notify_oom(qdev);
+
+ for (count = 0; count < 11; count++) {
+ if (!qxl_queue_garbage_collect(qdev, true))
+ break;
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+ }
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+
+ if (have_drawable_releases || sc < 4) {
+ if (sc > 2)
+ /* back off */
+ usleep_range(500, 1000);
+
+ if (time_after(jiffies, end))
+ return 0;
+
+ if (have_drawable_releases && sc > 300) {
+ DMA_FENCE_WARN(fence,
+ "failed to wait on release %llu after spincount %d\n",
+ fence->context & ~0xf0000000, sc);
+ goto signaled;
+ }
+ goto retry;
+ }
+ /*
+ * yeah, original sync_obj_wait gave up after 3 spins when
+ * have_drawable_releases is not set.
+ */
+signaled:
cur = jiffies;
if (time_after(cur, end))
return 0;
typedef struct _ATOM_PPLIB_STATE_V2
{
//number of valid dpm levels in this state; Driver uses it to calculate the whole
- //size of the state: sizeof(ATOM_PPLIB_STATE_V2) + (ucNumDPMLevels - 1) * sizeof(UCHAR)
+ //size of the state: struct_size(ATOM_PPLIB_STATE_V2, clockInfoIndex, ucNumDPMLevels)
UCHAR ucNumDPMLevels;
//a index to the array of nonClockInfos
/**
* Driver will read the first ucNumDPMLevels in this array
*/
- UCHAR clockInfoIndex[1];
+ UCHAR clockInfoIndex[] __counted_by(ucNumDPMLevels);
} ATOM_PPLIB_STATE_V2;
typedef struct _StateArray{
//how many states we have
UCHAR ucNumEntries;
- ATOM_PPLIB_STATE_V2 states[1];
+ ATOM_PPLIB_STATE_V2 states[] __counted_by(ucNumEntries);
}StateArray;
//sizeof(ATOM_PPLIB_CLOCK_INFO)
UCHAR ucEntrySize;
- UCHAR clockInfo[1];
+ UCHAR clockInfo[] __counted_by(ucNumEntries);
}ClockInfoArray;
typedef struct _NonClockInfoArray{
//sizeof(ATOM_PPLIB_NONCLOCK_INFO)
UCHAR ucEntrySize;
- ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[1];
+ ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[] __counted_by(ucNumEntries);
}NonClockInfoArray;
typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record
max_device = ATOM_MAX_SUPPORTED_DEVICE_INFO;
for (i = 0; i < max_device; i++) {
- ATOM_CONNECTOR_INFO_I2C ci =
- supported_devices->info.asConnInfo[i];
+ ATOM_CONNECTOR_INFO_I2C ci;
+
+ if (frev > 1)
+ ci = supported_devices->info_2d1.asConnInfo[i];
+ else
+ ci = supported_devices->info.asConnInfo[i];
bios_connectors[i].valid = false;
static const uint32_t formats_cluster[] = {
DRM_FORMAT_XRGB2101010,
- DRM_FORMAT_ARGB2101010,
DRM_FORMAT_XBGR2101010,
- DRM_FORMAT_ABGR2101010,
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
DRM_FORMAT_XBGR8888,
entity->guilty = guilty;
entity->num_sched_list = num_sched_list;
entity->priority = priority;
+ /*
+ * It's perfectly valid to initialize an entity without having a valid
+ * scheduler attached. It's just not valid to use the scheduler before it
+ * is initialized itself.
+ */
entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
RCU_INIT_POINTER(entity->last_scheduled, NULL);
RB_CLEAR_NODE(&entity->rb_tree_node);
- if (!sched_list[0]->sched_rq) {
- /* Warn drivers not to do this and to fix their DRM
- * calling order.
+ if (num_sched_list && !sched_list[0]->sched_rq) {
+ /* Since every entry covered by num_sched_list
+ * should be non-NULL and therefore we warn drivers
+ * not to do this and to fix their DRM calling order.
*/
pr_warn("%s: called with uninitialized scheduler\n", __func__);
} else if (num_sched_list) {
enum ttm_caching caching,
unsigned int order)
{
- if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
+ if (pool->use_dma_alloc)
return &pool->caching[caching].orders[order];
#ifdef CONFIG_X86
switch (caching) {
case ttm_write_combined:
+ if (pool->nid != NUMA_NO_NODE)
+ return &pool->caching[caching].orders[order];
+
if (pool->use_dma32)
return &global_dma32_write_combined[order];
return &global_write_combined[order];
case ttm_uncached:
+ if (pool->nid != NUMA_NO_NODE)
+ return &pool->caching[caching].orders[order];
+
if (pool->use_dma32)
return &global_dma32_uncached[order];
pool->use_dma_alloc = use_dma_alloc;
pool->use_dma32 = use_dma32;
- if (use_dma_alloc || nid != NUMA_NO_NODE) {
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
- for (j = 0; j < NR_PAGE_ORDERS; ++j)
- ttm_pool_type_init(&pool->caching[i].orders[j],
- pool, i, j);
+ for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+ for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+ struct ttm_pool_type *pt;
+
+ /* Initialize only pool types which are actually used */
+ pt = ttm_pool_select_type(pool, i, j);
+ if (pt != &pool->caching[i].orders[j])
+ continue;
+
+ ttm_pool_type_init(pt, pool, i, j);
+ }
}
}
EXPORT_SYMBOL(ttm_pool_init);
{
unsigned int i, j;
- if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
- for (j = 0; j < NR_PAGE_ORDERS; ++j)
- ttm_pool_type_fini(&pool->caching[i].orders[j]);
+ for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+ for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+ struct ttm_pool_type *pt;
+
+ pt = ttm_pool_select_type(pool, i, j);
+ if (pt != &pool->caching[i].orders[j])
+ continue;
+
+ ttm_pool_type_fini(pt);
+ }
}
/* We removed the pool types from the LRU, but we need to also make sure
struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_BIN];
- file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN];
file->jobs_sent[V3D_BIN]++;
v3d->queue[V3D_BIN].jobs_sent++;
struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_RENDER];
- file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER];
file->jobs_sent[V3D_RENDER]++;
v3d->queue[V3D_RENDER].jobs_sent++;
struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_CSD];
- file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD];
file->jobs_sent[V3D_CSD]++;
v3d->queue[V3D_CSD].jobs_sent++;
struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_TFU];
- file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU];
file->jobs_sent[V3D_TFU]++;
v3d->queue[V3D_TFU].jobs_sent++;
.no_wait_gpu = false
};
u32 j, initial_line = dst_offset / dst_stride;
- struct vmw_bo_blit_line_data d;
+ struct vmw_bo_blit_line_data d = {0};
int ret = 0;
+ struct page **dst_pages = NULL;
+ struct page **src_pages = NULL;
/* Buffer objects need to be either pinned or reserved: */
if (!(dst->pin_count))
return ret;
}
+ if (!src->ttm->pages && src->ttm->sg) {
+ src_pages = kvmalloc_array(src->ttm->num_pages,
+ sizeof(struct page *), GFP_KERNEL);
+ if (!src_pages)
+ return -ENOMEM;
+ ret = drm_prime_sg_to_page_array(src->ttm->sg, src_pages,
+ src->ttm->num_pages);
+ if (ret)
+ goto out;
+ }
+ if (!dst->ttm->pages && dst->ttm->sg) {
+ dst_pages = kvmalloc_array(dst->ttm->num_pages,
+ sizeof(struct page *), GFP_KERNEL);
+ if (!dst_pages) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = drm_prime_sg_to_page_array(dst->ttm->sg, dst_pages,
+ dst->ttm->num_pages);
+ if (ret)
+ goto out;
+ }
+
d.mapped_dst = 0;
d.mapped_src = 0;
d.dst_addr = NULL;
d.src_addr = NULL;
- d.dst_pages = dst->ttm->pages;
- d.src_pages = src->ttm->pages;
+ d.dst_pages = dst->ttm->pages ? dst->ttm->pages : dst_pages;
+ d.src_pages = src->ttm->pages ? src->ttm->pages : src_pages;
d.dst_num_pages = PFN_UP(dst->resource->size);
d.src_num_pages = PFN_UP(src->resource->size);
d.dst_prot = ttm_io_prot(dst, dst->resource, PAGE_KERNEL);
kunmap_atomic(d.src_addr);
if (d.dst_addr)
kunmap_atomic(d.dst_addr);
+ if (src_pages)
+ kvfree(src_pages);
+ if (dst_pages)
+ kvfree(dst_pages);
return ret;
}
{
struct ttm_operation_ctx ctx = {
.interruptible = params->bo_type != ttm_bo_type_kernel,
- .no_wait_gpu = false
+ .no_wait_gpu = false,
+ .resv = params->resv,
};
struct ttm_device *bdev = &dev_priv->bdev;
struct drm_device *vdev = &dev_priv->drm;
vmw_bo_placement_set(vmw_bo, params->domain, params->busy_domain);
ret = ttm_bo_init_reserved(bdev, &vmw_bo->tbo, params->bo_type,
- &vmw_bo->placement, 0, &ctx, NULL,
- NULL, destroy);
+ &vmw_bo->placement, 0, &ctx,
+ params->sg, params->resv, destroy);
if (unlikely(ret))
return ret;
enum ttm_bo_type bo_type;
size_t size;
bool pin;
+ struct dma_resv *resv;
+ struct sg_table *sg;
};
/**
[vmw_dma_map_populate] = "Caching DMA mappings.",
[vmw_dma_map_bind] = "Giving up DMA mappings early."};
- /* TTM currently doesn't fully support SEV encryption. */
- if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
- return -EINVAL;
-
- if (vmw_force_coherent)
+ /*
+ * When running with SEV we always want dma mappings, because
+ * otherwise ttm tt pool pages will bounce through swiotlb running
+ * out of available space.
+ */
+ if (vmw_force_coherent || cc_platform_has(CC_ATTR_MEM_ENCRYPT))
dev_priv->map_mode = vmw_dma_alloc_coherent;
else if (vmw_restrict_iommu)
dev_priv->map_mode = vmw_dma_map_bind;
root, "system_ttm");
ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, TTM_PL_VRAM),
root, "vram_ttm");
- ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR),
- root, "gmr_ttm");
- ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB),
- root, "mob_ttm");
- ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM),
- root, "system_mob_ttm");
+ if (vmw->has_gmr)
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR),
+ root, "gmr_ttm");
+ if (vmw->has_mob) {
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB),
+ root, "mob_ttm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM),
+ root, "system_mob_ttm");
+ }
}
static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
.prime_fd_to_handle = vmw_prime_fd_to_handle,
.prime_handle_to_fd = vmw_prime_handle_to_fd,
+ .gem_prime_import_sg_table = vmw_prime_import_sg_table,
.fops = &vmwgfx_driver_fops,
.name = VMWGFX_DRIVER_NAME,
struct drm_file *file_priv,
uint32_t handle, uint32_t flags,
int *prime_fd);
+struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *table);
/*
* MemoryOBject management - vmwgfx_mob.c
return ret;
}
+struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *table)
+{
+ int ret;
+ struct vmw_private *dev_priv = vmw_priv(dev);
+ struct drm_gem_object *gem = NULL;
+ struct vmw_bo *vbo;
+ struct vmw_bo_params params = {
+ .domain = (dev_priv->has_mob) ? VMW_BO_DOMAIN_SYS : VMW_BO_DOMAIN_VRAM,
+ .busy_domain = VMW_BO_DOMAIN_SYS,
+ .bo_type = ttm_bo_type_sg,
+ .size = attach->dmabuf->size,
+ .pin = false,
+ .resv = attach->dmabuf->resv,
+ .sg = table,
+
+ };
+
+ dma_resv_lock(params.resv, NULL);
+
+ ret = vmw_bo_create(dev_priv, ¶ms, &vbo);
+ if (ret != 0)
+ goto out_no_bo;
+
+ vbo->tbo.base.funcs = &vmw_gem_object_funcs;
+
+ gem = &vbo->tbo.base;
+out_no_bo:
+ dma_resv_unlock(params.resv);
+ return gem;
+}
int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
int vmw_du_crtc_atomic_check(struct drm_crtc *crtc,
struct drm_atomic_state *state)
{
+ struct vmw_private *vmw = vmw_priv(crtc->dev);
struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state,
crtc);
struct vmw_display_unit *du = vmw_crtc_to_du(new_state->crtc);
bool has_primary = new_state->plane_mask &
drm_plane_mask(crtc->primary);
- /* We always want to have an active plane with an active CRTC */
- if (has_primary != new_state->enable)
- return -EINVAL;
+ /*
+ * This is fine in general, but broken userspace might expect
+ * some actual rendering so give a clue as why it's blank.
+ */
+ if (new_state->enable && !has_primary)
+ drm_dbg_driver(&vmw->drm,
+ "CRTC without a primary plane will be blank.\n");
if (new_state->connector_mask != connector_mask &&
static const uint32_t __maybe_unused vmw_primary_plane_formats[] = {
- DRM_FORMAT_XRGB1555,
- DRM_FORMAT_RGB565,
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_RGB565,
+ DRM_FORMAT_XRGB1555,
};
static const uint32_t __maybe_unused vmw_cursor_plane_formats[] = {
int fd, u32 *handle)
{
struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+ int ret = ttm_prime_fd_to_handle(tfile, fd, handle);
- return ttm_prime_fd_to_handle(tfile, fd, handle);
+ if (ret)
+ ret = drm_gem_prime_fd_to_handle(dev, file_priv, fd, handle);
+
+ return ret;
}
int vmw_prime_handle_to_fd(struct drm_device *dev,
int *prime_fd)
{
struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
- return ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd);
+ int ret;
+
+ if (handle > VMWGFX_NUM_MOB)
+ ret = ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd);
+ else
+ ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd);
+
+ return ret;
}
switch (dev_priv->map_mode) {
case vmw_dma_map_bind:
case vmw_dma_map_populate:
- vsgt->sgt = &vmw_tt->sgt;
- ret = sg_alloc_table_from_pages_segment(
- &vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
- (unsigned long)vsgt->num_pages << PAGE_SHIFT,
- dma_get_max_seg_size(dev_priv->drm.dev), GFP_KERNEL);
- if (ret)
- goto out_sg_alloc_fail;
+ if (vmw_tt->dma_ttm.page_flags & TTM_TT_FLAG_EXTERNAL) {
+ vsgt->sgt = vmw_tt->dma_ttm.sg;
+ } else {
+ vsgt->sgt = &vmw_tt->sgt;
+ ret = sg_alloc_table_from_pages_segment(&vmw_tt->sgt,
+ vsgt->pages, vsgt->num_pages, 0,
+ (unsigned long)vsgt->num_pages << PAGE_SHIFT,
+ dma_get_max_seg_size(dev_priv->drm.dev),
+ GFP_KERNEL);
+ if (ret)
+ goto out_sg_alloc_fail;
+ }
ret = vmw_ttm_map_for_dma(vmw_tt);
if (unlikely(ret != 0))
return 0;
out_map_fail:
- sg_free_table(vmw_tt->vsgt.sgt);
- vmw_tt->vsgt.sgt = NULL;
+ drm_warn(&dev_priv->drm, "VSG table map failed!");
+ sg_free_table(vsgt->sgt);
+ vsgt->sgt = NULL;
out_sg_alloc_fail:
return ret;
}
static int vmw_ttm_populate(struct ttm_device *bdev,
struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
{
- int ret;
+ bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
- /* TODO: maybe completely drop this ? */
if (ttm_tt_is_populated(ttm))
return 0;
- ret = ttm_pool_alloc(&bdev->pool, ttm, ctx);
+ if (external && ttm->sg)
+ return drm_prime_sg_to_dma_addr_array(ttm->sg,
+ ttm->dma_address,
+ ttm->num_pages);
- return ret;
+ return ttm_pool_alloc(&bdev->pool, ttm, ctx);
}
static void vmw_ttm_unpopulate(struct ttm_device *bdev,
{
struct vmw_ttm_tt *vmw_tt = container_of(ttm, struct vmw_ttm_tt,
dma_ttm);
+ bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
+
+ if (external)
+ return;
vmw_ttm_unbind(bdev, ttm);
{
struct vmw_ttm_tt *vmw_be;
int ret;
+ bool external = bo->type == ttm_bo_type_sg;
vmw_be = kzalloc(sizeof(*vmw_be), GFP_KERNEL);
if (!vmw_be)
vmw_be->dev_priv = vmw_priv_from_ttm(bo->bdev);
vmw_be->mob = NULL;
- if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
+ if (external)
+ page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+
+ if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent || external)
ret = ttm_sg_tt_init(&vmw_be->dma_ttm, bo, page_flags,
ttm_cached);
else
-Ddrm_i915_gem_object=xe_bo \
-Ddrm_i915_private=xe_device
-CFLAGS_i915-display/intel_fbdev.o = $(call cc-disable-warning, override-init)
-CFLAGS_i915-display/intel_display_device.o = $(call cc-disable-warning, override-init)
+CFLAGS_i915-display/intel_fbdev.o = -Wno-override-init
+CFLAGS_i915-display/intel_display_device.o = -Wno-override-init
# Rule to build SOC code shared with i915
$(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE
ret = ttm_bo_reserve(&bo->ttm, true, false, NULL);
if (ret)
- return ret;
+ goto err;
if (!(bo->flags & XE_BO_SCANOUT_BIT)) {
/*
*/
if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) {
ttm_bo_unreserve(&bo->ttm);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err;
}
bo->flags |= XE_BO_SCANOUT_BIT;
}
ttm_bo_unreserve(&bo->ttm);
+ return 0;
+err:
+ xe_bo_put(bo);
return ret;
}
xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
drmm_mutex_init(&xe->drm, &xe->sb_lock);
- drmm_mutex_init(&xe->drm, &xe->display.backlight.lock);
- drmm_mutex_init(&xe->drm, &xe->display.audio.mutex);
- drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex);
- drmm_mutex_init(&xe->drm, &xe->display.pps.mutex);
- drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex);
xe->enabled_irq_mask = ~0;
err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
#define RING_EXECLIST_STATUS_LO(base) XE_REG((base) + 0x234)
#define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4)
-#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244)
+#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0)
.mem_type = XE_PL_TT,
};
*c += 1;
-
- if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
- bo->props.preferred_mem_type = XE_PL_TT;
}
}
}
places[*c] = place;
*c += 1;
-
- if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
- bo->props.preferred_mem_type = mem_type;
}
static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
u32 bo_flags, u32 *c)
{
- if (bo->props.preferred_gt == XE_GT1) {
- if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
- if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
- } else {
- if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
- if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
- }
+ if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
+ add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
+ if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
+ add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
}
static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
{
u32 c = 0;
- bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
-
- /* The order of placements should indicate preferred location */
-
- if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
- try_add_system(xe, bo, bo_flags, &c);
- try_add_vram(xe, bo, bo_flags, &c);
- } else {
- try_add_vram(xe, bo, bo_flags, &c);
- try_add_system(xe, bo, bo_flags, &c);
- }
+ try_add_vram(xe, bo, bo_flags, &c);
+ try_add_system(xe, bo, bo_flags, &c);
try_add_stolen(xe, bo, bo_flags, &c);
if (!c)
}
}
-static bool should_migrate_to_system(struct xe_bo *bo)
-{
- struct xe_device *xe = xe_bo_device(bo);
-
- return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
-}
-
static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
struct xe_bo *bo = ttm_to_xe_bo(tbo);
bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK;
vm_fault_t ret;
- int idx, r = 0;
+ int idx;
if (needs_rpm)
xe_device_mem_access_get(xe);
if (drm_dev_enter(ddev, &idx)) {
trace_xe_bo_cpu_fault(bo);
- if (should_migrate_to_system(bo)) {
- r = xe_bo_migrate(bo, XE_PL_TT);
- if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
- ret = VM_FAULT_NOPAGE;
- else if (r)
- ret = VM_FAULT_SIGBUS;
- }
- if (!ret)
- ret = ttm_bo_vm_fault_reserved(vmf,
- vmf->vma->vm_page_prot,
- TTM_BO_VM_NUM_PREFAULT);
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
drm_dev_exit(idx);
} else {
ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
bo->flags = flags;
bo->cpu_caching = cpu_caching;
bo->ttm.base.funcs = &xe_gem_object_funcs;
- bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
- bo->props.preferred_gt = XE_BO_PROPS_INVALID;
- bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
INIT_LIST_HEAD(&bo->pinned_link);
#ifdef CONFIG_PROC_FS
*/
struct list_head client_link;
#endif
- /** @props: BO user controlled properties */
- struct {
- /** @preferred_mem: preferred memory class for this BO */
- s16 preferred_mem_class;
- /** @prefered_gt: preferred GT for this BO */
- s16 preferred_gt;
- /** @preferred_mem_type: preferred memory type */
- s32 preferred_mem_type;
- /**
- * @cpu_atomic: the CPU expects to do atomics operations to
- * this BO
- */
- bool cpu_atomic;
- /**
- * @device_atomic: the device expects to do atomics operations
- * to this BO
- */
- bool device_atomic;
- } props;
/** @freed: List node for delayed put. */
struct llist_node freed;
/** @created: Whether the bo has passed initial creation */
{
struct xe_device *xe = to_xe_device(dev);
+ if (xe->preempt_fence_wq)
+ destroy_workqueue(xe->preempt_fence_wq);
+
if (xe->ordered_wq)
destroy_workqueue(xe->ordered_wq);
INIT_LIST_HEAD(&xe->pinned.external_vram);
INIT_LIST_HEAD(&xe->pinned.evicted);
+ xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
- if (!xe->ordered_wq || !xe->unordered_wq) {
+ if (!xe->ordered_wq || !xe->unordered_wq ||
+ !xe->preempt_fence_wq) {
+ /*
+ * Cleanup done in xe_device_destroy via
+ * drmm_add_action_or_reset register above
+ */
drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
err = -ENOMEM;
goto err;
static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id)
{
- if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id > XE_MAX_GT_PER_TILE))
+ if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE))
gt_id = 0;
return gt_id ? tile->media_gt : tile->primary_gt;
if (MEDIA_VER(xe) >= 13) {
gt = xe_tile_get_gt(root_tile, gt_id);
} else {
- if (drm_WARN_ON(&xe->drm, gt_id > XE_MAX_TILES_PER_DEVICE))
+ if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE))
gt_id = 0;
gt = xe->tiles[gt_id].primary_gt;
/** @ufence_wq: user fence wait queue */
wait_queue_head_t ufence_wq;
+ /** @preempt_fence_wq: used to serialize preempt fences */
+ struct workqueue_struct *preempt_fence_wq;
+
/** @ordered_wq: used to serialize compute mode resume */
struct workqueue_struct *ordered_wq;
* Unlock all
*/
+/*
+ * Add validation and rebinding to the drm_exec locking loop, since both can
+ * trigger eviction which may require sleeping dma_resv locks.
+ */
static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
{
struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm);
- struct drm_gem_object *obj;
- unsigned long index;
- int num_fences;
- int ret;
-
- ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
- if (ret)
- return ret;
-
- /*
- * 1 fence slot for the final submit, and 1 more for every per-tile for
- * GPU bind and 1 extra for CPU bind. Note that there are potentially
- * many vma per object/dma-resv, however the fence slot will just be
- * re-used, since they are largely the same timeline and the seqno
- * should be in order. In the case of CPU bind there is dummy fence used
- * for all CPU binds, so no need to have a per-tile slot for that.
- */
- num_fences = 1 + 1 + vm->xe->info.tile_count;
- /*
- * We don't know upfront exactly how many fence slots we will need at
- * the start of the exec, since the TTM bo_validate above can consume
- * numerous fence slots. Also due to how the dma_resv_reserve_fences()
- * works it only ensures that at least that many fence slots are
- * available i.e if there are already 10 slots available and we reserve
- * two more, it can just noop without reserving anything. With this it
- * is quite possible that TTM steals some of the fence slots and then
- * when it comes time to do the vma binding and final exec stage we are
- * lacking enough fence slots, leading to some nasty BUG_ON() when
- * adding the fences. Hence just add our own fences here, after the
- * validate stage.
- */
- drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) {
- ret = dma_resv_reserve_fences(obj->resv, num_fences);
- if (ret)
- return ret;
- }
-
- return 0;
+ /* The fence slot added here is intended for the exec sched job. */
+ return xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
}
int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
struct drm_exec *exec = &vm_exec.exec;
u32 i, num_syncs = 0, num_ufence = 0;
struct xe_sched_job *job;
- struct dma_fence *rebind_fence;
struct xe_vm *vm;
bool write_locked, skip_retry = false;
ktime_t end = 0;
goto err_exec;
}
- /*
- * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
- * VM mode only.
- */
- rebind_fence = xe_vm_rebind(vm, false);
- if (IS_ERR(rebind_fence)) {
- err = PTR_ERR(rebind_fence);
- goto err_put_job;
- }
-
- /*
- * We store the rebind_fence in the VM so subsequent execs don't get
- * scheduled before the rebinds of userptrs / evicted BOs is complete.
- */
- if (rebind_fence) {
- dma_fence_put(vm->rebind_fence);
- vm->rebind_fence = rebind_fence;
- }
- if (vm->rebind_fence) {
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
- &vm->rebind_fence->flags)) {
- dma_fence_put(vm->rebind_fence);
- vm->rebind_fence = NULL;
- } else {
- dma_fence_get(vm->rebind_fence);
- err = drm_sched_job_add_dependency(&job->drm,
- vm->rebind_fence);
- if (err)
- goto err_put_job;
- }
- }
-
- /* Wait behind munmap style rebinds */
+ /* Wait behind rebinds */
if (!xe_vm_in_lr_mode(vm)) {
err = drm_sched_job_add_resv_dependencies(&job->drm,
xe_vm_resv(vm),
{
u32 idx;
- if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class))
+ if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
return NULL;
if (eci.gt_id >= xe->info.gt_count)
const struct xe_ring_ops *ring_ops;
/** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
struct drm_sched_entity *entity;
+ /**
+ * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed
+ * Protected by @vm's resv. Unused if @vm == NULL.
+ */
+ u64 tlb_flush_seqno;
/** @lrc: logical ring context for this exec queue */
struct xe_lrc lrc[];
};
err);
/* Initialize CCS mode sysfs after early initialization of HW engines */
- xe_gt_ccs_mode_sysfs_init(gt);
+ err = xe_gt_ccs_mode_sysfs_init(gt);
+ if (err)
+ goto err_force_wake;
/*
* Stash hardware-reported version. Since this register does not exist
* and it is expected that there are no open drm clients while doing so.
* The number of available compute slices is exposed to user through a per-gt
* 'num_cslices' sysfs interface.
+ *
+ * Returns: Returns error value for failure and 0 for success.
*/
-void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt)
+int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
int err;
if (!xe_gt_ccs_mode_enabled(gt))
- return;
+ return 0;
err = sysfs_create_files(gt->sysfs, gt_ccs_mode_attrs);
- if (err) {
- drm_warn(&xe->drm, "Sysfs creation for ccs_mode failed err: %d\n", err);
- return;
- }
+ if (err)
+ return err;
- err = drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt);
- if (err) {
- sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs);
- drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
- __func__, err);
- }
+ return drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt);
}
#include "xe_platform_types.h"
void xe_gt_apply_ccs_mode(struct xe_gt *gt);
-void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt);
+int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt);
static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt)
{
{
struct xe_bo *bo = xe_vma_bo(vma);
struct xe_vm *vm = xe_vma_vm(vma);
- unsigned int num_shared = 2; /* slots for bind + move */
int err;
- err = xe_vm_prepare_vma(exec, vma, num_shared);
+ err = xe_vm_lock_vma(exec, vma);
if (err)
return err;
INIT_LIST_HEAD(>->tlb_invalidation.pending_fences);
spin_lock_init(>->tlb_invalidation.pending_lock);
spin_lock_init(>->tlb_invalidation.lock);
- gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
INIT_DELAYED_WORK(>->tlb_invalidation.fence_tdr,
xe_gt_tlb_fence_timeout);
* xe_gt_tlb_fence_timeout after the timeut interval is over.
*/
struct delayed_work fence_tdr;
- /** @tlb_invalidation.fence_context: context for TLB invalidation fences */
- u64 fence_context;
- /**
- * @tlb_invalidation.fence_seqno: seqno to TLB invalidation fences, protected by
- * tlb_invalidation.lock
- */
- u32 fence_seqno;
/** @tlb_invalidation.lock: protects TLB invalidation fences */
spinlock_t lock;
} tlb_invalidation;
adj_len);
break;
case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
- ret = xe_guc_relay_process_guc2pf(&guc->relay, payload, adj_len);
+ ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
break;
case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF:
- ret = xe_guc_relay_process_guc2vf(&guc->relay, payload, adj_len);
+ ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len);
break;
default:
drm_err(&xe->drm, "unexpected action 0x%04x\n", action);
init_waitqueue_head(&ge->suspend_wait);
timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
- q->sched_props.job_timeout_ms;
+ msecs_to_jiffies(q->sched_props.job_timeout_ms);
err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
get_submit_wq(guc),
q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64,
struct xe_gt *gt = huc_to_gt(huc);
struct xe_device *xe = gt_to_xe(gt);
struct xe_bo *bo;
- int err;
/* we use a single object for both input and output */
bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL,
huc->gsc_pkt = bo;
- err = drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc);
- if (err) {
- free_gsc_pkt(&xe->drm, huc);
- return err;
- }
-
- return 0;
+ return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc);
}
int xe_huc_init(struct xe_huc *huc)
* As y can be < 2, we compute tau4 = (4 | x) << y
* and then add 2 when doing the final right shift to account for units
*/
- tau4 = ((1 << x_w) | x) << y;
+ tau4 = (u64)((1 << x_w) | x) << y;
/* val in hwmon interface units (millisec) */
out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
- tau4 = ((1 << x_w) | x) << y;
+ tau4 = (u64)((1 << x_w) | x) << y;
max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
if (val > max_win)
#define REG16(x) \
(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
(((x) >> 2) & 0x7f)
-#define END 0
{
const u32 base = hwe->mmio_base;
REG16(0x274),
REG16(0x270),
- END
+ 0
};
static const u8 dg2_xcs_offsets[] = {
REG16(0x274),
REG16(0x270),
- END
+ 0
};
static const u8 gen12_rcs_offsets[] = {
REG(0x084),
NOP(1),
- END
+ 0
};
static const u8 xehp_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END
+ 0
};
static const u8 dg2_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END
+ 0
};
static const u8 mtl_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END
+ 0
};
#define XE2_CTX_COMMON \
LRI(1, 0), /* [0x47] */
REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */
- END
+ 0
};
static const u8 xe2_bcs_offsets[] = {
REG16(0x200), /* [0x42] BCS_SWCTRL */
REG16(0x204), /* [0x44] BLIT_CCTL */
- END
+ 0
};
static const u8 xe2_xcs_offsets[] = {
XE2_CTX_COMMON,
- END
+ 0
};
-#undef END
#undef REG16
#undef REG
#undef LRI
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
- regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
- CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+ regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
+ CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
/* TODO: Timestamp */
}
if (vm->flags & XE_VM_FLAG_64K && level == 1)
flags = XE_PDE_64K;
- entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
+ entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) *
XE_PAGE_SIZE, pat_index);
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
entry | flags);
/* Write PDE's that point to our BO. */
for (i = 0; i < num_entries - num_level; i++) {
- entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
+ entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE,
pat_index);
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
#define VM_SA_UPDATE_UNIT_SIZE (XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
#define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
drm_suballoc_manager_init(&m->vm_update_sa,
- (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
+ (size_t)(map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
NUM_VMUSA_UNIT_PER_PAGE, 0);
m->pt_bo = bo;
struct xe_vm *vm = m->q->vm;
u16 pat_index;
u32 ptes;
- u64 ofs = at_pt * XE_PAGE_SIZE;
+ u64 ofs = (u64)at_pt * XE_PAGE_SIZE;
u64 cur_ofs;
/* Indirect access needs compression enabled uncached PAT index */
struct xe_exec_queue *q = pfence->q;
pfence->error = q->ops->suspend(q);
- queue_work(system_unbound_wq, &pfence->preempt_work);
+ queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work);
return true;
}
spin_lock_irq(>->tlb_invalidation.lock);
dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
>->tlb_invalidation.lock,
- gt->tlb_invalidation.fence_context,
- ++gt->tlb_invalidation.fence_seqno);
+ dma_fence_context_alloc(1), 1);
spin_unlock_irq(>->tlb_invalidation.lock);
INIT_LIST_HEAD(&ifence->base.link);
err = xe_pt_prepare_bind(tile, vma, entries, &num_entries);
if (err)
goto err;
+
+ err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
+ if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
+ if (err)
+ goto err;
+
xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
* non-faulting LR, in particular on user-space batch buffer chaining,
* it needs to be done here.
*/
- if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) ||
- (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
+ if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
if (!ifence)
return ERR_PTR(-ENOMEM);
+ } else if (rebind && !xe_vm_in_lr_mode(vm)) {
+ /* We bump also if batch_invalidate_tlb is true */
+ vm->tlb_flush_seqno++;
}
rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
}
/* add shared fence now for pagetable delayed destroy */
- dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind &&
+ dma_resv_add_fence(xe_vm_resv(vm), fence, rebind ||
last_munmap_rebind ?
DMA_RESV_USAGE_KERNEL :
DMA_RESV_USAGE_BOOKKEEP);
struct dma_fence *fence = NULL;
struct invalidation_fence *ifence;
struct xe_range_fence *rfence;
+ int err;
LLIST_HEAD(deferred);
xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
num_entries);
+ err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
+ if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
+ if (err)
+ return ERR_PTR(err);
+
ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
if (!ifence)
return ERR_PTR(-ENOMEM);
return -EINVAL;
eci = &resp.eci;
- if (eci->gt_id > XE_MAX_GT_PER_TILE)
+ if (eci->gt_id >= XE_MAX_GT_PER_TILE)
return -EINVAL;
gt = xe_device_get_gt(xe, eci->gt_id);
{
u32 dw[MAX_JOB_SIZE_DW], i = 0;
u32 ppgtt_flag = get_ppgtt_flag(job);
- struct xe_vm *vm = job->q->vm;
struct xe_gt *gt = job->q->gt;
- if (vm && vm->batch_invalidate_tlb) {
+ if (job->ring_ops_flush_tlb) {
dw[i++] = preparser_disable(true);
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, true, dw, i);
struct xe_gt *gt = job->q->gt;
struct xe_device *xe = gt_to_xe(gt);
bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
- struct xe_vm *vm = job->q->vm;
dw[i++] = preparser_disable(true);
i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
}
- if (vm && vm->batch_invalidate_tlb)
+ if (job->ring_ops_flush_tlb)
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, true, dw, i);
dw[i++] = preparser_disable(false);
- if (!vm || !vm->batch_invalidate_tlb)
+ if (!job->ring_ops_flush_tlb)
i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, dw, i);
struct xe_gt *gt = job->q->gt;
struct xe_device *xe = gt_to_xe(gt);
bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
- struct xe_vm *vm = job->q->vm;
u32 mask_flags = 0;
dw[i++] = preparser_disable(true);
mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
- i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i);
+ i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i);
/* hsdes: 1809175790 */
if (has_aux_ccs(xe))
void xe_sched_job_arm(struct xe_sched_job *job)
{
+ struct xe_exec_queue *q = job->q;
+ struct xe_vm *vm = q->vm;
+
+ if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
+ (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
+ xe_vm_assert_held(vm);
+ q->tlb_flush_seqno = vm->tlb_flush_seqno;
+ job->ring_ops_flush_tlb = true;
+ }
+
drm_sched_job_arm(&job->drm);
}
} user_fence;
/** @migrate_flush_flags: Additional flush flags for migration jobs */
u32 migrate_flush_flags;
+ /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
+ bool ring_ops_flush_tlb;
/** @batch_addr: batch buffer address of job */
u64 batch_addr[];
};
return 0;
}
+/**
+ * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
+ * @vm: The vm for which we are rebinding.
+ * @exec: The struct drm_exec with the locked GEM objects.
+ * @num_fences: The number of fences to reserve for the operation, not
+ * including rebinds and validations.
+ *
+ * Validates all evicted gem objects and rebinds their vmas. Note that
+ * rebindings may cause evictions and hence the validation-rebind
+ * sequence is rerun until there are no more objects to validate.
+ *
+ * Return: 0 on success, negative error code on error. In particular,
+ * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
+ * the drm_exec transaction needs to be restarted.
+ */
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ struct drm_gem_object *obj;
+ unsigned long index;
+ int ret;
+
+ do {
+ ret = drm_gpuvm_validate(&vm->gpuvm, exec);
+ if (ret)
+ return ret;
+
+ ret = xe_vm_rebind(vm, false);
+ if (ret)
+ return ret;
+ } while (!list_empty(&vm->gpuvm.evict.list));
+
+ drm_exec_for_each_locked_object(exec, index, obj) {
+ ret = dma_resv_reserve_fences(obj->resv, num_fences);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
bool *done)
{
int err;
- /*
- * 1 fence for each preempt fence plus a fence for each tile from a
- * possible rebind
- */
- err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
- vm->xe->info.tile_count);
+ err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
if (err)
return err;
return 0;
}
- err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
+ err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
if (err)
return err;
if (err)
return err;
- return drm_gpuvm_validate(&vm->gpuvm, exec);
+ /*
+ * Add validation and rebinding to the locking loop since both can
+ * cause evictions which may require blocing dma_resv locks.
+ * The fence reservation here is intended for the new preempt fences
+ * we attach at the end of the rebind work.
+ */
+ return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}
static void preempt_rebind_work_func(struct work_struct *w)
{
struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
struct drm_exec exec;
- struct dma_fence *rebind_fence;
unsigned int fence_count = 0;
LIST_HEAD(preempt_fences);
ktime_t end = 0;
if (err)
goto out_unlock;
- rebind_fence = xe_vm_rebind(vm, true);
- if (IS_ERR(rebind_fence)) {
- err = PTR_ERR(rebind_fence);
+ err = xe_vm_rebind(vm, true);
+ if (err)
goto out_unlock;
- }
-
- if (rebind_fence) {
- dma_fence_wait(rebind_fence, false);
- dma_fence_put(rebind_fence);
- }
- /* Wait on munmap style VM unbinds */
+ /* Wait on rebinds and munmap style VM unbinds */
wait = dma_resv_wait_timeout(xe_vm_resv(vm),
DMA_RESV_USAGE_KERNEL,
false, MAX_SCHEDULE_TIMEOUT);
struct xe_sync_entry *syncs, u32 num_syncs,
bool first_op, bool last_op);
-struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
+int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
- struct dma_fence *fence = NULL;
+ struct dma_fence *fence;
struct xe_vma *vma, *next;
lockdep_assert_held(&vm->lock);
if (xe_vm_in_lr_mode(vm) && !rebind_worker)
- return NULL;
+ return 0;
xe_vm_assert_held(vm);
list_for_each_entry_safe(vma, next, &vm->rebind_list,
xe_assert(vm->xe, vma->tile_present);
list_del_init(&vma->combined_links.rebind);
- dma_fence_put(fence);
if (rebind_worker)
trace_xe_vma_rebind_worker(vma);
else
trace_xe_vma_rebind_exec(vma);
fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
if (IS_ERR(fence))
- return fence;
+ return PTR_ERR(fence);
+ dma_fence_put(fence);
}
- return fence;
+ return 0;
}
static void xe_vma_free(struct xe_vma *vma)
}
/**
- * xe_vm_prepare_vma() - drm_exec utility to lock a vma
+ * xe_vm_lock_vma() - drm_exec utility to lock a vma
* @exec: The drm_exec object we're currently locking for.
* @vma: The vma for witch we want to lock the vm resv and any attached
* object's resv.
- * @num_shared: The number of dma-fence slots to pre-allocate in the
- * objects' reservation objects.
*
* Return: 0 on success, negative error code on error. In particular
* may return -EDEADLK on WW transaction contention and -EINTR if
* an interruptible wait is terminated by a signal.
*/
-int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
- unsigned int num_shared)
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
struct xe_vm *vm = xe_vma_vm(vma);
struct xe_bo *bo = xe_vma_bo(vma);
int err;
XE_WARN_ON(!vm);
- if (num_shared)
- err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
- else
- err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
- if (!err && bo && !bo->vm) {
- if (num_shared)
- err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
- else
- err = drm_exec_lock_obj(exec, &bo->ttm.base);
- }
+
+ err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+ if (!err && bo && !bo->vm)
+ err = drm_exec_lock_obj(exec, &bo->ttm.base);
return err;
}
drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
- err = xe_vm_prepare_vma(&exec, vma, 0);
+ err = xe_vm_lock_vma(&exec, vma);
drm_exec_retry_on_contention(&exec);
if (XE_WARN_ON(err))
break;
xe->usm.num_vm_in_fault_mode--;
else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
xe->usm.num_vm_in_non_fault_mode--;
+
+ if (vm->usm.asid) {
+ void *lookup;
+
+ xe_assert(xe, xe->info.has_asid);
+ xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
+
+ lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+ xe_assert(xe, lookup == vm);
+ }
mutex_unlock(&xe->usm.lock);
for_each_tile(tile, xe, id)
struct xe_device *xe = vm->xe;
struct xe_tile *tile;
u8 id;
- void *lookup;
/* xe_vm_close_and_put was not called? */
xe_assert(xe, !vm->size);
mutex_destroy(&vm->snap_mutex);
- if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
+ if (!(vm->flags & XE_VM_FLAG_MIGRATION))
xe_device_mem_access_put(xe);
- if (xe->info.has_asid && vm->usm.asid) {
- mutex_lock(&xe->usm.lock);
- lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
- xe_assert(xe, lookup == vm);
- mutex_unlock(&xe->usm.lock);
- }
- }
-
for_each_tile(tile, xe, id)
XE_WARN_ON(vm->pt_root[id]);
trace_xe_vm_free(vm);
- dma_fence_put(vm->rebind_fence);
kfree(vm);
}
lockdep_assert_held_write(&vm->lock);
- err = xe_vm_prepare_vma(exec, vma, 1);
+ err = xe_vm_lock_vma(exec, vma);
if (err)
return err;
int xe_vm_userptr_check_repin(struct xe_vm *vm);
-struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
+int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
int xe_vm_invalidate_vma(struct xe_vma *vma);
int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
-int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
- unsigned int num_shared);
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
+
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
/**
* xe_vm_resv() - Return's the vm's reservation object
*/
struct list_head rebind_list;
- /** @rebind_fence: rebind fence from execbuf */
- struct dma_fence *rebind_fence;
-
/**
* @destroy_work: worker to destroy VM, needed as a dma_fence signaling
* from an irq context can be last put and the destroy needs to be able
bool capture_once;
} error_capture;
+ /**
+ * @tlb_flush_seqno: Required TLB flush seqno for the next exec.
+ * protected by the vm resv.
+ */
+ u64 tlb_flush_seqno;
/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
bool batch_invalidate_tlb;
/** @xef: XE file handle for tracking this VM's drm client */
return 0;
}
-static int host1x_dma_configure(struct device *dev)
-{
- return of_dma_configure(dev, dev->of_node, true);
-}
-
static const struct dev_pm_ops host1x_device_pm_ops = {
.suspend = pm_generic_suspend,
.resume = pm_generic_resume,
.name = "host1x",
.match = host1x_device_match,
.uevent = host1x_device_uevent,
- .dma_configure = host1x_dma_configure,
.pm = &host1x_device_pm_ops,
};
device->dev.bus = &host1x_bus_type;
device->dev.parent = host1x->dev;
- of_dma_configure(&device->dev, host1x->dev->of_node, true);
-
device->dev.dma_parms = &device->dma_parms;
dma_set_max_seg_size(&device->dev, UINT_MAX);
}
break;
case REPORT_TYPE_MOUSE:
- workitem->reports_supported |= STD_MOUSE | HIDPP;
- if (djrcv_dev->type == recvr_type_mouse_only)
- workitem->reports_supported |= MULTIMEDIA;
+ workitem->reports_supported |= STD_MOUSE | HIDPP | MULTIMEDIA;
break;
}
}
/* This is needed to be sure hid_hw_stop() isn't called twice by the subsystem */
static void mcp2221_remove(struct hid_device *hdev)
{
+#if IS_REACHABLE(CONFIG_IIO)
struct mcp2221 *mcp = hid_get_drvdata(hdev);
cancel_delayed_work_sync(&mcp->init_work);
+#endif
}
#if IS_REACHABLE(CONFIG_IIO)
{ BTN_TR, JC_BTN_R, },
{ BTN_TR2, JC_BTN_LSTICK, }, /* ZR */
{ BTN_START, JC_BTN_PLUS, },
- { BTN_FORWARD, JC_BTN_Y, }, /* C UP */
- { BTN_BACK, JC_BTN_ZR, }, /* C DOWN */
- { BTN_LEFT, JC_BTN_X, }, /* C LEFT */
- { BTN_RIGHT, JC_BTN_MINUS, }, /* C RIGHT */
+ { BTN_SELECT, JC_BTN_Y, }, /* C UP */
+ { BTN_X, JC_BTN_ZR, }, /* C DOWN */
+ { BTN_Y, JC_BTN_X, }, /* C LEFT */
+ { BTN_C, JC_BTN_MINUS, }, /* C RIGHT */
{ BTN_MODE, JC_BTN_HOME, },
{ BTN_Z, JC_BTN_CAP, },
{ /* sentinel */ },
/* flags */
#define I2C_HID_STARTED 0
#define I2C_HID_RESET_PENDING 1
-#define I2C_HID_READ_PENDING 2
#define I2C_HID_PWR_ON 0x00
#define I2C_HID_PWR_SLEEP 0x01
msgs[n].len = recv_len;
msgs[n].buf = recv_buf;
n++;
-
- set_bit(I2C_HID_READ_PENDING, &ihid->flags);
}
ret = i2c_transfer(client->adapter, msgs, n);
- if (recv_len)
- clear_bit(I2C_HID_READ_PENDING, &ihid->flags);
-
if (ret != n)
return ret < 0 ? ret : -EIO;
{
struct i2c_hid *ihid = dev_id;
- if (test_bit(I2C_HID_READ_PENDING, &ihid->flags))
- return IRQ_HANDLED;
-
i2c_hid_get_input(ihid);
return IRQ_HANDLED;
mutex_lock(&ihid->reset_lock);
do {
ret = i2c_hid_start_hwreset(ihid);
- if (ret)
+ if (ret == 0)
+ ret = i2c_hid_finish_hwreset(ihid);
+ else
msleep(1000);
} while (tries-- > 0 && ret);
+ mutex_unlock(&ihid->reset_lock);
if (ret)
- goto abort_reset;
+ return ret;
use_override = i2c_hid_get_dmi_hid_report_desc_override(client->name,
&rsize);
i2c_hid_dbg(ihid, "Using a HID report descriptor override\n");
} else {
rdesc = kzalloc(rsize, GFP_KERNEL);
-
- if (!rdesc) {
- ret = -ENOMEM;
- goto abort_reset;
- }
+ if (!rdesc)
+ return -ENOMEM;
i2c_hid_dbg(ihid, "asking HID report descriptor\n");
rdesc, rsize);
if (ret) {
hid_err(hid, "reading report descriptor failed\n");
- goto abort_reset;
+ goto out;
}
}
- /*
- * Windows directly reads the report-descriptor after sending reset
- * and then waits for resets completion afterwards. Some touchpads
- * actually wait for the report-descriptor to be read before signalling
- * reset completion.
- */
- ret = i2c_hid_finish_hwreset(ihid);
-abort_reset:
- clear_bit(I2C_HID_RESET_PENDING, &ihid->flags);
- mutex_unlock(&ihid->reset_lock);
- if (ret)
- goto out;
-
i2c_hid_dbg(ihid, "Report Descriptor: %*ph\n", rsize, rdesc);
ret = hid_parse_report(hid, rdesc, rsize);
if (!dev)
return NULL;
+ dev->devc = &pdev->dev;
ishtp_device_init(dev);
init_waitqueue_head(&dev->wait_hw_ready);
}
dev->ops = &ish_hw_ops;
- dev->devc = &pdev->dev;
dev->mtu = IPC_PAYLOAD_SIZE - sizeof(struct ishtp_msg_hdr);
return dev;
}
hv_ringbuffer_cleanup(&channel->inbound);
if (channel->ringbuffer_page) {
- __free_pages(channel->ringbuffer_page,
+ /* In a CoCo VM leak the memory if it didn't get re-encrypted */
+ if (!channel->ringbuffer_gpadlhandle.decrypted)
+ __free_pages(channel->ringbuffer_page,
get_order(channel->ringbuffer_pagecount
<< PAGE_SHIFT));
channel->ringbuffer_page = NULL;
(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
ret = create_gpadl_header(type, kbuffer, size, send_offset, &msginfo);
- if (ret)
+ if (ret) {
+ gpadl->decrypted = false;
return ret;
+ }
+ /*
+ * Set the "decrypted" flag to true for the set_memory_decrypted()
+ * success case. In the failure case, the encryption state of the
+ * memory is unknown. Leave "decrypted" as true to ensure the
+ * memory will be leaked instead of going back on the free list.
+ */
+ gpadl->decrypted = true;
ret = set_memory_decrypted((unsigned long)kbuffer,
PFN_UP(size));
if (ret) {
kfree(msginfo);
- if (ret)
- set_memory_encrypted((unsigned long)kbuffer,
- PFN_UP(size));
+ if (ret) {
+ /*
+ * If set_memory_encrypted() fails, the decrypted flag is
+ * left as true so the memory is leaked instead of being
+ * put back on the free list.
+ */
+ if (!set_memory_encrypted((unsigned long)kbuffer, PFN_UP(size)))
+ gpadl->decrypted = false;
+ }
return ret;
}
if (ret)
pr_warn("Fail to set mem host visibility in GPADL teardown %d.\n", ret);
+ gpadl->decrypted = ret;
+
return ret;
}
EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
vmbus_connection.monitor_pages[0], 1);
ret |= set_memory_decrypted((unsigned long)
vmbus_connection.monitor_pages[1], 1);
- if (ret)
+ if (ret) {
+ /*
+ * If set_memory_decrypted() fails, the encryption state
+ * of the memory is unknown. So leak the memory instead
+ * of risking returning decrypted memory to the free list.
+ * For simplicity, always handle both pages the same.
+ */
+ vmbus_connection.monitor_pages[0] = NULL;
+ vmbus_connection.monitor_pages[1] = NULL;
goto cleanup;
+ }
/*
* Set_memory_decrypted() will change the memory contents if
vmbus_connection.int_page = NULL;
}
- set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1);
- set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1);
+ if (vmbus_connection.monitor_pages[0]) {
+ if (!set_memory_encrypted(
+ (unsigned long)vmbus_connection.monitor_pages[0], 1))
+ hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
+ vmbus_connection.monitor_pages[0] = NULL;
+ }
- hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
- hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
- vmbus_connection.monitor_pages[0] = NULL;
- vmbus_connection.monitor_pages[1] = NULL;
+ if (vmbus_connection.monitor_pages[1]) {
+ if (!set_memory_encrypted(
+ (unsigned long)vmbus_connection.monitor_pages[1], 1))
+ hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
+ vmbus_connection.monitor_pages[1] = NULL;
+ }
}
/*
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->state);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "{%pUl}\n",
- &hv_dev->channel->offermsg.offer.if_type);
+ return sysfs_emit(buf, "{%pUl}\n",
+ &hv_dev->channel->offermsg.offer.if_type);
}
static DEVICE_ATTR_RO(class_id);
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "{%pUl}\n",
- &hv_dev->channel->offermsg.offer.if_instance);
+ return sysfs_emit(buf, "{%pUl}\n",
+ &hv_dev->channel->offermsg.offer.if_instance);
}
static DEVICE_ATTR_RO(device_id);
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
+ return sysfs_emit(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
}
static DEVICE_ATTR_RO(modalias);
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
+ return sysfs_emit(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
}
static DEVICE_ATTR_RO(numa_node);
#endif
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_pending(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_pending);
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_pending(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_latency(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_latency(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_conn_id(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_conn_id(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
+ return sysfs_emit(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_read_index);
+ return sysfs_emit(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_write_index);
+ return sysfs_emit(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
+ return sysfs_emit(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
+ return sysfs_emit(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
+ return sysfs_emit(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_read_index);
+ return sysfs_emit(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_write_index);
+ return sysfs_emit(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
+ return sysfs_emit(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
+ return sysfs_emit(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
- int buf_size = PAGE_SIZE, n_written, tot_written;
+ int n_written;
struct list_head *cur;
if (!channel)
mutex_lock(&vmbus_connection.channel_mutex);
- tot_written = snprintf(buf, buf_size, "%u:%u\n",
- channel->offermsg.child_relid, channel->target_cpu);
+ n_written = sysfs_emit(buf, "%u:%u\n",
+ channel->offermsg.child_relid,
+ channel->target_cpu);
list_for_each(cur, &channel->sc_list) {
- if (tot_written >= buf_size - 1)
- break;
cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
- n_written = scnprintf(buf + tot_written,
- buf_size - tot_written,
- "%u:%u\n",
- cur_sc->offermsg.child_relid,
- cur_sc->target_cpu);
- tot_written += n_written;
+ n_written += sysfs_emit_at(buf, n_written, "%u:%u\n",
+ cur_sc->offermsg.child_relid,
+ cur_sc->target_cpu);
}
mutex_unlock(&vmbus_connection.channel_mutex);
- return tot_written;
+ return n_written;
}
static DEVICE_ATTR_RO(channel_vp_mapping);
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
+ return sysfs_emit(buf, "0x%x\n", hv_dev->vendor_id);
}
static DEVICE_ATTR_RO(vendor);
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "0x%x\n", hv_dev->device_id);
+ return sysfs_emit(buf, "0x%x\n", hv_dev->device_id);
}
static DEVICE_ATTR_RO(device);
ssize_t len;
device_lock(dev);
- len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
+ len = sysfs_emit(buf, "%s\n", hv_dev->driver_override);
device_unlock(dev);
return len;
if (read_write == I2C_SMBUS_READ ||
command == I2C_SMBUS_BLOCK_PROC_CALL) {
- status = i801_get_block_len(priv);
- if (status < 0)
+ len = i801_get_block_len(priv);
+ if (len < 0) {
+ status = len;
goto out;
+ }
- len = status;
data->block[0] = len;
inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
for (i = 0; i < len; i++)
decode_bits(KERN_DEBUG "ISR", isr_bits, ARRAY_SIZE(isr_bits), val);
}
+#ifdef CONFIG_I2C_PXA_SLAVE
static const struct bits icr_bits[] = {
PXA_BIT(ICR_START, "START", NULL),
PXA_BIT(ICR_STOP, "STOP", NULL),
PXA_BIT(ICR_UR, "UR", "ur"),
};
-#ifdef CONFIG_I2C_PXA_SLAVE
static void decode_ICR(unsigned int val)
{
decode_bits(KERN_DEBUG "ICR", icr_bits, ARRAY_SIZE(icr_bits), val);
}
}
-static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id)
+static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id,
+ enum ib_cm_state old_state)
{
struct cm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- pr_err("%s: cm_id=%p timed out. state=%d refcnt=%d\n", __func__,
- cm_id, cm_id->state, refcount_read(&cm_id_priv->refcount));
+ pr_err("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__,
+ cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount));
}
static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
struct cm_id_private *cm_id_priv;
+ enum ib_cm_state old_state;
struct cm_work *work;
int ret;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irq(&cm_id_priv->lock);
+ old_state = cm_id->state;
retest:
switch (cm_id->state) {
case IB_CM_LISTEN:
msecs_to_jiffies(
CM_DESTROY_ID_WAIT_TIMEOUT));
if (!ret) /* timeout happened */
- cm_destroy_id_wait_timeout(cm_id);
+ cm_destroy_id_wait_timeout(cm_id, old_state);
} while (!ret);
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
mdev = dev->mdev;
mdev_port_num = 1;
}
- if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) {
+ if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1 &&
+ !mlx5_core_mp_enabled(mdev)) {
/* set local port to one for Function-Per-Port HCA. */
mdev = dev->mdev;
mdev_port_num = 1;
if (rxe->tfm)
crypto_free_shash(rxe->tfm);
+
+ mutex_destroy(&rxe->usdev_lock);
}
/* initialize rxe device parameters */
path->num_nodes = num_nodes;
+ mutex_lock(&icc_bw_lock);
+
for (i = num_nodes - 1; i >= 0; i--) {
node->provider->users++;
hlist_add_head(&path->reqs[i].req_node, &node->req_list);
node = node->reverse;
}
+ mutex_unlock(&icc_bw_lock);
+
return path;
}
pr_err("%s: error (%d)\n", __func__, ret);
mutex_lock(&icc_lock);
+ mutex_lock(&icc_bw_lock);
+
for (i = 0; i < path->num_nodes; i++) {
node = path->reqs[i].node;
hlist_del(&path->reqs[i].req_node);
if (!WARN_ON(!node->provider->users))
node->provider->users--;
}
+
+ mutex_unlock(&icc_bw_lock);
mutex_unlock(&icc_lock);
kfree_const(path->name);
.links = { X1E80100_SLAVE_A2NOC_SNOC },
};
-static struct qcom_icc_node ddr_perf_mode_master = {
- .name = "ddr_perf_mode_master",
- .id = X1E80100_MASTER_DDR_PERF_MODE,
- .channels = 1,
- .buswidth = 4,
- .num_links = 1,
- .links = { X1E80100_SLAVE_DDR_PERF_MODE },
-};
-
static struct qcom_icc_node qup0_core_master = {
.name = "qup0_core_master",
.id = X1E80100_MASTER_QUP_CORE_0,
.links = { X1E80100_MASTER_A2NOC_SNOC },
};
-static struct qcom_icc_node ddr_perf_mode_slave = {
- .name = "ddr_perf_mode_slave",
- .id = X1E80100_SLAVE_DDR_PERF_MODE,
- .channels = 1,
- .buswidth = 4,
- .num_links = 0,
-};
-
static struct qcom_icc_node qup0_core_slave = {
.name = "qup0_core_slave",
.id = X1E80100_SLAVE_QUP_CORE_0,
.nodes = { &ebi },
};
-static struct qcom_icc_bcm bcm_acv_perf = {
- .name = "ACV_PERF",
- .num_nodes = 1,
- .nodes = { &ddr_perf_mode_slave },
-};
-
static struct qcom_icc_bcm bcm_ce0 = {
.name = "CE0",
.num_nodes = 1,
};
static struct qcom_icc_bcm * const clk_virt_bcms[] = {
- &bcm_acv_perf,
&bcm_qup0,
&bcm_qup1,
&bcm_qup2,
};
static struct qcom_icc_node * const clk_virt_nodes[] = {
- [MASTER_DDR_PERF_MODE] = &ddr_perf_mode_master,
[MASTER_QUP_CORE_0] = &qup0_core_master,
[MASTER_QUP_CORE_1] = &qup1_core_master,
[MASTER_QUP_CORE_2] = &qup2_core_master,
- [SLAVE_DDR_PERF_MODE] = &ddr_perf_mode_slave,
[SLAVE_QUP_CORE_0] = &qup0_core_slave,
[SLAVE_QUP_CORE_1] = &qup1_core_slave,
[SLAVE_QUP_CORE_2] = &qup2_core_slave,
static void iommu_snp_enable(void)
{
#ifdef CONFIG_KVM_AMD_SEV
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return;
/*
* The SNP support requires that IOMMU must be enabled, and is
- * not configured in the passthrough mode.
+ * configured with V1 page table (DTE[Mode] = 0 is not supported).
*/
if (no_iommu || iommu_default_passthrough()) {
- pr_err("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
- return;
+ pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
+ goto disable_snp;
+ }
+
+ if (amd_iommu_pgtable != AMD_IOMMU_V1) {
+ pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
+ goto disable_snp;
}
amd_iommu_snp_en = check_feature(FEATURE_SNP);
if (!amd_iommu_snp_en) {
- pr_err("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
- return;
+ pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
+ goto disable_snp;
}
pr_info("IOMMU SNP support enabled.\n");
+ return;
- /* Enforce IOMMU v1 pagetable when SNP is enabled. */
- if (amd_iommu_pgtable != AMD_IOMMU_V1) {
- pr_warn("Forcing use of AMD IOMMU v1 page table due to SNP.\n");
- amd_iommu_pgtable = AMD_IOMMU_V1;
- }
+disable_snp:
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
#endif
}
static u16 domain_id_alloc(void)
{
+ unsigned long flags;
int id;
- spin_lock(&pd_bitmap_lock);
+ spin_lock_irqsave(&pd_bitmap_lock, flags);
id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
BUG_ON(id == 0);
if (id > 0 && id < MAX_DOMAIN_ID)
__set_bit(id, amd_iommu_pd_alloc_bitmap);
else
id = 0;
- spin_unlock(&pd_bitmap_lock);
+ spin_unlock_irqrestore(&pd_bitmap_lock, flags);
return id;
}
static void domain_id_free(int id)
{
- spin_lock(&pd_bitmap_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&pd_bitmap_lock, flags);
if (id > 0 && id < MAX_DOMAIN_ID)
__clear_bit(id, amd_iommu_pd_alloc_bitmap);
- spin_unlock(&pd_bitmap_lock);
+ spin_unlock_irqrestore(&pd_bitmap_lock, flags);
}
static void free_gcr3_tbl_level1(u64 *tbl)
* requires a breaking update, zero the V bit, write all qwords
* but 0, then set qword 0
*/
- unused_update.data[0] = entry->data[0] & (~STRTAB_STE_0_V);
+ unused_update.data[0] = entry->data[0] &
+ cpu_to_le64(~STRTAB_STE_0_V);
entry_set(smmu, sid, entry, &unused_update, 0, 1);
entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1);
entry_set(smmu, sid, entry, target, 0, 1);
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
}
-static void arm_smmu_make_bypass_ste(struct arm_smmu_ste *target)
+static void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
+ struct arm_smmu_ste *target)
{
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
STRTAB_STE_0_V |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
- target->data[1] = cpu_to_le64(
- FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
+
+ if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
+ target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+ STRTAB_STE_1_SHCFG_INCOMING));
}
static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
u64 vtcr_val;
+ struct arm_smmu_device *smmu = master->smmu;
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
target->data[1] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_EATS,
- master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0) |
- FIELD_PREP(STRTAB_STE_1_SHCFG,
- STRTAB_STE_1_SHCFG_INCOMING));
+ master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+
+ if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
+ target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+ STRTAB_STE_1_SHCFG_INCOMING));
vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
* This can safely directly manipulate the STE memory without a sync sequence
* because the STE table has not been installed in the SMMU yet.
*/
-static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
+static void arm_smmu_init_initial_stes(struct arm_smmu_device *smmu,
+ struct arm_smmu_ste *strtab,
unsigned int nent)
{
unsigned int i;
if (disable_bypass)
arm_smmu_make_abort_ste(strtab);
else
- arm_smmu_make_bypass_ste(strtab);
+ arm_smmu_make_bypass_ste(smmu, strtab);
strtab++;
}
}
return -ENOMEM;
}
- arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
+ arm_smmu_init_initial_stes(smmu, desc->l2ptr, 1 << STRTAB_SPLIT);
arm_smmu_write_strtab_l1_desc(strtab, desc);
return 0;
}
struct device *dev)
{
struct arm_smmu_ste ste;
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- arm_smmu_make_bypass_ste(&ste);
+ arm_smmu_make_bypass_ste(master->smmu, &ste);
return arm_smmu_attach_dev_ste(dev, &ste);
}
reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
cfg->strtab_base_cfg = reg;
- arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents);
+ arm_smmu_init_initial_stes(smmu, strtab, cfg->num_l1_ents);
return 0;
}
return -ENXIO;
}
+ if (reg & IDR1_ATTR_TYPES_OVR)
+ smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;
+
/* Queue sizes, capped to ensure natural alignment */
smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_CMDQS, reg));
* STE table is not programmed to HW, see
* arm_smmu_initial_bypass_stes()
*/
- arm_smmu_make_bypass_ste(
+ arm_smmu_make_bypass_ste(smmu,
arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
}
}
#define IDR1_TABLES_PRESET (1 << 30)
#define IDR1_QUEUES_PRESET (1 << 29)
#define IDR1_REL (1 << 28)
+#define IDR1_ATTR_TYPES_OVR (1 << 27)
#define IDR1_CMDQS GENMASK(25, 21)
#define IDR1_EVTQS GENMASK(20, 16)
#define IDR1_PRIQS GENMASK(15, 11)
#define ARM_SMMU_FEAT_SVA (1 << 17)
#define ARM_SMMU_FEAT_E2H (1 << 18)
#define ARM_SMMU_FEAT_NESTING (1 << 19)
+#define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20)
u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
}
dev_iommu_priv_set(dev, info);
- ret = device_rbtree_insert(iommu, info);
- if (ret)
- goto free;
+ if (pdev && pci_ats_supported(pdev)) {
+ ret = device_rbtree_insert(iommu, info);
+ if (ret)
+ goto free;
+ }
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
ret = intel_pasid_alloc_table(dev);
struct intel_iommu *iommu = info->iommu;
mutex_lock(&iommu->iopf_lock);
- device_rbtree_remove(info);
+ if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
+ device_rbtree_remove(info);
mutex_unlock(&iommu->iopf_lock);
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
iommu_pmu_set_filter(domain, event->attr.config1,
IOMMU_PMU_FILTER_DOMAIN, idx,
event->attr.config1);
- iommu_pmu_set_filter(pasid, event->attr.config1,
+ iommu_pmu_set_filter(pasid, event->attr.config2,
IOMMU_PMU_FILTER_PASID, idx,
event->attr.config1);
iommu_pmu_set_filter(ats, event->attr.config2,
struct page *pages;
int irq, ret;
- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
+ pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
if (!pages) {
pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
iommu->name);
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
+ struct group_device *device;
void *curr;
int ret;
if (!group)
return -ENODEV;
- if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner)
+ if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner ||
+ pasid == IOMMU_NO_PASID)
return -EINVAL;
mutex_lock(&group->mutex);
+ for_each_group_device(group, device) {
+ if (pasid >= device->dev->iommu->max_pasids) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ }
+
curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
if (curr) {
ret = xa_err(curr) ? : -EBUSY;
depends on DEBUG_KERNEL
depends on FAULT_INJECTION
depends on RUNTIME_TESTING_MENU
+ select IOMMUFD_DRIVER
default n
help
This is dangerous, do not enable unless running
{ .compatible = "mediatek,mt8365-m4u", .data = &mt8365_data},
{}
};
+MODULE_DEVICE_TABLE(of, mtk_iommu_of_ids);
static struct platform_driver mtk_iommu_driver = {
.probe = mtk_iommu_probe,
{ .compatible = "mediatek,mt2701-m4u", },
{}
};
+MODULE_DEVICE_TABLE(of, mtk_iommu_v1_of_ids);
static const struct component_master_ops mtk_iommu_v1_com_ops = {
.bind = mtk_iommu_v1_bind,
return 0;
}
#else
-static void armada_370_xp_msi_reenable_percpu(void) {}
+static __maybe_unused void armada_370_xp_msi_reenable_percpu(void) {}
static inline int armada_370_xp_msi_init(struct device_node *node,
phys_addr_t main_int_phys_base)
struct its_cmd_block *cmd,
struct its_cmd_desc *desc)
{
+ struct its_vpe *vpe = valid_vpe(its, desc->its_vmapp_cmd.vpe);
unsigned long vpt_addr, vconf_addr;
u64 target;
bool alloc;
if (is_v4_1(its)) {
alloc = !atomic_dec_return(&desc->its_vmapp_cmd.vpe->vmapp_count);
its_encode_alloc(cmd, alloc);
+ /*
+ * Unmapping a VPE is self-synchronizing on GICv4.1,
+ * no need to issue a VSYNC.
+ */
+ vpe = NULL;
}
goto out;
out:
its_fixup_cmd(cmd);
- return valid_vpe(its, desc->its_vmapp_cmd.vpe);
+ return vpe;
}
static struct its_vpe *its_build_vmapti_cmd(struct its_node *its,
}
static int data_sock_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int len)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int err = 0, opt = 0;
if (*debug & DEBUG_SOCKET)
printk(KERN_DEBUG "%s(%p, %d, %x, optval, %d)\n", __func__, sock,
- level, optname, len);
+ level, optname, optlen);
lock_sock(sk);
switch (optname) {
case MISDN_TIME_STAMP:
- if (copy_from_sockptr(&opt, optval, sizeof(int))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt)
_pms(sk)->cmask |= MISDN_TIME_STAMP;
} else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) {
log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval);
} else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) {
- if (val >= (uint64_t)UINT_MAX * 1000 / HZ) {
+ if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) {
r = -EINVAL;
ti->error = "Invalid bitmap_flush_interval argument";
goto bad;
#include "murmurhash3.h"
+#include <asm/unaligned.h>
+
static inline u64 rotl64(u64 x, s8 r)
{
return (x << r) | (x >> (64 - r));
}
#define ROTL64(x, y) rotl64(x, y)
-static __always_inline u64 getblock64(const u64 *p, int i)
-{
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- return p[i];
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- return __builtin_bswap64(p[i]);
-#else
-#error "can't figure out byte order"
-#endif
-}
-
-static __always_inline void putblock64(u64 *p, int i, u64 value)
-{
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- p[i] = value;
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- p[i] = __builtin_bswap64(value);
-#else
-#error "can't figure out byte order"
-#endif
-}
/* Finalization mix - force all bits of a hash block to avalanche */
const u64 c1 = 0x87c37b91114253d5LLU;
const u64 c2 = 0x4cf5ad432745937fLLU;
+ u64 *hash_out = out;
+
/* body */
const u64 *blocks = (const u64 *)(data);
int i;
for (i = 0; i < nblocks; i++) {
- u64 k1 = getblock64(blocks, i * 2 + 0);
- u64 k2 = getblock64(blocks, i * 2 + 1);
+ u64 k1 = get_unaligned_le64(&blocks[i * 2]);
+ u64 k2 = get_unaligned_le64(&blocks[i * 2 + 1]);
k1 *= c1;
k1 = ROTL64(k1, 31);
break;
default:
break;
- };
+ }
}
/* finalization */
h1 += h2;
h2 += h1;
- putblock64((u64 *)out, 0, h1);
- putblock64((u64 *)out, 1, h2);
+ put_unaligned_le64(h1, &hash_out[0]);
+ put_unaligned_le64(h2, &hash_out[1]);
}
return td;
out_blkdev_put:
- fput(bdev_file);
+ __fput_sync(bdev_file);
out_free_td:
kfree(td);
return ERR_PTR(r);
{
if (md->disk->slave_dir)
bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
- fput(td->dm_dev.bdev_file);
+
+ /* Leverage async fput() if DMF_DEFERRED_REMOVE set */
+ if (unlikely(test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
+ fput(td->dm_dev.bdev_file);
+ else
+ __fput_sync(td->dm_dev.bdev_file);
+
put_dax(td->dm_dev.dax_dev);
list_del(&td->list);
kfree(td);
for (j = 0; j < i; j++)
if (r1_bio->bios[j])
rdev_dec_pending(conf->mirrors[j].rdev, mddev);
- free_r1bio(r1_bio);
+ mempool_free(r1_bio, &conf->r1bio_pool);
allow_barrier(conf, bio->bi_iter.bi_sector);
if (bio->bi_opf & REQ_NOWAIT) {
dev_err(&dev->plat_dev->dev, "Watchdog timeout!!");
- mutex_lock(&dev->dev_mutex);
+ mutex_lock(&dev->dev_ctx_lock);
list_for_each_entry(ctx, &dev->ctx_list, list) {
ctx->state = MTK_STATE_ABORT;
mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id);
}
- mutex_unlock(&dev->dev_mutex);
+ mutex_unlock(&dev->dev_ctx_lock);
}
static void mtk_vcodec_vpu_reset_enc_handler(void *priv)
dev_err(&dev->plat_dev->dev, "Watchdog timeout!!");
- mutex_lock(&dev->dev_mutex);
+ mutex_lock(&dev->dev_ctx_lock);
list_for_each_entry(ctx, &dev->ctx_list, list) {
ctx->state = MTK_STATE_ABORT;
mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id);
}
- mutex_unlock(&dev->dev_mutex);
+ mutex_unlock(&dev->dev_ctx_lock);
}
static const struct mtk_vcodec_fw_ops mtk_vcodec_vpu_msg = {
ctx->dev->vdec_pdata->init_vdec_params(ctx);
+ mutex_lock(&dev->dev_ctx_lock);
list_add(&ctx->list, &dev->ctx_list);
+ mutex_unlock(&dev->dev_ctx_lock);
mtk_vcodec_dbgfs_create(ctx);
mutex_unlock(&dev->dev_mutex);
v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
mtk_vcodec_dbgfs_remove(dev, ctx->id);
+ mutex_lock(&dev->dev_ctx_lock);
list_del_init(&ctx->list);
+ mutex_unlock(&dev->dev_ctx_lock);
kfree(ctx);
mutex_unlock(&dev->dev_mutex);
return 0;
for (i = 0; i < MTK_VDEC_HW_MAX; i++)
mutex_init(&dev->dec_mutex[i]);
mutex_init(&dev->dev_mutex);
+ mutex_init(&dev->dev_ctx_lock);
spin_lock_init(&dev->irqlock);
snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
*
* @dec_mutex: decoder hardware lock
* @dev_mutex: video_device lock
+ * @dev_ctx_lock: the lock of context list
* @decode_workqueue: decode work queue
*
* @irqlock: protect data access by irq handler and work thread
/* decoder hardware mutex lock */
struct mutex dec_mutex[MTK_VDEC_HW_MAX];
struct mutex dev_mutex;
+ struct mutex dev_ctx_lock;
struct workqueue_struct *decode_workqueue;
spinlock_t irqlock;
inst->vpu.codec_type = ctx->current_codec;
inst->vpu.capture_type = ctx->capture_fourcc;
- ctx->drv_handle = inst;
err = vpu_dec_init(&inst->vpu);
if (err) {
mtk_vdec_err(ctx, "vdec_hevc init err=%d", err);
mtk_vdec_debug(ctx, "lat hevc instance >> %p, codec_type = 0x%x",
inst, inst->vpu.codec_type);
+ ctx->drv_handle = inst;
return 0;
error_free_inst:
kfree(inst);
inst->frm_cnt, y_fb_dma, c_fb_dma, fb);
inst->cur_fb = fb;
- dec->bs_dma = (unsigned long)bs->dma_addr;
+ dec->bs_dma = (uint64_t)bs->dma_addr;
dec->bs_sz = bs->size;
dec->cur_y_fb_dma = y_fb_dma;
dec->cur_c_fb_dma = c_fb_dma;
#include "../vdec_drv_base.h"
#include "../vdec_vpu_if.h"
+#define VP9_MAX_SUPER_FRAMES_NUM 8
#define VP9_SUPER_FRAME_BS_SZ 64
#define MAX_VP9_DPB_SIZE 9
*/
struct vdec_vp9_vsi {
unsigned char sf_bs_buf[VP9_SUPER_FRAME_BS_SZ];
- struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_FRM_BUF_NUM-1];
+ struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_SUPER_FRAMES_NUM];
int sf_next_ref_fb_idx;
unsigned int sf_frm_cnt;
- unsigned int sf_frm_offset[VP9_MAX_FRM_BUF_NUM-1];
- unsigned int sf_frm_sz[VP9_MAX_FRM_BUF_NUM-1];
+ unsigned int sf_frm_offset[VP9_MAX_SUPER_FRAMES_NUM];
+ unsigned int sf_frm_sz[VP9_MAX_SUPER_FRAMES_NUM];
unsigned int sf_frm_idx;
unsigned int sf_init;
struct vdec_fb fb;
/* if this super frame and it is not last sub-frame, get next fb for
* sub-frame decode
*/
- if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt - 1)
+ if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt)
vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst);
}
static int validate_vsi_array_indexes(struct vdec_vp9_inst *inst,
struct vdec_vp9_vsi *vsi) {
- if (vsi->sf_frm_idx >= VP9_MAX_FRM_BUF_NUM - 1) {
+ if (vsi->sf_frm_idx > VP9_MAX_SUPER_FRAMES_NUM) {
mtk_vdec_err(inst->ctx, "Invalid vsi->sf_frm_idx=%u.", vsi->sf_frm_idx);
return -EIO;
}
unsigned int mi_row;
unsigned int mi_col;
unsigned int offset;
- unsigned int pa;
+ dma_addr_t pa;
unsigned int size;
struct vdec_vp9_slice_tiles *tiles;
unsigned char *pos;
pos = va + offset;
end = va + bs->size;
/* truncated */
- pa = (unsigned int)bs->dma_addr + offset;
+ pa = bs->dma_addr + offset;
tb = instance->tile.va;
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++) {
struct mtk_vcodec_dec_ctx *ctx;
int ret = false;
+ mutex_lock(&dec_dev->dev_ctx_lock);
list_for_each_entry(ctx, &dec_dev->ctx_list, list) {
if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) {
ret = true;
break;
}
}
+ mutex_unlock(&dec_dev->dev_ctx_lock);
return ret;
}
mtk_v4l2_venc_dbg(2, ctx, "Create instance [%d]@%p m2m_ctx=%p ",
ctx->id, ctx, ctx->m2m_ctx);
+ mutex_lock(&dev->dev_ctx_lock);
list_add(&ctx->list, &dev->ctx_list);
+ mutex_unlock(&dev->dev_ctx_lock);
mutex_unlock(&dev->dev_mutex);
mtk_v4l2_venc_dbg(0, ctx, "%s encoder [%d]", dev_name(&dev->plat_dev->dev),
v4l2_fh_exit(&ctx->fh);
v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
+ mutex_lock(&dev->dev_ctx_lock);
list_del_init(&ctx->list);
+ mutex_unlock(&dev->dev_ctx_lock);
kfree(ctx);
mutex_unlock(&dev->dev_mutex);
return 0;
mutex_init(&dev->enc_mutex);
mutex_init(&dev->dev_mutex);
+ mutex_init(&dev->dev_ctx_lock);
spin_lock_init(&dev->irqlock);
snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
*
* @enc_mutex: encoder hardware lock.
* @dev_mutex: video_device lock
+ * @dev_ctx_lock: the lock of context list
* @encode_workqueue: encode work queue
*
* @enc_irq: h264 encoder irq resource
/* encoder hardware mutex lock */
struct mutex enc_mutex;
struct mutex dev_mutex;
+ struct mutex dev_ctx_lock;
struct workqueue_struct *encode_workqueue;
int enc_irq;
struct mtk_vcodec_enc_ctx *ctx;
int ret = false;
+ mutex_lock(&enc_dev->dev_ctx_lock);
list_for_each_entry(ctx, &enc_dev->ctx_list, list) {
if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) {
ret = true;
break;
}
}
+ mutex_unlock(&enc_dev->dev_ctx_lock);
return ret;
}
} else {
pcr->card_removed |= SD_EXIST;
pcr->card_inserted &= ~SD_EXIST;
- if (PCI_PID(pcr) == PID_5261) {
+ if ((PCI_PID(pcr) == PID_5261) || (PCI_PID(pcr) == PID_5264)) {
rtsx_pci_write_register(pcr, RTS5261_FW_STATUS,
RTS5261_EXPRESS_LINK_FAIL_MASK, 0);
pcr->extra_caps |= EXTRA_CAPS_SD_EXPRESS;
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)},
- {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_SPS_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)},
static int mei_vsc_suspend(struct device *dev)
{
struct mei_device *mei_dev = dev_get_drvdata(dev);
+ struct mei_vsc_hw *hw = mei_dev_to_vsc_hw(mei_dev);
mei_stop(mei_dev);
+ mei_disable_interrupts(mei_dev);
+
+ vsc_tp_free_irq(hw->tp);
+
return 0;
}
static int mei_vsc_resume(struct device *dev)
{
struct mei_device *mei_dev = dev_get_drvdata(dev);
+ struct mei_vsc_hw *hw = mei_dev_to_vsc_hw(mei_dev);
int ret;
- ret = mei_restart(mei_dev);
+ ret = vsc_tp_request_irq(hw->tp);
if (ret)
return ret;
+ ret = mei_restart(mei_dev);
+ if (ret)
+ goto err_free;
+
/* start timer if stopped in suspend */
schedule_delayed_work(&mei_dev->timer_work, HZ);
return 0;
+
+err_free:
+ vsc_tp_free_irq(hw->tp);
+
+ return ret;
}
static DEFINE_SIMPLE_DEV_PM_OPS(mei_vsc_pm_ops, mei_vsc_suspend, mei_vsc_resume);
{}
};
+static irqreturn_t vsc_tp_isr(int irq, void *data)
+{
+ struct vsc_tp *tp = data;
+
+ atomic_inc(&tp->assert_cnt);
+
+ wake_up(&tp->xfer_wait);
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t vsc_tp_thread_isr(int irq, void *data)
+{
+ struct vsc_tp *tp = data;
+
+ if (tp->event_notify)
+ tp->event_notify(tp->event_notify_context);
+
+ return IRQ_HANDLED;
+}
+
/* wakeup firmware and wait for response */
static int vsc_tp_wakeup_request(struct vsc_tp *tp)
{
}
EXPORT_SYMBOL_NS_GPL(vsc_tp_register_event_cb, VSC_TP);
+/**
+ * vsc_tp_request_irq - request irq for vsc_tp device
+ * @tp: vsc_tp device handle
+ */
+int vsc_tp_request_irq(struct vsc_tp *tp)
+{
+ struct spi_device *spi = tp->spi;
+ struct device *dev = &spi->dev;
+ int ret;
+
+ irq_set_status_flags(spi->irq, IRQ_DISABLE_UNLAZY);
+ ret = request_threaded_irq(spi->irq, vsc_tp_isr, vsc_tp_thread_isr,
+ IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+ dev_name(dev), tp);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(vsc_tp_request_irq, VSC_TP);
+
+/**
+ * vsc_tp_free_irq - free irq for vsc_tp device
+ * @tp: vsc_tp device handle
+ */
+void vsc_tp_free_irq(struct vsc_tp *tp)
+{
+ free_irq(tp->spi->irq, tp);
+}
+EXPORT_SYMBOL_NS_GPL(vsc_tp_free_irq, VSC_TP);
+
/**
* vsc_tp_intr_synchronize - synchronize vsc_tp interrupt
* @tp: vsc_tp device handle
}
EXPORT_SYMBOL_NS_GPL(vsc_tp_intr_disable, VSC_TP);
-static irqreturn_t vsc_tp_isr(int irq, void *data)
-{
- struct vsc_tp *tp = data;
-
- atomic_inc(&tp->assert_cnt);
-
- return IRQ_WAKE_THREAD;
-}
-
-static irqreturn_t vsc_tp_thread_isr(int irq, void *data)
-{
- struct vsc_tp *tp = data;
-
- wake_up(&tp->xfer_wait);
-
- if (tp->event_notify)
- tp->event_notify(tp->event_notify_context);
-
- return IRQ_HANDLED;
-}
-
static int vsc_tp_match_any(struct acpi_device *adev, void *data)
{
struct acpi_device **__adev = data;
tp->spi = spi;
irq_set_status_flags(spi->irq, IRQ_DISABLE_UNLAZY);
- ret = devm_request_threaded_irq(dev, spi->irq, vsc_tp_isr,
- vsc_tp_thread_isr,
- IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
- dev_name(dev), tp);
+ ret = request_threaded_irq(spi->irq, vsc_tp_isr, vsc_tp_thread_isr,
+ IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+ dev_name(dev), tp);
if (ret)
return ret;
err_destroy_lock:
mutex_destroy(&tp->mutex);
+ free_irq(spi->irq, tp);
+
return ret;
}
platform_device_unregister(tp->pdev);
mutex_destroy(&tp->mutex);
+
+ free_irq(spi->irq, tp);
}
static const struct acpi_device_id vsc_tp_acpi_ids[] = {
int vsc_tp_register_event_cb(struct vsc_tp *tp, vsc_tp_event_cb_t event_cb,
void *context);
+int vsc_tp_request_irq(struct vsc_tp *tp);
+void vsc_tp_free_irq(struct vsc_tp *tp);
+
void vsc_tp_intr_enable(struct vsc_tp *tp);
void vsc_tp_intr_disable(struct vsc_tp *tp);
void vsc_tp_intr_synchronize(struct vsc_tp *tp);
struct mmc_blk_ioc_data *idata;
int err;
- idata = kmalloc(sizeof(*idata), GFP_KERNEL);
+ idata = kzalloc(sizeof(*idata), GFP_KERNEL);
if (!idata) {
err = -ENOMEM;
goto out;
if (idata->flags & MMC_BLK_IOC_DROP)
return 0;
- if (idata->flags & MMC_BLK_IOC_SBC)
+ if (idata->flags & MMC_BLK_IOC_SBC && i > 0)
prev_idata = idatas[i - 1];
/*
remain = sgm->length;
if (remain > host->data_len)
remain = host->data_len;
+ sgm->consumed = 0;
if (data->flags & MMC_DATA_WRITE) {
while (remain > 0) {
host = slot->host;
- if (slot->vsd)
- gpiod_set_value(slot->vsd, power_on);
- if (slot->vio)
- gpiod_set_value(slot->vio, power_on);
+ if (power_on) {
+ if (slot->vsd) {
+ gpiod_set_value(slot->vsd, power_on);
+ msleep(1);
+ }
+ if (slot->vio) {
+ gpiod_set_value(slot->vio, power_on);
+ msleep(1);
+ }
+ } else {
+ if (slot->vio) {
+ gpiod_set_value(slot->vio, power_on);
+ msleep(50);
+ }
+ if (slot->vsd) {
+ gpiod_set_value(slot->vsd, power_on);
+ msleep(50);
+ }
+ }
if (slot->pdata->set_power != NULL)
slot->pdata->set_power(mmc_dev(slot->mmc), slot->id, power_on,
slot->pdata = &host->pdata->slots[id];
/* Check for some optional GPIO controls */
- slot->vsd = gpiod_get_index_optional(host->dev, "vsd",
- id, GPIOD_OUT_LOW);
+ slot->vsd = devm_gpiod_get_index_optional(host->dev, "vsd",
+ id, GPIOD_OUT_LOW);
if (IS_ERR(slot->vsd))
return dev_err_probe(host->dev, PTR_ERR(slot->vsd),
"error looking up VSD GPIO\n");
- slot->vio = gpiod_get_index_optional(host->dev, "vio",
- id, GPIOD_OUT_LOW);
+ slot->vio = devm_gpiod_get_index_optional(host->dev, "vio",
+ id, GPIOD_OUT_LOW);
if (IS_ERR(slot->vio))
return dev_err_probe(host->dev, PTR_ERR(slot->vio),
"error looking up VIO GPIO\n");
- slot->cover = gpiod_get_index_optional(host->dev, "cover",
- id, GPIOD_IN);
+ slot->cover = devm_gpiod_get_index_optional(host->dev, "cover",
+ id, GPIOD_IN);
if (IS_ERR(slot->cover))
return dev_err_probe(host->dev, PTR_ERR(slot->cover),
"error looking up cover switch GPIO\n");
if (IS_ERR(host->virt_base))
return PTR_ERR(host->virt_base);
- host->slot_switch = gpiod_get_optional(host->dev, "switch",
- GPIOD_OUT_LOW);
- if (IS_ERR(host->slot_switch))
- return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
- "error looking up slot switch GPIO\n");
-
-
INIT_WORK(&host->slot_release_work, mmc_omap_slot_release_work);
INIT_WORK(&host->send_stop_work, mmc_omap_send_stop_work);
host->dev = &pdev->dev;
platform_set_drvdata(pdev, host);
+ host->slot_switch = devm_gpiod_get_optional(host->dev, "switch",
+ GPIOD_OUT_LOW);
+ if (IS_ERR(host->slot_switch))
+ return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
+ "error looking up slot switch GPIO\n");
+
host->id = pdev->id;
host->irq = irq;
host->phys_base = res->start;
struct sdhci_host *host = dev_get_drvdata(dev);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
+ host->runtime_suspended = true;
+ spin_unlock_irqrestore(&host->lock, flags);
/* Drop the performance vote */
dev_pm_opp_set_rate(dev, 0);
struct sdhci_host *host = dev_get_drvdata(dev);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+ unsigned long flags;
int ret;
ret = clk_bulk_prepare_enable(ARRAY_SIZE(msm_host->bulk_clks),
dev_pm_opp_set_rate(dev, msm_host->clk_rate);
- return sdhci_msm_ice_resume(msm_host);
+ ret = sdhci_msm_ice_resume(msm_host);
+ if (ret)
+ return ret;
+
+ spin_lock_irqsave(&host->lock, flags);
+ host->runtime_suspended = false;
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ return ret;
}
static const struct dev_pm_ops sdhci_msm_pm_ops = {
/* perform tuning */
sdhci_start_tuning(host);
+ host->tuning_loop_count = 128;
host->tuning_err = __sdhci_execute_tuning(host, opcode);
if (host->tuning_err) {
/* disable auto-tuning upon tuning error */
return err;
}
+static void dwcmshc_disable_card_clk(struct sdhci_host *host)
+{
+ u16 ctrl;
+
+ ctrl = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+ if (ctrl & SDHCI_CLOCK_CARD_EN) {
+ ctrl &= ~SDHCI_CLOCK_CARD_EN;
+ sdhci_writew(host, ctrl, SDHCI_CLOCK_CONTROL);
+ }
+}
+
static void dwcmshc_remove(struct platform_device *pdev)
{
struct sdhci_host *host = platform_get_drvdata(pdev);
struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
struct rk35xx_priv *rk_priv = priv->priv;
+ pm_runtime_get_sync(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ pm_runtime_put_noidle(&pdev->dev);
+
sdhci_remove_host(host, 0);
+ dwcmshc_disable_card_clk(host);
+
clk_disable_unprepare(pltfm_host->clk);
clk_disable_unprepare(priv->bus_clk);
if (rk_priv)
}
}
-static void dwcmshc_disable_card_clk(struct sdhci_host *host)
-{
- u16 ctrl;
-
- ctrl = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
- if (ctrl & SDHCI_CLOCK_CARD_EN) {
- ctrl &= ~SDHCI_CLOCK_CARD_EN;
- sdhci_writew(host, ctrl, SDHCI_CLOCK_CONTROL);
- }
-}
-
static int dwcmshc_runtime_suspend(struct device *dev)
{
struct sdhci_host *host = dev_get_drvdata(dev);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host);
+ if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+ mmc_retune_needed(host->mmc);
+
if (omap_host->con != -EINVAL)
sdhci_runtime_suspend_host(host);
if (dev->bdev_file) {
invalidate_mapping_pages(dev->bdev_file->f_mapping, 0, -1);
- fput(dev->bdev_file);
+ bdev_fput(dev->bdev_file);
}
kfree(dev);
config.name = compatible;
config.id = NVMEM_DEVID_AUTO;
config.owner = THIS_MODULE;
- config.add_legacy_fixed_of_cells = true;
+ config.add_legacy_fixed_of_cells = !mtd_type_is_nand(mtd);
config.type = NVMEM_TYPE_OTP;
config.root_only = true;
config.ignore_wp = true;
struct brcmnand_soc *soc = ctrl->soc;
int i;
- if (soc->read_data_bus) {
+ if (soc && soc->read_data_bus) {
soc->read_data_bus(soc, flash_cache, buffer, fc_words);
} else {
for (i = 0; i < fc_words; i++)
0xe8000, 0xea000, 0xec000, 0xee000,
#endif
#endif
- 0xffffffff };
+};
static struct mtd_info *doclist = NULL;
if (ret < 0)
return ret;
} else {
- for (i = 0; (doc_locations[i] != 0xffffffff); i++) {
+ for (i = 0; i < ARRAY_SIZE(doc_locations); i++) {
doc_probe(doc_locations[i]);
}
}
host->cfg0_raw & ~(7 << CW_PER_PAGE));
nandc_set_reg(chip, NAND_DEV0_CFG1, host->cfg1_raw);
instrs = 3;
- } else {
+ } else if (q_op.cmd_reg != OP_RESET_DEVICE) {
return 0;
}
nandc_set_reg(chip, NAND_EXEC_CMD, 1);
write_reg_dma(nandc, NAND_FLASH_CMD, instrs, NAND_BAM_NEXT_SGL);
- (q_op.cmd_reg == OP_BLOCK_ERASE) ? write_reg_dma(nandc, NAND_DEV0_CFG0,
- 2, NAND_BAM_NEXT_SGL) : read_reg_dma(nandc,
- NAND_FLASH_STATUS, 1, NAND_BAM_NEXT_SGL);
+ if (q_op.cmd_reg == OP_BLOCK_ERASE)
+ write_reg_dma(nandc, NAND_DEV0_CFG0, 2, NAND_BAM_NEXT_SGL);
write_reg_dma(nandc, NAND_EXEC_CMD, 1, NAND_BAM_NEXT_SGL);
read_reg_dma(nandc, NAND_FLASH_STATUS, 1, NAND_BAM_NEXT_SGL);
mutex_unlock(&priv->reg_mutex);
}
-/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std
- * 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA
- * must only be propagated to C-VLAN and MAC Bridge components. That means
- * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports,
- * these frames are supposed to be processed by the CPU (software). So we make
- * the switch only forward them to the CPU port. And if received from a CPU
- * port, forward to a single port. The software is responsible of making the
- * switch conform to the latter by setting a single port as destination port on
- * the special tag.
+/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL)
+ * of the Open Systems Interconnection basic reference model (OSI/RM) are
+ * described; the medium access control (MAC) and logical link control (LLC)
+ * sublayers. The MAC sublayer is the one facing the physical layer.
*
- * This switch intellectual property cannot conform to this part of the standard
- * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC
- * DAs, it also includes :22-FF which the scope of propagation is not supposed
- * to be restricted for these MAC DAs.
+ * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A
+ * Bridge component comprises a MAC Relay Entity for interconnecting the Ports
+ * of the Bridge, at least two Ports, and higher layer entities with at least a
+ * Spanning Tree Protocol Entity included.
+ *
+ * Each Bridge Port also functions as an end station and shall provide the MAC
+ * Service to an LLC Entity. Each instance of the MAC Service is provided to a
+ * distinct LLC Entity that supports protocol identification, multiplexing, and
+ * demultiplexing, for protocol data unit (PDU) transmission and reception by
+ * one or more higher layer entities.
+ *
+ * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC
+ * Entity associated with each Bridge Port is modeled as being directly
+ * connected to the attached Local Area Network (LAN).
+ *
+ * On the switch with CPU port architecture, CPU port functions as Management
+ * Port, and the Management Port functionality is provided by software which
+ * functions as an end station. Software is connected to an IEEE 802 LAN that is
+ * wholly contained within the system that incorporates the Bridge. Software
+ * provides access to the LLC Entity associated with each Bridge Port by the
+ * value of the source port field on the special tag on the frame received by
+ * software.
+ *
+ * We call frames that carry control information to determine the active
+ * topology and current extent of each Virtual Local Area Network (VLAN), i.e.,
+ * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration
+ * Protocol Data Units (MVRPDUs), and frames from other link constrained
+ * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and
+ * Link Layer Discovery Protocol (LLDP), link-local frames. They are not
+ * forwarded by a Bridge. Permanently configured entries in the filtering
+ * database (FDB) ensure that such frames are discarded by the Forwarding
+ * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail:
+ *
+ * Each of the reserved MAC addresses specified in Table 8-1
+ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be
+ * permanently configured in the FDB in C-VLAN components and ERs.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-2
+ * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently
+ * configured in the FDB in S-VLAN components.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-3
+ * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in
+ * TPMR components.
+ *
+ * The FDB entries for reserved MAC addresses shall specify filtering for all
+ * Bridge Ports and all VIDs. Management shall not provide the capability to
+ * modify or remove entries for reserved MAC addresses.
+ *
+ * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of
+ * propagation of PDUs within a Bridged Network, as follows:
+ *
+ * The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no
+ * conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN)
+ * component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward.
+ * PDUs transmitted using this destination address, or any other addresses
+ * that appear in Table 8-1, Table 8-2, and Table 8-3
+ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can
+ * therefore travel no further than those stations that can be reached via a
+ * single individual LAN from the originating station.
+ *
+ * The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an
+ * address that no conformant S-VLAN component, C-VLAN component, or MAC
+ * Bridge can forward; however, this address is relayed by a TPMR component.
+ * PDUs using this destination address, or any of the other addresses that
+ * appear in both Table 8-1 and Table 8-2 but not in Table 8-3
+ * (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by
+ * any TPMRs but will propagate no further than the nearest S-VLAN component,
+ * C-VLAN component, or MAC Bridge.
+ *
+ * The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address
+ * that no conformant C-VLAN component, MAC Bridge can forward; however, it is
+ * relayed by TPMR components and S-VLAN components. PDUs using this
+ * destination address, or any of the other addresses that appear in Table 8-1
+ * but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]),
+ * will be relayed by TPMR components and S-VLAN components but will propagate
+ * no further than the nearest C-VLAN component or MAC Bridge.
+ *
+ * Because the LLC Entity associated with each Bridge Port is provided via CPU
+ * port, we must not filter these frames but forward them to CPU port.
+ *
+ * In a Bridge, the transmission Port is majorly decided by ingress and egress
+ * rules, FDB, and spanning tree Port State functions of the Forwarding Process.
+ * For link-local frames, only CPU port should be designated as destination port
+ * in the FDB, and the other functions of the Forwarding Process must not
+ * interfere with the decision of the transmission Port. We call this process
+ * trapping frames to CPU port.
+ *
+ * Therefore, on the switch with CPU port architecture, link-local frames must
+ * be trapped to CPU port, and certain link-local frames received by a Port of a
+ * Bridge comprising a TPMR component or an S-VLAN component must be excluded
+ * from it.
+ *
+ * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port
+ * MAC Relay (TPMR) component as a TPMR component supports only a subset of the
+ * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port
+ * doesn't count) of this architecture will either function as a standard MAC
+ * Bridge or a standard VLAN Bridge.
+ *
+ * Therefore, a Bridge of this architecture can only comprise S-VLAN components,
+ * C-VLAN components, or MAC Bridge components. Since there's no TPMR component,
+ * we don't need to relay PDUs using the destination addresses specified on the
+ * Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge
+ * section where they must be relayed by TPMR components.
+ *
+ * One option to trap link-local frames to CPU port is to add static FDB entries
+ * with CPU port designated as destination port. However, because that
+ * Independent VLAN Learning (IVL) is being used on every VID, each entry only
+ * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC
+ * Bridge component or a C-VLAN component, there would have to be 16 times 4096
+ * entries. This switch intellectual property can only hold a maximum of 2048
+ * entries. Using this option, there also isn't a mechanism to prevent
+ * link-local frames from being discarded when the spanning tree Port State of
+ * the reception Port is discarding.
+ *
+ * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4
+ * registers. Whilst this applies to every VID, it doesn't contain all of the
+ * reserved MAC addresses without affecting the remaining Standard Group MAC
+ * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the
+ * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination
+ * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF
+ * destination addresses which may be relayed by MAC Bridges or VLAN Bridges.
+ * The latter option provides better but not complete conformance.
+ *
+ * This switch intellectual property also does not provide a mechanism to trap
+ * link-local frames with specific destination addresses to CPU port by Bridge,
+ * to conform to the filtering rules for the distinct Bridge components.
+ *
+ * Therefore, regardless of the type of the Bridge component, link-local frames
+ * with these destination addresses will be trapped to CPU port:
+ *
+ * 01-80-C2-00-00-[00,01,02,03,0E]
+ *
+ * In a Bridge comprising a MAC Bridge component or a C-VLAN component:
+ *
+ * Link-local frames with these destination addresses won't be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F]
+ *
+ * In a Bridge comprising an S-VLAN component:
+ *
+ * Link-local frames with these destination addresses will be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-00
+ *
+ * Link-local frames with these destination addresses won't be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-[04,05,06,07,08,09,0A]
+ *
+ * To trap link-local frames to CPU port as conformant as this switch
+ * intellectual property can allow, link-local frames are made to be regarded as
+ * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual
+ * property only lets the frames regarded as BPDUs bypass the spanning tree Port
+ * State function of the Forwarding Process.
+ *
+ * The only remaining interference is the ingress rules. When the reception Port
+ * has no PVID assigned on software, VLAN-untagged frames won't be allowed in.
+ * There doesn't seem to be a mechanism on the switch intellectual property to
+ * have link-local frames bypass this function of the Forwarding Process.
*/
static void
mt753x_trap_frames(struct mt7530_priv *priv)
/* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them
* VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK |
- MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
- MT753X_BPDU_PORT_FW_MASK,
- MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_BPC,
+ MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK |
+ MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
+ MT753X_BPDU_PORT_FW_MASK,
+ MT753X_PAE_BPDU_FR |
+ MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
/* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress
* them VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK |
- MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK |
- MT753X_R01_PORT_FW_MASK,
- MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_RGAC1,
+ MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK |
+ MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR |
+ MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK,
+ MT753X_R02_BPDU_FR |
+ MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_R01_BPDU_FR |
+ MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
/* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress
* them VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK |
- MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK |
- MT753X_R03_PORT_FW_MASK,
- MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_RGAC2,
+ MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK |
+ MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR |
+ MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK,
+ MT753X_R0E_BPDU_FR |
+ MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_R03_BPDU_FR |
+ MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
}
static void
static int mt753x_mirror_port_get(unsigned int id, u32 val)
{
- return (id == ID_MT7531) ? MT7531_MIRROR_PORT_GET(val) :
- MIRROR_PORT(val);
+ return (id == ID_MT7531 || id == ID_MT7988) ?
+ MT7531_MIRROR_PORT_GET(val) :
+ MIRROR_PORT(val);
}
static int mt753x_mirror_port_set(unsigned int id, u32 val)
{
- return (id == ID_MT7531) ? MT7531_MIRROR_PORT_SET(val) :
- MIRROR_PORT(val);
+ return (id == ID_MT7531 || id == ID_MT7988) ?
+ MT7531_MIRROR_PORT_SET(val) :
+ MIRROR_PORT(val);
}
static int mt753x_port_mirror_add(struct dsa_switch *ds, int port,
SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST |
SYS_CTRL_REG_RST);
- mt7530_pll_setup(priv);
-
/* Lower Tx driving for TRGMII path */
for (i = 0; i < NUM_TRGMII_CTRL; i++)
mt7530_write(priv, MT7530_TRGMII_TD_ODT(i),
val |= MHWTRAP_MANUAL;
mt7530_write(priv, MT7530_MHWTRAP, val);
+ if ((val & HWTRAP_XTAL_MASK) == HWTRAP_XTAL_40MHZ)
+ mt7530_pll_setup(priv);
+
mt753x_trap_frames(priv);
/* Enable and reset MIB counters */
PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
+ /* Allow mirroring frames received on the local port (monitor port). */
+ mt7530_set(priv, MT753X_AGC, LOCAL_EN);
+
/* Setup VLAN ID 0 for VLAN-unaware bridges */
ret = mt7530_setup_vlan0(priv);
if (ret)
PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
+ /* Allow mirroring frames received on the local port (monitor port). */
+ mt7530_set(priv, MT753X_AGC, LOCAL_EN);
+
/* Flush the FDB table */
ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL);
if (ret < 0)
mt7530_rmw(priv, MT7531_GPIO_MODE0, MT7531_GPIO0_MASK,
MT7531_GPIO0_INTERRUPT);
- /* Enable PHY core PLL, since phy_device has not yet been created
- * provided for phy_[read,write]_mmd_indirect is called, we provide
- * our own mt7531_ind_mmd_phy_[read,write] to complete this
- * function.
+ /* Enable Energy-Efficient Ethernet (EEE) and PHY core PLL, since
+ * phy_device has not yet been created provided for
+ * phy_[read,write]_mmd_indirect is called, we provide our own
+ * mt7531_ind_mmd_phy_[read,write] to complete this function.
*/
val = mt7531_ind_c45_phy_read(priv, MT753X_CTRL_PHY_ADDR,
MDIO_MMD_VEND2, CORE_PLL_GROUP4);
- val |= MT7531_PHY_PLL_BYPASS_MODE;
+ val |= MT7531_RG_SYSPLL_DMY2 | MT7531_PHY_PLL_BYPASS_MODE;
val &= ~MT7531_PHY_PLL_OFF;
mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2,
CORE_PLL_GROUP4, val);
+ /* Disable EEE advertisement on the switch PHYs. */
+ for (i = MT753X_CTRL_PHY_ADDR;
+ i < MT753X_CTRL_PHY_ADDR + MT7530_NUM_PHYS; i++) {
+ mt7531_ind_c45_phy_write(priv, i, MDIO_MMD_AN, MDIO_AN_EEE_ADV,
+ 0);
+ }
+
mt7531_setup_common(ds);
/* Setup VLAN ID 0 for VLAN-unaware bridges */
#define SYSC_REG_RSTCTRL 0x34
#define RESET_MCM BIT(2)
+/* Register for ARL global control */
+#define MT753X_AGC 0xc
+#define LOCAL_EN BIT(7)
+
/* Registers to mac forward control for unknown frames */
#define MT7530_MFC 0x10
#define BC_FFP(x) (((x) & 0xff) << 24)
/* Registers for BPDU and PAE frame control*/
#define MT753X_BPC 0x24
+#define MT753X_PAE_BPDU_FR BIT(25)
#define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x)
#define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16)
/* Register for :01 and :02 MAC DA frame control */
#define MT753X_RGAC1 0x28
+#define MT753X_R02_BPDU_FR BIT(25)
#define MT753X_R02_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x)
#define MT753X_R02_PORT_FW_MASK GENMASK(18, 16)
#define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x)
+#define MT753X_R01_BPDU_FR BIT(9)
#define MT753X_R01_EG_TAG_MASK GENMASK(8, 6)
#define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x)
#define MT753X_R01_PORT_FW_MASK GENMASK(2, 0)
/* Register for :03 and :0E MAC DA frame control */
#define MT753X_RGAC2 0x2c
+#define MT753X_R0E_BPDU_FR BIT(25)
#define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x)
#define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16)
#define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x)
+#define MT753X_R03_BPDU_FR BIT(9)
#define MT753X_R03_EG_TAG_MASK GENMASK(8, 6)
#define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x)
#define MT753X_R03_PORT_FW_MASK GENMASK(2, 0)
#define RG_SYSPLL_DDSFBK_EN BIT(12)
#define RG_SYSPLL_BIAS_EN BIT(11)
#define RG_SYSPLL_BIAS_LPF_EN BIT(10)
+#define MT7531_RG_SYSPLL_DMY2 BIT(6)
#define MT7531_PHY_PLL_OFF BIT(5)
#define MT7531_PHY_PLL_BYPASS_MODE BIT(4)
phy_interface_set_rgmii(supported);
}
-static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
- struct phylink_config *config)
+static void
+mv88e6250_setup_supported_interfaces(struct mv88e6xxx_chip *chip, int port,
+ struct phylink_config *config)
{
unsigned long *supported = config->supported_interfaces;
+ int err;
+ u16 reg;
- /* Translate the default cmode */
- mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
+ err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, ®);
+ if (err) {
+ dev_err(chip->dev, "p%d: failed to read port status\n", port);
+ return;
+ }
+
+ switch (reg & MV88E6250_PORT_STS_PORTMODE_MASK) {
+ case MV88E6250_PORT_STS_PORTMODE_MII_10_HALF_PHY:
+ case MV88E6250_PORT_STS_PORTMODE_MII_100_HALF_PHY:
+ case MV88E6250_PORT_STS_PORTMODE_MII_10_FULL_PHY:
+ case MV88E6250_PORT_STS_PORTMODE_MII_100_FULL_PHY:
+ __set_bit(PHY_INTERFACE_MODE_REVMII, supported);
+ break;
+
+ case MV88E6250_PORT_STS_PORTMODE_MII_HALF:
+ case MV88E6250_PORT_STS_PORTMODE_MII_FULL:
+ __set_bit(PHY_INTERFACE_MODE_MII, supported);
+ break;
+
+ case MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL_PHY:
+ case MV88E6250_PORT_STS_PORTMODE_MII_200_RMII_FULL_PHY:
+ case MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_HALF_PHY:
+ case MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL_PHY:
+ __set_bit(PHY_INTERFACE_MODE_REVRMII, supported);
+ break;
+
+ case MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL:
+ case MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL:
+ __set_bit(PHY_INTERFACE_MODE_RMII, supported);
+ break;
+
+ case MV88E6250_PORT_STS_PORTMODE_MII_100_RGMII:
+ __set_bit(PHY_INTERFACE_MODE_RGMII, supported);
+ break;
+
+ default:
+ dev_err(chip->dev,
+ "p%d: invalid port mode in status register: %04x\n",
+ port, reg);
+ }
+}
+
+static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+ struct phylink_config *config)
+{
+ if (!mv88e6xxx_phy_is_internal(chip, port))
+ mv88e6250_setup_supported_interfaces(chip, port, config);
config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100;
}
.family = MV88E6XXX_FAMILY_6250,
.name = "Marvell 88E6020",
.num_databases = 64,
- .num_ports = 4,
+ /* Ports 2-4 are not routed to pins
+ * => usable ports 0, 1, 5, 6
+ */
+ .num_ports = 7,
.num_internal_phys = 2,
+ .invalid_port_mask = BIT(2) | BIT(3) | BIT(4),
.max_vid = 4095,
.port_base_addr = 0x8,
.phy_base_addr = 0x0,
#define MV88E6250_PORT_STS_PORTMODE_PHY_100_HALF 0x0900
#define MV88E6250_PORT_STS_PORTMODE_PHY_10_FULL 0x0a00
#define MV88E6250_PORT_STS_PORTMODE_PHY_100_FULL 0x0b00
-#define MV88E6250_PORT_STS_PORTMODE_MII_10_HALF 0x0c00
-#define MV88E6250_PORT_STS_PORTMODE_MII_100_HALF 0x0d00
-#define MV88E6250_PORT_STS_PORTMODE_MII_10_FULL 0x0e00
-#define MV88E6250_PORT_STS_PORTMODE_MII_100_FULL 0x0f00
+/* - Modes with PHY suffix use output instead of input clock
+ * - Modes without RMII or RGMII use MII
+ * - Modes without speed do not have a fixed speed specified in the manual
+ * ("DC to x MHz" - variable clock support?)
+ */
+#define MV88E6250_PORT_STS_PORTMODE_MII_DISABLED 0x0000
+#define MV88E6250_PORT_STS_PORTMODE_MII_100_RGMII 0x0100
+#define MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL_PHY 0x0200
+#define MV88E6250_PORT_STS_PORTMODE_MII_200_RMII_FULL_PHY 0x0400
+#define MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL 0x0600
+#define MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL 0x0700
+#define MV88E6250_PORT_STS_PORTMODE_MII_HALF 0x0800
+#define MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_HALF_PHY 0x0900
+#define MV88E6250_PORT_STS_PORTMODE_MII_FULL 0x0a00
+#define MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL_PHY 0x0b00
+#define MV88E6250_PORT_STS_PORTMODE_MII_10_HALF_PHY 0x0c00
+#define MV88E6250_PORT_STS_PORTMODE_MII_100_HALF_PHY 0x0d00
+#define MV88E6250_PORT_STS_PORTMODE_MII_10_FULL_PHY 0x0e00
+#define MV88E6250_PORT_STS_PORTMODE_MII_100_FULL_PHY 0x0f00
#define MV88E6XXX_PORT_STS_LINK 0x0800
#define MV88E6XXX_PORT_STS_DUPLEX 0x0400
#define MV88E6XXX_PORT_STS_SPEED_MASK 0x0300
return tmp & 0xffff;
}
-int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int reg, int mmd,
+int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int mmd, int reg,
u16 val)
{
struct sja1105_mdio_private *mdio_priv = bus->priv;
ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
io_sq->bounce_buf_ctrl.next_to_use = 0;
- size = io_sq->bounce_buf_ctrl.buffer_size *
+ size = (size_t)io_sq->bounce_buf_ctrl.buffer_size *
io_sq->bounce_buf_ctrl.buffers_num;
dev_node = dev_to_node(ena_dev->dmadev);
static void ena_free_tx_bufs(struct ena_ring *tx_ring)
{
bool print_once = true;
+ bool is_xdp_ring;
u32 i;
+ is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid);
+
for (i = 0; i < tx_ring->ring_size; i++) {
struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
ena_unmap_tx_buff(tx_ring, tx_info);
- dev_kfree_skb_any(tx_info->skb);
+ if (is_xdp_ring)
+ xdp_return_frame(tx_info->xdpf);
+ else
+ dev_kfree_skb_any(tx_info->skb);
}
- netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
- tx_ring->qid));
+
+ if (!is_xdp_ring)
+ netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+ tx_ring->qid));
}
static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
{
struct ena_ring *tx_ring;
struct ena_ring *rx_ring;
- int i, budget, rc;
+ int qid, budget, rc;
int io_queue_count;
io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
+
/* Make sure the driver doesn't turn the device in other process */
smp_rmb();
if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
return;
- budget = ENA_MONITORED_TX_QUEUES;
+ budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES);
- for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
- tx_ring = &adapter->tx_ring[i];
- rx_ring = &adapter->rx_ring[i];
+ qid = adapter->last_monitored_tx_qid;
+
+ while (budget) {
+ qid = (qid + 1) % io_queue_count;
+
+ tx_ring = &adapter->tx_ring[qid];
+ rx_ring = &adapter->rx_ring[qid];
rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
if (unlikely(rc))
return;
- rc = !ENA_IS_XDP_INDEX(adapter, i) ?
+ rc = !ENA_IS_XDP_INDEX(adapter, qid) ?
check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
if (unlikely(rc))
return;
budget--;
- if (!budget)
- break;
}
- adapter->last_monitored_tx_qid = i % io_queue_count;
+ adapter->last_monitored_tx_qid = qid;
}
/* trigger napi schedule after 2 consecutive detections */
rc = ena_xdp_tx_map_frame(tx_ring, tx_info, xdpf, &ena_tx_ctx);
if (unlikely(rc))
- return rc;
+ goto err;
ena_tx_ctx.req_id = req_id;
error_unmap_dma:
ena_unmap_tx_buff(tx_ring, tx_info);
+err:
tx_info->xdpf = NULL;
+
return rc;
}
pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
}
+void pdsc_pci_reset_thread(struct work_struct *work)
+{
+ struct pdsc *pdsc = container_of(work, struct pdsc, pci_reset_work);
+ struct pci_dev *pdev = pdsc->pdev;
+
+ pci_dev_get(pdev);
+ pci_reset_function(pdev);
+ pci_dev_put(pdev);
+}
+
static void pdsc_check_pci_health(struct pdsc *pdsc)
{
u8 fw_status;
if (fw_status != PDS_RC_BAD_PCI)
return;
- pci_reset_function(pdsc->pdev);
+ /* prevent deadlock between pdsc_reset_prepare and pdsc_health_thread */
+ queue_work(pdsc->wq, &pdsc->pci_reset_work);
}
void pdsc_health_thread(struct work_struct *work)
struct pdsc_qcq notifyqcq;
u64 last_eid;
struct pdsc_viftype *viftype_status;
+ struct work_struct pci_reset_work;
};
/** enum pds_core_dbell_bits - bitwise composition of dbell values.
void pdsc_fw_down(struct pdsc *pdsc);
void pdsc_fw_up(struct pdsc *pdsc);
+void pdsc_pci_reset_thread(struct work_struct *work);
#endif /* _PDSC_H_ */
.reset.opcode = PDS_CORE_CMD_RESET,
};
+ if (!pdsc_is_fw_running(pdsc))
+ return 0;
+
return pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
}
snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->uid);
pdsc->wq = create_singlethread_workqueue(wq_name);
INIT_WORK(&pdsc->health_work, pdsc_health_thread);
+ INIT_WORK(&pdsc->pci_reset_work, pdsc_pci_reset_thread);
timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
umac_wl(intf, 0x0, UMC_CMD);
umac_wl(intf, UMC_CMD_SW_RESET, UMC_CMD);
usleep_range(10, 100);
- umac_wl(intf, 0x0, UMC_CMD);
+ /* We hold the umac in reset and bring it out of
+ * reset when phy link is up.
+ */
}
static void umac_set_hw_addr(struct bcmasp_intf *intf,
u32 reg;
reg = umac_rl(intf, UMC_CMD);
+ if (reg & UMC_CMD_SW_RESET)
+ return;
if (enable)
reg |= mask;
else
umac_wl(intf, 0x800, UMC_FRM_LEN);
umac_wl(intf, 0xffff, UMC_PAUSE_CNTRL);
umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ);
- umac_enable_set(intf, UMC_CMD_PROMISC, 1);
}
-static int bcmasp_tx_poll(struct napi_struct *napi, int budget)
+static int bcmasp_tx_reclaim(struct bcmasp_intf *intf)
{
- struct bcmasp_intf *intf =
- container_of(napi, struct bcmasp_intf, tx_napi);
struct bcmasp_intf_stats64 *stats = &intf->stats64;
struct device *kdev = &intf->parent->pdev->dev;
unsigned long read, released = 0;
DESC_RING_COUNT);
}
- /* Ensure all descriptors have been written to DRAM for the hardware
- * to see updated contents.
- */
- wmb();
+ return released;
+}
+
+static int bcmasp_tx_poll(struct napi_struct *napi, int budget)
+{
+ struct bcmasp_intf *intf =
+ container_of(napi, struct bcmasp_intf, tx_napi);
+ int released = 0;
+
+ released = bcmasp_tx_reclaim(intf);
napi_complete(&intf->tx_napi);
UMC_CMD_HD_EN | UMC_CMD_RX_PAUSE_IGNORE |
UMC_CMD_TX_PAUSE_IGNORE);
reg |= cmd_bits;
+ if (reg & UMC_CMD_SW_RESET) {
+ reg &= ~UMC_CMD_SW_RESET;
+ umac_wl(intf, reg, UMC_CMD);
+ udelay(2);
+ reg |= UMC_CMD_TX_EN | UMC_CMD_RX_EN | UMC_CMD_PROMISC;
+ }
umac_wl(intf, reg, UMC_CMD);
active = phy_init_eee(phydev, 0) >= 0;
intf->tx_spb_dma_read = intf->tx_spb_dma_addr;
intf->tx_spb_index = 0;
intf->tx_spb_clean_index = 0;
+ memset(intf->tx_cbs, 0, sizeof(struct bcmasp_tx_cb) * DESC_RING_COUNT);
/* Make sure channels are disabled */
tx_spb_ctrl_wl(intf, 0x0, TX_SPB_CTRL_ENABLE);
} while (timeout-- > 0);
tx_spb_dma_wl(intf, 0x0, TX_SPB_DMA_FIFO_CTRL);
+ bcmasp_tx_reclaim(intf);
+
umac_enable_set(intf, UMC_CMD_TX_EN, 0);
phy_stop(dev->phydev);
/* Indicate that the MAC is responsible for PHY PM */
phydev->mac_managed_pm = true;
- } else if (!intf->wolopts) {
- ret = phy_resume(dev->phydev);
- if (ret)
- goto err_phy_disable;
}
umac_reset(intf);
umac_init(intf);
- /* Disable the UniMAC RX/TX */
- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 0);
-
umac_set_hw_addr(intf, dev->dev_addr);
intf->old_duplex = -1;
netif_napi_add(intf->ndev, &intf->rx_napi, bcmasp_rx_poll);
bcmasp_enable_rx(intf, 1);
- /* Turn on UniMAC TX/RX */
- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 1);
-
intf->crc_fwd = !!(umac_rl(intf, UMC_CMD) & UMC_CMD_CRC_FWD);
bcmasp_netif_start(dev);
if (intf->wolopts & WAKE_FILTER)
bcmasp_netfilt_suspend(intf);
- /* UniMAC receive needs to be turned on */
+ /* Bring UniMAC out of reset if needed and enable RX */
+ reg = umac_rl(intf, UMC_CMD);
+ if (reg & UMC_CMD_SW_RESET)
+ reg &= ~UMC_CMD_SW_RESET;
+
+ reg |= UMC_CMD_RX_EN | UMC_CMD_PROMISC;
+ umac_wl(intf, reg, UMC_CMD);
+
umac_enable_set(intf, UMC_CMD_RX_EN, 1);
if (intf->parent->wol_irq > 0) {
{
struct device *kdev = &intf->parent->pdev->dev;
struct net_device *dev = intf->ndev;
- int ret = 0;
if (!netif_running(dev))
return 0;
bcmasp_netif_deinit(dev);
if (!intf->wolopts) {
- ret = phy_suspend(dev->phydev);
- if (ret)
- goto out;
-
if (intf->internal_phy)
bcmasp_ephy_enable_set(intf, false);
else
clk_disable_unprepare(intf->parent->clk);
- return ret;
-
-out:
- bcmasp_netif_init(dev, false);
- return ret;
+ return 0;
}
static void bcmasp_resume_from_wol(struct bcmasp_intf *intf)
bp->flags |= B44_FLAG_TX_PAUSE;
else
bp->flags &= ~B44_FLAG_TX_PAUSE;
- if (bp->flags & B44_FLAG_PAUSE_AUTO) {
- b44_halt(bp);
- b44_init_rings(bp);
- b44_init_hw(bp, B44_FULL_RESET);
- } else {
- __b44_set_flow_ctrl(bp, bp->flags);
+ if (netif_running(dev)) {
+ if (bp->flags & B44_FLAG_PAUSE_AUTO) {
+ b44_halt(bp);
+ b44_init_rings(bp);
+ b44_init_hw(bp, B44_FULL_RESET);
+ } else {
+ __b44_set_flow_ctrl(bp, bp->flags);
+ }
}
spin_unlock_irq(&bp->lock);
skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping);
if (!skb) {
bnxt_abort_tpa(cpr, idx, agg_bufs);
- cpr->sw_stats.rx.rx_oom_discards += 1;
+ cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1;
return NULL;
}
} else {
new_data = __bnxt_alloc_rx_frag(bp, &new_mapping, GFP_ATOMIC);
if (!new_data) {
bnxt_abort_tpa(cpr, idx, agg_bufs);
- cpr->sw_stats.rx.rx_oom_discards += 1;
+ cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1;
return NULL;
}
if (!skb) {
skb_free_frag(data);
bnxt_abort_tpa(cpr, idx, agg_bufs);
- cpr->sw_stats.rx.rx_oom_discards += 1;
+ cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1;
return NULL;
}
skb_reserve(skb, bp->rx_offset);
skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, idx, agg_bufs, true);
if (!skb) {
/* Page reuse already handled by bnxt_rx_pages(). */
- cpr->sw_stats.rx.rx_oom_discards += 1;
+ cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1;
return NULL;
}
}
u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp,
cp_cons, agg_bufs,
false);
- if (!frag_len) {
- cpr->sw_stats.rx.rx_oom_discards += 1;
- rc = -ENOMEM;
- goto next_rx;
- }
+ if (!frag_len)
+ goto oom_next_rx;
}
xdp_active = true;
}
else
bnxt_xdp_buff_frags_free(rxr, &xdp);
}
- cpr->sw_stats.rx.rx_oom_discards += 1;
- rc = -ENOMEM;
- goto next_rx;
+ goto oom_next_rx;
}
} else {
u32 payload;
payload = 0;
skb = bp->rx_skb_func(bp, rxr, cons, data, data_ptr, dma_addr,
payload | len);
- if (!skb) {
- cpr->sw_stats.rx.rx_oom_discards += 1;
- rc = -ENOMEM;
- goto next_rx;
- }
+ if (!skb)
+ goto oom_next_rx;
}
if (agg_bufs) {
if (!xdp_active) {
skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, cp_cons, agg_bufs, false);
- if (!skb) {
- cpr->sw_stats.rx.rx_oom_discards += 1;
- rc = -ENOMEM;
- goto next_rx;
- }
+ if (!skb)
+ goto oom_next_rx;
} else {
skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1);
if (!skb) {
/* we should be able to free the old skb here */
bnxt_xdp_buff_frags_free(rxr, &xdp);
- cpr->sw_stats.rx.rx_oom_discards += 1;
- rc = -ENOMEM;
- goto next_rx;
+ goto oom_next_rx;
}
}
}
*raw_cons = tmp_raw_cons;
return rc;
+
+oom_next_rx:
+ cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1;
+ rc = -ENOMEM;
+ goto next_rx;
}
/* In netpoll mode, if we are using a combined completion ring, we need to
}
rc = bnxt_rx_pkt(bp, cpr, raw_cons, event);
if (rc && rc != -EBUSY)
- cpr->sw_stats.rx.rx_netpoll_discards += 1;
+ cpr->bnapi->cp_ring.sw_stats.rx.rx_netpoll_discards += 1;
return rc;
}
BNXT_FW_HEALTH_WIN_BASE +
BNXT_GRC_REG_CHIP_NUM);
}
- if (!BNXT_CHIP_P5(bp))
+ if (!BNXT_CHIP_P5_PLUS(bp))
return;
status_loc = BNXT_GRC_REG_STATUS_P5 |
/* VF-reps may need to be re-opened after the PF is re-opened */
if (BNXT_PF(bp))
bnxt_vf_reps_open(bp);
+ if (bp->ptp_cfg)
+ atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS);
bnxt_ptp_init_rtc(bp, true);
bnxt_ptp_cfg_tstamp_filters(bp);
bnxt_cfg_usr_fltrs(bp);
bnxt_rtnl_unlock_sp(bp);
}
+static void bnxt_fw_fatal_close(struct bnxt *bp)
+{
+ bnxt_tx_disable(bp);
+ bnxt_disable_napi(bp);
+ bnxt_disable_int_sync(bp);
+ bnxt_free_irq(bp);
+ bnxt_clear_int_mode(bp);
+ pci_disable_device(bp->pdev);
+}
+
static void bnxt_fw_reset_close(struct bnxt *bp)
{
bnxt_ulp_stop(bp);
pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID, &val);
if (val == 0xffff)
bp->fw_reset_min_dsecs = 0;
- bnxt_tx_disable(bp);
- bnxt_disable_napi(bp);
- bnxt_disable_int_sync(bp);
- bnxt_free_irq(bp);
- bnxt_clear_int_mode(bp);
- pci_disable_device(bp->pdev);
+ bnxt_fw_fatal_close(bp);
}
__bnxt_close_nic(bp, true, false);
bnxt_vf_reps_free(bp);
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct bnxt *bp = netdev_priv(netdev);
+ bool abort = false;
netdev_info(netdev, "PCI I/O error detected\n");
bnxt_ulp_stop(bp);
- if (state == pci_channel_io_perm_failure) {
+ if (test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
+ netdev_err(bp->dev, "Firmware reset already in progress\n");
+ abort = true;
+ }
+
+ if (abort || state == pci_channel_io_perm_failure) {
rtnl_unlock();
return PCI_ERS_RESULT_DISCONNECT;
}
- if (state == pci_channel_io_frozen)
+ /* Link is not reliable anymore if state is pci_channel_io_frozen
+ * so we disable bus master to prevent any potential bad DMAs before
+ * freeing kernel memory.
+ */
+ if (state == pci_channel_io_frozen) {
set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state);
+ bnxt_fw_fatal_close(bp);
+ }
if (netif_running(netdev))
- bnxt_close(netdev);
+ __bnxt_close_nic(bp, true, true);
if (pci_is_enabled(pdev))
pci_disable_device(pdev);
}
reset_exit:
+ clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
bnxt_clear_reservations(bp, true);
rtnl_unlock();
if (err)
return;
+ if (edev->ulp_tbl->msix_requested)
+ bnxt_fill_msix_vecs(bp, edev->msix_entries);
+
if (aux_priv) {
struct auxiliary_device *adev;
if (!edev)
goto aux_dev_uninit;
+ aux_priv->edev = edev;
+
ulp = kzalloc(sizeof(*ulp), GFP_KERNEL);
if (!ulp)
goto aux_dev_uninit;
edev->ulp_tbl = ulp;
- aux_priv->edev = edev;
bp->edev = edev;
bnxt_set_edev_info(edev, bp);
}
/* Returns a reusable dma control register value */
-static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv)
+static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv, bool flush_rx)
{
unsigned int i;
u32 reg;
udelay(10);
bcmgenet_umac_writel(priv, 0, UMAC_TX_FLUSH);
+ if (flush_rx) {
+ reg = bcmgenet_rbuf_ctrl_get(priv);
+ bcmgenet_rbuf_ctrl_set(priv, reg | BIT(0));
+ udelay(10);
+ bcmgenet_rbuf_ctrl_set(priv, reg);
+ udelay(10);
+ }
+
return dma_ctrl;
}
bcmgenet_set_hw_addr(priv, dev->dev_addr);
- /* Disable RX/TX DMA and flush TX queues */
- dma_ctrl = bcmgenet_dma_disable(priv);
+ /* Disable RX/TX DMA and flush TX and RX queues */
+ dma_ctrl = bcmgenet_dma_disable(priv, true);
/* Reinitialize TDMA and RDMA and SW housekeeping */
ret = bcmgenet_init_dma(priv);
bcmgenet_hfb_create_rxnfc_filter(priv, rule);
/* Disable RX/TX DMA and flush TX queues */
- dma_ctrl = bcmgenet_dma_disable(priv);
+ dma_ctrl = bcmgenet_dma_disable(priv, false);
/* Reinitialize TDMA and RDMA and SW housekeeping */
ret = bcmgenet_init_dma(priv);
fep->link = 0;
fep->full_duplex = 0;
- phy_dev->mac_managed_pm = true;
-
phy_attached_info(phy_dev);
return 0;
struct net_device *ndev = platform_get_drvdata(pdev);
struct fec_enet_private *fep = netdev_priv(ndev);
bool suppress_preamble = false;
+ struct phy_device *phydev;
struct device_node *node;
int err = -ENXIO;
u32 mii_speed, holdtime;
u32 bus_freq;
+ int addr;
/*
* The i.MX28 dual fec interfaces are not equal.
goto err_out_free_mdiobus;
of_node_put(node);
+ /* find all the PHY devices on the bus and set mac_managed_pm to true */
+ for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+ phydev = mdiobus_get_phy(fep->mii_bus, addr);
+ if (phydev)
+ phydev->mac_managed_pm = true;
+ }
+
mii_cnt++;
/* save fec0 mii_bus */
hclge_comm_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_TX_STATS,
true);
- desc.data[0] = cpu_to_le32(tqp->index & 0x1ff);
+ desc.data[0] = cpu_to_le32(tqp->index);
ret = hclge_comm_cmd_send(hw, &desc, 1);
if (ret) {
dev_err(&hw->cmq.csq.pdev->dev,
#define HNS3_NIC_LB_TEST_NO_MEM_ERR 1
#define HNS3_NIC_LB_TEST_TX_CNT_ERR 2
#define HNS3_NIC_LB_TEST_RX_CNT_ERR 3
+#define HNS3_NIC_LB_TEST_UNEXECUTED 4
+
+static int hns3_get_sset_count(struct net_device *netdev, int stringset);
static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
{
static void hns3_self_test(struct net_device *ndev,
struct ethtool_test *eth_test, u64 *data)
{
+ int cnt = hns3_get_sset_count(ndev, ETH_SS_TEST);
struct hns3_nic_priv *priv = netdev_priv(ndev);
struct hnae3_handle *h = priv->ae_handle;
int st_param[HNAE3_LOOP_NONE][2];
bool if_running = netif_running(ndev);
+ int i;
+
+ /* initialize the loopback test result, avoid marking an unexcuted
+ * loopback test as PASS.
+ */
+ for (i = 0; i < cnt; i++)
+ data[i] = HNS3_NIC_LB_TEST_UNEXECUTED;
if (hns3_nic_resetting(ndev)) {
netdev_err(ndev, "dev resetting!");
- return;
+ goto failure;
}
if (!(eth_test->flags & ETH_TEST_FL_OFFLINE))
- return;
+ goto failure;
if (netif_msg_ifdown(h))
netdev_info(ndev, "self test start\n");
if (netif_msg_ifdown(h))
netdev_info(ndev, "self test end\n");
+ return;
+
+failure:
+ eth_test->flags |= ETH_TEST_FL_FAILED;
}
static void hns3_update_limit_promisc_mode(struct net_device *netdev,
if (ret)
goto err_pci_uninit;
+ devl_lock(hdev->devlink);
+
/* Firmware command queue initialize */
ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw);
if (ret)
hclge_task_schedule(hdev, round_jiffies_relative(HZ));
+ devl_unlock(hdev->devlink);
return 0;
err_mdiobus_unreg:
err_cmd_uninit:
hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw);
err_devlink_uninit:
+ devl_unlock(hdev->devlink);
hclge_devlink_uninit(hdev);
err_pci_uninit:
pcim_iounmap(pdev, hdev->hw.hw.io_base);
u32 id;
u32 reset_delay_us; /* in usec */
u32 revision;
+ u32 retry_count;
enum e1000_media_type media_type;
bool polarity_correction;
bool speed_downgraded;
bool autoneg_wait_to_complete;
+ bool retry_enabled;
};
struct e1000_nvm_info {
if (hw->mac.type >= e1000_pch_lpt) {
/* Only unforce SMBus if ME is not active */
if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
/* Unforce SMBus mode in PHY */
e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);
+ e1000e_enable_phy_retry(hw);
+
/* Unforce SMBus mode in MAC */
mac_reg = er32(CTRL_EXT);
mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
goto out;
}
+ /* There is no guarantee that the PHY is accessible at this time
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
/* The MAC-PHY interconnect may be in SMBus mode. If the PHY is
* inaccessible and resetting the PHY is not blocked, toggle the
* LANPHYPC Value bit to force the interconnect to PCIe mode.
break;
}
+ e1000e_enable_phy_retry(hw);
+
hw->phy.ops.release(hw);
if (!ret_val) {
phy->id = e1000_phy_unknown;
+ if (hw->mac.type == e1000_pch_mtp) {
+ phy->retry_count = 2;
+ e1000e_enable_phy_retry(hw);
+ }
+
ret_val = e1000_init_phy_workarounds_pchlan(hw);
if (ret_val)
return ret_val;
if (ret_val)
goto out;
- /* Force SMBus mode in PHY */
- ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
- if (ret_val)
- goto release;
- phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
- e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
-
- /* Force SMBus mode in MAC */
- mac_reg = er32(CTRL_EXT);
- mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_reg);
-
/* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
* LPLU and disable Gig speed when entering ULP
*/
/* Toggle LANPHYPC Value bit */
e1000_toggle_lanphypc_pch_lpt(hw);
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
/* Unforce SMBus mode in PHY */
ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
if (ret_val) {
phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
+ e1000e_enable_phy_retry(hw);
+
/* Unforce SMBus mode in MAC */
mac_reg = er32(CTRL_EXT);
mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
struct e1000_hw *hw = &adapter->hw;
u32 ctrl, ctrl_ext, rctl, status, wufc;
int retval = 0;
+ u16 smb_ctrl;
/* Runtime suspend should only enable wakeup for link changes */
if (runtime)
if (retval)
return retval;
}
+
+ /* Force SMBUS to allow WOL */
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
+ e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl);
+ smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS;
+ e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl);
+
+ e1000e_enable_phy_retry(hw);
+
+ /* Force SMBus mode in MAC */
+ ctrl_ext = er32(CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, ctrl_ext);
}
/* Ensure that the appropriate bits are set in LPI_CTRL
return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0);
}
+void e1000e_disable_phy_retry(struct e1000_hw *hw)
+{
+ hw->phy.retry_enabled = false;
+}
+
+void e1000e_enable_phy_retry(struct e1000_hw *hw)
+{
+ hw->phy.retry_enabled = true;
+}
+
/**
* e1000e_read_phy_reg_mdic - Read MDI control register
* @hw: pointer to the HW structure
**/
s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
{
+ u32 i, mdic = 0, retry_counter, retry_max;
struct e1000_phy_info *phy = &hw->phy;
- u32 i, mdic = 0;
+ bool success;
if (offset > MAX_PHY_REG_ADDRESS) {
e_dbg("PHY Address %d is out of range\n", offset);
return -E1000_ERR_PARAM;
}
+ retry_max = phy->retry_enabled ? phy->retry_count : 0;
+
/* Set up Op-code, Phy Address, and register offset in the MDI
* Control register. The MAC will take care of interfacing with the
* PHY to retrieve the desired data.
*/
- mdic = ((offset << E1000_MDIC_REG_SHIFT) |
- (phy->addr << E1000_MDIC_PHY_SHIFT) |
- (E1000_MDIC_OP_READ));
+ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) {
+ success = true;
- ew32(MDIC, mdic);
+ mdic = ((offset << E1000_MDIC_REG_SHIFT) |
+ (phy->addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_READ));
- /* Poll the ready bit to see if the MDI read completed
- * Increasing the time out as testing showed failures with
- * the lower time out
- */
- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
- udelay(50);
- mdic = er32(MDIC);
- if (mdic & E1000_MDIC_READY)
- break;
- }
- if (!(mdic & E1000_MDIC_READY)) {
- e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset);
- return -E1000_ERR_PHY;
- }
- if (mdic & E1000_MDIC_ERROR) {
- e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
- return -E1000_ERR_PHY;
- }
- if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
- e_dbg("MDI Read offset error - requested %d, returned %d\n",
- offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
- return -E1000_ERR_PHY;
+ ew32(MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI read completed
+ * Increasing the time out as testing showed failures with
+ * the lower time out
+ */
+ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+ usleep_range(50, 60);
+ mdic = er32(MDIC);
+ if (mdic & E1000_MDIC_READY)
+ break;
+ }
+ if (!(mdic & E1000_MDIC_READY)) {
+ e_dbg("MDI Read PHY Reg Address %d did not complete\n",
+ offset);
+ success = false;
+ }
+ if (mdic & E1000_MDIC_ERROR) {
+ e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
+ success = false;
+ }
+ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
+ e_dbg("MDI Read offset error - requested %d, returned %d\n",
+ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
+ success = false;
+ }
+
+ /* Allow some time after each MDIC transaction to avoid
+ * reading duplicate data in the next MDIC transaction.
+ */
+ if (hw->mac.type == e1000_pch2lan)
+ usleep_range(100, 150);
+
+ if (success) {
+ *data = (u16)mdic;
+ return 0;
+ }
+
+ if (retry_counter != retry_max) {
+ e_dbg("Perform retry on PHY transaction...\n");
+ mdelay(10);
+ }
}
- *data = (u16)mdic;
- /* Allow some time after each MDIC transaction to avoid
- * reading duplicate data in the next MDIC transaction.
- */
- if (hw->mac.type == e1000_pch2lan)
- udelay(100);
- return 0;
+ return -E1000_ERR_PHY;
}
/**
**/
s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
{
+ u32 i, mdic = 0, retry_counter, retry_max;
struct e1000_phy_info *phy = &hw->phy;
- u32 i, mdic = 0;
+ bool success;
if (offset > MAX_PHY_REG_ADDRESS) {
e_dbg("PHY Address %d is out of range\n", offset);
return -E1000_ERR_PARAM;
}
+ retry_max = phy->retry_enabled ? phy->retry_count : 0;
+
/* Set up Op-code, Phy Address, and register offset in the MDI
* Control register. The MAC will take care of interfacing with the
* PHY to retrieve the desired data.
*/
- mdic = (((u32)data) |
- (offset << E1000_MDIC_REG_SHIFT) |
- (phy->addr << E1000_MDIC_PHY_SHIFT) |
- (E1000_MDIC_OP_WRITE));
+ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) {
+ success = true;
- ew32(MDIC, mdic);
+ mdic = (((u32)data) |
+ (offset << E1000_MDIC_REG_SHIFT) |
+ (phy->addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_WRITE));
- /* Poll the ready bit to see if the MDI read completed
- * Increasing the time out as testing showed failures with
- * the lower time out
- */
- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
- udelay(50);
- mdic = er32(MDIC);
- if (mdic & E1000_MDIC_READY)
- break;
- }
- if (!(mdic & E1000_MDIC_READY)) {
- e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset);
- return -E1000_ERR_PHY;
- }
- if (mdic & E1000_MDIC_ERROR) {
- e_dbg("MDI Write PHY Red Address %d Error\n", offset);
- return -E1000_ERR_PHY;
- }
- if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
- e_dbg("MDI Write offset error - requested %d, returned %d\n",
- offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
- return -E1000_ERR_PHY;
- }
+ ew32(MDIC, mdic);
- /* Allow some time after each MDIC transaction to avoid
- * reading duplicate data in the next MDIC transaction.
- */
- if (hw->mac.type == e1000_pch2lan)
- udelay(100);
+ /* Poll the ready bit to see if the MDI read completed
+ * Increasing the time out as testing showed failures with
+ * the lower time out
+ */
+ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+ usleep_range(50, 60);
+ mdic = er32(MDIC);
+ if (mdic & E1000_MDIC_READY)
+ break;
+ }
+ if (!(mdic & E1000_MDIC_READY)) {
+ e_dbg("MDI Write PHY Reg Address %d did not complete\n",
+ offset);
+ success = false;
+ }
+ if (mdic & E1000_MDIC_ERROR) {
+ e_dbg("MDI Write PHY Reg Address %d Error\n", offset);
+ success = false;
+ }
+ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
+ e_dbg("MDI Write offset error - requested %d, returned %d\n",
+ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
+ success = false;
+ }
- return 0;
+ /* Allow some time after each MDIC transaction to avoid
+ * reading duplicate data in the next MDIC transaction.
+ */
+ if (hw->mac.type == e1000_pch2lan)
+ usleep_range(100, 150);
+
+ if (success)
+ return 0;
+
+ if (retry_counter != retry_max) {
+ e_dbg("Perform retry on PHY transaction...\n");
+ mdelay(10);
+ }
+ }
+
+ return -E1000_ERR_PHY;
}
/**
s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data);
void e1000_power_up_phy_copper(struct e1000_hw *hw);
void e1000_power_down_phy_copper(struct e1000_hw *hw);
+void e1000e_disable_phy_retry(struct e1000_hw *hw);
+void e1000e_enable_phy_retry(struct e1000_hw *hw);
s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data);
s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data);
struct rcu_head rcu; /* to avoid race with update stats on free */
char name[I40E_INT_NAME_STR_LEN];
bool arm_wb_state;
+ bool in_busy_poll;
int irq_num; /* IRQ assigned to this q_vector */
} ____cacheline_internodealigned_in_smp;
int bkt;
int cnt = 0;
- hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
- ++cnt;
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+ if (f->state == I40E_FILTER_NEW ||
+ f->state == I40E_FILTER_ACTIVE)
+ ++cnt;
+ }
return cnt;
}
q_vector->tx.target_itr >> 1);
q_vector->tx.current_itr = q_vector->tx.target_itr;
+ /* Set ITR for software interrupts triggered after exiting
+ * busy-loop polling.
+ */
+ wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1),
+ I40E_ITR_20K);
+
wr32(hw, I40E_PFINT_RATEN(vector - 1),
i40e_intrl_usec_to_reg(vsi->int_rate_limit));
val = FIELD_GET(I40E_PRTGL_SAH_MFS_MASK,
rd32(&pf->hw, I40E_PRTGL_SAH));
if (val < MAX_FRAME_SIZE_DEFAULT)
- dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n",
- pf->hw.port, val);
+ dev_warn(&pdev->dev, "MFS for port %x (%d) has been set below the default (%d)\n",
+ pf->hw.port, val, MAX_FRAME_SIZE_DEFAULT);
/* Add a filter to drop all Flow control frames from any VSI from being
* transmitted. By doing so we stop a malicious VF from sending out
* since we need to be able to guarantee forward progress even under
* memory pressure.
*/
- i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name);
+ i40e_wq = alloc_workqueue("%s", 0, 0, i40e_driver_name);
if (!i40e_wq) {
pr_err("%s: Failed to create workqueue\n", i40e_driver_name);
return -ENOMEM;
#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
#define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5
+#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT)
#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24
#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT)
#define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */
#define I40E_PFINT_ICR0_INTEVENT_SHIFT 0
#define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT)
return failure ? budget : (int)total_rx_packets;
}
-static inline u32 i40e_buildreg_itr(const int type, u16 itr)
+/**
+ * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register
+ * @itr_idx: interrupt throttling index
+ * @interval: interrupt throttling interval value in usecs
+ * @force_swint: force software interrupt
+ *
+ * The function builds a value for I40E_PFINT_DYN_CTLN register that
+ * is used to update interrupt throttling interval for specified ITR index
+ * and optionally enforces a software interrupt. If the @itr_idx is equal
+ * to I40E_ITR_NONE then no interval change is applied and only @force_swint
+ * parameter is taken into account. If the interval change and enforced
+ * software interrupt are not requested then the built value just enables
+ * appropriate vector interrupt.
+ **/
+static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval,
+ bool force_swint)
{
u32 val;
* an event in the PBA anyway so we need to rely on the automask
* to hold pending events for us until the interrupt is re-enabled
*
- * The itr value is reported in microseconds, and the register
- * value is recorded in 2 microsecond units. For this reason we
- * only need to shift by the interval shift - 1 instead of the
- * full value.
+ * We have to shift the given value as it is reported in microseconds
+ * and the register value is recorded in 2 microsecond units.
*/
- itr &= I40E_ITR_MASK;
+ interval >>= 1;
+ /* 1. Enable vector interrupt
+ * 2. Update the interval for the specified ITR index
+ * (I40E_ITR_NONE in the register is used to indicate that
+ * no interval update is requested)
+ */
val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
- (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) |
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval);
+
+ /* 3. Enforce software interrupt trigger if requested
+ * (These software interrupts rate is limited by ITR2 that is
+ * set to 20K interrupts per second)
+ */
+ if (force_swint)
+ val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+ I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK |
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK,
+ I40E_SW_ITR);
return val;
}
-/* a small macro to shorten up some long lines */
-#define INTREG I40E_PFINT_DYN_CTLN
-
/* The act of updating the ITR will cause it to immediately trigger. In order
* to prevent this from throwing off adaptive update statistics we defer the
* update so that it can only happen so often. So after either Tx or Rx are
static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
struct i40e_q_vector *q_vector)
{
+ enum i40e_dyn_idx itr_idx = I40E_ITR_NONE;
struct i40e_hw *hw = &vsi->back->hw;
- u32 intval;
+ u16 interval = 0;
+ u32 itr_val;
/* If we don't have MSIX, then we only need to re-enable icr0 */
if (!test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) {
*/
if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
/* Rx ITR needs to be reduced, this is highest priority */
- intval = i40e_buildreg_itr(I40E_RX_ITR,
- q_vector->rx.target_itr);
+ itr_idx = I40E_RX_ITR;
+ interval = q_vector->rx.target_itr;
q_vector->rx.current_itr = q_vector->rx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
/* Tx ITR needs to be reduced, this is second priority
* Tx ITR needs to be increased more than Rx, fourth priority
*/
- intval = i40e_buildreg_itr(I40E_TX_ITR,
- q_vector->tx.target_itr);
+ itr_idx = I40E_TX_ITR;
+ interval = q_vector->tx.target_itr;
q_vector->tx.current_itr = q_vector->tx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
/* Rx ITR needs to be increased, third priority */
- intval = i40e_buildreg_itr(I40E_RX_ITR,
- q_vector->rx.target_itr);
+ itr_idx = I40E_RX_ITR;
+ interval = q_vector->rx.target_itr;
q_vector->rx.current_itr = q_vector->rx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else {
/* No ITR update, lowest priority */
- intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
if (q_vector->itr_countdown)
q_vector->itr_countdown--;
}
- if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(q_vector->reg_idx), intval);
+ /* Do not update interrupt control register if VSI is down */
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ return;
+
+ /* Update ITR interval if necessary and enforce software interrupt
+ * if we are exiting busy poll.
+ */
+ if (q_vector->in_busy_poll) {
+ itr_val = i40e_buildreg_itr(itr_idx, interval, true);
+ q_vector->in_busy_poll = false;
+ } else {
+ itr_val = i40e_buildreg_itr(itr_idx, interval, false);
+ }
+ wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val);
}
/**
*/
if (likely(napi_complete_done(napi, work_done)))
i40e_update_enable_itr(vsi, q_vector);
+ else
+ q_vector->in_busy_poll = true;
return min(work_done, budget - 1);
}
/* these are indexes into ITRN registers */
#define I40E_RX_ITR I40E_IDX_ITR0
#define I40E_TX_ITR I40E_IDX_ITR1
+#define I40E_SW_ITR I40E_IDX_ITR2
/* Supported RSS offloads */
#define I40E_DEFAULT_RSS_HENA ( \
{
struct i40e_hw *hw = &pf->hw;
struct i40e_vf *vf;
- int i, v;
u32 reg;
+ int i;
/* If we don't have any VFs, then there is nothing to reset */
if (!pf->num_alloc_vfs)
return false;
/* Begin reset on all VFs at once */
- for (v = 0; v < pf->num_alloc_vfs; v++) {
- vf = &pf->vf[v];
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* If VF is being reset no need to trigger reset again */
if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
- i40e_trigger_vf_reset(&pf->vf[v], flr);
+ i40e_trigger_vf_reset(vf, flr);
}
/* HW requires some time to make sure it can flush the FIFO for a VF
* the VFs using a simple iterator that increments once that VF has
* finished resetting.
*/
- for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) {
+ for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) {
usleep_range(10000, 20000);
/* Check each VF in sequence, beginning with the VF to fail
* the previous check.
*/
- while (v < pf->num_alloc_vfs) {
- vf = &pf->vf[v];
+ while (vf < &pf->vf[pf->num_alloc_vfs]) {
if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) {
reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id));
if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK))
/* If the current VF has finished resetting, move on
* to the next VF in sequence.
*/
- v++;
+ ++vf;
}
}
/* Display a warning if at least one VF didn't manage to reset in
* time, but continue on with the operation.
*/
- if (v < pf->num_alloc_vfs)
+ if (vf < &pf->vf[pf->num_alloc_vfs])
dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
- pf->vf[v].vf_id);
+ vf->vf_id);
usleep_range(10000, 20000);
/* Begin disabling all the rings associated with VFs, but do not wait
* between each VF.
*/
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* On initial reset, we don't have any queues to disable */
- if (pf->vf[v].lan_vsi_idx == 0)
+ if (vf->lan_vsi_idx == 0)
continue;
/* If VF is reset in another thread just continue */
if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
continue;
- i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]);
+ i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]);
}
/* Now that we've notified HW to disable all of the VF rings, wait
* until they finish.
*/
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* On initial reset, we don't have any queues to disable */
- if (pf->vf[v].lan_vsi_idx == 0)
+ if (vf->lan_vsi_idx == 0)
continue;
/* If VF is reset in another thread just continue */
if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
continue;
- i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]);
+ i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]);
}
/* Hw may need up to 50ms to finish disabling the RX queues. We
mdelay(50);
/* Finish the reset on each VF */
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* If VF is reset in another thread just continue */
if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
continue;
- i40e_cleanup_reset_vf(&pf->vf[v]);
+ i40e_cleanup_reset_vf(vf);
}
i40e_flush(hw);
/* Allow to delete VF primary MAC only if it was not set
* administratively by PF or if VF is trusted.
*/
- if (ether_addr_equal(addr, vf->default_lan_addr.addr) &&
- i40e_can_vf_change_mac(vf))
- was_unimac_deleted = true;
- else
- continue;
+ if (ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+ if (i40e_can_vf_change_mac(vf))
+ was_unimac_deleted = true;
+ else
+ continue;
+ }
if (i40e_del_mac_filter(vsi, al->list[i].addr)) {
ret = -EINVAL;
spin_unlock_bh(&adapter->cloud_filter_list_lock);
}
+/**
+ * iavf_is_tc_config_same - Compare the mqprio TC config with the
+ * TC config already configured on this adapter.
+ * @adapter: board private structure
+ * @mqprio_qopt: TC config received from kernel.
+ *
+ * This function compares the TC config received from the kernel
+ * with the config already configured on the adapter.
+ *
+ * Return: True if configuration is same, false otherwise.
+ **/
+static bool iavf_is_tc_config_same(struct iavf_adapter *adapter,
+ struct tc_mqprio_qopt *mqprio_qopt)
+{
+ struct virtchnl_channel_info *ch = &adapter->ch_config.ch_info[0];
+ int i;
+
+ if (adapter->num_tc != mqprio_qopt->num_tc)
+ return false;
+
+ for (i = 0; i < adapter->num_tc; i++) {
+ if (ch[i].count != mqprio_qopt->count[i] ||
+ ch[i].offset != mqprio_qopt->offset[i])
+ return false;
+ }
+ return true;
+}
+
/**
* __iavf_setup_tc - configure multiple traffic classes
* @netdev: network interface device structure
if (ret)
return ret;
/* Return if same TC config is requested */
- if (adapter->num_tc == num_tc)
+ if (iavf_is_tc_config_same(adapter, &mqprio_qopt->qopt))
return 0;
adapter->num_tc = num_tc;
struct ice_aqc_recipe_to_profile {
__le16 profile_id;
u8 rsvd[6];
- DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES);
+ __le64 recipe_assoc;
};
+static_assert(sizeof(struct ice_aqc_recipe_to_profile) == 16);
/* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3)
*/
*/
int ice_init_hw(struct ice_hw *hw)
{
- struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
- void *mac_buf __free(kfree);
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
+ void *mac_buf __free(kfree) = NULL;
u16 mac_buf_len;
int status;
return status;
if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) {
- struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
int
ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
{
- struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
struct ice_aqc_set_phy_cfg_data cfg = { 0 };
struct ice_hw *hw;
int status;
ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
enum ice_fec_mode fec)
{
- struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
struct ice_hw *hw;
int status;
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
struct ice_pf *pf = orig_vsi->back;
+ u8 *tx_frame __free(kfree) = NULL;
u8 broadcast[ETH_ALEN], ret = 0;
int num_frames, valid_frames;
struct ice_tx_ring *tx_ring;
struct ice_rx_ring *rx_ring;
- u8 *tx_frame __free(kfree);
int i;
netdev_info(netdev, "loopback test\n");
/* associate recipes to profiles */
for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) {
err = ice_aq_get_recipe_to_profile(&pf->hw, n,
- (u8 *)&recipe_bits, NULL);
+ &recipe_bits, NULL);
if (err)
continue;
recipe_bits |= BIT(lag->pf_recipe) |
BIT(lag->lport_recipe);
ice_aq_map_recipe_to_profile(&pf->hw, n,
- (u8 *)&recipe_bits, NULL);
+ recipe_bits, NULL);
}
}
{
struct ice_vsi_cfg_params params = {};
struct ice_coalesce_stored *coalesce;
- int prev_num_q_vectors = 0;
+ int prev_num_q_vectors;
struct ice_pf *pf;
int ret;
if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
return -EINVAL;
- coalesce = kcalloc(vsi->num_q_vectors,
- sizeof(struct ice_coalesce_stored), GFP_KERNEL);
- if (!coalesce)
- return -ENOMEM;
-
- prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
-
ret = ice_vsi_realloc_stat_arrays(vsi);
if (ret)
goto err_vsi_cfg;
if (ret)
goto err_vsi_cfg;
+ coalesce = kcalloc(vsi->num_q_vectors,
+ sizeof(struct ice_coalesce_stored), GFP_KERNEL);
+ if (!coalesce)
+ return -ENOMEM;
+
+ prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
+
ret = ice_vsi_cfg_tc_lan(pf, vsi);
if (ret) {
if (vsi_flags & ICE_VSI_FLAG_INIT) {
err_vsi_cfg_tc_lan:
ice_vsi_decfg(vsi);
-err_vsi_cfg:
kfree(coalesce);
+err_vsi_cfg:
return ret;
}
* ice_aq_map_recipe_to_profile - Map recipe to packet profile
* @hw: pointer to the HW struct
* @profile_id: package profile ID to associate the recipe with
- * @r_bitmap: Recipe bitmap filled in and need to be returned as response
+ * @r_assoc: Recipe bitmap filled in and need to be returned as response
* @cd: pointer to command details structure or NULL
* Recipe to profile association (0x0291)
*/
int
-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc,
struct ice_sq_cd *cd)
{
struct ice_aqc_recipe_to_profile *cmd;
/* Set the recipe ID bit in the bitmask to let the device know which
* profile we are associating the recipe to
*/
- memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc));
+ cmd->recipe_assoc = cpu_to_le64(r_assoc);
return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
}
* ice_aq_get_recipe_to_profile - Map recipe to packet profile
* @hw: pointer to the HW struct
* @profile_id: package profile ID to associate the recipe with
- * @r_bitmap: Recipe bitmap filled in and need to be returned as response
+ * @r_assoc: Recipe bitmap filled in and need to be returned as response
* @cd: pointer to command details structure or NULL
* Associate profile ID with given recipe (0x0293)
*/
int
-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc,
struct ice_sq_cd *cd)
{
struct ice_aqc_recipe_to_profile *cmd;
status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
if (!status)
- memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc));
+ *r_assoc = le64_to_cpu(cmd->recipe_assoc);
return status;
}
static void ice_get_recp_to_prof_map(struct ice_hw *hw)
{
DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+ u64 recp_assoc;
u16 i;
for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) {
bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES);
bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES);
- if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL))
+ if (ice_aq_get_recipe_to_profile(hw, i, &recp_assoc, NULL))
continue;
+ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES);
bitmap_copy(profile_to_recipe[i], r_bitmap,
ICE_MAX_NUM_RECIPES);
for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES)
*/
list_for_each_entry(fvit, &rm->fv_list, list_entry) {
DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+ u64 recp_assoc;
u16 j;
status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id,
- (u8 *)r_bitmap, NULL);
+ &recp_assoc, NULL);
if (status)
goto err_unroll;
+ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES);
bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap,
ICE_MAX_NUM_RECIPES);
status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
if (status)
goto err_unroll;
+ bitmap_to_arr64(&recp_assoc, r_bitmap, ICE_MAX_NUM_RECIPES);
status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id,
- (u8 *)r_bitmap,
- NULL);
+ recp_assoc, NULL);
ice_release_change_lock(hw);
if (status)
struct ice_aqc_recipe_data_elem *s_recipe_list,
u16 num_recipes, struct ice_sq_cd *cd);
int
-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc,
struct ice_sq_cd *cd);
int
-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc,
struct ice_sq_cd *cd);
#endif /* _ICE_SWITCH_H_ */
* - ICE_TC_FLWR_FIELD_VLAN_TPID (present if specified)
* - Tunnel flag (present if tunnel)
*/
+ if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS)
+ lkups_cnt++;
if (flags & ICE_TC_FLWR_FIELD_TENANT_ID)
lkups_cnt++;
/* Always add direction metadata */
ice_rule_add_direction_metadata(&list[ICE_TC_METADATA_LKUP_IDX]);
+ if (tc_fltr->direction == ICE_ESWITCH_FLTR_EGRESS) {
+ ice_rule_add_src_vsi_metadata(&list[i]);
+ i++;
+ }
+
rule_info->tun_type = ice_sw_type_from_tunnel(tc_fltr->tunnel_type);
if (tc_fltr->tunnel_type != TNL_LAST) {
i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list, i);
int ret;
int i;
- if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) {
+ if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT) {
NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)");
return -EOPNOTSUPP;
}
/* specify the cookie as filter_rule_id */
rule_info.fltr_rule_id = fltr->cookie;
+ rule_info.src_vsi = vsi->idx;
ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added);
if (ret == -EEXIST) {
(BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
- BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL))) {
NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel key used, but device isn't a tunnel");
return -EOPNOTSUPP;
} else {
return 0;
}
+ if (flags & ICE_VF_RESET_LOCK)
+ mutex_lock(&vf->cfg_lock);
+ else
+ lockdep_assert_held(&vf->cfg_lock);
+
lag = pf->lag;
mutex_lock(&pf->lag_mutex);
if (lag && lag->bonded && lag->primary) {
act_prt = ICE_LAG_INVALID_PORT;
}
- if (flags & ICE_VF_RESET_LOCK)
- mutex_lock(&vf->cfg_lock);
- else
- lockdep_assert_held(&vf->cfg_lock);
-
if (ice_is_vf_disabled(vf)) {
vsi = ice_get_vf_vsi(vf);
if (!vsi) {
ice_mbx_clear_malvf(&vf->mbx_info);
out_unlock:
- if (flags & ICE_VF_RESET_LOCK)
- mutex_unlock(&vf->cfg_lock);
-
if (lag && lag->bonded && lag->primary &&
act_prt != ICE_LAG_INVALID_PORT)
ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt);
mutex_unlock(&pf->lag_mutex);
+ if (flags & ICE_VF_RESET_LOCK)
+ mutex_unlock(&vf->cfg_lock);
+
return err;
}
struct ice_vsi_vlan_ops *vlan_ops;
struct ice_pf *pf = vsi->back;
- if (ice_is_dvm_ena(&pf->hw)) {
- vlan_ops = &vsi->outer_vlan_ops;
-
- /* setup outer VLAN ops */
- vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
- vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+ /* setup inner VLAN ops */
+ vlan_ops = &vsi->inner_vlan_ops;
- /* setup inner VLAN ops */
- vlan_ops = &vsi->inner_vlan_ops;
+ if (ice_is_dvm_ena(&pf->hw)) {
vlan_ops->add_vlan = noop_vlan_arg;
vlan_ops->del_vlan = noop_vlan_arg;
vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
- } else {
- vlan_ops = &vsi->inner_vlan_ops;
+ /* setup outer VLAN ops */
+ vlan_ops = &vsi->outer_vlan_ops;
+ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
+ vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+ } else {
vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan;
}
rx_ptype = le16_get_bits(rx_desc->ptype_err_fflags0,
VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M);
+ skb->protocol = eth_type_trans(skb, rxq->vport->netdev);
+
decoded = rxq->vport->rx_ptype_lkup[rx_ptype];
/* If we don't know the ptype we can't do anything else with it. Just
* pass it up the stack as-is.
/* process RSS/hash */
idpf_rx_hash(rxq, skb, rx_desc, &decoded);
- skb->protocol = eth_type_trans(skb, rxq->vport->netdev);
-
if (le16_get_bits(rx_desc->hdrlen_flags,
VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M))
return idpf_rx_rsc(rxq, skb, rx_desc, &decoded);
/* LEDs */
struct mutex led_mutex;
+ struct igc_led_classdev *leds;
};
void igc_up(struct igc_adapter *adapter);
void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter);
int igc_led_setup(struct igc_adapter *adapter);
+void igc_led_free(struct igc_adapter *adapter);
#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
pci_dev_id(adapter->pdev), index);
}
-static void igc_setup_ldev(struct igc_led_classdev *ldev,
- struct net_device *netdev, int index)
+static int igc_setup_ldev(struct igc_led_classdev *ldev,
+ struct net_device *netdev, int index)
{
struct igc_adapter *adapter = netdev_priv(netdev);
struct led_classdev *led_cdev = &ldev->led;
led_cdev->hw_control_get = igc_led_hw_control_get;
led_cdev->hw_control_get_device = igc_led_hw_control_get_device;
- devm_led_classdev_register(&netdev->dev, led_cdev);
+ return led_classdev_register(&netdev->dev, led_cdev);
}
int igc_led_setup(struct igc_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
- struct device *dev = &netdev->dev;
struct igc_led_classdev *leds;
- int i;
+ int i, err;
mutex_init(&adapter->led_mutex);
- leds = devm_kcalloc(dev, IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+ leds = kcalloc(IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
if (!leds)
return -ENOMEM;
- for (i = 0; i < IGC_NUM_LEDS; i++)
- igc_setup_ldev(leds + i, netdev, i);
+ for (i = 0; i < IGC_NUM_LEDS; i++) {
+ err = igc_setup_ldev(leds + i, netdev, i);
+ if (err)
+ goto err;
+ }
+
+ adapter->leds = leds;
return 0;
+
+err:
+ for (i--; i >= 0; i--)
+ led_classdev_unregister(&((leds + i)->led));
+
+ kfree(leds);
+ return err;
+}
+
+void igc_led_free(struct igc_adapter *adapter)
+{
+ struct igc_led_classdev *leds = adapter->leds;
+ int i;
+
+ for (i = 0; i < IGC_NUM_LEDS; i++)
+ led_classdev_unregister(&((leds + i)->led));
+
+ kfree(leds);
}
if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) &&
skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
- /* FIXME: add support for retrieving timestamps from
- * the other timer registers before skipping the
- * timestamping request.
- */
unsigned long flags;
u32 tstamp_flags;
cancel_work_sync(&adapter->watchdog_task);
hrtimer_cancel(&adapter->hrtimer);
+ if (IS_ENABLED(CONFIG_IGC_LEDS))
+ igc_led_free(adapter);
+
/* Release control of h/w to f/w. If f/w is AMT enabled, this
* would have already happened in close and is redundant.
*/
goto err_out;
}
- xs = kzalloc(sizeof(*xs), GFP_KERNEL);
+ algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
+ if (unlikely(!algo)) {
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ xs = kzalloc(sizeof(*xs), GFP_ATOMIC);
if (unlikely(!xs)) {
err = -ENOMEM;
goto err_out;
memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4));
xs->xso.dev = adapter->netdev;
- algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
- if (unlikely(!algo)) {
- err = -ENOENT;
- goto err_xs;
- }
-
aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8;
- xs->aead = kzalloc(aead_len, GFP_KERNEL);
+ xs->aead = kzalloc(aead_len, GFP_ATOMIC);
if (unlikely(!xs->aead)) {
err = -ENOMEM;
goto err_xs;
if (!is_lmac_valid(cgx, lmac_id))
return -ENODEV;
+ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
+ cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK;
+ cfg |= rx_pause ? CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0;
+ cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
+
cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK;
cfg |= rx_pause ? CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0;
continue;
lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu));
for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) {
+ if (iter >= MAX_LMAC_COUNT)
+ continue;
lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu),
iter);
rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac);
*/
rvu_write64(rvu, blkaddr, NIX_AF_CFG,
rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL);
+ }
- /* Set chan/link to backpressure TL3 instead of TL2 */
- rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
+ /* Set chan/link to backpressure TL3 instead of TL2 */
+ rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
- /* Disable SQ manager's sticky mode operation (set TM6 = 0)
- * This sticky mode is known to cause SQ stalls when multiple
- * SQs are mapped to same SMQ and transmitting pkts at a time.
- */
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
- cfg &= ~BIT_ULL(15);
- rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
- }
+ /* Disable SQ manager's sticky mode operation (set TM6 = 0)
+ * This sticky mode is known to cause SQ stalls when multiple
+ * SQs are mapped to same SMQ and transmitting pkts at a time.
+ */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
+ cfg &= ~BIT_ULL(15);
+ rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
ltdefs = rvu->kpu.lt_def;
/* Calibrate X2P bus to check if CGX/LBK links are fine */
struct npc_coalesced_kpu_prfl *img_data = NULL;
int i = 0, rc = -EINVAL;
void __iomem *kpu_prfl_addr;
- u16 offset;
+ u32 offset;
img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr;
if (le64_to_cpu(img_data->signature) == KPU_SIGN &&
kfree(pkind->rsrc.bmap);
npc_mcam_rsrcs_deinit(rvu);
- kfree(mcam->counters.bmap);
if (rvu->kpu_prfl_addr)
iounmap(rvu->kpu_prfl_addr);
else
* mcam entries are enabled to receive the packets. Hence disable the
* packet I/O.
*/
- if (err == EIO)
+ if (err == -EIO)
goto err_disable_rxtx;
else if (err)
goto err_tx_stop_queues;
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
struct flow_match_control match;
+ u32 val;
flow_rule_match_control(rule, &match);
if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
}
if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
+ val = match.key->flags & FLOW_DIS_IS_FRAGMENT;
if (ntohs(flow_spec->etype) == ETH_P_IP) {
- flow_spec->ip_flag = IPV4_FLAG_MORE;
+ flow_spec->ip_flag = val ? IPV4_FLAG_MORE : 0;
flow_mask->ip_flag = IPV4_FLAG_MORE;
req->features |= BIT_ULL(NPC_IPFRAG_IPV4);
} else if (ntohs(flow_spec->etype) == ETH_P_IPV6) {
- flow_spec->next_header = IPPROTO_FRAGMENT;
+ flow_spec->next_header = val ?
+ IPPROTO_FRAGMENT : 0;
flow_mask->next_header = 0xff;
req->features |= BIT_ULL(NPC_IPFRAG_IPV6);
} else {
otx2_qos_read_txschq_cfg_tl(node, cfg);
cnt = cfg->static_node_pos[node->level];
cfg->schq_contig_list[node->level][cnt] = node->schq;
+ cfg->schq_index_used[node->level][cnt] = true;
cfg->schq_contig[node->level]++;
cfg->static_node_pos[node->level]++;
otx2_qos_read_txschq_cfg_schq(node, cfg);
static void
mtk_wed_stop(struct mtk_wed_device *dev)
{
+ mtk_wed_dma_disable(dev);
mtk_wed_set_ext_int(dev, false);
wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0);
wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0);
wdma_w32(dev, MTK_WDMA_INT_MASK, 0);
wdma_w32(dev, MTK_WDMA_INT_GRP2, 0);
- wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0);
if (!mtk_wed_get_rx_capa(dev))
return;
mtk_wed_deinit(struct mtk_wed_device *dev)
{
mtk_wed_stop(dev);
- mtk_wed_dma_disable(dev);
wed_clr(dev, MTK_WED_CTRL,
MTK_WED_CTRL_WDMA_INT_AGENT_EN |
static void
mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask)
{
- if (!dev->running)
- return;
-
mtk_wed_set_ext_int(dev, !!mask);
wed_w32(dev, MTK_WED_INT_MASK, mask);
}
}
static inline u8
+mlx5e_ptp_metadata_fifo_peek(struct mlx5e_ptp_metadata_fifo *fifo)
+{
+ return fifo->data[fifo->mask & fifo->cc];
+}
+
+static inline void
mlx5e_ptp_metadata_fifo_pop(struct mlx5e_ptp_metadata_fifo *fifo)
{
- return fifo->data[fifo->mask & fifo->cc++];
+ fifo->cc++;
}
static inline void
txq_ix = mlx5e_qid_from_qos(chs, node_qid);
- WARN_ON(node_qid > priv->htb_max_qos_sqs);
- if (node_qid == priv->htb_max_qos_sqs) {
- struct mlx5e_sq_stats *stats, **stats_list = NULL;
-
- if (priv->htb_max_qos_sqs == 0) {
- stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
- sizeof(*stats_list),
- GFP_KERNEL);
- if (!stats_list)
- return -ENOMEM;
- }
+ WARN_ON(node_qid >= mlx5e_htb_cur_leaf_nodes(priv->htb));
+ if (!priv->htb_qos_sq_stats) {
+ struct mlx5e_sq_stats **stats_list;
+
+ stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+ sizeof(*stats_list), GFP_KERNEL);
+ if (!stats_list)
+ return -ENOMEM;
+
+ WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+ }
+
+ if (!priv->htb_qos_sq_stats[node_qid]) {
+ struct mlx5e_sq_stats *stats;
+
stats = kzalloc(sizeof(*stats), GFP_KERNEL);
- if (!stats) {
- kvfree(stats_list);
+ if (!stats)
return -ENOMEM;
- }
- if (stats_list)
- WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+
WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
/* Order htb_max_qos_sqs increment after writing the array pointer.
* Pairs with smp_load_acquire in en_stats.c.
mlx5e_reset_txqsq_cc_pc(sq);
sq->stats->recover++;
clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ rtnl_lock();
mlx5e_activate_txqsq(sq);
+ rtnl_unlock();
+
if (sq->channel)
mlx5e_trigger_napi_icosq(sq->channel);
else
carrier_ok = netif_carrier_ok(netdev);
netif_carrier_off(netdev);
+ rtnl_lock();
mlx5e_deactivate_priv_channels(priv);
+ rtnl_unlock();
mlx5e_ptp_close(chs->ptp);
err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
+ rtnl_lock();
mlx5e_activate_priv_channels(priv);
+ rtnl_unlock();
/* return carrier back if needed */
if (carrier_ok)
return min_t(u32, rqt_size, max_cap_rqt_size);
}
+#define MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH 256
+
+unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void)
+{
+ return MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH / MLX5E_UNIFORM_SPREAD_RQT_FACTOR;
+}
+
void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
{
mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
}
u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels);
+unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void);
int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn, u32 *vhca_id);
int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids,
unsigned int num_rqns,
void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
{
+ mutex_lock(selq->state_lock);
WARN_ON_ONCE(selq->is_prepared);
kvfree(selq->standby);
kvfree(selq->standby);
selq->standby = NULL;
+ mutex_unlock(selq->state_lock);
}
void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
.mdo_add_secy = mlx5e_macsec_add_secy,
.mdo_upd_secy = mlx5e_macsec_upd_secy,
.mdo_del_secy = mlx5e_macsec_del_secy,
+ .rx_uses_md_dst = true,
};
bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb)
struct hlist_head rules_hash[ARFS_HASH_SIZE];
};
+enum {
+ MLX5E_ARFS_STATE_ENABLED,
+};
+
enum arfs_type {
ARFS_IPV4_TCP,
ARFS_IPV6_TCP,
spinlock_t arfs_lock;
int last_filter_id;
struct workqueue_struct *wq;
+ unsigned long state;
};
struct arfs_tuple {
return err;
}
}
+ set_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state);
+
return 0;
}
int i;
int j;
+ clear_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state);
+
spin_lock_bh(&arfs->arfs_lock);
mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) {
hlist_del_init(&rule->hlist);
struct mlx5_flow_handle *rule;
arfs = mlx5e_fs_get_arfs(priv->fs);
- mutex_lock(&priv->state_lock);
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- spin_lock_bh(&arfs->arfs_lock);
- hlist_del(&arfs_rule->hlist);
- spin_unlock_bh(&arfs->arfs_lock);
-
- mutex_unlock(&priv->state_lock);
- kfree(arfs_rule);
- goto out;
- }
- mutex_unlock(&priv->state_lock);
+ if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state))
+ return;
if (!arfs_rule->rule) {
rule = arfs_add_rule(priv, arfs_rule);
return -EPROTONOSUPPORT;
spin_lock_bh(&arfs->arfs_lock);
+ if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) {
+ spin_unlock_bh(&arfs->arfs_lock);
+ return -EPERM;
+ }
+
arfs_rule = arfs_find_rule(arfs_t, &fk);
if (arfs_rule) {
if (arfs_rule->rxq == rxq_index || work_busy(&arfs_rule->arfs_work)) {
mutex_lock(&priv->state_lock);
+ if (mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc == ETH_RSS_HASH_XOR) {
+ unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8();
+
+ if (count > xor8_max_channels) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: Requested number of channels (%d) exceeds the maximum allowed by the XOR8 RSS hfunc (%d)\n",
+ __func__, count, xor8_max_channels);
+ goto out;
+ }
+ }
+
+ /* If RXFH is configured, changing the channels number is allowed only if
+ * it does not require resizing the RSS table. This is because the previous
+ * configuration may no longer be compatible with the new RSS table.
+ */
+ if (netif_is_rxfh_configured(priv->netdev)) {
+ int cur_rqt_size = mlx5e_rqt_size(priv->mdev, cur_params->num_channels);
+ int new_rqt_size = mlx5e_rqt_size(priv->mdev, count);
+
+ if (new_rqt_size != cur_rqt_size) {
+ err = -EINVAL;
+ netdev_err(priv->netdev,
+ "%s: RXFH is configured, block changing channels number that affects RSS table size (new: %d, current: %d)\n",
+ __func__, new_rqt_size, cur_rqt_size);
+ goto out;
+ }
+ }
+
/* Don't allow changing the number of channels if HTB offload is active,
* because the numeration of the QoS SQs will change, while per-queue
* qdiscs are attached.
static void
mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
{
- struct mlx5_core_dev *mdev = priv->mdev;
int tc;
int i;
for (i = 0; i < priv->channels.num; ++i) {
struct mlx5e_channel *c = priv->channels.c[i];
+ struct mlx5_core_dev *mdev = c->mdev;
for (tc = 0; tc < c->num_tc; tc++) {
mlx5_core_modify_cq_moderation(mdev,
static void
mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
{
- struct mlx5_core_dev *mdev = priv->mdev;
int i;
for (i = 0; i < priv->channels.num; ++i) {
struct mlx5e_channel *c = priv->channels.c[i];
+ struct mlx5_core_dev *mdev = c->mdev;
mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq,
coal->rx_coalesce_usecs,
struct mlx5e_priv *priv = netdev_priv(dev);
u32 *rss_context = &rxfh->rss_context;
u8 hfunc = rxfh->hfunc;
+ unsigned int count;
int err;
mutex_lock(&priv->state_lock);
+
+ count = priv->channels.params.num_channels;
+
+ if (hfunc == ETH_RSS_HASH_XOR) {
+ unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8();
+
+ if (count > xor8_max_channels) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: Cannot set RSS hash function to XOR, current number of channels (%d) exceeds the maximum allowed for XOR8 RSS hfunc (%d)\n",
+ __func__, count, xor8_max_channels);
+ goto unlock;
+ }
+ }
+
if (*rss_context && rxfh->rss_delete) {
err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context);
goto unlock;
}
if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
- unsigned int count = priv->channels.params.num_channels;
-
err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count);
if (err)
goto unlock;
*data,
mlx5e_devcom_event_mpv,
priv);
- if (IS_ERR_OR_NULL(priv->devcom))
- return -EOPNOTSUPP;
+ if (IS_ERR(priv->devcom))
+ return PTR_ERR(priv->devcom);
if (mlx5_core_is_mp_master(priv->mdev)) {
mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_UP,
kfree(priv->tx_rates);
kfree(priv->txq2sq);
destroy_workqueue(priv->wq);
- mutex_lock(&priv->state_lock);
mlx5e_selq_cleanup(&priv->selq);
- mutex_unlock(&priv->state_lock);
free_cpumask_var(priv->scratchpad.cpumask);
for (i = 0; i < priv->htb_max_qos_sqs; i++)
(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) {
u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);
+ mlx5e_ptp_metadata_fifo_pop(&sq->ptpsq->metadata_freelist);
+
mlx5e_skb_cb_hwtstamp_init(skb);
mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
metadata_index);
err_drop:
stats->dropped++;
- if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
- mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist,
- be32_to_cpu(eseg->flow_table_metadata));
dev_kfree_skb_any(skb);
mlx5e_tx_flush(sq);
}
{
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
eseg->flow_table_metadata =
- cpu_to_be32(mlx5e_ptp_metadata_fifo_pop(&ptpsq->metadata_freelist));
+ cpu_to_be32(mlx5e_ptp_metadata_fifo_peek(&ptpsq->metadata_freelist));
}
static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
if (err)
goto abort;
+ dev->priv.eswitch = esw;
err = esw_offloads_init(esw);
if (err)
goto reps_err;
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
else
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (MLX5_ESWITCH_MANAGER(dev) &&
- mlx5_esw_vport_match_metadata_supported(esw))
- esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
-
- dev->priv.eswitch = esw;
BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
esw_info(dev,
reps_err:
mlx5_esw_vports_cleanup(esw);
+ dev->priv.eswitch = NULL;
abort:
if (esw->work_queue)
destroy_workqueue(esw->work_queue);
esw_info(esw->dev, "cleanup\n");
- esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
WARN_ON(refcount_read(&esw->qos.refcnt));
mutex_destroy(&esw->state_lock);
mutex_destroy(&esw->offloads.encap_tbl_lock);
mutex_destroy(&esw->offloads.decap_tbl_lock);
esw_offloads_cleanup(esw);
+ esw->dev->priv.eswitch = NULL;
mlx5_esw_vports_cleanup(esw);
debugfs_remove_recursive(esw->debugfs_root);
devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params,
#include "rdma.h"
#include "en.h"
#include "fs_core.h"
+#include "lib/mlx5.h"
#include "lib/devcom.h"
#include "lib/eq.h"
#include "lib/fs_chains.h"
if (err)
return err;
+ if (MLX5_ESWITCH_MANAGER(esw->dev) &&
+ mlx5_esw_vport_match_metadata_supported(esw))
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
err = devl_params_register(priv_to_devlink(esw->dev),
esw_devlink_params,
ARRAY_SIZE(esw_devlink_params));
key,
mlx5_esw_offloads_devcom_event,
esw);
- if (IS_ERR_OR_NULL(esw->devcom))
+ if (IS_ERR(esw->devcom))
return;
mlx5_devcom_send_event(esw->devcom,
if (esw_mode_from_devlink(mode, &mlx5_mode))
return -EINVAL;
+ if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && mlx5_get_sd(esw->dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't change E-Switch mode to switchdev when multi-PF netdev (Socket Direct) is configured.");
+ return -EPERM;
+ }
+
mlx5_lag_disable_change(esw->dev);
err = mlx5_esw_try_lock(esw);
if (err < 0) {
return err;
}
+static bool mlx5_pkt_reformat_cmp(struct mlx5_pkt_reformat *p1,
+ struct mlx5_pkt_reformat *p2)
+{
+ return p1->owner == p2->owner &&
+ (p1->owner == MLX5_FLOW_RESOURCE_OWNER_FW ?
+ p1->id == p2->id :
+ mlx5_fs_dr_action_get_pkt_reformat_id(p1) ==
+ mlx5_fs_dr_action_get_pkt_reformat_id(p2));
+}
+
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
struct mlx5_flow_destination *d2)
{
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
(d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
- (d1->vport.pkt_reformat->id ==
- d2->vport.pkt_reformat->id) : true)) ||
+ mlx5_pkt_reformat_cmp(d1->vport.pkt_reformat,
+ d2->vport.pkt_reformat) : true)) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
d1->ft == d2->ft) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
}
trace_mlx5_fs_set_fte(fte, false);
+ /* Link newly added rules into the tree. */
for (i = 0; i < handle->num_rules; i++) {
- if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
+ if (!handle->rule[i]->node.parent) {
tree_add_node(&handle->rule[i]->node, &fte->node);
trace_mlx5_fs_add_rule(handle->rule[i]);
}
return err;
}
- if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
mlx5_lag_port_sel_destroy(ldev);
+ ldev->buckets = 1;
+ }
if (mlx5_lag_has_drop_rule(ldev))
mlx5_lag_drop_rule_cleanup(ldev);
struct mlx5_devcom_comp *comp;
if (IS_ERR_OR_NULL(devc))
- return NULL;
+ return ERR_PTR(-EINVAL);
mutex_lock(&comp_list_lock);
comp = devcom_component_get(devc, id, key, handler);
sd = mlx5_get_sd(dev);
devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP,
sd->group_id, NULL, dev);
- if (!devcom)
- return -ENOMEM;
+ if (IS_ERR(devcom))
+ return PTR_ERR(devcom);
sd->devcom = devcom;
mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS,
mlx5_query_nic_system_image_guid(dev),
NULL, dev);
- if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp))
+ if (IS_ERR(dev->priv.hca_devcom_comp))
mlx5_core_err(dev, "Failed to register devcom HCA component\n");
}
if (err)
goto err_register;
+ err = mlx5_crdump_enable(dev);
+ if (err)
+ mlx5_core_err(dev, "mlx5_crdump_enable failed with error code %d\n", err);
+
+ err = mlx5_hwmon_dev_register(dev);
+ if (err)
+ mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
+
mutex_unlock(&dev->intf_state_mutex);
return 0;
int err;
devl_lock(devlink);
+ devl_register(devlink);
err = mlx5_init_one_devl_locked(dev);
+ if (err)
+ devl_unregister(devlink);
devl_unlock(devlink);
return err;
}
devl_lock(devlink);
mutex_lock(&dev->intf_state_mutex);
+ mlx5_hwmon_dev_unregister(dev);
+ mlx5_crdump_disable(dev);
mlx5_unregister_device(dev);
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
mlx5_function_teardown(dev, true);
out:
mutex_unlock(&dev->intf_state_mutex);
+ devl_unregister(devlink);
devl_unlock(devlink);
}
}
devl_lock(devlink);
+ devl_register(devlink);
+
err = mlx5_devlink_params_register(priv_to_devlink(dev));
- devl_unlock(devlink);
if (err) {
mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err);
- goto query_hca_caps_err;
+ goto params_reg_err;
}
+ devl_unlock(devlink);
return 0;
+params_reg_err:
+ devl_unregister(devlink);
+ devl_unlock(devlink);
query_hca_caps_err:
+ devl_unregister(devlink);
+ devl_unlock(devlink);
mlx5_function_disable(dev, true);
out:
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
devl_lock(devlink);
mlx5_devlink_params_unregister(priv_to_devlink(dev));
+ devl_unregister(devlink);
devl_unlock(devlink);
if (dev->state != MLX5_DEVICE_STATE_UP)
return;
goto err_init_one;
}
- err = mlx5_crdump_enable(dev);
- if (err)
- dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
-
- err = mlx5_hwmon_dev_register(dev);
- if (err)
- mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
-
pci_save_state(pdev);
- devlink_register(devlink);
return 0;
err_init_one:
struct devlink *devlink = priv_to_devlink(dev);
set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
- /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using
- * devlink notify APIs.
- * Hence, we must drain them before unregistering the devlink.
- */
mlx5_drain_fw_reset(dev);
mlx5_drain_health_wq(dev);
- devlink_unregister(devlink);
mlx5_sriov_disable(pdev, false);
- mlx5_hwmon_dev_unregister(dev);
- mlx5_crdump_disable(dev);
mlx5_uninit_one(dev);
mlx5_pci_close(dev);
mlx5_mdev_uninit(dev);
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
+#define MLX5_IRQ_VEC_COMP_BASE 1
#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
return;
}
+ vecidx -= MLX5_IRQ_VEC_COMP_BASE;
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}
struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
struct mlx5_irq_pool *pool = table->pcif_pool;
struct irq_affinity_desc af_desc;
- int offset = 1;
+ int offset = MLX5_IRQ_VEC_COMP_BASE;
if (!pool->xa_num_irqs.max)
offset = 0;
goto peer_devlink_set_err;
}
- devlink_register(devlink);
return 0;
peer_devlink_set_err:
devlink = priv_to_devlink(mdev);
set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
mlx5_drain_health_wq(mdev);
- devlink_unregister(devlink);
if (mlx5_dev_is_lightweight(mdev))
mlx5_uninit_one_light(mdev);
else
}
static int
-dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
+dr_dump_rule_action_mem(struct seq_file *file, char *buff, const u64 rule_id,
struct mlx5dr_rule_action_member *action_mem)
{
struct mlx5dr_action *action = action_mem->action;
const u64 action_id = DR_DBG_PTR_TO_ID(action);
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
u64 hit_tbl_ptr, miss_tbl_ptr;
u32 hit_tbl_id, miss_tbl_id;
int ret;
}
static int
-dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
+dr_dump_rule_mem(struct seq_file *file, char *buff, struct mlx5dr_ste *ste,
bool is_rx, const u64 rule_id, u8 format_ver)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
char hw_ste_dump[DR_HEX_SIZE];
u32 mem_rec_type;
int ret;
}
static int
-dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
+dr_dump_rule_rx_tx(struct seq_file *file, char *buff,
+ struct mlx5dr_rule_rx_tx *rule_rx_tx,
bool is_rx, const u64 rule_id, u8 format_ver)
{
struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
return 0;
while (i--) {
- ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id,
+ ret = dr_dump_rule_mem(file, buff, ste_arr[i], is_rx, rule_id,
format_ver);
if (ret < 0)
return ret;
return 0;
}
-static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
+static noinline_for_stack int
+dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
{
struct mlx5dr_rule_action_member *action_mem;
const u64 rule_id = DR_DBG_PTR_TO_ID(rule);
return ret;
if (rx->nic_matcher) {
- ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver);
+ ret = dr_dump_rule_rx_tx(file, buff, rx, true, rule_id, format_ver);
if (ret < 0)
return ret;
}
if (tx->nic_matcher) {
- ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver);
+ ret = dr_dump_rule_rx_tx(file, buff, tx, false, rule_id, format_ver);
if (ret < 0)
return ret;
}
list_for_each_entry(action_mem, &rule->rule_actions_list, list) {
- ret = dr_dump_rule_action_mem(file, rule_id, action_mem);
+ ret = dr_dump_rule_action_mem(file, buff, rule_id, action_mem);
if (ret < 0)
return ret;
}
}
static int
-dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
+dr_dump_matcher_mask(struct seq_file *file, char *buff,
+ struct mlx5dr_match_param *mask,
u8 criteria, const u64 matcher_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
char dump[DR_HEX_SIZE];
int ret;
}
static int
-dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
+dr_dump_matcher_builder(struct seq_file *file, char *buff,
+ struct mlx5dr_ste_build *builder,
u32 index, bool is_rx, const u64 matcher_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
}
static int
-dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
+dr_dump_matcher_rx_tx(struct seq_file *file, char *buff, bool is_rx,
struct mlx5dr_matcher_rx_tx *matcher_rx_tx,
const u64 matcher_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
enum dr_dump_rec_type rec_type;
u64 s_icm_addr, e_icm_addr;
int i, ret;
return ret;
for (i = 0; i < matcher_rx_tx->num_of_builders; i++) {
- ret = dr_dump_matcher_builder(file,
+ ret = dr_dump_matcher_builder(file, buff,
&matcher_rx_tx->ste_builder[i],
i, is_rx, matcher_id);
if (ret < 0)
return 0;
}
-static int
+static noinline_for_stack int
dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
{
struct mlx5dr_matcher_rx_tx *rx = &matcher->rx;
if (ret)
return ret;
- ret = dr_dump_matcher_mask(file, &matcher->mask,
+ ret = dr_dump_matcher_mask(file, buff, &matcher->mask,
matcher->match_criteria, matcher_id);
if (ret < 0)
return ret;
if (rx->nic_tbl) {
- ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id);
+ ret = dr_dump_matcher_rx_tx(file, buff, true, rx, matcher_id);
if (ret < 0)
return ret;
}
if (tx->nic_tbl) {
- ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id);
+ ret = dr_dump_matcher_rx_tx(file, buff, false, tx, matcher_id);
if (ret < 0)
return ret;
}
}
static int
-dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
+dr_dump_table_rx_tx(struct seq_file *file, char *buff, bool is_rx,
struct mlx5dr_table_rx_tx *table_rx_tx,
const u64 table_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
enum dr_dump_rec_type rec_type;
u64 s_icm_addr;
int ret;
return 0;
}
-static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
+static noinline_for_stack int
+dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
{
struct mlx5dr_table_rx_tx *rx = &table->rx;
struct mlx5dr_table_rx_tx *tx = &table->tx;
return ret;
if (rx->nic_dmn) {
- ret = dr_dump_table_rx_tx(file, true, rx,
+ ret = dr_dump_table_rx_tx(file, buff, true, rx,
DR_DBG_PTR_TO_ID(table));
if (ret < 0)
return ret;
}
if (tx->nic_dmn) {
- ret = dr_dump_table_rx_tx(file, false, tx,
+ ret = dr_dump_table_rx_tx(file, buff, false, tx,
DR_DBG_PTR_TO_ID(table));
if (ret < 0)
return ret;
}
static int
-dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
+dr_dump_send_ring(struct seq_file *file, char *buff,
+ struct mlx5dr_send_ring *ring,
const u64 domain_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
return 0;
}
-static noinline_for_stack int
+static int
dr_dump_domain_info_flex_parser(struct seq_file *file,
+ char *buff,
const char *flex_parser_name,
const u8 flex_parser_value,
const u64 domain_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
return 0;
}
-static noinline_for_stack int
-dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
+static int
+dr_dump_domain_info_caps(struct seq_file *file, char *buff,
+ struct mlx5dr_cmd_caps *caps,
const u64 domain_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
struct mlx5dr_cmd_vport_cap *vport_caps;
unsigned long i, vports_num;
int ret;
}
static int
-dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info,
+dr_dump_domain_info(struct seq_file *file, char *buff,
+ struct mlx5dr_domain_info *info,
const u64 domain_id)
{
int ret;
- ret = dr_dump_domain_info_caps(file, &info->caps, domain_id);
+ ret = dr_dump_domain_info_caps(file, buff, &info->caps, domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw0",
info->caps.flex_parser_id_icmp_dw0,
domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw1",
info->caps.flex_parser_id_icmp_dw1,
domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw0",
info->caps.flex_parser_id_icmpv6_dw0,
domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw1",
info->caps.flex_parser_id_icmpv6_dw1,
domain_id);
if (ret < 0)
if (ret)
return ret;
- ret = dr_dump_domain_info(file, &dmn->info, domain_id);
+ ret = dr_dump_domain_info(file, buff, &dmn->info, domain_id);
if (ret < 0)
return ret;
if (dmn->info.supp_sw_steering) {
- ret = dr_dump_send_ring(file, dmn->send_ring, domain_id);
+ ret = dr_dump_send_ring(file, buff, dmn->send_ring, domain_id);
if (ret < 0)
return ret;
}
#include <linux/module.h>
#include <linux/phy.h>
#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include "mlxbf_gige.h"
control |= MLXBF_GIGE_CONTROL_PORT_EN;
writeq(control, priv->base + MLXBF_GIGE_CONTROL);
- err = mlxbf_gige_request_irqs(priv);
- if (err)
- return err;
mlxbf_gige_cache_stats(priv);
err = mlxbf_gige_clean_port(priv);
if (err)
- goto free_irqs;
+ return err;
/* Clear driver's valid_polarity to match hardware,
* since the above call to clean_port() resets the
err = mlxbf_gige_tx_init(priv);
if (err)
- goto free_irqs;
+ goto phy_deinit;
err = mlxbf_gige_rx_init(priv);
if (err)
goto tx_deinit;
napi_enable(&priv->napi);
netif_start_queue(netdev);
+ err = mlxbf_gige_request_irqs(priv);
+ if (err)
+ goto napi_deinit;
+
/* Set bits in INT_EN that we care about */
int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR |
MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS |
return 0;
+napi_deinit:
+ netif_stop_queue(netdev);
+ napi_disable(&priv->napi);
+ netif_napi_del(&priv->napi);
+ mlxbf_gige_rx_deinit(priv);
+
tx_deinit:
mlxbf_gige_tx_deinit(priv);
-free_irqs:
- mlxbf_gige_free_irqs(priv);
+phy_deinit:
+ phy_stop(phydev);
return err;
}
{
struct mlxbf_gige *priv = platform_get_drvdata(pdev);
- writeq(0, priv->base + MLXBF_GIGE_INT_EN);
- mlxbf_gige_clean_port(priv);
+ rtnl_lock();
+ netif_device_detach(priv->netdev);
+
+ if (netif_running(priv->netdev))
+ dev_close(priv->netdev);
+
+ rtnl_unlock();
}
static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = {
static const struct mlxsw_listener mlxsw_emad_rx_listener =
MLXSW_RXL(mlxsw_emad_rx_listener_func, ETHEMAD, TRAP_TO_CPU, false,
- EMAD, DISCARD);
+ EMAD, FORWARD);
static int mlxsw_emad_tlv_enable(struct mlxsw_core *mlxsw_core)
{
.got_inactive = mlxsw_env_got_inactive,
};
-static int mlxsw_env_max_module_eeprom_len_query(struct mlxsw_env *mlxsw_env)
+static void mlxsw_env_max_module_eeprom_len_query(struct mlxsw_env *mlxsw_env)
{
char mcam_pl[MLXSW_REG_MCAM_LEN];
- bool mcia_128b_supported;
+ bool mcia_128b_supported = false;
int err;
mlxsw_reg_mcam_pack(mcam_pl,
MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
err = mlxsw_reg_query(mlxsw_env->core, MLXSW_REG(mcam), mcam_pl);
- if (err)
- return err;
-
- mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_MCIA_128B,
- &mcia_128b_supported);
+ if (!err)
+ mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_MCIA_128B,
+ &mcia_128b_supported);
mlxsw_env->max_eeprom_len = mcia_128b_supported ? 128 : 48;
-
- return 0;
}
int mlxsw_env_init(struct mlxsw_core *mlxsw_core,
if (err)
goto err_type_set;
- err = mlxsw_env_max_module_eeprom_len_query(env);
- if (err)
- goto err_eeprom_len_query;
-
+ mlxsw_env_max_module_eeprom_len_query(env);
env->line_cards[0]->active = true;
return 0;
-err_eeprom_len_query:
err_type_set:
mlxsw_env_module_event_disable(env, 0);
err_mlxsw_env_module_event_enable:
{
struct pci_dev *pdev = mlxsw_pci->pdev;
char mcam_pl[MLXSW_REG_MCAM_LEN];
- bool pci_reset_supported;
+ bool pci_reset_supported = false;
u32 sys_status;
int err;
mlxsw_reg_mcam_pack(mcam_pl,
MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl);
- if (err)
- return err;
-
- mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
- &pci_reset_supported);
+ if (!err)
+ mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
+ &pci_reset_supported);
if (pci_reset_supported) {
pci_dbg(pdev, "Starting PCI reset flow\n");
#include <linux/netdevice.h>
#include <linux/mutex.h>
#include <linux/refcount.h>
+#include <linux/idr.h>
#include <net/devlink.h>
#include <trace/events/mlxsw.h>
static int mlxsw_sp_acl_tcam_region_id_get(struct mlxsw_sp_acl_tcam *tcam,
u16 *p_id)
{
- u16 id;
+ int id;
- id = find_first_zero_bit(tcam->used_regions, tcam->max_regions);
- if (id < tcam->max_regions) {
- __set_bit(id, tcam->used_regions);
- *p_id = id;
- return 0;
- }
- return -ENOBUFS;
+ id = ida_alloc_max(&tcam->used_regions, tcam->max_regions - 1,
+ GFP_KERNEL);
+ if (id < 0)
+ return id;
+
+ *p_id = id;
+
+ return 0;
}
static void mlxsw_sp_acl_tcam_region_id_put(struct mlxsw_sp_acl_tcam *tcam,
u16 id)
{
- __clear_bit(id, tcam->used_regions);
+ ida_free(&tcam->used_regions, id);
}
static int mlxsw_sp_acl_tcam_group_id_get(struct mlxsw_sp_acl_tcam *tcam,
u16 *p_id)
{
- u16 id;
+ int id;
- id = find_first_zero_bit(tcam->used_groups, tcam->max_groups);
- if (id < tcam->max_groups) {
- __set_bit(id, tcam->used_groups);
- *p_id = id;
- return 0;
- }
- return -ENOBUFS;
+ id = ida_alloc_max(&tcam->used_groups, tcam->max_groups - 1,
+ GFP_KERNEL);
+ if (id < 0)
+ return id;
+
+ *p_id = id;
+
+ return 0;
}
static void mlxsw_sp_acl_tcam_group_id_put(struct mlxsw_sp_acl_tcam *tcam,
u16 id)
{
- __clear_bit(id, tcam->used_groups);
+ ida_free(&tcam->used_groups, id);
}
struct mlxsw_sp_acl_tcam_pattern {
rehash.dw.work);
int credits = MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS;
+ mutex_lock(&vregion->lock);
mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion, &credits);
+ mutex_unlock(&vregion->lock);
if (credits < 0)
/* Rehash gone out of credits so it was interrupted.
* Schedule the work as soon as possible to continue.
mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion);
}
+static void
+mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(struct mlxsw_sp_acl_tcam_rehash_ctx *ctx)
+{
+ /* The entry markers are relative to the current chunk and therefore
+ * needs to be reset together with the chunk marker.
+ */
+ ctx->current_vchunk = NULL;
+ ctx->start_ventry = NULL;
+ ctx->stop_ventry = NULL;
+}
+
static void
mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(struct mlxsw_sp_acl_tcam_vchunk *vchunk)
{
* the current chunk pointer to make sure all chunks
* are properly migrated.
*/
- vregion->rehash.ctx.current_vchunk = NULL;
+ mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(&vregion->rehash.ctx);
}
static struct mlxsw_sp_acl_tcam_vregion *
struct mlxsw_sp_acl_tcam *tcam = vregion->tcam;
if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) {
+ struct mlxsw_sp_acl_tcam_rehash_ctx *ctx = &vregion->rehash.ctx;
+
mutex_lock(&tcam->lock);
list_del(&vregion->tlist);
mutex_unlock(&tcam->lock);
- cancel_delayed_work_sync(&vregion->rehash.dw);
+ if (cancel_delayed_work_sync(&vregion->rehash.dw) &&
+ ctx->hints_priv)
+ ops->region_rehash_hints_put(ctx->hints_priv);
}
mlxsw_sp_acl_tcam_vgroup_vregion_detach(mlxsw_sp, vregion);
if (vregion->region2)
struct mlxsw_sp_acl_tcam_ventry *ventry,
bool *activity)
{
- return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp,
- ventry->entry, activity);
+ struct mlxsw_sp_acl_tcam_vregion *vregion = ventry->vchunk->vregion;
+ int err;
+
+ mutex_lock(&vregion->lock);
+ err = mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, ventry->entry,
+ activity);
+ mutex_unlock(&vregion->lock);
+ return err;
}
static int
{
struct mlxsw_sp_acl_tcam_chunk *new_chunk;
+ WARN_ON(vchunk->chunk2);
+
new_chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region);
if (IS_ERR(new_chunk))
return PTR_ERR(new_chunk);
{
mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2);
vchunk->chunk2 = NULL;
- ctx->current_vchunk = NULL;
+ mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx);
}
static int
return 0;
}
+ if (list_empty(&vchunk->ventry_list))
+ goto out;
+
/* If the migration got interrupted, we have the ventry to start from
* stored in context.
*/
ventry = list_first_entry(&vchunk->ventry_list,
typeof(*ventry), list);
+ WARN_ON(ventry->vchunk != vchunk);
+
list_for_each_entry_from(ventry, &vchunk->ventry_list, list) {
/* During rollback, once we reach the ventry that failed
* to migrate, we are done.
}
}
+out:
mlxsw_sp_acl_tcam_vchunk_migrate_end(mlxsw_sp, vchunk, ctx);
return 0;
}
struct mlxsw_sp_acl_tcam_vchunk *vchunk;
int err;
+ if (list_empty(&vregion->vchunk_list))
+ return 0;
+
/* If the migration got interrupted, we have the vchunk
* we are working on stored in context.
*/
int err, err2;
trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion);
- mutex_lock(&vregion->lock);
err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion,
ctx, credits);
if (err) {
+ if (ctx->this_is_rollback)
+ return err;
/* In case migration was not successful, we need to swap
* so the original region pointer is assigned again
* to vregion->region.
*/
swap(vregion->region, vregion->region2);
- ctx->current_vchunk = NULL;
+ mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx);
ctx->this_is_rollback = true;
err2 = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion,
ctx, credits);
/* Let the rollback to be continued later on. */
}
}
- mutex_unlock(&vregion->lock);
trace_mlxsw_sp_acl_tcam_vregion_migrate_end(mlxsw_sp, vregion);
return err;
}
ctx->hints_priv = hints_priv;
ctx->this_is_rollback = false;
+ mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx);
return 0;
err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion,
ctx, credits);
if (err) {
- dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n");
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n");
+ return;
}
if (*credits >= 0)
if (max_tcam_regions < max_regions)
max_regions = max_tcam_regions;
- tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL);
- if (!tcam->used_regions) {
- err = -ENOMEM;
- goto err_alloc_used_regions;
- }
+ ida_init(&tcam->used_regions);
tcam->max_regions = max_regions;
max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS);
- tcam->used_groups = bitmap_zalloc(max_groups, GFP_KERNEL);
- if (!tcam->used_groups) {
- err = -ENOMEM;
- goto err_alloc_used_groups;
- }
+ ida_init(&tcam->used_groups);
tcam->max_groups = max_groups;
tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
ACL_MAX_GROUP_SIZE);
return 0;
err_tcam_init:
- bitmap_free(tcam->used_groups);
-err_alloc_used_groups:
- bitmap_free(tcam->used_regions);
-err_alloc_used_regions:
+ ida_destroy(&tcam->used_groups);
+ ida_destroy(&tcam->used_regions);
mlxsw_sp_acl_tcam_rehash_params_unregister(mlxsw_sp);
err_rehash_params_register:
mutex_destroy(&tcam->lock);
const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
ops->fini(mlxsw_sp, tcam->priv);
- bitmap_free(tcam->used_groups);
- bitmap_free(tcam->used_regions);
+ ida_destroy(&tcam->used_groups);
+ ida_destroy(&tcam->used_regions);
mlxsw_sp_acl_tcam_rehash_params_unregister(mlxsw_sp);
mutex_destroy(&tcam->lock);
}
#include <linux/list.h>
#include <linux/parman.h>
+#include <linux/idr.h>
#include "reg.h"
#include "spectrum.h"
#include "core_acl_flex_keys.h"
struct mlxsw_sp_acl_tcam {
- unsigned long *used_regions; /* bit array */
+ struct ida used_regions;
unsigned int max_regions;
- unsigned long *used_groups; /* bit array */
+ struct ida used_groups;
unsigned int max_groups;
unsigned int max_group_size;
struct mutex lock; /* guards vregion list */
* @rdfifo: FIFO read callback
* @wrfifo: FIFO write callback
* @start_xmit: start_xmit() implementation callback
- * @rx_skb: rx_skb() implementation callback
* @flush_tx_work: flush_tx_work() implementation callback
*
* The @statelock is used to protect information in the structure which may
struct sk_buff *txp, bool irq);
netdev_tx_t (*start_xmit)(struct sk_buff *skb,
struct net_device *dev);
- void (*rx_skb)(struct ks8851_net *ks,
- struct sk_buff *skb);
void (*flush_tx_work)(struct ks8851_net *ks);
};
rxpkt[12], rxpkt[13], rxpkt[14], rxpkt[15]);
}
-/**
- * ks8851_rx_skb - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb)
-{
- ks->rx_skb(ks, skb);
-}
-
/**
* ks8851_rx_pkts - receive packets from the host
* @ks: The device information.
ks8851_dbg_dumpkkt(ks, rxpkt);
skb->protocol = eth_type_trans(skb, ks->netdev);
- ks8851_rx_skb(ks, skb);
+ __netif_rx(skb);
ks->netdev->stats.rx_packets++;
ks->netdev->stats.rx_bytes += rxlen;
unsigned long flags;
unsigned int status;
+ local_bh_disable();
+
ks8851_lock(ks, &flags);
status = ks8851_rdreg16(ks, KS_ISR);
if (status & IRQ_LCI)
mii_check_link(&ks->mii);
+ local_bh_enable();
+
return IRQ_HANDLED;
}
iowrite16_rep(ksp->hw_addr, txp->data, len / 2);
}
-/**
- * ks8851_rx_skb_par - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb)
-{
- netif_rx(skb);
-}
-
static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks)
{
return ks8851_rdreg16_par(ks, KS_TXQCR);
ks->rdfifo = ks8851_rdfifo_par;
ks->wrfifo = ks8851_wrfifo_par;
ks->start_xmit = ks8851_start_xmit_par;
- ks->rx_skb = ks8851_rx_skb_par;
#define STD_IRQ (IRQ_LCI | /* Link Change */ \
IRQ_RXI | /* RX done */ \
return ALIGN(len + 4, 4);
}
-/**
- * ks8851_rx_skb_spi - receive skbuff
- * @ks: The device state
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb)
-{
- netif_rx(skb);
-}
-
/**
* ks8851_tx_work - process tx packet(s)
* @work: The work strucutre what was scheduled.
ks->rdfifo = ks8851_rdfifo_spi;
ks->wrfifo = ks8851_wrfifo_spi;
ks->start_xmit = ks8851_start_xmit_spi;
- ks->rx_skb = ks8851_rx_skb_spi;
ks->flush_tx_work = ks8851_flush_tx_work_spi;
#define STD_IRQ (IRQ_LCI | /* Link Change */ \
#define PCS_POWER_STATE_DOWN 0x6
#define PCS_POWER_STATE_UP 0x4
+#define RFE_RD_FIFO_TH_3_DWORDS 0x3
+
static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter)
{
u32 chip_rev;
lan743x_pci_cleanup(adapter);
}
+static void pci11x1x_set_rfe_rd_fifo_threshold(struct lan743x_adapter *adapter)
+{
+ u16 rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_;
+
+ if (rev == ID_REV_CHIP_REV_PCI11X1X_B0_) {
+ u32 misc_ctl;
+
+ misc_ctl = lan743x_csr_read(adapter, MISC_CTL_0);
+ misc_ctl &= ~MISC_CTL_0_RFE_READ_FIFO_MASK_;
+ misc_ctl |= FIELD_PREP(MISC_CTL_0_RFE_READ_FIFO_MASK_,
+ RFE_RD_FIFO_TH_3_DWORDS);
+ lan743x_csr_write(adapter, MISC_CTL_0, misc_ctl);
+ }
+}
+
static int lan743x_hardware_init(struct lan743x_adapter *adapter,
struct pci_dev *pdev)
{
pci11x1x_strap_get_status(adapter);
spin_lock_init(&adapter->eth_syslock_spinlock);
mutex_init(&adapter->sgmii_rw_lock);
+ pci11x1x_set_rfe_rd_fifo_threshold(adapter);
} else {
adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS;
adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS;
#define ID_REV_CHIP_REV_MASK_ (0x0000FFFF)
#define ID_REV_CHIP_REV_A0_ (0x00000000)
#define ID_REV_CHIP_REV_B0_ (0x00000010)
+#define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0)
#define FPGA_REV (0x04)
#define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF)
#define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8)
#define SGMII_CTL_SGMII_POWER_DN_ BIT(1)
+#define MISC_CTL_0 (0x920)
+#define MISC_CTL_0_RFE_READ_FIFO_MASK_ GENMASK(6, 4)
+
/* Vendor Specific SGMII MMD details */
#define SR_VSMMD_PCS_ID1 0x0004
#define SR_VSMMD_PCS_ID2 0x0005
bool sgmii = false, inband_aneg = false;
int err;
- if (port->conf.inband) {
+ if (conf->inband) {
if (conf->portmode == PHY_INTERFACE_MODE_SGMII ||
conf->portmode == PHY_INTERFACE_MODE_QSGMII)
inband_aneg = true; /* Cisco-SGMII in-band-aneg */
if (err)
return -EINVAL;
- if (port->conf.inband) {
+ if (conf->inband) {
/* Enable/disable 1G counters in ASM */
spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev),
ASM_PORT_CFG_CSC_STAT_DIS,
u16 l3_proto; /* protocol specified in the template */
};
+/* SparX-5 VCAP fragment types:
+ * 0 = no fragment, 1 = initial fragment,
+ * 2 = suspicious fragment, 3 = valid follow-up fragment
+ */
+enum { /* key / mask */
+ FRAG_NOT = 0x03, /* 0 / 3 */
+ FRAG_SOME = 0x11, /* 1 / 1 */
+ FRAG_FIRST = 0x13, /* 1 / 3 */
+ FRAG_LATER = 0x33, /* 3 / 3 */
+ FRAG_INVAL = 0xff, /* invalid */
+};
+
+/* Flower fragment flag to VCAP fragment type mapping */
+static const u8 sparx5_vcap_frag_map[4][4] = { /* is_frag */
+ { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_FIRST }, /* 0/0 */
+ { FRAG_NOT, FRAG_NOT, FRAG_INVAL, FRAG_INVAL }, /* 0/1 */
+ { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_INVAL }, /* 1/0 */
+ { FRAG_SOME, FRAG_LATER, FRAG_INVAL, FRAG_FIRST } /* 1/1 */
+ /* 0/0 0/1 1/0 1/1 <-- first_frag */
+};
+
static int
sparx5_tc_flower_es0_tpid(struct vcap_tc_flower_parse_usage *st)
{
flow_rule_match_control(st->frule, &mt);
if (mt.mask->flags) {
- if (mt.mask->flags & FLOW_DIS_FIRST_FRAG) {
- if (mt.key->flags & FLOW_DIS_FIRST_FRAG) {
- value = 1; /* initial fragment */
- mask = 0x3;
- } else {
- if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) {
- value = 3; /* follow up fragment */
- mask = 0x3;
- } else {
- value = 0; /* no fragment */
- mask = 0x3;
- }
- }
- } else {
- if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) {
- value = 3; /* follow up fragment */
- mask = 0x3;
- } else {
- value = 0; /* no fragment */
- mask = 0x3;
- }
+ u8 is_frag_key = !!(mt.key->flags & FLOW_DIS_IS_FRAGMENT);
+ u8 is_frag_mask = !!(mt.mask->flags & FLOW_DIS_IS_FRAGMENT);
+ u8 is_frag_idx = (is_frag_key << 1) | is_frag_mask;
+
+ u8 first_frag_key = !!(mt.key->flags & FLOW_DIS_FIRST_FRAG);
+ u8 first_frag_mask = !!(mt.mask->flags & FLOW_DIS_FIRST_FRAG);
+ u8 first_frag_idx = (first_frag_key << 1) | first_frag_mask;
+
+ /* Lookup verdict based on the 2 + 2 input bits */
+ u8 vdt = sparx5_vcap_frag_map[is_frag_idx][first_frag_idx];
+
+ if (vdt == FRAG_INVAL) {
+ NL_SET_ERR_MSG_MOD(st->fco->common.extack,
+ "Match on invalid fragment flag combination");
+ return -EINVAL;
}
+ /* Extract VCAP fragment key and mask from verdict */
+ value = (vdt >> 4) & 0x3;
+ mask = vdt & 0x3;
+
err = vcap_rule_add_key_u32(st->vrule,
VCAP_KF_L3_FRAGMENT_TYPE,
value, mask);
*alloc_size = mtu + MANA_RXBUF_PAD + *headroom;
- *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN);
+ *datasize = mtu + ETH_HLEN;
}
static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
};
struct rtl8169_private;
+struct r8169_led_classdev;
void r8169_apply_firmware(struct rtl8169_private *tp);
u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp);
char *buf, int buf_len);
int rtl8168_get_led_mode(struct rtl8169_private *tp);
int rtl8168_led_mod_ctrl(struct rtl8169_private *tp, u16 mask, u16 val);
-void rtl8168_init_leds(struct net_device *ndev);
+struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev);
int rtl8125_get_led_mode(struct rtl8169_private *tp, int index);
int rtl8125_set_led_mode(struct rtl8169_private *tp, int index, u16 mode);
-void rtl8125_init_leds(struct net_device *ndev);
+struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev);
+void r8169_remove_leds(struct r8169_led_classdev *leds);
led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
/* ignore errors */
- devm_led_classdev_register(&ndev->dev, led_cdev);
+ led_classdev_register(&ndev->dev, led_cdev);
}
-void rtl8168_init_leds(struct net_device *ndev)
+struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev)
{
- /* bind resource mgmt to netdev */
- struct device *dev = &ndev->dev;
struct r8169_led_classdev *leds;
int i;
- leds = devm_kcalloc(dev, RTL8168_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+ leds = kcalloc(RTL8168_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL);
if (!leds)
- return;
+ return NULL;
for (i = 0; i < RTL8168_NUM_LEDS; i++)
rtl8168_setup_ldev(leds + i, ndev, i);
+
+ return leds;
}
static int rtl8125_led_hw_control_is_supported(struct led_classdev *led_cdev,
led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
/* ignore errors */
- devm_led_classdev_register(&ndev->dev, led_cdev);
+ led_classdev_register(&ndev->dev, led_cdev);
}
-void rtl8125_init_leds(struct net_device *ndev)
+struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev)
{
- /* bind resource mgmt to netdev */
- struct device *dev = &ndev->dev;
struct r8169_led_classdev *leds;
int i;
- leds = devm_kcalloc(dev, RTL8125_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+ leds = kcalloc(RTL8125_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL);
if (!leds)
- return;
+ return NULL;
for (i = 0; i < RTL8125_NUM_LEDS; i++)
rtl8125_setup_led_ldev(leds + i, ndev, i);
+
+ return leds;
+}
+
+void r8169_remove_leds(struct r8169_led_classdev *leds)
+{
+ if (!leds)
+ return;
+
+ for (struct r8169_led_classdev *l = leds; l->ndev; l++)
+ led_classdev_unregister(&l->led);
+
+ kfree(leds);
}
const char *fw_name;
struct rtl_fw *rtl_fw;
+ struct r8169_led_classdev *leds;
+
u32 ocp_base;
};
RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
}
+static void rtl_dash_loop_wait(struct rtl8169_private *tp,
+ const struct rtl_cond *c,
+ unsigned long usecs, int n, bool high)
+{
+ if (!tp->dash_enabled)
+ return;
+ rtl_loop_wait(tp, c, usecs, n, high);
+}
+
+static void rtl_dash_loop_wait_high(struct rtl8169_private *tp,
+ const struct rtl_cond *c,
+ unsigned long d, int n)
+{
+ rtl_dash_loop_wait(tp, c, d, n, true);
+}
+
+static void rtl_dash_loop_wait_low(struct rtl8169_private *tp,
+ const struct rtl_cond *c,
+ unsigned long d, int n)
+{
+ rtl_dash_loop_wait(tp, c, d, n, false);
+}
+
static void rtl8168dp_driver_start(struct rtl8169_private *tp)
{
r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
- rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
+ rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
}
static void rtl8168ep_driver_start(struct rtl8169_private *tp)
{
r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START);
r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
- rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
+ rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
}
static void rtl8168_driver_start(struct rtl8169_private *tp)
static void rtl8168dp_driver_stop(struct rtl8169_private *tp)
{
r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP);
- rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
+ rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
}
static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
rtl8168ep_stop_cmac(tp);
r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP);
r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
- rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
+ rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
}
static void rtl8168_driver_stop(struct rtl8169_private *tp)
cancel_work_sync(&tp->wk.work);
+ if (IS_ENABLED(CONFIG_R8169_LEDS))
+ r8169_remove_leds(tp->leds);
+
unregister_netdev(tp->dev);
if (tp->dash_type != RTL_DASH_NONE)
struct mii_bus *new_bus;
int ret;
+ /* On some boards with this chip version the BIOS is buggy and misses
+ * to reset the PHY page selector. This results in the PHY ID read
+ * accessing registers on a different page, returning a more or
+ * less random value. Fix this by resetting the page selector first.
+ */
+ if (tp->mac_version == RTL_GIGA_MAC_VER_25 ||
+ tp->mac_version == RTL_GIGA_MAC_VER_26)
+ r8169_mdio_write(tp, 0x1f, 0);
+
new_bus = devm_mdiobus_alloc(&pdev->dev);
if (!new_bus)
return -ENOMEM;
if (IS_ENABLED(CONFIG_R8169_LEDS)) {
if (rtl_is_8125(tp))
- rtl8125_init_leds(dev);
+ tp->leds = rtl8125_init_leds(dev);
else if (tp->mac_version > RTL_GIGA_MAC_VER_06)
- rtl8168_init_leds(dev);
+ tp->leds = rtl8168_init_leds(dev);
}
netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n",
dma_addr_t dma_addr;
int rx_packets = 0;
u8 desc_status;
- u16 pkt_len;
+ u16 desc_len;
u8 die_dt;
int entry;
int limit;
int i;
- entry = priv->cur_rx[q] % priv->num_rx_ring[q];
limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
stats = &priv->stats[q];
- desc = &priv->rx_ring[q].desc[entry];
- for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) {
+ for (i = 0; i < limit; i++, priv->cur_rx[q]++) {
+ entry = priv->cur_rx[q] % priv->num_rx_ring[q];
+ desc = &priv->rx_ring[q].desc[entry];
+ if (rx_packets == *quota || desc->die_dt == DT_FEMPTY)
+ break;
+
/* Descriptor type must be checked before all other reads */
dma_rmb();
desc_status = desc->msc;
- pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
+ desc_len = le16_to_cpu(desc->ds_cc) & RX_DS;
/* We use 0-byte descriptors to mark the DMA mapping errors */
- if (!pkt_len)
+ if (!desc_len)
continue;
if (desc_status & MSC_MC)
switch (die_dt) {
case DT_FSINGLE:
skb = ravb_get_skb_gbeth(ndev, entry, desc);
- skb_put(skb, pkt_len);
+ skb_put(skb, desc_len);
skb->protocol = eth_type_trans(skb, ndev);
if (ndev->features & NETIF_F_RXCSUM)
ravb_rx_csum_gbeth(skb);
napi_gro_receive(&priv->napi[q], skb);
rx_packets++;
- stats->rx_bytes += pkt_len;
+ stats->rx_bytes += desc_len;
break;
case DT_FSTART:
priv->rx_1st_skb = ravb_get_skb_gbeth(ndev, entry, desc);
- skb_put(priv->rx_1st_skb, pkt_len);
+ skb_put(priv->rx_1st_skb, desc_len);
break;
case DT_FMID:
skb = ravb_get_skb_gbeth(ndev, entry, desc);
skb_copy_to_linear_data_offset(priv->rx_1st_skb,
priv->rx_1st_skb->len,
skb->data,
- pkt_len);
- skb_put(priv->rx_1st_skb, pkt_len);
+ desc_len);
+ skb_put(priv->rx_1st_skb, desc_len);
dev_kfree_skb(skb);
break;
case DT_FEND:
skb_copy_to_linear_data_offset(priv->rx_1st_skb,
priv->rx_1st_skb->len,
skb->data,
- pkt_len);
- skb_put(priv->rx_1st_skb, pkt_len);
+ desc_len);
+ skb_put(priv->rx_1st_skb, desc_len);
dev_kfree_skb(skb);
priv->rx_1st_skb->protocol =
eth_type_trans(priv->rx_1st_skb, ndev);
if (ndev->features & NETIF_F_RXCSUM)
- ravb_rx_csum_gbeth(skb);
+ ravb_rx_csum_gbeth(priv->rx_1st_skb);
+ stats->rx_bytes += priv->rx_1st_skb->len;
napi_gro_receive(&priv->napi[q],
priv->rx_1st_skb);
rx_packets++;
- stats->rx_bytes += pkt_len;
break;
}
}
-
- entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
- desc = &priv->rx_ring[q].desc[entry];
}
/* Refill the RX ring buffers. */
{
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
- int entry = priv->cur_rx[q] % priv->num_rx_ring[q];
- int boguscnt = (priv->dirty_rx[q] + priv->num_rx_ring[q]) -
- priv->cur_rx[q];
struct net_device_stats *stats = &priv->stats[q];
struct ravb_ex_rx_desc *desc;
+ unsigned int limit, i;
struct sk_buff *skb;
dma_addr_t dma_addr;
struct timespec64 ts;
+ int rx_packets = 0;
u8 desc_status;
u16 pkt_len;
- int limit;
+ int entry;
+
+ limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
+ for (i = 0; i < limit; i++, priv->cur_rx[q]++) {
+ entry = priv->cur_rx[q] % priv->num_rx_ring[q];
+ desc = &priv->rx_ring[q].ex_desc[entry];
+ if (rx_packets == *quota || desc->die_dt == DT_FEMPTY)
+ break;
- boguscnt = min(boguscnt, *quota);
- limit = boguscnt;
- desc = &priv->rx_ring[q].ex_desc[entry];
- while (desc->die_dt != DT_FEMPTY) {
/* Descriptor type must be checked before all other reads */
dma_rmb();
desc_status = desc->msc;
pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
- if (--boguscnt < 0)
- break;
-
/* We use 0-byte descriptors to mark the DMA mapping errors */
if (!pkt_len)
continue;
if (ndev->features & NETIF_F_RXCSUM)
ravb_rx_csum(skb);
napi_gro_receive(&priv->napi[q], skb);
- stats->rx_packets++;
+ rx_packets++;
stats->rx_bytes += pkt_len;
}
-
- entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
- desc = &priv->rx_ring[q].ex_desc[entry];
}
/* Refill the RX ring buffers. */
desc->die_dt = DT_FEMPTY;
}
- *quota -= limit - (++boguscnt);
-
- return boguscnt <= 0;
+ stats->rx_packets += rx_packets;
+ *quota -= rx_packets;
+ return *quota == 0;
}
/* Packet receive function for Ethernet AVB */
int q = napi - priv->napi;
int mask = BIT(q);
int quota = budget;
+ bool unmask;
/* Processing RX Descriptor Ring */
/* Clear RX interrupt */
ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
- if (ravb_rx(ndev, "a, q))
- goto out;
+ unmask = !ravb_rx(ndev, "a, q);
/* Processing TX Descriptor Ring */
spin_lock_irqsave(&priv->lock, flags);
netif_wake_subqueue(ndev, q);
spin_unlock_irqrestore(&priv->lock, flags);
+ /* Receive error message handling */
+ priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
+ if (info->nc_queues)
+ priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
+ if (priv->rx_over_errors != ndev->stats.rx_over_errors)
+ ndev->stats.rx_over_errors = priv->rx_over_errors;
+ if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
+ ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
+
+ if (!unmask)
+ goto out;
+
napi_complete(napi);
/* Re-enable RX/TX interrupts */
}
spin_unlock_irqrestore(&priv->lock, flags);
- /* Receive error message handling */
- priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
- if (info->nc_queues)
- priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
- if (priv->rx_over_errors != ndev->stats.rx_over_errors)
- ndev->stats.rx_over_errors = priv->rx_over_errors;
- if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
- ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
out:
return budget - quota;
}
struct platform_device *pdev = priv->pdev;
struct net_device *ndev = priv->ndev;
struct device *dev = &pdev->dev;
- const char *dev_name;
+ const char *devname = dev_name(dev);
unsigned long flags;
int error, irq_num;
if (irq_name) {
- dev_name = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", ndev->name, ch);
- if (!dev_name)
+ devname = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", devname, ch);
+ if (!devname)
return -ENOMEM;
irq_num = platform_get_irq_byname(pdev, irq_name);
flags = 0;
} else {
- dev_name = ndev->name;
irq_num = platform_get_irq(pdev, 0);
flags = IRQF_SHARED;
}
if (irq)
*irq = irq_num;
- error = devm_request_irq(dev, irq_num, handler, flags, dev_name, ndev);
+ error = devm_request_irq(dev, irq_num, handler, flags, devname, ndev);
if (error)
- netdev_err(ndev, "cannot request IRQ %s\n", dev_name);
+ netdev_err(ndev, "cannot request IRQ %s\n", devname);
return error;
}
* the macros available to do this only define GCC 8.
*/
__diag_push();
-__diag_ignore(GCC, 8, "-Woverride-init",
+__diag_ignore_all("-Woverride-init",
"logic to initialize all and then override some is OK");
static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
SH_ETH_OFFSET_DEFAULTS,
extern const struct stmmac_mode_ops dwmac4_ring_mode_ops;
struct mac_link {
+ u32 caps;
u32 speed_mask;
u32 speed10;
u32 speed100;
priv->dev->priv_flags |= IFF_UNICAST_FLT;
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000;
/* The loopback bit seems to be re-set when link change
* Simply mask it each time
* Speed 10/100/1000 are set in BIT(2)/BIT(3)
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000;
mac->link.duplex = GMAC_CONTROL_DM;
mac->link.speed10 = GMAC_CONTROL_PS;
mac->link.speed100 = GMAC_CONTROL_PS | GMAC_CONTROL_FES;
dev_info(priv->device, "\tDWMAC100\n");
mac->pcsr = priv->ioaddr;
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100;
mac->link.duplex = MAC_CONTROL_F;
mac->link.speed10 = 0;
mac->link.speed100 = 0;
static void dwmac4_phylink_get_caps(struct stmmac_priv *priv)
{
- priv->phylink_config.mac_capabilities |= MAC_2500FD;
+ if (priv->plat->tx_queues_to_use > 1)
+ priv->hw->link.caps &= ~(MAC_10HD | MAC_100HD | MAC_1000HD);
+ else
+ priv->hw->link.caps |= (MAC_10HD | MAC_100HD | MAC_1000HD);
}
static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
u32 prio, u32 queue)
{
void __iomem *ioaddr = hw->pcsr;
- u32 base_register;
- u32 value;
+ u32 clear_mask = 0;
+ u32 ctrl2, ctrl3;
+ int i;
- base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3;
- if (queue >= 4)
- queue -= 4;
+ ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2);
+ ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3);
- value = readl(ioaddr + base_register);
+ /* The software must ensure that the same priority
+ * is not mapped to multiple Rx queues
+ */
+ for (i = 0; i < 4; i++)
+ clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) &
+ GMAC_RXQCTRL_PSRQX_MASK(i));
- value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue);
- value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
+ ctrl2 &= ~clear_mask;
+ ctrl3 &= ~clear_mask;
+
+ /* First assign new priorities to a queue, then
+ * clear them from others queues
+ */
+ if (queue < 4) {
+ ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
GMAC_RXQCTRL_PSRQX_MASK(queue);
- writel(value, ioaddr + base_register);
+
+ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2);
+ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3);
+ } else {
+ queue -= 4;
+
+ ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
+ GMAC_RXQCTRL_PSRQX_MASK(queue);
+
+ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3);
+ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2);
+ }
}
static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000 | MAC_2500FD;
mac->link.duplex = GMAC_CONFIG_DM;
mac->link.speed10 = GMAC_CONFIG_PS;
mac->link.speed100 = GMAC_CONFIG_FES | GMAC_CONFIG_PS;
writel(XGMAC_INT_DEFAULT_EN, ioaddr + XGMAC_INT_EN);
}
-static void xgmac_phylink_get_caps(struct stmmac_priv *priv)
-{
- priv->phylink_config.mac_capabilities |= MAC_2500FD | MAC_5000FD |
- MAC_10000FD | MAC_25000FD |
- MAC_40000FD | MAC_50000FD |
- MAC_100000FD;
-}
-
static void dwxgmac2_set_mac(void __iomem *ioaddr, bool enable)
{
u32 tx = readl(ioaddr + XGMAC_TX_CONFIG);
u32 queue)
{
void __iomem *ioaddr = hw->pcsr;
- u32 value, reg;
+ u32 clear_mask = 0;
+ u32 ctrl2, ctrl3;
+ int i;
- reg = (queue < 4) ? XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3;
- if (queue >= 4)
+ ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2);
+ ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3);
+
+ /* The software must ensure that the same priority
+ * is not mapped to multiple Rx queues
+ */
+ for (i = 0; i < 4; i++)
+ clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) &
+ XGMAC_PSRQ(i));
+
+ ctrl2 &= ~clear_mask;
+ ctrl3 &= ~clear_mask;
+
+ /* First assign new priorities to a queue, then
+ * clear them from others queues
+ */
+ if (queue < 4) {
+ ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) &
+ XGMAC_PSRQ(queue);
+
+ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2);
+ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3);
+ } else {
queue -= 4;
- value = readl(ioaddr + reg);
- value &= ~XGMAC_PSRQ(queue);
- value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue);
+ ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) &
+ XGMAC_PSRQ(queue);
- writel(value, ioaddr + reg);
+ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3);
+ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2);
+ }
}
static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio,
const struct stmmac_ops dwxgmac210_ops = {
.core_init = dwxgmac2_core_init,
- .phylink_get_caps = xgmac_phylink_get_caps,
.set_mac = dwxgmac2_set_mac,
.rx_ipc = dwxgmac2_rx_ipc,
.rx_queue_enable = dwxgmac2_rx_queue_enable,
const struct stmmac_ops dwxlgmac2_ops = {
.core_init = dwxgmac2_core_init,
- .phylink_get_caps = xgmac_phylink_get_caps,
.set_mac = dwxgmac2_set_mac,
.rx_ipc = dwxgmac2_rx_ipc,
.rx_queue_enable = dwxlgmac2_rx_queue_enable,
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_1000FD | MAC_2500FD | MAC_5000FD |
+ MAC_10000FD;
mac->link.duplex = 0;
mac->link.speed10 = XGMAC_CONFIG_SS_10_MII;
mac->link.speed100 = XGMAC_CONFIG_SS_100_MII;
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_1000FD | MAC_2500FD | MAC_5000FD |
+ MAC_10000FD | MAC_25000FD |
+ MAC_40000FD | MAC_50000FD |
+ MAC_100000FD;
mac->link.duplex = 0;
mac->link.speed1000 = XLGMAC_CONFIG_SS_1000;
mac->link.speed2500 = XLGMAC_CONFIG_SS_2500;
unsigned int mmc_tx_excessdef;
unsigned int mmc_tx_pause_frame;
unsigned int mmc_tx_vlan_frame_g;
+ unsigned int mmc_tx_oversize_g;
unsigned int mmc_tx_lpi_usec;
unsigned int mmc_tx_lpi_tran;
unsigned int mmc_rx_fifo_overflow;
unsigned int mmc_rx_vlan_frames_gb;
unsigned int mmc_rx_watchdog_error;
+ unsigned int mmc_rx_error;
unsigned int mmc_rx_lpi_usec;
unsigned int mmc_rx_lpi_tran;
unsigned int mmc_rx_discard_frames_gb;
#define MMC_TX_EXCESSDEF 0x6c
#define MMC_TX_PAUSE_FRAME 0x70
#define MMC_TX_VLAN_FRAME_G 0x74
+#define MMC_TX_OVERSIZE_G 0x78
/* MMC RX counter registers */
#define MMC_RX_FRAMECOUNT_GB 0x80
#define MMC_RX_FIFO_OVERFLOW 0xd4
#define MMC_RX_VLAN_FRAMES_GB 0xd8
#define MMC_RX_WATCHDOG_ERROR 0xdc
+#define MMC_RX_ERROR 0xe0
+
+#define MMC_TX_LPI_USEC 0xec
+#define MMC_TX_LPI_TRAN 0xf0
+#define MMC_RX_LPI_USEC 0xf4
+#define MMC_RX_LPI_TRAN 0xf8
+
/* IPC*/
#define MMC_RX_IPC_INTR_MASK 0x100
#define MMC_RX_IPC_INTR 0x108
mmc->mmc_tx_excessdef += readl(mmcaddr + MMC_TX_EXCESSDEF);
mmc->mmc_tx_pause_frame += readl(mmcaddr + MMC_TX_PAUSE_FRAME);
mmc->mmc_tx_vlan_frame_g += readl(mmcaddr + MMC_TX_VLAN_FRAME_G);
+ mmc->mmc_tx_oversize_g += readl(mmcaddr + MMC_TX_OVERSIZE_G);
+ mmc->mmc_tx_lpi_usec += readl(mmcaddr + MMC_TX_LPI_USEC);
+ mmc->mmc_tx_lpi_tran += readl(mmcaddr + MMC_TX_LPI_TRAN);
/* MMC RX counter registers */
mmc->mmc_rx_framecount_gb += readl(mmcaddr + MMC_RX_FRAMECOUNT_GB);
mmc->mmc_rx_fifo_overflow += readl(mmcaddr + MMC_RX_FIFO_OVERFLOW);
mmc->mmc_rx_vlan_frames_gb += readl(mmcaddr + MMC_RX_VLAN_FRAMES_GB);
mmc->mmc_rx_watchdog_error += readl(mmcaddr + MMC_RX_WATCHDOG_ERROR);
+ mmc->mmc_rx_error += readl(mmcaddr + MMC_RX_ERROR);
+ mmc->mmc_rx_lpi_usec += readl(mmcaddr + MMC_RX_LPI_USEC);
+ mmc->mmc_rx_lpi_tran += readl(mmcaddr + MMC_RX_LPI_TRAN);
+
/* IPv4 */
mmc->mmc_rx_ipv4_gd += readl(mmcaddr + MMC_RX_IPV4_GD);
mmc->mmc_rx_ipv4_hderr += readl(mmcaddr + MMC_RX_IPV4_HDERR);
STMMAC_MMC_STAT(mmc_tx_excessdef),
STMMAC_MMC_STAT(mmc_tx_pause_frame),
STMMAC_MMC_STAT(mmc_tx_vlan_frame_g),
+ STMMAC_MMC_STAT(mmc_tx_oversize_g),
STMMAC_MMC_STAT(mmc_tx_lpi_usec),
STMMAC_MMC_STAT(mmc_tx_lpi_tran),
STMMAC_MMC_STAT(mmc_rx_framecount_gb),
STMMAC_MMC_STAT(mmc_rx_fifo_overflow),
STMMAC_MMC_STAT(mmc_rx_vlan_frames_gb),
STMMAC_MMC_STAT(mmc_rx_watchdog_error),
+ STMMAC_MMC_STAT(mmc_rx_error),
STMMAC_MMC_STAT(mmc_rx_lpi_usec),
STMMAC_MMC_STAT(mmc_rx_lpi_tran),
STMMAC_MMC_STAT(mmc_rx_discard_frames_gb),
return ret;
}
-static void stmmac_set_half_duplex(struct stmmac_priv *priv)
-{
- /* Half-Duplex can only work with single tx queue */
- if (priv->plat->tx_queues_to_use > 1)
- priv->phylink_config.mac_capabilities &=
- ~(MAC_10HD | MAC_100HD | MAC_1000HD);
- else
- priv->phylink_config.mac_capabilities |=
- (MAC_10HD | MAC_100HD | MAC_1000HD);
-}
-
static int stmmac_phy_setup(struct stmmac_priv *priv)
{
struct stmmac_mdio_bus_data *mdio_bus_data;
xpcs_get_interfaces(priv->hw->xpcs,
priv->phylink_config.supported_interfaces);
- priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
- MAC_10FD | MAC_100FD |
- MAC_1000FD;
-
- stmmac_set_half_duplex(priv);
-
/* Get the MAC specific capabilities */
stmmac_mac_phylink_get_caps(priv);
+ priv->phylink_config.mac_capabilities = priv->hw->link.caps;
+
max_speed = priv->plat->max_speed;
if (max_speed)
phylink_limit_mac_speed(&priv->phylink_config, max_speed);
{
struct stmmac_priv *priv = netdev_priv(dev);
int ret = 0, i;
+ int max_speed;
if (netif_running(dev))
stmmac_release(dev);
priv->rss.table[i] = ethtool_rxfh_indir_default(i,
rx_cnt);
- stmmac_set_half_duplex(priv);
+ stmmac_mac_phylink_get_caps(priv);
+
+ priv->phylink_config.mac_capabilities = priv->hw->link.caps;
+
+ max_speed = priv->plat->max_speed;
+ if (max_speed)
+ phylink_limit_mac_speed(&priv->phylink_config, max_speed);
+
stmmac_napi_add(dev);
if (netif_running(dev))
static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
{
+ struct am65_cpsw_rx_chn *rx_chan = &common->rx_chns;
+ struct am65_cpsw_tx_chn *tx_chan = common->tx_chns;
struct device *dev = common->dev;
struct am65_cpsw_port *port;
int ret = 0, i;
if (ret)
return ret;
+ /* The DMA Channels are not guaranteed to be in a clean state.
+ * Reset and disable them to ensure that they are back to the
+ * clean state and ready to be used.
+ */
+ for (i = 0; i < common->tx_ch_num; i++) {
+ k3_udma_glue_reset_tx_chn(tx_chan[i].tx_chn, &tx_chan[i],
+ am65_cpsw_nuss_tx_cleanup);
+ k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn);
+ }
+
+ for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++)
+ k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan,
+ am65_cpsw_nuss_rx_cleanup, !!i);
+
+ k3_udma_glue_disable_rx_chn(rx_chan->rx_chn);
+
ret = am65_cpsw_nuss_register_devlink(common);
if (ret)
return ret;
struct am65_cpts_skb_cb_data *skb_cb =
(struct am65_cpts_skb_cb_data *)skb->cb;
+ if ((ptp_classify_raw(skb) & PTP_CLASS_V1) &&
+ ((mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK) ==
+ (skb_cb->skb_mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK)))
+ mtype_seqid = skb_cb->skb_mtype_seqid;
+
if (mtype_seqid == skb_cb->skb_mtype_seqid) {
u64 ns = event->timestamp;
if (!i)
fdqring_id = k3_udma_glue_rx_flow_get_fdq_id(rx_chn->rx_chn,
i);
- rx_chn->irq[i] = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i);
- if (rx_chn->irq[i] <= 0) {
- ret = rx_chn->irq[i];
+ ret = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i);
+ if (ret <= 0) {
+ if (!ret)
+ ret = -ENXIO;
netdev_err(ndev, "Failed to get rx dma irq");
goto fail;
}
+ rx_chn->irq[i] = ret;
}
return 0;
*/
static int wx_acquire_msix_vectors(struct wx *wx)
{
- struct irq_affinity affd = {0, };
+ struct irq_affinity affd = { .pre_vectors = 1 };
int nvecs, i;
/* We start by asking for one vector per queue pair */
char clk_name[32];
struct clk *clk;
- snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d",
+ snprintf(clk_name, sizeof(clk_name), "i2c_designware.%d",
pci_dev_id(pdev));
clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000);
}
/* map device registers */
- lp->regs = devm_platform_ioremap_resource_byname(pdev, 0);
+ lp->regs = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(lp->regs)) {
dev_err(&pdev->dev, "could not map TEMAC registers\n");
return -ENOMEM;
__be16 sport;
int err;
- if (!pskb_inet_may_pull(skb))
+ if (!skb_vlan_inet_prepare(skb))
return -EINVAL;
if (!gs4)
__be16 sport;
int err;
- if (!pskb_inet_may_pull(skb))
+ if (!skb_vlan_inet_prepare(skb))
return -EINVAL;
if (!gs6)
static void gtp_dellink(struct net_device *dev, struct list_head *head)
{
struct gtp_dev *gtp = netdev_priv(dev);
+ struct hlist_node *next;
struct pdp_ctx *pctx;
int i;
for (i = 0; i < gtp->hash_size; i++)
- hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid)
+ hlist_for_each_entry_safe(pctx, next, >p->tid_hash[i], hlist_tid)
pdp_context_delete(pctx);
list_del_rcu(>p->list);
int i;
kfree(nvdev->extension);
- vfree(nvdev->recv_buf);
- vfree(nvdev->send_buf);
+
+ if (!nvdev->recv_buf_gpadl_handle.decrypted)
+ vfree(nvdev->recv_buf);
+ if (!nvdev->send_buf_gpadl_handle.decrypted)
+ vfree(nvdev->send_buf);
bitmap_free(nvdev->send_section_map);
for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
struct metadata_dst *md_dst;
struct macsec_rxh_data *rxd;
struct macsec_dev *macsec;
+ bool is_macsec_md_dst;
rcu_read_lock();
rxd = macsec_data_rcu(skb->dev);
md_dst = skb_metadata_dst(skb);
+ is_macsec_md_dst = md_dst && md_dst->type == METADATA_MACSEC;
list_for_each_entry_rcu(macsec, &rxd->secys, secys) {
struct sk_buff *nskb;
* the SecTAG, so we have to deduce which port to deliver to.
*/
if (macsec_is_offloaded(macsec) && netif_running(ndev)) {
- struct macsec_rx_sc *rx_sc = NULL;
+ const struct macsec_ops *ops;
- if (md_dst && md_dst->type == METADATA_MACSEC)
- rx_sc = find_rx_sc(&macsec->secy, md_dst->u.macsec_info.sci);
+ ops = macsec_get_ops(macsec, NULL);
- if (md_dst && md_dst->type == METADATA_MACSEC && !rx_sc)
+ if (ops->rx_uses_md_dst && !is_macsec_md_dst)
continue;
+ if (is_macsec_md_dst) {
+ struct macsec_rx_sc *rx_sc;
+
+ /* All drivers that implement MACsec offload
+ * support using skb metadata destinations must
+ * indicate that they do so.
+ */
+ DEBUG_NET_WARN_ON_ONCE(!ops->rx_uses_md_dst);
+ rx_sc = find_rx_sc(&macsec->secy,
+ md_dst->u.macsec_info.sci);
+ if (!rx_sc)
+ continue;
+ /* device indicated macsec offload occurred */
+ skb->dev = ndev;
+ skb->pkt_type = PACKET_HOST;
+ eth_skb_pkt_type(skb, ndev);
+ ret = RX_HANDLER_ANOTHER;
+ goto out;
+ }
+
+ /* This datapath is insecure because it is unable to
+ * enforce isolation of broadcast/multicast traffic and
+ * unicast traffic with promiscuous mode on the macsec
+ * netdev. Since the core stack has no mechanism to
+ * check that the hardware did indeed receive MACsec
+ * traffic, it is possible that the response handling
+ * done by the MACsec port was to a plaintext packet.
+ * This violates the MACsec protocol standard.
+ */
if (ether_addr_equal_64bits(hdr->h_dest,
ndev->dev_addr)) {
/* exact match, divert skb to this port */
break;
nskb->dev = ndev;
- if (ether_addr_equal_64bits(hdr->h_dest,
- ndev->broadcast))
- nskb->pkt_type = PACKET_BROADCAST;
- else
- nskb->pkt_type = PACKET_MULTICAST;
+ eth_skb_pkt_type(nskb, ndev);
__netif_rx(nskb);
- } else if (rx_sc || ndev->flags & IFF_PROMISC) {
+ } else if (ndev->flags & IFF_PROMISC) {
skb->dev = ndev;
skb->pkt_type = PACKET_HOST;
ret = RX_HANDLER_ANOTHER;
phy_ctrl_val = dp83869->mode;
if (phydev->interface == PHY_INTERFACE_MODE_MII) {
if (dp83869->mode == DP83869_100M_MEDIA_CONVERT ||
- dp83869->mode == DP83869_RGMII_100_BASE) {
+ dp83869->mode == DP83869_RGMII_100_BASE ||
+ dp83869->mode == DP83869_RGMII_COPPER_ETHERNET) {
phy_ctrl_val |= DP83869_OP_MODE_MII;
} else {
phydev_err(phydev, "selected op-mode is not valid with MII mode\n");
#define MTK_PHY_LED_ON_LINK1000 BIT(0)
#define MTK_PHY_LED_ON_LINK100 BIT(1)
#define MTK_PHY_LED_ON_LINK10 BIT(2)
+#define MTK_PHY_LED_ON_LINK (MTK_PHY_LED_ON_LINK10 |\
+ MTK_PHY_LED_ON_LINK100 |\
+ MTK_PHY_LED_ON_LINK1000)
#define MTK_PHY_LED_ON_LINKDOWN BIT(3)
#define MTK_PHY_LED_ON_FDX BIT(4) /* Full duplex */
#define MTK_PHY_LED_ON_HDX BIT(5) /* Half duplex */
#define MTK_PHY_LED_BLINK_100RX BIT(3)
#define MTK_PHY_LED_BLINK_10TX BIT(4)
#define MTK_PHY_LED_BLINK_10RX BIT(5)
+#define MTK_PHY_LED_BLINK_RX (MTK_PHY_LED_BLINK_10RX |\
+ MTK_PHY_LED_BLINK_100RX |\
+ MTK_PHY_LED_BLINK_1000RX)
+#define MTK_PHY_LED_BLINK_TX (MTK_PHY_LED_BLINK_10TX |\
+ MTK_PHY_LED_BLINK_100TX |\
+ MTK_PHY_LED_BLINK_1000TX)
#define MTK_PHY_LED_BLINK_COLLISION BIT(6)
#define MTK_PHY_LED_BLINK_RX_CRC_ERR BIT(7)
#define MTK_PHY_LED_BLINK_RX_IDLE_ERR BIT(8)
if (blink < 0)
return -EIO;
- if ((on & (MTK_PHY_LED_ON_LINK1000 | MTK_PHY_LED_ON_LINK100 |
- MTK_PHY_LED_ON_LINK10)) ||
- (blink & (MTK_PHY_LED_BLINK_1000RX | MTK_PHY_LED_BLINK_100RX |
- MTK_PHY_LED_BLINK_10RX | MTK_PHY_LED_BLINK_1000TX |
- MTK_PHY_LED_BLINK_100TX | MTK_PHY_LED_BLINK_10TX)))
+ if ((on & (MTK_PHY_LED_ON_LINK | MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX |
+ MTK_PHY_LED_ON_LINKDOWN)) ||
+ (blink & (MTK_PHY_LED_BLINK_RX | MTK_PHY_LED_BLINK_TX)))
set_bit(bit_netdev, &priv->led_state);
else
clear_bit(bit_netdev, &priv->led_state);
if (!rules)
return 0;
- if (on & (MTK_PHY_LED_ON_LINK1000 | MTK_PHY_LED_ON_LINK100 | MTK_PHY_LED_ON_LINK10))
+ if (on & MTK_PHY_LED_ON_LINK)
*rules |= BIT(TRIGGER_NETDEV_LINK);
if (on & MTK_PHY_LED_ON_LINK10)
if (on & MTK_PHY_LED_ON_HDX)
*rules |= BIT(TRIGGER_NETDEV_HALF_DUPLEX);
- if (blink & (MTK_PHY_LED_BLINK_1000RX | MTK_PHY_LED_BLINK_100RX | MTK_PHY_LED_BLINK_10RX))
+ if (blink & MTK_PHY_LED_BLINK_RX)
*rules |= BIT(TRIGGER_NETDEV_RX);
- if (blink & (MTK_PHY_LED_BLINK_1000TX | MTK_PHY_LED_BLINK_100TX | MTK_PHY_LED_BLINK_10TX))
+ if (blink & MTK_PHY_LED_BLINK_TX)
*rules |= BIT(TRIGGER_NETDEV_TX);
return 0;
on |= MTK_PHY_LED_ON_LINK1000;
if (rules & BIT(TRIGGER_NETDEV_RX)) {
- blink |= MTK_PHY_LED_BLINK_10RX |
- MTK_PHY_LED_BLINK_100RX |
- MTK_PHY_LED_BLINK_1000RX;
+ blink |= (on & MTK_PHY_LED_ON_LINK) ?
+ (((on & MTK_PHY_LED_ON_LINK10) ? MTK_PHY_LED_BLINK_10RX : 0) |
+ ((on & MTK_PHY_LED_ON_LINK100) ? MTK_PHY_LED_BLINK_100RX : 0) |
+ ((on & MTK_PHY_LED_ON_LINK1000) ? MTK_PHY_LED_BLINK_1000RX : 0)) :
+ MTK_PHY_LED_BLINK_RX;
}
if (rules & BIT(TRIGGER_NETDEV_TX)) {
- blink |= MTK_PHY_LED_BLINK_10TX |
- MTK_PHY_LED_BLINK_100TX |
- MTK_PHY_LED_BLINK_1000TX;
+ blink |= (on & MTK_PHY_LED_ON_LINK) ?
+ (((on & MTK_PHY_LED_ON_LINK10) ? MTK_PHY_LED_BLINK_10TX : 0) |
+ ((on & MTK_PHY_LED_ON_LINK100) ? MTK_PHY_LED_BLINK_100TX : 0) |
+ ((on & MTK_PHY_LED_ON_LINK1000) ? MTK_PHY_LED_BLINK_1000TX : 0)) :
+ MTK_PHY_LED_BLINK_TX;
}
if (blink || on)
MTK_PHY_LED0_ON_CTRL,
MTK_PHY_LED_ON_FDX |
MTK_PHY_LED_ON_HDX |
- MTK_PHY_LED_ON_LINK10 |
- MTK_PHY_LED_ON_LINK100 |
- MTK_PHY_LED_ON_LINK1000,
+ MTK_PHY_LED_ON_LINK,
on);
if (ret)
struct lan8814_ptp_rx_ts *rx_ts, *tmp;
int txcfg = 0, rxcfg = 0;
int pkt_ts_enable;
+ int tx_mod;
ptp_priv->hwts_tx_type = config->tx_type;
ptp_priv->rx_filter = config->rx_filter;
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable);
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable);
- if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC)
+ tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD);
+ if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) {
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
- PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) {
+ lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
+ tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ }
if (config->rx_filter != HWTSTAMP_FILTER_NONE)
lan8814_config_ts_intr(ptp_priv->phydev, true);
}
}
-static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
+static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
{
struct ptp_header *ptp_header;
u32 type;
ptp_header = ptp_parse_header(skb, type);
skb_pull_inline(skb, ETH_HLEN);
+ if (!ptp_header)
+ return false;
+
*sig = (__force u16)(ntohs(ptp_header->sequence_id));
+ return true;
}
static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv,
bool ret = false;
u16 skb_sig;
- lan8814_get_sig_rx(skb, &skb_sig);
+ if (!lan8814_get_sig_rx(skb, &skb_sig))
+ return ret;
/* Iterate over all RX timestamps and match it with the received skbs */
spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags);
return 0;
}
-static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
+static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
{
struct ptp_header *ptp_header;
u32 type;
type = ptp_classify_raw(skb);
ptp_header = ptp_parse_header(skb, type);
+ if (!ptp_header)
+ return false;
+
*sig = (__force u16)(ntohs(ptp_header->sequence_id));
+ return true;
}
static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv,
spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags);
skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) {
- lan8814_get_sig_tx(skb, &skb_sig);
+ if (!lan8814_get_sig_tx(skb, &skb_sig))
+ continue;
if (memcmp(&skb_sig, &seq_id, sizeof(seq_id)))
continue;
spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags);
skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) {
- lan8814_get_sig_rx(skb, &skb_sig);
+ if (!lan8814_get_sig_rx(skb, &skb_sig))
+ continue;
if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id)))
continue;
static int at8031_probe(struct phy_device *phydev)
{
- struct at803x_priv *priv = phydev->priv;
+ struct at803x_priv *priv;
int mode_cfg;
int ccr;
int ret;
if (ret)
return ret;
+ priv = phydev->priv;
+
/* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping
* options.
*/
tun_is_little_endian(tun), true,
vlan_hlen)) {
struct skb_shared_info *sinfo = skb_shinfo(skb);
- pr_err("unexpected GSO type: "
- "0x%x, gso_size %d, hdr_len %d\n",
- sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
- tun16_to_cpu(tun, gso.hdr_len));
- print_hex_dump(KERN_ERR, "tun: ",
- DUMP_PREFIX_NONE,
- 16, 1, skb->head,
- min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
+
+ if (net_ratelimit()) {
+ netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
+ sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
+ tun16_to_cpu(tun, gso.hdr_len));
+ print_hex_dump(KERN_ERR, "tun: ",
+ DUMP_PREFIX_NONE,
+ 16, 1, skb->head,
+ min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
+ }
WARN_ON_ONCE(1);
return -EINVAL;
}
if (is_valid_ether_addr(mac)) {
eth_hw_addr_set(dev->net, mac);
+ if (!is_local_ether_addr(mac))
+ dev->net->addr_assign_type = NET_ADDR_PERM;
} else {
netdev_info(dev->net, "invalid MAC address, using random\n");
eth_hw_addr_random(dev->net);
netif_set_tso_max_size(dev->net, 16384);
+ ax88179_reset(dev);
+
return 0;
}
/* Skip IP alignment pseudo header */
skb_pull(skb, 2);
- skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd);
ax88179_rx_checksum(skb, pkt_hdr);
return 1;
}
- ax_skb = skb_clone(skb, GFP_ATOMIC);
+ ax_skb = netdev_alloc_skb_ip_align(dev->net, pkt_len);
if (!ax_skb)
return 0;
- skb_trim(ax_skb, pkt_len);
-
- /* Skip IP alignment pseudo header */
- skb_pull(ax_skb, 2);
+ skb_put(ax_skb, pkt_len);
+ memcpy(ax_skb->data, skb->data + 2, pkt_len);
- skb->truesize = pkt_len_plus_padd +
- SKB_DATA_ALIGN(sizeof(struct sk_buff));
ax88179_rx_checksum(ax_skb, pkt_hdr);
usbnet_skb_return(dev, ax_skb);
.unbind = ax88179_unbind,
.status = ax88179_status,
.link_reset = ax88179_link_reset,
- .reset = ax88179_reset,
.stop = ax88179_stop,
.flags = FLAG_ETHER | FLAG_FRAMING_AX,
.rx_fixup = ax88179_rx_fixup,
.unbind = ax88179_unbind,
.status = ax88179_status,
.link_reset = ax88179_link_reset,
- .reset = ax88179_reset,
.stop = ax88179_stop,
.flags = FLAG_ETHER | FLAG_FRAMING_AX,
.rx_fixup = ax88179_rx_fixup,
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990 */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, /* Telit FN920C04 */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a9, 0)}, /* Telit FN920C04 */
{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */
{QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */
{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */
{QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */
{QMI_QUIRK_SET_DTR(0x1546, 0x1312, 4)}, /* u-blox LARA-R6 01B */
{QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */
+ {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */
/* 4. Gobi 1000 devices */
{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
struct netlink_ext_ack *extack)
{
struct virtnet_info *vi = netdev_priv(dev);
+ bool update = false;
int i;
if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
return -EOPNOTSUPP;
if (rxfh->indir) {
+ if (!vi->has_rss)
+ return -EOPNOTSUPP;
+
for (i = 0; i < vi->rss_indir_table_size; ++i)
vi->ctrl->rss.indirection_table[i] = rxfh->indir[i];
+ update = true;
}
- if (rxfh->key)
+
+ if (rxfh->key) {
+ /* If either _F_HASH_REPORT or _F_RSS are negotiated, the
+ * device provides hash calculation capabilities, that is,
+ * hash_key is configured.
+ */
+ if (!vi->has_rss && !vi->has_rss_hash_report)
+ return -EOPNOTSUPP;
+
memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size);
+ update = true;
+ }
- virtnet_commit_rss_command(vi);
+ if (update)
+ virtnet_commit_rss_command(vi);
return 0;
}
if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
vi->has_rss_hash_report = true;
- if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
vi->has_rss = true;
- if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_indir_table_size =
virtio_cread16(vdev, offsetof(struct virtio_net_config,
rss_max_indirection_table_length));
+ }
+
+ if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_key_size =
virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
return false;
+ /* Ignore packets from invalid src-address */
+ if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
+ return false;
+
/* Get address from the outer IP header */
if (vxlan_get_sk_family(vs) == AF_INET) {
saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
offload = &arvif->arp_ns_offload;
count = 0;
+ /* Note: read_lock_bh() calls rcu_read_lock() */
read_lock_bh(&idev->lock);
memset(offload->ipv6_addr, 0, sizeof(offload->ipv6_addr));
}
/* get anycast address */
- for (ifaca6 = idev->ac_list; ifaca6; ifaca6 = ifaca6->aca_next) {
+ for (ifaca6 = rcu_dereference(idev->ac_list); ifaca6;
+ ifaca6 = rcu_dereference(ifaca6->aca_next)) {
if (count >= ATH11K_IPV6_MAX_COUNT)
goto generate;
#include "fw/api/txq.h"
/* Highest firmware API version supported */
-#define IWL_BZ_UCODE_API_MAX 90
+#define IWL_BZ_UCODE_API_MAX 89
/* Lowest firmware API version supported */
#define IWL_BZ_UCODE_API_MIN 80
#include "fw/api/txq.h"
/* Highest firmware API version supported */
-#define IWL_SC_UCODE_API_MAX 90
+#define IWL_SC_UCODE_API_MAX 89
/* Lowest firmware API version supported */
#define IWL_SC_UCODE_API_MIN 82
struct iwl_fw_dbg_params params = {0};
struct iwl_fwrt_dump_data *dump_data =
&fwrt->dump.wks[wk_idx].dump_data;
- u32 policy;
- u32 time_point;
if (!test_bit(wk_idx, &fwrt->dump.active_wks))
return;
iwl_fw_dbg_stop_restart_recording(fwrt, ¶ms, false);
- policy = le32_to_cpu(dump_data->trig->apply_policy);
- time_point = le32_to_cpu(dump_data->trig->time_point);
+ if (iwl_trans_dbg_ini_valid(fwrt->trans)) {
+ u32 policy = le32_to_cpu(dump_data->trig->apply_policy);
+ u32 time_point = le32_to_cpu(dump_data->trig->time_point);
- if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) {
- IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n");
- iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0);
+ if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) {
+ IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n");
+ iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0);
+ }
}
+
if (fwrt->trans->dbg.last_tp_resetfw == IWL_FW_INI_RESET_FW_MODE_STOP_FW_ONLY)
iwl_force_nmi(fwrt->trans);
if (IS_ERR_OR_NULL(vif))
return 1;
- if (ieee80211_vif_is_mld(vif) && vif->cfg.assoc) {
+ if (hweight16(vif->active_links) > 1) {
/*
- * Select the 'best' link. May need to revisit, it seems
- * better to not optimize for throughput but rather range,
- * reliability and power here - and select 2.4 GHz ...
+ * Select the 'best' link.
+ * May need to revisit, it seems better to not optimize
+ * for throughput but rather range, reliability and
+ * power here - and select 2.4 GHz ...
*/
- primary_link =
- iwl_mvm_mld_get_primary_link(mvm, vif,
- vif->active_links);
+ primary_link = iwl_mvm_mld_get_primary_link(mvm, vif,
+ vif->active_links);
if (WARN_ONCE(primary_link < 0, "no primary link in 0x%x\n",
vif->active_links))
ret = ieee80211_set_active_links(vif, BIT(primary_link));
if (ret)
return ret;
+ } else if (vif->active_links) {
+ primary_link = __ffs(vif->active_links);
} else {
primary_link = 0;
}
{
struct dentry *dbgfs_dir = vif->debugfs_dir;
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- char buf[100];
+ char buf[3 * 3 + 11 + (NL80211_WIPHY_NAME_MAXLEN + 1) +
+ (7 + IFNAMSIZ + 1) + 6 + 1];
+ char name[7 + IFNAMSIZ + 1];
/* this will happen in monitor mode */
if (!dbgfs_dir)
* find
* netdev:wlan0 -> ../../../ieee80211/phy0/netdev:wlan0/iwlmvm/
*/
- snprintf(buf, 100, "../../../%pd3/iwlmvm", dbgfs_dir);
+ snprintf(name, sizeof(name), "%pd", dbgfs_dir);
+ snprintf(buf, sizeof(buf), "../../../%pd3/iwlmvm", dbgfs_dir);
- mvmvif->dbgfs_slink = debugfs_create_symlink(dbgfs_dir->d_name.name,
- mvm->debugfs_dir, buf);
+ mvmvif->dbgfs_slink =
+ debugfs_create_symlink(name, mvm->debugfs_dir, buf);
}
void iwl_mvm_vif_dbgfs_rm_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
if (!pasn)
return -ENOBUFS;
+ iwl_mvm_ftm_remove_pasn_sta(mvm, addr);
+
pasn->cipher = iwl_mvm_cipher_to_location_cipher(cipher);
switch (pasn->cipher) {
return ret;
}
+int iwl_mvm_set_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct iwl_mvm_vif_link_info *link_info =
+ mvmvif->link[link_conf->link_id];
+
+ if (link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID) {
+ link_info->fw_link_id = iwl_mvm_get_free_fw_link_id(mvm,
+ mvmvif);
+ if (link_info->fw_link_id >=
+ ARRAY_SIZE(mvm->link_id_to_link_conf))
+ return -EINVAL;
+
+ rcu_assign_pointer(mvm->link_id_to_link_conf[link_info->fw_link_id],
+ link_conf);
+ }
+
+ return 0;
+}
+
int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf)
{
struct iwl_link_config_cmd cmd = {};
unsigned int cmd_id = WIDE_ID(MAC_CONF_GROUP, LINK_CONFIG_CMD);
u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id, 1);
+ int ret;
if (WARN_ON_ONCE(!link_info))
return -EINVAL;
- if (link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID) {
- link_info->fw_link_id = iwl_mvm_get_free_fw_link_id(mvm,
- mvmvif);
- if (link_info->fw_link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf))
- return -EINVAL;
-
- rcu_assign_pointer(mvm->link_id_to_link_conf[link_info->fw_link_id],
- link_conf);
- }
+ ret = iwl_mvm_set_link_mapping(mvm, vif, link_conf);
+ if (ret)
+ return ret;
/* Update SF - Disable if needed. if this fails, SF might still be on
* while many macs are bound, which is forbidden - so fail the binding.
return ret;
}
+int iwl_mvm_unset_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct iwl_mvm_vif_link_info *link_info =
+ mvmvif->link[link_conf->link_id];
+
+ /* mac80211 thought we have the link, but it was never configured */
+ if (WARN_ON(!link_info ||
+ link_info->fw_link_id >=
+ ARRAY_SIZE(mvm->link_id_to_link_conf)))
+ return -EINVAL;
+
+ RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id],
+ NULL);
+ iwl_mvm_release_fw_link_id(mvm, link_info->fw_link_id);
+ return 0;
+}
+
int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf)
{
struct iwl_link_config_cmd cmd = {};
int ret;
- /* mac80211 thought we have the link, but it was never configured */
- if (WARN_ON(!link_info ||
- link_info->fw_link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf)))
+ ret = iwl_mvm_unset_link_mapping(mvm, vif, link_conf);
+ if (ret)
return 0;
- RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id],
- NULL);
cmd.link_id = cpu_to_le32(link_info->fw_link_id);
- iwl_mvm_release_fw_link_id(mvm, link_info->fw_link_id);
link_info->fw_link_id = IWL_MVM_FW_LINK_ID_INVALID;
cmd.spec_link_id = link_conf->link_id;
cmd.phy_id = cpu_to_le32(FW_CTXT_INVALID);
if (mvm->mld_api_is_used && mvm->nvm_data->sku_cap_11be_enable &&
!iwlwifi_mod_params.disable_11ax &&
!iwlwifi_mod_params.disable_11be)
- hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_MLO;
+ hw->wiphy->flags |= WIPHY_FLAG_DISABLE_WEXT;
/* With MLD FW API, it tracks timing by itself,
* no need for any timing from the host
mvmvif->mvm = mvm;
/* the first link always points to the default one */
+ mvmvif->deflink.fw_link_id = IWL_MVM_FW_LINK_ID_INVALID;
+ mvmvif->deflink.active = 0;
mvmvif->link[0] = &mvmvif->deflink;
+ ret = iwl_mvm_set_link_mapping(mvm, vif, &vif->bss_conf);
+ if (ret)
+ goto out;
+
/*
* Not much to do here. The stack will not allow interface
* types or combinations that we didn't advertise, so we
mvm->p2p_device_vif = NULL;
}
+ iwl_mvm_unset_link_mapping(mvm, vif, &vif->bss_conf);
iwl_mvm_mac_ctxt_remove(mvm, vif);
RCU_INIT_POINTER(mvm->vif_id_to_mac[mvmvif->id], NULL);
int iwl_mvm_mld_rm_sta_id(struct iwl_mvm *mvm, u8 sta_id)
{
- int ret = iwl_mvm_mld_rm_sta_from_fw(mvm, sta_id);
+ int ret;
lockdep_assert_held(&mvm->mutex);
+ if (WARN_ON(sta_id == IWL_MVM_INVALID_STA))
+ return 0;
+
+ ret = iwl_mvm_mld_rm_sta_from_fw(mvm, sta_id);
+
RCU_INIT_POINTER(mvm->fw_id_to_mac_id[sta_id], NULL);
RCU_INIT_POINTER(mvm->fw_id_to_link_sta[sta_id], NULL);
return ret;
u32 iwl_mvm_get_lmac_id(struct iwl_mvm *mvm, enum nl80211_band band);
/* Links */
+int iwl_mvm_set_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf);
int iwl_mvm_link_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf,
u32 changes, bool active);
+int iwl_mvm_unset_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf);
int iwl_mvm_disable_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
if (ret)
return ERR_PTR(ret);
- if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size))
+ if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) !=
+ resp_size)) {
+ iwl_free_resp(&cmd);
return ERR_PTR(-EIO);
+ }
resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL);
+ iwl_free_resp(&cmd);
+
if (!resp)
return ERR_PTR(-ENOMEM);
- iwl_free_resp(&cmd);
return resp;
}
static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm,
struct napi_struct *napi,
struct sk_buff *skb, int queue,
- struct ieee80211_sta *sta,
- struct ieee80211_link_sta *link_sta)
+ struct ieee80211_sta *sta)
{
if (unlikely(iwl_mvm_check_pn(mvm, skb, queue, sta))) {
kfree_skb(skb);
return;
}
- if (sta && sta->valid_links && link_sta) {
- struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
-
- rx_status->link_valid = 1;
- rx_status->link_id = link_sta->link_id;
- }
-
ieee80211_rx_napi(mvm->hw, sta, skb, napi);
}
while ((skb = __skb_dequeue(skb_list))) {
iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb,
reorder_buf->queue,
- sta, NULL /* FIXME */);
+ sta);
reorder_buf->num_stored--;
}
}
if (IS_ERR(sta))
sta = NULL;
link_sta = rcu_dereference(mvm->fw_id_to_link_sta[id]);
+
+ if (sta && sta->valid_links && link_sta) {
+ rx_status->link_valid = 1;
+ rx_status->link_id = link_sta->link_id;
+ }
}
} else if (!is_multicast_ether_addr(hdr->addr2)) {
/*
!(desc->amsdu_info & IWL_RX_MPDU_AMSDU_LAST_SUBFRAME))
rx_status->flag |= RX_FLAG_AMSDU_MORE;
- iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta,
- link_sta);
+ iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta);
}
out:
rcu_read_unlock();
if (ver_handler->version != scan_ver)
continue;
- return ver_handler->handler(mvm, vif, params, type, uid);
+ err = ver_handler->handler(mvm, vif, params, type, uid);
+ return err ? : uid;
}
err = iwl_mvm_scan_umac(mvm, vif, params, type, uid);
struct iwl_rx_packet *pkt = rxb_addr(rxb);
struct iwl_mvm_session_prot_notif *notif = (void *)pkt->data;
unsigned int ver =
- iwl_fw_lookup_cmd_ver(mvm->fw,
- WIDE_ID(MAC_CONF_GROUP,
- SESSION_PROTECTION_CMD), 2);
+ iwl_fw_lookup_notif_ver(mvm->fw, MAC_CONF_GROUP,
+ SESSION_PROTECTION_NOTIF, 2);
int id = le32_to_cpu(notif->mac_link_id);
struct ieee80211_vif *vif;
struct iwl_mvm_vif *mvmvif;
return;
tfd_num = iwl_txq_get_cmd_index(txq, ssn);
- read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr);
spin_lock_bh(&txq->lock);
+ read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr);
if (!test_bit(txq_id, trans->txqs.queue_used)) {
IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n",
* Shared-Ant && BTG-path:WL mask(0x55f), others:WL THRU(0x5ff)
*/
if (btc->ant_type == BTC_ANT_SHARED && btc->btg_pos == path)
- rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff);
+ rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x55f);
else
rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff);
}
nla_for_each_nested(peer, peers, rem) {
- struct cfg80211_pmsr_result result;
+ struct cfg80211_pmsr_result result = {};
err = mac80211_hwsim_parse_pmsr_result(peer, &result, info);
if (err)
{
u32 offset = REG_CLDMA_UL_START_ADDRL_0 + qno * ADDR_SIZE;
- return ioread64(hw_info->ap_pdn_base + offset);
+ return ioread64_lo_hi(hw_info->ap_pdn_base + offset);
}
void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qno, u64 address,
reg = tx_rx == MTK_RX ? hw_info->ap_ao_base + REG_CLDMA_DL_START_ADDRL_0 :
hw_info->ap_pdn_base + REG_CLDMA_UL_START_ADDRL_0;
- iowrite64(address, reg + offset);
+ iowrite64_lo_hi(address, reg + offset);
}
void t7xx_cldma_hw_resume_queue(struct t7xx_cldma_hw *hw_info, unsigned int qno,
return -ENODEV;
}
- gpd_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_DL_CURRENT_ADDRL_0 +
- queue->index * sizeof(u64));
+ gpd_addr = ioread64_lo_hi(hw_info->ap_pdn_base +
+ REG_CLDMA_DL_CURRENT_ADDRL_0 +
+ queue->index * sizeof(u64));
if (req->gpd_addr == gpd_addr || hwo_polling_count++ >= 100)
return 0;
struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info;
/* Check current processing TGPD, 64-bit address is in a table by Q index */
- ul_curr_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 +
- queue->index * sizeof(u64));
+ ul_curr_addr = ioread64_lo_hi(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 +
+ queue->index * sizeof(u64));
if (req->gpd_addr != ul_curr_addr) {
spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags);
dev_err(md_ctrl->dev, "CLDMA%d queue %d is not empty\n",
for (i = 0; i < ATR_TABLE_NUM_PER_ATR; i++) {
offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * i;
reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset;
- iowrite64(0, reg);
+ iowrite64_lo_hi(0, reg);
}
}
reg = pbase + ATR_PCIE_WIN0_T0_TRSL_ADDR + offset;
value = cfg->trsl_addr & ATR_PCIE_WIN0_ADDR_ALGMT;
- iowrite64(value, reg);
+ iowrite64_lo_hi(value, reg);
reg = pbase + ATR_PCIE_WIN0_T0_TRSL_PARAM + offset;
iowrite32(cfg->trsl_id, reg);
reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset;
value = (cfg->src_addr & ATR_PCIE_WIN0_ADDR_ALGMT) | (atr_size << 1) | BIT(0);
- iowrite64(value, reg);
+ iowrite64_lo_hi(value, reg);
/* Ensure ATR is set */
- ioread64(reg);
+ ioread64_lo_hi(reg);
return 0;
}
return NULL;
}
skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
+ skb_mark_for_recycle(skb);
/* Align ip header to a 16 bytes boundary */
skb_reserve(skb, NET_IP_ALIGN);
enum trf7970a_state state;
struct device *dev;
struct spi_device *spi;
- struct regulator *regulator;
+ struct regulator *vin_regulator;
+ struct regulator *vddio_regulator;
struct nfc_digital_dev *ddev;
u32 quirks;
bool is_initiator;
if (trf->state != TRF7970A_ST_PWR_OFF)
return 0;
- ret = regulator_enable(trf->regulator);
+ ret = regulator_enable(trf->vin_regulator);
if (ret) {
dev_err(trf->dev, "%s - Can't enable VIN: %d\n", __func__, ret);
return ret;
if (trf->en2_gpiod && !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW))
gpiod_set_value_cansleep(trf->en2_gpiod, 0);
- ret = regulator_disable(trf->regulator);
+ ret = regulator_disable(trf->vin_regulator);
if (ret)
dev_err(trf->dev, "%s - Can't disable VIN: %d\n", __func__,
ret);
mutex_init(&trf->lock);
INIT_DELAYED_WORK(&trf->timeout_work, trf7970a_timeout_work_handler);
- trf->regulator = devm_regulator_get(&spi->dev, "vin");
- if (IS_ERR(trf->regulator)) {
- ret = PTR_ERR(trf->regulator);
+ trf->vin_regulator = devm_regulator_get(&spi->dev, "vin");
+ if (IS_ERR(trf->vin_regulator)) {
+ ret = PTR_ERR(trf->vin_regulator);
dev_err(trf->dev, "Can't get VIN regulator: %d\n", ret);
goto err_destroy_lock;
}
- ret = regulator_enable(trf->regulator);
+ ret = regulator_enable(trf->vin_regulator);
if (ret) {
dev_err(trf->dev, "Can't enable VIN: %d\n", ret);
goto err_destroy_lock;
}
- uvolts = regulator_get_voltage(trf->regulator);
+ uvolts = regulator_get_voltage(trf->vin_regulator);
if (uvolts > 4000000)
trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3;
- trf->regulator = devm_regulator_get(&spi->dev, "vdd-io");
- if (IS_ERR(trf->regulator)) {
- ret = PTR_ERR(trf->regulator);
+ trf->vddio_regulator = devm_regulator_get(&spi->dev, "vdd-io");
+ if (IS_ERR(trf->vddio_regulator)) {
+ ret = PTR_ERR(trf->vddio_regulator);
dev_err(trf->dev, "Can't get VDD_IO regulator: %d\n", ret);
- goto err_destroy_lock;
+ goto err_disable_vin_regulator;
}
- ret = regulator_enable(trf->regulator);
+ ret = regulator_enable(trf->vddio_regulator);
if (ret) {
dev_err(trf->dev, "Can't enable VDD_IO: %d\n", ret);
- goto err_destroy_lock;
+ goto err_disable_vin_regulator;
}
- if (regulator_get_voltage(trf->regulator) == 1800000) {
+ if (regulator_get_voltage(trf->vddio_regulator) == 1800000) {
trf->io_ctrl = TRF7970A_REG_IO_CTRL_IO_LOW;
dev_dbg(trf->dev, "trf7970a config vdd_io to 1.8V\n");
}
if (!trf->ddev) {
dev_err(trf->dev, "Can't allocate NFC digital device\n");
ret = -ENOMEM;
- goto err_disable_regulator;
+ goto err_disable_vddio_regulator;
}
nfc_digital_set_parent_dev(trf->ddev, trf->dev);
trf7970a_shutdown(trf);
err_free_ddev:
nfc_digital_free_device(trf->ddev);
-err_disable_regulator:
- regulator_disable(trf->regulator);
+err_disable_vddio_regulator:
+ regulator_disable(trf->vddio_regulator);
+err_disable_vin_regulator:
+ regulator_disable(trf->vin_regulator);
err_destroy_lock:
mutex_destroy(&trf->lock);
return ret;
nfc_digital_unregister_device(trf->ddev);
nfc_digital_free_device(trf->ddev);
- regulator_disable(trf->regulator);
+ regulator_disable(trf->vddio_regulator);
+ regulator_disable(trf->vin_regulator);
mutex_destroy(&trf->lock);
}
bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT;
struct queue_limits lim;
struct nvme_id_ns_nvm *nvm = NULL;
+ struct nvme_zone_info zi = {};
struct nvme_id_ns *id;
sector_t capacity;
unsigned lbaf;
if (id->ncap == 0) {
/* namespace not allocated or attached */
info->is_removed = true;
- ret = -ENODEV;
+ ret = -ENXIO;
goto out;
}
+ lbaf = nvme_lbaf_index(id->flbas);
if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) {
ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm);
goto out;
}
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ ns->head->ids.csi == NVME_CSI_ZNS) {
+ ret = nvme_query_zone_info(ns, lbaf, &zi);
+ if (ret < 0)
+ goto out;
+ }
+
blk_mq_freeze_queue(ns->disk->queue);
- lbaf = nvme_lbaf_index(id->flbas);
ns->head->lba_shift = id->lbaf[lbaf].ds;
ns->head->nuse = le64_to_cpu(id->nuse);
capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
capacity = 0;
nvme_config_discard(ns, &lim);
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
- ns->head->ids.csi == NVME_CSI_ZNS) {
- ret = nvme_update_zone_info(ns, lbaf, &lim);
- if (ret) {
- blk_mq_unfreeze_queue(ns->disk->queue);
- goto out;
- }
- }
+ ns->head->ids.csi == NVME_CSI_ZNS)
+ nvme_update_zone_info(ns, &lim, &zi);
ret = queue_limits_commit_update(ns->disk->queue, &lim);
if (ret) {
blk_mq_unfreeze_queue(ns->disk->queue);
}
if (!ret && nvme_ns_head_multipath(ns->head)) {
+ struct queue_limits *ns_lim = &ns->disk->queue->limits;
struct queue_limits lim;
blk_mq_freeze_queue(ns->head->disk->queue);
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
nvme_mpath_revalidate_paths(ns);
+ /*
+ * queue_limits mixes values that are the hardware limitations
+ * for bio splitting with what is the device configuration.
+ *
+ * For NVMe the device configuration can change after e.g. a
+ * Format command, and we really want to pick up the new format
+ * value here. But we must still stack the queue limits to the
+ * least common denominator for multipathing to split the bios
+ * properly.
+ *
+ * To work around this, we explicitly set the device
+ * configuration to those that we just queried, but only stack
+ * the splitting limits in to make sure we still obey possibly
+ * lower limitations of other controllers.
+ */
lim = queue_limits_start_update(ns->head->disk->queue);
+ lim.logical_block_size = ns_lim->logical_block_size;
+ lim.physical_block_size = ns_lim->physical_block_size;
+ lim.io_min = ns_lim->io_min;
+ lim.io_opt = ns_lim->io_opt;
queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
ns->head->disk->disk_name);
ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
* controller. Called after last nvme_put_ctrl() call
*/
static void
-nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
+nvme_fc_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .free_ctrl = nvme_fc_nvme_ctrl_freed,
+ .free_ctrl = nvme_fc_free_ctrl,
.submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_delete_ctrl,
.get_address = nvmf_get_address,
}
#endif /* CONFIG_NVME_MULTIPATH */
+struct nvme_zone_info {
+ u64 zone_size;
+ unsigned int max_open_zones;
+ unsigned int max_active_zones;
+};
+
int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
-int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
- struct queue_limits *lim);
+int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf,
+ struct nvme_zone_info *zi);
+void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
+ struct nvme_zone_info *zi);
#ifdef CONFIG_BLK_DEV_ZONED
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd,
return 0;
}
-int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
- struct queue_limits *lim)
+int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf,
+ struct nvme_zone_info *zi)
{
struct nvme_effects_log *log = ns->head->effects;
struct nvme_command c = { };
goto free_data;
}
- ns->head->zsze =
- nvme_lba_to_sect(ns->head, le64_to_cpu(id->lbafe[lbaf].zsze));
- if (!is_power_of_2(ns->head->zsze)) {
+ zi->zone_size = le64_to_cpu(id->lbafe[lbaf].zsze);
+ if (!is_power_of_2(zi->zone_size)) {
dev_warn(ns->ctrl->device,
- "invalid zone size:%llu for namespace:%u\n",
- ns->head->zsze, ns->head->ns_id);
+ "invalid zone size: %llu for namespace: %u\n",
+ zi->zone_size, ns->head->ns_id);
status = -ENODEV;
goto free_data;
}
+ zi->max_open_zones = le32_to_cpu(id->mor) + 1;
+ zi->max_active_zones = le32_to_cpu(id->mar) + 1;
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
- lim->zoned = 1;
- lim->max_open_zones = le32_to_cpu(id->mor) + 1;
- lim->max_active_zones = le32_to_cpu(id->mar) + 1;
- lim->chunk_sectors = ns->head->zsze;
- lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
free_data:
kfree(id);
return status;
}
+void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
+ struct nvme_zone_info *zi)
+{
+ lim->zoned = 1;
+ lim->max_open_zones = zi->max_open_zones;
+ lim->max_active_zones = zi->max_active_zones;
+ lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
+ lim->chunk_sectors = ns->head->zsze =
+ nvme_lba_to_sect(ns->head, zi->zone_size);
+ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
+}
+
static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
unsigned int nr_zones, size_t *buflen)
{
return ERR_PTR(-EINVAL);
}
+ if (sysfs_streq(name, nvmet_disc_subsys->subsysnqn)) {
+ pr_err("can't create subsystem using unique discovery NQN\n");
+ return ERR_PTR(-EINVAL);
+ }
+
subsys = nvmet_subsys_alloc(name, NVME_NQN_NVME);
if (IS_ERR(subsys))
return ERR_CAST(subsys);
static struct config_group nvmet_hosts_group;
+static ssize_t nvmet_root_discovery_nqn_show(struct config_item *item,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%s\n", nvmet_disc_subsys->subsysnqn);
+}
+
+static ssize_t nvmet_root_discovery_nqn_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct list_head *entry;
+ size_t len;
+
+ len = strcspn(page, "\n");
+ if (!len || len > NVMF_NQN_FIELD_LEN - 1)
+ return -EINVAL;
+
+ down_write(&nvmet_config_sem);
+ list_for_each(entry, &nvmet_subsystems_group.cg_children) {
+ struct config_item *item =
+ container_of(entry, struct config_item, ci_entry);
+
+ if (!strncmp(config_item_name(item), page, len)) {
+ pr_err("duplicate NQN %s\n", config_item_name(item));
+ up_write(&nvmet_config_sem);
+ return -EINVAL;
+ }
+ }
+ memset(nvmet_disc_subsys->subsysnqn, 0, NVMF_NQN_FIELD_LEN);
+ memcpy(nvmet_disc_subsys->subsysnqn, page, len);
+ up_write(&nvmet_config_sem);
+
+ return len;
+}
+
+CONFIGFS_ATTR(nvmet_root_, discovery_nqn);
+
+static struct configfs_attribute *nvmet_root_attrs[] = {
+ &nvmet_root_attr_discovery_nqn,
+ NULL,
+};
+
static const struct config_item_type nvmet_root_type = {
+ .ct_attrs = nvmet_root_attrs,
.ct_owner = THIS_MODULE,
};
}
down_read(&nvmet_config_sem);
+ if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn,
+ NVMF_NQN_SIZE)) {
+ if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) {
+ up_read(&nvmet_config_sem);
+ return nvmet_disc_subsys;
+ }
+ }
list_for_each_entry(p, &port->subsystems, entry) {
if (!strncmp(p->subsys->subsysnqn, subsysnqn,
NVMF_NQN_SIZE)) {
}
static bool
-nvmet_fc_assoc_exits(struct nvmet_fc_tgtport *tgtport, u64 association_id)
+nvmet_fc_assoc_exists(struct nvmet_fc_tgtport *tgtport, u64 association_id)
{
struct nvmet_fc_tgt_assoc *a;
+ bool found = false;
+ rcu_read_lock();
list_for_each_entry_rcu(a, &tgtport->assoc_list, a_list) {
- if (association_id == a->association_id)
- return true;
+ if (association_id == a->association_id) {
+ found = true;
+ break;
+ }
}
+ rcu_read_unlock();
- return false;
+ return found;
}
static struct nvmet_fc_tgt_assoc *
ran = ran << BYTES_FOR_QID_SHIFT;
spin_lock_irqsave(&tgtport->lock, flags);
- rcu_read_lock();
- if (!nvmet_fc_assoc_exits(tgtport, ran)) {
+ if (!nvmet_fc_assoc_exists(tgtport, ran)) {
assoc->association_id = ran;
list_add_tail_rcu(&assoc->a_list, &tgtport->assoc_list);
done = true;
}
- rcu_read_unlock();
spin_unlock_irqrestore(&tgtport->lock, flags);
} while (!done);
#define pr_fmt(fmt) "OF: " fmt
+#include <linux/device.h>
#include <linux/of.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
{
struct of_changeset_entry *ce, *cen;
+ /*
+ * When a device is deleted, the device links to/from it are also queued
+ * for deletion. Until these device links are freed, the devices
+ * themselves aren't freed. If the device being deleted is due to an
+ * overlay change, this device might be holding a reference to a device
+ * node that will be freed. So, wait until all already pending device
+ * links are deleted before freeing a device node. This ensures we don't
+ * free any device node that has a non-zero reference count.
+ */
+ device_link_wait_removal();
+
list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
__of_changeset_entry_destroy(ce);
}
ssize_t csize;
ssize_t tsize;
+ /*
+ * Prevent a kernel oops in vsnprintf() -- it only allows passing a
+ * NULL ptr when the length is also 0. Also filter out the negative
+ * lengths...
+ */
+ if ((len > 0 && !str) || len < 0)
+ return -EINVAL;
+
/* Name & Type */
/* %p eats all alphanum characters, so %c must be used here */
csize = snprintf(str, len, "of:N%pOFn%c%s", np, 'T',
*/
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset);
-/*
- * Apparently the LSI / Agere FW643 can't recover after a Secondary Bus
- * Reset and requires a power-off or suspend/resume and rescan. Prevent
- * use of that reset.
- */
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5900, quirk_no_bus_reset);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5901, quirk_no_bus_reset);
-
/*
* Some TI KeyStone C667X devices do not support bus/hot reset. The PCIESS
* automatically disables LTSSM when Secondary Bus Reset is received and
u64 event_config = 0;
uint64_t cmask;
+ /* driver does not support branch stack sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
hwc->flags = 0;
mapped_event = rvpmu->event_map(event, &event_config);
if (mapped_event < 0) {
/* Source clock from SoC internal PLL */
writel(ANA_PLL_CLK_OUT_TO_EXT_IO_SEL,
imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG062);
- writel(AUX_PLL_REFCLK_SEL_SYS_PLL,
- imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063);
+ if (imx8_phy->drvdata->variant != IMX8MM) {
+ writel(AUX_PLL_REFCLK_SEL_SYS_PLL,
+ imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063);
+ }
val = ANA_AUX_RX_TX_SEL_TX | ANA_AUX_TX_TERM;
writel(val | ANA_AUX_RX_TERM_GND_EN,
imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG064);
u16 val;
fix_idx = 0;
- for (addr = 0; addr < 512; addr++) {
+ for (addr = 0; addr < ARRAY_SIZE(gbe_phy_init); addr++) {
/*
* All PHY register values are defined in full for 3.125Gbps
* SERDES speed. The values required for 1.25 Gbps are almost
* comparison to 3.125 Gbps values. These register values are
* stored in "gbe_phy_init_fix" array.
*/
- if (!is_1gbps && gbe_phy_init_fix[fix_idx].addr == addr) {
+ if (!is_1gbps &&
+ fix_idx < ARRAY_SIZE(gbe_phy_init_fix) &&
+ gbe_phy_init_fix[fix_idx].addr == addr) {
/* Use new value */
val = gbe_phy_init_fix[fix_idx].value;
- if (fix_idx < ARRAY_SIZE(gbe_phy_init_fix))
- fix_idx++;
+ fix_idx++;
} else {
val = gbe_phy_init[addr];
}
return dev_err_probe(dev, PTR_ERR(qphy->phy),
"failed to create phy\n");
- qphy->vreg = devm_regulator_get(dev, "vdda-phy");
+ qphy->vreg = devm_regulator_get(dev, "vdd");
if (IS_ERR(qphy->vreg))
return dev_err_probe(dev, PTR_ERR(qphy->vreg),
"failed to get vreg\n");
QPHY_COM_BIAS_EN_CLKBUFLR_EN,
QPHY_DP_PHY_STATUS,
+ QPHY_DP_PHY_VCO_DIV,
QPHY_TX_TX_POL_INV,
QPHY_TX_TX_DRV_LVL,
[QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN,
[QPHY_DP_PHY_STATUS] = QSERDES_V3_DP_PHY_STATUS,
+ [QPHY_DP_PHY_VCO_DIV] = QSERDES_V3_DP_PHY_VCO_DIV,
[QPHY_TX_TX_POL_INV] = QSERDES_V3_TX_TX_POL_INV,
[QPHY_TX_TX_DRV_LVL] = QSERDES_V3_TX_TX_DRV_LVL,
[QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN,
[QPHY_DP_PHY_STATUS] = QSERDES_V4_DP_PHY_STATUS,
+ [QPHY_DP_PHY_VCO_DIV] = QSERDES_V4_DP_PHY_VCO_DIV,
[QPHY_TX_TX_POL_INV] = QSERDES_V4_TX_TX_POL_INV,
[QPHY_TX_TX_DRV_LVL] = QSERDES_V4_TX_TX_DRV_LVL,
[QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN,
[QPHY_DP_PHY_STATUS] = QSERDES_V5_DP_PHY_STATUS,
+ [QPHY_DP_PHY_VCO_DIV] = QSERDES_V5_DP_PHY_VCO_DIV,
[QPHY_TX_TX_POL_INV] = QSERDES_V5_5NM_TX_TX_POL_INV,
[QPHY_TX_TX_DRV_LVL] = QSERDES_V5_5NM_TX_TX_DRV_LVL,
[QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V6_COM_PLL_BIAS_EN_CLK_BUFLR_EN,
[QPHY_DP_PHY_STATUS] = QSERDES_V6_DP_PHY_STATUS,
+ [QPHY_DP_PHY_VCO_DIV] = QSERDES_V6_DP_PHY_VCO_DIV,
[QPHY_TX_TX_POL_INV] = QSERDES_V6_TX_TX_POL_INV,
[QPHY_TX_TX_DRV_LVL] = QSERDES_V6_TX_TX_DRV_LVL,
writel(val, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL);
if (reverse)
- writel(0x4c, qmp->pcs + QSERDES_DP_PHY_MODE);
+ writel(0x4c, qmp->dp_dp_phy + QSERDES_DP_PHY_MODE);
else
- writel(0x5c, qmp->pcs + QSERDES_DP_PHY_MODE);
+ writel(0x5c, qmp->dp_dp_phy + QSERDES_DP_PHY_MODE);
return reverse;
}
const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
u32 phy_vco_div;
unsigned long pixel_freq;
+ const struct qmp_phy_cfg *cfg = qmp->cfg;
switch (dp_opts->link_rate) {
case 1620:
/* Other link rates aren't supported */
return -EINVAL;
}
- writel(phy_vco_div, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_VCO_DIV);
+ writel(phy_vco_div, qmp->dp_dp_phy + cfg->regs[QPHY_DP_PHY_VCO_DIV]);
clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000);
clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq);
#define QCOM_PHY_QMP_DP_PHY_V5_H_
/* Only for QMP V5 PHY - DP PHY registers */
+#define QSERDES_V5_DP_PHY_VCO_DIV 0x070
#define QSERDES_V5_DP_PHY_AUX_INTERRUPT_STATUS 0x0d8
#define QSERDES_V5_DP_PHY_STATUS 0x0dc
#define QCOM_PHY_QMP_DP_PHY_V6_H_
/* Only for QMP V6 PHY - DP PHY registers */
+#define QSERDES_V6_DP_PHY_VCO_DIV 0x070
#define QSERDES_V6_DP_PHY_AUX_INTERRUPT_STATUS 0x0e0
#define QSERDES_V6_DP_PHY_STATUS 0x0e4
tristate "Rockchip Samsung HDMI/eDP Combo PHY driver"
depends on (ARCH_ROCKCHIP || COMPILE_TEST) && OF
select GENERIC_PHY
+ select RATIONAL
help
Enable this to support the Rockchip HDMI/eDP Combo PHY
with Samsung IP block.
};
struct rockchip_combphy_cfg {
+ unsigned int num_phys;
+ unsigned int phy_ids[3];
const struct rockchip_combphy_grfcfg *grfcfg;
int (*combphy_cfg)(struct rockchip_combphy_priv *priv);
};
struct rockchip_combphy_priv {
u8 type;
+ int id;
void __iomem *mmio;
int num_clks;
struct clk_bulk_data *clks;
struct rockchip_combphy_priv *priv;
const struct rockchip_combphy_cfg *phy_cfg;
struct resource *res;
- int ret;
+ int ret, id;
phy_cfg = of_device_get_match_data(dev);
if (!phy_cfg) {
return ret;
}
+ /* find the phy-id from the io address */
+ priv->id = -ENODEV;
+ for (id = 0; id < phy_cfg->num_phys; id++) {
+ if (res->start == phy_cfg->phy_ids[id]) {
+ priv->id = id;
+ break;
+ }
+ }
+
priv->dev = dev;
priv->type = PHY_NONE;
priv->cfg = phy_cfg;
};
static const struct rockchip_combphy_cfg rk3568_combphy_cfgs = {
+ .num_phys = 3,
+ .phy_ids = {
+ 0xfe820000,
+ 0xfe830000,
+ 0xfe840000,
+ },
.grfcfg = &rk3568_combphy_grfcfgs,
.combphy_cfg = rk3568_combphy_cfg,
};
rockchip_combphy_param_write(priv->phy_grf, &cfg->con1_for_pcie, true);
rockchip_combphy_param_write(priv->phy_grf, &cfg->con2_for_pcie, true);
rockchip_combphy_param_write(priv->phy_grf, &cfg->con3_for_pcie, true);
- rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l0_sel, true);
- rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l1_sel, true);
+ switch (priv->id) {
+ case 1:
+ rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l0_sel, true);
+ break;
+ case 2:
+ rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l1_sel, true);
+ break;
+ }
break;
case PHY_TYPE_USB3:
/* Set SSC downward spread spectrum */
};
static const struct rockchip_combphy_cfg rk3588_combphy_cfgs = {
+ .num_phys = 3,
+ .phy_ids = {
+ 0xfee00000,
+ 0xfee10000,
+ 0xfee20000,
+ },
.grfcfg = &rk3588_combphy_grfcfgs,
.combphy_cfg = rk3588_combphy_cfg,
};
#define RK3588_BIFURCATION_LANE_0_1 BIT(0)
#define RK3588_BIFURCATION_LANE_2_3 BIT(1)
#define RK3588_LANE_AGGREGATION BIT(2)
+#define RK3588_PCIE1LN_SEL_EN (GENMASK(1, 0) << 16)
+#define RK3588_PCIE30_PHY_MODE_EN (GENMASK(2, 0) << 16)
struct rockchip_p3phy_ops;
static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv)
{
u32 reg = 0;
- u8 mode = 0;
+ u8 mode = RK3588_LANE_AGGREGATION; /* default */
int ret;
/* Deassert PCIe PMA output clamp mode */
/* Set bifurcation if needed */
for (int i = 0; i < priv->num_lanes; i++) {
- if (!priv->lanes[i])
- mode |= (BIT(i) << 3);
-
if (priv->lanes[i] > 1)
- mode |= (BIT(i) >> 1);
- }
-
- if (!mode)
- reg = RK3588_LANE_AGGREGATION;
- else {
- if (mode & (BIT(0) | BIT(1)))
- reg |= RK3588_BIFURCATION_LANE_0_1;
-
- if (mode & (BIT(2) | BIT(3)))
- reg |= RK3588_BIFURCATION_LANE_2_3;
+ mode &= ~RK3588_LANE_AGGREGATION;
+ if (priv->lanes[i] == 3)
+ mode |= RK3588_BIFURCATION_LANE_0_1;
+ if (priv->lanes[i] == 4)
+ mode |= RK3588_BIFURCATION_LANE_2_3;
}
- regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, (0x7<<16) | reg);
+ reg = mode;
+ regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0,
+ RK3588_PCIE30_PHY_MODE_EN | reg);
/* Set pcie1ln_sel in PHP_GRF_PCIESEL_CON */
if (!IS_ERR(priv->pipe_grf)) {
- reg = (mode & (BIT(6) | BIT(7))) >> 6;
+ reg = mode & (RK3588_BIFURCATION_LANE_0_1 | RK3588_BIFURCATION_LANE_2_3);
if (reg)
regmap_write(priv->pipe_grf, PHP_GRF_PCIESEL_CON,
- (reg << 16) | reg);
+ RK3588_PCIE1LN_SEL_EN | reg);
}
reset_control_deassert(priv->p30phy);
struct delayed_work chg_det_work;
struct notifier_block psy_nb;
struct power_supply *psy;
- struct power_supply *charger;
#endif
};
static bool tusb1210_get_online(struct tusb1210 *tusb)
{
+ struct power_supply *charger = NULL;
union power_supply_propval val;
- int i;
+ bool online = false;
+ int i, ret;
- for (i = 0; i < ARRAY_SIZE(tusb1210_chargers) && !tusb->charger; i++)
- tusb->charger = power_supply_get_by_name(tusb1210_chargers[i]);
+ for (i = 0; i < ARRAY_SIZE(tusb1210_chargers) && !charger; i++)
+ charger = power_supply_get_by_name(tusb1210_chargers[i]);
- if (!tusb->charger)
+ if (!charger)
return false;
- if (power_supply_get_property(tusb->charger, POWER_SUPPLY_PROP_ONLINE, &val))
- return false;
+ ret = power_supply_get_property(charger, POWER_SUPPLY_PROP_ONLINE, &val);
+ if (ret == 0)
+ online = val.intval;
+
+ power_supply_put(charger);
- return val.intval;
+ return online;
}
static void tusb1210_chg_det_work(struct work_struct *work)
cancel_delayed_work_sync(&tusb->chg_det_work);
power_supply_unregister(tusb->psy);
}
-
- if (tusb->charger)
- power_supply_put(tusb->charger);
}
#else
static void tusb1210_probe_charger_detect(struct tusb1210 *tusb) { }
# SPDX-License-Identifier: GPL-2.0-only
# Aspeed pinctrl support
-ccflags-y += $(call cc-option,-Woverride-init)
+ccflags-y += -Woverride-init
obj-$(CONFIG_PINCTRL_ASPEED) += pinctrl-aspeed.o pinmux-aspeed.o
obj-$(CONFIG_PINCTRL_ASPEED_G4) += pinctrl-aspeed-g4.o
obj-$(CONFIG_PINCTRL_ASPEED_G5) += pinctrl-aspeed-g5.o
}
ret = devm_request_irq(&pdev->dev, gpio_dev->irq, amd_gpio_irq_handler,
- IRQF_SHARED | IRQF_ONESHOT, KBUILD_MODNAME, gpio_dev);
+ IRQF_SHARED | IRQF_COND_ONESHOT, KBUILD_MODNAME, gpio_dev);
if (ret)
goto out2;
if (!ec_dev)
return -ENOMEM;
- ret = devm_serdev_device_open(dev, serdev);
- if (ret) {
- dev_err(dev, "Unable to open UART device");
- return ret;
- }
-
serdev_device_set_drvdata(serdev, ec_dev);
init_waitqueue_head(&ec_uart->response.wait_queue);
return ret;
}
- ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
- if (ret < 0) {
- dev_err(dev, "Failed to set up host baud rate (%d)", ret);
- return ret;
- }
-
- serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
-
/* Initialize ec_dev for cros_ec */
ec_dev->phys_name = dev_name(dev);
ec_dev->dev = dev;
serdev_device_set_client_ops(serdev, &cros_ec_uart_client_ops);
+ ret = devm_serdev_device_open(dev, serdev);
+ if (ret) {
+ dev_err(dev, "Unable to open UART device");
+ return ret;
+ }
+
+ ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
+ if (ret < 0) {
+ dev_err(dev, "Failed to set up host baud rate (%d)", ret);
+ return ret;
+ }
+
+ serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
+
return cros_ec_register(ec_dev);
}
},
.driver_data = &quirk_acer_predator_v4,
},
+ {
+ .callback = dmi_matched,
+ .ident = "Acer Predator PH18-71",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Predator PH18-71"),
+ },
+ .driver_data = &quirk_acer_predator_v4,
+ },
{
.callback = set_force_caps,
.ident = "Acer Aspire Switch 10E SW3-016",
DMI_MATCH(DMI_BIOS_VERSION, "03.03"),
}
},
+ {
+ .ident = "Framework Laptop 13 (Phoenix)",
+ .driver_data = &quirk_spurious_8042,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Laptop 13 (AMD Ryzen 7040Series)"),
+ DMI_MATCH(DMI_BIOS_VERSION, "03.05"),
+ }
+ },
{}
};
obj-$(CONFIG_AMD_PMF) += amd-pmf.o
amd-pmf-objs := core.o acpi.o sps.o \
auto-mode.o cnqf.o \
- tee-if.o spc.o
+ tee-if.o spc.o pmf-quirks.o
if (err)
return err;
- pdev->supported_func = output.supported_functions;
+ /* only set if not already set by a quirk */
+ if (!pdev->supported_func)
+ pdev->supported_func = output.supported_functions;
+
dev_dbg(pdev->dev, "supported functions:0x%x notifications:0x%x version:%u\n",
output.supported_functions, output.notification_mask, output.version);
status = acpi_walk_resources(ahandle, METHOD_NAME__CRS, apmf_walk_resources, pmf_dev);
if (ACPI_FAILURE(status)) {
- dev_err(pmf_dev->dev, "acpi_walk_resources failed :%d\n", status);
+ dev_dbg(pmf_dev->dev, "acpi_walk_resources failed :%d\n", status);
return -EINVAL;
}
mutex_init(&dev->lock);
mutex_init(&dev->update_mutex);
+ amd_pmf_quirks_init(dev);
apmf_acpi_init(dev);
platform_set_drvdata(pdev, dev);
amd_pmf_dbgfs_register(dev);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Platform Management Framework Driver Quirks
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Mario Limonciello <mario.limonciello@amd.com>
+ */
+
+#include <linux/dmi.h>
+
+#include "pmf.h"
+
+struct quirk_entry {
+ u32 supported_func;
+};
+
+static struct quirk_entry quirk_no_sps_bug = {
+ .supported_func = 0x4003,
+};
+
+static const struct dmi_system_id fwbug_list[] = {
+ {
+ .ident = "ROG Zephyrus G14",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "GA403UV"),
+ },
+ .driver_data = &quirk_no_sps_bug,
+ },
+ {}
+};
+
+void amd_pmf_quirks_init(struct amd_pmf_dev *dev)
+{
+ const struct dmi_system_id *dmi_id;
+ struct quirk_entry *quirks;
+
+ dmi_id = dmi_first_match(fwbug_list);
+ if (!dmi_id)
+ return;
+
+ quirks = dmi_id->driver_data;
+ if (quirks->supported_func) {
+ dev->supported_func = quirks->supported_func;
+ pr_info("Using supported funcs quirk to avoid %s platform firmware bug\n",
+ dmi_id->ident);
+ }
+}
+
void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in);
void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in);
+/* Quirk infrastructure */
+void amd_pmf_quirks_init(struct amd_pmf_dev *dev);
+
#endif /* PMF_H */
{"INTC1076", 0},
{"INTC1077", 0},
{"INTC1078", 0},
+ {"INTC107B", 0},
+ {"INTC10CB", 0},
{"", 0},
};
MODULE_DEVICE_TABLE(acpi, intel_hid_ids);
struct platform_device *device = context;
struct intel_hid_priv *priv = dev_get_drvdata(&device->dev);
unsigned long long ev_index;
+ struct key_entry *ke;
int err;
/*
if (event == 0xc0 || !priv->array)
return;
- if (!sparse_keymap_entry_from_scancode(priv->array, event)) {
+ ke = sparse_keymap_entry_from_scancode(priv->array, event);
+ if (!ke) {
dev_info(&device->dev, "unknown event 0x%x\n", event);
return;
}
+ if (ke->type == KE_IGNORE)
+ return;
+
wakeup:
pm_wakeup_hard_event(&device->dev);
};
static const struct x86_cpu_id hpm_cpu_ids[] = {
+ X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D, NULL),
X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, NULL),
{}
#include "uncore-frequency-common.h"
#define UNCORE_MAJOR_VERSION 0
-#define UNCORE_MINOR_VERSION 1
+#define UNCORE_MINOR_VERSION 2
#define UNCORE_HEADER_INDEX 0
#define UNCORE_FABRIC_CLUSTER_OFFSET 8
goto remove_clusters;
}
- if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) != UNCORE_MINOR_VERSION)
+ if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) > UNCORE_MINOR_VERSION)
dev_info(&auxdev->dev, "Uncore: Ignore: Unsupported minor version:%lx\n",
TPMI_MINOR_VERSION(pd_info->ufs_header_ver));
priv->switches_dev->id.bustype = BUS_HOST;
if (priv->has_switches) {
- detect_tablet_mode(&device->dev);
-
ret = input_register_device(priv->switches_dev);
if (ret)
return ret;
static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel)
{
- unsigned long long vgbs;
- acpi_status status;
-
/* See dual_accel_detect.h for more info */
if (dual_accel)
return false;
if (!dmi_check_system(dmi_switches_allow_list))
return false;
- status = acpi_evaluate_integer(handle, "VGBS", NULL, &vgbs);
- return ACPI_SUCCESS(status);
+ return acpi_has_method(handle, "VGBS");
}
static int intel_vbtn_probe(struct platform_device *device)
if (ACPI_FAILURE(status))
dev_err(&device->dev, "Error VBDL failed with ACPI status %d\n", status);
}
+ // Check switches after buttons since VBDL may have side effects.
+ if (has_switches)
+ detect_tablet_mode(&device->dev);
device_init_wakeup(&device->dev, true);
/*
default:
year = 2019;
}
- pr_info("product: %s year: %d\n", product, year);
+ pr_info("product: %s year: %d\n", product ?: "unknown", year);
if (year >= 2019)
battery_limit_use_wmbb = 1;
{ KE_KEY, 0xb32, { KEY_NEXTSONG } },
{ KE_KEY, 0xb33, { KEY_PLAYPAUSE } },
{ KE_KEY, 0xb5a, { KEY_MEDIA } },
+ { KE_IGNORE, 0x0e00, { KEY_RESERVED } }, /* Wake from sleep */
{ KE_IGNORE, 0x1430, { KEY_RESERVED } }, /* Wake from sleep */
{ KE_IGNORE, 0x1501, { KEY_RESERVED } }, /* Output changed */
{ KE_IGNORE, 0x1502, { KEY_RESERVED } }, /* HDMI plugged/unplugged */
(dev->kbd_mode == SCI_KBD_MODE_ON) ?
LED_FULL : LED_OFF);
break;
+ case 0x8e: /* Power button pressed */
+ break;
case 0x85: /* Unknown */
case 0x8d: /* Unknown */
- case 0x8e: /* Unknown */
case 0x94: /* Unknown */
case 0x95: /* Unknown */
default:
if (IS_ERR(pwm))
return pwm;
- if (args->args_count > 1)
+ if (args->args_count > 0)
pwm->args.period = args->args[0];
pwm->args.polarity = PWM_POLARITY_NORMAL;
dwc->clk_ns = 10;
chip->ops = &dwc_pwm_ops;
- dev_set_drvdata(dev, chip);
return chip;
}
EXPORT_SYMBOL_GPL(dwc_pwm_alloc);
.size = 0x1000,
};
-static int dwc_pwm_init_one(struct device *dev, void __iomem *base, unsigned int offset)
+static int dwc_pwm_init_one(struct device *dev, struct dwc_pwm_drvdata *ddata, unsigned int idx)
{
struct pwm_chip *chip;
struct dwc_pwm *dwc;
+ int ret;
chip = dwc_pwm_alloc(dev);
if (IS_ERR(chip))
return PTR_ERR(chip);
dwc = to_dwc_pwm(chip);
- dwc->base = base + offset;
+ dwc->base = ddata->io_base + (ddata->info->size * idx);
- return devm_pwmchip_add(dev, chip);
+ ret = devm_pwmchip_add(dev, chip);
+ if (ret)
+ return ret;
+
+ ddata->chips[idx] = chip;
+ return 0;
}
static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
{
const struct dwc_pwm_info *info;
struct device *dev = &pci->dev;
- int i, ret;
+ struct dwc_pwm_drvdata *ddata;
+ unsigned int idx;
+ int ret;
ret = pcim_enable_device(pci);
if (ret)
return dev_err_probe(dev, ret, "Failed to iomap PCI BAR\n");
info = (const struct dwc_pwm_info *)id->driver_data;
-
- for (i = 0; i < info->nr; i++) {
- /*
- * No need to check for pcim_iomap_table() failure,
- * pcim_iomap_regions() already does it for us.
- */
- ret = dwc_pwm_init_one(dev, pcim_iomap_table(pci)[0], i * info->size);
+ ddata = devm_kzalloc(dev, struct_size(ddata, chips, info->nr), GFP_KERNEL);
+ if (!ddata)
+ return -ENOMEM;
+
+ /*
+ * No need to check for pcim_iomap_table() failure,
+ * pcim_iomap_regions() already does it for us.
+ */
+ ddata->io_base = pcim_iomap_table(pci)[0];
+ ddata->info = info;
+
+ for (idx = 0; idx < ddata->info->nr; idx++) {
+ ret = dwc_pwm_init_one(dev, ddata, idx);
if (ret)
return ret;
}
+ dev_set_drvdata(dev, ddata);
+
pm_runtime_put(dev);
pm_runtime_allow(dev);
static int dwc_pwm_suspend(struct device *dev)
{
- struct pwm_chip *chip = dev_get_drvdata(dev);
- struct dwc_pwm *dwc = to_dwc_pwm(chip);
- int i;
-
- for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
- if (chip->pwms[i].state.enabled) {
- dev_err(dev, "PWM %u in use by consumer (%s)\n",
- i, chip->pwms[i].label);
- return -EBUSY;
+ struct dwc_pwm_drvdata *ddata = dev_get_drvdata(dev);
+ unsigned int idx;
+
+ for (idx = 0; idx < ddata->info->nr; idx++) {
+ struct pwm_chip *chip = ddata->chips[idx];
+ struct dwc_pwm *dwc = to_dwc_pwm(chip);
+ unsigned int i;
+
+ for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
+ if (chip->pwms[i].state.enabled) {
+ dev_err(dev, "PWM %u in use by consumer (%s)\n",
+ i, chip->pwms[i].label);
+ return -EBUSY;
+ }
+ dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i));
+ dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i));
+ dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i));
}
- dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i));
- dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i));
- dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i));
}
return 0;
static int dwc_pwm_resume(struct device *dev)
{
- struct pwm_chip *chip = dev_get_drvdata(dev);
- struct dwc_pwm *dwc = to_dwc_pwm(chip);
- int i;
-
- for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
- dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i));
- dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i));
- dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i));
+ struct dwc_pwm_drvdata *ddata = dev_get_drvdata(dev);
+ unsigned int idx;
+
+ for (idx = 0; idx < ddata->info->nr; idx++) {
+ struct pwm_chip *chip = ddata->chips[idx];
+ struct dwc_pwm *dwc = to_dwc_pwm(chip);
+ unsigned int i;
+
+ for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
+ dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i));
+ dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i));
+ dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i));
+ }
}
return 0;
unsigned int size;
};
+struct dwc_pwm_drvdata {
+ const struct dwc_pwm_info *info;
+ void __iomem *io_base;
+ struct pwm_chip *chips[];
+};
+
struct dwc_pwm_ctx {
u32 cnt;
u32 cnt2;
return PTR_ERR(imgchip->sys_clk);
}
- imgchip->pwm_clk = devm_clk_get(&pdev->dev, "imgchip");
+ imgchip->pwm_clk = devm_clk_get(&pdev->dev, "pwm");
if (IS_ERR(imgchip->pwm_clk)) {
- dev_err(&pdev->dev, "failed to get imgchip clock\n");
+ dev_err(&pdev->dev, "failed to get pwm clock\n");
return PTR_ERR(imgchip->pwm_clk);
}
/* Total length of record including headers and list of descriptor entries. */
static size_t max_rec_len;
+#define FMPM_MAX_REC_LEN (sizeof(struct fru_rec) + (sizeof(struct cper_fru_poison_desc) * 255))
+
/* Total number of SPA entries across all FRUs. */
static unsigned int spa_nr_entries;
struct cper_section_descriptor *sec_desc = &rec->sec_desc;
struct cper_record_header *hdr = &rec->hdr;
+ /*
+ * This is a saved record created with fewer max_nr_entries.
+ * Update the record lengths and keep everything else as-is.
+ */
+ if (hdr->record_length && hdr->record_length < max_rec_len) {
+ pr_debug("Growing record 0x%016llx from %u to %zu bytes\n",
+ hdr->record_id, hdr->record_length, max_rec_len);
+ goto update_lengths;
+ }
+
memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
hdr->revision = CPER_RECORD_REV;
hdr->signature_end = CPER_SIG_END;
hdr->error_severity = CPER_SEV_RECOVERABLE;
hdr->validation_bits = 0;
- hdr->record_length = max_rec_len;
hdr->creator_id = CPER_CREATOR_FMP;
hdr->notification_type = CPER_NOTIFY_MCE;
hdr->record_id = cper_next_record_id();
hdr->flags = CPER_HW_ERROR_FLAGS_PREVERR;
sec_desc->section_offset = sizeof(struct cper_record_header);
- sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header);
sec_desc->revision = CPER_SEC_REV;
sec_desc->validation_bits = 0;
sec_desc->flags = CPER_SEC_PRIMARY;
sec_desc->section_type = CPER_SECTION_TYPE_FMP;
sec_desc->section_severity = CPER_SEV_RECOVERABLE;
+
+update_lengths:
+ hdr->record_length = max_rec_len;
+ sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header);
}
static int save_new_records(void)
int ret = 0;
for_each_fru(i, rec) {
- if (rec->hdr.record_length)
+ /* No need to update saved records that match the current record size. */
+ if (rec->hdr.record_length == max_rec_len)
continue;
+ if (!rec->hdr.record_length)
+ set_bit(i, new_records);
+
set_rec_fields(rec);
ret = update_record_on_storage(rec);
if (ret)
goto out_clear;
-
- set_bit(i, new_records);
}
return ret;
int ret, pos;
ssize_t len;
- /*
- * Assume saved records match current max size.
- *
- * However, this may not be true depending on module parameters.
- */
- old = kmalloc(max_rec_len, GFP_KERNEL);
+ old = kmalloc(FMPM_MAX_REC_LEN, GFP_KERNEL);
if (!old) {
ret = -ENOMEM;
goto out;
* Make sure to clear temporary buffer between reads to avoid
* leftover data from records of various sizes.
*/
- memset(old, 0, max_rec_len);
+ memset(old, 0, FMPM_MAX_REC_LEN);
- len = erst_read_record(record_id, &old->hdr, max_rec_len,
+ len = erst_read_record(record_id, &old->hdr, FMPM_MAX_REC_LEN,
sizeof(struct fru_rec), &CPER_CREATOR_FMP);
if (len < 0)
continue;
- if (len > max_rec_len) {
- pr_debug("Found record larger than max_rec_len\n");
+ new = get_valid_record(old);
+ if (!new) {
+ erst_clear(record_id);
continue;
}
- new = get_valid_record(old);
- if (!new)
- erst_clear(record_id);
+ if (len > max_rec_len) {
+ unsigned int saved_nr_entries;
+
+ saved_nr_entries = len - sizeof(struct fru_rec);
+ saved_nr_entries /= sizeof(struct cper_fru_poison_desc);
+
+ pr_warn("Saved record found with %u entries.\n", saved_nr_entries);
+ pr_warn("Please increase max_nr_entries to %u.\n", saved_nr_entries);
+
+ ret = -EINVAL;
+ goto out_end;
+ }
/* Restore the record */
memcpy(new, old, len);
#include <linux/debugfs.h>
+#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *ras_get_debugfs_root(void);
+#else
+static inline struct dentry *ras_get_debugfs_root(void) { return NULL; }
+#endif /* DEBUG_FS */
#endif /* __RAS_DEBUGFS_H__ */
};
MODULE_DEVICE_TABLE(i2c, tps65132_id);
+static const struct of_device_id __maybe_unused tps65132_of_match[] = {
+ { .compatible = "ti,tps65132" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, tps65132_of_match);
+
static struct i2c_driver tps65132_i2c_driver = {
.driver = {
.name = "tps65132",
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
+ .of_match_table = of_match_ptr(tps65132_of_match),
},
.probe = tps65132_probe,
.id_table = tps65132_id,
spin_lock_irq(cdev->ccwlock);
ret = ccw_device_online(cdev);
- spin_unlock_irq(cdev->ccwlock);
- if (ret == 0)
- wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
- else {
+ if (ret) {
+ spin_unlock_irq(cdev->ccwlock);
CIO_MSG_EVENT(0, "ccw_device_online returned %d, "
"device 0.%x.%04x\n",
ret, cdev->private->dev_id.ssid,
put_device(&cdev->dev);
return ret;
}
- spin_lock_irq(cdev->ccwlock);
+ /* Wait until a final state is reached */
+ while (!dev_fsm_final_state(cdev)) {
+ spin_unlock_irq(cdev->ccwlock);
+ wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
+ spin_lock_irq(cdev->ccwlock);
+ }
/* Check if online processing was successful */
if ((cdev->private->state != DEV_STATE_ONLINE) &&
(cdev->private->state != DEV_STATE_W4SENSE)) {
ccw_device_done(cdev, DEV_STATE_ONLINE);
/* Deliver fake irb to device driver, if needed. */
if (cdev->private->flags.fake_irb) {
+ CIO_MSG_EVENT(2, "fakeirb: deliver device 0.%x.%04x intparm %lx type=%d\n",
+ cdev->private->dev_id.ssid,
+ cdev->private->dev_id.devno,
+ cdev->private->intparm,
+ cdev->private->flags.fake_irb);
create_fake_irb(&cdev->private->dma_area->irb,
cdev->private->flags.fake_irb);
cdev->private->flags.fake_irb = 0;
if (!cdev->private->flags.fake_irb) {
cdev->private->flags.fake_irb = FAKE_CMD_IRB;
cdev->private->intparm = intparm;
+ CIO_MSG_EVENT(2, "fakeirb: queue device 0.%x.%04x intparm %lx type=%d\n",
+ cdev->private->dev_id.ssid,
+ cdev->private->dev_id.devno, intparm,
+ cdev->private->flags.fake_irb);
return 0;
} else
/* There's already a fake I/O around. */
if (!cdev->private->flags.fake_irb) {
cdev->private->flags.fake_irb = FAKE_TM_IRB;
cdev->private->intparm = intparm;
+ CIO_MSG_EVENT(2, "fakeirb: queue device 0.%x.%04x intparm %lx type=%d\n",
+ cdev->private->dev_id.ssid,
+ cdev->private->dev_id.devno, intparm,
+ cdev->private->flags.fake_irb);
return 0;
} else
/* There's already a fake I/O around. */
lgr_info_log();
}
-static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat,
- int dstat)
+static int qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat,
+ int dstat, int dcc)
{
DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qest irq");
goto error;
if (dstat & ~(DEV_STAT_DEV_END | DEV_STAT_CHN_END))
goto error;
+ if (dcc == 1)
+ return -EAGAIN;
if (!(dstat & DEV_STAT_DEV_END))
goto error;
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED);
- return;
+ return 0;
error:
DBF_ERROR("%4x EQ:error", irq_ptr->schid.sch_no);
DBF_ERROR("ds: %2x cs:%2x", dstat, cstat);
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
+ return -EIO;
}
/* qdio interrupt handler */
{
struct qdio_irq *irq_ptr = cdev->private->qdio_data;
struct subchannel_id schid;
- int cstat, dstat;
+ int cstat, dstat, rc, dcc;
if (!intparm || !irq_ptr) {
ccw_device_get_schid(cdev, &schid);
qdio_irq_check_sense(irq_ptr, irb);
cstat = irb->scsw.cmd.cstat;
dstat = irb->scsw.cmd.dstat;
+ dcc = scsw_cmd_is_valid_cc(&irb->scsw) ? irb->scsw.cmd.cc : 0;
+ rc = 0;
switch (irq_ptr->state) {
case QDIO_IRQ_STATE_INACTIVE:
- qdio_establish_handle_irq(irq_ptr, cstat, dstat);
+ rc = qdio_establish_handle_irq(irq_ptr, cstat, dstat, dcc);
break;
case QDIO_IRQ_STATE_CLEANUP:
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
if (cstat || dstat)
qdio_handle_activate_check(irq_ptr, intparm, cstat,
dstat);
+ else if (dcc == 1)
+ rc = -EAGAIN;
break;
case QDIO_IRQ_STATE_STOPPED:
break;
default:
WARN_ON_ONCE(1);
}
+
+ if (rc == -EAGAIN) {
+ DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qint retry");
+ rc = ccw_device_start(cdev, irq_ptr->ccw, intparm, 0, 0);
+ if (!rc)
+ return;
+ DBF_ERROR("%4x RETRY ERR", irq_ptr->schid.sch_no);
+ DBF_ERROR("rc:%4x", rc);
+ qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
+ }
+
wake_up(&cdev->private->wait_q);
}
static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
{
clear_bit(dmb->sba_idx, ism->sba_bitmap);
- dma_free_coherent(&ism->pdev->dev, dmb->dmb_len,
- dmb->cpu_addr, dmb->dma_addr);
+ dma_unmap_page(&ism->pdev->dev, dmb->dma_addr, dmb->dmb_len,
+ DMA_FROM_DEVICE);
+ folio_put(virt_to_folio(dmb->cpu_addr));
}
static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
{
+ struct folio *folio;
unsigned long bit;
+ int rc;
if (PAGE_ALIGN(dmb->dmb_len) > dma_get_max_seg_size(&ism->pdev->dev))
return -EINVAL;
test_and_set_bit(dmb->sba_idx, ism->sba_bitmap))
return -EINVAL;
- dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len,
- &dmb->dma_addr,
- GFP_KERNEL | __GFP_NOWARN |
- __GFP_NOMEMALLOC | __GFP_NORETRY);
- if (!dmb->cpu_addr)
- clear_bit(dmb->sba_idx, ism->sba_bitmap);
+ folio = folio_alloc(GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC |
+ __GFP_NORETRY, get_order(dmb->dmb_len));
- return dmb->cpu_addr ? 0 : -ENOMEM;
+ if (!folio) {
+ rc = -ENOMEM;
+ goto out_bit;
+ }
+
+ dmb->cpu_addr = folio_address(folio);
+ dmb->dma_addr = dma_map_page(&ism->pdev->dev,
+ virt_to_page(dmb->cpu_addr), 0,
+ dmb->dmb_len, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&ism->pdev->dev, dmb->dma_addr)) {
+ rc = -ENOMEM;
+ goto out_free;
+ }
+
+ return 0;
+
+out_free:
+ kfree(dmb->cpu_addr);
+out_bit:
+ clear_bit(dmb->sba_idx, ism->sba_bitmap);
+ return rc;
}
int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
}
}
+/**
+ * qeth_irq() - qeth interrupt handler
+ * @cdev: ccw device
+ * @intparm: expect pointer to iob
+ * @irb: Interruption Response Block
+ *
+ * In the good path:
+ * corresponding qeth channel is locked with last used iob as active_cmd.
+ * But this function is also called for error interrupts.
+ *
+ * Caller ensures that:
+ * Interrupts are disabled; ccw device lock is held;
+ *
+ */
static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
struct irb *irb)
{
iob = (struct qeth_cmd_buffer *) (addr_t)intparm;
}
- qeth_unlock_channel(card, channel);
-
rc = qeth_check_irb_error(card, cdev, irb);
if (rc) {
/* IO was terminated, free its resources. */
+ qeth_unlock_channel(card, channel);
if (iob)
qeth_cancel_cmd(iob, rc);
return;
rc = qeth_get_problem(card, cdev, irb);
if (rc) {
card->read_or_write_problem = 1;
+ qeth_unlock_channel(card, channel);
if (iob)
qeth_cancel_cmd(iob, rc);
qeth_clear_ipacmd_list(card);
}
}
+ if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) {
+ /* channel command hasn't started: retry.
+ * active_cmd is still set to last iob
+ */
+ QETH_CARD_TEXT(card, 2, "irqcc1");
+ rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob),
+ (addr_t)iob, 0, 0, iob->timeout);
+ if (rc) {
+ QETH_DBF_MESSAGE(2,
+ "ccw retry on %x failed, rc = %i\n",
+ CARD_DEVID(card), rc);
+ QETH_CARD_TEXT_(card, 2, " err%d", rc);
+ qeth_unlock_channel(card, channel);
+ qeth_cancel_cmd(iob, rc);
+ }
+ return;
+ }
+
+ qeth_unlock_channel(card, channel);
+
if (iob) {
/* sanity check: */
if (irb->scsw.cmd.count > iob->length) {
BNX2FC_TGT_DBG(tgt, "Freeing up session resources\n");
- spin_lock_bh(&tgt->cq_lock);
ctx_base_ptr = tgt->ctx_base;
tgt->ctx_base = NULL;
tgt->sq, tgt->sq_dma);
tgt->sq = NULL;
}
- spin_unlock_bh(&tgt->cq_lock);
if (ctx_base_ptr)
iounmap(ctx_base_ptr);
#define MAX_RETRIES 1
-static struct class * ch_sysfs_class;
+static const struct class ch_sysfs_class = {
+ .name = "scsi_changer",
+};
typedef struct {
struct kref ref;
mutex_init(&ch->lock);
kref_init(&ch->ref);
ch->device = sd;
- class_dev = device_create(ch_sysfs_class, dev,
+ class_dev = device_create(&ch_sysfs_class, dev,
MKDEV(SCSI_CHANGER_MAJOR, ch->minor), ch,
"s%s", ch->name);
if (IS_ERR(class_dev)) {
return 0;
destroy_dev:
- device_destroy(ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor));
+ device_destroy(&ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor));
put_device:
scsi_device_put(sd);
remove_idr:
dev_set_drvdata(dev, NULL);
spin_unlock(&ch_index_lock);
- device_destroy(ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR,ch->minor));
+ device_destroy(&ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor));
scsi_device_put(ch->device);
kref_put(&ch->ref, ch_destroy);
return 0;
int rc;
printk(KERN_INFO "SCSI Media Changer driver v" VERSION " \n");
- ch_sysfs_class = class_create("scsi_changer");
- if (IS_ERR(ch_sysfs_class)) {
- rc = PTR_ERR(ch_sysfs_class);
+ rc = class_register(&ch_sysfs_class);
+ if (rc)
return rc;
- }
rc = register_chrdev(SCSI_CHANGER_MAJOR,"ch",&changer_fops);
if (rc < 0) {
printk("Unable to get major %d for SCSI-Changer\n",
fail2:
unregister_chrdev(SCSI_CHANGER_MAJOR, "ch");
fail1:
- class_destroy(ch_sysfs_class);
+ class_unregister(&ch_sysfs_class);
return rc;
}
{
scsi_unregister_driver(&ch_template.gendrv);
unregister_chrdev(SCSI_CHANGER_MAJOR, "ch");
- class_destroy(ch_sysfs_class);
+ class_unregister(&ch_sysfs_class);
idr_destroy(&ch_index_idr);
}
MODULE_AUTHOR("Matthew R. Ochs <mrochs@linux.vnet.ibm.com>");
MODULE_LICENSE("GPL");
-static struct class *cxlflash_class;
+static char *cxlflash_devnode(const struct device *dev, umode_t *mode);
+static const struct class cxlflash_class = {
+ .name = "cxlflash",
+ .devnode = cxlflash_devnode,
+};
+
static u32 cxlflash_major;
static DECLARE_BITMAP(cxlflash_minor, CXLFLASH_MAX_ADAPTERS);
goto err1;
}
- char_dev = device_create(cxlflash_class, NULL, devno,
+ char_dev = device_create(&cxlflash_class, NULL, devno,
NULL, "cxlflash%d", minor);
if (IS_ERR(char_dev)) {
rc = PTR_ERR(char_dev);
cxlflash_major = MAJOR(devno);
- cxlflash_class = class_create("cxlflash");
- if (IS_ERR(cxlflash_class)) {
- rc = PTR_ERR(cxlflash_class);
+ rc = class_register(&cxlflash_class);
+ if (rc) {
pr_err("%s: class_create failed rc=%d\n", __func__, rc);
goto err;
}
- cxlflash_class->devnode = cxlflash_devnode;
out:
pr_debug("%s: returning rc=%d\n", __func__, rc);
return rc;
{
dev_t devno = MKDEV(cxlflash_major, 0);
- class_destroy(cxlflash_class);
+ class_unregister(&cxlflash_class);
unregister_chrdev_region(devno, CXLFLASH_MAX_ADAPTERS);
}
if (dev_is_sata(device)) {
struct ata_link *link = &device->sata_dev.ap->link;
- rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT,
+ rc = ata_wait_after_reset(link, jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT,
smp_ata_check_ready_type);
} else {
msleep(2000);
case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) &&
(sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) {
- ts->stat = SAS_PROTO_RESPONSE;
+ if (task->ata_task.use_ncq) {
+ struct domain_device *device = task->dev;
+ struct hisi_sas_device *sas_dev = device->lldd_dev;
+
+ sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR;
+ slot->abort = 1;
+ } else {
+ ts->stat = SAS_PROTO_RESPONSE;
+ }
} else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
ts->residual = trans_tx_fail_type;
ts->stat = SAS_DATA_UNDERRUN;
if (shost->shost_state == SHOST_CREATED) {
/*
- * Free the shost_dev device name here if scsi_host_alloc()
- * and scsi_host_put() have been called but neither
+ * Free the shost_dev device name and remove the proc host dir
+ * here if scsi_host_{alloc,put}() have been called but neither
* scsi_host_add() nor scsi_remove_host() has been called.
* This avoids that the memory allocated for the shost_dev
- * name is leaked.
+ * name as well as the proc dir structure are leaked.
*/
+ scsi_proc_hostdir_rm(shost->hostt);
kfree(dev_name(&shost->shost_dev));
}
static inline void *alloc_smp_req(int size)
{
- u8 *p = kzalloc(size, GFP_KERNEL);
+ u8 *p = kzalloc(ALIGN(size, ARCH_DMA_MINALIGN), GFP_KERNEL);
if (p)
p[0] = SMP_REQUEST;
return p;
/* ---------- Domain revalidation ---------- */
+static void sas_get_sas_addr_and_dev_type(struct smp_disc_resp *disc_resp,
+ u8 *sas_addr,
+ enum sas_device_type *type)
+{
+ memcpy(sas_addr, disc_resp->disc.attached_sas_addr, SAS_ADDR_SIZE);
+ *type = to_dev_type(&disc_resp->disc);
+ if (*type == SAS_PHY_UNUSED)
+ memset(sas_addr, 0, SAS_ADDR_SIZE);
+}
+
static int sas_get_phy_discover(struct domain_device *dev,
int phy_id, struct smp_disc_resp *disc_resp)
{
return -ENOMEM;
res = sas_get_phy_discover(dev, phy_id, disc_resp);
- if (res == 0) {
- memcpy(sas_addr, disc_resp->disc.attached_sas_addr,
- SAS_ADDR_SIZE);
- *type = to_dev_type(&disc_resp->disc);
- if (*type == 0)
- memset(sas_addr, 0, SAS_ADDR_SIZE);
- }
+ if (res == 0)
+ sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, type);
kfree(disc_resp);
return res;
}
struct expander_device *ex = &dev->ex_dev;
struct ex_phy *phy = &ex->ex_phy[phy_id];
enum sas_device_type type = SAS_PHY_UNUSED;
+ struct smp_disc_resp *disc_resp;
u8 sas_addr[SAS_ADDR_SIZE];
char msg[80] = "";
int res;
SAS_ADDR(dev->sas_addr), phy_id, msg);
memset(sas_addr, 0, SAS_ADDR_SIZE);
- res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type);
+ disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+ if (!disc_resp)
+ return -ENOMEM;
+
+ res = sas_get_phy_discover(dev, phy_id, disc_resp);
switch (res) {
case SMP_RESP_NO_PHY:
phy->phy_state = PHY_NOT_PRESENT;
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return res;
+ goto out_free_resp;
case SMP_RESP_PHY_VACANT:
phy->phy_state = PHY_VACANT;
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return res;
+ goto out_free_resp;
case SMP_RESP_FUNC_ACC:
break;
case -ECOMM:
break;
default:
- return res;
+ goto out_free_resp;
}
+ if (res == 0)
+ sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, &type);
+
if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) {
phy->phy_state = PHY_EMPTY;
sas_unregister_devs_sas_addr(dev, phy_id, last);
/*
- * Even though the PHY is empty, for convenience we discover
- * the PHY to update the PHY info, like negotiated linkrate.
+ * Even though the PHY is empty, for convenience we update
+ * the PHY info, like negotiated linkrate.
*/
- sas_ex_phy_discover(dev, phy_id);
- return res;
+ if (res == 0)
+ sas_set_ex_phy(dev, phy_id, disc_resp);
+ goto out_free_resp;
} else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) &&
dev_type_flutter(type, phy->attached_dev_type)) {
struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id);
action = ", needs recovery";
pr_debug("ex %016llx phy%02d broadcast flutter%s\n",
SAS_ADDR(dev->sas_addr), phy_id, action);
- return res;
+ goto out_free_resp;
}
/* we always have to delete the old device when we went here */
SAS_ADDR(phy->attached_sas_addr));
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return sas_discover_new(dev, phy_id);
+ res = sas_discover_new(dev, phy_id);
+out_free_resp:
+ kfree(disc_resp);
+ return res;
}
/**
struct timer_list fabric_block_timer;
unsigned long bit_flags;
atomic_t num_rsrc_err;
- atomic_t num_cmd_success;
unsigned long last_rsrc_error_time;
unsigned long last_ramp_down_time;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
struct timer_list inactive_vmid_poll;
/* RAS Support */
+ spinlock_t ras_fwlog_lock; /* do not take while holding another lock */
struct lpfc_ras_fwlog ras_fwlog;
uint32_t iocb_cnt;
if (phba->cfg_ras_fwlog_func != PCI_FUNC(phba->pcidev->devfn))
return -EINVAL;
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
state = phba->ras_fwlog.state;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
if (state == REG_INPROGRESS) {
lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "6147 RAS Logging "
return -ENOMEM;
}
- dmabuff = (struct lpfc_dmabuf *)mbox->ctx_buf;
+ dmabuff = mbox->ctx_buf;
mbox->ctx_buf = NULL;
mbox->ctx_ndlp = NULL;
status = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO);
}
cmdwqe = &cmdiocbq->wqe;
- memset(cmdwqe, 0, sizeof(union lpfc_wqe));
+ memset(cmdwqe, 0, sizeof(*cmdwqe));
if (phba->sli_rev < LPFC_SLI_REV4) {
rspwqe = &rspiocbq->wqe;
- memset(rspwqe, 0, sizeof(union lpfc_wqe));
+ memset(rspwqe, 0, sizeof(*rspwqe));
}
INIT_LIST_HEAD(&head);
unsigned long flags;
uint8_t *pmb, *pmb_buf;
- dd_data = pmboxq->ctx_ndlp;
+ dd_data = pmboxq->ctx_u.dd_data;
/*
* The outgoing buffer is readily referred from the dma buffer,
struct lpfc_sli_config_mbox *sli_cfg_mbx;
uint8_t *pmbx;
- dd_data = pmboxq->ctx_buf;
+ dd_data = pmboxq->ctx_u.dd_data;
/* Determine if job has been aborted */
spin_lock_irqsave(&phba->ct_ev_lock, flags);
pmboxq->mbox_cmpl = lpfc_bsg_issue_read_mbox_ext_cmpl;
/* context fields to callback function */
- pmboxq->ctx_buf = dd_data;
+ pmboxq->ctx_u.dd_data = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl;
/* context fields to callback function */
- pmboxq->ctx_buf = dd_data;
+ pmboxq->ctx_u.dd_data = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl;
/* context fields to callback function */
- pmboxq->ctx_buf = dd_data;
+ pmboxq->ctx_u.dd_data = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
if (mbox_req->inExtWLen || mbox_req->outExtWLen) {
from = pmbx;
ext = from + sizeof(MAILBOX_t);
- pmboxq->ctx_buf = ext;
+ pmboxq->ext_buf = ext;
pmboxq->in_ext_byte_len =
mbox_req->inExtWLen * sizeof(uint32_t);
pmboxq->out_ext_byte_len =
pmboxq->mbox_cmpl = lpfc_bsg_issue_mbox_cmpl;
/* setup context field to pass wait_queue pointer to wake function */
- pmboxq->ctx_ndlp = dd_data;
+ pmboxq->ctx_u.dd_data = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
bsg_reply->reply_data.vendor_reply.vendor_rsp;
/* Current logging state */
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (ras_fwlog->state == ACTIVE)
ras_reply->state = LPFC_RASLOG_STATE_RUNNING;
else
ras_reply->state = LPFC_RASLOG_STATE_STOPPED;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
ras_reply->log_level = phba->ras_fwlog.fw_loglevel;
ras_reply->log_buff_sz = phba->cfg_ras_fwlog_buffsize;
if (action == LPFC_RASACTION_STOP_LOGGING) {
/* Check if already disabled */
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (ras_fwlog->state != ACTIVE) {
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
rc = -ESRCH;
goto ras_job_error;
}
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
/* Disable logging */
lpfc_ras_stop_fwlog(phba);
* FW-logging with new log-level. Return status
* "Logging already Running" to caller.
**/
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (ras_fwlog->state != INACTIVE)
action_status = -EINPROGRESS;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
/* Enable logging */
rc = lpfc_sli4_ras_fwlog_init(phba, log_level,
goto ras_job_error;
/* Logging to be stopped before reading */
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (ras_fwlog->state == ACTIVE) {
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
rc = -EINPROGRESS;
goto ras_job_error;
}
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
if (job->request_len <
sizeof(struct fc_bsg_request) +
memset(buffer, 0, size);
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (phba->ras_fwlog.state != ACTIVE) {
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
return -EINVAL;
}
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
list_for_each_entry_safe(dmabuf, next,
&phba->ras_fwlog.fwlog_buff_list, list) {
int size;
int rc = -ENOMEM;
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (phba->ras_fwlog.state != ACTIVE) {
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
rc = -EINVAL;
goto out;
}
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
if (check_mul_overflow(LPFC_RAS_MIN_BUFF_POST_SIZE,
phba->cfg_ras_fwlog_buffsize, &size))
unsigned long flags;
struct lpfc_work_evt *evtp = &ndlp->els_retry_evt;
+ /* Hold a node reference for outstanding queued work */
+ if (!lpfc_nlp_get(ndlp))
+ return;
+
spin_lock_irqsave(&phba->hbalock, flags);
if (!list_empty(&evtp->evt_listp)) {
spin_unlock_irqrestore(&phba->hbalock, flags);
+ lpfc_nlp_put(ndlp);
return;
}
- /* We need to hold the node by incrementing the reference
- * count until the queued work is done
- */
- evtp->evt_arg1 = lpfc_nlp_get(ndlp);
- if (evtp->evt_arg1) {
- evtp->evt = LPFC_EVT_ELS_RETRY;
- list_add_tail(&evtp->evt_listp, &phba->work_list);
- lpfc_worker_wake_up(phba);
- }
+ evtp->evt_arg1 = ndlp;
+ evtp->evt = LPFC_EVT_ELS_RETRY;
+ list_add_tail(&evtp->evt_listp, &phba->work_list);
spin_unlock_irqrestore(&phba->hbalock, flags);
- return;
+
+ lpfc_worker_wake_up(phba);
}
/**
goto rdp_fail;
mbox->vport = rdp_context->ndlp->vport;
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a0;
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
+ mbox->ctx_u.rdp = rdp_context;
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED) {
lpfc_mbox_rsrc_cleanup(phba, mbox, MBOX_THD_UNLOCKED);
mbox->in_ext_byte_len = DMP_SFF_PAGE_A0_SIZE;
mbox->out_ext_byte_len = DMP_SFF_PAGE_A0_SIZE;
mbox->mbox_offset_word = 5;
- mbox->ctx_buf = virt;
+ mbox->ext_buf = virt;
} else {
bf_set(lpfc_mbx_memory_dump_type3_length,
&mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A0_SIZE);
mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys);
}
mbox->vport = phba->pport;
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30);
if (rc == MBX_NOT_FINISHED) {
}
if (phba->sli_rev == LPFC_SLI_REV4)
- mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
+ mp = mbox->ctx_buf;
else
mp = mpsave;
mbox->in_ext_byte_len = DMP_SFF_PAGE_A2_SIZE;
mbox->out_ext_byte_len = DMP_SFF_PAGE_A2_SIZE;
mbox->mbox_offset_word = 5;
- mbox->ctx_buf = virt;
+ mbox->ext_buf = virt;
} else {
bf_set(lpfc_mbx_memory_dump_type3_length,
&mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A2_SIZE);
mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys);
}
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30);
if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) {
rc = 1;
int rc;
mb = &pmb->u.mb;
- lcb_context = (struct lpfc_lcb_context *)pmb->ctx_ndlp;
+ lcb_context = pmb->ctx_u.lcb;
ndlp = lcb_context->ndlp;
- pmb->ctx_ndlp = NULL;
+ memset(&pmb->ctx_u, 0, sizeof(pmb->ctx_u));
pmb->ctx_buf = NULL;
shdr = (union lpfc_sli4_cfg_shdr *)
lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
LPFC_MBOX_OPCODE_SET_BEACON_CONFIG, len,
LPFC_SLI4_MBX_EMBED);
- mbox->ctx_ndlp = (void *)lcb_context;
+ mbox->ctx_u.lcb = lcb_context;
mbox->vport = phba->pport;
mbox->mbox_cmpl = lpfc_els_lcb_rsp;
bf_set(lpfc_mbx_set_beacon_port_num, &mbox->u.mqe.un.beacon_config,
mb = &pmb->u.mb;
ndlp = pmb->ctx_ndlp;
- rxid = (uint16_t)((unsigned long)(pmb->ctx_buf) & 0xffff);
- oxid = (uint16_t)(((unsigned long)(pmb->ctx_buf) >> 16) & 0xffff);
- pmb->ctx_buf = NULL;
+ rxid = (uint16_t)(pmb->ctx_u.ox_rx_id & 0xffff);
+ oxid = (uint16_t)((pmb->ctx_u.ox_rx_id >> 16) & 0xffff);
+ memset(&pmb->ctx_u, 0, sizeof(pmb->ctx_u));
pmb->ctx_ndlp = NULL;
if (mb->mbxStatus) {
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC);
if (mbox) {
lpfc_read_lnk_stat(phba, mbox);
- mbox->ctx_buf = (void *)((unsigned long)
- (ox_id << 16 | ctx));
+ mbox->ctx_u.ox_rx_id = ox_id << 16 | ctx;
mbox->ctx_ndlp = lpfc_nlp_get(ndlp);
if (!mbox->ctx_ndlp)
goto node_err;
if (evtp->evt_arg1) {
evtp->evt = LPFC_EVT_DEV_LOSS;
list_add_tail(&evtp->evt_listp, &phba->work_list);
+ spin_unlock_irqrestore(&phba->hbalock, iflags);
lpfc_worker_wake_up(phba);
+ return;
}
spin_unlock_irqrestore(&phba->hbalock, iflags);
} else {
lpfc_disc_state_machine(vport, ndlp, NULL,
NLP_EVT_DEVICE_RM);
}
-
}
-
- return;
}
/**
lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ struct lpfc_dmabuf *mp = pmb->ctx_buf;
struct lpfc_vport *vport = pmb->vport;
struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
struct serv_parm *sp = &vport->fc_sparam;
struct lpfc_mbx_read_top *la;
struct lpfc_sli_ring *pring;
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
+ struct lpfc_dmabuf *mp = pmb->ctx_buf;
uint8_t attn_type;
/* Unblock ELS traffic */
lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
struct lpfc_vport *vport = pmb->vport;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_dmabuf *mp = pmb->ctx_buf;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
/* The driver calls the state machine with the pmb pointer
* but wants to make sure a stale ctx_buf isn't acted on.
* the dump routine is a single-use construct.
*/
if (pmb->ctx_buf) {
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
pmb->ctx_buf = NULL;
if (phba->sli_rev == LPFC_SLI_REV4) {
byte_count = pmb->u.mqe.un.mb_words[5];
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
if (byte_count > sizeof(struct static_vport_info) -
offset)
byte_count = sizeof(struct static_vport_info)
{
struct lpfc_vport *vport = pmb->vport;
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
pmb->ctx_ndlp = NULL;
lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
struct lpfc_vport *vport = pmb->vport;
int rc;
{
struct lpfc_vport *vport = pmb->vport;
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
pmb->ctx_ndlp = NULL;
if (mb->mbxStatus) {
struct lpfc_vport *vport = pmb->vport;
struct lpfc_nodelist *ndlp;
- ndlp = (struct lpfc_nodelist *)(pmb->ctx_ndlp);
+ ndlp = pmb->ctx_ndlp;
if (!ndlp)
return;
lpfc_issue_els_logo(vport, ndlp, 0);
if ((mb = phba->sli.mbox_active)) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
!(mb->mbox_flag & LPFC_MBX_IMED_UNREG) &&
- (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ (ndlp == mb->ctx_ndlp)) {
mb->ctx_ndlp = NULL;
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
}
list_for_each_entry(mb, &phba->sli.mboxq_cmpl, list) {
if ((mb->u.mb.mbxCommand != MBX_REG_LOGIN64) ||
(mb->mbox_flag & LPFC_MBX_IMED_UNREG) ||
- (ndlp != (struct lpfc_nodelist *)mb->ctx_ndlp))
+ (ndlp != mb->ctx_ndlp))
continue;
mb->ctx_ndlp = NULL;
list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
!(mb->mbox_flag & LPFC_MBX_IMED_UNREG) &&
- (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ (ndlp == mb->ctx_ndlp)) {
list_del(&mb->list);
lpfc_mbox_rsrc_cleanup(phba, mb, MBOX_THD_LOCKED);
lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
struct lpfc_vport *vport = pmb->vport;
pmb->ctx_ndlp = NULL;
return -EIO;
}
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
/* This dmabuf was allocated by lpfc_read_sparam. The dmabuf is no
* longer needed. Prevent unintended ctx_buf access as the mbox is
/* Cleanup any outstanding ELS commands */
lpfc_els_flush_all_cmd(phba);
psli->slistat.link_event++;
- lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf);
+ lpfc_read_topology(phba, pmb, pmb->ctx_buf);
pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology;
pmb->vport = vport;
/* Block ELS IOCBs until we have processed this mbox command */
phba->sli.slistat.link_event++;
/* Create lpfc_handle_latt mailbox command from link ACQE */
- lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf);
+ lpfc_read_topology(phba, pmb, pmb->ctx_buf);
pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology;
pmb->vport = phba->pport;
phba->sli.slistat.link_event++;
/* Create lpfc_handle_latt mailbox command from link ACQE */
- lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf);
+ lpfc_read_topology(phba, pmb, pmb->ctx_buf);
pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology;
pmb->vport = phba->pport;
"NVME" : " "),
(phba->nvmet_support ? "NVMET" : " "));
+ /* ras_fwlog state */
+ spin_lock_init(&phba->ras_fwlog_lock);
+
/* Initialize the IO buffer list used by driver for SLI3 SCSI */
spin_lock_init(&phba->scsi_buf_list_get_lock);
INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_get);
rc = request_threaded_irq(eqhdl->irq,
&lpfc_sli4_hba_intr_handler,
&lpfc_sli4_hba_intr_handler_th,
- IRQF_ONESHOT, name, eqhdl);
+ 0, name, eqhdl);
if (rc) {
lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
"0486 MSI-X fast-path (%d) "
{
struct lpfc_dmabuf *mp;
- mp = (struct lpfc_dmabuf *)mbox->ctx_buf;
+ mp = mbox->ctx_buf;
mbox->ctx_buf = NULL;
/* Release the generic BPL buffer memory. */
uint16_t region_id)
{
MAILBOX_t *mb;
- void *ctx;
mb = &pmb->u.mb;
- ctx = pmb->ctx_buf;
/* Setup to dump VPD region */
memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
mb->un.varDmp.word_cnt = (DMP_RSP_SIZE / sizeof (uint32_t));
mb->un.varDmp.co = 0;
mb->un.varDmp.resp_offset = 0;
- pmb->ctx_buf = ctx;
mb->mbxOwner = OWN_HOST;
return;
}
lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb;
- void *ctx;
mb = &pmb->u.mb;
- /* Save context so that we can restore after memset */
- ctx = pmb->ctx_buf;
/* Setup to dump VPD region */
memset(pmb, 0, sizeof(LPFC_MBOXQ_t));
mb->un.varDmp.word_cnt = WAKE_UP_PARMS_WORD_SIZE;
mb->un.varDmp.co = 0;
mb->un.varDmp.resp_offset = 0;
- pmb->ctx_buf = ctx;
return;
}
/* Save address for later completion and set the owner to host so that
* the FW knows this mailbox is available for processing.
*/
- pmb->ctx_buf = (uint8_t *)mp;
+ pmb->ctx_buf = mp;
mb->mbxOwner = OWN_HOST;
return (0);
}
}
/* Reinitialize the context pointers to avoid stale usage. */
mbox->ctx_buf = NULL;
- mbox->context3 = NULL;
+ memset(&mbox->ctx_u, 0, sizeof(mbox->ctx_u));
kfree(mbox->sge_array);
/* Finally, free the mailbox command itself */
mempool_free(mbox, phba->mbox_mem_pool);
{
MAILBOX_t *mb;
int rc = FAILURE;
- struct lpfc_rdp_context *rdp_context =
- (struct lpfc_rdp_context *)(mboxq->ctx_ndlp);
+ struct lpfc_rdp_context *rdp_context = mboxq->ctx_u.rdp;
mb = &mboxq->u.mb;
if (mb->mbxStatus)
static void
lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
{
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)mbox->ctx_buf;
- struct lpfc_rdp_context *rdp_context =
- (struct lpfc_rdp_context *)(mbox->ctx_ndlp);
+ struct lpfc_dmabuf *mp = mbox->ctx_buf;
+ struct lpfc_rdp_context *rdp_context = mbox->ctx_u.rdp;
if (bf_get(lpfc_mqe_status, &mbox->u.mqe))
goto error_mbox_free;
/* Save the dma buffer for cleanup in the final completion. */
mbox->ctx_buf = mp;
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_link_stat;
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
+ mbox->ctx_u.rdp = rdp_context;
if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) == MBX_NOT_FINISHED)
goto error_mbox_free;
lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
{
int rc;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
- struct lpfc_rdp_context *rdp_context =
- (struct lpfc_rdp_context *)(mbox->ctx_ndlp);
+ struct lpfc_dmabuf *mp = mbox->ctx_buf;
+ struct lpfc_rdp_context *rdp_context = mbox->ctx_u.rdp;
if (bf_get(lpfc_mqe_status, &mbox->u.mqe))
goto error;
mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys);
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a2;
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
+ mbox->ctx_u.rdp = rdp_context;
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED)
goto error;
int rc;
ndlp = login_mbox->ctx_ndlp;
- save_iocb = login_mbox->context3;
+ save_iocb = login_mbox->ctx_u.save_iocb;
if (mb->mbxStatus == MBX_SUCCESS) {
/* Now that REG_RPI completed successfully,
if (!login_mbox->ctx_ndlp)
goto out;
- login_mbox->context3 = save_iocb; /* For PLOGI ACC */
+ login_mbox->ctx_u.save_iocb = save_iocb; /* For PLOGI ACC */
spin_lock_irq(&ndlp->lock);
ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI);
struct lpfc_nodelist *ndlp;
uint32_t cmd;
- elsiocb = (struct lpfc_iocbq *)mboxq->ctx_buf;
- ndlp = (struct lpfc_nodelist *)mboxq->ctx_ndlp;
+ elsiocb = mboxq->ctx_u.save_iocb;
+ ndlp = mboxq->ctx_ndlp;
vport = mboxq->vport;
cmd = elsiocb->drvrTimeout;
/* cleanup any ndlp on mbox q waiting for reglogin cmpl */
if ((mb = phba->sli.mbox_active)) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
- (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ (ndlp == mb->ctx_ndlp)) {
ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND;
lpfc_nlp_put(ndlp);
mb->ctx_ndlp = NULL;
spin_lock_irq(&phba->hbalock);
list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
- (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ (ndlp == mb->ctx_ndlp)) {
ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND;
lpfc_nlp_put(ndlp);
list_del(&mb->list);
/* No concern about the role change on the nvme remoteport.
* The transport will update it.
*/
- spin_lock_irq(&vport->phba->hbalock);
+ spin_lock_irq(&ndlp->lock);
ndlp->fc4_xpt_flags |= NVME_XPT_UNREG_WAIT;
- spin_unlock_irq(&vport->phba->hbalock);
+ spin_unlock_irq(&ndlp->lock);
/* Don't let the host nvme transport keep sending keep-alives
* on this remoteport. Vport is unloading, no recovery. The
wqe = &nvmewqe->wqe;
/* Initialize WQE */
- memset(wqe, 0, sizeof(union lpfc_wqe));
+ memset(wqe, 0, sizeof(*wqe));
ctx_buf->iocbq->cmd_dmabuf = NULL;
spin_lock(&phba->sli4_hba.sgl_list_lock);
struct Scsi_Host *shost;
struct scsi_device *sdev;
unsigned long new_queue_depth;
- unsigned long num_rsrc_err, num_cmd_success;
+ unsigned long num_rsrc_err;
int i;
num_rsrc_err = atomic_read(&phba->num_rsrc_err);
- num_cmd_success = atomic_read(&phba->num_cmd_success);
/*
* The error and success command counters are global per
for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
shost = lpfc_shost_from_vport(vports[i]);
shost_for_each_device(sdev, shost) {
- new_queue_depth =
- sdev->queue_depth * num_rsrc_err /
- (num_rsrc_err + num_cmd_success);
- if (!new_queue_depth)
- new_queue_depth = sdev->queue_depth - 1;
+ if (num_rsrc_err >= sdev->queue_depth)
+ new_queue_depth = 1;
else
new_queue_depth = sdev->queue_depth -
- new_queue_depth;
+ num_rsrc_err;
scsi_change_queue_depth(sdev, new_queue_depth);
}
}
lpfc_destroy_vport_work_array(phba, vports);
atomic_set(&phba->num_rsrc_err, 0);
- atomic_set(&phba->num_cmd_success, 0);
}
/**
}
err = lpfc_bg_scsi_prep_dma_buf(phba, lpfc_cmd);
} else {
- if (vport->phba->cfg_enable_bg) {
- lpfc_printf_vlog(vport,
- KERN_INFO, LOG_SCSI_CMD,
- "9038 BLKGRD: rcvd PROT_NORMAL cmd: "
- "x%x reftag x%x cnt %u pt %x\n",
- cmnd->cmnd[0],
- scsi_prot_ref_tag(cmnd),
- scsi_logical_block_count(cmnd),
- (cmnd->cmnd[1]>>5));
- }
err = lpfc_scsi_prep_dma_buf(phba, lpfc_cmd);
}
empty = list_empty(&phba->active_rrq_list);
list_add_tail(&rrq->list, &phba->active_rrq_list);
phba->hba_flag |= HBA_RRQ_ACTIVE;
+ spin_unlock_irqrestore(&phba->hbalock, iflags);
if (empty)
lpfc_worker_wake_up(phba);
- spin_unlock_irqrestore(&phba->hbalock, iflags);
return 0;
out:
spin_unlock_irqrestore(&phba->hbalock, iflags);
*/
pmboxq->mbox_flag |= LPFC_MBX_WAKE;
spin_lock_irqsave(&phba->hbalock, drvr_flag);
- pmbox_done = (struct completion *)pmboxq->context3;
+ pmbox_done = pmboxq->ctx_u.mbox_wait;
if (pmbox_done)
complete(pmbox_done);
spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
if (!test_bit(FC_UNLOADING, &phba->pport->load_flag) &&
pmb->u.mb.mbxCommand == MBX_REG_LOGIN64 &&
!pmb->u.mb.mbxStatus) {
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
if (mp) {
pmb->ctx_buf = NULL;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
}
if (pmb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ ndlp = pmb->ctx_ndlp;
lpfc_nlp_put(ndlp);
}
if (pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) {
- ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ ndlp = pmb->ctx_ndlp;
/* Check to see if there are any deferred events to process */
if (ndlp) {
/* This nlp_put pairs with lpfc_sli4_resume_rpi */
if (pmb->u.mb.mbxCommand == MBX_RESUME_RPI) {
- ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ ndlp = pmb->ctx_ndlp;
lpfc_nlp_put(ndlp);
}
goto out_free_mboxq;
}
- mp = (struct lpfc_dmabuf *)mboxq->ctx_buf;
+ mp = mboxq->ctx_buf;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
{
struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog;
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = INACTIVE;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
/* Disable FW logging to host memory */
writel(LPFC_CTL_PDEV_CTL_DDL_RAS,
ras_fwlog->lwpd.virt = NULL;
}
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = INACTIVE;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
}
/**
goto disable_ras;
}
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = ACTIVE;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
mempool_free(pmb, phba->mbox_mem_pool);
return;
uint32_t len = 0, fwlog_buffsize, fwlog_entry_count;
int rc = 0;
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = INACTIVE;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
fwlog_buffsize = (LPFC_RAS_MIN_BUFF_POST_SIZE *
phba->cfg_ras_fwlog_buffsize);
mbx_fwlog->u.request.lwpd.addr_lo = putPaddrLow(ras_fwlog->lwpd.phys);
mbx_fwlog->u.request.lwpd.addr_hi = putPaddrHigh(ras_fwlog->lwpd.phys);
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = REG_INPROGRESS;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
mbox->vport = phba->pport;
mbox->mbox_cmpl = lpfc_sli4_ras_mbox_cmpl;
mboxq->vport = vport;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
- mp = (struct lpfc_dmabuf *)mboxq->ctx_buf;
+ mp = mboxq->ctx_buf;
if (rc == MBX_SUCCESS) {
memcpy(&vport->fc_sparam, mp->virt, sizeof(struct serv_parm));
rc = 0;
}
/* Copy the mailbox extension data */
- if (pmbox->in_ext_byte_len && pmbox->ctx_buf) {
- lpfc_sli_pcimem_bcopy(pmbox->ctx_buf,
+ if (pmbox->in_ext_byte_len && pmbox->ext_buf) {
+ lpfc_sli_pcimem_bcopy(pmbox->ext_buf,
(uint8_t *)phba->mbox_ext,
pmbox->in_ext_byte_len);
}
= MAILBOX_HBA_EXT_OFFSET;
/* Copy the mailbox extension data */
- if (pmbox->in_ext_byte_len && pmbox->ctx_buf)
+ if (pmbox->in_ext_byte_len && pmbox->ext_buf)
lpfc_memcpy_to_slim(phba->MBslimaddr +
MAILBOX_HBA_EXT_OFFSET,
- pmbox->ctx_buf, pmbox->in_ext_byte_len);
+ pmbox->ext_buf, pmbox->in_ext_byte_len);
if (mbx->mbxCommand == MBX_CONFIG_PORT)
/* copy command data into host mbox for cmpl */
lpfc_sli_pcimem_bcopy(phba->mbox, mbx,
MAILBOX_CMD_SIZE);
/* Copy the mailbox extension data */
- if (pmbox->out_ext_byte_len && pmbox->ctx_buf) {
+ if (pmbox->out_ext_byte_len && pmbox->ext_buf) {
lpfc_sli_pcimem_bcopy(phba->mbox_ext,
- pmbox->ctx_buf,
+ pmbox->ext_buf,
pmbox->out_ext_byte_len);
}
} else {
lpfc_memcpy_from_slim(mbx, phba->MBslimaddr,
MAILBOX_CMD_SIZE);
/* Copy the mailbox extension data */
- if (pmbox->out_ext_byte_len && pmbox->ctx_buf) {
+ if (pmbox->out_ext_byte_len && pmbox->ext_buf) {
lpfc_memcpy_from_slim(
- pmbox->ctx_buf,
+ pmbox->ext_buf,
phba->MBslimaddr +
MAILBOX_HBA_EXT_OFFSET,
pmbox->out_ext_byte_len);
unsigned long iflags;
struct lpfc_work_evt *evtp = &ndlp->recovery_evt;
+ /* Hold a node reference for outstanding queued work */
+ if (!lpfc_nlp_get(ndlp))
+ return;
+
spin_lock_irqsave(&phba->hbalock, iflags);
if (!list_empty(&evtp->evt_listp)) {
spin_unlock_irqrestore(&phba->hbalock, iflags);
+ lpfc_nlp_put(ndlp);
return;
}
- /* Incrementing the reference count until the queued work is done. */
- evtp->evt_arg1 = lpfc_nlp_get(ndlp);
- if (!evtp->evt_arg1) {
- spin_unlock_irqrestore(&phba->hbalock, iflags);
- return;
- }
+ evtp->evt_arg1 = ndlp;
evtp->evt = LPFC_EVT_RECOVER_PORT;
list_add_tail(&evtp->evt_listp, &phba->work_list);
spin_unlock_irqrestore(&phba->hbalock, iflags);
/* setup wake call as IOCB callback */
pmboxq->mbox_cmpl = lpfc_sli_wake_mbox_wait;
- /* setup context3 field to pass wait_queue pointer to wake function */
+ /* setup ctx_u field to pass wait_queue pointer to wake function */
init_completion(&mbox_done);
- pmboxq->context3 = &mbox_done;
+ pmboxq->ctx_u.mbox_wait = &mbox_done;
/* now issue the command */
retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT);
if (retval == MBX_BUSY || retval == MBX_SUCCESS) {
msecs_to_jiffies(timeout * 1000));
spin_lock_irqsave(&phba->hbalock, flag);
- pmboxq->context3 = NULL;
+ pmboxq->ctx_u.mbox_wait = NULL;
/*
* if LPFC_MBX_WAKE flag is set the mailbox is completed
* else do not free the resources.
lpfc_sli_pcimem_bcopy(mbox, pmbox,
MAILBOX_CMD_SIZE);
if (pmb->out_ext_byte_len &&
- pmb->ctx_buf)
+ pmb->ext_buf)
lpfc_sli_pcimem_bcopy(
phba->mbox_ext,
- pmb->ctx_buf,
+ pmb->ext_buf,
pmb->out_ext_byte_len);
}
if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) {
pmbox->un.varWords[0], 0);
if (!pmbox->mbxStatus) {
- mp = (struct lpfc_dmabuf *)
- (pmb->ctx_buf);
- ndlp = (struct lpfc_nodelist *)
- pmb->ctx_ndlp;
+ mp = pmb->ctx_buf;
+ ndlp = pmb->ctx_ndlp;
/* Reg_LOGIN of dflt RPI was
* successful. new lets get
mcqe_status,
pmbox->un.varWords[0], 0);
if (mcqe_status == MB_CQE_STATUS_SUCCESS) {
- mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
- ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ mp = pmb->ctx_buf;
+ ndlp = pmb->ctx_ndlp;
/* Reg_LOGIN of dflt RPI was successful. Mark the
* node as having an UNREG_LOGIN in progress to stop
* lpfc_sli4_resume_rpi - Remove the rpi bitmask region
* @ndlp: pointer to lpfc nodelist data structure.
* @cmpl: completion call-back.
- * @arg: data to load as MBox 'caller buffer information'
+ * @iocbq: data to load as mbox ctx_u information
*
* This routine is invoked to remove the memory region that
* provided rpi via a bitmask.
**/
int
lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp,
- void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *arg)
+ void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *),
+ struct lpfc_iocbq *iocbq)
{
LPFC_MBOXQ_t *mboxq;
struct lpfc_hba *phba = ndlp->phba;
lpfc_resume_rpi(mboxq, ndlp);
if (cmpl) {
mboxq->mbox_cmpl = cmpl;
- mboxq->ctx_buf = arg;
+ mboxq->ctx_u.save_iocb = iocbq;
} else
mboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
mboxq->ctx_ndlp = ndlp;
if (lpfc_sli4_dump_cfg_rg23(phba, mboxq))
goto out;
mqe = &mboxq->u.mqe;
- mp = (struct lpfc_dmabuf *)mboxq->ctx_buf;
+ mp = mboxq->ctx_buf;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
if (rc)
goto out;
(mb->u.mb.mbxCommand == MBX_REG_VPI))
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- act_mbx_ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp;
+ act_mbx_ndlp = mb->ctx_ndlp;
/* This reference is local to this routine. The
* reference is removed at routine exit.
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp;
+ ndlp = mb->ctx_ndlp;
/* Unregister the RPI when mailbox complete */
mb->mbox_flag |= LPFC_MBX_IMED_UNREG;
restart_loop = 1;
while (!list_empty(&mbox_cmd_list)) {
list_remove_head(&mbox_cmd_list, mb, LPFC_MBOXQ_t, list);
if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp;
+ ndlp = mb->ctx_ndlp;
mb->ctx_ndlp = NULL;
if (ndlp) {
spin_lock(&ndlp->lock);
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
* “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2004-2016 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
struct lpfc_mqe mqe;
} u;
struct lpfc_vport *vport; /* virtual port pointer */
- void *ctx_ndlp; /* an lpfc_nodelist pointer */
- void *ctx_buf; /* an lpfc_dmabuf pointer */
- void *context3; /* a generic pointer. Code must
- * accommodate the actual datatype.
- */
+ struct lpfc_nodelist *ctx_ndlp; /* caller ndlp pointer */
+ struct lpfc_dmabuf *ctx_buf; /* caller buffer information */
+ void *ext_buf; /* extended buffer for extended mbox
+ * cmds. Not a generic pointer.
+ * Use for storing virtual address.
+ */
+
+ /* Pointers that are seldom used during mbox execution, but require
+ * a saved context.
+ */
+ union {
+ unsigned long ox_rx_id; /* Used in els_rsp_rls_acc */
+ struct lpfc_rdp_context *rdp; /* Used in get_rdp_info */
+ struct lpfc_lcb_context *lcb; /* Used in set_beacon */
+ struct completion *mbox_wait; /* Used in issue_mbox_wait */
+ struct bsg_job_data *dd_data; /* Used in bsg_issue_mbox_cmpl
+ * and
+ * bsg_issue_mbox_ext_handle_job
+ */
+ struct lpfc_iocbq *save_iocb; /* Used in defer_plogi_acc and
+ * lpfc_mbx_cmpl_resume_rpi
+ */
+ } ctx_u;
void (*mbox_cmpl) (struct lpfc_hba *, struct lpfcMboxq *);
uint8_t mbox_flag;
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
* “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2009-2016 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
void lpfc_sli4_remove_rpis(struct lpfc_hba *);
void lpfc_sli4_async_event_proc(struct lpfc_hba *);
void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *);
-int lpfc_sli4_resume_rpi(struct lpfc_nodelist *,
- void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *);
+int lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp,
+ void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *),
+ struct lpfc_iocbq *iocbq);
void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba);
void lpfc_sli4_nvme_pci_offline_aborted(struct lpfc_hba *phba,
struct lpfc_io_buf *lpfc_ncmd);
* included with this package. *
*******************************************************************/
-#define LPFC_DRIVER_VERSION "14.4.0.0"
+#define LPFC_DRIVER_VERSION "14.4.0.1"
#define LPFC_DRIVER_NAME "lpfc"
/* Used for SLI 2/3 */
}
}
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm));
memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName,
sizeof (struct lpfc_name));
lpfc_free_sysfs_attr(vport);
lpfc_debugfs_terminate(vport);
- /* Remove FC host to break driver binding. */
- fc_remove_host(shost);
- scsi_remove_host(shost);
-
/* Send the DA_ID and Fabric LOGO to cleanup Nameserver entries. */
ndlp = lpfc_findnode_did(vport, Fabric_DID);
if (!ndlp)
skip_logo:
+ /* Remove FC host to break driver binding. */
+ fc_remove_host(shost);
+ scsi_remove_host(shost);
+
lpfc_cleanup(vport);
/* Remove scsi host now. The nodes are cleaned up. */
if ((mpirep_offset != 0xFF) &&
drv_bufs[mpirep_offset].bsg_buf_len) {
drv_buf_iter = &drv_bufs[mpirep_offset];
- drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) - 1 +
+ drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) +
mrioc->reply_sz);
bsg_reply_buf = kzalloc(drv_buf_iter->kern_buf_len, GFP_KERNEL);
name = myrb_devstate_name(ldev_info->state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->state);
} else {
struct myrb_pdev_state *pdev_info = sdev->hostdata;
else
name = myrb_devstate_name(pdev_info->state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
pdev_info->state);
}
return ret;
name = myrb_raidlevel_name(ldev_info->raid_level);
if (!name)
- return snprintf(buf, 32, "Invalid (%02X)\n",
+ return snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->state);
- return snprintf(buf, 32, "%s\n", name);
+ return snprintf(buf, 64, "%s\n", name);
}
- return snprintf(buf, 32, "Physical Drive\n");
+ return snprintf(buf, 64, "Physical Drive\n");
}
static DEVICE_ATTR_RO(raid_level);
unsigned char status;
if (sdev->channel < myrb_logical_channel(sdev->host))
- return snprintf(buf, 32, "physical device - not rebuilding\n");
+ return snprintf(buf, 64, "physical device - not rebuilding\n");
status = myrb_get_rbld_progress(cb, &rbld_buf);
if (rbld_buf.ldev_num != sdev->id ||
status != MYRB_STATUS_SUCCESS)
- return snprintf(buf, 32, "not rebuilding\n");
+ return snprintf(buf, 64, "not rebuilding\n");
- return snprintf(buf, 32, "rebuilding block %u of %u\n",
+ return snprintf(buf, 64, "rebuilding block %u of %u\n",
rbld_buf.ldev_size - rbld_buf.blocks_left,
rbld_buf.ldev_size);
}
name = myrs_devstate_name(ldev_info->dev_state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->dev_state);
} else {
struct myrs_pdev_info *pdev_info;
pdev_info = sdev->hostdata;
name = myrs_devstate_name(pdev_info->dev_state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
pdev_info->dev_state);
}
return ret;
ldev_info = sdev->hostdata;
name = myrs_raid_level_name(ldev_info->raid_level);
if (!name)
- return snprintf(buf, 32, "Invalid (%02X)\n",
+ return snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->dev_state);
} else
name = myrs_raid_level_name(MYRS_RAID_PHYSICAL);
- return snprintf(buf, 32, "%s\n", name);
+ return snprintf(buf, 64, "%s\n", name);
}
static DEVICE_ATTR_RO(raid_level);
unsigned char status;
if (sdev->channel < cs->ctlr_info->physchan_present)
- return snprintf(buf, 32, "physical device - not rebuilding\n");
+ return snprintf(buf, 64, "physical device - not rebuilding\n");
ldev_info = sdev->hostdata;
ldev_num = ldev_info->ldev_num;
return -EIO;
}
if (ldev_info->rbld_active) {
- return snprintf(buf, 32, "rebuilding block %zu of %zu\n",
+ return snprintf(buf, 64, "rebuilding block %zu of %zu\n",
(size_t)ldev_info->rbld_lba,
(size_t)ldev_info->cfg_devsize);
} else
- return snprintf(buf, 32, "not rebuilding\n");
+ return snprintf(buf, 64, "not rebuilding\n");
}
static ssize_t rebuild_store(struct device *dev,
unsigned short ldev_num;
if (sdev->channel < cs->ctlr_info->physchan_present)
- return snprintf(buf, 32, "physical device - not checking\n");
+ return snprintf(buf, 64, "physical device - not checking\n");
ldev_info = sdev->hostdata;
if (!ldev_info)
ldev_num = ldev_info->ldev_num;
myrs_get_ldev_info(cs, ldev_num, ldev_info);
if (ldev_info->cc_active)
- return snprintf(buf, 32, "checking block %zu of %zu\n",
+ return snprintf(buf, 64, "checking block %zu of %zu\n",
(size_t)ldev_info->cc_lba,
(size_t)ldev_info->cfg_devsize);
else
- return snprintf(buf, 32, "not checking\n");
+ return snprintf(buf, 64, "not checking\n");
}
static ssize_t consistency_check_store(struct device *dev,
* pmcraid_minor - minor number(s) to use
*/
static unsigned int pmcraid_major;
-static struct class *pmcraid_class;
+static const struct class pmcraid_class = {
+ .name = PMCRAID_DEVFILE,
+};
static DECLARE_BITMAP(pmcraid_minor, PMCRAID_MAX_ADAPTERS);
/*
if (error)
pmcraid_release_minor(minor);
else
- device_create(pmcraid_class, NULL, MKDEV(pmcraid_major, minor),
+ device_create(&pmcraid_class, NULL, MKDEV(pmcraid_major, minor),
NULL, "%s%u", PMCRAID_DEVFILE, minor);
return error;
}
static void pmcraid_release_chrdev(struct pmcraid_instance *pinstance)
{
pmcraid_release_minor(MINOR(pinstance->cdev.dev));
- device_destroy(pmcraid_class,
+ device_destroy(&pmcraid_class,
MKDEV(pmcraid_major, MINOR(pinstance->cdev.dev)));
cdev_del(&pinstance->cdev);
}
}
pmcraid_major = MAJOR(dev);
- pmcraid_class = class_create(PMCRAID_DEVFILE);
- if (IS_ERR(pmcraid_class)) {
- error = PTR_ERR(pmcraid_class);
+ error = class_register(&pmcraid_class);
+
+ if (error) {
pmcraid_err("failed to register with sysfs, error = %x\n",
error);
goto out_unreg_chrdev;
error = pmcraid_netlink_init();
if (error) {
- class_destroy(pmcraid_class);
+ class_unregister(&pmcraid_class);
goto out_unreg_chrdev;
}
pmcraid_err("failed to register pmcraid driver, error = %x\n",
error);
- class_destroy(pmcraid_class);
+ class_unregister(&pmcraid_class);
pmcraid_netlink_release();
out_unreg_chrdev:
unregister_chrdev_region(MKDEV(pmcraid_major, 0),
PMCRAID_MAX_ADAPTERS);
pci_unregister_driver(&pmcraid_driver);
- class_destroy(pmcraid_class);
+ class_unregister(&pmcraid_class);
}
module_init(pmcraid_init);
return;
if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
- qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
+ /* Will wait for wind down of adapter */
+ ql_dbg(ql_dbg_aer, fcport->vha, 0x900c,
+ "%s pci offline detected (id %06x)\n", __func__,
+ fcport->d_id.b24);
+ qla_pci_set_eeh_busy(fcport->vha);
+ qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24,
+ 0, WAIT_TARGET);
return;
}
}
vha = fcport->vha;
if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
- qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
+ /* Will wait for wind down of adapter */
+ ql_dbg(ql_dbg_aer, fcport->vha, 0x900b,
+ "%s pci offline detected (id %06x)\n", __func__,
+ fcport->d_id.b24);
+ qla_pci_set_eeh_busy(vha);
qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24,
0, WAIT_TARGET);
return;
#include "qla_nvme.h"
#define QLA2XXX_DRIVER_NAME "qla2xxx"
#define QLA2XXX_APIDEV "ql2xapidev"
-#define QLA2XXX_MANUFACTURER "Marvell Semiconductor, Inc."
+#define QLA2XXX_MANUFACTURER "Marvell"
/*
* We have MAILBOX_REGISTER_COUNT sized arrays in a few places,
list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
if (fcport->edif.enable) {
- if (pcnt > app_req.num_ports)
+ if (pcnt >= app_req.num_ports)
break;
app_reply->elem[pcnt].rekey_count =
extern int qla2x00_local_device_login(scsi_qla_host_t *, fc_port_t *);
extern int qla24xx_els_dcmd_iocb(scsi_qla_host_t *, int, port_id_t);
-extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *, bool);
+extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *);
extern void qla2x00_els_dcmd2_free(scsi_qla_host_t *vha,
struct els_plogi *els_plogi);
return rval;
done_free_sp:
- /* ref: INIT */
- kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ /*
+ * use qla24xx_async_gnl_sp_done to purge all pending gnl request.
+ * kref_put is call behind the scene.
+ */
+ sp->u.iocb_cmd.u.mbx.in_mb[0] = MBS_COMMAND_ERROR;
+ qla24xx_async_gnl_sp_done(sp, QLA_COMMAND_ERROR);
fcport->flags &= ~(FCF_ASYNC_SENT);
done:
fcport->flags &= ~(FCF_ASYNC_ACTIVE);
return rval;
}
+static void qla_enable_fce_trace(scsi_qla_host_t *vha)
+{
+ int rval;
+ struct qla_hw_data *ha = vha->hw;
+
+ if (ha->fce) {
+ ha->flags.fce_enabled = 1;
+ memset(ha->fce, 0, fce_calc_size(ha->fce_bufs));
+ rval = qla2x00_enable_fce_trace(vha,
+ ha->fce_dma, ha->fce_bufs, ha->fce_mb, &ha->fce_bufs);
+
+ if (rval) {
+ ql_log(ql_log_warn, vha, 0x8033,
+ "Unable to reinitialize FCE (%d).\n", rval);
+ ha->flags.fce_enabled = 0;
+ }
+ }
+}
+
+static void qla_enable_eft_trace(scsi_qla_host_t *vha)
+{
+ int rval;
+ struct qla_hw_data *ha = vha->hw;
+
+ if (ha->eft) {
+ memset(ha->eft, 0, EFT_SIZE);
+ rval = qla2x00_enable_eft_trace(vha, ha->eft_dma, EFT_NUM_BUFFERS);
+
+ if (rval) {
+ ql_log(ql_log_warn, vha, 0x8034,
+ "Unable to reinitialize EFT (%d).\n", rval);
+ }
+ }
+}
/*
* qla2x00_initialize_adapter
* Initialize board.
}
static void
-qla2x00_init_fce_trace(scsi_qla_host_t *vha)
+qla2x00_alloc_fce_trace(scsi_qla_host_t *vha)
{
- int rval;
dma_addr_t tc_dma;
void *tc;
struct qla_hw_data *ha = vha->hw;
return;
}
- rval = qla2x00_enable_fce_trace(vha, tc_dma, FCE_NUM_BUFFERS,
- ha->fce_mb, &ha->fce_bufs);
- if (rval) {
- ql_log(ql_log_warn, vha, 0x00bf,
- "Unable to initialize FCE (%d).\n", rval);
- dma_free_coherent(&ha->pdev->dev, FCE_SIZE, tc, tc_dma);
- return;
- }
-
ql_dbg(ql_dbg_init, vha, 0x00c0,
"Allocated (%d KB) for FCE...\n", FCE_SIZE / 1024);
- ha->flags.fce_enabled = 1;
ha->fce_dma = tc_dma;
ha->fce = tc;
+ ha->fce_bufs = FCE_NUM_BUFFERS;
}
static void
-qla2x00_init_eft_trace(scsi_qla_host_t *vha)
+qla2x00_alloc_eft_trace(scsi_qla_host_t *vha)
{
- int rval;
dma_addr_t tc_dma;
void *tc;
struct qla_hw_data *ha = vha->hw;
return;
}
- rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS);
- if (rval) {
- ql_log(ql_log_warn, vha, 0x00c2,
- "Unable to initialize EFT (%d).\n", rval);
- dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc, tc_dma);
- return;
- }
-
ql_dbg(ql_dbg_init, vha, 0x00c3,
"Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024);
ha->eft = tc;
}
-static void
-qla2x00_alloc_offload_mem(scsi_qla_host_t *vha)
-{
- qla2x00_init_fce_trace(vha);
- qla2x00_init_eft_trace(vha);
-}
-
void
qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
{
if (ha->tgt.atio_ring)
mq_size += ha->tgt.atio_q_length * sizeof(request_t);
- qla2x00_init_fce_trace(vha);
+ qla2x00_alloc_fce_trace(vha);
if (ha->fce)
fce_size = sizeof(struct qla2xxx_fce_chain) + FCE_SIZE;
- qla2x00_init_eft_trace(vha);
+ qla2x00_alloc_eft_trace(vha);
if (ha->eft)
eft_size = EFT_SIZE;
}
struct qla_hw_data *ha = vha->hw;
struct device_reg_2xxx __iomem *reg = &ha->iobase->isp;
unsigned long flags;
- uint16_t fw_major_version;
int done_once = 0;
if (IS_P3P_TYPE(ha)) {
goto failed;
enable_82xx_npiv:
- fw_major_version = ha->fw_major_version;
if (IS_P3P_TYPE(ha))
qla82xx_check_md_needed(vha);
else
if (rval != QLA_SUCCESS)
goto failed;
- if (!fw_major_version && !(IS_P3P_TYPE(ha)))
- qla2x00_alloc_offload_mem(vha);
-
if (ql2xallocfwdump && !(IS_P3P_TYPE(ha)))
qla2x00_alloc_fw_dump(vha);
+ qla_enable_fce_trace(vha);
+ qla_enable_eft_trace(vha);
} else {
goto failed;
}
int
qla2x00_abort_isp(scsi_qla_host_t *vha)
{
- int rval;
uint8_t status = 0;
struct qla_hw_data *ha = vha->hw;
struct scsi_qla_host *vp, *tvp;
struct req_que *req = ha->req_q_map[0];
unsigned long flags;
+ fc_port_t *fcport;
if (vha->flags.online) {
qla2x00_abort_isp_cleanup(vha);
"ISP Abort - ISP reg disconnect post nvmram config, exiting.\n");
return status;
}
+
+ /* User may have updated [fcp|nvme] prefer in flash */
+ list_for_each_entry(fcport, &vha->vp_fcports, list) {
+ if (NVME_PRIORITY(ha, fcport))
+ fcport->do_prli_nvme = 1;
+ else
+ fcport->do_prli_nvme = 0;
+ }
+
if (!qla2x00_restart_isp(vha)) {
clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags);
if (IS_QLA81XX(ha) || IS_QLA8031(ha))
qla2x00_get_fw_version(vha);
- if (ha->fce) {
- ha->flags.fce_enabled = 1;
- memset(ha->fce, 0,
- fce_calc_size(ha->fce_bufs));
- rval = qla2x00_enable_fce_trace(vha,
- ha->fce_dma, ha->fce_bufs, ha->fce_mb,
- &ha->fce_bufs);
- if (rval) {
- ql_log(ql_log_warn, vha, 0x8033,
- "Unable to reinitialize FCE "
- "(%d).\n", rval);
- ha->flags.fce_enabled = 0;
- }
- }
- if (ha->eft) {
- memset(ha->eft, 0, EFT_SIZE);
- rval = qla2x00_enable_eft_trace(vha,
- ha->eft_dma, EFT_NUM_BUFFERS);
- if (rval) {
- ql_log(ql_log_warn, vha, 0x8034,
- "Unable to reinitialize EFT "
- "(%d).\n", rval);
- }
- }
} else { /* failed the ISP abort */
vha->flags.online = 1;
if (test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) {
atomic_inc(&vp->vref_count);
spin_unlock_irqrestore(&ha->vport_slock, flags);
+ /* User may have updated [fcp|nvme] prefer in flash */
+ list_for_each_entry(fcport, &vp->vp_fcports, list) {
+ if (NVME_PRIORITY(ha, fcport))
+ fcport->do_prli_nvme = 1;
+ else
+ fcport->do_prli_nvme = 0;
+ }
+
qla2x00_vp_abort_isp(vp);
spin_lock_irqsave(&ha->vport_slock, flags);
qla2x00_sp_release(struct kref *kref)
{
struct srb *sp = container_of(kref, struct srb, cmd_kref);
+ struct scsi_qla_host *vha = sp->vha;
+
+ switch (sp->type) {
+ case SRB_CT_PTHRU_CMD:
+ /* GPSC & GFPNID use fcport->ct_desc.ct_sns for both req & rsp */
+ if (sp->u.iocb_cmd.u.ctarg.req &&
+ (!sp->fcport ||
+ sp->u.iocb_cmd.u.ctarg.req != sp->fcport->ct_desc.ct_sns)) {
+ dma_free_coherent(&vha->hw->pdev->dev,
+ sp->u.iocb_cmd.u.ctarg.req_allocated_size,
+ sp->u.iocb_cmd.u.ctarg.req,
+ sp->u.iocb_cmd.u.ctarg.req_dma);
+ sp->u.iocb_cmd.u.ctarg.req = NULL;
+ }
+ if (sp->u.iocb_cmd.u.ctarg.rsp &&
+ (!sp->fcport ||
+ sp->u.iocb_cmd.u.ctarg.rsp != sp->fcport->ct_desc.ct_sns)) {
+ dma_free_coherent(&vha->hw->pdev->dev,
+ sp->u.iocb_cmd.u.ctarg.rsp_allocated_size,
+ sp->u.iocb_cmd.u.ctarg.rsp,
+ sp->u.iocb_cmd.u.ctarg.rsp_dma);
+ sp->u.iocb_cmd.u.ctarg.rsp = NULL;
+ }
+ break;
+ default:
+ break;
+ }
sp->free(sp);
}
{
struct srb_iocb *elsio = &sp->u.iocb_cmd;
- kfree(sp->fcport);
+ if (sp->fcport)
+ qla2x00_free_fcport(sp->fcport);
if (elsio->u.els_logo.els_logo_pyld)
dma_free_coherent(&sp->vha->hw->pdev->dev, DMA_POOL_SIZE,
*/
sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
if (!sp) {
- kfree(fcport);
+ qla2x00_free_fcport(fcport);
ql_log(ql_log_info, vha, 0x70e6,
"SRB allocation failed\n");
return -ENOMEM;
if (!elsio->u.els_logo.els_logo_pyld) {
/* ref: INIT */
kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ qla2x00_free_fcport(fcport);
return QLA_FUNCTION_FAILED;
}
if (rval != QLA_SUCCESS) {
/* ref: INIT */
kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ qla2x00_free_fcport(fcport);
return QLA_FUNCTION_FAILED;
}
int
qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
- fc_port_t *fcport, bool wait)
+ fc_port_t *fcport)
{
srb_t *sp;
struct srb_iocb *elsio = NULL;
if (!sp) {
ql_log(ql_log_info, vha, 0x70e6,
"SRB allocation failed\n");
- fcport->flags &= ~FCF_ASYNC_ACTIVE;
- return -ENOMEM;
+ goto done;
}
fcport->flags |= FCF_ASYNC_SENT;
ql_dbg(ql_dbg_io, vha, 0x3073,
"%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24);
- if (wait)
- sp->flags = SRB_WAKEUP_ON_COMP;
-
sp->type = SRB_ELS_DCMD;
sp->name = "ELS_DCMD";
sp->fcport = fcport;
if (!elsio->u.els_plogi.els_plogi_pyld) {
rval = QLA_FUNCTION_FAILED;
- goto out;
+ goto done_free_sp;
}
resp_ptr = elsio->u.els_plogi.els_resp_pyld =
if (!elsio->u.els_plogi.els_resp_pyld) {
rval = QLA_FUNCTION_FAILED;
- goto out;
+ goto done_free_sp;
}
ql_dbg(ql_dbg_io, vha, 0x3073, "PLOGI %p %p\n", ptr, resp_ptr);
if (els_opcode == ELS_DCMD_PLOGI && DBELL_ACTIVE(vha)) {
struct fc_els_flogi *p = ptr;
-
p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC);
}
(uint8_t *)elsio->u.els_plogi.els_plogi_pyld,
sizeof(*elsio->u.els_plogi.els_plogi_pyld));
- init_completion(&elsio->u.els_plogi.comp);
rval = qla2x00_start_sp(sp);
if (rval != QLA_SUCCESS) {
- rval = QLA_FUNCTION_FAILED;
+ fcport->flags |= FCF_LOGIN_NEEDED;
+ set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
+ goto done_free_sp;
} else {
ql_dbg(ql_dbg_disc, vha, 0x3074,
"%s PLOGI sent, hdl=%x, loopid=%x, to port_id %06x from port_id %06x\n",
fcport->d_id.b24, vha->d_id.b24);
}
- if (wait) {
- wait_for_completion(&elsio->u.els_plogi.comp);
-
- if (elsio->u.els_plogi.comp_status != CS_COMPLETE)
- rval = QLA_FUNCTION_FAILED;
- } else {
- goto done;
- }
+ return rval;
-out:
- fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+done_free_sp:
qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi);
/* ref: INIT */
kref_put(&sp->cmd_kref, qla2x00_sp_release);
done:
+ fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+ qla2x00_set_fcport_disc_state(fcport, DSC_DELETED);
return rval;
}
return -EAGAIN;
}
- pkt = __qla2x00_alloc_iocbs(sp->qpair, sp);
+ pkt = qla2x00_alloc_iocbs_ready(sp->qpair, sp);
if (!pkt) {
rval = -EAGAIN;
ql_log(ql_log_warn, vha, 0x700c,
if (ha->flags.purge_mbox || chip_reset != ha->chip_reset ||
ha->flags.eeh_busy) {
ql_log(ql_log_warn, vha, 0xd035,
- "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n",
+ "Purge mbox: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n",
ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]);
rval = QLA_ABORTED;
goto premature_exit;
ha->init_cb_dma = 0;
fail_free_vp_map:
kfree(ha->vp_map);
+ ha->vp_map = NULL;
fail:
ql_log(ql_log_fatal, NULL, 0x0030,
"Memory allocation failure.\n");
break;
case QLA_EVT_ELS_PLOGI:
qla24xx_els_dcmd2_iocb(vha, ELS_DCMD_PLOGI,
- e->u.fcport.fcport, false);
+ e->u.fcport.fcport);
break;
case QLA_EVT_SA_REPLACE:
rc = qla24xx_issue_sa_replace_iocb(vha, e);
"%s: sess %p logout completed\n", __func__, sess);
}
+ /* check for any straggling io left behind */
+ if (!(sess->flags & FCF_FCP2_DEVICE) &&
+ qla2x00_eh_wait_for_pending_commands(sess->vha, sess->d_id.b24, 0, WAIT_TARGET)) {
+ ql_log(ql_log_warn, vha, 0x3027,
+ "IO not return. Resetting.\n");
+ set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+ qla2xxx_wake_dpc(vha);
+ qla2x00_wait_for_chip_reset(vha);
+ }
+
if (sess->logo_ack_needed) {
sess->logo_ack_needed = 0;
qla24xx_async_notify_ack(vha, sess,
/*
* Driver version
*/
-#define QLA2XXX_VERSION "10.02.09.100-k"
+#define QLA2XXX_VERSION "10.02.09.200-k"
#define QLA_DRIVER_MAJOR_VER 10
#define QLA_DRIVER_MINOR_VER 2
#define QLA_DRIVER_PATCH_VER 9
-#define QLA_DRIVER_BETA_VER 100
+#define QLA_DRIVER_BETA_VER 200
if (blk_queue_add_random(q))
add_disk_randomness(req->q->disk);
- if (!blk_rq_is_passthrough(req)) {
- WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
- cmd->flags &= ~SCMD_INITIALIZED;
- }
+ WARN_ON_ONCE(!blk_rq_is_passthrough(req) &&
+ !(cmd->flags & SCMD_INITIALIZED));
+ cmd->flags = 0;
/*
* Calling rcu_barrier() is not necessary here because the
}
EXPORT_SYMBOL(scsi_add_device);
+int scsi_resume_device(struct scsi_device *sdev)
+{
+ struct device *dev = &sdev->sdev_gendev;
+ int ret = 0;
+
+ device_lock(dev);
+
+ /*
+ * Bail out if the device or its queue are not running. Otherwise,
+ * the rescan may block waiting for commands to be executed, with us
+ * holding the device lock. This can result in a potential deadlock
+ * in the power management core code when system resume is on-going.
+ */
+ if (sdev->sdev_state != SDEV_RUNNING ||
+ blk_queue_pm_only(sdev->request_queue)) {
+ ret = -EWOULDBLOCK;
+ goto unlock;
+ }
+
+ if (dev->driver && try_module_get(dev->driver->owner)) {
+ struct scsi_driver *drv = to_scsi_driver(dev->driver);
+
+ if (drv->resume)
+ ret = drv->resume(dev);
+ module_put(dev->driver->owner);
+ }
+
+unlock:
+ device_unlock(dev);
+
+ return ret;
+}
+EXPORT_SYMBOL(scsi_resume_device);
+
int scsi_rescan_device(struct scsi_device *sdev)
{
struct device *dev = &sdev->sdev_gendev;
error = device_add_disk(dev, gd, NULL);
if (error) {
- put_device(&sdkp->disk_dev);
+ device_unregister(&sdkp->disk_dev);
put_disk(gd);
goto out;
}
return sd_suspend_common(dev, true);
}
-static int sd_resume(struct device *dev, bool runtime)
+static int sd_resume(struct device *dev)
+{
+ struct scsi_disk *sdkp = dev_get_drvdata(dev);
+
+ sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+
+ if (opal_unlock_from_suspend(sdkp->opal_dev)) {
+ sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int sd_resume_common(struct device *dev, bool runtime)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
int ret;
sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
ret = sd_start_stop_device(sdkp, 1);
if (!ret) {
- opal_unlock_from_suspend(sdkp->opal_dev);
+ sd_resume(dev);
sdkp->suspended = false;
}
return 0;
}
- return sd_resume(dev, false);
+ return sd_resume_common(dev, false);
}
static int sd_resume_runtime(struct device *dev)
"Failed to clear sense data\n");
}
- return sd_resume(dev, true);
+ return sd_resume_common(dev, true);
}
static const struct dev_pm_ops sd_pm_ops = {
.pm = &sd_pm_ops,
},
.rescan = sd_rescan,
+ .resume = sd_resume,
.init_command = sd_init_command,
.uninit_command = sd_uninit_command,
.done = sd_done,
int dev = iminor(inode);
int flags = filp->f_flags;
struct request_queue *q;
+ struct scsi_device *device;
Sg_device *sdp;
Sg_fd *sfp;
int retval;
/* This driver's module count bumped by fops_get in <linux/fs.h> */
/* Prevent the device driver from vanishing while we sleep */
- retval = scsi_device_get(sdp->device);
+ device = sdp->device;
+ retval = scsi_device_get(device);
if (retval)
goto sg_put;
- retval = scsi_autopm_get_device(sdp->device);
+ retval = scsi_autopm_get_device(device);
if (retval)
goto sdp_put;
* check if O_NONBLOCK. Permits SCSI commands to be issued
* during error recovery. Tread carefully. */
if (!((flags & O_NONBLOCK) ||
- scsi_block_when_processing_errors(sdp->device))) {
+ scsi_block_when_processing_errors(device))) {
retval = -ENXIO;
/* we are in error recovery for this device */
goto error_out;
if (sdp->open_cnt < 1) { /* no existing opens */
sdp->sgdebug = 0;
- q = sdp->device->request_queue;
+ q = device->request_queue;
sdp->sg_tablesize = queue_max_segments(q);
}
sfp = sg_add_sfp(sdp);
error_mutex_locked:
mutex_unlock(&sdp->open_rel_lock);
error_out:
- scsi_autopm_put_device(sdp->device);
+ scsi_autopm_put_device(device);
sdp_put:
- scsi_device_put(sdp->device);
- goto sg_put;
+ kref_put(&sdp->d_ref, sg_device_destroy);
+ scsi_device_put(device);
+ return retval;
}
/* Release resources associated with a successful sg_open()
.llseek = no_llseek,
};
-static struct class *sg_sysfs_class;
+static const struct class sg_sysfs_class = {
+ .name = "scsi_generic"
+};
static int sg_sysfs_valid = 0;
if (sg_sysfs_valid) {
struct device *sg_class_member;
- sg_class_member = device_create(sg_sysfs_class, cl_dev->parent,
+ sg_class_member = device_create(&sg_sysfs_class, cl_dev->parent,
MKDEV(SCSI_GENERIC_MAJOR,
sdp->index),
sdp, "%s", sdp->name);
read_unlock_irqrestore(&sdp->sfd_lock, iflags);
sysfs_remove_link(&scsidp->sdev_gendev.kobj, "generic");
- device_destroy(sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index));
+ device_destroy(&sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index));
cdev_del(sdp->cdev);
sdp->cdev = NULL;
SG_MAX_DEVS, "sg");
if (rc)
return rc;
- sg_sysfs_class = class_create("scsi_generic");
- if ( IS_ERR(sg_sysfs_class) ) {
- rc = PTR_ERR(sg_sysfs_class);
+ rc = class_register(&sg_sysfs_class);
+ if (rc)
goto err_out;
- }
sg_sysfs_valid = 1;
rc = scsi_register_interface(&sg_interface);
if (0 == rc) {
#endif /* CONFIG_SCSI_PROC_FS */
return 0;
}
- class_destroy(sg_sysfs_class);
+ class_unregister(&sg_sysfs_class);
register_sg_sysctls();
err_out:
unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS);
remove_proc_subtree("scsi/sg", NULL);
#endif /* CONFIG_SCSI_PROC_FS */
scsi_unregister_interface(&sg_interface);
- class_destroy(sg_sysfs_class);
+ class_unregister(&sg_sysfs_class);
sg_sysfs_valid = 0;
unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0),
SG_MAX_DEVS);
{
struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work);
struct sg_device *sdp = sfp->parentdp;
+ struct scsi_device *device = sdp->device;
Sg_request *srp;
unsigned long iflags;
"sg_remove_sfp: sfp=0x%p\n", sfp));
kfree(sfp);
- scsi_device_put(sdp->device);
kref_put(&sdp->d_ref, sg_device_destroy);
+ scsi_device_put(device);
module_put(THIS_MODULE);
}
static int try_wdio = 1;
static int debug_flag;
-static struct class st_sysfs_class;
+static const struct class st_sysfs_class;
static const struct attribute_group *st_dev_groups[];
static const struct attribute_group *st_drv_groups[];
return;
}
-static struct class st_sysfs_class = {
+static const struct class st_sysfs_class = {
.name = "scsi_tape",
.dev_groups = st_dev_groups,
};
tristate "MediaTek SoC Information"
default y
depends on NVMEM_MTK_EFUSE
+ select SOC_BUS
help
The MediaTek SoC Information (mtk-socinfo) driver provides
information about the SoC to the userspace including the
const struct svs_bank_pdata *bdata;
struct svs_bank *svsb;
struct dev_pm_opp *opp;
+ char tz_name_buf[20];
unsigned long freq;
int count, ret;
u32 idx, i;
}
if (!IS_ERR_OR_NULL(bdata->tzone_name)) {
- svsb->tzd = thermal_zone_get_zone_by_name(bdata->tzone_name);
+ snprintf(tz_name_buf, ARRAY_SIZE(tz_name_buf),
+ "%s-thermal", bdata->tzone_name);
+ svsb->tzd = thermal_zone_get_zone_by_name(tz_name_buf);
if (IS_ERR(svsb->tzd)) {
dev_err(svsb->dev, "cannot get \"%s\" thermal zone\n",
- bdata->tzone_name);
+ tz_name_buf);
return PTR_ERR(svsb->tzd);
}
}
fsl_lpspi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(fsl_lpspi->base)) {
ret = PTR_ERR(fsl_lpspi->base);
- goto out_controller_put;
+ return ret;
}
fsl_lpspi->base_phys = res->start;
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
ret = irq;
- goto out_controller_put;
+ return ret;
}
ret = devm_request_irq(&pdev->dev, irq, fsl_lpspi_isr, 0,
dev_name(&pdev->dev), fsl_lpspi);
if (ret) {
dev_err(&pdev->dev, "can't get irq%d: %d\n", irq, ret);
- goto out_controller_put;
+ return ret;
}
fsl_lpspi->clk_per = devm_clk_get(&pdev->dev, "per");
if (IS_ERR(fsl_lpspi->clk_per)) {
ret = PTR_ERR(fsl_lpspi->clk_per);
- goto out_controller_put;
+ return ret;
}
fsl_lpspi->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
if (IS_ERR(fsl_lpspi->clk_ipg)) {
ret = PTR_ERR(fsl_lpspi->clk_ipg);
- goto out_controller_put;
+ return ret;
}
/* enable the clock */
ret = fsl_lpspi_init_rpm(fsl_lpspi);
if (ret)
- goto out_controller_put;
+ return ret;
ret = pm_runtime_get_sync(fsl_lpspi->dev);
if (ret < 0) {
pm_runtime_dont_use_autosuspend(fsl_lpspi->dev);
pm_runtime_put_sync(fsl_lpspi->dev);
pm_runtime_disable(fsl_lpspi->dev);
-out_controller_put:
- spi_controller_put(controller);
return ret;
}
spi_bus->spi_int[iter] = devm_kzalloc(&pdev->dev,
sizeof(struct pci1xxxx_spi_internal),
GFP_KERNEL);
+ if (!spi_bus->spi_int[iter])
+ return -ENOMEM;
spi_sub_ptr = spi_bus->spi_int[iter];
spi_sub_ptr->spi_host = devm_spi_alloc_host(dev, sizeof(struct spi_controller));
if (!spi_sub_ptr->spi_host)
struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
if (sdd->rx_dma.ch && sdd->tx_dma.ch)
- return xfer->len > sdd->fifo_depth;
+ return xfer->len >= sdd->fifo_depth;
return false;
}
return status;
}
- if (!is_polling(sdd) && (xfer->len > fifo_len) &&
+ if (!is_polling(sdd) && xfer->len >= fifo_len &&
sdd->rx_dma.ch && sdd->tx_dma.ch) {
use_dma = 1;
-
} else if (xfer->len >= fifo_len) {
tx_buf = xfer->tx_buf;
rx_buf = xfer->rx_buf;
/* build component create message */
m.h.type = MMAL_MSG_TYPE_COMPONENT_CREATE;
m.u.component_create.client_component = component->client_component;
- strncpy(m.u.component_create.name, name,
- sizeof(m.u.component_create.name));
+ strscpy_pad(m.u.component_create.name, name,
+ sizeof(m.u.component_create.name));
+ m.u.component_create.pid = 0;
ret = send_synchronous_mmal_msg(instance, &m,
sizeof(m.u.component_create),
struct iscsi_pdu *pdu)
{
int i, send_recovery_r2t = 0, recovery = 0;
- u32 length = 0, offset = 0, pdu_count = 0, xfer_len = 0;
+ u32 length = 0, offset = 0, pdu_count = 0;
struct iscsit_conn *conn = cmd->conn;
struct iscsi_pdu *first_pdu = NULL;
if (cmd->pdu_list[i].seq_no == pdu->seq_no) {
if (!first_pdu)
first_pdu = &cmd->pdu_list[i];
- xfer_len += cmd->pdu_list[i].length;
pdu_count++;
} else if (pdu_count)
break;
{
struct configfs_subsystem *subsys = &target_core_fabrics;
struct t10_alua_lu_gp *lu_gp;
+ struct cred *kern_cred;
+ const struct cred *old_cred;
int ret;
pr_debug("TARGET_CORE[0]: Loading Generic Kernel Storage"
if (ret < 0)
goto out;
+ /* We use the kernel credentials to access the target directory */
+ kern_cred = prepare_kernel_cred(&init_task);
+ if (!kern_cred) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ old_cred = override_creds(kern_cred);
target_init_dbroot();
+ revert_creds(old_cred);
+ put_cred(kern_cred);
return 0;
out:
+ target_xcopy_release_pt();
configfs_unregister_subsystem(subsys);
core_dev_release_virtual_lun0();
rd_module_exit();
res = dfc->power_ops->get_real_power(df, power, freq, voltage);
if (!res) {
- state = dfc->capped_state;
+ state = dfc->max_state - dfc->capped_state;
/* Convert EM power into milli-Watts first */
rcu_read_lock();
/* There might be no cooling devices yet. */
if (!num_actors) {
- ret = -EINVAL;
+ ret = 0;
goto clean_state;
}
return -ENOMEM;
get_governor_trips(tz, params);
- if (!params->trip_max) {
- dev_warn(&tz->device, "power_allocator: missing trip_max\n");
- kfree(params);
- return -EINVAL;
- }
ret = check_power_actors(tz, params);
if (ret < 0) {
else
params->sustainable_power = tz->tzp->sustainable_power;
- estimate_pid_constants(tz, tz->tzp->sustainable_power,
- params->trip_switch_on,
- params->trip_max->temperature);
+ if (params->trip_max)
+ estimate_pid_constants(tz, tz->tzp->sustainable_power,
+ params->trip_switch_on,
+ params->trip_max->temperature);
reset_pid_controller(params);
tze->trip_stats[trip_id].timestamp = now;
tze->trip_stats[trip_id].max = max(tze->trip_stats[trip_id].max, temperature);
tze->trip_stats[trip_id].min = min(tze->trip_stats[trip_id].min, temperature);
+ tze->trip_stats[trip_id].count++;
tze->trip_stats[trip_id].avg = tze->trip_stats[trip_id].avg +
(temperature - tze->trip_stats[trip_id].avg) /
tze->trip_stats[trip_id].count;
{
const struct thermal_trip *trip;
int low = -INT_MAX, high = INT_MAX;
- bool same_trip = false;
int ret;
lockdep_assert_held(&tz->lock);
return;
for_each_trip(tz, trip) {
- bool low_set = false;
int trip_low;
trip_low = trip->temperature - trip->hysteresis;
- if (trip_low < tz->temperature && trip_low > low) {
+ if (trip_low < tz->temperature && trip_low > low)
low = trip_low;
- low_set = true;
- same_trip = false;
- }
if (trip->temperature > tz->temperature &&
- trip->temperature < high) {
+ trip->temperature < high)
high = trip->temperature;
- same_trip = low_set;
- }
}
/* No need to change trip points */
if (tz->prev_low_trip == low && tz->prev_high_trip == high)
return;
- /*
- * If "high" and "low" are the same, skip the change unless this is the
- * first time.
- */
- if (same_trip && (tz->prev_low_trip != -INT_MAX ||
- tz->prev_high_trip != INT_MAX))
- return;
-
tz->prev_low_trip = low;
tz->prev_high_trip = high;
{
struct tb_port *up, *down;
- if (sw->is_unplugged)
- return;
if (!tb_route(sw) || tb_switch_is_icm(sw))
return;
+ /*
+ * Unconfigure downstream port so that wake-on-connect can be
+ * configured after router unplug. No need to unconfigure upstream port
+ * since its router is unplugged.
+ */
up = tb_upstream_port(sw);
- if (tb_switch_is_usb4(up->sw))
- usb4_port_unconfigure(up);
- else
- tb_lc_unconfigure_port(up);
-
down = up->remote;
if (tb_switch_is_usb4(down->sw))
usb4_port_unconfigure(down);
else
tb_lc_unconfigure_port(down);
+
+ if (sw->is_unplugged)
+ return;
+
+ up = tb_upstream_port(sw);
+ if (tb_switch_is_usb4(up->sw))
+ usb4_port_unconfigure(up);
+ else
+ tb_lc_unconfigure_port(up);
}
static void tb_switch_credits_init(struct tb_switch *sw)
return tb_lc_set_wake(sw, flags);
}
-int tb_switch_resume(struct tb_switch *sw)
+static void tb_switch_check_wakes(struct tb_switch *sw)
+{
+ if (device_may_wakeup(&sw->dev)) {
+ if (tb_switch_is_usb4(sw))
+ usb4_switch_check_wakes(sw);
+ }
+}
+
+/**
+ * tb_switch_resume() - Resume a switch after sleep
+ * @sw: Switch to resume
+ * @runtime: Is this resume from runtime suspend or system sleep
+ *
+ * Resumes and re-enumerates router (and all its children), if still plugged
+ * after suspend. Don't enumerate device router whose UID was changed during
+ * suspend. If this is resume from system sleep, notifies PM core about the
+ * wakes occurred during suspend. Disables all wakes, except USB4 wake of
+ * upstream port for USB4 routers that shall be always enabled.
+ */
+int tb_switch_resume(struct tb_switch *sw, bool runtime)
{
struct tb_port *port;
int err;
if (err)
return err;
+ if (!runtime)
+ tb_switch_check_wakes(sw);
+
/* Disable wakes */
tb_switch_set_wake(sw, 0);
*/
if (tb_port_unlock(port))
tb_port_warn(port, "failed to unlock port\n");
- if (port->remote && tb_switch_resume(port->remote->sw)) {
+ if (port->remote &&
+ tb_switch_resume(port->remote->sw, runtime)) {
tb_port_warn(port,
"lost during suspend, disconnecting\n");
tb_sw_set_unplugged(port->remote->sw);
continue;
}
+ /* Needs to be on different routers */
+ if (in->sw == port->sw) {
+ tb_port_dbg(port, "skipping DP OUT on same router\n");
+ continue;
+ }
+
tb_port_dbg(port, "DP OUT available\n");
/*
if (!tb_switch_is_usb4(tb->root_switch))
tb_switch_reset(tb->root_switch);
- tb_switch_resume(tb->root_switch);
+ tb_switch_resume(tb->root_switch, false);
tb_free_invalid_tunnels(tb);
tb_free_unplugged_children(tb->root_switch);
tb_restore_children(tb->root_switch);
struct tb_tunnel *tunnel, *n;
mutex_lock(&tb->lock);
- tb_switch_resume(tb->root_switch);
+ tb_switch_resume(tb->root_switch, true);
tb_free_invalid_tunnels(tb);
tb_restore_children(tb->root_switch);
list_for_each_entry_safe(tunnel, n, &tcm->tunnel_list, list)
int tb_switch_add(struct tb_switch *sw);
void tb_switch_remove(struct tb_switch *sw);
void tb_switch_suspend(struct tb_switch *sw, bool runtime);
-int tb_switch_resume(struct tb_switch *sw);
+int tb_switch_resume(struct tb_switch *sw, bool runtime);
int tb_switch_reset(struct tb_switch *sw);
int tb_switch_wait_for_bit(struct tb_switch *sw, u32 offset, u32 bit,
u32 value, int timeout_msec);
return usb4_switch_version(sw) > 0;
}
+void usb4_switch_check_wakes(struct tb_switch *sw);
int usb4_switch_setup(struct tb_switch *sw);
int usb4_switch_configuration_valid(struct tb_switch *sw);
int usb4_switch_read_uid(struct tb_switch *sw, u64 *uid);
tx_dwords, rx_data, rx_dwords);
}
-static void usb4_switch_check_wakes(struct tb_switch *sw)
+/**
+ * usb4_switch_check_wakes() - Check for wakes and notify PM core about them
+ * @sw: Router whose wakes to check
+ *
+ * Checks wakes occurred during suspend and notify the PM core about them.
+ */
+void usb4_switch_check_wakes(struct tb_switch *sw)
{
bool wakeup_usb4 = false;
struct usb4_port *usb4;
bool wakeup = false;
u32 val;
- if (!device_may_wakeup(&sw->dev))
- return;
-
if (tb_route(sw)) {
if (tb_sw_read(sw, &val, TB_CFG_SWITCH, ROUTER_CS_6, 1))
return;
u32 val = 0;
int ret;
- usb4_switch_check_wakes(sw);
-
if (!tb_route(sw))
return 0;
long rate;
int ret;
+ clk_disable_unprepare(d->clk);
rate = clk_round_rate(d->clk, newrate);
- if (rate > 0 && p->uartclk != rate) {
- clk_disable_unprepare(d->clk);
+ if (rate > 0) {
/*
* Note that any clock-notifer worker will block in
* serial8250_update_uartclk() until we are done.
ret = clk_set_rate(d->clk, newrate);
if (!ret)
p->uartclk = rate;
- clk_prepare_enable(d->clk);
}
+ clk_prepare_enable(d->clk);
dw8250_do_set_termios(p, termios, old);
}
ret = uart_read_port_properties(&uart.port);
if (ret)
- return ret;
+ goto dis_uart_clk;
uart.port.iotype = UPIO_MEM32;
uart.port.regshift = 2;
{ PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_QUATRO_B,
PCI_ANY_ID, PCI_ANY_ID, 0, 0,
pbn_b0_bt_2_115200 },
- { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_QUATTRO_A,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0,
- pbn_b0_bt_2_115200 },
- { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_QUATTRO_B,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0,
- pbn_b0_bt_2_115200 },
{ PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_OCTO_A,
PCI_ANY_ID, PCI_ANY_ID, 0, 0,
pbn_b0_bt_4_460800 },
static irqreturn_t mxs_auart_irq_handle(int irq, void *context)
{
- u32 istat;
+ u32 istat, stat;
struct mxs_auart_port *s = context;
u32 mctrl_temp = s->mctrl_prev;
- u32 stat = mxs_read(s, REG_STAT);
+ uart_port_lock(&s->port);
+
+ stat = mxs_read(s, REG_STAT);
istat = mxs_read(s, REG_INTR);
/* ack irq */
istat &= ~AUART_INTR_TXIS;
}
+ uart_port_unlock(&s->port);
+
return IRQ_HANDLED;
}
{
struct tty_port *port;
unsigned char ch, r1, drop, flag;
- int loops = 0;
/* Sanity check, make sure the old bug is no longer happening */
if (uap->port.state == NULL) {
if (r1 & Rx_OVR)
tty_insert_flip_char(port, 0, TTY_OVERRUN);
next_char:
- /* We can get stuck in an infinite loop getting char 0 when the
- * line is in a wrong HW state, we break that here.
- * When that happens, I disable the receive side of the driver.
- * Note that what I've been experiencing is a real irq loop where
- * I'm getting flooded regardless of the actual port speed.
- * Something strange is going on with the HW
- */
- if ((++loops) > 1000)
- goto flood;
ch = read_zsreg(uap, R0);
if (!(ch & Rx_CH_AV))
break;
}
- return true;
- flood:
- pmz_interrupt_control(uap, 0);
- pmz_error("pmz: rx irq flood !\n");
return true;
}
struct serial_port_device {
struct device dev;
struct uart_port *port;
+ unsigned int tx_enabled:1;
};
int serial_base_ctrl_init(void);
int serial_base_port_init(void);
void serial_base_port_exit(void);
+void serial_base_port_startup(struct uart_port *port);
+void serial_base_port_shutdown(struct uart_port *port);
+
int serial_base_driver_register(struct device_driver *driver);
void serial_base_driver_unregister(struct device_driver *driver);
* enabled, serial_port_runtime_resume() calls start_tx() again
* after enabling the device.
*/
- if (pm_runtime_active(&port_dev->dev))
+ if (!pm_runtime_enabled(port->dev) || pm_runtime_active(&port_dev->dev))
port->ops->start_tx(port);
pm_runtime_mark_last_busy(&port_dev->dev);
pm_runtime_put_autosuspend(&port_dev->dev);
bool init_hw)
{
struct tty_port *port = &state->port;
+ struct uart_port *uport;
int retval;
if (tty_port_initialized(port))
- return 0;
+ goto out_base_port_startup;
retval = uart_port_startup(tty, state, init_hw);
- if (retval)
+ if (retval) {
set_bit(TTY_IO_ERROR, &tty->flags);
+ return retval;
+ }
- return retval;
+out_base_port_startup:
+ uport = uart_port_check(state);
+ if (!uport)
+ return -EIO;
+
+ serial_base_port_startup(uport);
+
+ return 0;
}
/*
if (tty)
set_bit(TTY_IO_ERROR, &tty->flags);
+ if (uport)
+ serial_base_port_shutdown(uport);
+
if (tty_port_initialized(port)) {
tty_port_set_initialized(port, false);
uport->ops->stop_rx(uport);
uart_port_unlock_irq(uport);
+ serial_base_port_shutdown(uport);
uart_port_shutdown(port);
/*
* Free the transmit buffer.
*/
uart_port_lock_irq(uport);
+ uart_circ_clear(&state->xmit);
buf = state->xmit.buf;
state->xmit.buf = NULL;
uart_port_unlock_irq(uport);
/* Flush any pending TX for the port */
uart_port_lock_irqsave(port, &flags);
+ if (!port_dev->tx_enabled)
+ goto unlock;
if (__serial_port_busy(port))
port->ops->start_tx(port);
+
+unlock:
uart_port_unlock_irqrestore(port, flags);
out:
return 0;
uart_port_lock_irqsave(port, &flags);
+ if (!port_dev->tx_enabled) {
+ uart_port_unlock_irqrestore(port, flags);
+ return 0;
+ }
+
busy = __serial_port_busy(port);
if (busy)
port->ops->start_tx(port);
return busy ? -EBUSY : 0;
}
+static void serial_base_port_set_tx(struct uart_port *port,
+ struct serial_port_device *port_dev,
+ bool enabled)
+{
+ unsigned long flags;
+
+ uart_port_lock_irqsave(port, &flags);
+ port_dev->tx_enabled = enabled;
+ uart_port_unlock_irqrestore(port, flags);
+}
+
+void serial_base_port_startup(struct uart_port *port)
+{
+ struct serial_port_device *port_dev = port->port_dev;
+
+ serial_base_port_set_tx(port, port_dev, true);
+}
+
+void serial_base_port_shutdown(struct uart_port *port)
+{
+ struct serial_port_device *port_dev = port->port_dev;
+
+ serial_base_port_set_tx(port, port_dev, false);
+}
+
static DEFINE_RUNTIME_DEV_PM_OPS(serial_port_pm,
serial_port_runtime_suspend,
serial_port_runtime_resume, NULL);
const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
u32 sr;
unsigned int size;
+ irqreturn_t ret = IRQ_NONE;
sr = readl_relaxed(port->membase + ofs->isr);
(sr & USART_SR_TC)) {
stm32_usart_tc_interrupt_disable(port);
stm32_usart_rs485_rts_disable(port);
+ ret = IRQ_HANDLED;
}
- if ((sr & USART_SR_RTOF) && ofs->icr != UNDEF_REG)
+ if ((sr & USART_SR_RTOF) && ofs->icr != UNDEF_REG) {
writel_relaxed(USART_ICR_RTOCF,
port->membase + ofs->icr);
+ ret = IRQ_HANDLED;
+ }
if ((sr & USART_SR_WUF) && ofs->icr != UNDEF_REG) {
/* Clear wake up flag and disable wake up interrupt */
stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_WUFIE);
if (irqd_is_wakeup_set(irq_get_irq_data(port->irq)))
pm_wakeup_event(tport->tty->dev, 0);
+ ret = IRQ_HANDLED;
}
/*
uart_unlock_and_check_sysrq(port);
if (size)
tty_flip_buffer_push(tport);
+ ret = IRQ_HANDLED;
}
}
uart_port_lock(port);
stm32_usart_transmit_chars(port);
uart_port_unlock(port);
+ ret = IRQ_HANDLED;
}
/* Receiver timeout irq for DMA RX */
uart_unlock_and_check_sysrq(port);
if (size)
tty_flip_buffer_push(tport);
+ ret = IRQ_HANDLED;
}
- return IRQ_HANDLED;
+ return ret;
}
static void stm32_usart_set_mctrl(struct uart_port *port, unsigned int mctrl)
val |= USART_CR2_SWAP;
writel_relaxed(val, port->membase + ofs->cr2);
}
+ stm32_port->throttled = false;
/* RX FIFO Flush */
if (ofs->rqr != UNDEF_REG)
val = ufshcd_readl(hba, REG_UFS_MCQ_CFG);
val &= ~MCQ_CFG_MAC_MASK;
- val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds);
+ val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds - 1);
ufshcd_writel(hba, val, REG_UFS_MCQ_CFG);
}
EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac);
/* MCQ mode */
if (is_mcq_enabled(hba)) {
- err = ufshcd_clear_cmd(hba, lrbp->task_tag);
+ /* successfully cleared the command, retry if needed */
+ if (ufshcd_clear_cmd(hba, lrbp->task_tag) == 0)
+ err = -EAGAIN;
hba->dev_cmd.complete = NULL;
return err;
}
/* UFS device & link must be active before we enter in this function */
if (!ufshcd_is_ufs_dev_active(hba) || !ufshcd_is_link_active(hba)) {
- ret = -EINVAL;
+ /* Wait err handler finish or trigger err recovery */
+ if (!ufshcd_eh_in_progress(hba))
+ ufshcd_force_error_recovery(hba);
+ ret = -EBUSY;
goto enable_scaling;
}
TSTBUS_MAX,
};
-#define QCOM_UFS_MAX_GEAR 4
+#define QCOM_UFS_MAX_GEAR 5
#define QCOM_UFS_MAX_LANE 2
enum {
[MODE_PWM][UFS_PWM_G2][UFS_LANE_1] = { 1844, 1000 },
[MODE_PWM][UFS_PWM_G3][UFS_LANE_1] = { 3688, 1000 },
[MODE_PWM][UFS_PWM_G4][UFS_LANE_1] = { 7376, 1000 },
+ [MODE_PWM][UFS_PWM_G5][UFS_LANE_1] = { 14752, 1000 },
[MODE_PWM][UFS_PWM_G1][UFS_LANE_2] = { 1844, 1000 },
[MODE_PWM][UFS_PWM_G2][UFS_LANE_2] = { 3688, 1000 },
[MODE_PWM][UFS_PWM_G3][UFS_LANE_2] = { 7376, 1000 },
[MODE_PWM][UFS_PWM_G4][UFS_LANE_2] = { 14752, 1000 },
+ [MODE_PWM][UFS_PWM_G5][UFS_LANE_2] = { 29504, 1000 },
[MODE_HS_RA][UFS_HS_G1][UFS_LANE_1] = { 127796, 1000 },
[MODE_HS_RA][UFS_HS_G2][UFS_LANE_1] = { 255591, 1000 },
[MODE_HS_RA][UFS_HS_G3][UFS_LANE_1] = { 1492582, 102400 },
[MODE_HS_RA][UFS_HS_G4][UFS_LANE_1] = { 2915200, 204800 },
+ [MODE_HS_RA][UFS_HS_G5][UFS_LANE_1] = { 5836800, 409600 },
[MODE_HS_RA][UFS_HS_G1][UFS_LANE_2] = { 255591, 1000 },
[MODE_HS_RA][UFS_HS_G2][UFS_LANE_2] = { 511181, 1000 },
[MODE_HS_RA][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 },
[MODE_HS_RA][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 },
+ [MODE_HS_RA][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 },
[MODE_HS_RB][UFS_HS_G1][UFS_LANE_1] = { 149422, 1000 },
[MODE_HS_RB][UFS_HS_G2][UFS_LANE_1] = { 298189, 1000 },
[MODE_HS_RB][UFS_HS_G3][UFS_LANE_1] = { 1492582, 102400 },
[MODE_HS_RB][UFS_HS_G4][UFS_LANE_1] = { 2915200, 204800 },
+ [MODE_HS_RB][UFS_HS_G5][UFS_LANE_1] = { 5836800, 409600 },
[MODE_HS_RB][UFS_HS_G1][UFS_LANE_2] = { 298189, 1000 },
[MODE_HS_RB][UFS_HS_G2][UFS_LANE_2] = { 596378, 1000 },
[MODE_HS_RB][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 },
[MODE_HS_RB][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 },
+ [MODE_HS_RB][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 },
[MODE_MAX][0][0] = { 7643136, 307200 },
};
list_for_each_entry(clki, head, list) {
if (!IS_ERR_OR_NULL(clki->clk) &&
- !strcmp(clki->name, "core_clk_unipro")) {
- if (is_scale_up)
+ !strcmp(clki->name, "core_clk_unipro")) {
+ if (!clki->max_freq)
+ cycles_in_1us = 150; /* default for backwards compatibility */
+ else if (is_scale_up)
cycles_in_1us = ceil(clki->max_freq, (1000 * 1000));
else
cycles_in_1us = ceil(clk_get_rate(clki->clk), (1000 * 1000));
*/
vma->vm_pgoff = 0;
- addr = (void *)mem->addr;
+ addr = (void *)(uintptr_t)mem->addr;
ret = dma_mmap_coherent(mem->dma_device,
vma,
addr,
addr = dma_alloc_coherent(&priv->pdev->dev, uiomem->size,
&uiomem->dma_addr, GFP_KERNEL);
- uiomem->addr = addr ? (phys_addr_t) addr : DMEM_MAP_ERROR;
+ uiomem->addr = addr ? (uintptr_t) addr : DMEM_MAP_ERROR;
++uiomem;
}
priv->refcnt++;
break;
if (uiomem->addr) {
dma_free_coherent(uiomem->dma_device, uiomem->size,
- (void *) uiomem->addr,
+ (void *) (uintptr_t) uiomem->addr,
uiomem->dma_addr);
}
uiomem->addr = DMEM_MAP_ERROR;
{
if (pdata->send_gpadl.gpadl_handle) {
vmbus_teardown_gpadl(dev->channel, &pdata->send_gpadl);
- vfree(pdata->send_buf);
+ if (!pdata->send_gpadl.decrypted)
+ vfree(pdata->send_buf);
}
if (pdata->recv_gpadl.gpadl_handle) {
vmbus_teardown_gpadl(dev->channel, &pdata->recv_gpadl);
- vfree(pdata->recv_buf);
+ if (!pdata->recv_gpadl.decrypted)
+ vfree(pdata->recv_buf);
}
}
ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
RECV_BUFFER_SIZE, &pdata->recv_gpadl);
if (ret) {
- vfree(pdata->recv_buf);
+ if (!pdata->recv_gpadl.decrypted)
+ vfree(pdata->recv_buf);
goto fail_close;
}
ret = vmbus_establish_gpadl(channel, pdata->send_buf,
SEND_BUFFER_SIZE, &pdata->send_gpadl);
if (ret) {
- vfree(pdata->send_buf);
+ if (!pdata->send_gpadl.decrypted)
+ vfree(pdata->send_buf);
goto fail_close;
}
p->mem[1].size = sram_pool_sz;
p->mem[1].memtype = UIO_MEM_PHYS;
- p->mem[2].addr = (phys_addr_t) gdev->ddr_vaddr;
+ p->mem[2].addr = (uintptr_t) gdev->ddr_vaddr;
p->mem[2].dma_addr = gdev->ddr_paddr;
p->mem[2].size = extram_pool_sz;
p->mem[2].memtype = UIO_MEM_DMA_COHERENT;
#define HUB_DEBOUNCE_STEP 25
#define HUB_DEBOUNCE_STABLE 100
-static void hub_release(struct kref *kref);
static int usb_reset_and_verify_device(struct usb_device *udev);
static int hub_port_disable(struct usb_hub *hub, int port1, int set_state);
static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1,
*/
intf = to_usb_interface(hub->intfdev);
usb_autopm_get_interface_no_resume(intf);
- kref_get(&hub->kref);
+ hub_get(hub);
if (queue_work(hub_wq, &hub->events))
return;
/* the work has already been scheduled */
usb_autopm_put_interface_async(intf);
- kref_put(&hub->kref, hub_release);
+ hub_put(hub);
}
void usb_kick_hub_wq(struct usb_device *hdev)
goto init2;
goto init3;
}
- kref_get(&hub->kref);
+ hub_get(hub);
/* The superspeed hub except for root hub has to use Hub Depth
* value as an offset into the route string to locate the bits
device_unlock(&hdev->dev);
}
- kref_put(&hub->kref, hub_release);
+ hub_put(hub);
}
/* Implement the continuations for the delays above */
kfree(hub);
}
+void hub_get(struct usb_hub *hub)
+{
+ kref_get(&hub->kref);
+}
+
+void hub_put(struct usb_hub *hub)
+{
+ kref_put(&hub->kref, hub_release);
+}
+
static unsigned highspeed_hubs;
static void hub_disconnect(struct usb_interface *intf)
onboard_hub_destroy_pdevs(&hub->onboard_hub_devs);
- kref_put(&hub->kref, hub_release);
+ hub_put(hub);
}
static bool hub_descriptor_is_sane(struct usb_host_interface *desc)
/* Balance the stuff in kick_hub_wq() and allow autosuspend */
usb_autopm_put_interface(intf);
- kref_put(&hub->kref, hub_release);
+ hub_put(hub);
kcov_remote_stop();
}
extern int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub,
int port1, bool set);
extern struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev);
+extern void hub_get(struct usb_hub *hub);
+extern void hub_put(struct usb_hub *hub);
extern int hub_port_debounce(struct usb_hub *hub, int port1,
bool must_be_connected);
extern int usb_clear_port_feature(struct usb_device *hdev,
u16 portstatus, unused;
bool disabled;
int rc;
+ struct kernfs_node *kn;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
usb_hub_port_status(hub, port1, &portstatus, &unused);
disabled = !usb_port_is_power_on(hub, portstatus);
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
if (rc)
return rc;
int port1 = port_dev->portnum;
bool disabled;
int rc;
+ struct kernfs_node *kn;
rc = kstrtobool(buf, &disabled);
if (rc)
return rc;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
if (!rc)
rc = count;
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
return rc;
}
{
struct usb_port *port_dev = to_usb_port(dev);
- if (port_dev->child)
+ if (port_dev->child) {
usb_disable_usb2_hardware_lpm(port_dev->child);
+ usb_unlocked_disable_lpm(port_dev->child);
+ }
}
static const struct dev_pm_ops usb_port_pm_ops = {
{
struct usb_interface *intf = to_usb_interface(dev);
bool val;
+ struct kernfs_node *kn;
if (kstrtobool(buf, &val) != 0)
return -EINVAL;
- if (val)
+ if (val) {
usb_authorize_interface(intf);
- else
- usb_deauthorize_interface(intf);
+ } else {
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister intf.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (kn) {
+ usb_deauthorize_interface(intf);
+ sysfs_unbreak_active_protection(kn);
+ }
+ }
return count;
}
* struct dwc2_hregs_backup - Holds host registers state before
* entering partial power down
* @hcfg: Backup of HCFG register
+ * @hflbaddr: Backup of HFLBADDR register
* @haintmsk: Backup of HAINTMSK register
+ * @hcchar: Backup of HCCHAR register
+ * @hcsplt: Backup of HCSPLT register
* @hcintmsk: Backup of HCINTMSK register
+ * @hctsiz: Backup of HCTSIZ register
+ * @hdma: Backup of HCDMA register
+ * @hcdmab: Backup of HCDMAB register
* @hprt0: Backup of HPTR0 register
* @hfir: Backup of HFIR register
* @hptxfsiz: Backup of HPTXFSIZ register
*/
struct dwc2_hregs_backup {
u32 hcfg;
+ u32 hflbaddr;
u32 haintmsk;
+ u32 hcchar[MAX_EPS_CHANNELS];
+ u32 hcsplt[MAX_EPS_CHANNELS];
u32 hcintmsk[MAX_EPS_CHANNELS];
+ u32 hctsiz[MAX_EPS_CHANNELS];
+ u32 hcidma[MAX_EPS_CHANNELS];
+ u32 hcidmab[MAX_EPS_CHANNELS];
u32 hprt0;
u32 hfir;
u32 hptxfsiz;
bool needs_byte_swap;
/* DWC OTG HW Release versions */
+#define DWC2_CORE_REV_4_30a 0x4f54430a
#define DWC2_CORE_REV_2_71a 0x4f54271a
#define DWC2_CORE_REV_2_72a 0x4f54272a
#define DWC2_CORE_REV_2_80a 0x4f54280a
int dwc2_restore_global_registers(struct dwc2_hsotg *hsotg);
void dwc2_enable_acg(struct dwc2_hsotg *hsotg);
+void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup);
/* This function should be called on every hardware interrupt. */
irqreturn_t dwc2_handle_common_intr(int irq, void *dev);
/* Exit gadget mode clock gating. */
if (hsotg->params.power_down ==
- DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended)
+ DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended &&
+ !hsotg->params.no_clock_gating)
dwc2_gadget_exit_clock_gating(hsotg, 0);
}
* @hsotg: Programming view of DWC_otg controller
*
*/
-static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg)
+void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup)
{
u32 glpmcfg;
- u32 i = 0;
+ u32 pcgctl;
+ u32 dctl;
if (hsotg->lx_state != DWC2_L1) {
dev_err(hsotg->dev, "Core isn't in DWC2_L1 state\n");
glpmcfg = dwc2_readl(hsotg, GLPMCFG);
if (dwc2_is_device_mode(hsotg)) {
- dev_dbg(hsotg->dev, "Exit from L1 state\n");
+ dev_dbg(hsotg->dev, "Exit from L1 state, remotewakeup=%d\n", remotewakeup);
glpmcfg &= ~GLPMCFG_ENBLSLPM;
- glpmcfg &= ~GLPMCFG_HIRD_THRES_EN;
+ glpmcfg &= ~GLPMCFG_HIRD_THRES_MASK;
dwc2_writel(hsotg, glpmcfg, GLPMCFG);
- do {
- glpmcfg = dwc2_readl(hsotg, GLPMCFG);
+ pcgctl = dwc2_readl(hsotg, PCGCTL);
+ pcgctl &= ~PCGCTL_ENBL_SLEEP_GATING;
+ dwc2_writel(hsotg, pcgctl, PCGCTL);
- if (!(glpmcfg & (GLPMCFG_COREL1RES_MASK |
- GLPMCFG_L1RESUMEOK | GLPMCFG_SLPSTS)))
- break;
+ glpmcfg = dwc2_readl(hsotg, GLPMCFG);
+ if (glpmcfg & GLPMCFG_ENBESL) {
+ glpmcfg |= GLPMCFG_RSTRSLPSTS;
+ dwc2_writel(hsotg, glpmcfg, GLPMCFG);
+ }
+
+ if (remotewakeup) {
+ if (dwc2_hsotg_wait_bit_set(hsotg, GLPMCFG, GLPMCFG_L1RESUMEOK, 1000)) {
+ dev_warn(hsotg->dev, "%s: timeout GLPMCFG_L1RESUMEOK\n", __func__);
+ goto fail;
+ return;
+ }
+
+ dctl = dwc2_readl(hsotg, DCTL);
+ dctl |= DCTL_RMTWKUPSIG;
+ dwc2_writel(hsotg, dctl, DCTL);
- udelay(1);
- } while (++i < 200);
+ if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, GINTSTS_WKUPINT, 1000)) {
+ dev_warn(hsotg->dev, "%s: timeout GINTSTS_WKUPINT\n", __func__);
+ goto fail;
+ return;
+ }
+ }
- if (i == 200) {
- dev_err(hsotg->dev, "Failed to exit L1 sleep state in 200us.\n");
+ glpmcfg = dwc2_readl(hsotg, GLPMCFG);
+ if (glpmcfg & GLPMCFG_COREL1RES_MASK || glpmcfg & GLPMCFG_SLPSTS ||
+ glpmcfg & GLPMCFG_L1RESUMEOK) {
+ goto fail;
return;
}
- dwc2_gadget_init_lpm(hsotg);
+
+ /* Inform gadget to exit from L1 */
+ call_gadget(hsotg, resume);
+ /* Change to L0 state */
+ hsotg->lx_state = DWC2_L0;
+ hsotg->bus_suspended = false;
+fail: dwc2_gadget_init_lpm(hsotg);
} else {
/* TODO */
dev_err(hsotg->dev, "Host side LPM is not supported.\n");
return;
}
-
- /* Change to L0 state */
- hsotg->lx_state = DWC2_L0;
-
- /* Inform gadget to exit from L1 */
- call_gadget(hsotg, resume);
}
/*
dev_dbg(hsotg->dev, "%s lxstate = %d\n", __func__, hsotg->lx_state);
if (hsotg->lx_state == DWC2_L1) {
- dwc2_wakeup_from_lpm_l1(hsotg);
+ dwc2_wakeup_from_lpm_l1(hsotg, false);
return;
}
/* Exit gadget mode clock gating. */
if (hsotg->params.power_down ==
- DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended)
+ DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended &&
+ !hsotg->params.no_clock_gating)
dwc2_gadget_exit_clock_gating(hsotg, 0);
} else {
/* Change to L0 state */
}
if (hsotg->params.power_down ==
- DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended)
+ DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended &&
+ !hsotg->params.no_clock_gating)
dwc2_host_exit_clock_gating(hsotg, 1);
/*
ep->name, req, req->length, req->buf, req->no_interrupt,
req->zero, req->short_not_ok);
+ if (hs->lx_state == DWC2_L1) {
+ dwc2_wakeup_from_lpm_l1(hs, true);
+ }
+
/* Prevent new request submission when controller is suspended */
if (hs->lx_state != DWC2_L0) {
dev_dbg(hs->dev, "%s: submit request only in active state\n",
if (hsotg->in_ppd && hsotg->lx_state == DWC2_L2)
dwc2_exit_partial_power_down(hsotg, 0, true);
+ /* Exit gadget mode clock gating. */
+ if (hsotg->params.power_down ==
+ DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended &&
+ !hsotg->params.no_clock_gating)
+ dwc2_gadget_exit_clock_gating(hsotg, 0);
+
hsotg->lx_state = DWC2_L0;
}
hsotg->available_host_channels--;
}
qh = list_entry(qh_ptr, struct dwc2_qh, qh_list_entry);
- if (dwc2_assign_and_init_hc(hsotg, qh))
+ if (dwc2_assign_and_init_hc(hsotg, qh)) {
+ if (hsotg->params.uframe_sched)
+ hsotg->available_host_channels++;
break;
+ }
/*
* Move the QH from the periodic ready schedule to the
hsotg->available_host_channels--;
}
- if (dwc2_assign_and_init_hc(hsotg, qh))
+ if (dwc2_assign_and_init_hc(hsotg, qh)) {
+ if (hsotg->params.uframe_sched)
+ hsotg->available_host_channels++;
break;
+ }
/*
* Move the QH from the non-periodic inactive schedule to the
urb->actual_length);
if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) {
+ if (!hsotg->params.dma_desc_enable)
+ urb->start_frame = qtd->qh->start_active_frame;
urb->error_count = dwc2_hcd_urb_get_error_count(qtd->urb);
for (i = 0; i < urb->number_of_packets; ++i) {
urb->iso_frame_desc[i].actual_length =
}
if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE &&
- hsotg->bus_suspended) {
+ hsotg->bus_suspended && !hsotg->params.no_clock_gating) {
if (dwc2_is_device_mode(hsotg))
dwc2_gadget_exit_clock_gating(hsotg, 0);
else
/* Backup Host regs */
hr = &hsotg->hr_backup;
hr->hcfg = dwc2_readl(hsotg, HCFG);
+ hr->hflbaddr = dwc2_readl(hsotg, HFLBADDR);
hr->haintmsk = dwc2_readl(hsotg, HAINTMSK);
- for (i = 0; i < hsotg->params.host_channels; ++i)
+ for (i = 0; i < hsotg->params.host_channels; ++i) {
+ hr->hcchar[i] = dwc2_readl(hsotg, HCCHAR(i));
+ hr->hcsplt[i] = dwc2_readl(hsotg, HCSPLT(i));
hr->hcintmsk[i] = dwc2_readl(hsotg, HCINTMSK(i));
+ hr->hctsiz[i] = dwc2_readl(hsotg, HCTSIZ(i));
+ hr->hcidma[i] = dwc2_readl(hsotg, HCDMA(i));
+ hr->hcidmab[i] = dwc2_readl(hsotg, HCDMAB(i));
+ }
hr->hprt0 = dwc2_read_hprt0(hsotg);
hr->hfir = dwc2_readl(hsotg, HFIR);
hr->valid = false;
dwc2_writel(hsotg, hr->hcfg, HCFG);
+ dwc2_writel(hsotg, hr->hflbaddr, HFLBADDR);
dwc2_writel(hsotg, hr->haintmsk, HAINTMSK);
- for (i = 0; i < hsotg->params.host_channels; ++i)
+ for (i = 0; i < hsotg->params.host_channels; ++i) {
+ dwc2_writel(hsotg, hr->hcchar[i], HCCHAR(i));
+ dwc2_writel(hsotg, hr->hcsplt[i], HCSPLT(i));
dwc2_writel(hsotg, hr->hcintmsk[i], HCINTMSK(i));
+ dwc2_writel(hsotg, hr->hctsiz[i], HCTSIZ(i));
+ dwc2_writel(hsotg, hr->hcidma[i], HCDMA(i));
+ dwc2_writel(hsotg, hr->hcidmab[i], HCDMAB(i));
+ }
dwc2_writel(hsotg, hr->hprt0, HPRT0);
dwc2_writel(hsotg, hr->hfir, HFIR);
dwc2_writel(hsotg, hr->hcfg, HCFG);
/* De-assert Wakeup Logic */
- gpwrdn = dwc2_readl(hsotg, GPWRDN);
- gpwrdn &= ~GPWRDN_PMUACTV;
- dwc2_writel(hsotg, gpwrdn, GPWRDN);
- udelay(10);
+ if (!(rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) {
+ gpwrdn = dwc2_readl(hsotg, GPWRDN);
+ gpwrdn &= ~GPWRDN_PMUACTV;
+ dwc2_writel(hsotg, gpwrdn, GPWRDN);
+ udelay(10);
+ }
hprt0 = hr->hprt0;
hprt0 |= HPRT0_PWR;
hprt0 |= HPRT0_RES;
dwc2_writel(hsotg, hprt0, HPRT0);
+ /* De-assert Wakeup Logic */
+ if ((rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) {
+ gpwrdn = dwc2_readl(hsotg, GPWRDN);
+ gpwrdn &= ~GPWRDN_PMUACTV;
+ dwc2_writel(hsotg, gpwrdn, GPWRDN);
+ udelay(10);
+ }
/* Wait for Resume time and then program HPRT again */
mdelay(100);
hprt0 &= ~HPRT0_RES;
idx = qh->td_last;
inc = qh->host_interval;
hsotg->frame_number = dwc2_hcd_get_frame_number(hsotg);
- cur_idx = dwc2_frame_list_idx(hsotg->frame_number);
+ cur_idx = idx;
next_idx = dwc2_desclist_idx_inc(qh->td_last, inc, qh->dev_speed);
/*
{
struct dwc2_dma_desc *dma_desc;
struct dwc2_hcd_iso_packet_desc *frame_desc;
+ u16 frame_desc_idx;
+ struct urb *usb_urb;
u16 remain = 0;
int rc = 0;
if (!qtd->urb)
return -EINVAL;
+ usb_urb = qtd->urb->priv;
+
dma_sync_single_for_cpu(hsotg->dev, qh->desc_list_dma + (idx *
sizeof(struct dwc2_dma_desc)),
sizeof(struct dwc2_dma_desc),
DMA_FROM_DEVICE);
dma_desc = &qh->desc_list[idx];
+ frame_desc_idx = (idx - qtd->isoc_td_first) & (usb_urb->number_of_packets - 1);
- frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index_last];
+ frame_desc = &qtd->urb->iso_descs[frame_desc_idx];
+ if (idx == qtd->isoc_td_first)
+ usb_urb->start_frame = dwc2_hcd_get_frame_number(hsotg);
dma_desc->buf = (u32)(qtd->urb->dma + frame_desc->offset);
if (chan->ep_is_in)
remain = (dma_desc->status & HOST_DMA_ISOC_NBYTES_MASK) >>
frame_desc->status = 0;
}
- if (++qtd->isoc_frame_index == qtd->urb->packet_count) {
+ if (++qtd->isoc_frame_index == usb_urb->number_of_packets) {
/*
* urb->status is not used for isoc transfers here. The
* individual frame_desc status are used instead.
return;
idx = dwc2_desclist_idx_inc(idx, qh->host_interval,
chan->speed);
- if (!rc)
+ if (rc == 0)
continue;
- if (rc == DWC2_CMPL_DONE)
- break;
+ if (rc == DWC2_CMPL_DONE || rc == DWC2_CMPL_STOP)
+ goto stop_scan;
/* rc == DWC2_CMPL_STOP */
#define TXSTS_QTOP_TOKEN_MASK (0x3 << 25)
#define TXSTS_QTOP_TOKEN_SHIFT 25
#define TXSTS_QTOP_TERMINATE BIT(24)
-#define TXSTS_QSPCAVAIL_MASK (0xff << 16)
+#define TXSTS_QSPCAVAIL_MASK (0x7f << 16)
#define TXSTS_QSPCAVAIL_SHIFT 16
#define TXSTS_FSPCAVAIL_MASK (0xffff << 0)
#define TXSTS_FSPCAVAIL_SHIFT 0
/* Exit clock gating when driver is removed. */
if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE &&
- hsotg->bus_suspended) {
+ hsotg->bus_suspended && !hsotg->params.no_clock_gating) {
if (dwc2_is_device_mode(hsotg))
dwc2_gadget_exit_clock_gating(hsotg, 0);
else
else
dwc->sysdev = dwc->dev;
+ dwc->sys_wakeup = device_may_wakeup(dwc->sysdev);
+
ret = device_property_read_string(dev, "usb-psy-name", &usb_psy_name);
if (ret >= 0) {
dwc->usb_psy = power_supply_get_by_name(usb_psy_name);
* 3 - Reserved
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
+ * @sys_wakeup: set if the device may do system wakeup.
* @wakeup_configured: set if the device is configured for remote wakeup.
* @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
+ unsigned sys_wakeup:1;
unsigned wakeup_configured:1;
unsigned suspended:1;
#define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1
#define PCI_DEVICE_ID_INTEL_MTLS 0x7f6f
#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e
-#define PCI_DEVICE_ID_INTEL_ARLH 0x7ec1
#define PCI_DEVICE_ID_INTEL_ARLH_PCH 0x777e
#define PCI_DEVICE_ID_INTEL_TGL 0x9a15
#define PCI_DEVICE_ID_AMD_MR 0x163a
{ PCI_DEVICE_DATA(INTEL, MTLP, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, MTL, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, MTLS, &dwc3_pci_intel_swnode) },
- { PCI_DEVICE_DATA(INTEL, ARLH, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, ARLH_PCH, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, TGL, &dwc3_pci_intel_swnode) },
/* reinitialize physical ep1 */
dep = dwc->eps[1];
- dep->flags = DWC3_EP_ENABLED;
+ dep->flags &= DWC3_EP_RESOURCE_ALLOCATED;
+ dep->flags |= DWC3_EP_ENABLED;
/* stall is always issued on EP0 */
dep = dwc->eps[0];
dwc->gadget_driver = driver;
spin_unlock_irqrestore(&dwc->lock, flags);
+ if (dwc->sys_wakeup)
+ device_wakeup_enable(dwc->sysdev);
+
return 0;
}
struct dwc3 *dwc = gadget_to_dwc(g);
unsigned long flags;
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
spin_lock_irqsave(&dwc->lock, flags);
dwc->gadget_driver = NULL;
dwc->max_cfg_eps = 0;
else
dwc3_gadget_set_speed(dwc->gadget, dwc->maximum_speed);
+ /* No system wakeup if no gadget driver bound */
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
return 0;
err5:
goto err;
}
+ if (dwc->sys_wakeup) {
+ /* Restore wakeup setting if switched from device */
+ device_wakeup_enable(dwc->sysdev);
+
+ /* Pass on wakeup setting to the new xhci platform device */
+ device_init_wakeup(&xhci->dev, true);
+ }
+
return 0;
err:
platform_device_put(xhci);
void dwc3_host_exit(struct dwc3 *dwc)
{
+ if (dwc->sys_wakeup)
+ device_init_wakeup(&dwc->xhci->dev, false);
+
platform_device_unregister(dwc->xhci);
dwc->xhci = NULL;
}
#define FUNCTIONFS_MAGIC 0xa647361 /* Chosen by a honest dice roll ;) */
+#define DMABUF_ENQUEUE_TIMEOUT_MS 5000
+
MODULE_IMPORT_NS(DMA_BUF);
/* Reference counter handling */
struct ffs_dmabuf_priv *priv;
struct ffs_dma_fence *fence;
struct usb_request *usb_req;
+ enum dma_resv_usage resv_dir;
struct dma_buf *dmabuf;
+ unsigned long timeout;
struct ffs_ep *ep;
bool cookie;
u32 seqno;
+ long retl;
int ret;
if (req->flags & ~USB_FFS_DMABUF_TRANSFER_MASK)
goto err_attachment_put;
/* Make sure we don't have writers */
- if (!dma_resv_test_signaled(dmabuf->resv, DMA_RESV_USAGE_WRITE)) {
- pr_vdebug("FFS WRITE fence is not signaled\n");
- ret = -EBUSY;
- goto err_resv_unlock;
- }
-
- /* If we're writing to the DMABUF, make sure we don't have readers */
- if (epfile->in &&
- !dma_resv_test_signaled(dmabuf->resv, DMA_RESV_USAGE_READ)) {
- pr_vdebug("FFS READ fence is not signaled\n");
- ret = -EBUSY;
+ timeout = nonblock ? 0 : msecs_to_jiffies(DMABUF_ENQUEUE_TIMEOUT_MS);
+ retl = dma_resv_wait_timeout(dmabuf->resv,
+ dma_resv_usage_rw(epfile->in),
+ true, timeout);
+ if (retl == 0)
+ retl = -EBUSY;
+ if (retl < 0) {
+ ret = (int)retl;
goto err_resv_unlock;
}
dma_fence_init(&fence->base, &ffs_dmabuf_fence_ops,
&priv->lock, priv->context, seqno);
- dma_resv_add_fence(dmabuf->resv, &fence->base,
- dma_resv_usage_rw(epfile->in));
+ resv_dir = epfile->in ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ;
+
+ dma_resv_add_fence(dmabuf->resv, &fence->base, resv_dir);
dma_resv_unlock(dmabuf->resv);
/* Now that the dma_fence is in place, queue the transfer. */
if (alt > 1)
goto fail;
- if (ncm->port.in_ep->enabled) {
+ if (ncm->netdev) {
DBG(cdev, "reset ncm\n");
ncm->netdev = NULL;
gether_disconnect(&ncm->port);
DBG(cdev, "ncm deactivated\n");
- if (ncm->port.in_ep->enabled) {
+ if (ncm->netdev) {
ncm->netdev = NULL;
gether_disconnect(&ncm->port);
}
{
int ret = 0;
- if (WARN_ON_ONCE(!ep->enabled && ep->address)) {
+ if (!ep->enabled && ep->address) {
+ pr_debug("USB gadget: queue request to disabled ep 0x%x (%s)\n",
+ ep->address, ep->name);
ret = -ESHUTDOWN;
goto out;
}
{
struct fsl_ep *ep = container_of(_ep, struct fsl_ep, ep);
struct fsl_req *req = container_of(_req, struct fsl_req, req);
- struct fsl_udc *udc;
+ struct fsl_udc *udc = ep->udc;
unsigned long flags;
int ret;
dev_vdbg(&udc->gadget.dev, "%s, bad params\n", __func__);
return -EINVAL;
}
- if (unlikely(!_ep || !ep->ep.desc)) {
+ if (unlikely(!ep->ep.desc)) {
dev_vdbg(&udc->gadget.dev, "%s, bad ep\n", __func__);
return -EINVAL;
}
return -EMSGSIZE;
}
- udc = ep->udc;
if (!udc->driver || udc->gadget.speed == USB_SPEED_UNKNOWN)
return -ESHUTDOWN;
irqreturn_t xhci_irq(struct usb_hcd *hcd)
{
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
- irqreturn_t ret = IRQ_NONE;
+ irqreturn_t ret = IRQ_HANDLED;
u32 status;
spin_lock(&xhci->lock);
status = readl(&xhci->op_regs->status);
if (status == ~(u32)0) {
xhci_hc_died(xhci);
- ret = IRQ_HANDLED;
goto out;
}
- if (!(status & STS_EINT))
+ if (!(status & STS_EINT)) {
+ ret = IRQ_NONE;
goto out;
+ }
if (status & STS_HCE) {
xhci_warn(xhci, "WARNING: Host Controller Error\n");
if (status & STS_FATAL) {
xhci_warn(xhci, "WARNING: Host System Error\n");
xhci_halt(xhci);
- ret = IRQ_HANDLED;
goto out;
}
*/
status |= STS_EINT;
writel(status, &xhci->op_regs->status);
- ret = IRQ_HANDLED;
/* This is the handler of the primary interrupter */
xhci_handle_events(xhci, xhci->interrupters[0]);
__field(void *, vdev)
__field(unsigned long long, out_ctx)
__field(unsigned long long, in_ctx)
- __field(int, hcd_portnum)
- __field(int, hw_portnum)
+ __field(int, slot_id)
__field(u16, current_mel)
),
__entry->vdev = vdev;
__entry->in_ctx = (unsigned long long) vdev->in_ctx->dma;
__entry->out_ctx = (unsigned long long) vdev->out_ctx->dma;
- __entry->hcd_portnum = (int) vdev->rhub_port->hcd_portnum;
- __entry->hw_portnum = (int) vdev->rhub_port->hw_portnum;
+ __entry->slot_id = (int) vdev->slot_id;
__entry->current_mel = (u16) vdev->current_mel;
),
- TP_printk("vdev %p ctx %llx | %llx hcd_portnum %d hw_portnum %d current_mel %d",
- __entry->vdev, __entry->in_ctx, __entry->out_ctx,
- __entry->hcd_portnum, __entry->hw_portnum, __entry->current_mel
+ TP_printk("vdev %p slot %d ctx %llx | %llx current_mel %d",
+ __entry->vdev, __entry->slot_id, __entry->in_ctx,
+ __entry->out_ctx, __entry->current_mel
)
);
err = regulator_bulk_enable(hub->pdata->num_supplies, hub->supplies);
if (err) {
dev_err(hub->dev, "failed to enable supplies: %pe\n", ERR_PTR(err));
- return err;
+ goto disable_clk;
}
fsleep(hub->pdata->reset_us);
hub->is_powered_on = true;
return 0;
+
+disable_clk:
+ clk_disable_unprepare(hub->clk);
+ return err;
}
static int onboard_hub_power_off(struct onboard_hub *hub)
int ret;
client = kzalloc(sizeof *client, GFP_KERNEL);
- if (!client)
+ if (!client) {
+ kfree(data);
return -ENOMEM;
+ }
client->type = type;
client->id = id;
auxdev->dev.release = ljca_auxdev_release;
ret = auxiliary_device_init(auxdev);
- if (ret)
+ if (ret) {
+ kfree(data);
goto err_free;
+ }
ljca_auxdev_acpi_bind(adap, auxdev, adr, id);
valid_pin[i] = get_unaligned_le32(&desc->bank_desc[i].valid_pins);
bitmap_from_arr32(gpio_info->valid_pin_map, valid_pin, gpio_num);
- ret = ljca_new_client_device(adap, LJCA_CLIENT_GPIO, 0, "ljca-gpio",
+ return ljca_new_client_device(adap, LJCA_CLIENT_GPIO, 0, "ljca-gpio",
gpio_info, LJCA_GPIO_ACPI_ADR);
- if (ret)
- kfree(gpio_info);
-
- return ret;
}
static int ljca_enumerate_i2c(struct ljca_adapter *adap)
ret = ljca_new_client_device(adap, LJCA_CLIENT_I2C, i,
"ljca-i2c", i2c_info,
LJCA_I2C1_ACPI_ADR + i);
- if (ret) {
- kfree(i2c_info);
+ if (ret)
return ret;
- }
}
return 0;
ret = ljca_new_client_device(adap, LJCA_CLIENT_SPI, i,
"ljca-spi", spi_info,
LJCA_SPI1_ACPI_ADR + i);
- if (ret) {
- kfree(spi_info);
+ if (ret)
return ret;
- }
}
return 0;
return dev_err_probe(dev, PTR_ERR(nop->vbus_draw),
"could not get vbus regulator\n");
- nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus");
- if (PTR_ERR(nop->vbus_draw) == -ENODEV)
- nop->vbus_draw = NULL;
- if (IS_ERR(nop->vbus_draw))
- return dev_err_probe(dev, PTR_ERR(nop->vbus_draw),
- "could not get vbus regulator\n");
-
nop->dev = dev;
nop->phy.dev = nop->dev;
nop->phy.label = "nop-xceiv";
#define QUECTEL_PRODUCT_EM061K_LMS 0x0124
#define QUECTEL_PRODUCT_EC25 0x0125
#define QUECTEL_PRODUCT_EM060K_128 0x0128
+#define QUECTEL_PRODUCT_EM060K_129 0x0129
+#define QUECTEL_PRODUCT_EM060K_12a 0x012a
+#define QUECTEL_PRODUCT_EM060K_12b 0x012b
+#define QUECTEL_PRODUCT_EM060K_12c 0x012c
#define QUECTEL_PRODUCT_EG91 0x0191
#define QUECTEL_PRODUCT_EG95 0x0195
#define QUECTEL_PRODUCT_BG96 0x0296
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0x00, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) },
.driver_info = NCTRL(2) | RSVD(3) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */
.driver_info = NCTRL(0) | RSVD(1) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a0, 0xff), /* Telit FN20C04 (rmnet) */
+ .driver_info = RSVD(0) | NCTRL(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a4, 0xff), /* Telit FN20C04 (rmnet) */
+ .driver_info = RSVD(0) | NCTRL(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a9, 0xff), /* Telit FN20C04 (rmnet) */
+ .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
.driver_info = NCTRL(0) | RSVD(1) | RSVD(3) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
.driver_info = RSVD(3) },
{ USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9803, 0xff),
.driver_info = RSVD(4) },
+ { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b05), /* Longsung U8300 */
+ .driver_info = RSVD(4) | RSVD(5) },
+ { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b3c), /* Longsung U9300 */
+ .driver_info = RSVD(0) | RSVD(4) },
{ USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) },
{ USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) },
{ USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) },
{ USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */
{ USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0115, 0xff), /* Fibocom FM135 (laptop MBIM) */
+ .driver_info = RSVD(5) },
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */
.driver_info = RSVD(4) },
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a04, 0xff) }, /* Fibocom FM650-CN (ECM mode) */
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a05, 0xff) }, /* Fibocom FM650-CN (NCM mode) */
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a06, 0xff) }, /* Fibocom FM650-CN (RNDIS mode) */
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a07, 0xff) }, /* Fibocom FM650-CN (MBIM mode) */
{ USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */
{ USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */
{ USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */
{ USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */
+ { USB_DEVICE(0x33f8, 0x0104), /* Rolling RW101-GL (laptop RMNET) */
+ .driver_info = RSVD(4) | RSVD(5) },
+ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a2, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */
+ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a3, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */
+ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a4, 0xff), /* Rolling RW101-GL (laptop MBIM) */
+ .driver_info = RSVD(4) },
+ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0115, 0xff), /* Rolling RW135-GL (laptop MBIM) */
+ .driver_info = RSVD(5) },
{ USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) },
* daft to me.
*/
-static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
+static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
{
struct uas_dev_info *devinfo = cmnd->device->hostdata;
struct urb *urb;
urb = uas_alloc_sense_urb(devinfo, gfp, cmnd);
if (!urb)
- return NULL;
+ return -ENOMEM;
usb_anchor_urb(urb, &devinfo->sense_urbs);
err = usb_submit_urb(urb, gfp);
if (err) {
usb_unanchor_urb(urb);
uas_log_cmd_state(cmnd, "sense submit err", err);
usb_free_urb(urb);
- return NULL;
}
- return urb;
+ return err;
}
static int uas_submit_urbs(struct scsi_cmnd *cmnd,
struct uas_dev_info *devinfo)
{
struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd);
- struct urb *urb;
int err;
lockdep_assert_held(&devinfo->lock);
if (cmdinfo->state & SUBMIT_STATUS_URB) {
- urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
- if (!urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ err = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
+ if (err)
+ return err;
cmdinfo->state &= ~SUBMIT_STATUS_URB;
}
cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_FROM_DEVICE);
if (!cmdinfo->data_in_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_IN_URB;
}
if (err) {
usb_unanchor_urb(cmdinfo->data_in_urb);
uas_log_cmd_state(cmnd, "data in submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_IN_URB;
cmdinfo->state |= DATA_IN_URB_INFLIGHT;
cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_TO_DEVICE);
if (!cmdinfo->data_out_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_OUT_URB;
}
if (err) {
usb_unanchor_urb(cmdinfo->data_out_urb);
uas_log_cmd_state(cmnd, "data out submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_OUT_URB;
cmdinfo->state |= DATA_OUT_URB_INFLIGHT;
if (cmdinfo->state & ALLOC_CMD_URB) {
cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd);
if (!cmdinfo->cmd_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_CMD_URB;
}
if (err) {
usb_unanchor_urb(cmdinfo->cmd_urb);
uas_log_cmd_state(cmnd, "cmd submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->cmd_urb = NULL;
cmdinfo->state &= ~SUBMIT_CMD_URB;
* of queueing, no matter how fatal the error
*/
if (err == -ENODEV) {
- set_host_byte(cmnd, DID_ERROR);
+ set_host_byte(cmnd, DID_NO_CONNECT);
scsi_done(cmnd);
goto zombie;
}
{
struct typec_port *port = to_typec_port(dev);
struct usb_power_delivery *pd;
+ int ret;
if (!port->ops || !port->ops->pd_set)
return -EOPNOTSUPP;
if (!pd)
return -EINVAL;
- return port->ops->pd_set(port, pd);
+ ret = port->ops->pd_set(port, pd);
+ if (ret)
+ return ret;
+
+ return size;
}
static ssize_t select_usb_power_delivery_show(struct device *dev,
#include <linux/usb/typec_mux.h>
#define IT5205_REG_CHIP_ID(x) (0x4 + (x))
-#define IT5205FN_CHIP_ID 0x35323035 /* "5205" */
+#define IT5205FN_CHIP_ID 0x35303235 /* "5025" -> "5205" */
/* MUX power down register */
#define IT5205_REG_MUXPDR 0x10
if (data->sink_desc.pdo[0]) {
for (i = 0; i < PDO_MAX_OBJECTS && data->sink_desc.pdo[i]; i++)
port->snk_pdo[i] = data->sink_desc.pdo[i];
- port->nr_snk_pdo = i + 1;
+ port->nr_snk_pdo = i;
port->operating_snk_mw = data->operating_snk_mw;
}
if (data->source_desc.pdo[0]) {
for (i = 0; i < PDO_MAX_OBJECTS && data->source_desc.pdo[i]; i++)
- port->snk_pdo[i] = data->source_desc.pdo[i];
- port->nr_src_pdo = i + 1;
+ port->src_pdo[i] = data->source_desc.pdo[i];
+ port->nr_src_pdo = i;
}
switch (port->state) {
port->port_source_caps = data->source_cap;
port->port_sink_caps = data->sink_cap;
+ typec_port_set_usb_power_delivery(p, NULL);
port->selected_pd = pd;
+ typec_port_set_usb_power_delivery(p, port->selected_pd);
unlock:
mutex_unlock(&port->lock);
return ret;
port->port_source_caps = NULL;
for (i = 0; i < port->pd_count; i++) {
usb_power_delivery_unregister_capabilities(port->pd_list[i]->sink_cap);
- kfree(port->pd_list[i]->sink_cap);
usb_power_delivery_unregister_capabilities(port->pd_list[i]->source_cap);
- kfree(port->pd_list[i]->source_cap);
devm_kfree(port->dev, port->pd_list[i]);
port->pd_list[i] = NULL;
usb_power_delivery_unregister(port->pds[i]);
if (!(cci & UCSI_CCI_COMMAND_COMPLETE))
return -EIO;
- if (cci & UCSI_CCI_NOT_SUPPORTED)
+ if (cci & UCSI_CCI_NOT_SUPPORTED) {
+ if (ucsi_acknowledge_command(ucsi) < 0)
+ dev_err(ucsi->dev,
+ "ACK of unsupported command failed\n");
return -EOPNOTSUPP;
+ }
if (cci & UCSI_CCI_ERROR) {
if (cmd == UCSI_GET_ERROR_STATUS)
if (ret < 0)
return ret;
- ret = ucsi_get_cable_identity(con);
- if (ret < 0)
- return ret;
+ if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE) {
+ ret = ucsi_get_cable_identity(con);
+ if (ret < 0)
+ return ret;
+ }
- ret = ucsi_register_plug(con);
- if (ret < 0)
- return ret;
+ if (con->ucsi->cap.features & UCSI_CAP_ALT_MODE_DETAILS) {
+ ret = ucsi_register_plug(con);
+ if (ret < 0)
+ return ret;
- ret = ucsi_register_altmodes(con, UCSI_RECIPIENT_SOP_P);
- if (ret < 0)
- return ret;
+ ret = ucsi_register_altmodes(con, UCSI_RECIPIENT_SOP_P);
+ if (ret < 0)
+ return ret;
+ }
return 0;
}
ucsi_register_partner(con);
ucsi_partner_task(con, ucsi_check_connection, 1, HZ);
ucsi_partner_task(con, ucsi_check_connector_capability, 1, HZ);
- ucsi_partner_task(con, ucsi_get_partner_identity, 1, HZ);
- ucsi_partner_task(con, ucsi_check_cable, 1, HZ);
+ if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE)
+ ucsi_partner_task(con, ucsi_get_partner_identity, 1, HZ);
+ if (con->ucsi->cap.features & UCSI_CAP_CABLE_DETAILS)
+ ucsi_partner_task(con, ucsi_check_cable, 1, HZ);
if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) ==
UCSI_CONSTAT_PWR_OPMODE_PD)
if (con->status.change & UCSI_CONSTAT_CAM_CHANGE)
ucsi_partner_task(con, ucsi_check_altmodes, 1, 0);
- clear_bit(EVENT_PENDING, &con->ucsi->flags);
-
mutex_lock(&ucsi->ppm_lock);
+ clear_bit(EVENT_PENDING, &con->ucsi->flags);
ret = ucsi_acknowledge_connector_change(ucsi);
mutex_unlock(&ucsi->ppm_lock);
+
if (ret)
dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret);
struct ucsi_connector *con = &ucsi->connector[num - 1];
if (!(ucsi->ntfy & UCSI_ENABLE_NTFY_CONNECTOR_CHANGE)) {
- dev_dbg(ucsi->dev, "Bogus connector change event\n");
+ dev_dbg(ucsi->dev, "Early connector change event\n");
return;
}
static int ucsi_reset_ppm(struct ucsi *ucsi)
{
- u64 command = UCSI_PPM_RESET;
+ u64 command;
unsigned long tmo;
u32 cci;
int ret;
mutex_lock(&ucsi->ppm_lock);
+ ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci));
+ if (ret < 0)
+ goto out;
+
+ /*
+ * If UCSI_CCI_RESET_COMPLETE is already set we must clear
+ * the flag before we start another reset. Send a
+ * UCSI_SET_NOTIFICATION_ENABLE command to achieve this.
+ * Ignore a timeout and try the reset anyway if this fails.
+ */
+ if (cci & UCSI_CCI_RESET_COMPLETE) {
+ command = UCSI_SET_NOTIFICATION_ENABLE;
+ ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command,
+ sizeof(command));
+ if (ret < 0)
+ goto out;
+
+ tmo = jiffies + msecs_to_jiffies(UCSI_TIMEOUT_MS);
+ do {
+ ret = ucsi->ops->read(ucsi, UCSI_CCI,
+ &cci, sizeof(cci));
+ if (ret < 0)
+ goto out;
+ if (cci & UCSI_CCI_COMMAND_COMPLETE)
+ break;
+ if (time_is_before_jiffies(tmo))
+ break;
+ msleep(20);
+ } while (1);
+
+ WARN_ON(cci & UCSI_CCI_RESET_COMPLETE);
+ }
+
+ command = UCSI_PPM_RESET;
ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command,
sizeof(command));
if (ret < 0)
ucsi_register_partner(con);
ucsi_pwr_opmode_change(con);
ucsi_port_psy_changed(con);
- ucsi_get_partner_identity(con);
- ucsi_check_cable(con);
+ if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE)
+ ucsi_get_partner_identity(con);
+ if (con->ucsi->cap.features & UCSI_CAP_CABLE_DETAILS)
+ ucsi_check_cable(con);
}
/* Only notify USB controller if partner supports USB data */
{
struct ucsi_connector *con, *connector;
u64 command, ntfy;
+ u32 cci;
int ret;
int i;
ucsi->connector = connector;
ucsi->ntfy = ntfy;
+
+ mutex_lock(&ucsi->ppm_lock);
+ ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci));
+ mutex_unlock(&ucsi->ppm_lock);
+ if (ret)
+ return ret;
+ if (UCSI_CCI_CONNECTOR(cci))
+ ucsi_connector_change(ucsi, UCSI_CCI_CONNECTOR(cci));
+
return 0;
err_unregister:
#define UCSI_CAP_ATTR_POWER_OTHER BIT(10)
#define UCSI_CAP_ATTR_POWER_VBUS BIT(14)
u8 num_connectors;
- u8 features;
+ u16 features;
#define UCSI_CAP_SET_UOM BIT(0)
#define UCSI_CAP_SET_PDM BIT(1)
#define UCSI_CAP_ALT_MODE_DETAILS BIT(2)
#define UCSI_CAP_CABLE_DETAILS BIT(5)
#define UCSI_CAP_EXT_SUPPLY_NOTIFICATIONS BIT(6)
#define UCSI_CAP_PD_RESET BIT(7)
- u16 reserved_1;
+#define UCSI_CAP_GET_PD_MESSAGE BIT(8)
+ u8 reserved_1;
u8 num_alt_modes;
u8 reserved_2;
u16 bc_version;
void *base;
struct completion complete;
unsigned long flags;
+#define UCSI_ACPI_SUPPRESS_EVENT 0
+#define UCSI_ACPI_COMMAND_PENDING 1
+#define UCSI_ACPI_ACK_PENDING 2
guid_t guid;
u64 cmd;
- bool dell_quirk_probed;
- bool dell_quirk_active;
};
static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func)
int ret;
if (ack)
- set_bit(ACK_PENDING, &ua->flags);
+ set_bit(UCSI_ACPI_ACK_PENDING, &ua->flags);
else
- set_bit(COMMAND_PENDING, &ua->flags);
+ set_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags);
ret = ucsi_acpi_async_write(ucsi, offset, val, val_len);
if (ret)
out_clear_bit:
if (ack)
- clear_bit(ACK_PENDING, &ua->flags);
+ clear_bit(UCSI_ACPI_ACK_PENDING, &ua->flags);
else
- clear_bit(COMMAND_PENDING, &ua->flags);
+ clear_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags);
return ret;
}
};
/*
- * Some Dell laptops expect that an ACK command with the
- * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate)
- * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set.
- * If this is not done events are not delivered to OSPM and
- * subsequent commands will timeout.
+ * Some Dell laptops don't like ACK commands with the
+ * UCSI_ACK_CONNECTOR_CHANGE but not the UCSI_ACK_COMMAND_COMPLETE
+ * bit set. To work around this send a dummy command and bundle the
+ * UCSI_ACK_CONNECTOR_CHANGE with the UCSI_ACK_COMMAND_COMPLETE
+ * for the dummy command.
*/
static int
ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset,
const void *val, size_t val_len)
{
struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
- u64 cmd = *(u64 *)val, ack = 0;
+ u64 cmd = *(u64 *)val;
+ u64 dummycmd = UCSI_GET_CAPABILITY;
int ret;
- if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI &&
- cmd & UCSI_ACK_CONNECTOR_CHANGE)
- ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE;
-
- ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len);
- if (ret != 0)
- return ret;
- if (ack == 0)
- return ret;
-
- if (!ua->dell_quirk_probed) {
- ua->dell_quirk_probed = true;
-
- cmd = UCSI_GET_CAPABILITY;
- ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd,
- sizeof(cmd));
- if (ret == 0)
- return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL,
- &ack, sizeof(ack));
- if (ret != -ETIMEDOUT)
+ if (cmd == (UCSI_ACK_CC_CI | UCSI_ACK_CONNECTOR_CHANGE)) {
+ cmd |= UCSI_ACK_COMMAND_COMPLETE;
+
+ /*
+ * The UCSI core thinks it is sending a connector change ack
+ * and will accept new connector change events. We don't want
+ * this to happen for the dummy command as its response will
+ * still report the very event that the core is trying to clear.
+ */
+ set_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags);
+ ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &dummycmd,
+ sizeof(dummycmd));
+ clear_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags);
+
+ if (ret < 0)
return ret;
-
- ua->dell_quirk_active = true;
- dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n");
- dev_err(ua->dev, "Firmware bug: Enabling workaround\n");
}
- if (!ua->dell_quirk_active)
- return ret;
-
- return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack));
+ return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd));
}
static const struct ucsi_operations ucsi_dell_ops = {
if (ret)
return;
- if (UCSI_CCI_CONNECTOR(cci))
+ if (UCSI_CCI_CONNECTOR(cci) &&
+ !test_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags))
ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci));
if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags))
complete(&ua->complete);
if (cci & UCSI_CCI_COMMAND_COMPLETE &&
- test_bit(COMMAND_PENDING, &ua->flags))
+ test_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags))
complete(&ua->complete);
}
static void pmic_glink_ucsi_register(struct work_struct *work)
{
struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work);
+ int orientation;
+ int i;
+
+ for (i = 0; i < PMIC_GLINK_MAX_PORTS; i++) {
+ if (!ucsi->port_orientation[i])
+ continue;
+ orientation = gpiod_get_value(ucsi->port_orientation[i]);
+
+ if (orientation >= 0) {
+ typec_switch_set(ucsi->port_switch[i],
+ orientation ? TYPEC_ORIENTATION_REVERSE
+ : TYPEC_ORIENTATION_NORMAL);
+ }
+ }
ucsi_register(ucsi->ucsi);
}
val_u32 = __virtio32_to_cpu(true, config->size_max);
- return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SEG_SIZE, val_u32);
+ return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, val_u32);
}
/* fill the block size*/
u8 ro;
ro = ((features & BIT_ULL(VIRTIO_BLK_F_RO)) == 0) ? 0 : 1;
- if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_READ_ONLY, ro))
+ if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_READ_ONLY, ro))
return -EMSGSIZE;
return 0;
u8 flush;
flush = ((features & BIT_ULL(VIRTIO_BLK_F_FLUSH)) == 0) ? 0 : 1;
- if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_FLUSH, flush))
+ if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_FLUSH, flush))
return -EMSGSIZE;
return 0;
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
- vq_err(vq, "Guest moved used index from %u to %u",
+ vq_err(vq, "Guest moved avail index from %u to %u",
last_avail_idx, vq->avail_idx);
return -EFAULT;
}
r = vhost_get_avail_idx(vq, &avail_idx);
if (unlikely(r))
return false;
+
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+ if (vq->avail_idx != vq->last_avail_idx) {
+ /* Since we have updated avail_idx, the following
+ * call to vhost_get_vq_desc() will read available
+ * ring entries. Make sure that read happens after
+ * the avail_idx read.
+ */
+ smp_rmb();
+ return false;
+ }
- return vq->avail_idx == vq->last_avail_idx;
+ return true;
}
EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
&vq->avail->idx, r);
return false;
}
+
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+ if (vq->avail_idx != vq->last_avail_idx) {
+ /* Since we have updated avail_idx, the following
+ * call to vhost_get_vq_desc() will read available
+ * ring entries. Make sure that read happens after
+ * the avail_idx read.
+ */
+ smp_rmb();
+ return true;
+ }
- return vq->avail_idx != vq->last_avail_idx;
+ return false;
}
EXPORT_SYMBOL_GPL(vhost_enable_notify);
select FB_CFB_COPYAREA
select FB_CFB_FILLRECT
select FB_CFB_IMAGEBLIT
+ select FB_IOMEM_FOPS
config FB_BW2
bool "BWtwo support"
depends on (FB = y) && (SPARC && FB_SBUS)
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
+ select FB_IOMEM_FOPS
help
This is the frame buffer device driver for the CGsix (GX, TurboGX)
frame buffer.
depends on FB_SBUS && SPARC64
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
+ select FB_IOMEM_FOPS
help
This is the frame buffer device driver for the Creator, Creator3D,
and Elite3D graphics boards.
*/
static vm_fault_t fb_deferred_io_page_mkwrite(struct fb_info *info, struct vm_fault *vmf)
{
- unsigned long offset = vmf->address - vmf->vma->vm_start;
+ unsigned long offset = vmf->pgoff << PAGE_SHIFT;
struct page *page = vmf->page;
file_update_time(vmf->vma->vm_file);
static void vmgenid_notify(struct acpi_device *device, u32 event)
{
struct vmgenid_state *state = acpi_driver_data(device);
- char *envp[] = { "NEW_VMGENID=1", NULL };
u8 old_id[VMGENID_SIZE];
memcpy(old_id, state->this_id, sizeof(old_id));
if (!memcmp(old_id, state->this_id, sizeof(old_id)))
return;
add_vmfork_randomness(state->this_id, sizeof(state->this_id));
- kobject_uevent_env(&device->dev.kobj, KOBJ_CHANGE, envp);
}
static const struct acpi_device_id vmgenid_ids[] = {
.remove = virtio_dev_remove,
};
-int register_virtio_driver(struct virtio_driver *driver)
+int __register_virtio_driver(struct virtio_driver *driver, struct module *owner)
{
/* Catch this early. */
BUG_ON(driver->feature_table_size && !driver->feature_table);
driver->driver.bus = &virtio_bus;
+ driver->driver.owner = owner;
+
return driver_register(&driver->driver);
}
-EXPORT_SYMBOL_GPL(register_virtio_driver);
+EXPORT_SYMBOL_GPL(__register_virtio_driver);
void unregister_virtio_driver(struct virtio_driver *driver)
{
static inline void v9fs_fid_add_modes(struct p9_fid *fid, unsigned int s_flags,
unsigned int s_cache, unsigned int f_flags)
{
- if (fid->qid.type != P9_QTFILE)
- return;
-
if ((!s_cache) ||
((fid->qid.version == 0) && !(s_flags & V9FS_IGNORE_QV)) ||
(s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) {
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags);
-extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid);
+extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid,
+ bool new);
extern const struct inode_operations v9fs_dir_inode_operations_dotl;
extern const struct inode_operations v9fs_file_inode_operations_dotl;
extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
extern const struct netfs_request_ops v9fs_req_ops;
extern struct inode *v9fs_fid_iget_dotl(struct super_block *sb,
- struct p9_fid *fid);
+ struct p9_fid *fid, bool new);
/* other default globals */
#define V9FS_PORT 564
*/
static inline struct inode *
v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+ struct super_block *sb, bool new)
{
if (v9fs_proto_dotl(v9ses))
- return v9fs_fid_iget_dotl(sb, fid);
+ return v9fs_fid_iget_dotl(sb, fid, new);
else
- return v9fs_fid_iget(sb, fid);
+ return v9fs_fid_iget(sb, fid, new);
}
#endif
.splice_read = v9fs_file_splice_read,
.splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync,
+ .setlease = simple_nosetlease,
};
const struct file_operations v9fs_file_operations_dotl = {
.splice_read = v9fs_file_splice_read,
.splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync_dotl,
+ .setlease = simple_nosetlease,
};
int res;
int mode = stat->mode;
- res = mode & S_IALLUGO;
+ res = mode & 0777; /* S_IRWXUGO */
if (v9fs_proto_dotu(v9ses)) {
if ((mode & P9_DMSETUID) == P9_DMSETUID)
res |= S_ISUID;
break;
}
+ if (uflags & O_TRUNC)
+ ret |= P9_OTRUNC;
+
if (extended) {
if (uflags & O_EXCL)
ret |= P9_OEXCL;
struct v9fs_inode __maybe_unused *v9inode = V9FS_I(inode);
__le32 __maybe_unused version;
- truncate_inode_pages_final(&inode->i_data);
+ if (!is_bad_inode(inode)) {
+ truncate_inode_pages_final(&inode->i_data);
- version = cpu_to_le32(v9inode->qid.version);
- netfs_clear_inode_writeback(inode, &version);
+ version = cpu_to_le32(v9inode->qid.version);
+ netfs_clear_inode_writeback(inode, &version);
- clear_inode(inode);
- filemap_fdatawrite(&inode->i_data);
+ clear_inode(inode);
+ filemap_fdatawrite(&inode->i_data);
#ifdef CONFIG_9P_FSCACHE
- fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false);
+ if (v9fs_inode_cookie(v9inode))
+ fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false);
#endif
+ } else
+ clear_inode(inode);
}
-struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid)
+struct inode *
+v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid, bool new)
{
dev_t rdev;
int retval;
inode = iget_locked(sb, QID2INO(&fid->qid));
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
- return inode;
+ if (!(inode->i_state & I_NEW)) {
+ if (!new) {
+ goto done;
+ } else {
+ p9_debug(P9_DEBUG_VFS, "WARNING: Inode collision %ld\n",
+ inode->i_ino);
+ iput(inode);
+ remove_inode_hash(inode);
+ inode = iget_locked(sb, QID2INO(&fid->qid));
+ WARN_ON(!(inode->i_state & I_NEW));
+ }
+ }
/*
* initialize the inode with the stat info
v9fs_set_netfs_context(inode);
v9fs_cache_inode_get_cookie(inode);
unlock_new_inode(inode);
+done:
return inode;
error:
iget_failed(inode);
return ERR_PTR(retval);
-
}
/**
*/
static void v9fs_dec_count(struct inode *inode)
{
- if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
- drop_nlink(inode);
+ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) {
+ if (inode->i_nlink) {
+ drop_nlink(inode);
+ } else {
+ p9_debug(P9_DEBUG_VFS,
+ "WARNING: unexpected i_nlink zero %d inode %ld\n",
+ inode->i_nlink, inode->i_ino);
+ }
+ }
}
/**
} else
v9fs_dec_count(inode);
+ if (inode->i_nlink <= 0) /* no more refs unhash it */
+ remove_inode_hash(inode);
+
v9fs_invalidate_inode_attr(inode);
v9fs_invalidate_inode_attr(dir);
/*
* instantiate inode and assign the unopened fid to the dentry
*/
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS,
else if (IS_ERR(fid))
inode = ERR_CAST(fid);
else
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, false);
/*
* If we had a rename on the server and a parallel lookup
* for the new name, then make sure we instantiate with
struct v9fs_session_info *v9ses = sb->s_fs_info;
struct v9fs_inode *v9inode = V9FS_I(inode);
- set_nlink(inode, 1);
-
inode_set_atime(inode, stat->atime, 0);
inode_set_mtime(inode, stat->mtime, 0);
inode_set_ctime(inode, stat->mtime, 0);
return current_fsgid();
}
-struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid)
+
+
+struct inode *
+v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid, bool new)
{
int retval;
struct inode *inode;
inode = iget_locked(sb, QID2INO(&fid->qid));
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
- return inode;
+ if (!(inode->i_state & I_NEW)) {
+ if (!new) {
+ goto done;
+ } else { /* deal with race condition in inode number reuse */
+ p9_debug(P9_DEBUG_ERROR, "WARNING: Inode collision %lx\n",
+ inode->i_ino);
+ iput(inode);
+ remove_inode_hash(inode);
+ inode = iget_locked(sb, QID2INO(&fid->qid));
+ WARN_ON(!(inode->i_state & I_NEW));
+ }
+ }
/*
* initialize the inode with the stat info
retval = v9fs_init_inode(v9ses, inode, &fid->qid,
st->st_mode, new_decode_dev(st->st_rdev));
+ v9fs_stat2inode_dotl(st, inode, 0);
kfree(st);
if (retval)
goto error;
- v9fs_stat2inode_dotl(st, inode, 0);
v9fs_set_netfs_context(inode);
v9fs_cache_inode_get_cookie(inode);
retval = v9fs_get_acl(inode, fid);
goto error;
unlock_new_inode(inode);
-
+done:
return inode;
error:
iget_failed(inode);
return ERR_PTR(retval);
-
}
struct dotl_openflag_map {
p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
goto out;
}
- inode = v9fs_fid_iget_dotl(dir->i_sb, fid);
+ inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err);
umode_t omode)
{
int err;
- struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL, *dfid = NULL;
kgid_t gid;
const unsigned char *name;
struct posix_acl *dacl = NULL, *pacl = NULL;
p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry);
- v9ses = v9fs_inode2v9ses(dir);
omode |= S_IFDIR;
if (dir->i_mode & S_ISGID)
}
/* instantiate inode and assign the unopened fid to the dentry */
- inode = v9fs_fid_iget_dotl(dir->i_sb, fid);
+ inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
kgid_t gid;
const unsigned char *name;
umode_t mode;
- struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL, *dfid = NULL;
struct inode *inode;
struct p9_qid qid;
dir->i_ino, dentry, omode,
MAJOR(rdev), MINOR(rdev));
- v9ses = v9fs_inode2v9ses(dir);
dfid = v9fs_parent_fid(dentry);
if (IS_ERR(dfid)) {
err = PTR_ERR(dfid);
err);
goto error;
}
- inode = v9fs_fid_iget_dotl(dir->i_sb, fid);
+ inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
else
sb->s_d_op = &v9fs_dentry_operations;
- inode = v9fs_get_inode_from_fid(v9ses, fid, sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, sb, true);
if (IS_ERR(inode)) {
retval = PTR_ERR(inode);
goto release_sb;
return res;
}
+static int v9fs_drop_inode(struct inode *inode)
+{
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(inode);
+ if (v9ses->cache & (CACHE_META|CACHE_LOOSE))
+ return generic_drop_inode(inode);
+ /*
+ * in case of non cached mode always drop the
+ * inode because we want the inode attribute
+ * to always match that on the server.
+ */
+ return 1;
+}
+
static int v9fs_write_inode(struct inode *inode,
struct writeback_control *wbc)
{
.alloc_inode = v9fs_alloc_inode,
.free_inode = v9fs_free_inode,
.statfs = simple_statfs,
+ .drop_inode = v9fs_drop_inode,
.evict_inode = v9fs_evict_inode,
.show_options = v9fs_show_options,
.umount_begin = v9fs_umount_begin,
.alloc_inode = v9fs_alloc_inode,
.free_inode = v9fs_free_inode,
.statfs = v9fs_statfs,
+ .drop_inode = v9fs_drop_inode,
.evict_inode = v9fs_evict_inode,
.show_options = v9fs_show_options,
.umount_begin = v9fs_umount_begin,
spin_lock_irqsave(&ctx->wait.lock, flags);
list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry)
if (avail >= curr->min_nr) {
- list_del_init_careful(&curr->w.entry);
wake_up_process(curr->w.private);
+ list_del_init_careful(&curr->w.entry);
}
spin_unlock_irqrestore(&ctx->wait.lock, flags);
}
btree_journal_iter.o \
btree_key_cache.o \
btree_locking.o \
+ btree_node_scan.o \
btree_trans_commit.o \
btree_update.o \
btree_update_interior.o \
error.o \
extents.o \
extent_update.o \
+ eytzinger.o \
fs.o \
fs-common.o \
fs-ioctl.o \
quota.o \
rebalance.o \
recovery.o \
+ recovery_passes.o \
reflink.o \
replicas.o \
sb-clean.o \
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
- struct bkey_s_c_xattr xattr;
struct posix_acl *acl = NULL;
struct bkey_s_c k;
int ret;
ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
&hash, inode_inum(inode), &search, 0);
- if (ret) {
- if (!bch2_err_matches(ret, ENOENT))
- acl = ERR_PTR(ret);
- goto out;
- }
+ if (ret)
+ goto err;
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
- if (ret) {
- acl = ERR_PTR(ret);
- goto out;
- }
+ if (ret)
+ goto err;
- xattr = bkey_s_c_to_xattr(k);
+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
- le16_to_cpu(xattr.v->x_val_len));
+ le16_to_cpu(xattr.v->x_val_len));
+ ret = PTR_ERR_OR_ZERO(acl);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
- if (!IS_ERR(acl))
+ if (ret)
+ acl = !bch2_err_matches(ret, ENOENT) ? ERR_PTR(ret) : NULL;
+
+ if (!IS_ERR_OR_NULL(acl))
set_cached_acl(&inode->v, type, acl);
-out:
- if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart))
- goto retry;
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
if (ret)
goto out;
- if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
- a->v.gen++;
- SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
- goto write;
- }
-
- if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
- bch2_trans_inconsistent(trans,
- "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
- "%s",
- a->v.journal_seq,
- c->journal.flushed_seq_ondisk,
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ if (a->v.dirty_sectors) {
+ if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
+ trans, "attempting to discard bucket with dirty data\n%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = -EIO;
- }
goto out;
}
if (a->v.data_type != BCH_DATA_need_discard) {
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
- bch2_trans_inconsistent(trans,
- "bucket incorrectly set in need_discard btree\n"
- "%s",
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
- ret = -EIO;
+ if (data_type_is_empty(a->v.data_type) &&
+ BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
+ a->v.gen++;
+ SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
+ goto write;
}
+ if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
+ trans, "bucket incorrectly set in need_discard btree\n"
+ "%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ ret = -EIO;
+ goto out;
+ }
+
+ if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
+ if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
+ trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s",
+ a->v.journal_seq,
+ c->journal.flushed_seq_ondisk,
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ ret = -EIO;
goto out;
}
if (ret)
goto err;
+ BUG_ON(a->v.dirty_sectors);
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
a->v.data_type = alloc_data_type(a->v, a->v.data_type);
goto out;
BUG_ON(a->v.data_type != BCH_DATA_cached);
+ BUG_ON(a->v.dirty_sectors);
if (!a->v.cached_sectors)
bch_err(c, "invalidating empty bucket, confused");
static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
{
switch (watermark) {
- case BCH_WATERMARK_reclaim:
+ case BCH_WATERMARK_interior_updates:
return 0;
+ case BCH_WATERMARK_reclaim:
+ return OPEN_BUCKETS_COUNT / 6;
case BCH_WATERMARK_btree:
case BCH_WATERMARK_btree_copygc:
return OPEN_BUCKETS_COUNT / 4;
x(copygc) \
x(btree) \
x(btree_copygc) \
- x(reclaim)
+ x(reclaim) \
+ x(interior_updates)
enum bch_watermark {
#define x(name) BCH_WATERMARK_##name,
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
+#include "checksum.h"
#include "error.h"
#include <linux/mm.h>
if (p.ptr.cached)
continue;
- bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
- &bucket2, &bp2);
+ bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2);
if (bpos_eq(bucket, bucket2) &&
!memcmp(&bp, &bp2, sizeof(bp)))
return true;
struct printbuf *err)
{
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
+
+ /* these will be caught by fsck */
+ if (!bch2_dev_exists2(c, bp.k->p.inode))
+ return 0;
+
+ struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode);
struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
int ret = 0;
- bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
+ bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
+ !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
c, err,
- backpointer_pos_wrong,
- "backpointer at wrong pos");
+ backpointer_bucket_offset_wrong,
+ "backpointer bucket_offset wrong");
fsck_err:
return ret;
}
backpointer_to_missing_alloc,
"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
alloc_iter.pos.inode, alloc_iter.pos.offset,
- (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = bch2_btree_delete_at(trans, bp_iter, 0);
goto out;
}
struct bkey_buf last_flushed;
};
+static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
+ struct bkey_s_c extent, unsigned dev)
+{
+ struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
+ int ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ return ret;
+
+ bch2_bkey_drop_device(bkey_i_to_s(n), dev);
+ return bch2_btree_insert_trans(trans, btree, n, 0);
+}
+
+static int check_extent_checksum(struct btree_trans *trans,
+ enum btree_id btree, struct bkey_s_c extent,
+ enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct printbuf buf = PRINTBUF;
+ void *data_buf = NULL;
+ struct bio *bio = NULL;
+ size_t bytes;
+ int ret = 0;
+
+ if (bkey_is_btree_ptr(extent.k))
+ return false;
+
+ bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
+ if (p.ptr.dev == dev)
+ goto found;
+ BUG();
+found:
+ if (!p.crc.csum_type)
+ return false;
+
+ bytes = p.crc.compressed_size << 9;
+
+ struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
+ if (!bch2_dev_get_ioref(ca, READ))
+ return false;
+
+ data_buf = kvmalloc(bytes, GFP_KERNEL);
+ if (!data_buf) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
+ bio->bi_iter.bi_sector = p.ptr.offset;
+ bch2_bio_map(bio, data_buf, bytes);
+ ret = submit_bio_wait(bio);
+ if (ret)
+ goto err;
+
+ prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
+ prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree));
+ bch2_bkey_val_to_text(&buf, c, extent);
+ prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree));
+ bch2_bkey_val_to_text(&buf, c, extent2);
+
+ struct nonce nonce = extent_nonce(extent.k->version, p.crc);
+ struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
+ if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
+ c, dup_backpointer_to_bad_csum_extent,
+ "%s", buf.buf))
+ ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
+fsck_err:
+err:
+ if (bio)
+ bio_put(bio);
+ kvfree(data_buf);
+ percpu_ref_put(&ca->io_ref);
+ printbuf_exit(&buf);
+ return ret;
+}
+
static int check_bp_exists(struct btree_trans *trans,
struct extents_to_bp_state *s,
struct bpos bucket,
struct bkey_s_c orig_k)
{
struct bch_fs *c = trans->c;
- struct btree_iter bp_iter = { NULL };
+ struct btree_iter bp_iter = {};
+ struct btree_iter other_extent_iter = {};
struct printbuf buf = PRINTBUF;
struct bkey_s_c bp_k;
struct bkey_buf tmp;
bch2_bkey_buf_init(&tmp);
+ if (!bch2_dev_bucket_exists(c, bucket)) {
+ prt_str(&buf, "extent for nonexistent device:bucket ");
+ bch2_bpos_to_text(&buf, bucket);
+ prt_str(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+ bch_err(c, "%s", buf.buf);
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+
if (bpos_lt(bucket, s->bucket_start) ||
bpos_gt(bucket, s->bucket_end))
return 0;
- if (!bch2_dev_bucket_exists(c, bucket))
- goto missing;
-
bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
bucket_pos_to_bp(c, bucket, bp.bucket_offset),
0);
ret = -BCH_ERR_transaction_restart_write_buffer_flush;
goto out;
}
- goto missing;
+
+ goto check_existing_bp;
}
out:
err:
fsck_err:
+ bch2_trans_iter_exit(trans, &other_extent_iter);
bch2_trans_iter_exit(trans, &bp_iter);
bch2_bkey_buf_exit(&tmp, c);
printbuf_exit(&buf);
return ret;
+check_existing_bp:
+ /* Do we have a backpointer for a different extent? */
+ if (bp_k.k->type != KEY_TYPE_backpointer)
+ goto missing;
+
+ struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;
+
+ struct bkey_s_c other_extent =
+ bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
+ ret = bkey_err(other_extent);
+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+ ret = 0;
+ if (ret)
+ goto err;
+
+ if (!other_extent.k)
+ goto missing;
+
+ if (bch2_extents_match(orig_k, other_extent)) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+ prt_str(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, other_extent);
+ bch_err(c, "%s", buf.buf);
+
+ if (other_extent.k->size <= orig_k.k->size) {
+ ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
+ if (ret)
+ goto err;
+ goto out;
+ } else {
+ ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
+ if (ret)
+ goto err;
+ goto missing;
+ }
+ }
+
+ ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
+ if (ret < 0)
+ goto err;
+ if (ret) {
+ ret = 0;
+ goto missing;
+ }
+
+ ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
+ if (ret < 0)
+ goto err;
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ printbuf_reset(&buf);
+ prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode);
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+ prt_str(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, other_extent);
+ bch_err(c, "%s", buf.buf);
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ goto err;
missing:
+ printbuf_reset(&buf);
prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
bch2_btree_id_str(bp.btree_id), bp.level);
bch2_bkey_val_to_text(&buf, c, orig_k);
- prt_printf(&buf, "\nbp pos ");
- bch2_bpos_to_text(&buf, bp_iter.pos);
+ prt_printf(&buf, "\n got: ");
+ bch2_bkey_val_to_text(&buf, c, bp_k);
+
+ struct bkey_i_backpointer n_bp_k;
+ bkey_backpointer_init(&n_bp_k.k_i);
+ n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
+ n_bp_k.v = bp;
+ prt_printf(&buf, "\n want: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));
if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
if (p.ptr.cached)
continue;
- bch2_extent_ptr_to_bp(c, btree, level,
- k, p, &bucket_pos, &bp);
+ bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp);
ret = check_bp_exists(trans, s, bucket_pos, bp, k);
if (ret)
u64 bucket_offset)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
- struct bpos ret;
-
- ret = POS(bucket.inode,
- (bucket_to_sector(ca, bucket.offset) <<
- MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+ struct bpos ret = POS(bucket.inode,
+ (bucket_to_sector(ca, bucket.offset) <<
+ MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
-
return ret;
}
return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i);
}
-static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
- struct bkey_s_c k, struct extent_ptr_decoded p)
+static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
+ struct extent_ptr_decoded p,
+ const union bch_extent_entry *entry)
{
- return level ? BCH_DATA_btree :
- p.has_ec ? BCH_DATA_stripe :
- BCH_DATA_user;
+ switch (k.k->type) {
+ case KEY_TYPE_btree_ptr:
+ case KEY_TYPE_btree_ptr_v2:
+ return BCH_DATA_btree;
+ case KEY_TYPE_extent:
+ case KEY_TYPE_reflink_v:
+ return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user;
+ case KEY_TYPE_stripe: {
+ const struct bch_extent_ptr *ptr = &entry->ptr;
+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+
+ BUG_ON(ptr < s.v->ptrs ||
+ ptr >= s.v->ptrs + s.v->nr_blocks);
+
+ return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant
+ ? BCH_DATA_parity
+ : BCH_DATA_user;
+ }
+ default:
+ BUG();
+ }
}
static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k, struct extent_ptr_decoded p,
+ const union bch_extent_entry *entry,
struct bpos *bucket_pos, struct bch_backpointer *bp)
{
- enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);
s64 sectors = level ? btree_sectors(c) : k.k->size;
u32 bucket_offset;
#include "fifo.h"
#include "nocow_locking_types.h"
#include "opts.h"
-#include "recovery_types.h"
+#include "recovery_passes_types.h"
#include "sb-errors_types.h"
#include "seqmutex.h"
#include "time_stats.h"
#include "alloc_types.h"
#include "btree_types.h"
+#include "btree_node_scan_types.h"
#include "btree_write_buffer_types.h"
#include "buckets_types.h"
#include "buckets_waiting_for_journal_types.h"
*/
#define BCH_FS_FLAGS() \
+ x(new_fs) \
x(started) \
x(may_go_rw) \
x(rw) \
x(stripe_delete) \
x(reflink) \
x(fallocate) \
+ x(fsync) \
+ x(dio_write) \
x(discard) \
x(discard_fast) \
x(invalidate) \
u64 features;
u64 compat;
unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
+ u64 btrees_lost_data;
} sb;
/* snapshot.c: */
struct snapshot_table __rcu *snapshots;
- size_t snapshot_table_size;
struct mutex snapshot_table_lock;
struct rw_semaphore snapshot_create_lock;
struct journal_keys journal_keys;
struct list_head journal_iters;
+ struct find_btree_nodes found_btree_nodes;
+
u64 last_bucket_seq_cleanup;
u64 counters_on_mount[BCH_COUNTER_NR];
__le64 nbuckets; /* device size */
__le16 first_bucket; /* index of first bucket used */
__le16 bucket_size; /* sectors */
- __le32 pad;
+ __u8 btree_bitmap_shift;
+ __u8 pad[3];
__le64 last_mount; /* time_t */
__le64 flags;
__le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
__le64 errors_reset_time;
__le64 seq;
+ __le64 btree_allocated_bitmap;
};
#define BCH_MEMBER_V1_BYTES 56
struct bch_sb_field field;
__le64 recovery_passes_required[2];
__le64 errors_silent[8];
+ __le64 btrees_lost_data;
};
struct bch_sb_field_downgrade_entry {
x(rebalance_work, BCH_VERSION(1, 3)) \
x(member_seq, BCH_VERSION(1, 4)) \
x(subvolume_fs_parent, BCH_VERSION(1, 5)) \
- x(btree_subvolume_children, BCH_VERSION(1, 6))
+ x(btree_subvolume_children, BCH_VERSION(1, 6)) \
+ x(mi_btree_bitmap, BCH_VERSION(1, 7))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
x(write_buffer_keys, 11) \
x(datetime, 12)
-enum {
+enum bch_jset_entry_type {
#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
BCH_JSET_ENTRY_TYPES()
#undef x
x(inodes, 1) \
x(key_version, 2)
-enum {
+enum bch_fs_usage_type {
#define x(f, nr) BCH_FS_USAGE_##f = nr,
BCH_FS_USAGE_TYPES()
#undef x
BIT_ULL(KEY_TYPE_stripe)) \
x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \
BIT_ULL(KEY_TYPE_reflink_v)| \
- BIT_ULL(KEY_TYPE_indirect_inline_data)) \
+ BIT_ULL(KEY_TYPE_indirect_inline_data)| \
+ BIT_ULL(KEY_TYPE_error)) \
x(subvolumes, 8, 0, \
BIT_ULL(KEY_TYPE_subvolume)) \
x(snapshots, 9, 0, \
BTREE_ID_NR
};
+static inline bool btree_id_is_alloc(enum btree_id id)
+{
+ switch (id) {
+ case BTREE_ID_alloc:
+ case BTREE_ID_backpointers:
+ case BTREE_ID_need_discard:
+ case BTREE_ID_freespace:
+ case BTREE_ID_bucket_gens:
+ return true;
+ default:
+ return false;
+ }
+}
+
#define BTREE_MAX_DEPTH 4U
/* Btree nodes */
return bkey_packed(k) ? format->key_u64s : BKEY_U64s;
}
+static inline bool bkeyp_u64s_valid(const struct bkey_format *f,
+ const struct bkey_packed *k)
+{
+ return ((unsigned) k->u64s - bkeyp_key_u64s(f, k) <= U8_MAX - BKEY_U64s);
+}
+
static inline unsigned bkeyp_key_bytes(const struct bkey_format *format,
const struct bkey_packed *k)
{
if (type >= BKEY_TYPE_NR)
return 0;
- bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) &&
+ bkey_fsck_err_on((type == BKEY_TYPE_btree ||
+ (flags & BKEY_INVALID_COMMIT)) &&
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
bkey_invalid_type_for_btree,
"invalid key type for btree %s (%s)",
- bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]);
+ bch2_btree_node_type_str(type),
+ k.k->type < KEY_TYPE_MAX
+ ? bch2_bkey_types[k.k->type]
+ : "(unknown)");
if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
bkey_fsck_err_on(k.k->size == 0, c, err,
printbuf_exit(&buf);
}
-#ifdef CONFIG_BCACHEFS_DEBUG
-
-void __bch2_verify_btree_nr_keys(struct btree *b)
+struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b)
{
struct bset_tree *t;
struct bkey_packed *k;
- struct btree_nr_keys nr = { 0 };
+ struct btree_nr_keys nr = {};
for_each_bset(b, t)
bset_tree_for_each_key(b, t, k)
if (!bkey_deleted(k))
btree_keys_account_key_add(&nr, t - b->set, k);
+ return nr;
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+void __bch2_verify_btree_nr_keys(struct btree *b)
+{
+ struct btree_nr_keys nr = bch2_btree_node_count_keys(b);
BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
}
/* Accounting: */
+struct btree_nr_keys bch2_btree_node_count_keys(struct btree *);
+
static inline void btree_keys_account_key(struct btree_nr_keys *n,
unsigned bset,
struct bkey_packed *k,
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
- u32 seq;
- BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
+ if (unlikely(level >= BTREE_MAX_DEPTH)) {
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u",
+ level, BTREE_MAX_DEPTH);
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(!bkey_is_btree_ptr(&k->k))) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
+ printbuf_exit(&buf);
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
+ printbuf_exit(&buf);
+ return ERR_PTR(ret);
+ }
+
/*
* Parent node must be locked, else we could read in a btree node that's
* been freed:
}
set_btree_node_read_in_flight(b);
-
six_unlock_write(&b->c.lock);
- seq = six_lock_seq(&b->c.lock);
- six_unlock_intent(&b->c.lock);
- /* Unlock before doing IO: */
- if (path && sync)
- bch2_trans_unlock_noassert(trans);
-
- bch2_btree_node_read(trans, b, sync);
+ if (path) {
+ u32 seq = six_lock_seq(&b->c.lock);
- if (!sync)
- return NULL;
+ /* Unlock before doing IO: */
+ six_unlock_intent(&b->c.lock);
+ bch2_trans_unlock_noassert(trans);
- if (path) {
- int ret = bch2_trans_relock(trans) ?:
- bch2_btree_path_relock_intent(trans, path);
- if (ret) {
- BUG_ON(!trans->restarted);
- return ERR_PTR(ret);
- }
- }
+ bch2_btree_node_read(trans, b, sync);
- if (!six_relock_type(&b->c.lock, lock_type, seq)) {
- BUG_ON(!path);
+ if (!sync)
+ return NULL;
- trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
- return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
+ if (!six_relock_type(&b->c.lock, lock_type, seq))
+ b = NULL;
+ } else {
+ bch2_btree_node_read(trans, b, sync);
+ if (lock_type == SIX_LOCK_read)
+ six_lock_downgrade(&b->c.lock);
}
return b;
prt_printf(&buf, "\nmax ");
bch2_bpos_to_text(&buf, b->data->max_key);
- bch2_fs_inconsistent(c, "%s", buf.buf);
+ bch2_fs_topology_error(c, "%s", buf.buf);
+
printbuf_exit(&buf);
}
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
- struct btree *b;
BUG_ON(path && !btree_node_locked(path, level + 1));
BUG_ON(level >= BTREE_MAX_DEPTH);
- b = btree_cache_find(bc, k);
+ struct btree *b = btree_cache_find(bc, k);
if (b)
return 0;
b = bch2_btree_node_fill(trans, path, k, btree_id,
level, SIX_LOCK_read, false);
- return PTR_ERR_OR_ZERO(b);
+ if (!IS_ERR_OR_NULL(b))
+ six_unlock_read(&b->c.lock);
+ return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
}
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
b = btree_cache_find(bc, k);
if (!b)
return;
+
+ BUG_ON(b == btree_node_root(trans->c, b));
wait_on_io:
/* not allowed to wait on io with btree locks held: */
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
+ if (unlikely(b->hash_val != btree_ptr_hash_val(k)))
+ goto out;
if (btree_node_dirty(b)) {
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
btree_node_data_free(c, b);
bch2_btree_node_hash_remove(bc, b);
mutex_unlock(&bc->lock);
-
+out:
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
}
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
+#include "backpointers.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_journal_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
+#include "btree_node_scan.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_gc.h"
#include "journal.h"
#include "keylist.h"
#include "move.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "reflink.h"
#include "replicas.h"
#include "super-io.h"
#define DROP_THIS_NODE 10
#define DROP_PREV_NODE 11
+#define DID_FILL_FROM_SCAN 12
static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
{
__gc_pos_set(c, new_pos);
}
-/*
- * Missing: if an interior btree node is empty, we need to do something -
- * perhaps just kill it
- */
-static int bch2_gc_check_topology(struct bch_fs *c,
- struct btree *b,
- struct bkey_buf *prev,
- struct bkey_buf cur,
- bool is_last)
-{
- struct bpos node_start = b->data->min_key;
- struct bpos node_end = b->data->max_key;
- struct bpos expected_start = bkey_deleted(&prev->k->k)
- ? node_start
- : bpos_successor(prev->k->k.p);
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
- int ret = 0;
-
- if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
- struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);
-
- if (!bpos_eq(expected_start, bp->v.min_key)) {
- bch2_topology_error(c);
-
- if (bkey_deleted(&prev->k->k)) {
- prt_printf(&buf1, "start of node: ");
- bch2_bpos_to_text(&buf1, node_start);
- } else {
- bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k));
- }
- bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k));
-
- if (__fsck_err(c,
- FSCK_CAN_FIX|
- FSCK_CAN_IGNORE|
- FSCK_NO_RATELIMIT,
- btree_node_topology_bad_min_key,
- "btree node with incorrect min_key at btree %s level %u:\n"
- " prev %s\n"
- " cur %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) {
- bch_info(c, "Halting mark and sweep to start topology repair pass");
- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
- goto err;
- } else {
- set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
- }
- }
- }
-
- if (is_last && !bpos_eq(cur.k->k.p, node_end)) {
- bch2_topology_error(c);
-
- printbuf_reset(&buf1);
- printbuf_reset(&buf2);
-
- bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k));
- bch2_bpos_to_text(&buf2, node_end);
-
- if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT,
- btree_node_topology_bad_max_key,
- "btree node with incorrect max_key at btree %s level %u:\n"
- " %s\n"
- " expected %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf) &&
- should_restart_for_topology_repair(c)) {
- bch_info(c, "Halting mark and sweep to start topology repair pass");
- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
- goto err;
- } else {
- set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
- }
- }
-
- bch2_bkey_buf_copy(prev, c, cur.k);
-err:
-fsck_err:
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
- return ret;
-}
-
static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
{
switch (b->key.k.type) {
struct bkey_i_btree_ptr_v2 *new;
int ret;
+ if (c->opts.verbose) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, " -> ");
+ bch2_bpos_to_text(&buf, new_min);
+
+ bch_info(c, "%s(): %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+ }
+
new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
if (!new)
return -BCH_ERR_ENOMEM_gc_repair_key;
struct bkey_i_btree_ptr_v2 *new;
int ret;
+ if (c->opts.verbose) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, " -> ");
+ bch2_bpos_to_text(&buf, new_max);
+
+ bch_info(c, "%s(): %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+ }
+
ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p);
if (ret)
return ret;
return 0;
}
-static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b,
- struct btree *prev, struct btree *cur)
+static int btree_check_node_boundaries(struct bch_fs *c, struct btree *b,
+ struct btree *prev, struct btree *cur,
+ struct bpos *pulled_from_scan)
{
struct bpos expected_start = !prev
? b->data->min_key
: bpos_successor(prev->key.k.p);
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ struct printbuf buf = PRINTBUF;
int ret = 0;
- if (!prev) {
- prt_printf(&buf1, "start of node: ");
- bch2_bpos_to_text(&buf1, b->data->min_key);
- } else {
- bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key));
+ BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
+ b->data->min_key));
+
+ if (bpos_eq(expected_start, cur->data->min_key))
+ return 0;
+
+ prt_printf(&buf, " at btree %s level %u:\n parent: ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+ if (prev) {
+ prt_printf(&buf, "\n prev: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key));
}
- bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key));
-
- if (prev &&
- bpos_gt(expected_start, cur->data->min_key) &&
- BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) {
- /* cur overwrites prev: */
-
- if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key,
- cur->data->min_key), c,
- btree_node_topology_overwritten_by_next_node,
- "btree node overwritten by next node at btree %s level %u:\n"
- " node %s\n"
- " next %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf)) {
- ret = DROP_PREV_NODE;
- goto out;
- }
+ prt_str(&buf, "\n next: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key));
- if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p,
- bpos_predecessor(cur->data->min_key)), c,
- btree_node_topology_bad_max_key,
- "btree node with incorrect max_key at btree %s level %u:\n"
- " node %s\n"
- " next %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf))
- ret = set_node_max(c, prev,
- bpos_predecessor(cur->data->min_key));
- } else {
- /* prev overwrites cur: */
-
- if (mustfix_fsck_err_on(bpos_ge(expected_start,
- cur->data->max_key), c,
- btree_node_topology_overwritten_by_prev_node,
- "btree node overwritten by prev node at btree %s level %u:\n"
- " prev %s\n"
- " node %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf)) {
- ret = DROP_THIS_NODE;
- goto out;
- }
+ if (bpos_lt(expected_start, cur->data->min_key)) { /* gap */
+ if (b->c.level == 1 &&
+ bpos_lt(*pulled_from_scan, cur->data->min_key)) {
+ ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
+ expected_start,
+ bpos_predecessor(cur->data->min_key));
+ if (ret)
+ goto err;
- if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c,
- btree_node_topology_bad_min_key,
- "btree node with incorrect min_key at btree %s level %u:\n"
- " prev %s\n"
- " node %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf))
- ret = set_node_min(c, cur, expected_start);
+ *pulled_from_scan = cur->data->min_key;
+ ret = DID_FILL_FROM_SCAN;
+ } else {
+ if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
+ "btree node with incorrect min_key%s", buf.buf))
+ ret = set_node_min(c, cur, expected_start);
+ }
+ } else { /* overlap */
+ if (prev && BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) { /* cur overwrites prev */
+ if (bpos_ge(prev->data->min_key, cur->data->min_key)) { /* fully? */
+ if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_next_node,
+ "btree node overwritten by next node%s", buf.buf))
+ ret = DROP_PREV_NODE;
+ } else {
+ if (mustfix_fsck_err(c, btree_node_topology_bad_max_key,
+ "btree node with incorrect max_key%s", buf.buf))
+ ret = set_node_max(c, prev,
+ bpos_predecessor(cur->data->min_key));
+ }
+ } else {
+ if (bpos_ge(expected_start, cur->data->max_key)) { /* fully? */
+ if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_prev_node,
+ "btree node overwritten by prev node%s", buf.buf))
+ ret = DROP_THIS_NODE;
+ } else {
+ if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
+ "btree node with incorrect min_key%s", buf.buf))
+ ret = set_node_min(c, cur, expected_start);
+ }
+ }
}
-out:
+err:
fsck_err:
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
+ printbuf_exit(&buf);
return ret;
}
static int btree_repair_node_end(struct bch_fs *c, struct btree *b,
- struct btree *child)
+ struct btree *child, struct bpos *pulled_from_scan)
{
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ struct printbuf buf = PRINTBUF;
int ret = 0;
- bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key));
- bch2_bpos_to_text(&buf2, b->key.k.p);
+ if (bpos_eq(child->key.k.p, b->key.k.p))
+ return 0;
- if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c,
- btree_node_topology_bad_max_key,
- "btree node with incorrect max_key at btree %s level %u:\n"
- " %s\n"
- " expected %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf)) {
- ret = set_node_max(c, child, b->key.k.p);
- if (ret)
- goto err;
+ prt_printf(&buf, "at btree %s level %u:\n parent: ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+ prt_str(&buf, "\n child: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key));
+
+ if (mustfix_fsck_err(c, btree_node_topology_bad_max_key,
+ "btree node with incorrect max_key%s", buf.buf)) {
+ if (b->c.level == 1 &&
+ bpos_lt(*pulled_from_scan, b->key.k.p)) {
+ ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
+ bpos_successor(child->key.k.p), b->key.k.p);
+ if (ret)
+ goto err;
+
+ *pulled_from_scan = b->key.k.p;
+ ret = DID_FILL_FROM_SCAN;
+ } else {
+ ret = set_node_max(c, child, b->key.k.p);
+ }
}
err:
fsck_err:
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
+ printbuf_exit(&buf);
return ret;
}
-static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
+static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b,
+ struct bpos *pulled_from_scan)
{
struct bch_fs *c = trans->c;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
struct bkey_buf prev_k, cur_k;
struct btree *prev = NULL, *cur = NULL;
- bool have_child, dropped_children = false;
+ bool have_child, new_pass = false;
struct printbuf buf = PRINTBUF;
int ret = 0;
if (!b->c.level)
return 0;
-again:
- prev = NULL;
- have_child = dropped_children = false;
+
bch2_bkey_buf_init(&prev_k);
bch2_bkey_buf_init(&cur_k);
+again:
+ cur = prev = NULL;
+ have_child = new_pass = false;
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
iter.prefetch = true;
b->c.level - 1,
buf.buf)) {
bch2_btree_node_evict(trans, cur_k.k);
+ cur = NULL;
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
- cur = NULL;
if (ret)
break;
+
+ if (!btree_id_is_alloc(b->c.btree_id)) {
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ if (ret)
+ break;
+ }
continue;
}
if (ret)
break;
- ret = btree_repair_node_boundaries(c, b, prev, cur);
+ if (bch2_btree_node_is_stale(c, cur)) {
+ bch_info(c, "btree node %s older than nodes found by scanning", buf.buf);
+ six_unlock_read(&cur->c.lock);
+ bch2_btree_node_evict(trans, cur_k.k);
+ ret = bch2_journal_key_delete(c, b->c.btree_id,
+ b->c.level, cur_k.k->k.p);
+ cur = NULL;
+ if (ret)
+ break;
+ continue;
+ }
+
+ ret = btree_check_node_boundaries(c, b, prev, cur, pulled_from_scan);
+ if (ret == DID_FILL_FROM_SCAN) {
+ new_pass = true;
+ ret = 0;
+ }
if (ret == DROP_THIS_NODE) {
six_unlock_read(&cur->c.lock);
prev = NULL;
if (ret == DROP_PREV_NODE) {
+ bch_info(c, "dropped prev node");
bch2_btree_node_evict(trans, prev_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, prev_k.k->k.p);
break;
bch2_btree_and_journal_iter_exit(&iter);
- bch2_bkey_buf_exit(&prev_k, c);
- bch2_bkey_buf_exit(&cur_k, c);
goto again;
} else if (ret)
break;
if (!ret && !IS_ERR_OR_NULL(prev)) {
BUG_ON(cur);
- ret = btree_repair_node_end(c, b, prev);
+ ret = btree_repair_node_end(c, b, prev, pulled_from_scan);
+ if (ret == DID_FILL_FROM_SCAN) {
+ new_pass = true;
+ ret = 0;
+ }
}
if (!IS_ERR_OR_NULL(prev))
goto err;
bch2_btree_and_journal_iter_exit(&iter);
+
+ if (new_pass)
+ goto again;
+
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
iter.prefetch = true;
if (ret)
goto err;
- ret = bch2_btree_repair_topology_recurse(trans, cur);
+ ret = bch2_btree_repair_topology_recurse(trans, cur, pulled_from_scan);
six_unlock_read(&cur->c.lock);
cur = NULL;
bch2_btree_node_evict(trans, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
- dropped_children = true;
+ new_pass = true;
}
if (ret)
six_unlock_read(&cur->c.lock);
bch2_btree_and_journal_iter_exit(&iter);
- bch2_bkey_buf_exit(&prev_k, c);
- bch2_bkey_buf_exit(&cur_k, c);
- if (!ret && dropped_children)
+ if (!ret && new_pass)
goto again;
+ BUG_ON(!ret && bch2_btree_node_check_topology(trans, b));
+
+ bch2_bkey_buf_exit(&prev_k, c);
+ bch2_bkey_buf_exit(&cur_k, c);
printbuf_exit(&buf);
return ret;
}
int bch2_check_topology(struct bch_fs *c)
{
struct btree_trans *trans = bch2_trans_get(c);
- struct btree *b;
- unsigned i;
+ struct bpos pulled_from_scan = POS_MIN;
int ret = 0;
- for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
+ for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
+ bool reconstructed_root = false;
- if (!r->alive)
- continue;
+ if (r->error) {
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ if (ret)
+ break;
+reconstruct_root:
+ bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i));
- b = r->b;
- if (btree_node_fake(b))
- continue;
+ r->alive = false;
+ r->error = 0;
+
+ if (!bch2_btree_has_scanned_nodes(c, i)) {
+ mustfix_fsck_err(c, btree_root_unreadable_and_scan_found_nothing,
+ "no nodes found for btree %s, continue?", bch2_btree_id_str(i));
+ bch2_btree_root_alloc_fake(c, i, 0);
+ } else {
+ bch2_btree_root_alloc_fake(c, i, 1);
+ bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
+ if (ret)
+ break;
+ }
+
+ reconstructed_root = true;
+ }
+
+ struct btree *b = r->b;
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
- ret = bch2_btree_repair_topology_recurse(trans, b);
+ ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan);
six_unlock_read(&b->c.lock);
if (ret == DROP_THIS_NODE) {
- bch_err(c, "empty btree root - repair unimplemented");
- ret = -BCH_ERR_fsck_repair_unimplemented;
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+ mutex_lock(&c->btree_cache.lock);
+ list_move(&b->list, &c->btree_cache.freeable);
+ mutex_unlock(&c->btree_cache.lock);
+
+ r->b = NULL;
+
+ if (!reconstructed_root)
+ goto reconstruct_root;
+
+ bch_err(c, "empty btree root %s", bch2_btree_id_str(i));
+ bch2_btree_root_alloc_fake(c, i, 0);
+ r->alive = false;
+ ret = 0;
}
}
-
+fsck_err:
bch2_trans_put(trans);
-
return ret;
}
bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) {
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
- enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, p, entry_c);
if (fsck_err_on(!g->gen_valid,
c, ptr_to_missing_alloc_key,
continue;
if (fsck_err_on(bucket_data_type(g->data_type) &&
- bucket_data_type(g->data_type) != data_type, c,
+ bucket_data_type(g->data_type) !=
+ bucket_data_type(data_type), c,
ptr_bucket_data_type_mismatch,
"bucket %u:%zu different types of data in same bucket: %s, %s\n"
"while marking %s",
}
if (do_update) {
- struct bkey_ptrs ptrs;
- union bch_extent_entry *entry;
- struct bch_extent_ptr *ptr;
- struct bkey_i *new;
-
if (is_root) {
bch_err(c, "cannot update btree roots yet");
ret = -EINVAL;
goto err;
}
- new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
+ struct bkey_i *new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
if (!new) {
ret = -BCH_ERR_ENOMEM_gc_repair_key;
bch_err_msg(c, ret, "allocating new key");
* btree node isn't there anymore, the read path will
* sort it out:
*/
- ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
bkey_for_each_ptr(ptrs, ptr) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bucket *g = PTR_GC_BUCKET(ca, ptr);
ptr->gen = g->gen;
}
} else {
- bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- struct bucket *g = PTR_GC_BUCKET(ca, ptr);
- enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr);
-
- (ptr->cached &&
- (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) ||
- (!ptr->cached &&
- gen_cmp(ptr->gen, g->gen) < 0) ||
- gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
- (g->data_type &&
- g->data_type != data_type);
- }));
+ struct bkey_ptrs ptrs;
+ union bch_extent_entry *entry;
+restart_drop_ptrs:
+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+ bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+ struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(bkey_i_to_s_c(new), p, entry);
+
+ if ((p.ptr.cached &&
+ (!g->gen_valid || gen_cmp(p.ptr.gen, g->gen) > 0)) ||
+ (!p.ptr.cached &&
+ gen_cmp(p.ptr.gen, g->gen) < 0) ||
+ gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX ||
+ (g->data_type &&
+ g->data_type != data_type)) {
+ bch2_bkey_drop_ptr(bkey_i_to_s(new), &entry->ptr);
+ goto restart_drop_ptrs;
+ }
+ }
again:
ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
bkey_extent_entry_for_each(ptrs, entry) {
}
}
- ret = bch2_journal_key_insert_take(c, btree_id, level, new);
- if (ret) {
- kfree(new);
- goto err;
- }
-
if (level)
bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
bch_info(c, "new key %s", buf.buf);
}
+ ret = bch2_journal_key_insert_take(c, btree_id, level, new);
+ if (ret) {
+ kfree(new);
+ goto err;
+ }
+
*k = bkey_i_to_s_c(new);
}
err:
struct bch_fs *c = trans->c;
struct bkey deleted = KEY(0, 0, 0);
struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
+ struct printbuf buf = PRINTBUF;
int ret = 0;
deleted.p = k->k->p;
BUG_ON(bch2_journal_seq_verify &&
k->k->version.lo > atomic64_read(&c->journal.seq));
- ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
- if (ret)
- goto err;
-
if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c,
bkey_version_in_future,
"key version number higher than recorded: %llu > %llu",
atomic64_set(&c->key_version, k->k->version.lo);
}
+ ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
+ if (ret)
+ goto err;
+
+ if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, *k),
+ c, btree_bitmap_not_marked,
+ "btree ptr not marked in member info btree allocated bitmap\n %s",
+ (bch2_bkey_val_to_text(&buf, c, *k),
+ buf.buf))) {
+ mutex_lock(&c->sb_lock);
+ bch2_dev_btree_bitmap_mark(c, *k);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ }
+
ret = commit_do(trans, NULL, NULL, 0,
- bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
+ bch2_key_trigger(trans, btree_id, level, old,
+ unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
fsck_err:
err:
+ printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial)
{
- struct bch_fs *c = trans->c;
struct btree_node_iter iter;
struct bkey unpacked;
struct bkey_s_c k;
- struct bkey_buf prev, cur;
int ret = 0;
+ ret = bch2_btree_node_check_topology(trans, b);
+ if (ret)
+ return ret;
+
if (!btree_node_type_needs_gc(btree_node_type(b)))
return 0;
bch2_btree_node_iter_init_from_start(&iter, b);
- bch2_bkey_buf_init(&prev);
- bch2_bkey_buf_init(&cur);
- bkey_init(&prev.k->k);
while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false,
&k, initial);
if (ret)
- break;
+ return ret;
bch2_btree_node_iter_advance(&iter, b);
-
- if (b->c.level) {
- bch2_bkey_buf_reassemble(&cur, c, k);
-
- ret = bch2_gc_check_topology(c, b, &prev, cur,
- bch2_btree_node_iter_end(&iter));
- if (ret)
- break;
- }
}
- bch2_bkey_buf_exit(&cur, c);
- bch2_bkey_buf_exit(&prev, c);
- return ret;
+ return 0;
}
static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id,
struct bch_fs *c = trans->c;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
- struct bkey_buf cur, prev;
+ struct bkey_buf cur;
struct printbuf buf = PRINTBUF;
int ret = 0;
+ ret = bch2_btree_node_check_topology(trans, b);
+ if (ret)
+ return ret;
+
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
- bch2_bkey_buf_init(&prev);
bch2_bkey_buf_init(&cur);
- bkey_init(&prev.k->k);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
BUG_ON(bpos_lt(k.k->p, b->data->min_key));
if (ret)
goto fsck_err;
- if (b->c.level) {
- bch2_bkey_buf_reassemble(&cur, c, k);
- k = bkey_i_to_s_c(cur.k);
-
- bch2_btree_and_journal_iter_advance(&iter);
-
- ret = bch2_gc_check_topology(c, b,
- &prev, cur,
- !bch2_btree_and_journal_iter_peek(&iter).k);
- if (ret)
- goto fsck_err;
- } else {
- bch2_btree_and_journal_iter_advance(&iter);
- }
+ bch2_btree_and_journal_iter_advance(&iter);
}
if (b->c.level > target_depth) {
}
fsck_err:
bch2_bkey_buf_exit(&cur, c);
- bch2_bkey_buf_exit(&prev, c);
bch2_btree_and_journal_iter_exit(&iter);
printbuf_exit(&buf);
return ret;
b = bch2_btree_id_root(c, btree_id)->b;
- if (btree_node_fake(b))
- return 0;
-
six_lock_read(&b->c.lock, NULL, NULL);
printbuf_reset(&buf);
bch2_bpos_to_text(&buf, b->data->min_key);
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(new);
if (ret)
- return ret;
+ goto out;
if (!r->refcount)
new->k.type = KEY_TYPE_deleted;
*bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
ret = bch2_trans_update(trans, iter, new, 0);
}
+out:
fsck_err:
printbuf_exit(&buf);
return ret;
*/
bch2_bset_set_no_aux_tree(b, b->set);
bch2_btree_build_aux_trees(b);
+ b->nr = bch2_btree_node_count_keys(b);
struct bkey_s_c k;
struct bkey unpacked;
(rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0);
}
-static bool __bkey_valid(struct bch_fs *c, struct btree *b,
+static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
struct bset *i, struct bkey_packed *k)
{
if (bkey_p_next(k) > vstruct_last(i))
if (k->format > KEY_FORMAT_CURRENT)
return false;
- if (k->u64s < bkeyp_key_u64s(&b->format, k))
+ if (!bkeyp_u64s_valid(&b->format, k))
return false;
struct printbuf buf = PRINTBUF;
"invalid bkey format %u", k->format))
goto drop_this_key;
- if (btree_err_on(k->u64s < bkeyp_key_u64s(&b->format, k),
+ if (btree_err_on(!bkeyp_u64s_valid(&b->format, k),
-BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i,
btree_node_bkey_bad_u64s,
- "k->u64s too small (%u < %u)", k->u64s, bkeyp_key_u64s(&b->format, k)))
+ "bad k->u64s %u (min %u max %zu)", k->u64s,
+ bkeyp_key_u64s(&b->format, k),
+ U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k)))
goto drop_this_key;
if (!write)
* do
*/
- if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
+ if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
for (next_good_key = 1;
next_good_key < (u64 *) vstruct_last(i) - (u64 *) k;
next_good_key++)
- if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
+ if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
goto got_good_key;
-
}
/*
return retry_read;
fsck_err:
if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
- ret == -BCH_ERR_btree_node_read_err_must_retry)
+ ret == -BCH_ERR_btree_node_read_err_must_retry) {
retry_read = 1;
- else
+ } else {
set_btree_node_read_error(b);
+ bch2_btree_lost_data(c, b->c.btree_id);
+ }
goto out;
}
if (!can_retry) {
set_btree_node_read_error(b);
+ bch2_btree_lost_data(c, b->c.btree_id);
break;
}
}
rb->start_time);
bio_put(&rb->bio);
- if (saw_error && !btree_node_read_error(b)) {
+ if (saw_error &&
+ !btree_node_read_error(b) &&
+ c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
printbuf_reset(&buf);
bch2_bpos_to_text(&buf, b->key.k.p);
bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
ret = -1;
}
- if (ret)
+ if (ret) {
set_btree_node_read_error(b);
- else if (*saw_error)
+ bch2_btree_lost_data(c, b->c.btree_id);
+ } else if (*saw_error)
bch2_btree_node_rewrite_async(c, b);
for (i = 0; i < ra->nr; i++) {
prt_str(&buf, "btree node read error: no device to read from\n at ");
bch2_btree_pos_to_text(&buf, c, b);
- bch_err(c, "%s", buf.buf);
+ bch_err_ratelimited(c, "%s", buf.buf);
if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology)
bch2_fatal_error(c);
set_btree_node_read_error(b);
+ bch2_btree_lost_data(c, b->c.btree_id);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
printbuf_exit(&buf);
} else {
ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
- BCH_WATERMARK_reclaim|
+ BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw,
if (ret)
goto err;
} else {
- bch2_bkey_buf_unpack(&tmp, c, l->b,
- bch2_btree_node_iter_peek(&l->iter, l->b));
+ struct bkey_packed *k = bch2_btree_node_iter_peek(&l->iter, l->b);
+ if (!k) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "node not found at pos ");
+ bch2_bpos_to_text(&buf, path->pos);
+ prt_str(&buf, " within parent node ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&l->b->key));
+
+ bch2_fs_fatal_error(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ ret = -BCH_ERR_btree_need_topology_repair;
+ goto err;
+ }
+
+ bch2_bkey_buf_unpack(&tmp, c, l->b, k);
if ((flags & BTREE_ITER_PREFETCH) &&
c->opts.btree_node_prefetch) {
return ret;
}
-
static int bch2_btree_path_traverse_all(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct btree_transaction_stats *s = btree_trans_stats(trans);
s->max_mem = max(s->max_mem, new_bytes);
+ if (trans->used_mempool) {
+ if (trans->mem_bytes >= new_bytes)
+ goto out_change_top;
+
+ /* No more space from mempool item, need malloc new one */
+ new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN);
+ if (unlikely(!new_mem)) {
+ bch2_trans_unlock(trans);
+
+ new_mem = kmalloc(new_bytes, GFP_KERNEL);
+ if (!new_mem)
+ return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
+
+ ret = bch2_trans_relock(trans);
+ if (ret) {
+ kfree(new_mem);
+ return ERR_PTR(ret);
+ }
+ }
+ memcpy(new_mem, trans->mem, trans->mem_top);
+ trans->used_mempool = false;
+ mempool_free(trans->mem, &c->btree_trans_mem_pool);
+ goto out_new_mem;
+ }
+
new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
if (unlikely(!new_mem)) {
bch2_trans_unlock(trans);
if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
new_bytes = BTREE_TRANS_MEM_MAX;
+ memcpy(new_mem, trans->mem, trans->mem_top);
+ trans->used_mempool = true;
kfree(trans->mem);
}
if (ret)
return ERR_PTR(ret);
}
-
+out_new_mem:
trans->mem = new_mem;
trans->mem_bytes = new_bytes;
trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
}
-
+out_change_top:
p = trans->mem + trans->mem_top;
trans->mem_top += size;
memset(p, 0, size);
if (paths_allocated != trans->_paths_allocated)
kvfree_rcu_mightsleep(paths_allocated);
- if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
+ if (trans->used_mempool)
mempool_free(trans->mem, &c->btree_trans_mem_pool);
else
kfree(trans->mem);
{
struct btree_trans *trans = iter->trans;
- if (!trans->restarted)
- btree_iter_path(trans, iter)->preserve = false;
+ if (!iter->path || trans->restarted)
+ return;
+
+ struct btree_path *path = btree_iter_path(trans, iter);
+ path->preserve = false;
+ if (path->ref == 1)
+ path->should_be_locked = false;
}
void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
static inline int btree_trans_too_many_iters(struct btree_trans *trans)
{
- if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8)
+ if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_NORMAL_LIMIT - 8)
return __bch2_btree_trans_too_many_iters(trans);
return 0;
return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx);
}
+static void journal_iter_verify(struct journal_iter *iter)
+{
+ struct journal_keys *keys = iter->keys;
+ size_t gap_size = keys->size - keys->nr;
+
+ BUG_ON(iter->idx >= keys->gap &&
+ iter->idx < keys->gap + gap_size);
+
+ if (iter->idx < keys->size) {
+ struct journal_key *k = keys->data + iter->idx;
+
+ int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
+ cmp_int(k->level, iter->level);
+ BUG_ON(cmp < 0);
+ }
+}
+
static void journal_iters_fix(struct bch_fs *c)
{
struct journal_keys *keys = &c->journal_keys;
/* The key we just inserted is immediately before the gap: */
size_t gap_end = keys->gap + (keys->size - keys->nr);
- struct btree_and_journal_iter *iter;
+ struct journal_key *new_key = &keys->data[keys->gap - 1];
+ struct journal_iter *iter;
/*
* If an iterator points one after the key we just inserted, decrement
* decrement was unnecessary, bch2_btree_and_journal_iter_peek() will
* handle that:
*/
- list_for_each_entry(iter, &c->journal_iters, journal.list)
- if (iter->journal.idx == gap_end)
- iter->journal.idx = keys->gap - 1;
+ list_for_each_entry(iter, &c->journal_iters, list) {
+ journal_iter_verify(iter);
+ if (iter->idx == gap_end &&
+ new_key->btree_id == iter->btree_id &&
+ new_key->level == iter->level)
+ iter->idx = keys->gap - 1;
+ journal_iter_verify(iter);
+ }
}
static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap)
if (idx > keys->gap)
idx -= keys->size - keys->nr;
+ size_t old_gap = keys->gap;
+
if (keys->nr == keys->size) {
+ journal_iters_move_gap(c, old_gap, keys->size);
+ old_gap = keys->size;
+
struct journal_keys new_keys = {
.nr = keys->nr,
.size = max_t(size_t, keys->size, 8) * 2,
keys->gap = keys->nr;
}
- journal_iters_move_gap(c, keys->gap, idx);
+ journal_iters_move_gap(c, old_gap, idx);
move_gap(keys, idx);
return bch2_journal_key_insert(c, id, level, &whiteout);
}
+bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree,
+ unsigned level, struct bpos pos)
+{
+ struct journal_keys *keys = &trans->c->journal_keys;
+ size_t idx = bch2_journal_key_search(keys, btree, level, pos);
+
+ if (!trans->journal_replay_not_finished)
+ return false;
+
+ return (idx < keys->size &&
+ keys->data[idx].btree_id == btree &&
+ keys->data[idx].level == level &&
+ bpos_eq(keys->data[idx].k->k.p, pos) &&
+ bkey_deleted(&keys->data[idx].k->k));
+}
+
void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
unsigned level, struct bpos pos)
{
static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
{
- struct journal_key *k = iter->keys->data + iter->idx;
+ journal_iter_verify(iter);
+
+ while (iter->idx < iter->keys->size) {
+ struct journal_key *k = iter->keys->data + iter->idx;
+
+ int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
+ cmp_int(k->level, iter->level);
+ if (cmp > 0)
+ break;
+ BUG_ON(cmp);
- while (k < iter->keys->data + iter->keys->size &&
- k->btree_id == iter->btree_id &&
- k->level == iter->level) {
if (!k->overwritten)
return bkey_i_to_s_c(k->k);
bch2_journal_iter_advance(iter);
- k = iter->keys->data + iter->idx;
}
return bkey_s_c_null;
iter->level = level;
iter->keys = &c->journal_keys;
iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos);
+
+ journal_iter_verify(iter);
}
static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
{
- struct bkey_s_c btree_k, journal_k, ret;
+ struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret;
if (iter->prefetch && iter->journal.level)
btree_and_journal_iter_prefetch(iter);
bpos_lt(btree_k.k->p, iter->pos))
bch2_journal_iter_advance_btree(iter);
- while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
- bpos_lt(journal_k.k->p, iter->pos))
- bch2_journal_iter_advance(&iter->journal);
+ if (iter->trans->journal_replay_not_finished)
+ while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
+ bpos_lt(journal_k.k->p, iter->pos))
+ bch2_journal_iter_advance(&iter->journal);
ret = journal_k.k &&
(!btree_k.k || bpos_le(journal_k.k->p, btree_k.k->p))
iter->trans = trans;
iter->b = b;
iter->node_iter = node_iter;
- bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
- INIT_LIST_HEAD(&iter->journal.list);
iter->pos = b->data->min_key;
iter->at_end = false;
+ INIT_LIST_HEAD(&iter->journal.list);
+
+ if (trans->journal_replay_not_finished) {
+ bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
+ if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags))
+ list_add(&iter->journal.list, &trans->c->journal_iters);
+ }
}
/*
bch2_btree_node_iter_init_from_start(&node_iter, b);
__bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key);
- list_add(&iter->journal.list, &trans->c->journal_iters);
}
/* sort and dedup all keys in the journal: */
bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_read, keys->nr);
return 0;
}
+
+void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree,
+ unsigned level_min, unsigned level_max,
+ struct bpos start, struct bpos end)
+{
+ struct journal_keys *keys = &c->journal_keys;
+ size_t dst = 0;
+
+ move_gap(keys, keys->nr);
+
+ darray_for_each(*keys, i)
+ if (!(i->btree_id == btree &&
+ i->level >= level_min &&
+ i->level <= level_max &&
+ bpos_ge(i->k->k.p, start) &&
+ bpos_le(i->k->k.p, end)))
+ keys->data[dst++] = *i;
+ keys->nr = keys->gap = dst;
+}
unsigned, struct bkey_i *);
int bch2_journal_key_delete(struct bch_fs *, enum btree_id,
unsigned, struct bpos);
-void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id,
- unsigned, struct bpos);
+bool bch2_key_deleted_in_journal(struct btree_trans *, enum btree_id, unsigned, struct bpos);
+void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, unsigned, struct bpos);
void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
int bch2_journal_keys_sort(struct bch_fs *);
+void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id,
+ unsigned, unsigned,
+ struct bpos, struct bpos);
+
#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */
} else {
mutex_lock(&bc->lock);
list_move_tail(&ck->list, &bc->freed_pcpu);
+ bc->nr_freed_pcpu++;
mutex_unlock(&bc->lock);
}
}
if (!list_empty(&bc->freed_pcpu)) {
ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list);
list_del_init(&ck->list);
+ bc->nr_freed_pcpu--;
}
mutex_unlock(&bc->lock);
}
commit_flags |= BCH_WATERMARK_reclaim;
if (ck->journal.seq != journal_last_seq(j) ||
- j->watermark == BCH_WATERMARK_stripe)
+ !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags))
commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
ret = bch2_btree_iter_traverse(&b_iter) ?:
* Newest freed entries are at the end of the list - once we hit one
* that's too new to be freed, we can bail out:
*/
- scanned += bc->nr_freed_nonpcpu;
-
list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
bc->nr_freed_nonpcpu--;
}
- if (scanned >= nr)
- goto out;
-
- scanned += bc->nr_freed_pcpu;
-
list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
bc->nr_freed_pcpu--;
}
- if (scanned >= nr)
- goto out;
-
rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
if (bc->shrink_iter >= tbl->size)
next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
ck = container_of(pos, struct bkey_cached, hash);
- if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
goto next;
-
- if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
+ } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) {
clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
- else if (bkey_cached_lock_for_evict(ck)) {
+ goto next;
+ } else if (bkey_cached_lock_for_evict(ck)) {
bkey_cached_evict(bc, ck);
bkey_cached_free(bc, ck);
}
} while (scanned < nr && bc->shrink_iter != start);
rcu_read_unlock();
-out:
memalloc_nofs_restore(flags);
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
mutex_unlock(&bc->lock);
struct btree_path *path,
struct btree_bkey_cached_common *b)
{
- struct btree_path *linked;
- unsigned i, iter;
- int ret;
-
- /*
- * XXX BIG FAT NOTICE
- *
- * Drop all read locks before taking a write lock:
- *
- * This is a hack, because bch2_btree_node_lock_write_nofail() is a
- * hack - but by dropping read locks first, this should never fail, and
- * we only use this in code paths where whatever read locks we've
- * already taken are no longer needed:
- */
-
- trans_for_each_path(trans, linked, iter) {
- if (!linked->nodes_locked)
- continue;
-
- for (i = 0; i < BTREE_MAX_DEPTH; i++)
- if (btree_node_read_locked(linked, i)) {
- btree_node_unlock(trans, linked, i);
- btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
- }
- }
-
- ret = __btree_node_lock_write(trans, path, b, true);
+ int ret = __btree_node_lock_write(trans, path, b, true);
BUG_ON(ret);
}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_journal_iter.h"
+#include "btree_node_scan.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "error.h"
+#include "journal_io.h"
+#include "recovery_passes.h"
+
+#include <linux/kthread.h>
+#include <linux/sort.h>
+
+struct find_btree_nodes_worker {
+ struct closure *cl;
+ struct find_btree_nodes *f;
+ struct bch_dev *ca;
+};
+
+static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n)
+{
+ prt_printf(out, "%s l=%u seq=%u cookie=%llx ", bch2_btree_id_str(n->btree_id), n->level, n->seq, n->cookie);
+ bch2_bpos_to_text(out, n->min_key);
+ prt_str(out, "-");
+ bch2_bpos_to_text(out, n->max_key);
+
+ if (n->range_updated)
+ prt_str(out, " range updated");
+ if (n->overwritten)
+ prt_str(out, " overwritten");
+
+ for (unsigned i = 0; i < n->nr_ptrs; i++) {
+ prt_char(out, ' ');
+ bch2_extent_ptr_to_text(out, c, n->ptrs + i);
+ }
+}
+
+static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes)
+{
+ printbuf_indent_add(out, 2);
+ darray_for_each(nodes, i) {
+ found_btree_node_to_text(out, c, i);
+ prt_newline(out);
+ }
+ printbuf_indent_sub(out, 2);
+}
+
+static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f)
+{
+ struct bkey_i_btree_ptr_v2 *bp = bkey_btree_ptr_v2_init(k);
+
+ set_bkey_val_u64s(&bp->k, sizeof(struct bch_btree_ptr_v2) / sizeof(u64) + f->nr_ptrs);
+ bp->k.p = f->max_key;
+ bp->v.seq = cpu_to_le64(f->cookie);
+ bp->v.sectors_written = 0;
+ bp->v.flags = 0;
+ bp->v.min_key = f->min_key;
+ SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated);
+ memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs);
+}
+
+static bool found_btree_node_is_readable(struct btree_trans *trans,
+ const struct found_btree_node *f)
+{
+ struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k;
+
+ found_btree_node_to_key(&k.k, f);
+
+ struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false);
+ bool ret = !IS_ERR_OR_NULL(b);
+ if (ret)
+ six_unlock_read(&b->c.lock);
+
+ /*
+ * We might update this node's range; if that happens, we need the node
+ * to be re-read so the read path can trim keys that are no longer in
+ * this node
+ */
+ if (b != btree_node_root(trans->c, b))
+ bch2_btree_node_evict(trans, &k.k);
+ return ret;
+}
+
+static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
+{
+ const struct found_btree_node *l = _l;
+ const struct found_btree_node *r = _r;
+
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ cmp_int(l->level, r->level) ?:
+ cmp_int(l->cookie, r->cookie);
+}
+
+/*
+ * Given two found btree nodes, if their sequence numbers are equal, take the
+ * one that's readable:
+ */
+static int found_btree_node_cmp_time(const struct found_btree_node *l,
+ const struct found_btree_node *r)
+{
+ return cmp_int(l->seq, r->seq);
+}
+
+static int found_btree_node_cmp_pos(const void *_l, const void *_r)
+{
+ const struct found_btree_node *l = _l;
+ const struct found_btree_node *r = _r;
+
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ -cmp_int(l->level, r->level) ?:
+ bpos_cmp(l->min_key, r->min_key) ?:
+ -found_btree_node_cmp_time(l, r);
+}
+
+static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
+ struct bio *bio, struct btree_node *bn, u64 offset)
+{
+ struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
+
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
+ bio->bi_iter.bi_sector = offset;
+ bch2_bio_map(bio, bn, PAGE_SIZE);
+
+ submit_bio_wait(bio);
+ if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
+ "IO error in try_read_btree_node() at %llu: %s",
+ offset, bch2_blk_status_to_str(bio->bi_status)))
+ return;
+
+ if (le64_to_cpu(bn->magic) != bset_magic(c))
+ return;
+
+ if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
+ struct nonce nonce = btree_nonce(&bn->keys, 0);
+ unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
+
+ bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes);
+ }
+
+ if (btree_id_is_alloc(BTREE_NODE_ID(bn)))
+ return;
+
+ if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH)
+ return;
+
+ rcu_read_lock();
+ struct found_btree_node n = {
+ .btree_id = BTREE_NODE_ID(bn),
+ .level = BTREE_NODE_LEVEL(bn),
+ .seq = BTREE_NODE_SEQ(bn),
+ .cookie = le64_to_cpu(bn->keys.seq),
+ .min_key = bn->min_key,
+ .max_key = bn->max_key,
+ .nr_ptrs = 1,
+ .ptrs[0].type = 1 << BCH_EXTENT_ENTRY_ptr,
+ .ptrs[0].offset = offset,
+ .ptrs[0].dev = ca->dev_idx,
+ .ptrs[0].gen = *bucket_gen(ca, sector_to_bucket(ca, offset)),
+ };
+ rcu_read_unlock();
+
+ if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) {
+ mutex_lock(&f->lock);
+ if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
+ bch_err(c, "try_read_btree_node() can't handle endian conversion");
+ f->ret = -EINVAL;
+ goto unlock;
+ }
+
+ if (darray_push(&f->nodes, n))
+ f->ret = -ENOMEM;
+unlock:
+ mutex_unlock(&f->lock);
+ }
+}
+
+static int read_btree_nodes_worker(void *p)
+{
+ struct find_btree_nodes_worker *w = p;
+ struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
+ struct bch_dev *ca = w->ca;
+ void *buf = (void *) __get_free_page(GFP_KERNEL);
+ struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
+ unsigned long last_print = jiffies;
+
+ if (!buf || !bio) {
+ bch_err(c, "read_btree_nodes_worker: error allocating bio/buf");
+ w->f->ret = -ENOMEM;
+ goto err;
+ }
+
+ for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++)
+ for (unsigned bucket_offset = 0;
+ bucket_offset + btree_sectors(c) <= ca->mi.bucket_size;
+ bucket_offset += btree_sectors(c)) {
+ if (time_after(jiffies, last_print + HZ * 30)) {
+ u64 cur_sector = bucket * ca->mi.bucket_size + bucket_offset;
+ u64 end_sector = ca->mi.nbuckets * ca->mi.bucket_size;
+
+ bch_info(ca, "%s: %2u%% done", __func__,
+ (unsigned) div64_u64(cur_sector * 100, end_sector));
+ last_print = jiffies;
+ }
+
+ u64 sector = bucket * ca->mi.bucket_size + bucket_offset;
+
+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
+ !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
+ continue;
+
+ try_read_btree_node(w->f, ca, bio, buf, sector);
+ }
+err:
+ bio_put(bio);
+ free_page((unsigned long) buf);
+ percpu_ref_get(&ca->io_ref);
+ closure_put(w->cl);
+ kfree(w);
+ return 0;
+}
+
+static int read_btree_nodes(struct find_btree_nodes *f)
+{
+ struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
+ struct closure cl;
+ int ret = 0;
+
+ closure_init_stack(&cl);
+
+ for_each_online_member(c, ca) {
+ if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree)))
+ continue;
+
+ struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
+ struct task_struct *t;
+
+ if (!w) {
+ percpu_ref_put(&ca->io_ref);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ percpu_ref_get(&ca->io_ref);
+ closure_get(&cl);
+ w->cl = &cl;
+ w->f = f;
+ w->ca = ca;
+
+ t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
+ ret = IS_ERR_OR_NULL(t);
+ if (ret) {
+ percpu_ref_put(&ca->io_ref);
+ closure_put(&cl);
+ f->ret = ret;
+ bch_err(c, "error starting kthread: %i", ret);
+ break;
+ }
+ }
+err:
+ closure_sync(&cl);
+ return f->ret ?: ret;
+}
+
+static void bubble_up(struct found_btree_node *n, struct found_btree_node *end)
+{
+ while (n + 1 < end &&
+ found_btree_node_cmp_pos(n, n + 1) > 0) {
+ swap(n[0], n[1]);
+ n++;
+ }
+}
+
+static int handle_overwrites(struct bch_fs *c,
+ struct found_btree_node *start,
+ struct found_btree_node *end)
+{
+ struct found_btree_node *n;
+again:
+ for (n = start + 1;
+ n < end &&
+ n->btree_id == start->btree_id &&
+ n->level == start->level &&
+ bpos_lt(n->min_key, start->max_key);
+ n++) {
+ int cmp = found_btree_node_cmp_time(start, n);
+
+ if (cmp > 0) {
+ if (bpos_cmp(start->max_key, n->max_key) >= 0)
+ n->overwritten = true;
+ else {
+ n->range_updated = true;
+ n->min_key = bpos_successor(start->max_key);
+ n->range_updated = true;
+ bubble_up(n, end);
+ goto again;
+ }
+ } else if (cmp < 0) {
+ BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0);
+
+ start->max_key = bpos_predecessor(n->min_key);
+ start->range_updated = true;
+ } else if (n->level) {
+ n->overwritten = true;
+ } else {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "overlapping btree nodes with same seq! halting\n ");
+ found_btree_node_to_text(&buf, c, start);
+ prt_str(&buf, "\n ");
+ found_btree_node_to_text(&buf, c, n);
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+ }
+
+ return 0;
+}
+
+int bch2_scan_for_btree_nodes(struct bch_fs *c)
+{
+ struct find_btree_nodes *f = &c->found_btree_nodes;
+ struct printbuf buf = PRINTBUF;
+ size_t dst;
+ int ret = 0;
+
+ if (f->nodes.nr)
+ return 0;
+
+ mutex_init(&f->lock);
+
+ ret = read_btree_nodes(f);
+ if (ret)
+ return ret;
+
+ if (!f->nodes.nr) {
+ bch_err(c, "%s: no btree nodes found", __func__);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (0 && c->opts.verbose) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "%s: nodes found:\n", __func__);
+ found_btree_nodes_to_text(&buf, c, f->nodes);
+ bch2_print_string_as_lines(KERN_INFO, buf.buf);
+ }
+
+ sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);
+
+ dst = 0;
+ darray_for_each(f->nodes, i) {
+ struct found_btree_node *prev = dst ? f->nodes.data + dst - 1 : NULL;
+
+ if (prev &&
+ prev->cookie == i->cookie) {
+ if (prev->nr_ptrs == ARRAY_SIZE(prev->ptrs)) {
+ bch_err(c, "%s: found too many replicas for btree node", __func__);
+ ret = -EINVAL;
+ goto err;
+ }
+ prev->ptrs[prev->nr_ptrs++] = i->ptrs[0];
+ } else {
+ f->nodes.data[dst++] = *i;
+ }
+ }
+ f->nodes.nr = dst;
+
+ sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
+
+ if (0 && c->opts.verbose) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__);
+ found_btree_nodes_to_text(&buf, c, f->nodes);
+ bch2_print_string_as_lines(KERN_INFO, buf.buf);
+ }
+
+ dst = 0;
+ darray_for_each(f->nodes, i) {
+ if (i->overwritten)
+ continue;
+
+ ret = handle_overwrites(c, i, &darray_top(f->nodes));
+ if (ret)
+ goto err;
+
+ BUG_ON(i->overwritten);
+ f->nodes.data[dst++] = *i;
+ }
+ f->nodes.nr = dst;
+
+ if (c->opts.verbose) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
+ found_btree_nodes_to_text(&buf, c, f->nodes);
+ bch2_print_string_as_lines(KERN_INFO, buf.buf);
+ }
+
+ eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
+err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int found_btree_node_range_start_cmp(const void *_l, const void *_r)
+{
+ const struct found_btree_node *l = _l;
+ const struct found_btree_node *r = _r;
+
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ -cmp_int(l->level, r->level) ?:
+ bpos_cmp(l->max_key, r->min_key);
+}
+
+#define for_each_found_btree_node_in_range(_f, _search, _idx) \
+ for (size_t _idx = eytzinger0_find_gt((_f)->nodes.data, (_f)->nodes.nr, \
+ sizeof((_f)->nodes.data[0]), \
+ found_btree_node_range_start_cmp, &search); \
+ _idx < (_f)->nodes.nr && \
+ (_f)->nodes.data[_idx].btree_id == _search.btree_id && \
+ (_f)->nodes.data[_idx].level == _search.level && \
+ bpos_lt((_f)->nodes.data[_idx].min_key, _search.max_key); \
+ _idx = eytzinger0_next(_idx, (_f)->nodes.nr))
+
+bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b)
+{
+ struct find_btree_nodes *f = &c->found_btree_nodes;
+
+ struct found_btree_node search = {
+ .btree_id = b->c.btree_id,
+ .level = b->c.level,
+ .min_key = b->data->min_key,
+ .max_key = b->key.k.p,
+ };
+
+ for_each_found_btree_node_in_range(f, search, idx)
+ if (f->nodes.data[idx].seq > BTREE_NODE_SEQ(b->data))
+ return true;
+ return false;
+}
+
+bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
+{
+ struct found_btree_node search = {
+ .btree_id = btree,
+ .level = 0,
+ .min_key = POS_MIN,
+ .max_key = SPOS_MAX,
+ };
+
+ for_each_found_btree_node_in_range(&c->found_btree_nodes, search, idx)
+ return true;
+ return false;
+}
+
+int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
+ unsigned level, struct bpos node_min, struct bpos node_max)
+{
+ if (btree_id_is_alloc(btree))
+ return 0;
+
+ struct find_btree_nodes *f = &c->found_btree_nodes;
+
+ int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ if (ret)
+ return ret;
+
+ if (c->opts.verbose) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level);
+ bch2_bpos_to_text(&buf, node_min);
+ prt_str(&buf, " - ");
+ bch2_bpos_to_text(&buf, node_max);
+
+ bch_info(c, "%s(): %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ struct found_btree_node search = {
+ .btree_id = btree,
+ .level = level,
+ .min_key = node_min,
+ .max_key = node_max,
+ };
+
+ for_each_found_btree_node_in_range(f, search, idx) {
+ struct found_btree_node n = f->nodes.data[idx];
+
+ n.range_updated |= bpos_lt(n.min_key, node_min);
+ n.min_key = bpos_max(n.min_key, node_min);
+
+ n.range_updated |= bpos_gt(n.max_key, node_max);
+ n.max_key = bpos_min(n.max_key, node_max);
+
+ struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
+
+ found_btree_node_to_key(&tmp.k, &n);
+
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
+ bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+
+ BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL));
+
+ ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void bch2_find_btree_nodes_exit(struct find_btree_nodes *f)
+{
+ darray_exit(&f->nodes);
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_NODE_SCAN_H
+#define _BCACHEFS_BTREE_NODE_SCAN_H
+
+int bch2_scan_for_btree_nodes(struct bch_fs *);
+bool bch2_btree_node_is_stale(struct bch_fs *, struct btree *);
+bool bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id);
+int bch2_get_scanned_nodes(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos);
+void bch2_find_btree_nodes_exit(struct find_btree_nodes *);
+
+#endif /* _BCACHEFS_BTREE_NODE_SCAN_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_NODE_SCAN_TYPES_H
+#define _BCACHEFS_BTREE_NODE_SCAN_TYPES_H
+
+#include "darray.h"
+
+struct found_btree_node {
+ bool range_updated:1;
+ bool overwritten:1;
+ u8 btree_id;
+ u8 level;
+ u32 seq;
+ u64 cookie;
+
+ struct bpos min_key;
+ struct bpos max_key;
+
+ unsigned nr_ptrs;
+ struct bch_extent_ptr ptrs[BCH_REPLICAS_MAX];
+};
+
+typedef DARRAY(struct found_btree_node) found_btree_nodes;
+
+struct find_btree_nodes {
+ int ret;
+ struct mutex lock;
+ found_btree_nodes nodes;
+};
+
+#endif /* _BCACHEFS_BTREE_NODE_SCAN_TYPES_H */
!(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
i->k->k.p.snapshot &&
- bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
+ bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot) > 0);
}
static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
struct bkey_cached *ck = (void *) path->l[0].b;
unsigned new_u64s;
struct bkey_i *new_k;
+ unsigned watermark = flags & BCH_WATERMARK_MASK;
EBUG_ON(path->level);
- if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
- bch2_btree_key_cache_must_wait(c) &&
- !(flags & BCH_TRANS_COMMIT_journal_reclaim))
+ if (watermark < BCH_WATERMARK_reclaim &&
+ !test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
+ bch2_btree_key_cache_must_wait(c))
return -BCH_ERR_btree_insert_need_journal_reclaim;
/*
}
static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
- struct btree_insert_entry *btree_id_start)
+ unsigned btree_id_start)
{
- struct btree_insert_entry *i;
bool trans_trigger_run;
int ret, overwrite;
do {
trans_trigger_run = false;
- for (i = btree_id_start;
- i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
+ for (unsigned i = btree_id_start;
+ i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
i++) {
- if (i->btree_id != btree_id)
+ if (trans->updates[i].btree_id != btree_id)
continue;
- ret = run_one_trans_trigger(trans, i, overwrite);
+ ret = run_one_trans_trigger(trans, trans->updates + i, overwrite);
if (ret < 0)
return ret;
if (ret)
static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
{
- struct btree_insert_entry *btree_id_start = trans->updates;
- unsigned btree_id = 0;
+ unsigned btree_id = 0, btree_id_start = 0;
int ret = 0;
/*
if (btree_id == BTREE_ID_alloc)
continue;
- while (btree_id_start < trans->updates + trans->nr_updates &&
- btree_id_start->btree_id < btree_id)
+ while (btree_id_start < trans->nr_updates &&
+ trans->updates[btree_id_start].btree_id < btree_id)
btree_id_start++;
ret = run_btree_triggers(trans, btree_id, btree_id_start);
return ret;
}
- trans_for_each_update(trans, i) {
+ for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
+ struct btree_insert_entry *i = trans->updates + idx;
+
if (i->btree_id > BTREE_ID_alloc)
break;
if (i->btree_id == BTREE_ID_alloc) {
- ret = run_btree_triggers(trans, BTREE_ID_alloc, i);
+ ret = run_btree_triggers(trans, BTREE_ID_alloc, idx);
if (ret)
return ret;
break;
struct bch_fs *c = trans->c;
int ret = 0, u64s_delta = 0;
- trans_for_each_update(trans, i) {
+ for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
+ struct btree_insert_entry *i = trans->updates + idx;
if (i->cached)
continue;
int ret, unsigned long trace_ip)
{
struct bch_fs *c = trans->c;
+ enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
switch (ret) {
case -BCH_ERR_btree_insert_btree_node_full:
* flag
*/
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
- (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) {
+ watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
break;
}
struct btree_bkey_cached_common c;
unsigned long flags;
+ unsigned long btree_trans_barrier_seq;
u16 u64s;
bool valid;
- u32 btree_trans_barrier_seq;
struct bkey_cached_key key;
struct rhash_head hash;
unsigned long ip_allocated;
};
+/* Number of btree paths we preallocate, usually enough */
#define BTREE_ITER_INITIAL 64
+/*
+ * Lmiit for btree_trans_too_many_iters(); this is enough that almost all code
+ * paths should run inside this limit, and if they don't it usually indicates a
+ * bug (leaking/duplicated btree paths).
+ *
+ * exception: some fsck paths
+ *
+ * bugs with excessive path usage seem to have possibly been eliminated now, so
+ * we might consider eliminating this (and btree_trans_too_many_iter()) at some
+ * point.
+ */
+#define BTREE_ITER_NORMAL_LIMIT 256
+/* never exceed limit */
#define BTREE_ITER_MAX (1U << 10)
struct btree_trans_commit_hook;
struct bkey_i *update;
int ret;
+ if (unlikely(trans->journal_replay_not_finished))
+ return 0;
+
update = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(update);
if (ret)
struct bch_fs *c = trans->c;
int ret;
+ if (unlikely(trans->journal_replay_not_finished))
+ return 0;
+
ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?:
bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p);
if (ret < 0)
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "bkey_buf.h"
#include "bkey_methods.h"
#include "btree_cache.h"
#include "btree_gc.h"
#include "journal.h"
#include "journal_reclaim.h"
#include "keylist.h"
+#include "recovery_passes.h"
#include "replicas.h"
+#include "sb-members.h"
#include "super-io.h"
#include "trace.h"
#include <linux/random.h>
+static const char * const bch2_btree_update_modes[] = {
+#define x(t) #t,
+ BTREE_UPDATE_MODES()
+#undef x
+ NULL
+};
+
static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
btree_path_idx_t, struct btree *, struct keylist *);
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
return path_idx;
}
-/* Debug code: */
-
/*
* Verify that child nodes correctly span parent node's range:
*/
-static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
+int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
{
-#ifdef CONFIG_BCACHEFS_DEBUG
- struct bpos next_node = b->data->min_key;
- struct btree_node_iter iter;
+ struct bch_fs *c = trans->c;
+ struct bpos node_min = b->key.k.type == KEY_TYPE_btree_ptr_v2
+ ? bkey_i_to_btree_ptr_v2(&b->key)->v.min_key
+ : b->data->min_key;
+ struct btree_and_journal_iter iter;
struct bkey_s_c k;
- struct bkey_s_c_btree_ptr_v2 bp;
- struct bkey unpacked;
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ struct printbuf buf = PRINTBUF;
+ struct bkey_buf prev;
+ int ret = 0;
- BUG_ON(!b->c.level);
+ BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
+ b->data->min_key));
- if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
- return;
+ if (!b->c.level)
+ return 0;
- bch2_btree_node_iter_init_from_start(&iter, b);
+ bch2_bkey_buf_init(&prev);
+ bkey_init(&prev.k->k);
+ bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
- while (1) {
- k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked);
+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
if (k.k->type != KEY_TYPE_btree_ptr_v2)
- break;
- bp = bkey_s_c_to_btree_ptr_v2(k);
+ goto out;
- if (!bpos_eq(next_node, bp.v->min_key)) {
- bch2_dump_btree_node(c, b);
- bch2_bpos_to_text(&buf1, next_node);
- bch2_bpos_to_text(&buf2, bp.v->min_key);
- panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf);
- }
+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
- bch2_btree_node_iter_advance(&iter, b);
+ struct bpos expected_min = bkey_deleted(&prev.k->k)
+ ? node_min
+ : bpos_successor(prev.k->k.p);
- if (bch2_btree_node_iter_end(&iter)) {
- if (!bpos_eq(k.k->p, b->key.k.p)) {
- bch2_dump_btree_node(c, b);
- bch2_bpos_to_text(&buf1, b->key.k.p);
- bch2_bpos_to_text(&buf2, k.k->p);
- panic("expected end %s got %s\n", buf1.buf, buf2.buf);
- }
- break;
+ if (!bpos_eq(expected_min, bp.v->min_key)) {
+ bch2_topology_error(c);
+
+ printbuf_reset(&buf);
+ prt_str(&buf, "end of prev node doesn't match start of next node\n"),
+ prt_printf(&buf, " in btree %s level %u node ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, "\n prev ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
+ prt_str(&buf, "\n next ");
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ need_fsck_err(c, btree_node_topology_bad_min_key, "%s", buf.buf);
+ goto topology_repair;
}
- next_node = bpos_successor(k.k->p);
+ bch2_bkey_buf_reassemble(&prev, c, k);
+ bch2_btree_and_journal_iter_advance(&iter);
+ }
+
+ if (bkey_deleted(&prev.k->k)) {
+ bch2_topology_error(c);
+
+ printbuf_reset(&buf);
+ prt_str(&buf, "empty interior node\n");
+ prt_printf(&buf, " in btree %s level %u node ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+ need_fsck_err(c, btree_node_topology_empty_interior_node, "%s", buf.buf);
+ goto topology_repair;
+ } else if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
+ bch2_topology_error(c);
+
+ printbuf_reset(&buf);
+ prt_str(&buf, "last child node doesn't end at end of parent node\n");
+ prt_printf(&buf, " in btree %s level %u node ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, "\n last key ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
+
+ need_fsck_err(c, btree_node_topology_bad_max_key, "%s", buf.buf);
+ goto topology_repair;
}
-#endif
+out:
+fsck_err:
+ bch2_btree_and_journal_iter_exit(&iter);
+ bch2_bkey_buf_exit(&prev, c);
+ printbuf_exit(&buf);
+ return ret;
+topology_repair:
+ if ((c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) &&
+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) {
+ bch2_inconsistent_error(c);
+ ret = -BCH_ERR_btree_need_topology_repair;
+ } else {
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
+ }
+ goto out;
}
/* Calculate ideal packed bkey format for new btree nodes: */
struct open_buckets obs = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
- unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim
+ unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim
? BTREE_NODE_RESERVE
: 0;
int ret;
bch2_keylist_push(keys);
}
+static bool btree_update_new_nodes_marked_sb(struct btree_update *as)
+{
+ for_each_keylist_key(&as->new_keys, k)
+ if (!bch2_dev_btree_bitmap_marked(as->c, bkey_i_to_s_c(k)))
+ return false;
+ return true;
+}
+
+static void btree_update_new_nodes_mark_sb(struct btree_update *as)
+{
+ struct bch_fs *c = as->c;
+
+ mutex_lock(&c->sb_lock);
+ for_each_keylist_key(&as->new_keys, k)
+ bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k));
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+}
+
/*
* The transactional part of an interior btree node update, where we journal the
* update we did to the interior node and update alloc info:
if (ret)
goto err;
+ if (!btree_update_new_nodes_marked_sb(as))
+ btree_update_new_nodes_mark_sb(as);
+
/*
* Wait for any in flight writes to finish before we free the old nodes
* on disk:
* which may require allocations as well.
*/
ret = commit_do(trans, &as->disk_res, &journal_seq,
- BCH_WATERMARK_reclaim|
+ BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_journal_reclaim,
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
"%s", bch2_err_str(ret));
err:
- if (as->b) {
-
- b = as->b;
+ /*
+ * We have to be careful because another thread might be getting ready
+ * to free as->b and calling btree_update_reparent() on us - we'll
+ * recheck under btree_update_lock below:
+ */
+ b = READ_ONCE(as->b);
+ if (b) {
btree_path_idx_t path_idx = get_unlocked_mut_path(trans,
as->btree_id, b->c.level, b->key.k.p);
struct btree_path *path = trans->paths + path_idx;
{
struct bch_fs *c = as->c;
- mutex_lock(&c->btree_interior_update_lock);
- list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
-
- BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+ BUG_ON(as->mode != BTREE_UPDATE_none);
+ BUG_ON(as->update_level_end < b->c.level);
BUG_ON(!btree_node_dirty(b));
BUG_ON(!b->c.level);
- as->mode = BTREE_INTERIOR_UPDATING_NODE;
+ mutex_lock(&c->btree_interior_update_lock);
+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
+
+ as->mode = BTREE_UPDATE_node;
as->b = b;
+ as->update_level_end = b->c.level;
set_btree_node_write_blocked(b);
list_add(&as->write_blocked_list, &b->write_blocked);
lockdep_assert_held(&c->btree_interior_update_lock);
child->b = NULL;
- child->mode = BTREE_INTERIOR_UPDATING_AS;
+ child->mode = BTREE_UPDATE_update;
bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal,
bch2_update_reparent_journal_pin_flush);
struct bkey_i *insert = &b->key;
struct bch_fs *c = as->c;
- BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+ BUG_ON(as->mode != BTREE_UPDATE_none);
BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
ARRAY_SIZE(as->journal_entries));
mutex_lock(&c->btree_interior_update_lock);
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
- as->mode = BTREE_INTERIOR_UPDATING_ROOT;
+ as->mode = BTREE_UPDATE_root;
mutex_unlock(&c->btree_interior_update_lock);
}
struct bch_fs *c = as->c;
u64 start_time = as->start_time;
- BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE);
+ BUG_ON(as->mode == BTREE_UPDATE_none);
if (as->took_gc_lock)
up_read(&as->c->gc_lock);
static struct btree_update *
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
- unsigned level, bool split, unsigned flags)
+ unsigned level_start, bool split, unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_update *as;
int disk_res_flags = (flags & BCH_TRANS_COMMIT_no_enospc)
? BCH_DISK_RESERVATION_NOFAIL : 0;
unsigned nr_nodes[2] = { 0, 0 };
- unsigned update_level = level;
+ unsigned level_end = level_start;
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
int ret = 0;
u32 restart_count = trans->restart_count;
flags &= ~BCH_WATERMARK_MASK;
flags |= watermark;
- if (watermark < c->journal.watermark) {
- struct journal_res res = { 0 };
- unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
-
- if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
- watermark != BCH_WATERMARK_reclaim)
- journal_flags |= JOURNAL_RES_GET_NONBLOCK;
+ if (watermark < BCH_WATERMARK_reclaim &&
+ test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) {
+ if (flags & BCH_TRANS_COMMIT_journal_reclaim)
+ return ERR_PTR(-BCH_ERR_journal_reclaim_would_deadlock);
- ret = drop_locks_do(trans,
- bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
- if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
- ret = -BCH_ERR_journal_reclaim_would_deadlock;
+ bch2_trans_unlock(trans);
+ wait_event(c->journal.wait, !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags));
+ ret = bch2_trans_relock(trans);
if (ret)
return ERR_PTR(ret);
}
while (1) {
- nr_nodes[!!update_level] += 1 + split;
- update_level++;
+ nr_nodes[!!level_end] += 1 + split;
+ level_end++;
- ret = bch2_btree_path_upgrade(trans, path, update_level + 1);
+ ret = bch2_btree_path_upgrade(trans, path, level_end + 1);
if (ret)
return ERR_PTR(ret);
- if (!btree_path_node(path, update_level)) {
+ if (!btree_path_node(path, level_end)) {
/* Allocating new root? */
nr_nodes[1] += split;
- update_level = BTREE_MAX_DEPTH;
+ level_end = BTREE_MAX_DEPTH;
break;
}
* Always check for space for two keys, even if we won't have to
* split at prior level - it might have been a merge instead:
*/
- if (bch2_btree_node_insert_fits(path->l[update_level].b,
+ if (bch2_btree_node_insert_fits(path->l[level_end].b,
BKEY_BTREE_PTR_U64s_MAX * 2))
break;
- split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
+ split = path->l[level_end].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
}
if (!down_read_trylock(&c->gc_lock)) {
as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS);
memset(as, 0, sizeof(*as));
closure_init(&as->cl, NULL);
- as->c = c;
- as->start_time = start_time;
- as->ip_started = _RET_IP_;
- as->mode = BTREE_INTERIOR_NO_UPDATE;
- as->took_gc_lock = true;
- as->btree_id = path->btree_id;
- as->update_level = update_level;
+ as->c = c;
+ as->start_time = start_time;
+ as->ip_started = _RET_IP_;
+ as->mode = BTREE_UPDATE_none;
+ as->watermark = watermark;
+ as->took_gc_lock = true;
+ as->btree_id = path->btree_id;
+ as->update_level_start = level_start;
+ as->update_level_end = level_end;
INIT_LIST_HEAD(&as->list);
INIT_LIST_HEAD(&as->unwritten_list);
INIT_LIST_HEAD(&as->write_blocked_list);
*/
if (bch2_err_matches(ret, ENOSPC) &&
(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
- watermark != BCH_WATERMARK_reclaim) {
+ watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
goto err;
}
bch2_recalc_btree_reserve(c);
}
-static void bch2_btree_set_root(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b)
+static int bch2_btree_set_root(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ bool nofail)
{
struct bch_fs *c = as->c;
- struct btree *old;
trace_and_count(c, btree_node_set_root, trans, b);
- old = btree_node_root(c, b);
+ struct btree *old = btree_node_root(c, b);
/*
* Ensure no one is using the old root while we switch to the
* new root:
*/
- bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+ if (nofail) {
+ bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+ } else {
+ int ret = bch2_btree_node_lock_write(trans, path, &old->c);
+ if (ret)
+ return ret;
+ }
bch2_btree_set_root_inmem(c, b);
* depend on the new root would have to update the new root.
*/
bch2_btree_node_unlock_write(trans, path, old);
+ return 0;
}
/* Interior node updates: */
}
static void
-__bch2_btree_insert_keys_interior(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b,
- struct btree_node_iter node_iter,
- struct keylist *keys)
+bch2_btree_insert_keys_interior(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ struct btree_node_iter node_iter,
+ struct keylist *keys)
{
struct bkey_i *insert = bch2_keylist_front(keys);
struct bkey_packed *k;
if (bkey_deleted(k))
continue;
+ uk = bkey_unpack_key(b, k);
+
+ if (b->c.level &&
+ u64s < n1_u64s &&
+ u64s + k->u64s >= n1_u64s &&
+ bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p))
+ n1_u64s += k->u64s;
+
i = u64s >= n1_u64s;
u64s += k->u64s;
- uk = bkey_unpack_key(b, k);
if (!i)
n1_pos = uk.p;
bch2_bkey_format_add_key(&format[i], &uk);
bch2_verify_btree_nr_keys(n[i]);
- if (b->c.level)
- btree_node_interior_verify(as->c, n[i]);
+ BUG_ON(bch2_btree_node_check_topology(trans, n[i]));
}
}
bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p);
- __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
+ bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
- btree_node_interior_verify(as->c, b);
+ BUG_ON(bch2_btree_node_check_topology(trans, b));
}
}
u64 start_time = local_clock();
int ret = 0;
+ bch2_verify_btree_nr_keys(b);
BUG_ON(!parent && (b != btree_node_root(c, b)));
BUG_ON(parent && !btree_node_intent_locked(trans->paths + path, b->c.level + 1));
+ ret = bch2_btree_node_check_topology(trans, b);
+ if (ret)
+ return ret;
+
bch2_btree_interior_update_will_free_node(as, b);
if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) {
if (parent) {
/* Split a non root node */
ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys);
- if (ret)
- goto err;
} else if (n3) {
- bch2_btree_set_root(as, trans, trans->paths + path, n3);
+ ret = bch2_btree_set_root(as, trans, trans->paths + path, n3, false);
} else {
/* Root filled up but didn't need to be split */
- bch2_btree_set_root(as, trans, trans->paths + path, n1);
+ ret = bch2_btree_set_root(as, trans, trans->paths + path, n1, false);
}
+ if (ret)
+ goto err;
+
if (n3) {
bch2_btree_update_get_open_buckets(as, n3);
bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
goto out;
}
-static void
-bch2_btree_insert_keys_interior(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b,
- struct keylist *keys)
-{
- struct btree_path *linked;
- unsigned i;
-
- __bch2_btree_insert_keys_interior(as, trans, path, b,
- path->l[b->c.level].iter, keys);
-
- btree_update_updated_node(as, b);
-
- trans_for_each_path_with_node(trans, b, linked, i)
- bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
-
- bch2_trans_verify_paths(trans);
-}
-
/**
* bch2_btree_insert_node - insert bkeys into a given btree node
*
struct keylist *keys)
{
struct bch_fs *c = as->c;
- struct btree_path *path = trans->paths + path_idx;
+ struct btree_path *path = trans->paths + path_idx, *linked;
+ unsigned i;
int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
goto split;
}
- btree_node_interior_verify(c, b);
+ ret = bch2_btree_node_check_topology(trans, b);
+ if (ret) {
+ bch2_btree_node_unlock_write(trans, path, b);
+ return ret;
+ }
+
+ bch2_btree_insert_keys_interior(as, trans, path, b,
+ path->l[b->c.level].iter, keys);
+
+ trans_for_each_path_with_node(trans, b, linked, i)
+ bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
- bch2_btree_insert_keys_interior(as, trans, path, b, keys);
+ bch2_trans_verify_paths(trans);
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
bch2_maybe_compact_whiteouts(c, b))
bch2_trans_node_reinit_iter(trans, b);
+ btree_update_updated_node(as, b);
bch2_btree_node_unlock_write(trans, path, b);
- btree_node_interior_verify(c, b);
+ BUG_ON(bch2_btree_node_check_topology(trans, b));
return 0;
split:
/*
* We could attempt to avoid the transaction restart, by calling
* bch2_btree_path_upgrade() and allocating more nodes:
*/
- if (b->c.level >= as->update_level) {
+ if (b->c.level >= as->update_level_end) {
trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
}
bch2_keylist_add(&as->parent_keys, &b->key);
btree_split_insert_keys(as, trans, path_idx, n, &as->parent_keys);
- bch2_btree_set_root(as, trans, path, n);
+ int ret = bch2_btree_set_root(as, trans, path, n, true);
+ BUG_ON(ret);
+
bch2_btree_update_get_open_buckets(as, n);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bch2_trans_node_add(trans, path, n);
{
struct bch_fs *c = trans->c;
struct btree *b = bch2_btree_id_root(c, trans->paths[path].btree_id)->b;
+
+ if (btree_node_fake(b))
+ return bch2_btree_split_leaf(trans, path, flags);
+
struct btree_update *as =
- bch2_btree_update_start(trans, trans->paths + path,
- b->c.level, true, flags);
+ bch2_btree_update_start(trans, trans->paths + path, b->c.level, true, flags);
if (IS_ERR(as))
return PTR_ERR(as);
BUG_ON(!trans->paths[path].should_be_locked);
BUG_ON(!btree_node_locked(&trans->paths[path], level));
+ /*
+ * Work around a deadlock caused by the btree write buffer not doing
+ * merges and leaving tons of merges for us to do - we really don't need
+ * to be doing merges at all from the interior update path, and if the
+ * interior update path is generating too many new interior updates we
+ * deadlock:
+ */
+ if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates)
+ return 0;
+
+ if ((flags & BCH_WATERMARK_MASK) <= BCH_WATERMARK_reclaim) {
+ flags &= ~BCH_WATERMARK_MASK;
+ flags |= BCH_WATERMARK_btree;
+ flags |= BCH_TRANS_COMMIT_journal_reclaim;
+ }
+
b = trans->paths[path].l[level].b;
if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) ||
bch2_path_put(trans, new_path, true);
bch2_path_put(trans, sib_path, true);
bch2_trans_verify_locks(trans);
+ if (ret == -BCH_ERR_journal_reclaim_would_deadlock)
+ ret = 0;
+ if (!ret)
+ ret = bch2_trans_relock(trans);
return ret;
err_free_update:
bch2_btree_node_free_never_used(as, trans, n);
if (parent) {
bch2_keylist_add(&as->parent_keys, &n->key);
ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys);
- if (ret)
- goto err;
} else {
- bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n);
+ ret = bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n, false);
}
+ if (ret)
+ goto err;
+
bch2_btree_update_get_open_buckets(as, n);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bch2_btree_set_root_inmem(c, b);
}
-static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
+static int __bch2_btree_root_alloc_fake(struct btree_trans *trans, enum btree_id id, unsigned level)
{
struct bch_fs *c = trans->c;
struct closure cl;
set_btree_node_fake(b);
set_btree_node_need_rewrite(b);
- b->c.level = 0;
+ b->c.level = level;
b->c.btree_id = id;
bkey_btree_ptr_init(&b->key);
return 0;
}
-void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
+void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level)
+{
+ bch2_trans_run(c, __bch2_btree_root_alloc_fake(trans, id, level));
+}
+
+static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as)
{
- bch2_trans_run(c, __bch2_btree_root_alloc(trans, id));
+ prt_printf(out, "%ps: btree=%s l=%u-%u watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
+ (void *) as->ip_started,
+ bch2_btree_id_str(as->btree_id),
+ as->update_level_start,
+ as->update_level_end,
+ bch2_watermarks[as->watermark],
+ bch2_btree_update_modes[as->mode],
+ as->nodes_written,
+ closure_nr_remaining(&as->cl),
+ as->journal.seq);
}
void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
mutex_lock(&c->btree_interior_update_lock);
list_for_each_entry(as, &c->btree_interior_update_list, list)
- prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
- (void *) as->ip_started,
- as->mode,
- as->nodes_written,
- closure_nr_remaining(&as->cl),
- as->journal.seq);
+ bch2_btree_update_to_text(out, as);
mutex_unlock(&c->btree_interior_update_lock);
}
#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
+int bch2_btree_node_check_topology(struct btree_trans *, struct btree *);
+
+#define BTREE_UPDATE_MODES() \
+ x(none) \
+ x(node) \
+ x(root) \
+ x(update)
+
+enum btree_update_mode {
+#define x(n) BTREE_UPDATE_##n,
+ BTREE_UPDATE_MODES()
+#undef x
+};
+
/*
* Tracks an in progress split/rewrite of a btree node and the update to the
* parent node:
struct list_head list;
struct list_head unwritten_list;
- /* What kind of update are we doing? */
- enum {
- BTREE_INTERIOR_NO_UPDATE,
- BTREE_INTERIOR_UPDATING_NODE,
- BTREE_INTERIOR_UPDATING_ROOT,
- BTREE_INTERIOR_UPDATING_AS,
- } mode;
-
+ enum btree_update_mode mode;
+ enum bch_watermark watermark;
unsigned nodes_written:1;
unsigned took_gc_lock:1;
enum btree_id btree_id;
- unsigned update_level;
+ unsigned update_level_start;
+ unsigned update_level_end;
struct disk_reservation disk_res;
/*
- * BTREE_INTERIOR_UPDATING_NODE:
+ * BTREE_UPDATE_node:
* The update that made the new nodes visible was a regular update to an
* existing interior node - @b. We can't write out the update to @b
* until the new nodes we created are finished writing, so we block @b
struct bkey_i *, unsigned, bool);
void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
-void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
+void bch2_btree_root_alloc_fake(struct bch_fs *, enum btree_id, unsigned);
static inline unsigned btree_update_reserve_required(struct bch_fs *c,
struct btree *b)
#include "journal_reclaim.h"
#include <linux/prefetch.h>
+#include <linux/sort.h>
static int bch2_btree_write_buffer_journal_flush(struct journal *,
struct journal_entry_pin *, u64);
#endif
}
+static int wb_key_seq_cmp(const void *_l, const void *_r)
+{
+ const struct btree_write_buffered_key *l = _l;
+ const struct btree_write_buffered_key *r = _r;
+
+ return cmp_int(l->journal_seq, r->journal_seq);
+}
+
/* Compare excluding idx, the low 24 bits: */
static inline bool wb_key_eq(const void *_l, const void *_r)
{
bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) {
bch2_btree_node_unlock_write(trans, path, path->l[0].b);
write_locked = false;
+
+ ret = lockrestart_do(trans,
+ bch2_btree_iter_traverse(&iter) ?:
+ bch2_foreground_maybe_merge(trans, iter.path, 0,
+ BCH_WATERMARK_reclaim|
+ BCH_TRANS_COMMIT_journal_reclaim|
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_enospc));
+ if (ret)
+ goto err;
}
}
*/
trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr);
+ sort(wb->flushing.keys.data,
+ wb->flushing.keys.nr,
+ sizeof(wb->flushing.keys.data[0]),
+ wb_key_seq_cmp, NULL);
+
darray_for_each(wb->flushing.keys, i) {
if (!i->journal_seq)
continue;
ret = commit_do(trans, NULL, NULL,
BCH_WATERMARK_reclaim|
+ BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc|
- BCH_TRANS_COMMIT_no_journal_res|
- BCH_TRANS_COMMIT_journal_reclaim,
+ BCH_TRANS_COMMIT_no_journal_res ,
btree_write_buffered_insert(trans, i));
if (ret)
goto err;
"different types of data in same bucket: %s, %s",
bch2_data_type_str(g->data_type),
bch2_data_type_str(data_type))) {
+ BUG();
ret = -EIO;
goto err;
}
bch2_data_type_str(ptr_data_type),
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ BUG();
ret = -EIO;
goto err;
}
static int bch2_trigger_pointer(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k, struct extent_ptr_decoded p,
- s64 *sectors,
- unsigned flags)
+ const union bch_extent_entry *entry,
+ s64 *sectors, unsigned flags)
{
bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
struct bpos bucket;
struct bch_backpointer bp;
- bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp);
+ bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, entry, &bucket, &bp);
*sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len);
if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
if (flags & BTREE_TRIGGER_GC) {
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
- enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);
percpu_down_read(&c->mark_lock);
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
s64 disk_sectors;
- ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags);
+ ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags);
if (ret < 0)
return ret;
fallthrough;
case BCH_WATERMARK_btree_copygc:
case BCH_WATERMARK_reclaim:
+ case BCH_WATERMARK_interior_updates:
break;
}
: "(invalid data type)";
}
-static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type)
-{
- if (type < BCH_DATA_NR)
- prt_str(out, __bch2_data_types[type]);
- else
- prt_printf(out, "(invalid data type %u)", type);
-}
-
/* disk reservations: */
static inline void bch2_disk_reservation_put(struct bch_fs *c,
#include "chardev.h"
#include "journal.h"
#include "move.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "replicas.h"
#include "super.h"
#include "super-io.h"
struct fsck_thread {
struct thread_with_stdio thr;
struct bch_fs *c;
- char **devs;
- size_t nr_devs;
struct bch_opts opts;
};
static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
{
struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
- if (thr->devs)
- for (size_t i = 0; i < thr->nr_devs; i++)
- kfree(thr->devs[i]);
- kfree(thr->devs);
kfree(thr);
}
static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
{
struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
- struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
-
- if (IS_ERR(c))
- return PTR_ERR(c);
+ struct bch_fs *c = thr->c;
- int ret = 0;
- if (test_bit(BCH_FS_errors_fixed, &c->flags))
- ret |= 1;
- if (test_bit(BCH_FS_error, &c->flags))
- ret |= 4;
+ int ret = PTR_ERR_OR_ZERO(c);
+ if (ret)
+ return ret;
- bch2_fs_stop(c);
+ ret = bch2_fs_start(thr->c);
+ if (ret)
+ goto err;
- if (ret & 1)
+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name);
- if (ret & 4)
+ ret |= 1;
+ }
+ if (test_bit(BCH_FS_error, &c->flags)) {
bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name);
-
+ ret |= 4;
+ }
+err:
+ bch2_fs_stop(c);
return ret;
}
{
struct bch_ioctl_fsck_offline arg;
struct fsck_thread *thr = NULL;
- u64 *devs = NULL;
+ darray_str(devs) = {};
long ret = 0;
if (copy_from_user(&arg, user_arg, sizeof(arg)))
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
- !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
- !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
- ret = -ENOMEM;
- goto err;
- }
+ for (size_t i = 0; i < arg.nr_devs; i++) {
+ u64 dev_u64;
+ ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64));
+ if (ret)
+ goto err;
- thr->opts = bch2_opts_empty();
- thr->nr_devs = arg.nr_devs;
+ char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX);
+ ret = PTR_ERR_OR_ZERO(dev_str);
+ if (ret)
+ goto err;
- if (copy_from_user(devs, &user_arg->devs[0],
- array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) {
- ret = -EINVAL;
- goto err;
+ ret = darray_push(&devs, dev_str);
+ if (ret) {
+ kfree(dev_str);
+ goto err;
+ }
}
- for (size_t i = 0; i < arg.nr_devs; i++) {
- thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);
- ret = PTR_ERR_OR_ZERO(thr->devs[i]);
- if (ret)
- goto err;
+ thr = kzalloc(sizeof(*thr), GFP_KERNEL);
+ if (!thr) {
+ ret = -ENOMEM;
+ goto err;
}
+ thr->opts = bch2_opts_empty();
+
if (arg.opts) {
char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
- ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops);
-err:
- if (ret < 0) {
- if (thr)
- bch2_fsck_thread_exit(&thr->thr);
- pr_err("ret %s", bch2_err_str(ret));
- }
- kfree(devs);
+ /* We need request_key() to be called before we punt to kthread: */
+ opt_set(thr->opts, nostart, true);
+
+ bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops);
+
+ thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);
+
+ if (!IS_ERR(thr->c) &&
+ thr->c->opts.errors == BCH_ON_ERROR_panic)
+ thr->c->opts.errors = BCH_ON_ERROR_ro;
+
+ ret = __bch2_run_thread_with_stdio(&thr->thr);
+out:
+ darray_for_each(devs, i)
+ kfree(*i);
+ darray_exit(&devs);
return ret;
+err:
+ if (thr)
+ bch2_fsck_thread_exit(&thr->thr);
+ pr_err("ret %s", bch2_err_str(ret));
+ goto out;
}
static long bch2_global_ioctl(unsigned cmd, void __user *arg)
extent_nonce(version, crc_old), bio);
if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) {
- bch_err(c, "checksum error in %s() (memory corruption or bug?)\n"
- "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)",
- __func__,
- crc_old.csum.hi,
- crc_old.csum.lo,
- merged.hi,
- merged.lo,
- bch2_csum_types[crc_old.csum_type],
- bch2_csum_types[new_csum_type]);
+ struct printbuf buf = PRINTBUF;
+ prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n"
+ "expected %0llx:%0llx got %0llx:%0llx (old type ",
+ __func__,
+ crc_old.csum.hi,
+ crc_old.csum.lo,
+ merged.hi,
+ merged.lo);
+ bch2_prt_csum_type(&buf, crc_old.csum_type);
+ prt_str(&buf, " new type ");
+ bch2_prt_csum_type(&buf, new_csum_type);
+ prt_str(&buf, ")");
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
return -EIO;
}
struct bch_csum expected,
struct bch_csum got)
{
- prt_printf(out, "checksum error: got ");
+ prt_str(out, "checksum error, type ");
+ bch2_prt_csum_type(out, type);
+ prt_str(out, ": got ");
bch2_csum_to_text(out, type, got);
prt_str(out, " should be ");
bch2_csum_to_text(out, type, expected);
- prt_printf(out, " type %s", bch2_csum_types[type]);
}
int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
}
-static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type)
-{
- if (type < BCH_COMPRESSION_TYPE_NR)
- prt_str(out, __bch2_compression_types[type]);
- else
- prt_printf(out, "(invalid compression type %u)", type);
-}
-
int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
struct bch_extent_crc_unpacked *);
int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
#include "move.h"
#include "nocow_locking.h"
#include "rebalance.h"
+#include "snapshot.h"
#include "subvolume.h"
#include "trace.h"
unsigned ptrs_locked = 0;
int ret = 0;
+ /*
+ * fs is corrupt we have a key for a snapshot node that doesn't exist,
+ * and we have to check for this because we go rw before repairing the
+ * snapshots table - just skip it, we can move it later.
+ */
+ if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot)))
+ return -BCH_ERR_data_update_done;
+
bch2_bkey_buf_init(&m->k);
bch2_bkey_buf_reassemble(&m->k, c, k);
m->btree_id = btree_id;
move_ctxt_wait_event(ctxt,
(locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0)) ||
- (!atomic_read(&ctxt->read_sectors) &&
- !atomic_read(&ctxt->write_sectors)));
+ list_empty(&ctxt->ios));
if (!locked)
bch2_bucket_nocow_lock(&c->nocow_locks,
i++;
}
+ unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have));
+
/*
* If current extent durability is less than io_opts.data_replicas,
* we're not trying to rereplicate the extent up to data_replicas here -
* rereplicate, currently, so that users don't get an unexpected -ENOSPC
*/
if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) &&
- durability_have >= io_opts.data_replicas) {
+ !durability_required) {
m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
m->data_opts.rewrite_ptrs = 0;
/* if iter == NULL, it's just a promote */
goto done;
}
- m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+ m->op.nr_replicas = min(durability_removing, durability_required) +
m->data_opts.extra_replicas;
- m->op.nr_replicas_required = m->op.nr_replicas;
- BUG_ON(!m->op.nr_replicas);
+ /*
+ * If device(s) were set to durability=0 after data was written to them
+ * we can end up with a duribilty=0 extent, and the normal algorithm
+ * that tries not to increase durability doesn't work:
+ */
+ if (!(durability_have + durability_removing))
+ m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
+
+ m->op.nr_replicas_required = m->op.nr_replicas;
if (reserve_sectors) {
ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
#include "btree_iter.h"
#include "btree_locking.h"
#include "btree_update.h"
+#include "btree_update_interior.h"
#include "buckets.h"
#include "debug.h"
#include "error.h"
i->size = size;
i->ret = 0;
- do {
+ while (1) {
err = flush_buf(i);
if (err)
return err;
if (!i->size)
break;
+ if (done)
+ break;
+
done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter);
i->iter++;
- } while (!done);
+ }
if (i->buf.allocation_failure)
return -ENOMEM;
.read = bch2_journal_pins_read,
};
+static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ int err;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ if (!i->iter) {
+ bch2_btree_updates_to_text(&i->buf, c);
+ i->iter++;
+ }
+
+ err = flush_buf(i);
+ if (err)
+ return err;
+
+ if (i->buf.allocation_failure)
+ return -ENOMEM;
+
+ return i->ret;
+}
+
+static const struct file_operations btree_updates_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_btree_updates_read,
+};
+
static int btree_transaction_stats_open(struct inode *inode, struct file *file)
{
struct bch_fs *c = inode->i_private;
struct dump_iter *i;
i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
-
if (!i)
return -ENOMEM;
debugfs_remove_recursive(c->fs_debug_dir);
}
+static void bch2_fs_debug_btree_init(struct bch_fs *c, struct btree_debug *bd)
+{
+ struct dentry *d;
+
+ d = debugfs_create_dir(bch2_btree_id_str(bd->id), c->btree_debug_dir);
+
+ debugfs_create_file("keys", 0400, d, bd, &btree_debug_ops);
+
+ debugfs_create_file("formats", 0400, d, bd, &btree_format_debug_ops);
+
+ debugfs_create_file("bfloat-failed", 0400, d, bd,
+ &bfloat_failed_debug_ops);
+}
+
void bch2_fs_debug_init(struct bch_fs *c)
{
struct btree_debug *bd;
debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
c->btree_debug, &journal_pins_ops);
+ debugfs_create_file("btree_updates", 0400, c->fs_debug_dir,
+ c->btree_debug, &btree_updates_ops);
+
debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
c, &btree_transaction_stats_op);
bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
bd++) {
bd->id = bd - c->btree_debug;
- debugfs_create_file(bch2_btree_id_str(bd->id),
- 0400, c->btree_debug_dir, bd,
- &btree_debug_ops);
-
- snprintf(name, sizeof(name), "%s-formats",
- bch2_btree_id_str(bd->id));
-
- debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
- &btree_format_debug_ops);
-
- snprintf(name, sizeof(name), "%s-bfloat-failed",
- bch2_btree_id_str(bd->id));
-
- debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
- &bfloat_failed_debug_ops);
+ bch2_fs_debug_btree_init(c, bd);
}
}
void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
- const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
- unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
+ const struct bch_stripe *sp = bkey_s_c_to_stripe(k).v;
+ struct bch_stripe s = {};
+
+ memcpy(&s, sp, min(sizeof(s), bkey_val_bytes(k.k)));
+
+ unsigned nr_data = s.nr_blocks - s.nr_redundant;
+
+ prt_printf(out, "algo %u sectors %u blocks %u:%u csum ",
+ s.algorithm,
+ le16_to_cpu(s.sectors),
+ nr_data,
+ s.nr_redundant);
+ bch2_prt_csum_type(out, s.csum_type);
+ prt_printf(out, " gran %u", 1U << s.csum_granularity_bits);
+
+ for (unsigned i = 0; i < s.nr_blocks; i++) {
+ const struct bch_extent_ptr *ptr = sp->ptrs + i;
+
+ if ((void *) ptr >= bkey_val_end(k))
+ break;
+
+ bch2_extent_ptr_to_text(out, c, ptr);
- prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
- s->algorithm,
- le16_to_cpu(s->sectors),
- nr_data,
- s->nr_redundant,
- s->csum_type,
- 1U << s->csum_granularity_bits);
-
- for (i = 0; i < s->nr_blocks; i++) {
- const struct bch_extent_ptr *ptr = s->ptrs + i;
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- u32 offset;
- u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-
- prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
- if (i < nr_data)
- prt_printf(out, "#%u", stripe_blockcount_get(s, i));
- prt_printf(out, " gen %u", ptr->gen);
- if (ptr_stale(ca, ptr))
- prt_printf(out, " stale");
+ if (s.csum_type < BCH_CSUM_NR &&
+ i < nr_data &&
+ stripe_blockcount_offset(&s, i) < bkey_val_bytes(k.k))
+ prt_printf(out, "#%u", stripe_blockcount_get(sp, i));
}
}
struct printbuf err = PRINTBUF;
struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev);
- prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n",
- want.hi, want.lo,
- got.hi, got.lo,
- bch2_csum_types[v->csum_type]);
+ prt_str(&err, "stripe ");
+ bch2_csum_err_msg(&err, v->csum_type, want, got);
prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i);
bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key));
bch_err_ratelimited(ca, "%s", err.buf);
static inline unsigned stripe_csum_offset(const struct bch_stripe *s,
unsigned dev, unsigned csum_idx)
{
+ EBUG_ON(s->csum_type >= BCH_CSUM_NR);
+
unsigned csum_bytes = bch_crc_bytes[s->csum_type];
return sizeof(struct bch_stripe) +
x(BCH_ERR_nopromote, nopromote_in_flight) \
x(BCH_ERR_nopromote, nopromote_no_writes) \
x(BCH_ERR_nopromote, nopromote_enomem) \
- x(0, need_inode_lock)
+ x(0, need_inode_lock) \
+ x(0, invalid_snapshot_node)
enum bch_errcode {
BCH_ERR_START = 2048,
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "error.h"
-#include "recovery.h"
+#include "journal.h"
+#include "recovery_passes.h"
#include "super.h"
#include "thread_with_file.h"
return false;
case BCH_ON_ERROR_ro:
if (bch2_fs_emergency_read_only(c))
- bch_err(c, "inconsistency detected - emergency read only");
+ bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
+ journal_cur_seq(&c->journal));
return true;
case BCH_ON_ERROR_panic:
panic(bch2_fmt(c, "panic after error"));
int bch2_topology_error(struct bch_fs *);
+#define bch2_fs_topology_error(c, ...) \
+({ \
+ bch_err(c, "btree topology error: " __VA_ARGS__); \
+ bch2_topology_error(c); \
+})
+
#define bch2_fs_inconsistent(c, ...) \
({ \
bch_err(c, __VA_ARGS__); \
enum bkey_invalid_flags flags,
struct printbuf *err)
{
+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
int ret = 0;
- bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err,
- btree_ptr_v2_val_too_big,
+ bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX,
+ c, err, btree_ptr_v2_val_too_big,
"value too big (%zu > %zu)",
bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
+ bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p),
+ c, err, btree_ptr_v2_min_key_bad,
+ "min_key > key");
+
ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
fsck_err:
return ret;
return bkey_deleted(k.k);
}
+void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
+{
+ struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
+ ? bch_dev_bkey_exists(c, ptr->dev)
+ : NULL;
+
+ if (!ca) {
+ prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
+ (u64) ptr->offset, ptr->gen,
+ ptr->cached ? " cached" : "");
+ } else {
+ u32 offset;
+ u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
+
+ prt_printf(out, "ptr: %u:%llu:%u gen %u",
+ ptr->dev, b, offset, ptr->gen);
+ if (ptr->cached)
+ prt_str(out, " cached");
+ if (ptr->unwritten)
+ prt_str(out, " unwritten");
+ if (b >= ca->mi.first_bucket &&
+ b < ca->mi.nbuckets &&
+ ptr_stale(ca, ptr))
+ prt_printf(out, " stale");
+ }
+}
+
void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
prt_printf(out, " ");
switch (__extent_entry_type(entry)) {
- case BCH_EXTENT_ENTRY_ptr: {
- const struct bch_extent_ptr *ptr = entry_to_ptr(entry);
- struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
- ? bch_dev_bkey_exists(c, ptr->dev)
- : NULL;
-
- if (!ca) {
- prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
- (u64) ptr->offset, ptr->gen,
- ptr->cached ? " cached" : "");
- } else {
- u32 offset;
- u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-
- prt_printf(out, "ptr: %u:%llu:%u gen %u",
- ptr->dev, b, offset, ptr->gen);
- if (ptr->cached)
- prt_str(out, " cached");
- if (ptr->unwritten)
- prt_str(out, " unwritten");
- if (ca && ptr_stale(ca, ptr))
- prt_printf(out, " stale");
- }
+ case BCH_EXTENT_ENTRY_ptr:
+ bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry));
break;
- }
+
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128: {
struct bch_extent_crc_unpacked crc =
bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
- prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ",
+ prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
crc.compressed_size,
crc.uncompressed_size,
- crc.offset, crc.nonce,
- bch2_csum_types[crc.csum_type]);
+ crc.offset, crc.nonce);
+ bch2_prt_csum_type(out, crc.csum_type);
+ prt_str(out, " compress ");
bch2_prt_compression_type(out, crc.compression_type);
break;
}
return ret;
}
-static inline unsigned bch2_bkey_ptr_data_type(struct bkey_s_c k, const struct bch_extent_ptr *ptr)
-{
- switch (k.k->type) {
- case KEY_TYPE_btree_ptr:
- case KEY_TYPE_btree_ptr_v2:
- return BCH_DATA_btree;
- case KEY_TYPE_extent:
- case KEY_TYPE_reflink_v:
- return BCH_DATA_user;
- case KEY_TYPE_stripe: {
- struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-
- BUG_ON(ptr < s.v->ptrs ||
- ptr >= s.v->ptrs + s.v->nr_blocks);
-
- return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant
- ? BCH_DATA_parity
- : BCH_DATA_user;
- }
- default:
- BUG();
- }
-}
-
unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
+void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c,
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#include "eytzinger.h"
+
+/**
+ * is_aligned - is this pointer & size okay for word-wide copying?
+ * @base: pointer to data
+ * @size: size of each element
+ * @align: required alignment (typically 4 or 8)
+ *
+ * Returns true if elements can be copied using word loads and stores.
+ * The size must be a multiple of the alignment, and the base address must
+ * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
+ *
+ * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
+ * to "if ((a | b) & mask)", so we do that by hand.
+ */
+__attribute_const__ __always_inline
+static bool is_aligned(const void *base, size_t size, unsigned char align)
+{
+ unsigned char lsbits = (unsigned char)size;
+
+ (void)base;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ lsbits |= (unsigned char)(uintptr_t)base;
+#endif
+ return (lsbits & (align - 1)) == 0;
+}
+
+/**
+ * swap_words_32 - swap two elements in 32-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 4)
+ *
+ * Exchange the two objects in memory. This exploits base+index addressing,
+ * which basically all CPUs have, to minimize loop overhead computations.
+ *
+ * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
+ * bottom of the loop, even though the zero flag is still valid from the
+ * subtract (since the intervening mov instructions don't alter the flags).
+ * Gcc 8.1.0 doesn't have that problem.
+ */
+static void swap_words_32(void *a, void *b, size_t n)
+{
+ do {
+ u32 t = *(u32 *)(a + (n -= 4));
+ *(u32 *)(a + n) = *(u32 *)(b + n);
+ *(u32 *)(b + n) = t;
+ } while (n);
+}
+
+/**
+ * swap_words_64 - swap two elements in 64-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 8)
+ *
+ * Exchange the two objects in memory. This exploits base+index
+ * addressing, which basically all CPUs have, to minimize loop overhead
+ * computations.
+ *
+ * We'd like to use 64-bit loads if possible. If they're not, emulating
+ * one requires base+index+4 addressing which x86 has but most other
+ * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
+ * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
+ * x32 ABI). Are there any cases the kernel needs to worry about?
+ */
+static void swap_words_64(void *a, void *b, size_t n)
+{
+ do {
+#ifdef CONFIG_64BIT
+ u64 t = *(u64 *)(a + (n -= 8));
+ *(u64 *)(a + n) = *(u64 *)(b + n);
+ *(u64 *)(b + n) = t;
+#else
+ /* Use two 32-bit transfers to avoid base+index+4 addressing */
+ u32 t = *(u32 *)(a + (n -= 4));
+ *(u32 *)(a + n) = *(u32 *)(b + n);
+ *(u32 *)(b + n) = t;
+
+ t = *(u32 *)(a + (n -= 4));
+ *(u32 *)(a + n) = *(u32 *)(b + n);
+ *(u32 *)(b + n) = t;
+#endif
+ } while (n);
+}
+
+/**
+ * swap_bytes - swap two elements a byte at a time
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size
+ *
+ * This is the fallback if alignment doesn't allow using larger chunks.
+ */
+static void swap_bytes(void *a, void *b, size_t n)
+{
+ do {
+ char t = ((char *)a)[--n];
+ ((char *)a)[n] = ((char *)b)[n];
+ ((char *)b)[n] = t;
+ } while (n);
+}
+
+/*
+ * The values are arbitrary as long as they can't be confused with
+ * a pointer, but small integers make for the smallest compare
+ * instructions.
+ */
+#define SWAP_WORDS_64 (swap_r_func_t)0
+#define SWAP_WORDS_32 (swap_r_func_t)1
+#define SWAP_BYTES (swap_r_func_t)2
+#define SWAP_WRAPPER (swap_r_func_t)3
+
+struct wrapper {
+ cmp_func_t cmp;
+ swap_func_t swap_func;
+};
+
+/*
+ * The function pointer is last to make tail calls most efficient if the
+ * compiler decides not to inline this function.
+ */
+static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
+{
+ if (swap_func == SWAP_WRAPPER) {
+ ((const struct wrapper *)priv)->swap_func(a, b, (int)size);
+ return;
+ }
+
+ if (swap_func == SWAP_WORDS_64)
+ swap_words_64(a, b, size);
+ else if (swap_func == SWAP_WORDS_32)
+ swap_words_32(a, b, size);
+ else if (swap_func == SWAP_BYTES)
+ swap_bytes(a, b, size);
+ else
+ swap_func(a, b, (int)size, priv);
+}
+
+#define _CMP_WRAPPER ((cmp_r_func_t)0L)
+
+static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
+{
+ if (cmp == _CMP_WRAPPER)
+ return ((const struct wrapper *)priv)->cmp(a, b);
+ return cmp(a, b, priv);
+}
+
+static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
+ cmp_r_func_t cmp_func, const void *priv,
+ size_t l, size_t r)
+{
+ return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
+ base + inorder_to_eytzinger0(r, n) * size,
+ cmp_func, priv);
+}
+
+static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
+ swap_r_func_t swap_func, const void *priv,
+ size_t l, size_t r)
+{
+ do_swap(base + inorder_to_eytzinger0(l, n) * size,
+ base + inorder_to_eytzinger0(r, n) * size,
+ size, swap_func, priv);
+}
+
+void eytzinger0_sort_r(void *base, size_t n, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv)
+{
+ int i, c, r;
+
+ /* called from 'sort' without swap function, let's pick the default */
+ if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
+ swap_func = NULL;
+
+ if (!swap_func) {
+ if (is_aligned(base, size, 8))
+ swap_func = SWAP_WORDS_64;
+ else if (is_aligned(base, size, 4))
+ swap_func = SWAP_WORDS_32;
+ else
+ swap_func = SWAP_BYTES;
+ }
+
+ /* heapify */
+ for (i = n / 2 - 1; i >= 0; --i) {
+ for (r = i; r * 2 + 1 < n; r = c) {
+ c = r * 2 + 1;
+
+ if (c + 1 < n &&
+ eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+ c++;
+
+ if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+ break;
+
+ eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+ }
+ }
+
+ /* sort */
+ for (i = n - 1; i > 0; --i) {
+ eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
+
+ for (r = 0; r * 2 + 1 < i; r = c) {
+ c = r * 2 + 1;
+
+ if (c + 1 < i &&
+ eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+ c++;
+
+ if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+ break;
+
+ eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+ }
+ }
+}
+
+void eytzinger0_sort(void *base, size_t n, size_t size,
+ cmp_func_t cmp_func,
+ swap_func_t swap_func)
+{
+ struct wrapper w = {
+ .cmp = cmp_func,
+ .swap_func = swap_func,
+ };
+
+ return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
+}
#include <linux/bitops.h>
#include <linux/log2.h>
-#include "util.h"
+#ifdef EYTZINGER_DEBUG
+#define EYTZINGER_BUG_ON(cond) BUG_ON(cond)
+#else
+#define EYTZINGER_BUG_ON(cond)
+#endif
/*
* Traversal for trees in eytzinger layout - a full binary tree layed out in an
- * array
- */
-
-/*
- * One based indexing version:
+ * array.
+ *
+ * Consider using an eytzinger tree any time you would otherwise be doing binary
+ * search over an array. Binary search is a worst case scenario for branch
+ * prediction and prefetching, but in an eytzinger tree every node's children
+ * are adjacent in memory, thus we can prefetch children before knowing the
+ * result of the comparison, assuming multiple nodes fit on a cacheline.
*
- * With one based indexing each level of the tree starts at a power of two -
- * good for cacheline alignment:
+ * Two variants are provided, for one based indexing and zero based indexing.
+ *
+ * Zero based indexing is more convenient, but one based indexing has better
+ * alignment and thus better performance because each new level of the tree
+ * starts at a power of two, and thus if element 0 was cacheline aligned, each
+ * new level will be as well.
*/
static inline unsigned eytzinger1_child(unsigned i, unsigned child)
{
- EBUG_ON(child > 1);
+ EYTZINGER_BUG_ON(child > 1);
return (i << 1) + child;
}
static inline unsigned eytzinger1_next(unsigned i, unsigned size)
{
- EBUG_ON(i > size);
+ EYTZINGER_BUG_ON(i > size);
if (eytzinger1_right_child(i) <= size) {
i = eytzinger1_right_child(i);
static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
{
- EBUG_ON(i > size);
+ EYTZINGER_BUG_ON(i > size);
if (eytzinger1_left_child(i) <= size) {
i = eytzinger1_left_child(i) + 1;
unsigned shift = __fls(size) - b;
int s;
- EBUG_ON(!i || i > size);
+ EYTZINGER_BUG_ON(!i || i > size);
i ^= 1U << b;
i <<= 1;
unsigned shift;
int s;
- EBUG_ON(!i || i > size);
+ EYTZINGER_BUG_ON(!i || i > size);
/*
* sign bit trick:
static inline unsigned eytzinger0_child(unsigned i, unsigned child)
{
- EBUG_ON(child > 1);
+ EYTZINGER_BUG_ON(child > 1);
return (i << 1) + 1 + child;
}
(_i) != -1; \
(_i) = eytzinger0_next((_i), (_size)))
-typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);
-
/* return greatest node <= @search, or -1 if not found */
-static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
- eytzinger_cmp_fn cmp, const void *search)
+static inline int eytzinger0_find_le(void *base, size_t nr, size_t size,
+ cmp_func_t cmp, const void *search)
{
unsigned i, n = 0;
do {
i = n;
- n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
+ n = eytzinger0_child(i, cmp(base + i * size, search) <= 0);
} while (n < nr);
if (n & 1) {
- /* @i was greater than @search, return previous node: */
-
- if (i == eytzinger0_first(nr))
- return -1;
-
+ /*
+ * @i was greater than @search, return previous node:
+ *
+ * if @i was leftmost/smallest element,
+ * eytzinger0_prev(eytzinger0_first())) returns -1, as expected
+ */
return eytzinger0_prev(i, nr);
} else {
return i;
}
}
+static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size,
+ cmp_func_t cmp, const void *search)
+{
+ ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
+
+ /*
+ * if eytitzinger0_find_le() returned -1 - no element was <= search - we
+ * want to return the first element; next/prev identities mean this work
+ * as expected
+ *
+ * similarly if find_le() returns last element, we should return -1;
+ * identities mean this all works out:
+ */
+ return eytzinger0_next(idx, nr);
+}
+
#define eytzinger0_find(base, nr, size, _cmp, search) \
({ \
void *_base = (base); \
int _res; \
\
while (_i < _nr && \
- (_res = _cmp(_search, _base + _i * _size, _size))) \
+ (_res = _cmp(_search, _base + _i * _size))) \
_i = eytzinger0_child(_i, _res > 0); \
_i; \
})
-void eytzinger0_sort(void *, size_t, size_t,
- int (*cmp_func)(const void *, const void *, size_t),
- void (*swap_func)(void *, void *, size_t));
+void eytzinger0_sort_r(void *, size_t, size_t,
+ cmp_r_func_t, swap_r_func_t, const void *);
+void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
#endif /* _EYTZINGER_H */
ret = dio->op.error ?: ((long) dio->written << 9);
bio_put(&dio->op.wbio.bio);
+ bch2_write_ref_put(dio->op.c, BCH_WRITE_REF_dio_write);
+
/* inode->i_dio_count is our ref on inode and thus bch_fs */
inode_dio_end(&inode->v);
if (likely(!dio->iter.count) || dio->op.error)
break;
- bio_reset(bio, NULL, REQ_OP_WRITE);
+ bio_reset(bio, NULL, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
}
out:
return bch2_dio_write_done(dio);
prefetch(&inode->ei_inode);
prefetch((void *) &inode->ei_inode + 64);
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write))
+ return -EROFS;
+
inode_lock(&inode->v);
ret = generic_write_checks(req, iter);
if (unlikely(ret <= 0))
- goto err;
+ goto err_put_write_ref;
ret = file_remove_privs(file);
if (unlikely(ret))
- goto err;
+ goto err_put_write_ref;
ret = file_update_time(file);
if (unlikely(ret))
- goto err;
+ goto err_put_write_ref;
if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1)))
- goto err;
+ goto err_put_write_ref;
inode_dio_begin(&inode->v);
bch2_pagecache_block_get(inode);
bio = bio_alloc_bioset(NULL,
bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
- REQ_OP_WRITE,
+ REQ_OP_WRITE | REQ_SYNC | REQ_IDLE,
GFP_KERNEL,
&c->dio_write_bioset);
dio = container_of(bio, struct dio_write, op.wbio.bio);
}
ret = bch2_dio_write_loop(dio);
-err:
+out:
if (locked)
inode_unlock(&inode->v);
return ret;
bch2_pagecache_block_put(inode);
bio_put(bio);
inode_dio_end(&inode->v);
- goto err;
+err_put_write_ref:
+ bch2_write_ref_put(c, BCH_WRITE_REF_dio_write);
+ goto out;
}
void bch2_fs_fs_io_direct_exit(struct bch_fs *c)
static int bch2_flush_inode(struct bch_fs *c,
struct bch_inode_info *inode)
{
- struct bch_inode_unpacked u;
- int ret;
-
if (c->opts.journal_flush_disabled)
return 0;
- ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u);
- if (ret)
- return ret;
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync))
+ return -EROFS;
- return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
- bch2_inode_flush_nocow_writes(c, inode);
+ struct bch_inode_unpacked u;
+ int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?:
+ bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
+ bch2_inode_flush_nocow_writes(c, inode);
+ bch2_write_ref_put(c, BCH_WRITE_REF_fsync);
+ return ret;
}
int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
BUG_ON(!old);
if (unlikely(old != inode)) {
- discard_new_inode(&inode->v);
+ __destroy_inode(&inode->v);
+ kmem_cache_free(bch2_inode_cache, inode);
inode = old;
} else {
mutex_lock(&c->vfs_inodes_lock);
if (unlikely(!inode)) {
int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM);
- if (ret && inode)
- discard_new_inode(&inode->v);
+ if (ret && inode) {
+ __destroy_inode(&inode->v);
+ kmem_cache_free(bch2_inode_cache, inode);
+ }
if (ret)
return ERR_PTR(ret);
}
return dget(sb->s_root);
err_put_super:
+ __bch2_fs_stop(c);
deactivate_locked_super(sb);
return ERR_PTR(bch2_err_class(ret));
}
#include "fsck.h"
#include "inode.h"
#include "keylist.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "snapshot.h"
#include "super.h"
#include "xattr.h"
u32 *snapshot, u64 *inum)
{
struct bch_subvolume s;
- int ret;
-
- ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
+ int ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
*snapshot = le32_to_cpu(s.snapshot);
*inum = le64_to_cpu(s.inode);
bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
- ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
- &dir_hash_info, &iter,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
+ &dir_hash_info, &iter,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
bch2_trans_iter_exit(trans, &iter);
err:
bch_err_fn(c, ret);
/* Get lost+found, create if it doesn't exist: */
static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
- struct bch_inode_unpacked *lostfound)
+ struct bch_inode_unpacked *lostfound,
+ u64 reattaching_inum)
{
struct bch_fs *c = trans->c;
struct qstr lostfound_str = QSTR("lost+found");
return ret;
subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };
- u32 subvol_snapshot;
- ret = subvol_lookup(trans, le32_to_cpu(st.master_subvol),
- &subvol_snapshot, &root_inum.inum);
- bch_err_msg(c, ret, "looking up root subvol");
+ struct bch_subvolume subvol;
+ ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol),
+ false, 0, &subvol);
+ bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u",
+ le32_to_cpu(st.master_subvol), snapshot);
if (ret)
return ret;
+ if (!subvol.inode) {
+ struct btree_iter iter;
+ struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)),
+ 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(subvol);
+ if (ret)
+ return ret;
+
+ subvol->v.inode = cpu_to_le64(reattaching_inum);
+ bch2_trans_iter_exit(trans, &iter);
+ }
+
+ root_inum.inum = le64_to_cpu(subvol.inode);
+
struct bch_inode_unpacked root_inode;
struct bch_hash_info root_hash_info;
u32 root_inode_snapshot = snapshot;
ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
- bch_err_msg(c, ret, "looking up root inode");
+ bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
+ root_inum.inum, le32_to_cpu(st.master_subvol));
if (ret)
return ret;
snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
}
- ret = lookup_lostfound(trans, dirent_snapshot, &lostfound);
+ ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum);
if (ret)
return ret;
return ret;
}
+static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum)
+{
+ struct bch_fs *c = trans->c;
+
+ if (!bch2_snapshot_is_leaf(c, snapshotid)) {
+ bch_err(c, "need to reconstruct subvol, but have interior node snapshot");
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+
+ /*
+ * If inum isn't set, that means we're being called from check_dirents,
+ * not check_inodes - the root of this subvolume doesn't exist or we
+ * would have found it there:
+ */
+ if (!inum) {
+ struct btree_iter inode_iter = {};
+ struct bch_inode_unpacked new_inode;
+ u64 cpu = raw_smp_processor_id();
+
+ bch2_inode_init_early(c, &new_inode);
+ bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL);
+
+ new_inode.bi_subvol = subvolid;
+
+ int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?:
+ bch2_btree_iter_traverse(&inode_iter) ?:
+ bch2_inode_write(trans, &inode_iter, &new_inode);
+ bch2_trans_iter_exit(trans, &inode_iter);
+ if (ret)
+ return ret;
+
+ inum = new_inode.bi_inum;
+ }
+
+ bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum);
+
+ struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
+ int ret = PTR_ERR_OR_ZERO(new_subvol);
+ if (ret)
+ return ret;
+
+ bkey_subvolume_init(&new_subvol->k_i);
+ new_subvol->k.p.offset = subvolid;
+ new_subvol->v.snapshot = cpu_to_le32(snapshotid);
+ new_subvol->v.inode = cpu_to_le64(inum);
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0);
+ if (ret)
+ return ret;
+
+ struct btree_iter iter;
+ struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_snapshots, POS(0, snapshotid),
+ 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(s);
+ bch_err_msg(c, ret, "getting snapshot %u", snapshotid);
+ if (ret)
+ return ret;
+
+ u32 snapshot_tree = le32_to_cpu(s->v.tree);
+
+ s->v.subvol = cpu_to_le32(subvolid);
+ SET_BCH_SNAPSHOT_SUBVOL(&s->v, true);
+ bch2_trans_iter_exit(trans, &iter);
+
+ struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_snapshot_trees, POS(0, snapshot_tree),
+ 0, snapshot_tree);
+ ret = PTR_ERR_OR_ZERO(st);
+ bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree);
+ if (ret)
+ return ret;
+
+ if (!st->v.master_subvol)
+ st->v.master_subvol = cpu_to_le32(subvolid);
+
+ bch2_trans_iter_exit(trans, &iter);
+ return 0;
+}
+
+static int reconstruct_inode(struct btree_trans *trans, u32 snapshot, u64 inum, u64 size, unsigned mode)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_inode_unpacked new_inode;
+
+ bch2_inode_init_early(c, &new_inode);
+ bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, mode|0755, 0, NULL);
+ new_inode.bi_size = size;
+ new_inode.bi_inum = inum;
+
+ return __bch2_fsck_write_inode(trans, &new_inode, snapshot);
+}
+
+static int reconstruct_reg_inode(struct btree_trans *trans, u32 snapshot, u64 inum)
+{
+ struct btree_iter iter = {};
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0);
+ struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter);
+ bch2_trans_iter_exit(trans, &iter);
+ int ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ return reconstruct_inode(trans, snapshot, inum, k.k->p.offset << 9, S_IFREG);
+}
+
struct snapshots_seen_entry {
u32 id;
u32 equiv;
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
+ if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
+ ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum);
+ goto do_update;
+ }
+
if (fsck_err_on(ret,
c, inode_bi_subvol_missing,
"inode %llu:%u bi_subvol points to missing subvolume %u",
do_update = true;
}
}
-
+do_update:
if (do_update) {
ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
bch_err_msg(c, ret, "in fsck updating inode");
i->count = count2;
if (i->count != count2) {
- bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
- w->last_pos.inode, i->snapshot, i->count, count2);
+ bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
+ w->last_pos.inode, i->snapshot, i->count, count2);
return -BCH_ERR_internal_fsck_err;
}
goto err;
}
- ret = extent_ends_at(c, extent_ends, seen, k);
- if (ret)
- goto err;
-
extent_ends->last_pos = k.k->p;
err:
return ret;
goto err;
if (k.k->type != KEY_TYPE_whiteout) {
+ if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
+ ret = reconstruct_reg_inode(trans, k.k->p.snapshot, k.k->p.inode) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto err;
+
+ inode->last_pos.inode--;
+ ret = -BCH_ERR_transaction_restart_nested;
+ goto err;
+ }
+
if (fsck_err_on(!i, c, extent_in_missing_inode,
"extent in missing inode:\n %s",
(printbuf_reset(&buf),
i->seen_this_pos = true;
}
+
+ if (k.k->type != KEY_TYPE_whiteout) {
+ ret = extent_ends_at(c, extent_ends, s, k);
+ if (ret)
+ goto err;
+ }
out:
err:
fsck_err:
return count2;
if (i->count != count2) {
- bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
- i->count, count2);
+ bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
+ w->last_pos.inode, i->snapshot, i->count, count2);
i->count = count2;
if (i->inode.bi_nlink == i->count)
continue;
u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
u32 parent_snapshot;
+ u32 new_parent_subvol = 0;
u64 parent_inum;
struct printbuf buf = PRINTBUF;
int ret = 0;
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
+ if (ret ||
+ (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) {
+ int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
+ if (ret2 && !bch2_err_matches(ret, ENOENT))
+ return ret2;
+ }
+
+ if (ret &&
+ !new_parent_subvol &&
+ (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
+ /*
+ * Couldn't find a subvol for dirent's snapshot - but we lost
+ * subvols, so we need to reconstruct:
+ */
+ ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0);
+ if (ret)
+ return ret;
+
+ parent_snapshot = d.k->p.snapshot;
+ }
+
if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol,
"dirent parent_subvol points to missing subvolume\n%s",
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
"dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
parent_snapshot,
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
- u32 new_parent_subvol;
- ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
- if (ret)
- goto err;
+ if (!new_parent_subvol) {
+ bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot);
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
ret = PTR_ERR_OR_ZERO(new_dirent);
ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
if (ret && !bch2_err_matches(ret, ENOENT))
- return ret;
+ goto err;
+
+ if (ret) {
+ bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum);
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ ret = 0;
+ goto err;
+ }
- if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol,
+ if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol,
c, inode_bi_parent_wrong,
"subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
target_inum,
subvol_root.bi_parent_subvol = parent_subvol;
ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
if (ret)
- return ret;
+ goto err;
}
ret = check_dirent_target(trans, iter, d, &subvol_root,
target_snapshot);
if (ret)
- return ret;
+ goto err;
out:
err:
fsck_err:
struct snapshots_seen *s)
{
struct bch_fs *c = trans->c;
- struct bkey_s_c_dirent d;
struct inode_walker_entry *i;
struct printbuf buf = PRINTBUF;
struct bpos equiv;
*hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
dir->first_this_inode = false;
+ if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
+ ret = reconstruct_inode(trans, k.k->p.snapshot, k.k->p.inode, 0, S_IFDIR) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto err;
+
+ dir->last_pos.inode--;
+ ret = -BCH_ERR_transaction_restart_nested;
+ goto err;
+ }
+
if (fsck_err_on(!i, c, dirent_in_missing_dir_inode,
"dirent in nonexisting directory:\n%s",
(printbuf_reset(&buf),
if (k.k->type != KEY_TYPE_dirent)
goto out;
- d = bkey_s_c_to_dirent(k);
+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
if (d.v->d_type == DT_SUBVOL) {
ret = check_dirent_to_subvol(trans, iter, d);
if (mustfix_fsck_err_on(ret, c, root_subvol_missing,
"root subvol missing")) {
- struct bkey_i_subvolume root_subvol;
+ struct bkey_i_subvolume *root_subvol =
+ bch2_trans_kmalloc(trans, sizeof(*root_subvol));
+ ret = PTR_ERR_OR_ZERO(root_subvol);
+ if (ret)
+ goto err;
snapshot = U32_MAX;
inum = BCACHEFS_ROOT_INO;
- bkey_subvolume_init(&root_subvol.k_i);
- root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL;
- root_subvol.v.flags = 0;
- root_subvol.v.snapshot = cpu_to_le32(snapshot);
- root_subvol.v.inode = cpu_to_le64(inum);
- ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0);
+ bkey_subvolume_init(&root_subvol->k_i);
+ root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL;
+ root_subvol->v.flags = 0;
+ root_subvol->v.snapshot = cpu_to_le32(snapshot);
+ root_subvol->v.inode = cpu_to_le64(inum);
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0);
bch_err_msg(c, ret, "writing root subvol");
if (ret)
goto err;
prt_printf(out, "bi_sectors=%llu", inode->bi_sectors);
prt_newline(out);
- prt_newline(out);
prt_printf(out, "bi_version=%llu", inode->bi_version);
+ prt_newline(out);
#define x(_name, _bits) \
prt_printf(out, #_name "=%llu", (u64) inode->_name); \
ret = 0;
err:
bch2_logged_op_finish(trans, op_k);
+ bch_err_fn(c, ret);
return ret;
}
break;
}
err:
+ bch_err_fn(c, ret);
bch2_logged_op_finish(trans, op_k);
bch2_trans_iter_exit(trans, &iter);
return ret;
if (entry) {
prt_str(out, " type=");
- prt_str(out, bch2_jset_entry_types[entry->type]);
+ bch2_prt_jset_entry_type(out, entry->type);
}
if (!jset) {
jset_entry_for_each_key(entry, k) {
if (!first) {
prt_newline(out);
- prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+ bch2_prt_jset_entry_type(out, entry->type);
+ prt_str(out, ": ");
}
prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k));
struct jset_entry_usage *u =
container_of(entry, struct jset_entry_usage, entry);
- prt_printf(out, "type=%s v=%llu",
- bch2_fs_usage_types[u->entry.btree_id],
- le64_to_cpu(u->v));
+ prt_str(out, "type=");
+ bch2_prt_fs_usage_type(out, u->entry.btree_id);
+ prt_printf(out, " v=%llu", le64_to_cpu(u->v));
}
static int journal_entry_data_usage_validate(struct bch_fs *c,
void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c,
struct jset_entry *entry)
{
+ bch2_prt_jset_entry_type(out, entry->type);
+
if (entry->type < BCH_JSET_ENTRY_NR) {
- prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+ prt_str(out, ": ");
bch2_jset_entry_ops[entry->type].to_text(out, c, entry);
- } else {
- prt_printf(out, "(unknown type %u)", entry->type);
}
}
percpu_ref_put(&ca->io_ref);
}
-static CLOSURE_CALLBACK(do_journal_write)
+static CLOSURE_CALLBACK(journal_write_submit)
{
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
continue_at(cl, journal_write_done, j->wq);
}
+static CLOSURE_CALLBACK(journal_write_preflush)
+{
+ closure_type(w, struct journal_buf, io);
+ struct journal *j = container_of(w, struct journal, buf[w->idx]);
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+
+ if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
+ spin_lock(&j->lock);
+ closure_wait(&j->async_wait, cl);
+ spin_unlock(&j->lock);
+
+ continue_at(cl, journal_write_preflush, j->wq);
+ return;
+ }
+
+ if (w->separate_flush) {
+ for_each_rw_member(c, ca) {
+ percpu_ref_get(&ca->io_ref);
+
+ struct journal_device *ja = &ca->journal;
+ struct bio *bio = &ja->bio[w->idx]->bio;
+ bio_reset(bio, ca->disk_sb.bdev,
+ REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH);
+ bio->bi_end_io = journal_write_endio;
+ bio->bi_private = ca;
+ closure_bio_submit(bio, cl);
+ }
+
+ continue_at(cl, journal_write_submit, j->wq);
+ } else {
+ /*
+ * no need to punt to another work item if we're not waiting on
+ * preflushes
+ */
+ journal_write_submit(&cl->work);
+ }
+}
+
static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
goto err;
if (!JSET_NO_FLUSH(w->data))
- closure_wait_event(&j->async_wait, j->seq_ondisk + 1 == le64_to_cpu(w->data->seq));
-
- if (!JSET_NO_FLUSH(w->data) && w->separate_flush) {
- for_each_rw_member(c, ca) {
- percpu_ref_get(&ca->io_ref);
-
- struct journal_device *ja = &ca->journal;
- struct bio *bio = &ja->bio[w->idx]->bio;
- bio_reset(bio, ca->disk_sb.bdev,
- REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH);
- bio->bi_end_io = journal_write_endio;
- bio->bi_private = ca;
- closure_bio_submit(bio, cl);
- }
- }
-
- continue_at(cl, do_journal_write, j->wq);
+ continue_at(cl, journal_write_preflush, j->wq);
+ else
+ continue_at(cl, journal_write_submit, j->wq);
return;
no_io:
continue_at(cl, journal_write_done, j->wq);
track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
trace_and_count(c, journal_full, c);
+ mod_bit(JOURNAL_SPACE_LOW, &j->flags, low_on_space || low_on_pin);
+
swap(watermark, j->watermark);
if (watermark > j->watermark)
journal_wake(j);
return ret ?: bch2_blacklist_table_initialize(c);
}
-static int journal_seq_blacklist_table_cmp(const void *_l,
- const void *_r, size_t size)
+static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r)
{
const struct journal_seq_blacklist_table_entry *l = _l;
const struct journal_seq_blacklist_table_entry *r = _r;
JOURNAL_STARTED,
JOURNAL_MAY_SKIP_FLUSH,
JOURNAL_NEED_FLUSH_WRITE,
+ JOURNAL_SPACE_LOW,
};
/* Reasons we may fail to get a journal reservation: */
const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
struct bkey_buf sk;
u32 restart_count = trans->restart_count;
- int ret;
if (!fn)
return 0;
bch2_bkey_buf_init(&sk);
bch2_bkey_buf_reassemble(&sk, c, k);
- ret = drop_locks_do(trans, (bch2_fs_lazy_rw(c), 0)) ?:
- fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count);
+ fn->resume(trans, sk.k);
bch2_bkey_buf_exit(&sk, c);
- return ret;
+
+ return trans_was_restarted(trans, restart_count);
}
int bch2_resume_logged_ops(struct bch_fs *c)
d, mean, stddev, weighted_mean, weighted_stddev);
}
-static void mean_and_variance_test_2(struct kunit *test)
-{
- s64 d[] = { 100, 10, 10, 10, 10, 10, 10 };
- s64 mean[] = { 10, 10, 10, 10, 10, 10, 10 };
- s64 stddev[] = { 9, 9, 9, 9, 9, 9, 9 };
- s64 weighted_mean[] = { 32, 27, 22, 19, 17, 15, 14 };
- s64 weighted_stddev[] = { 38, 35, 31, 27, 24, 21, 18 };
-
- do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
- d, mean, stddev, weighted_mean, weighted_stddev);
-}
-
/* Test behaviour where we switch from one steady state to another: */
-static void mean_and_variance_test_3(struct kunit *test)
+static void mean_and_variance_test_2(struct kunit *test)
{
s64 d[] = { 100, 100, 100, 100, 100 };
s64 mean[] = { 22, 32, 40, 46, 50 };
d, mean, stddev, weighted_mean, weighted_stddev);
}
-static void mean_and_variance_test_4(struct kunit *test)
-{
- s64 d[] = { 100, 100, 100, 100, 100 };
- s64 mean[] = { 10, 11, 12, 13, 14 };
- s64 stddev[] = { 9, 13, 15, 17, 19 };
- s64 weighted_mean[] = { 32, 49, 61, 71, 78 };
- s64 weighted_stddev[] = { 38, 44, 44, 41, 38 };
-
- do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
- d, mean, stddev, weighted_mean, weighted_stddev);
-}
-
static void mean_and_variance_fast_divpow2(struct kunit *test)
{
s64 i;
KUNIT_CASE(mean_and_variance_weighted_advanced_test),
KUNIT_CASE(mean_and_variance_test_1),
KUNIT_CASE(mean_and_variance_test_2),
- KUNIT_CASE(mean_and_variance_test_3),
- KUNIT_CASE(mean_and_variance_test_4),
{}
};
#include "disk_groups.h"
#include "error.h"
#include "opts.h"
+#include "recovery_passes.h"
#include "super-io.h"
#include "util.h"
NULL
};
-const char * const bch2_csum_types[] = {
+static const char * const __bch2_csum_types[] = {
BCH_CSUM_TYPES()
NULL
};
NULL
};
-const char * const __bch2_compression_types[] = {
+static const char * const __bch2_compression_types[] = {
BCH_COMPRESSION_TYPES()
NULL
};
NULL
};
-const char * const bch2_jset_entry_types[] = {
+static const char * const __bch2_jset_entry_types[] = {
BCH_JSET_ENTRY_TYPES()
NULL
};
-const char * const bch2_fs_usage_types[] = {
+static const char * const __bch2_fs_usage_types[] = {
BCH_FS_USAGE_TYPES()
NULL
};
#undef x
+static void prt_str_opt_boundscheck(struct printbuf *out, const char * const opts[],
+ unsigned nr, const char *type, unsigned idx)
+{
+ if (idx < nr)
+ prt_str(out, opts[idx]);
+ else
+ prt_printf(out, "(unknown %s %u)", type, idx);
+}
+
+#define PRT_STR_OPT_BOUNDSCHECKED(name, type) \
+void bch2_prt_##name(struct printbuf *out, type t) \
+{ \
+ prt_str_opt_boundscheck(out, __bch2_##name##s, ARRAY_SIZE(__bch2_##name##s) - 1, #name, t);\
+}
+
+PRT_STR_OPT_BOUNDSCHECKED(jset_entry_type, enum bch_jset_entry_type);
+PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type, enum bch_fs_usage_type);
+PRT_STR_OPT_BOUNDSCHECKED(data_type, enum bch_data_type);
+PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type);
+PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type);
+
static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
struct printbuf *err)
{
#define OPT_STR(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = ARRAY_SIZE(_choices), \
.choices = _choices
+#define OPT_STR_NOLIMIT(_choices) .type = BCH_OPT_STR, \
+ .min = 0, .max = U64_MAX, \
+ .choices = _choices
#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn
#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \
extern const char * const bch2_sb_features[];
extern const char * const bch2_sb_compat[];
extern const char * const __bch2_btree_ids[];
-extern const char * const bch2_csum_types[];
extern const char * const bch2_csum_opts[];
-extern const char * const __bch2_compression_types[];
extern const char * const bch2_compression_opts[];
extern const char * const bch2_str_hash_types[];
extern const char * const bch2_str_hash_opts[];
extern const char * const __bch2_data_types[];
extern const char * const bch2_member_states[];
-extern const char * const bch2_jset_entry_types[];
-extern const char * const bch2_fs_usage_types[];
extern const char * const bch2_d_types[];
+void bch2_prt_jset_entry_type(struct printbuf *, enum bch_jset_entry_type);
+void bch2_prt_fs_usage_type(struct printbuf *, enum bch_fs_usage_type);
+void bch2_prt_data_type(struct printbuf *, enum bch_data_type);
+void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type);
+void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type);
+
static inline const char *bch2_d_type_str(unsigned d_type)
{
return (d_type < BCH_DT_MAX ? bch2_d_types[d_type] : NULL) ?: "(bad d_type)";
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
- NULL, "Don't replay the journal") \
- x(keep_journal, u8, \
+ NULL, "Exit recovery immediately prior to journal replay")\
+ x(recovery_pass_last, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_STR_NOLIMIT(bch2_recovery_passes), \
+ BCH2_NO_SB_OPT, 0, \
+ NULL, "Exit recovery after specified pass") \
+ x(retain_recovery_info, u8, \
0, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
- NULL, "Don't free journal entries/keys after startup")\
+ NULL, "Don't free journal entries/keys, scanned btree nodes after startup")\
x(read_entire_journal, u8, \
0, \
OPT_BOOL(), \
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
-#include "backpointers.h"
-#include "bkey_buf.h"
#include "alloc_background.h"
-#include "btree_gc.h"
+#include "bkey_buf.h"
#include "btree_journal_iter.h"
+#include "btree_node_scan.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "buckets.h"
#include "dirent.h"
-#include "ec.h"
#include "errcode.h"
#include "error.h"
#include "fs-common.h"
-#include "fsck.h"
#include "journal_io.h"
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
-#include "lru.h"
#include "logged_ops.h"
#include "move.h"
#include "quota.h"
#include "rebalance.h"
#include "recovery.h"
+#include "recovery_passes.h"
#include "replicas.h"
#include "sb-clean.h"
#include "sb-downgrade.h"
#include "snapshot.h"
-#include "subvolume.h"
#include "super-io.h"
#include <linux/sort.h>
#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
-static bool btree_id_is_alloc(enum btree_id id)
+void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
{
- switch (id) {
- case BTREE_ID_alloc:
- case BTREE_ID_backpointers:
- case BTREE_ID_need_discard:
- case BTREE_ID_freespace:
- case BTREE_ID_bucket_gens:
- return true;
- default:
- return false;
+ u64 b = BIT_ULL(btree);
+
+ if (!(c->sb.btrees_lost_data & b)) {
+ bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree));
+
+ mutex_lock(&c->sb_lock);
+ bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
}
}
/* for -o reconstruct_alloc: */
-static void do_reconstruct_alloc(struct bch_fs *c)
+static void bch2_reconstruct_alloc(struct bch_fs *c)
{
bch2_journal_log_msg(c, "dropping alloc info");
bch_info(c, "dropping and reconstructing all alloc info");
c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
- struct journal_keys *keys = &c->journal_keys;
- size_t src, dst;
- move_gap(keys, keys->nr);
-
- for (src = 0, dst = 0; src < keys->nr; src++)
- if (!btree_id_is_alloc(keys->data[src].btree_id))
- keys->data[dst++] = keys->data[src];
- keys->nr = keys->gap = dst;
+ bch2_shoot_down_journal_keys(c, BTREE_ID_alloc,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ bch2_shoot_down_journal_keys(c, BTREE_ID_freespace,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
}
/*
return cmp_int(l->journal_seq, r->journal_seq);
}
-static int bch2_journal_replay(struct bch_fs *c)
+int bch2_journal_replay(struct bch_fs *c)
{
struct journal_keys *keys = &c->journal_keys;
DARRAY(struct journal_key *) keys_sorted = { 0 };
u64 start_seq = c->journal_replay_seq_start;
u64 end_seq = c->journal_replay_seq_start;
struct btree_trans *trans = bch2_trans_get(c);
+ bool immediate_flush = false;
int ret = 0;
if (keys->nr) {
darray_for_each(*keys, k) {
cond_resched();
+ /*
+ * k->allocated means the key wasn't read in from the journal,
+ * rather it was from early repair code
+ */
+ if (k->allocated)
+ immediate_flush = true;
+
/* Skip fastpath if we're low on space in the journal */
ret = c->journal.watermark ? -1 :
commit_do(trans, NULL, NULL,
struct journal_key *k = *kp;
- replay_now_at(j, k->journal_seq);
+ if (k->journal_seq)
+ replay_now_at(j, k->journal_seq);
+ else
+ replay_now_at(j, j->replay_journal_seq_end);
ret = commit_do(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc|
bch2_trans_put(trans);
trans = NULL;
- if (!c->opts.keep_journal)
+ if (!c->opts.retain_recovery_info &&
+ c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
bch2_journal_keys_put_initial(c);
replay_now_at(j, j->replay_journal_seq_end);
bch2_journal_set_replay_done(j);
+ /* if we did any repair, flush it immediately */
+ if (immediate_flush) {
+ bch2_journal_flush_all_pins(&c->journal);
+ ret = bch2_journal_meta(&c->journal);
+ }
+
if (keys->nr)
bch2_journal_log_msg(c, "journal replay finished");
err:
static int read_btree_roots(struct bch_fs *c)
{
- unsigned i;
int ret = 0;
- for (i = 0; i < btree_id_nr_alive(c); i++) {
+ for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
if (!r->alive)
if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc)
continue;
- if (r->error) {
- __fsck_err(c,
- btree_id_is_alloc(i)
- ? FSCK_CAN_IGNORE : 0,
- btree_root_bkey_invalid,
- "invalid btree root %s",
- bch2_btree_id_str(i));
- if (i == BTREE_ID_alloc)
+ if (mustfix_fsck_err_on((ret = r->error),
+ c, btree_root_bkey_invalid,
+ "invalid btree root %s",
+ bch2_btree_id_str(i)) ||
+ mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)),
+ c, btree_root_read_error,
+ "error reading btree root %s l=%u: %s",
+ bch2_btree_id_str(i), r->level, bch2_err_str(ret))) {
+ if (btree_id_is_alloc(i)) {
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs);
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
- }
+ r->error = 0;
+ } else if (!(c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) {
+ bch_info(c, "will run btree node scan");
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
+ }
- ret = bch2_btree_root_read(c, i, &r->key, r->level);
- if (ret) {
- fsck_err(c,
- btree_root_read_error,
- "error reading btree root %s",
- bch2_btree_id_str(i));
- if (btree_id_is_alloc(i))
- c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
ret = 0;
+ bch2_btree_lost_data(c, i);
}
}
- for (i = 0; i < BTREE_ID_NR; i++) {
+ for (unsigned i = 0; i < BTREE_ID_NR; i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
- if (!r->b) {
+ if (!r->b && !r->error) {
r->alive = false;
r->level = 0;
- bch2_btree_root_alloc(c, i);
+ bch2_btree_root_alloc_fake(c, i, 0);
}
}
fsck_err:
return ret;
}
-static int bch2_initialize_subvolumes(struct bch_fs *c)
-{
- struct bkey_i_snapshot_tree root_tree;
- struct bkey_i_snapshot root_snapshot;
- struct bkey_i_subvolume root_volume;
- int ret;
-
- bkey_snapshot_tree_init(&root_tree.k_i);
- root_tree.k.p.offset = 1;
- root_tree.v.master_subvol = cpu_to_le32(1);
- root_tree.v.root_snapshot = cpu_to_le32(U32_MAX);
-
- bkey_snapshot_init(&root_snapshot.k_i);
- root_snapshot.k.p.offset = U32_MAX;
- root_snapshot.v.flags = 0;
- root_snapshot.v.parent = 0;
- root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
- root_snapshot.v.tree = cpu_to_le32(1);
- SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
-
- bkey_subvolume_init(&root_volume.k_i);
- root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
- root_volume.v.flags = 0;
- root_volume.v.snapshot = cpu_to_le32(U32_MAX);
- root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO);
-
- ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?:
- bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?:
- bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0);
- bch_err_fn(c, ret);
- return ret;
-}
-
-static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
-{
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bch_inode_unpacked inode;
- int ret;
-
- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
- SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
- ret = bkey_err(k);
- if (ret)
- return ret;
-
- if (!bkey_is_inode(k.k)) {
- bch_err(trans->c, "root inode not found");
- ret = -BCH_ERR_ENOENT_inode;
- goto err;
- }
-
- ret = bch2_inode_unpack(k, &inode);
- BUG_ON(ret);
-
- inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
-
- ret = bch2_inode_write(trans, &iter, &inode);
-err:
- bch2_trans_iter_exit(trans, &iter);
- return ret;
-}
-
-/* set bi_subvol on root inode */
-noinline_for_stack
-static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
-{
- int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
- __bch2_fs_upgrade_for_subvolumes(trans));
- bch_err_fn(c, ret);
- return ret;
-}
-
-const char * const bch2_recovery_passes[] = {
-#define x(_fn, ...) #_fn,
- BCH_RECOVERY_PASSES()
-#undef x
- NULL
-};
-
-static int bch2_check_allocations(struct bch_fs *c)
-{
- return bch2_gc(c, true, c->opts.norecovery);
-}
-
-static int bch2_set_may_go_rw(struct bch_fs *c)
-{
- struct journal_keys *keys = &c->journal_keys;
-
- /*
- * After we go RW, the journal keys buffer can't be modified (except for
- * setting journal_key->overwritten: it will be accessed by multiple
- * threads
- */
- move_gap(keys, keys->nr);
-
- set_bit(BCH_FS_may_go_rw, &c->flags);
-
- if (keys->nr || c->opts.fsck || !c->sb.clean)
- return bch2_fs_read_write_early(c);
- return 0;
-}
-
-struct recovery_pass_fn {
- int (*fn)(struct bch_fs *);
- unsigned when;
-};
-
-static struct recovery_pass_fn recovery_pass_fns[] = {
-#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when },
- BCH_RECOVERY_PASSES()
-#undef x
-};
-
-u64 bch2_recovery_passes_to_stable(u64 v)
-{
- static const u8 map[] = {
-#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
- BCH_RECOVERY_PASSES()
-#undef x
- };
-
- u64 ret = 0;
- for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
- if (v & BIT_ULL(i))
- ret |= BIT_ULL(map[i]);
- return ret;
-}
-
-u64 bch2_recovery_passes_from_stable(u64 v)
-{
- static const u8 map[] = {
-#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
- BCH_RECOVERY_PASSES()
-#undef x
- };
-
- u64 ret = 0;
- for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
- if (v & BIT_ULL(i))
- ret |= BIT_ULL(map[i]);
- return ret;
-}
-
static bool check_version_upgrade(struct bch_fs *c)
{
unsigned latest_version = bcachefs_metadata_version_current;
return false;
}
-u64 bch2_fsck_recovery_passes(void)
-{
- u64 ret = 0;
-
- for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
- if (recovery_pass_fns[i].when & PASS_FSCK)
- ret |= BIT_ULL(i);
- return ret;
-}
-
-static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-{
- struct recovery_pass_fn *p = recovery_pass_fns + pass;
-
- if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
- return false;
- if (c->recovery_passes_explicit & BIT_ULL(pass))
- return true;
- if ((p->when & PASS_FSCK) && c->opts.fsck)
- return true;
- if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
- return true;
- if (p->when & PASS_ALWAYS)
- return true;
- return false;
-}
-
-static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-{
- struct recovery_pass_fn *p = recovery_pass_fns + pass;
- int ret;
-
- if (!(p->when & PASS_SILENT))
- bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
- bch2_recovery_passes[pass]);
- ret = p->fn(c);
- if (ret)
- return ret;
- if (!(p->when & PASS_SILENT))
- bch2_print(c, KERN_CONT " done\n");
-
- return 0;
-}
-
-static int bch2_run_recovery_passes(struct bch_fs *c)
-{
- int ret = 0;
-
- while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
- if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
- unsigned pass = c->curr_recovery_pass;
-
- ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
- if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
- (ret && c->curr_recovery_pass < pass))
- continue;
- if (ret)
- break;
-
- c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
- }
- c->curr_recovery_pass++;
- c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
- }
-
- return ret;
-}
-
-int bch2_run_online_recovery_passes(struct bch_fs *c)
-{
- int ret = 0;
-
- for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
- struct recovery_pass_fn *p = recovery_pass_fns + i;
-
- if (!(p->when & PASS_ONLINE))
- continue;
-
- ret = bch2_run_recovery_pass(c, i);
- if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
- i = c->curr_recovery_pass;
- continue;
- }
- if (ret)
- break;
- }
-
- return ret;
-}
-
int bch2_fs_recovery(struct bch_fs *c)
{
struct bch_sb_field_clean *clean = NULL;
goto err;
}
- if (c->opts.fsck && c->opts.norecovery) {
- bch_err(c, "cannot select both norecovery and fsck");
- ret = -EINVAL;
- goto err;
- }
+ if (c->opts.norecovery)
+ c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1;
if (!c->opts.nochanges) {
mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;
- struct bch_sb_field_ext *ext =
- bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
- if (!ext) {
- ret = -BCH_ERR_ENOSPC_sb;
- mutex_unlock(&c->sb_lock);
- goto err;
- }
-
if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) {
ext->recovery_passes_required[0] |=
cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
goto err;
}
- if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
+ if (!c->sb.clean || c->opts.fsck || c->opts.retain_recovery_info) {
struct genradix_iter iter;
struct journal_replay **i;
c->journal_replay_seq_end = blacklist_seq - 1;
if (c->opts.reconstruct_alloc)
- do_reconstruct_alloc(c);
+ bch2_reconstruct_alloc(c);
zero_out_btree_mem_ptr(&c->journal_keys);
clear_bit(BCH_FS_fsck_running, &c->flags);
+ /* fsync if we fixed errors */
+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
+ bch2_journal_flush_all_pins(&c->journal);
+ bch2_journal_meta(&c->journal);
+ }
+
/* If we fixed errors, verify that fs is actually clean now: */
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
test_bit(BCH_FS_errors_fixed, &c->flags) &&
}
mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;
if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
write_sb = true;
}
- if (!test_bit(BCH_FS_error, &c->flags)) {
- struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
- if (ext &&
- (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) ||
- !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) {
- memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required));
- memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
- write_sb = true;
- }
+ if (!test_bit(BCH_FS_error, &c->flags) &&
+ !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent))) {
+ memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
+ write_sb = true;
+ }
+
+ if (c->opts.fsck &&
+ !test_bit(BCH_FS_error, &c->flags) &&
+ c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 &&
+ ext->btrees_lost_data) {
+ ext->btrees_lost_data = 0;
+ write_sb = true;
}
if (c->opts.fsck &&
out:
bch2_flush_fsck_errs(c);
- if (!c->opts.keep_journal &&
- test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+ if (!c->opts.retain_recovery_info) {
bch2_journal_keys_put_initial(c);
+ bch2_find_btree_nodes_exit(&c->found_btree_nodes);
+ }
kfree(clean);
if (!ret &&
int ret;
bch_notice(c, "initializing new filesystem");
+ set_bit(BCH_FS_new_fs, &c->flags);
mutex_lock(&c->sb_lock);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
}
mutex_unlock(&c->sb_lock);
- c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns);
+ c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
set_bit(BCH_FS_may_go_rw, &c->flags);
for (unsigned i = 0; i < BTREE_ID_NR; i++)
- bch2_btree_root_alloc(c, i);
+ bch2_btree_root_alloc_fake(c, i, 0);
for_each_member_device(c, ca)
bch2_dev_usage_init(ca);
if (ret)
goto err;
- c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1;
+ c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
if (enabled_qtypes(c)) {
ret = bch2_fs_quota_read(c);
#ifndef _BCACHEFS_RECOVERY_H
#define _BCACHEFS_RECOVERY_H
-extern const char * const bch2_recovery_passes[];
+void bch2_btree_lost_data(struct bch_fs *, enum btree_id);
-u64 bch2_recovery_passes_to_stable(u64 v);
-u64 bch2_recovery_passes_from_stable(u64 v);
-
-/*
- * For when we need to rewind recovery passes and run a pass we skipped:
- */
-static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c,
- enum bch_recovery_pass pass)
-{
- if (c->recovery_passes_explicit & BIT_ULL(pass))
- return 0;
-
- bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
- bch2_recovery_passes[pass], pass,
- bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
-
- c->recovery_passes_explicit |= BIT_ULL(pass);
-
- if (c->curr_recovery_pass >= pass) {
- c->curr_recovery_pass = pass;
- c->recovery_passes_complete &= (1ULL << pass) >> 1;
- return -BCH_ERR_restart_recovery;
- } else {
- return 0;
- }
-}
-
-int bch2_run_online_recovery_passes(struct bch_fs *);
-u64 bch2_fsck_recovery_passes(void);
+int bch2_journal_replay(struct bch_fs *);
int bch2_fs_recovery(struct bch_fs *);
int bch2_fs_initialize(struct bch_fs *);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "backpointers.h"
+#include "btree_gc.h"
+#include "btree_node_scan.h"
+#include "ec.h"
+#include "fsck.h"
+#include "inode.h"
+#include "journal.h"
+#include "lru.h"
+#include "logged_ops.h"
+#include "rebalance.h"
+#include "recovery.h"
+#include "recovery_passes.h"
+#include "snapshot.h"
+#include "subvolume.h"
+#include "super.h"
+#include "super-io.h"
+
+const char * const bch2_recovery_passes[] = {
+#define x(_fn, ...) #_fn,
+ BCH_RECOVERY_PASSES()
+#undef x
+ NULL
+};
+
+static int bch2_check_allocations(struct bch_fs *c)
+{
+ return bch2_gc(c, true, false);
+}
+
+static int bch2_set_may_go_rw(struct bch_fs *c)
+{
+ struct journal_keys *keys = &c->journal_keys;
+
+ /*
+ * After we go RW, the journal keys buffer can't be modified (except for
+ * setting journal_key->overwritten: it will be accessed by multiple
+ * threads
+ */
+ move_gap(keys, keys->nr);
+
+ set_bit(BCH_FS_may_go_rw, &c->flags);
+
+ if (keys->nr || c->opts.fsck || !c->sb.clean || c->recovery_passes_explicit)
+ return bch2_fs_read_write_early(c);
+ return 0;
+}
+
+struct recovery_pass_fn {
+ int (*fn)(struct bch_fs *);
+ unsigned when;
+};
+
+static struct recovery_pass_fn recovery_pass_fns[] = {
+#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when },
+ BCH_RECOVERY_PASSES()
+#undef x
+};
+
+static const u8 passes_to_stable_map[] = {
+#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+};
+
+static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
+{
+ return passes_to_stable_map[pass];
+}
+
+u64 bch2_recovery_passes_to_stable(u64 v)
+{
+ u64 ret = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
+ if (v & BIT_ULL(i))
+ ret |= BIT_ULL(passes_to_stable_map[i]);
+ return ret;
+}
+
+u64 bch2_recovery_passes_from_stable(u64 v)
+{
+ static const u8 map[] = {
+#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+ };
+
+ u64 ret = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
+ if (v & BIT_ULL(i))
+ ret |= BIT_ULL(map[i]);
+ return ret;
+}
+
+/*
+ * For when we need to rewind recovery passes and run a pass we skipped:
+ */
+int bch2_run_explicit_recovery_pass(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ if (c->recovery_passes_explicit & BIT_ULL(pass))
+ return 0;
+
+ bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
+ bch2_recovery_passes[pass], pass,
+ bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
+
+ c->recovery_passes_explicit |= BIT_ULL(pass);
+
+ if (c->curr_recovery_pass >= pass) {
+ c->curr_recovery_pass = pass;
+ c->recovery_passes_complete &= (1ULL << pass) >> 1;
+ return -BCH_ERR_restart_recovery;
+ } else {
+ return 0;
+ }
+}
+
+int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
+
+ mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+ if (!test_bit_le64(s, ext->recovery_passes_required)) {
+ __set_bit_le64(s, ext->recovery_passes_required);
+ bch2_write_super(c);
+ }
+ mutex_unlock(&c->sb_lock);
+
+ return bch2_run_explicit_recovery_pass(c, pass);
+}
+
+static void bch2_clear_recovery_pass_required(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
+
+ mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+ if (test_bit_le64(s, ext->recovery_passes_required)) {
+ __clear_bit_le64(s, ext->recovery_passes_required);
+ bch2_write_super(c);
+ }
+ mutex_unlock(&c->sb_lock);
+}
+
+u64 bch2_fsck_recovery_passes(void)
+{
+ u64 ret = 0;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
+ if (recovery_pass_fns[i].when & PASS_FSCK)
+ ret |= BIT_ULL(i);
+ return ret;
+}
+
+static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
+{
+ struct recovery_pass_fn *p = recovery_pass_fns + pass;
+
+ if (c->recovery_passes_explicit & BIT_ULL(pass))
+ return true;
+ if ((p->when & PASS_FSCK) && c->opts.fsck)
+ return true;
+ if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
+ return true;
+ if (p->when & PASS_ALWAYS)
+ return true;
+ return false;
+}
+
+static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
+{
+ struct recovery_pass_fn *p = recovery_pass_fns + pass;
+ int ret;
+
+ if (!(p->when & PASS_SILENT))
+ bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
+ bch2_recovery_passes[pass]);
+ ret = p->fn(c);
+ if (ret)
+ return ret;
+ if (!(p->when & PASS_SILENT))
+ bch2_print(c, KERN_CONT " done\n");
+
+ return 0;
+}
+
+int bch2_run_online_recovery_passes(struct bch_fs *c)
+{
+ int ret = 0;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
+ struct recovery_pass_fn *p = recovery_pass_fns + i;
+
+ if (!(p->when & PASS_ONLINE))
+ continue;
+
+ ret = bch2_run_recovery_pass(c, i);
+ if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
+ i = c->curr_recovery_pass;
+ continue;
+ }
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int bch2_run_recovery_passes(struct bch_fs *c)
+{
+ int ret = 0;
+
+ while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
+ if (c->opts.recovery_pass_last &&
+ c->curr_recovery_pass > c->opts.recovery_pass_last)
+ break;
+
+ if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
+ unsigned pass = c->curr_recovery_pass;
+
+ ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
+ if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
+ (ret && c->curr_recovery_pass < pass))
+ continue;
+ if (ret)
+ break;
+
+ c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
+ }
+
+ c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
+
+ if (!test_bit(BCH_FS_error, &c->flags))
+ bch2_clear_recovery_pass_required(c, c->curr_recovery_pass);
+
+ c->curr_recovery_pass++;
+ }
+
+ return ret;
+}
--- /dev/null
+#ifndef _BCACHEFS_RECOVERY_PASSES_H
+#define _BCACHEFS_RECOVERY_PASSES_H
+
+extern const char * const bch2_recovery_passes[];
+
+u64 bch2_recovery_passes_to_stable(u64 v);
+u64 bch2_recovery_passes_from_stable(u64 v);
+
+u64 bch2_fsck_recovery_passes(void);
+
+int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
+int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass);
+
+int bch2_run_online_recovery_passes(struct bch_fs *);
+int bch2_run_recovery_passes(struct bch_fs *);
+
+#endif /* _BCACHEFS_RECOVERY_PASSES_H */
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_RECOVERY_TYPES_H
-#define _BCACHEFS_RECOVERY_TYPES_H
+#ifndef _BCACHEFS_RECOVERY_PASSES_TYPES_H
+#define _BCACHEFS_RECOVERY_PASSES_TYPES_H
#define PASS_SILENT BIT(0)
#define PASS_FSCK BIT(1)
* must never change:
*/
#define BCH_RECOVERY_PASSES() \
+ x(scan_for_btree_nodes, 37, 0) \
x(check_topology, 4, 0) \
x(alloc_read, 0, PASS_ALWAYS) \
x(stripes_read, 1, PASS_ALWAYS) \
x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \
x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
x(bucket_gens_init, 17, 0) \
+ x(reconstruct_snapshots, 38, 0) \
x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \
x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \
x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \
x(fs_upgrade_for_subvolumes, 22, 0) \
- x(resume_logged_ops, 23, PASS_ALWAYS) \
x(check_inodes, 24, PASS_FSCK) \
x(check_extents, 25, PASS_FSCK) \
x(check_indirect_extents, 26, PASS_FSCK) \
x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \
x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
x(check_nlinks, 31, PASS_FSCK) \
+ x(resume_logged_ops, 23, PASS_ALWAYS) \
x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \
x(fix_reflink_p, 33, 0) \
x(set_fs_needs_rebalance, 34, 0) \
#define x(n, id, when) BCH_RECOVERY_PASS_##n,
BCH_RECOVERY_PASSES()
#undef x
+ BCH_RECOVERY_PASS_NR
};
/* But we also need stable identifiers that can be used in the superblock */
#undef x
};
-#endif /* _BCACHEFS_RECOVERY_TYPES_H */
+#endif /* _BCACHEFS_RECOVERY_PASSES_TYPES_H */
} else {
bkey_error_init(update);
update->k.p = p.k->p;
- update->k.p.offset = next_idx;
- update->k.size = next_idx - *idx;
+ update->k.size = p.k->size;
set_bkey_val_u64s(&update->k, 0);
}
#include "replicas.h"
#include "super-io.h"
+#include <linux/sort.h>
+
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
struct bch_replicas_cpu *);
/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
-static int bch2_memcmp(const void *l, const void *r, size_t size)
+static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
+ size_t size = (size_t) priv;
return memcmp(l, r, size);
}
static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
- eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
+ eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
+ bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
}
static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
verify_replicas_entry(search);
-#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size)
+#define entry_cmp(_l, _r) memcmp(_l, _r, entry_size)
idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
entry_cmp, search);
#undef entry_cmp
{
unsigned i;
- sort_cmp_size(cpu_r->entries,
- cpu_r->nr,
- cpu_r->entry_size,
- bch2_memcmp, NULL);
+ sort_r(cpu_r->entries,
+ cpu_r->nr,
+ cpu_r->entry_size,
+ bch2_memcmp, NULL,
+ (void *)(size_t)cpu_r->entry_size);
for (i = 0; i < cpu_r->nr; i++) {
struct bch_replicas_entry_v1 *e =
for (entry = clean->start;
entry < (struct jset_entry *) vstruct_end(&clean->field);
entry = vstruct_next(entry)) {
+ if (vstruct_end(entry) > vstruct_end(&clean->field)) {
+ bch_err(c, "journal entry (u64s %u) overran end of superblock clean section (u64s %u) by %zu",
+ le16_to_cpu(entry->u64s), le32_to_cpu(clean->field.u64s),
+ (u64 *) vstruct_end(entry) - (u64 *) vstruct_end(&clean->field));
+ bch2_sb_error_count(c, BCH_FSCK_ERR_sb_clean_entry_overrun);
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+
ret = bch2_journal_entry_validate(c, NULL, entry,
le16_to_cpu(c->disk_sb.sb->version),
BCH_SB_BIG_ENDIAN(c->disk_sb.sb),
#include "bcachefs.h"
#include "darray.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "super-io.h"
BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \
x(btree_subvolume_children, \
BIT_ULL(BCH_RECOVERY_PASS_check_subvols), \
- BCH_FSCK_ERR_subvol_children_not_set)
+ BCH_FSCK_ERR_subvol_children_not_set) \
+ x(mi_btree_bitmap, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_btree_bitmap_not_marked)
#define DOWNGRADE_TABLE()
x(bucket_gens_nonzero_for_invalid_buckets, 122) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
x(need_discard_freespace_key_bad, 124) \
- x(backpointer_pos_wrong, 125) \
+ x(backpointer_bucket_offset_wrong, 125) \
x(backpointer_to_missing_device, 126) \
x(backpointer_to_missing_alloc, 127) \
x(backpointer_to_missing_ptr, 128) \
x(subvol_children_bad, 257) \
x(subvol_loop, 258) \
x(subvol_unreachable, 259) \
- x(btree_node_bkey_bad_u64s, 260)
+ x(btree_node_bkey_bad_u64s, 260) \
+ x(btree_node_topology_empty_interior_node, 261) \
+ x(btree_ptr_v2_min_key_bad, 262) \
+ x(btree_root_unreadable_and_scan_found_nothing, 263) \
+ x(snapshot_node_missing, 264) \
+ x(dup_backpointer_to_bad_csum_extent, 265) \
+ x(btree_bitmap_not_marked, 266) \
+ x(sb_clean_entry_overrun, 267)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "btree_cache.h"
#include "disk_groups.h"
#include "opts.h"
#include "replicas.h"
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
}
+
+/*
+ * Per member "range has btree nodes" bitmap:
+ *
+ * This is so that if we ever have to run the btree node scan to repair we don't
+ * have to scan full devices:
+ */
+
+bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
+{
+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
+ if (!bch2_dev_btree_bitmap_marked_sectors(bch_dev_bkey_exists(c, ptr->dev),
+ ptr->offset, btree_sectors(c)))
+ return false;
+ return true;
+}
+
+static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
+ u64 start, unsigned sectors)
+{
+ struct bch_member *m = __bch2_members_v2_get_mut(mi, dev);
+ u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap);
+
+ u64 end = start + sectors;
+
+ int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
+ if (resize > 0) {
+ u64 new_bitmap = 0;
+
+ for (unsigned i = 0; i < 64; i++)
+ if (bitmap & BIT_ULL(i))
+ new_bitmap |= BIT_ULL(i >> resize);
+ bitmap = new_bitmap;
+ m->btree_bitmap_shift += resize;
+ }
+
+ for (unsigned bit = start >> m->btree_bitmap_shift;
+ (u64) bit << m->btree_bitmap_shift < end;
+ bit++)
+ bitmap |= BIT_ULL(bit);
+
+ m->btree_allocated_bitmap = cpu_to_le64(bitmap);
+}
+
+void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
+{
+ lockdep_assert_held(&c->sb_lock);
+
+ struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
+ __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
+}
#define _BCACHEFS_SB_MEMBERS_H
#include "darray.h"
+#include "bkey_types.h"
extern char * const bch2_member_error_strs[];
: 1,
.freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
.valid = bch2_member_exists(mi),
+ .btree_bitmap_shift = mi->btree_bitmap_shift,
+ .btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap),
};
}
void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
void bch2_dev_errors_reset(struct bch_dev *);
+static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start, unsigned sectors)
+{
+ u64 end = start + sectors;
+
+ if (end > 64ULL << ca->mi.btree_bitmap_shift)
+ return false;
+
+ for (unsigned bit = start >> ca->mi.btree_bitmap_shift;
+ (u64) bit << ca->mi.btree_bitmap_shift < end;
+ bit++)
+ if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit)))
+ return false;
+ return true;
+}
+
+bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
+void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
+
#endif /* _BCACHEFS_SB_MEMBERS_H */
#include "errcode.h"
#include "error.h"
#include "fs.h"
+#include "recovery_passes.h"
#include "snapshot.h"
#include <linux/random.h>
static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
{
- while (id && id < ancestor)
- id = __snapshot_t(t, id)->parent;
+ while (id && id < ancestor) {
+ const struct snapshot_t *s = __snapshot_t(t, id);
+ id = s ? s->parent : 0;
+ }
return id == ancestor;
}
static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
{
const struct snapshot_t *s = __snapshot_t(t, id);
+ if (!s)
+ return 0;
if (s->skip[2] <= ancestor)
return s->skip[2];
return s->parent;
}
+static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor)
+{
+ const struct snapshot_t *s = __snapshot_t(t, id);
+ if (!s)
+ return false;
+
+ return test_bit(ancestor - id - 1, s->is_ancestor);
+}
+
bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
bool ret;
rcu_read_lock();
struct snapshot_table *t = rcu_dereference(c->snapshots);
- if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
+ if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
goto out;
}
while (id && id < ancestor - IS_ANCESTOR_BITMAP)
id = get_ancestor_below(t, id, ancestor);
- if (id && id < ancestor) {
- ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
+ ret = id && id < ancestor
+ ? test_ancestor_bitmap(t, id, ancestor)
+ : id == ancestor;
- EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
- } else {
- ret = id == ancestor;
- }
+ EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
out:
rcu_read_unlock();
static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
- size_t new_size;
struct snapshot_table *new, *old;
- new_size = max(16UL, roundup_pow_of_two(idx + 1));
+ size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
+ size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);
- new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
+ new = kvzalloc(new_bytes, GFP_KERNEL);
if (!new)
return NULL;
+ new->nr = new_size;
+
old = rcu_dereference_protected(c->snapshots, true);
if (old)
- memcpy(new->s,
- rcu_dereference_protected(c->snapshots, true)->s,
- sizeof(new->s[0]) * c->snapshot_table_size);
+ memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);
rcu_assign_pointer(c->snapshots, new);
- c->snapshot_table_size = new_size;
- kvfree_rcu_mightsleep(old);
+ kvfree_rcu(old, rcu);
- return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+ return &rcu_dereference_protected(c->snapshots,
+ lockdep_is_held(&c->snapshot_table_lock))->s[idx];
}
static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
+ struct snapshot_table *table =
+ rcu_dereference_protected(c->snapshots,
+ lockdep_is_held(&c->snapshot_table_lock));
lockdep_assert_held(&c->snapshot_table_lock);
- if (likely(idx < c->snapshot_table_size))
- return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+ if (likely(table && idx < table->nr))
+ return &table->s[idx];
return __snapshot_t_mut(c, id);
}
u32 subvol_id;
ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
+ bch_err_fn(c, ret);
+
+ if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
+ ret = 0;
+ goto err;
+ }
+
if (ret)
goto err;
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
u32 real_depth;
struct printbuf buf = PRINTBUF;
- bool should_have_subvol;
u32 i, id;
int ret = 0;
}
}
- should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
+ bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
!BCH_SNAPSHOT_DELETED(&s);
if (should_have_subvol) {
return ret;
}
+static int check_snapshot_exists(struct btree_trans *trans, u32 id)
+{
+ struct bch_fs *c = trans->c;
+
+ if (bch2_snapshot_equiv(c, id))
+ return 0;
+
+ u32 tree_id;
+ int ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
+ if (ret)
+ return ret;
+
+ struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot));
+ ret = PTR_ERR_OR_ZERO(snapshot);
+ if (ret)
+ return ret;
+
+ bkey_snapshot_init(&snapshot->k_i);
+ snapshot->k.p = POS(0, id);
+ snapshot->v.tree = cpu_to_le32(tree_id);
+ snapshot->v.btime.lo = cpu_to_le64(bch2_current_time(c));
+
+ return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
+ bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
+ bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?:
+ bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i));
+}
+
+/* Figure out which snapshot nodes belong in the same tree: */
+struct snapshot_tree_reconstruct {
+ enum btree_id btree;
+ struct bpos cur_pos;
+ snapshot_id_list cur_ids;
+ DARRAY(snapshot_id_list) trees;
+};
+
+static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
+{
+ darray_for_each(r->trees, i)
+ darray_exit(i);
+ darray_exit(&r->trees);
+ darray_exit(&r->cur_ids);
+}
+
+static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
+{
+ return r->btree == BTREE_ID_inodes
+ ? r->cur_pos.offset == pos.offset
+ : r->cur_pos.inode == pos.inode;
+}
+
+static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
+{
+ darray_for_each(*l, i)
+ if (snapshot_list_has_id(r, *i))
+ return true;
+ return false;
+}
+
+static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
+{
+ bool first = true;
+ darray_for_each(*s, i) {
+ if (!first)
+ prt_char(out, ' ');
+ first = false;
+ prt_printf(out, "%u", *i);
+ }
+}
+
+static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
+{
+ if (r->cur_ids.nr) {
+ darray_for_each(r->trees, i)
+ if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
+ int ret = snapshot_list_merge(c, i, &r->cur_ids);
+ if (ret)
+ return ret;
+ goto out;
+ }
+ darray_push(&r->trees, r->cur_ids);
+ darray_init(&r->cur_ids);
+ }
+out:
+ r->cur_ids.nr = 0;
+ return 0;
+}
+
+static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
+{
+ if (!same_snapshot(r, pos))
+ snapshot_tree_reconstruct_next(c, r);
+ r->cur_pos = pos;
+ return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
+}
+
+int bch2_reconstruct_snapshots(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct printbuf buf = PRINTBUF;
+ struct snapshot_tree_reconstruct r = {};
+ int ret = 0;
+
+ for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
+ if (btree_type_has_snapshots(btree)) {
+ r.btree = btree;
+
+ ret = for_each_btree_key(trans, iter, btree, POS_MIN,
+ BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_PREFETCH, k, ({
+ get_snapshot_trees(c, &r, k.k->p);
+ }));
+ if (ret)
+ goto err;
+
+ snapshot_tree_reconstruct_next(c, &r);
+ }
+ }
+
+ darray_for_each(r.trees, t) {
+ printbuf_reset(&buf);
+ snapshot_id_list_to_text(&buf, t);
+
+ darray_for_each(*t, id) {
+ if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
+ c, snapshot_node_missing,
+ "snapshot node %u from tree %s missing", *id, buf.buf)) {
+ if (t->nr > 1) {
+ bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ goto err;
+ }
+
+ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_snapshot_exists(trans, *id));
+ if (ret)
+ goto err;
+ }
+ }
+ }
+fsck_err:
+err:
+ bch2_trans_put(trans);
+ snapshot_tree_reconstruct_exit(&r);
+ printbuf_exit(&buf);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
/*
* Mark a snapshot as deleted, for future cleanup:
*/
POS_MIN, 0, k,
(set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
bch_err_fn(c, ret);
+
+ /*
+ * It's important that we check if we need to reconstruct snapshots
+ * before going RW, so we mark that pass as required in the superblock -
+ * otherwise, we could end up deleting keys with missing snapshot nodes
+ * instead
+ */
+ BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
+ test_bit(BCH_FS_may_go_rw, &c->flags));
+
+ if (bch2_err_matches(ret, EIO) ||
+ (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
+ ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
+
return ret;
}
static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id)
{
- return &t->s[U32_MAX - id];
+ u32 idx = U32_MAX - id;
+
+ return likely(t && idx < t->nr)
+ ? &t->s[idx]
+ : NULL;
}
static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)
{
rcu_read_lock();
- id = snapshot_t(c, id)->tree;
+ const struct snapshot_t *s = snapshot_t(c, id);
+ id = s ? s->tree : 0;
rcu_read_unlock();
return id;
static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
{
- return snapshot_t(c, id)->parent;
+ const struct snapshot_t *s = snapshot_t(c, id);
+ return s ? s->parent : 0;
}
static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
-#ifdef CONFIG_BCACHEFS_DEBUG
- u32 parent = snapshot_t(c, id)->parent;
+ const struct snapshot_t *s = snapshot_t(c, id);
+ if (!s)
+ return 0;
- if (parent &&
- snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1)
+ u32 parent = s->parent;
+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBU) &&
+ parent &&
+ s->depth != snapshot_t(c, parent)->depth + 1)
panic("id %u depth=%u parent %u depth=%u\n",
id, snapshot_t(c, id)->depth,
parent, snapshot_t(c, parent)->depth);
return parent;
-#else
- return snapshot_t(c, id)->parent;
-#endif
}
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id)
{
- return snapshot_t(c, id)->equiv;
+ const struct snapshot_t *s = snapshot_t(c, id);
+ return s ? s->equiv : 0;
}
static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
return id == bch2_snapshot_equiv(c, id);
}
-static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
+static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
{
- const struct snapshot_t *s;
- bool ret;
-
rcu_read_lock();
- s = snapshot_t(c, id);
- ret = s->children[0];
+ const struct snapshot_t *s = snapshot_t(c, id);
+ int ret = s ? s->children[0] : -BCH_ERR_invalid_snapshot_node;
rcu_read_unlock();
return ret;
}
-static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
-{
- return !bch2_snapshot_is_internal_node(c, id);
-}
-
-static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
+static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
{
- const struct snapshot_t *s;
- u32 parent = __bch2_snapshot_parent(c, id);
-
- if (!parent)
- return 0;
-
- s = snapshot_t(c, __bch2_snapshot_parent(c, id));
- if (id == s->children[0])
- return s->children[1];
- if (id == s->children[1])
- return s->children[0];
- return 0;
+ int ret = bch2_snapshot_is_internal_node(c, id);
+ if (ret < 0)
+ return ret;
+ return !ret;
}
static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id)
{
- int ret;
-
BUG_ON(snapshot_list_has_id(s, id));
- ret = darray_push(s, id);
+ int ret = darray_push(s, id);
if (ret)
bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
return ret;
}
+static inline int snapshot_list_add_nodup(struct bch_fs *c, snapshot_id_list *s, u32 id)
+{
+ int ret = snapshot_list_has_id(s, id)
+ ? 0
+ : darray_push(s, id);
+ if (ret)
+ bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
+ return ret;
+}
+
+static inline int snapshot_list_merge(struct bch_fs *c, snapshot_id_list *dst, snapshot_id_list *src)
+{
+ darray_for_each(*src, i) {
+ int ret = snapshot_list_add_nodup(c, dst, *i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot *s);
int bch2_snapshot_get_subvol(struct btree_trans *, u32,
int bch2_check_snapshot_trees(struct bch_fs *);
int bch2_check_snapshots(struct bch_fs *);
+int bch2_reconstruct_snapshots(struct bch_fs *);
int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
void bch2_delete_dead_snapshots_work(struct work_struct *);
struct bpos pos)
{
if (!btree_type_has_snapshots(id) ||
- bch2_snapshot_is_leaf(trans->c, pos.snapshot))
+ bch2_snapshot_is_leaf(trans->c, pos.snapshot) > 0)
return 0;
return __bch2_key_has_snapshot_overwrites(trans, id, pos);
return ret;
}
+int bch2_initialize_subvolumes(struct bch_fs *c)
+{
+ struct bkey_i_snapshot_tree root_tree;
+ struct bkey_i_snapshot root_snapshot;
+ struct bkey_i_subvolume root_volume;
+ int ret;
+
+ bkey_snapshot_tree_init(&root_tree.k_i);
+ root_tree.k.p.offset = 1;
+ root_tree.v.master_subvol = cpu_to_le32(1);
+ root_tree.v.root_snapshot = cpu_to_le32(U32_MAX);
+
+ bkey_snapshot_init(&root_snapshot.k_i);
+ root_snapshot.k.p.offset = U32_MAX;
+ root_snapshot.v.flags = 0;
+ root_snapshot.v.parent = 0;
+ root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
+ root_snapshot.v.tree = cpu_to_le32(1);
+ SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
+
+ bkey_subvolume_init(&root_volume.k_i);
+ root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
+ root_volume.v.flags = 0;
+ root_volume.v.snapshot = cpu_to_le32(U32_MAX);
+ root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO);
+
+ ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?:
+ bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?:
+ bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bch_inode_unpacked inode;
+ int ret;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ if (!bkey_is_inode(k.k)) {
+ bch_err(trans->c, "root inode not found");
+ ret = -BCH_ERR_ENOENT_inode;
+ goto err;
+ }
+
+ ret = bch2_inode_unpack(k, &inode);
+ BUG_ON(ret);
+
+ inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
+
+ ret = bch2_inode_write(trans, &iter, &inode);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+/* set bi_subvol on root inode */
+int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
+{
+ int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
+ __bch2_fs_upgrade_for_subvolumes(trans));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
int bch2_fs_subvolumes_init(struct bch_fs *c)
{
INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
int bch2_subvolume_unlink(struct btree_trans *, u32);
int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);
+int bch2_initialize_subvolumes(struct bch_fs *);
+int bch2_fs_upgrade_for_subvolumes(struct bch_fs *);
+
int bch2_fs_subvolumes_init(struct bch_fs *);
#endif /* _BCACHEFS_SUBVOLUME_H */
};
struct snapshot_table {
+ struct rcu_head rcu;
+ size_t nr;
#ifndef RUST_BINDGEN
DECLARE_FLEX_ARRAY(struct snapshot_t, s);
#else
#include "journal.h"
#include "journal_sb.h"
#include "journal_seq_blacklist.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "replicas.h"
#include "quota.h"
#include "sb-clean.h"
{
kfree(sb->bio);
if (!IS_ERR_OR_NULL(sb->s_bdev_file))
- fput(sb->s_bdev_file);
+ bdev_fput(sb->s_bdev_file);
kfree(sb->holder);
kfree(sb->sb_name);
memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
- if (ext)
+ if (ext) {
le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
sizeof(c->sb.errors_silent) * 8);
+ c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
+ }
for_each_member_device(c, ca) {
struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
return -ENOMEM;
sb->sb_name = kstrdup(path, GFP_KERNEL);
- if (!sb->sb_name)
- return -ENOMEM;
+ if (!sb->sb_name) {
+ ret = -ENOMEM;
+ prt_printf(&err, "error allocating memory for sb_name");
+ goto err;
+ }
#ifndef __KERNEL__
if (opt_get(*opts, direct_io) == false)
kfree(errors_silent);
}
+
+ prt_printf(out, "Btrees with missing data:");
+ prt_tab(out);
+ prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data));
+ prt_newline(out);
}
static const struct bch_sb_field_ops bch_sb_field_ops_ext = {
#include "btree_gc.h"
#include "btree_journal_iter.h"
#include "btree_key_cache.h"
+#include "btree_node_scan.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_write_buffer.h"
if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
!test_bit(BCH_FS_emergency_ro, &c->flags))
set_bit(BCH_FS_clean_shutdown, &c->flags);
+
bch2_fs_journal_stop(&c->journal);
+ bch_info(c, "%sshutdown complete, journal seq %llu",
+ test_bit(BCH_FS_clean_shutdown, &c->flags) ? "" : "un",
+ c->journal.seq_ondisk);
+
/*
* After stopping journal:
*/
!test_bit(BCH_FS_emergency_ro, &c->flags) &&
test_bit(BCH_FS_started, &c->flags) &&
test_bit(BCH_FS_clean_shutdown, &c->flags) &&
- !c->opts.norecovery) {
+ c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) {
BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
BUG_ON(atomic_read(&c->btree_cache.dirty));
BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
int bch2_fs_read_write(struct bch_fs *c)
{
- if (c->opts.norecovery)
+ if (c->opts.recovery_pass_last &&
+ c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay)
return -BCH_ERR_erofs_norecovery;
if (c->opts.nochanges)
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_exit(&c->times[i]);
+ bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
+ bch2_fs_allocator_background_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
bch2_fs_snapshots_exit(c);
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
bch2_journal_keys_put_initial(c);
+ bch2_find_btree_nodes_exit(&c->found_btree_nodes);
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
for_each_online_member(c, ca)
bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
+ struct bch_sb_field_ext *ext =
+ bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
mutex_unlock(&c->sb_lock);
+ if (!ext) {
+ bch_err(c, "insufficient space in superblock for sb_field_ext");
+ ret = -BCH_ERR_ENOSPC_sb;
+ goto err;
+ }
+
for_each_rw_member(c, ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
u8 durability;
u8 freespace_initialized;
u8 valid;
+ u8 btree_bitmap_shift;
+ u64 btree_allocated_bitmap;
};
#endif /* _BCACHEFS_SUPER_TYPES_H */
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_update.h"
-#include "btree_update_interior.h"
#include "btree_gc.h"
#include "buckets.h"
#include "clock.h"
#include "ec.h"
#include "inode.h"
#include "journal.h"
+#include "journal_reclaim.h"
#include "keylist.h"
#include "move.h"
#include "movinggc.h"
write_attribute(trigger_gc);
write_attribute(trigger_discards);
write_attribute(trigger_invalidates);
+write_attribute(trigger_journal_flush);
write_attribute(prune_cache);
write_attribute(btree_wakeup);
rw_attribute(btree_gc_periodic);
read_attribute(btree_cache_size);
read_attribute(compression_stats);
read_attribute(journal_debug);
-read_attribute(btree_updates);
read_attribute(btree_cache);
read_attribute(btree_key_cache);
read_attribute(stripes_heap);
if (attr == &sysfs_journal_debug)
bch2_journal_debug_to_text(out, &c->journal);
- if (attr == &sysfs_btree_updates)
- bch2_btree_updates_to_text(out, c);
-
if (attr == &sysfs_btree_cache)
bch2_btree_cache_to_text(out, c);
/* Debugging: */
- if (!test_bit(BCH_FS_rw, &c->flags))
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))
return -EROFS;
if (attr == &sysfs_prune_cache) {
if (attr == &sysfs_trigger_invalidates)
bch2_do_invalidates(c);
+ if (attr == &sysfs_trigger_journal_flush) {
+ bch2_journal_flush_all_pins(&c->journal);
+ bch2_journal_meta(&c->journal);
+ }
+
#ifdef CONFIG_BCACHEFS_TESTS
if (attr == &sysfs_perf_test) {
char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
size = ret;
}
#endif
+ bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
return size;
}
SYSFS_OPS(bch2_fs);
struct attribute *bch2_fs_internal_files[] = {
&sysfs_flags,
&sysfs_journal_debug,
- &sysfs_btree_updates,
&sysfs_btree_cache,
&sysfs_btree_key_cache,
&sysfs_new_stripes,
&sysfs_trigger_gc,
&sysfs_trigger_discards,
&sysfs_trigger_invalidates,
+ &sysfs_trigger_journal_flush,
&sysfs_prune_cache,
&sysfs_btree_wakeup,
bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek(&iter);
+ k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX));
ret = bkey_err(k);
if (ret)
goto err;
return 0;
}
-int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
- const struct thread_with_stdio_ops *ops)
+void bch2_thread_with_stdio_init(struct thread_with_stdio *thr,
+ const struct thread_with_stdio_ops *ops)
{
stdio_buf_init(&thr->stdio.input);
stdio_buf_init(&thr->stdio.output);
thr->ops = ops;
+}
+int __bch2_run_thread_with_stdio(struct thread_with_stdio *thr)
+{
return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, thread_with_stdio_fn);
}
+int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
+ const struct thread_with_stdio_ops *ops)
+{
+ bch2_thread_with_stdio_init(thr, ops);
+
+ return __bch2_run_thread_with_stdio(thr);
+}
+
int bch2_run_thread_with_stdout(struct thread_with_stdio *thr,
const struct thread_with_stdio_ops *ops)
{
const struct thread_with_stdio_ops *ops;
};
+void bch2_thread_with_stdio_init(struct thread_with_stdio *,
+ const struct thread_with_stdio_ops *);
+int __bch2_run_thread_with_stdio(struct thread_with_stdio *);
int bch2_run_thread_with_stdio(struct thread_with_stdio *,
const struct thread_with_stdio_ops *);
int bch2_run_thread_with_stdout(struct thread_with_stdio *,
}
}
-static int alignment_ok(const void *base, size_t align)
-{
- return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
- ((unsigned long)base & (align - 1)) == 0;
-}
-
-static void u32_swap(void *a, void *b, size_t size)
-{
- u32 t = *(u32 *)a;
- *(u32 *)a = *(u32 *)b;
- *(u32 *)b = t;
-}
-
-static void u64_swap(void *a, void *b, size_t size)
-{
- u64 t = *(u64 *)a;
- *(u64 *)a = *(u64 *)b;
- *(u64 *)b = t;
-}
-
-static void generic_swap(void *a, void *b, size_t size)
-{
- char t;
-
- do {
- t = *(char *)a;
- *(char *)a++ = *(char *)b;
- *(char *)b++ = t;
- } while (--size > 0);
-}
-
-static inline int do_cmp(void *base, size_t n, size_t size,
- int (*cmp_func)(const void *, const void *, size_t),
- size_t l, size_t r)
-{
- return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
- base + inorder_to_eytzinger0(r, n) * size,
- size);
-}
-
-static inline void do_swap(void *base, size_t n, size_t size,
- void (*swap_func)(void *, void *, size_t),
- size_t l, size_t r)
-{
- swap_func(base + inorder_to_eytzinger0(l, n) * size,
- base + inorder_to_eytzinger0(r, n) * size,
- size);
-}
-
-void eytzinger0_sort(void *base, size_t n, size_t size,
- int (*cmp_func)(const void *, const void *, size_t),
- void (*swap_func)(void *, void *, size_t))
-{
- int i, c, r;
-
- if (!swap_func) {
- if (size == 4 && alignment_ok(base, 4))
- swap_func = u32_swap;
- else if (size == 8 && alignment_ok(base, 8))
- swap_func = u64_swap;
- else
- swap_func = generic_swap;
- }
-
- /* heapify */
- for (i = n / 2 - 1; i >= 0; --i) {
- for (r = i; r * 2 + 1 < n; r = c) {
- c = r * 2 + 1;
-
- if (c + 1 < n &&
- do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
- c++;
-
- if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
- break;
-
- do_swap(base, n, size, swap_func, r, c);
- }
- }
-
- /* sort */
- for (i = n - 1; i > 0; --i) {
- do_swap(base, n, size, swap_func, 0, i);
-
- for (r = 0; r * 2 + 1 < i; r = c) {
- c = r * 2 + 1;
-
- if (c + 1 < i &&
- do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
- c++;
-
- if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
- break;
-
- do_swap(base, n, size, swap_func, r, c);
- }
- }
-}
-
-void sort_cmp_size(void *base, size_t num, size_t size,
- int (*cmp_func)(const void *, const void *, size_t),
- void (*swap_func)(void *, void *, size_t size))
-{
- /* pre-scale counters for performance */
- int i = (num/2 - 1) * size, n = num * size, c, r;
-
- if (!swap_func) {
- if (size == 4 && alignment_ok(base, 4))
- swap_func = u32_swap;
- else if (size == 8 && alignment_ok(base, 8))
- swap_func = u64_swap;
- else
- swap_func = generic_swap;
- }
-
- /* heapify */
- for ( ; i >= 0; i -= size) {
- for (r = i; r * 2 + size < n; r = c) {
- c = r * 2 + size;
- if (c < n - size &&
- cmp_func(base + c, base + c + size, size) < 0)
- c += size;
- if (cmp_func(base + r, base + c, size) >= 0)
- break;
- swap_func(base + r, base + c, size);
- }
- }
-
- /* sort */
- for (i = n - size; i > 0; i -= size) {
- swap_func(base, base + i, size);
- for (r = 0; r * 2 + size < i; r = c) {
- c = r * 2 + size;
- if (c < i - size &&
- cmp_func(base + c, base + c + size, size) < 0)
- c += size;
- if (cmp_func(base + r, base + c, size) >= 0)
- break;
- swap_func(base + r, base + c, size);
- }
- }
-}
-
#if 0
void eytzinger1_test(void)
{
memset(s + bytes, c, rem);
}
-void sort_cmp_size(void *base, size_t num, size_t size,
- int (*cmp_func)(const void *, const void *, size_t),
- void (*swap_func)(void *, void *, size_t));
-
/* just the memmove, doesn't update @_nr */
#define __array_insert_item(_array, _nr, _pos) \
memmove(&(_array)[(_pos) + 1], \
#endif
+static inline void mod_bit(long nr, volatile unsigned long *addr, bool v)
+{
+ if (v)
+ set_bit(nr, addr);
+ else
+ clear_bit(nr, addr);
+}
+
static inline void __set_bit_le64(size_t bit, __le64 *addr)
{
addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));
}
+static inline void __clear_bit_le64(size_t bit, __le64 *addr)
+{
+ addr[bit / 64] &= ~cpu_to_le64(BIT_ULL(bit % 64));
+}
+
+static inline bool test_bit_le64(size_t bit, __le64 *addr)
+{
+ return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0;
+}
+
#endif /* _BCACHEFS_UTIL_H */
SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
rcu_read_unlock();
- strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
+ get_task_comm(psinfo->pr_fname, p);
return 0;
}
size_t alloc_bytes;
alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
- data = kvmalloc(alloc_bytes, GFP_KERNEL);
+ data = kvzalloc(alloc_bytes, GFP_KERNEL);
if (!data)
return ERR_PTR(-ENOMEM);
- if (total_bytes >= sizeof(*data)) {
+ if (total_bytes >= sizeof(*data))
data->bytes_left = total_bytes - sizeof(*data);
- data->bytes_missing = 0;
- } else {
+ else
data->bytes_missing = sizeof(*data) - total_bytes;
- data->bytes_left = 0;
- }
-
- data->elem_cnt = 0;
- data->elem_missed = 0;
return data;
}
* needing to allocate extents from the block group.
*/
used = btrfs_space_info_used(space_info, true);
- if (space_info->total_bytes - block_group->length < used) {
+ if (space_info->total_bytes - block_group->length < used &&
+ block_group->zone_unusable < block_group->length) {
/*
* Add a reference for the list, compensate for the ref
* drop under the "next" label for the
if (ret)
return ret;
+ ret = btrfs_record_root_in_trans(trans, node->root);
+ if (ret)
+ return ret;
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
return ret;
}
if (root_id != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_ref generic_ref = { 0 };
+ /*
+ * Assert that the extent buffer is not cleared due to
+ * EXTENT_BUFFER_ZONED_ZEROOUT. Please refer
+ * btrfs_clear_buffer_dirty() and btree_csum_one_bio() for
+ * detail.
+ */
+ ASSERT(btrfs_header_bytenr(buf) != 0);
+
btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
buf->start, buf->len, parent,
btrfs_header_owner(buf));
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
gfp_t extra_gfp)
{
+ const gfp_t gfp = GFP_NOFS | extra_gfp;
unsigned int allocated;
for (allocated = 0; allocated < nr_pages;) {
unsigned int last = allocated;
- allocated = alloc_pages_bulk_array(GFP_NOFS | extra_gfp,
- nr_pages, page_array);
-
- if (allocated == nr_pages)
- return 0;
-
- /*
- * During this iteration, no page could be allocated, even
- * though alloc_pages_bulk_array() falls back to alloc_page()
- * if it could not bulk-allocate. So we must be out of memory.
- */
- if (allocated == last) {
+ allocated = alloc_pages_bulk_array(gfp, nr_pages, page_array);
+ if (unlikely(allocated == last)) {
+ /* No progress, fail and do cleanup. */
for (int i = 0; i < allocated; i++) {
__free_page(page_array[i]);
page_array[i] = NULL;
}
return -ENOMEM;
}
-
- memalloc_retry_wait(GFP_NOFS);
}
return 0;
}
* The actual zeroout of the buffer will happen later in
* btree_csum_one_bio.
*/
- if (btrfs_is_zoned(fs_info)) {
+ if (btrfs_is_zoned(fs_info) && test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
set_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags);
return;
}
num_folios = num_extent_folios(eb);
WARN_ON(atomic_read(&eb->refs) == 0);
WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
+ WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags));
if (!was_dirty) {
bool subpage = eb->fs_info->nodesize < PAGE_SIZE;
if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags))
goto done;
+ /*
+ * Between the initial test_bit(EXTENT_BUFFER_UPTODATE) and the above
+ * test_and_set_bit(EXTENT_BUFFER_READING), someone else could have
+ * started and finished reading the same eb. In this case, UPTODATE
+ * will now be set, and we shouldn't read it in again.
+ */
+ if (unlikely(test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))) {
+ clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
+ return 0;
+ }
+
clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0;
check_buffer_tree_ref(eb);
btrfs_warn(fs_info,
"no extent map found for inode %llu (root %lld) when unpinning extent range [%llu, %llu), generation %llu",
btrfs_ino(inode), btrfs_root_id(inode->root),
- start, len, gen);
+ start, start + len, gen);
ret = -ENOENT;
goto out;
}
btrfs_warn(fs_info,
"found extent map for inode %llu (root %lld) with unexpected start offset %llu when unpinning extent range [%llu, %llu), generation %llu",
btrfs_ino(inode), btrfs_root_id(inode->root),
- em->start, start, len, gen);
+ em->start, start, start + len, gen);
ret = -EUCLEAN;
goto out;
}
em->mod_len = em->len;
}
- free_extent_map(em);
out:
write_unlock(&tree->lock);
+ free_extent_map(em);
return ret;
}
*/
ret = merge_extent_mapping(em_tree, existing,
em, start);
- if (ret) {
+ if (WARN_ON(ret)) {
free_extent_map(em);
*em_in = NULL;
- WARN_ONCE(ret,
-"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu\n",
- existing->start, existing->len,
- orig_start, orig_len, start);
+ btrfs_warn(fs_info,
+"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu",
+ existing->start, extent_map_end(existing),
+ orig_start, orig_start + orig_len, start);
}
free_extent_map(existing);
}
split->block_len = em->block_len;
split->orig_start = em->orig_start;
} else {
- const u64 diff = start + len - em->start;
+ const u64 diff = end - em->start;
split->block_len = split->len;
split->block_start += diff;
0, *alloc_hint, &ins, 1, 1);
if (ret) {
/*
- * Here we used to try again by going back to non-compressed
- * path for ENOSPC. But we can't reserve space even for
- * compressed size, how could it work for uncompressed size
- * which requires larger size? So here we directly go error
- * path.
+ * We can't reserve contiguous space for the compressed size.
+ * Unlikely, but it's possible that we could have enough
+ * non-contiguous space for the uncompressed size instead. So
+ * fall back to uncompressed.
*/
- goto out_free;
+ submit_uncompressed_range(inode, async_extent, locked_page);
+ goto done;
}
/* Here we're doing allocation and writeback of the compressed pages */
out_free_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
-out_free:
mapping_set_error(inode->vfs_inode.i_mapping, -EIO);
extent_clear_unlock_delalloc(inode, start, end,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
*/
if (bits & EXTENT_CLEAR_META_RESV &&
root != fs_info->tree_root)
- btrfs_delalloc_release_metadata(inode, len, false);
+ btrfs_delalloc_release_metadata(inode, len, true);
/* For sanity tests. */
if (btrfs_is_testing(fs_info))
struct btrfs_trans_handle *trans;
struct btrfs_block_rsv block_rsv;
u64 root_flags;
+ u64 qgroup_reserved = 0;
int ret;
down_write(&fs_info->subvol_sem);
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
if (ret)
goto out_undead;
+ qgroup_reserved = block_rsv.qgroup_rsv_reserved;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_release;
}
+ ret = btrfs_record_root_in_trans(trans, root);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out_end_trans;
+ }
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
ret = btrfs_end_transaction(trans);
inode->i_flags |= S_DEAD;
out_release:
- btrfs_subvolume_release_metadata(root, &block_rsv);
+ btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
out_undead:
if (ret) {
spin_lock(&dest->root_item_lock);
int ret;
dev_t anon_dev;
u64 objectid;
+ u64 qgroup_reserved = 0;
root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
if (!root_item)
trans_num_items, false);
if (ret)
goto out_new_inode_args;
+ qgroup_reserved = block_rsv.qgroup_rsv_reserved;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- btrfs_subvolume_release_metadata(root, &block_rsv);
- goto out_new_inode_args;
+ goto out_release_rsv;
}
+ ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+ if (ret)
+ goto out;
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
/* Tree log can't currently deal with an inode which is a new root. */
out:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
- btrfs_subvolume_release_metadata(root, &block_rsv);
-
btrfs_end_transaction(trans);
+out_release_rsv:
+ btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
out_new_inode_args:
btrfs_new_inode_args_destroy(&new_inode_args);
out_inode:
struct btrfs_pending_snapshot *pending_snapshot;
unsigned int trans_num_items;
struct btrfs_trans_handle *trans;
+ struct btrfs_block_rsv *block_rsv;
+ u64 qgroup_reserved = 0;
int ret;
/* We do not support snapshotting right now. */
goto free_pending;
}
- btrfs_init_block_rsv(&pending_snapshot->block_rsv,
- BTRFS_BLOCK_RSV_TEMP);
+ block_rsv = &pending_snapshot->block_rsv;
+ btrfs_init_block_rsv(block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
* 1 to add dir item
* 1 to add dir index
* 1 to update parent inode item
*/
trans_num_items = create_subvol_num_items(inherit) + 3;
- ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
- &pending_snapshot->block_rsv,
+ ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, block_rsv,
trans_num_items, false);
if (ret)
goto free_pending;
+ qgroup_reserved = block_rsv->qgroup_rsv_reserved;
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
ret = PTR_ERR(trans);
goto fail;
}
+ ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+ if (ret) {
+ btrfs_end_transaction(trans);
+ goto fail;
+ }
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->pending_snapshot = pending_snapshot;
if (ret && pending_snapshot->snap)
pending_snapshot->snap->anon_dev = 0;
btrfs_put_root(pending_snapshot->snap);
- btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv);
+ btrfs_block_rsv_release(fs_info, block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
free_pending:
if (pending_snapshot->anon_dev)
free_anon_bdev(pending_snapshot->anon_dev);
#ifdef CONFIG_PRINTK
-#define STATE_STRING_PREFACE ": state "
+#define STATE_STRING_PREFACE " state "
#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT + 1)
/*
BTRFS_QGROUP_RSV_META_PREALLOC);
trace_qgroup_meta_convert(root, num_bytes);
qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes);
+ if (!sb_rdonly(fs_info->sb))
+ add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS);
}
/*
}
return ret;
}
-
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- u64 qgroup_to_release;
-
- btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release);
- btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release);
-}
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int nitems, bool use_global_rsv);
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv);
int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
u64 ref_id, u64 dirid, u64 sequence,
const struct fscrypt_str *name);
struct btrfs_fs_info *fs_info = sctx->fs_info;
int num_copies = btrfs_num_copies(fs_info, stripe->bg->start,
stripe->bg->length);
+ unsigned long repaired;
int mirror;
int i;
* Submit the repaired sectors. For zoned case, we cannot do repair
* in-place, but queue the bg to be relocated.
*/
- if (btrfs_is_zoned(fs_info)) {
- if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
+ bitmap_andnot(&repaired, &stripe->init_error_bitmap, &stripe->error_bitmap,
+ stripe->nr_sectors);
+ if (!sctx->readonly && !bitmap_empty(&repaired, stripe->nr_sectors)) {
+ if (btrfs_is_zoned(fs_info)) {
btrfs_repair_one_zone(fs_info, sctx->stripes[0].bg->start);
- } else if (!sctx->readonly) {
- unsigned long repaired;
-
- bitmap_andnot(&repaired, &stripe->init_error_bitmap,
- &stripe->error_bitmap, stripe->nr_sectors);
- scrub_write_sectors(sctx, stripe, repaired, false);
- wait_scrub_stripe_io(stripe);
+ } else {
+ scrub_write_sectors(sctx, stripe, repaired, false);
+ wait_scrub_stripe_io(stripe);
+ }
}
scrub_stripe_report_errors(sctx, stripe);
gen = btrfs_get_last_trans_committed(fs_info);
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
- bytenr = btrfs_sb_offset(i);
+ ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr);
+ if (ret == -ENOENT)
+ break;
+
+ if (ret) {
+ spin_lock(&sctx->stat_lock);
+ sctx->stat.super_errors++;
+ spin_unlock(&sctx->stat_lock);
+ continue;
+ }
+
if (bytenr + BTRFS_SUPER_INFO_SIZE >
scrub_dev->commit_total_bytes)
break;
goto out;
}
+ if (em->block_start != SZ_32K + SZ_4K) {
+ test_err("em->block_start is %llu, expected 36K", em->block_start);
+ goto out;
+ }
+
free_extent_map(em);
read_lock(&em_tree->lock);
h->reloc_reserved = reloc_reserved;
}
- /*
- * Now that we have found a transaction to be a part of, convert the
- * qgroup reservation from prealloc to pertrans. A different transaction
- * can't race in and free our pertrans out from under us.
- */
- if (qgroup_reserved)
- btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
-
got_it:
if (!current->journal_info)
current->journal_info = h;
* not just freed.
*/
btrfs_end_transaction(h);
- return ERR_PTR(ret);
+ goto reserve_fail;
}
+ /*
+ * Now that we have found a transaction to be a part of, convert the
+ * qgroup reservation from prealloc to pertrans. A different transaction
+ * can't race in and free our pertrans out from under us.
+ */
+ if (qgroup_reserved)
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
return h;
radix_tree_tag_clear(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
+ btrfs_qgroup_free_meta_all_pertrans(root);
spin_unlock(&fs_info->fs_roots_radix_lock);
btrfs_free_log(trans, root);
if (ret2)
return ret2;
spin_lock(&fs_info->fs_roots_radix_lock);
- btrfs_qgroup_free_meta_all_pertrans(root);
}
}
spin_unlock(&fs_info->fs_roots_radix_lock);
device->bdev = file_bdev(bdev_file);
clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+ if (device->devt != device->bdev->bd_dev) {
+ btrfs_warn(NULL,
+ "device %s maj:min changed from %d:%d to %d:%d",
+ device->name->str, MAJOR(device->devt),
+ MINOR(device->devt), MAJOR(device->bdev->bd_dev),
+ MINOR(device->bdev->bd_dev));
+
+ device->devt = device->bdev->bd_dev;
+ }
+
fs_devices->open_devices++;
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
device->devid != BTRFS_DEV_REPLACE_DEVID) {
struct btrfs_device *device;
struct btrfs_device *latest_dev = NULL;
struct btrfs_device *tmp_device;
+ int ret = 0;
list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
dev_list) {
- int ret;
+ int ret2;
- ret = btrfs_open_one_device(fs_devices, device, flags, holder);
- if (ret == 0 &&
+ ret2 = btrfs_open_one_device(fs_devices, device, flags, holder);
+ if (ret2 == 0 &&
(!latest_dev || device->generation > latest_dev->generation)) {
latest_dev = device;
- } else if (ret == -ENODATA) {
+ } else if (ret2 == -ENODATA) {
fs_devices->num_devices--;
list_del(&device->dev_list);
btrfs_free_device(device);
}
+ if (ret == 0 && ret2 != 0)
+ ret = ret2;
}
- if (fs_devices->open_devices == 0)
+
+ if (fs_devices->open_devices == 0) {
+ if (ret)
+ return ret;
return -EINVAL;
+ }
fs_devices->opened = 1;
fs_devices->latest_dev = latest_dev;
if (!map)
return -EINVAL;
- cache->physical_map = btrfs_clone_chunk_map(map, GFP_NOFS);
- if (!cache->physical_map) {
- ret = -ENOMEM;
- goto out;
- }
+ cache->physical_map = map;
zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS);
if (!zone_info) {
}
bitmap_free(active);
kfree(zone_info);
- btrfs_free_chunk_map(map);
return ret;
}
struct btrfs_chunk_map *map;
const bool is_metadata = (block_group->flags &
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
+ struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
int ret = 0;
int i;
btrfs_clear_data_reloc_bg(block_group);
spin_unlock(&block_group->lock);
+ down_read(&dev_replace->rwsem);
map = block_group->physical_map;
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_device *device = map->stripes[i].dev;
zinfo->zone_size >> SECTOR_SHIFT);
memalloc_nofs_restore(nofs_flags);
- if (ret)
+ if (ret) {
+ up_read(&dev_replace->rwsem);
return ret;
+ }
if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
zinfo->reserved_active_zones++;
btrfs_dev_clear_active_zone(device, physical);
}
+ up_read(&dev_replace->rwsem);
if (!fully_written)
btrfs_dec_block_group_ro(block_group);
ihold(inode);
if (wbc->sync_mode == WB_SYNC_NONE &&
- ceph_inode_to_fs_client(inode)->write_congested)
+ ceph_inode_to_fs_client(inode)->write_congested) {
+ redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
+ }
wait_on_page_fscache(page);
doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
ceph_vinop(inode));
- spin_lock(&mdsc->cap_unlink_delay_lock);
+ spin_lock(&mdsc->cap_delay_lock);
ci->i_ceph_flags |= CEPH_I_FLUSH;
if (!list_empty(&ci->i_cap_delay_list))
list_del_init(&ci->i_cap_delay_list);
list_add_tail(&ci->i_cap_delay_list,
&mdsc->cap_unlink_delay_list);
- spin_unlock(&mdsc->cap_unlink_delay_lock);
+ spin_unlock(&mdsc->cap_delay_lock);
/*
* Fire the work immediately, because the MDS maybe
struct ceph_client *cl = mdsc->fsc->client;
doutc(cl, "begin\n");
- spin_lock(&mdsc->cap_unlink_delay_lock);
+ spin_lock(&mdsc->cap_delay_lock);
while (!list_empty(&mdsc->cap_unlink_delay_list)) {
struct ceph_inode_info *ci;
struct inode *inode;
inode = igrab(&ci->netfs.inode);
if (inode) {
- spin_unlock(&mdsc->cap_unlink_delay_lock);
+ spin_unlock(&mdsc->cap_delay_lock);
doutc(cl, "on %p %llx.%llx\n", inode,
ceph_vinop(inode));
ceph_check_caps(ci, CHECK_CAPS_FLUSH);
iput(inode);
- spin_lock(&mdsc->cap_unlink_delay_lock);
+ spin_lock(&mdsc->cap_delay_lock);
}
}
- spin_unlock(&mdsc->cap_unlink_delay_lock);
+ spin_unlock(&mdsc->cap_delay_lock);
doutc(cl, "done\n");
}
INIT_LIST_HEAD(&mdsc->cap_wait_list);
spin_lock_init(&mdsc->cap_delay_lock);
INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
- spin_lock_init(&mdsc->cap_unlink_delay_lock);
INIT_LIST_HEAD(&mdsc->snap_flush_list);
spin_lock_init(&mdsc->snap_flush_lock);
mdsc->last_cap_flush_tid = 1;
struct delayed_work delayed_work; /* delayed work */
unsigned long last_renew_caps; /* last time we renewed our caps */
struct list_head cap_delay_list; /* caps with delayed release */
- spinlock_t cap_delay_lock; /* protects cap_delay_list */
struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */
- spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */
+ spinlock_t cap_delay_lock; /* protects cap_delay_list and cap_unlink_delay_list */
struct list_head snap_flush_list; /* cap_snaps ready to flush */
spinlock_t snap_flush_lock;
sb->s_mtd = NULL;
} else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
sync_blockdev(sb->s_bdev);
- fput(sb->s_bdev_file);
+ bdev_fput(sb->s_bdev_file);
}
kfree(sbi);
}
switch (mode) {
case EROFS_MOUNT_DAX_ALWAYS:
- warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
set_opt(&ctx->opt, DAX_ALWAYS);
clear_opt(&ctx->opt, DAX_NEVER);
return true;
goto out;
}
+ bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE;
*sp_location = sp;
out:
brelse(sbi->s_sbh);
if (sbi->s_journal_bdev_file) {
invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
- fput(sbi->s_journal_bdev_file);
+ bdev_fput(sbi->s_journal_bdev_file);
}
out_fail:
invalidate_bdev(sb->s_bdev);
out_bh:
brelse(bh);
out_bdev:
- fput(bdev_file);
+ bdev_fput(bdev_file);
return ERR_PTR(errno);
}
out_journal:
jbd2_journal_destroy(journal);
out_bdev:
- fput(bdev_file);
+ bdev_fput(bdev_file);
return ERR_PTR(errno);
}
kill_block_super(sb);
if (bdev_file)
- fput(bdev_file);
+ bdev_fput(bdev_file);
}
static struct file_system_type ext4_fs_type = {
for (i = 0; i < sbi->s_ndevs; i++) {
if (i > 0)
- fput(FDEV(i).bdev_file);
+ bdev_fput(FDEV(i).bdev_file);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
#endif
/**
* cuse_process_init_reply - finish initializing CUSE channel
*
+ * @fm: The fuse mount information containing the CUSE connection.
+ * @args: The arguments passed to the init reply.
+ * @error: The error code signifying if any error occurred during the process.
+ *
* This function creates the character device and sets up all the
* required data structures for it. Please read the comment at the
* top of this file for high level overview.
err = fuse_do_statx(inode, file, stat);
if (err == -ENOSYS) {
fc->no_statx = 1;
+ err = 0;
goto retry;
}
} else {
bool *exclusive)
{
struct inode *inode = file_inode(iocb->ki_filp);
- struct fuse_file *ff = iocb->ki_filp->private_data;
+ struct fuse_inode *fi = get_fuse_inode(inode);
*exclusive = fuse_dio_wr_exclusive_lock(iocb, from);
if (*exclusive) {
* have raced, so check it again.
*/
if (fuse_io_past_eof(iocb, from) ||
- fuse_file_uncached_io_start(inode, ff, NULL) != 0) {
+ fuse_inode_uncached_io_start(fi, NULL) != 0) {
inode_unlock_shared(inode);
inode_lock(inode);
*exclusive = true;
static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
{
struct inode *inode = file_inode(iocb->ki_filp);
- struct fuse_file *ff = iocb->ki_filp->private_data;
+ struct fuse_inode *fi = get_fuse_inode(inode);
if (exclusive) {
inode_unlock(inode);
} else {
/* Allow opens in caching mode after last parallel dio end */
- fuse_file_uncached_io_end(inode, ff);
+ fuse_inode_uncached_io_end(fi);
inode_unlock_shared(inode);
}
}
* First mmap of direct_io file enters caching inode io mode.
* Also waits for parallel dio writers to go into serial mode
* (exclusive instead of shared lock).
+ * After first mmap, the inode stays in caching io mode until
+ * the direct_io file release.
*/
- rc = fuse_file_cached_io_start(inode, ff);
+ rc = fuse_file_cached_io_open(inode, ff);
if (rc)
return rc;
}
struct dentry *dentry, struct fileattr *fa);
/* iomode.c */
-int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff);
-int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb);
-void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff);
+int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff);
+int fuse_inode_uncached_io_start(struct fuse_inode *fi,
+ struct fuse_backing *fb);
+void fuse_inode_uncached_io_end(struct fuse_inode *fi);
int fuse_file_io_open(struct file *file, struct inode *inode);
void fuse_file_io_release(struct fuse_file *ff, struct inode *inode);
}
}
if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
+ WARN_ON(fi->iocachectr != 0);
WARN_ON(!list_empty(&fi->write_files));
WARN_ON(!list_empty(&fi->queued_writes));
}
}
/*
- * Start cached io mode.
+ * Called on cached file open() and on first mmap() of direct_io file.
+ * Takes cached_io inode mode reference to be dropped on file release.
*
* Blocks new parallel dio writes and waits for the in-progress parallel dio
* writes to complete.
*/
-int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff)
+int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff)
{
struct fuse_inode *fi = get_fuse_inode(inode);
return 0;
}
-static void fuse_file_cached_io_end(struct inode *inode, struct fuse_file *ff)
+static void fuse_file_cached_io_release(struct fuse_file *ff,
+ struct fuse_inode *fi)
{
- struct fuse_inode *fi = get_fuse_inode(inode);
-
spin_lock(&fi->lock);
WARN_ON(fi->iocachectr <= 0);
WARN_ON(ff->iomode != IOM_CACHED);
}
/* Start strictly uncached io mode where cache access is not allowed */
-int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb)
+int fuse_inode_uncached_io_start(struct fuse_inode *fi, struct fuse_backing *fb)
{
- struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_backing *oldfb;
int err = 0;
spin_lock(&fi->lock);
/* deny conflicting backing files on same fuse inode */
oldfb = fuse_inode_backing(fi);
- if (oldfb && oldfb != fb) {
+ if (fb && oldfb && oldfb != fb) {
err = -EBUSY;
goto unlock;
}
err = -ETXTBSY;
goto unlock;
}
- WARN_ON(ff->iomode != IOM_NONE);
fi->iocachectr--;
- ff->iomode = IOM_UNCACHED;
/* fuse inode holds a single refcount of backing file */
- if (!oldfb) {
+ if (fb && !oldfb) {
oldfb = fuse_inode_backing_set(fi, fb);
WARN_ON_ONCE(oldfb != NULL);
} else {
return err;
}
-void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff)
+/* Takes uncached_io inode mode reference to be dropped on file release */
+static int fuse_file_uncached_io_open(struct inode *inode,
+ struct fuse_file *ff,
+ struct fuse_backing *fb)
{
struct fuse_inode *fi = get_fuse_inode(inode);
+ int err;
+
+ err = fuse_inode_uncached_io_start(fi, fb);
+ if (err)
+ return err;
+
+ WARN_ON(ff->iomode != IOM_NONE);
+ ff->iomode = IOM_UNCACHED;
+ return 0;
+}
+
+void fuse_inode_uncached_io_end(struct fuse_inode *fi)
+{
struct fuse_backing *oldfb = NULL;
spin_lock(&fi->lock);
WARN_ON(fi->iocachectr >= 0);
- WARN_ON(ff->iomode != IOM_UNCACHED);
- ff->iomode = IOM_NONE;
fi->iocachectr++;
if (!fi->iocachectr) {
wake_up(&fi->direct_io_waitq);
fuse_backing_put(oldfb);
}
+/* Drop uncached_io reference from passthrough open */
+static void fuse_file_uncached_io_release(struct fuse_file *ff,
+ struct fuse_inode *fi)
+{
+ WARN_ON(ff->iomode != IOM_UNCACHED);
+ ff->iomode = IOM_NONE;
+ fuse_inode_uncached_io_end(fi);
+}
+
/*
* Open flags that are allowed in combination with FOPEN_PASSTHROUGH.
* A combination of FOPEN_PASSTHROUGH and FOPEN_DIRECT_IO means that read/write
return PTR_ERR(fb);
/* First passthrough file open denies caching inode io mode */
- err = fuse_file_uncached_io_start(inode, ff, fb);
+ err = fuse_file_uncached_io_open(inode, ff, fb);
if (!err)
return 0;
if (ff->open_flags & FOPEN_PASSTHROUGH)
err = fuse_file_passthrough_open(inode, file);
else
- err = fuse_file_cached_io_start(inode, ff);
+ err = fuse_file_cached_io_open(inode, ff);
if (err)
goto fail;
/* No more pending io and no new io possible to inode via open/mmapped file */
void fuse_file_io_release(struct fuse_file *ff, struct inode *inode)
{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
/*
- * Last parallel dio close allows caching inode io mode.
+ * Last passthrough file close allows caching inode io mode.
* Last caching file close exits caching inode io mode.
*/
switch (ff->iomode) {
/* Nothing to do */
break;
case IOM_UNCACHED:
- fuse_file_uncached_io_end(inode, ff);
+ fuse_file_uncached_io_release(ff, fi);
break;
case IOM_CACHED:
- fuse_file_cached_io_end(inode, ff);
+ fuse_file_cached_io_release(ff, fi);
break;
}
}
struct buffer_head *dibh, *bh;
struct gfs2_holder rd_gh;
unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
- u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
+ unsigned int bsize = 1 << bsize_shift;
+ u64 lblock = (offset + bsize - 1) >> bsize_shift;
__u16 start_list[GFS2_MAX_META_HEIGHT];
__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
unsigned int start_aligned, end_aligned;
u64 prev_bnr = 0;
__be64 *start, *end;
- if (offset >= maxsize) {
+ if (offset + bsize - 1 >= maxsize) {
/*
* The starting point lies beyond the allocated metadata;
* there are no blocks to deallocate.
struct fsuuid2 u = { .len = sb->s_uuid_len, };
if (!sb->s_uuid_len)
- return -ENOIOCTLCMD;
+ return -ENOTTY;
memcpy(&u.uuid[0], &sb->s_uuid, sb->s_uuid_len);
struct super_block *sb = file_inode(file)->i_sb;
if (!strlen(sb->s_sysfs_name))
- return -ENOIOCTLCMD;
+ return -ENOTTY;
struct fs_sysfs_path u = {};
lbmLogShutdown(log);
close: /* close external log device */
- fput(bdev_file);
+ bdev_fput(bdev_file);
free: /* free log descriptor */
mutex_unlock(&jfs_log_mutex);
bdev_file = log->bdev_file;
rc = lmLogShutdown(log);
- fput(bdev_file);
+ bdev_fput(bdev_file);
kfree(log);
* each file a separate locking class. Let's differentiate on
* whether the file has mmap or not for now.
*
- * Both paths of the branch look the same. They're supposed to
+ * For similar reasons, writable and readonly files are given different
+ * lockdep key, because the writable file /sys/power/resume may call vfs
+ * lookup helpers for arbitrary paths and readonly files can be read by
+ * overlayfs from vfs helpers when sysfs is a lower layer of overalyfs.
+ *
+ * All three cases look the same. They're supposed to
* look that way and give @of->mutex different static lockdep keys.
*/
if (has_mmap)
mutex_init(&of->mutex);
+ else if (file->f_mode & FMODE_WRITE)
+ mutex_init(&of->mutex);
else
mutex_init(&of->mutex);
case 0: case S_IFREG:
error = vfs_create(idmap, path.dentry->d_inode,
dentry, mode, true);
+ if (!error)
+ security_path_post_mknod(idmap, dentry);
break;
case S_IFCHR: case S_IFBLK:
error = vfs_mknod(idmap, path.dentry->d_inode,
dentry, mode, 0);
break;
}
-
- if (error)
- goto out2;
-
- security_path_post_mknod(idmap, dentry);
out2:
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
enum netfs_how_to_modify howto;
enum netfs_folio_trace trace;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
- ssize_t written = 0, ret;
+ ssize_t written = 0, ret, ret2;
loff_t i_size, pos = iocb->ki_pos, from, to;
size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
bool maybe_trouble = false;
if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) ||
iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))
) {
- if (pos < i_size_read(inode)) {
- ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count);
- if (ret < 0) {
- goto out;
- }
- }
-
wbc_attach_fdatawrite_inode(&wbc, mapping->host);
+ ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count);
+ if (ret < 0) {
+ wbc_detach_inode(&wbc);
+ goto out;
+ }
+
wreq = netfs_begin_writethrough(iocb, iter->count);
if (IS_ERR(wreq)) {
wbc_detach_inode(&wbc);
out:
if (unlikely(wreq)) {
- ret = netfs_end_writethrough(wreq, iocb);
+ ret2 = netfs_end_writethrough(wreq, iocb);
wbc_detach_inode(&wbc);
- if (ret == -EIOCBQUEUED)
- return ret;
+ if (ret2 == -EIOCBQUEUED)
+ return ret2;
+ if (ret == 0)
+ ret = ret2;
}
iocb->ki_pos += written;
static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
{
trace_nfsd_cb_queue(cb->cb_clp, cb);
- return queue_delayed_work(callback_wq, &cb->cb_work, 0);
-}
-
-static void nfsd4_queue_cb_delayed(struct nfsd4_callback *cb,
- unsigned long msecs)
-{
- trace_nfsd_cb_queue(cb->cb_clp, cb);
- queue_delayed_work(callback_wq, &cb->cb_work,
- msecs_to_jiffies(msecs));
+ return queue_work(callback_wq, &cb->cb_work);
}
static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
nfsd4_run_cb_work(struct work_struct *work)
{
struct nfsd4_callback *cb =
- container_of(work, struct nfsd4_callback, cb_work.work);
+ container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
int flags;
clnt = clp->cl_cb_client;
if (!clnt) {
- if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
- nfsd41_destroy_cb(cb);
- else {
- /*
- * XXX: Ideally, we could wait for the client to
- * reconnect, but I haven't figured out how
- * to do that yet.
- */
- nfsd4_queue_cb_delayed(cb, 25);
- }
+ /* Callback channel broken, or client killed; give up: */
+ nfsd41_destroy_cb(cb);
return;
}
cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb;
cb->cb_ops = ops;
- INIT_DELAYED_WORK(&cb->cb_work, nfsd4_run_cb_work);
+ INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_status = 0;
cb->cb_need_restart = false;
cb->cb_holds_slot = false;
nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- spin_lock(&nn->client_lock);
clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
- put_client_renew_locked(clp);
- spin_unlock(&nn->client_lock);
+ drop_client(clp);
}
static int
else
cs_slot = &unconf->cl_cs_slot;
status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
- if (status) {
- if (status == nfserr_replay_cache) {
- status = nfsd4_replay_create_session(cr_ses, cs_slot);
- goto out_free_conn;
- }
+ switch (status) {
+ case nfs_ok:
+ cs_slot->sl_seqid++;
+ cr_ses->seqid = cs_slot->sl_seqid;
+ break;
+ case nfserr_replay_cache:
+ status = nfsd4_replay_create_session(cr_ses, cs_slot);
+ fallthrough;
+ case nfserr_jukebox:
+ /* The server MUST NOT cache NFS4ERR_DELAY */
+ goto out_free_conn;
+ default:
goto out_cache_error;
}
- cs_slot->sl_seqid++;
- cr_ses->seqid = cs_slot->sl_seqid;
/* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */
if (conf) {
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
status = mark_client_expired_locked(old);
- if (status) {
- old = NULL;
- goto out_cache_error;
- }
+ if (status)
+ goto out_expired_error;
trace_nfsd_clid_replaced(&old->cl_clientid);
}
move_to_confirmed(unconf);
expire_client(old);
return status;
+out_expired_error:
+ old = NULL;
+ /*
+ * Revert the slot seq_nr change so the server will process
+ * the client's resend instead of returning a cached response.
+ */
+ if (status == nfserr_jukebox) {
+ cs_slot->sl_seqid--;
+ cr_ses->seqid = cs_slot->sl_seqid;
+ goto out_free_conn;
+ }
out_cache_error:
nfsd4_cache_create_session(cr_ses, cs_slot, status);
out_free_conn:
list_add(&clp->cl_ra_cblist, &cblist);
/* release in nfsd4_cb_recall_any_release */
- atomic_inc(&clp->cl_rpc_users);
+ kref_get(&clp->cl_nfsdfs.cl_ref);
set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
clp->cl_ra_time = ktime_get_boottime_seconds();
}
struct dentry *dentry, const u32 *bmval,
int ignore_crossmnt)
{
+ DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
struct nfsd4_fattr_args args;
struct svc_fh *tempfh = NULL;
int starting_len = xdr->buf->len;
__be32 *attrlen_p, status;
int attrlen_offset;
+ u32 attrmask[3];
int err;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
u32 minorversion = resp->cstate.minorversion;
.mnt = exp->ex_path.mnt,
.dentry = dentry,
};
- union {
- u32 attrmask[3];
- unsigned long mask[2];
- } u;
unsigned long bit;
bool file_modified = false;
u64 size = 0;
/*
* Make a local copy of the attribute bitmap that can be modified.
*/
- memset(&u, 0, sizeof(u));
- u.attrmask[0] = bmval[0];
- u.attrmask[1] = bmval[1];
- u.attrmask[2] = bmval[2];
+ attrmask[0] = bmval[0];
+ attrmask[1] = bmval[1];
+ attrmask[2] = bmval[2];
args.rdattr_err = 0;
if (exp->ex_fslocs.migrated) {
- status = fattr_handle_absent_fs(&u.attrmask[0], &u.attrmask[1],
- &u.attrmask[2], &args.rdattr_err);
+ status = fattr_handle_absent_fs(&attrmask[0], &attrmask[1],
+ &attrmask[2], &args.rdattr_err);
if (status)
goto out;
}
args.size = 0;
- if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+ if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
&file_modified, &size);
if (status)
if (!(args.stat.result_mask & STATX_BTIME))
/* underlying FS does not offer btime so we can't share it */
- u.attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
- if ((u.attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+ attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
+ if ((attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
- (u.attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
+ (attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
FATTR4_WORD1_SPACE_TOTAL))) {
err = vfs_statfs(&path, &args.statfs);
if (err)
goto out_nfserr;
}
- if ((u.attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
+ if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
!fhp) {
tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
status = nfserr_jukebox;
args.fhp = fhp;
args.acl = NULL;
- if (u.attrmask[0] & FATTR4_WORD0_ACL) {
+ if (attrmask[0] & FATTR4_WORD0_ACL) {
err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl);
if (err == -EOPNOTSUPP)
- u.attrmask[0] &= ~FATTR4_WORD0_ACL;
+ attrmask[0] &= ~FATTR4_WORD0_ACL;
else if (err == -EINVAL) {
status = nfserr_attrnotsupp;
goto out;
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
args.context = NULL;
- if ((u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
- u.attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
+ if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
+ attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
err = security_inode_getsecctx(d_inode(dentry),
&args.context, &args.contextlen);
else
err = -EOPNOTSUPP;
args.contextsupport = (err == 0);
- if (u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
+ if (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
if (err == -EOPNOTSUPP)
- u.attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
else if (err)
goto out_nfserr;
}
#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
/* attrmask */
- status = nfsd4_encode_bitmap4(xdr, u.attrmask[0],
- u.attrmask[1], u.attrmask[2]);
+ status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1],
+ attrmask[2]);
if (status)
goto out;
attrlen_p = xdr_reserve_space(xdr, XDR_UNIT);
if (!attrlen_p)
goto out_resource;
- for_each_set_bit(bit, (const unsigned long *)&u.mask,
+ bitmap_from_arr32(attr_bitmap, attrmask,
+ ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
+ for_each_set_bit(bit, attr_bitmap,
ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) {
status = nfsd4_enc_fattr4_encode_ops[bit](xdr, &args);
if (status != nfs_ok)
struct nfs4_client *cb_clp;
struct rpc_message cb_msg;
const struct nfsd4_callback_ops *cb_ops;
- struct delayed_work cb_work;
+ struct work_struct cb_work;
int cb_seq_status;
int cb_status;
bool cb_need_restart;
trap = lock_rename(tdentry, fdentry);
if (IS_ERR(trap)) {
err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
- goto out;
+ goto out_want_write;
}
err = fh_fill_pre_attrs(ffhp);
if (err != nfs_ok)
}
out_unlock:
unlock_rename(tdentry, fdentry);
+out_want_write:
fh_drop_write(ffhp);
/*
#define S_SHIFT 12
static unsigned char
-nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
[S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = NILFS_FT_DIR,
[S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV,
NOTE: this is linux only feature. Windows will ignore these ACLs.
If you don't know what Access Control Lists are, say N.
+
+config NTFS_FS
+ tristate "NTFS file system support"
+ select NTFS3_FS
+ select BUFFER_HEAD
+ select NLS
+ help
+ This config option is here only for backward compatibility. NTFS
+ filesystem is now handled by the NTFS3 driver.
.compat_ioctl = ntfs_compat_ioctl,
#endif
};
+
+const struct file_operations ntfs_legacy_dir_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .iterate_shared = ntfs_readdir,
+ .open = ntfs_file_open,
+};
// clang-format on
.fallocate = ntfs_fallocate,
.release = ntfs_file_release,
};
+
+const struct file_operations ntfs_legacy_file_operations = {
+ .llseek = generic_file_llseek,
+ .read_iter = ntfs_file_read_iter,
+ .splice_read = ntfs_file_splice_read,
+ .open = ntfs_file_open,
+ .release = ntfs_file_release,
+};
// clang-format on
* Usually a hard links to directories are disabled.
*/
inode->i_op = &ntfs_dir_inode_operations;
- inode->i_fop = &ntfs_dir_operations;
+ if (is_legacy_ntfs(inode->i_sb))
+ inode->i_fop = &ntfs_legacy_dir_operations;
+ else
+ inode->i_fop = &ntfs_dir_operations;
ni->i_valid = 0;
} else if (S_ISLNK(mode)) {
ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
} else if (S_ISREG(mode)) {
ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
inode->i_op = &ntfs_file_inode_operations;
- inode->i_fop = &ntfs_file_operations;
+ if (is_legacy_ntfs(inode->i_sb))
+ inode->i_fop = &ntfs_legacy_file_operations;
+ else
+ inode->i_fop = &ntfs_file_operations;
inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr :
&ntfs_aops;
if (ino != MFT_REC_MFT)
if (S_ISDIR(mode)) {
inode->i_op = &ntfs_dir_inode_operations;
- inode->i_fop = &ntfs_dir_operations;
+ if (is_legacy_ntfs(inode->i_sb))
+ inode->i_fop = &ntfs_legacy_dir_operations;
+ else
+ inode->i_fop = &ntfs_dir_operations;
} else if (S_ISLNK(mode)) {
inode->i_op = &ntfs_link_inode_operations;
inode->i_fop = NULL;
inode_nohighmem(inode);
} else if (S_ISREG(mode)) {
inode->i_op = &ntfs_file_inode_operations;
- inode->i_fop = &ntfs_file_operations;
+ if (is_legacy_ntfs(inode->i_sb))
+ inode->i_fop = &ntfs_legacy_file_operations;
+ else
+ inode->i_fop = &ntfs_file_operations;
inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr :
&ntfs_aops;
init_rwsem(&ni->file.run_lock);
struct ntfs_fnd *fnd);
bool dir_is_empty(struct inode *dir);
extern const struct file_operations ntfs_dir_operations;
+extern const struct file_operations ntfs_legacy_dir_operations;
/* Globals from file.c */
int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
extern const struct inode_operations ntfs_special_inode_operations;
extern const struct inode_operations ntfs_file_inode_operations;
extern const struct file_operations ntfs_file_operations;
+extern const struct file_operations ntfs_legacy_file_operations;
/* Globals from frecord.c */
void ni_remove_mi(struct ntfs_inode *ni, struct mft_inode *mi);
*var = cpu_to_le64(le64_to_cpu(*var) - val);
}
+bool is_legacy_ntfs(struct super_block *sb);
+
#endif /* _LINUX_NTFS3_NTFS_FS_H */
struct ntfs_mount_options *new_opts = fc->fs_private;
int ro_rw;
+ /* If ntfs3 is used as legacy ntfs enforce read-only mode. */
+ if (is_legacy_ntfs(sb)) {
+ fc->sb_flags |= SB_RDONLY;
+ goto out;
+ }
+
ro_rw = sb_rdonly(sb) && !(fc->sb_flags & SB_RDONLY);
if (ro_rw && (sbi->flags & NTFS_FLAGS_NEED_REPLAY)) {
errorf(fc,
fc,
"ntfs3: Cannot use different iocharset when remounting!");
- sync_filesystem(sb);
-
if (ro_rw && (sbi->volume.flags & VOLUME_FLAG_DIRTY) &&
!new_opts->force) {
errorf(fc,
return -EINVAL;
}
+out:
+ sync_filesystem(sb);
swap(sbi->options, fc->fs_private);
return 0;
}
#endif
+ if (is_legacy_ntfs(sb))
+ sb->s_flags |= SB_RDONLY;
return 0;
put_inode_out:
* This will called when mount/remount. We will first initialize
* options so that if remount we can use just that.
*/
-static int ntfs_init_fs_context(struct fs_context *fc)
+static int __ntfs_init_fs_context(struct fs_context *fc)
{
struct ntfs_mount_options *opts;
struct ntfs_sb_info *sbi;
return -ENOMEM;
}
+static int ntfs_init_fs_context(struct fs_context *fc)
+{
+ return __ntfs_init_fs_context(fc);
+}
+
static void ntfs3_kill_sb(struct super_block *sb)
{
struct ntfs_sb_info *sbi = sb->s_fs_info;
.kill_sb = ntfs3_kill_sb,
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
+
+#if IS_ENABLED(CONFIG_NTFS_FS)
+static int ntfs_legacy_init_fs_context(struct fs_context *fc)
+{
+ int ret;
+
+ ret = __ntfs_init_fs_context(fc);
+ /* If ntfs3 is used as legacy ntfs enforce read-only mode. */
+ fc->sb_flags |= SB_RDONLY;
+ return ret;
+}
+
+static struct file_system_type ntfs_legacy_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ntfs",
+ .init_fs_context = ntfs_legacy_init_fs_context,
+ .parameters = ntfs_fs_parameters,
+ .kill_sb = ntfs3_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+};
+MODULE_ALIAS_FS("ntfs");
+
+static inline void register_as_ntfs_legacy(void)
+{
+ int err = register_filesystem(&ntfs_legacy_fs_type);
+ if (err)
+ pr_warn("ntfs3: Failed to register legacy ntfs filesystem driver: %d\n", err);
+}
+
+static inline void unregister_as_ntfs_legacy(void)
+{
+ unregister_filesystem(&ntfs_legacy_fs_type);
+}
+bool is_legacy_ntfs(struct super_block *sb)
+{
+ return sb->s_type == &ntfs_legacy_fs_type;
+}
+#else
+static inline void register_as_ntfs_legacy(void) {}
+static inline void unregister_as_ntfs_legacy(void) {}
+bool is_legacy_ntfs(struct super_block *sb) { return false; }
+#endif
+
+
// clang-format on
static int __init init_ntfs_fs(void)
goto out1;
}
+ register_as_ntfs_legacy();
err = register_filesystem(&ntfs_fs_type);
if (err)
goto out;
rcu_barrier();
kmem_cache_destroy(ntfs_inode_cachep);
unregister_filesystem(&ntfs_fs_type);
+ unregister_as_ntfs_legacy();
ntfs3_exit_bitmap();
#ifdef CONFIG_PROC_FS
obj-y += proc.o
-CFLAGS_task_mmu.o += $(call cc-option,-Wno-override-init,)
+CFLAGS_task_mmu.o += -Wno-override-init
proc-y := nommu.o task_nommu.o
proc-$(CONFIG_MMU) := task_mmu.o
break;
dst += ret;
}
- if (ret >= 0 && boot_command_line[0]) {
- ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n",
- boot_command_line);
- if (ret > 0)
- dst += ret;
- }
+ }
+ if (cmdline_has_extra_options() && ret >= 0 && boot_command_line[0]) {
+ ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n",
+ boot_command_line);
+ if (ret > 0)
+ dst += ret;
}
out:
kfree(key);
*/
ppage = pfn_to_online_page(pfn);
- if (!ppage || PageSlab(ppage) || page_has_type(ppage))
+ if (!ppage)
pcount = 0;
else
pcount = page_mapcount(ppage);
/*
* pseudo flags for the well known (anonymous) memory mapped pages
- *
- * Note that page->_mapcount is overloaded in SLAB, so the
- * simple test in page_mapped() is not enough.
*/
- if (!PageSlab(page) && page_mapped(page))
+ if (page_mapped(page))
u |= 1 << KPF_MMAP;
if (PageAnon(page))
u |= 1 << KPF_ANON;
static void release_journal_dev(struct reiserfs_journal *journal)
{
if (journal->j_bdev_file) {
- fput(journal->j_bdev_file);
+ bdev_fput(journal->j_bdev_file);
journal->j_bdev_file = NULL;
}
}
#ifdef CONFIG_ROMFS_ON_BLOCK
if (sb->s_bdev) {
sync_blockdev(sb->s_bdev);
- fput(sb->s_bdev_file);
+ bdev_fput(sb->s_bdev_file);
}
#endif
}
{
struct cached_fid *cfid = container_of(ref, struct cached_fid,
refcount);
+ int rc;
spin_lock(&cfid->cfids->cfid_list_lock);
if (cfid->on_list) {
cfid->dentry = NULL;
if (cfid->is_open) {
- SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
+ rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
cfid->fid.volatile_fid);
- atomic_dec(&cfid->tcon->num_remote_opens);
+ if (rc) /* should we retry on -EBUSY or -EAGAIN? */
+ cifs_dbg(VFS, "close cached dir rc %d\n", rc);
}
free_cached_dir(cfid);
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
list_for_each_entry(cfile, &tcon->openFileList, tlist) {
}
#endif /* CONFIG_CIFS_STATS2 */
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
atomic_set(&tcon->num_smbs_sent, 0);
spin_lock(&tcon->stat_lock);
}
#endif /* STATS2 */
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
i++;
seq_printf(m, "\n%d) %s", i, tcon->tree_name);
struct workqueue_struct *fileinfo_put_wq;
struct workqueue_struct *cifsoplockd_wq;
struct workqueue_struct *deferredclose_wq;
+struct workqueue_struct *serverclose_wq;
__u32 cifs_lock_secret;
/*
* server, can not assume caching of file data or metadata.
*/
cifs_set_oplock_level(cifs_inode, 0);
+ cifs_inode->lease_granted = false;
cifs_inode->flags = 0;
spin_lock_init(&cifs_inode->writers_lock);
cifs_inode->writers = 0;
spin_lock(&cifs_tcp_ses_lock);
spin_lock(&tcon->tc_lock);
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_umount);
if ((tcon->tc_count > 1) || (tcon->status == TID_EXITING)) {
/* we have other mounts to same share or we have
already tried to umount this and woken up
goto out_destroy_cifsoplockd_wq;
}
+ serverclose_wq = alloc_workqueue("serverclose",
+ WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ if (!serverclose_wq) {
+ rc = -ENOMEM;
+ goto out_destroy_serverclose_wq;
+ }
+
rc = cifs_init_inodecache();
if (rc)
goto out_destroy_deferredclose_wq;
destroy_workqueue(decrypt_wq);
out_destroy_cifsiod_wq:
destroy_workqueue(cifsiod_wq);
+out_destroy_serverclose_wq:
+ destroy_workqueue(serverclose_wq);
out_clean_proc:
cifs_proc_clean();
return rc;
destroy_workqueue(cifsoplockd_wq);
destroy_workqueue(decrypt_wq);
destroy_workqueue(fileinfo_put_wq);
+ destroy_workqueue(serverclose_wq);
destroy_workqueue(cifsiod_wq);
cifs_proc_clean();
}
/* set fid protocol-specific info */
void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
/* close a file */
- void (*close)(const unsigned int, struct cifs_tcon *,
+ int (*close)(const unsigned int, struct cifs_tcon *,
struct cifs_fid *);
/* close a file, returning file attributes and timestamps */
- void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
+ int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *pfile_info);
/* send a flush request to the server */
int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
and after mount option parsing we fill it */
char *domainName;
char *password;
+ char *password2; /* When key rotation used, new password may be set before it expires */
char workstation_name[CIFS_MAX_WORKSTATION_LEN];
struct session_key auth_key;
struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
*/
struct cifs_tcon {
struct list_head tcon_list;
+ int debug_id; /* Debugging for tracing */
int tc_count;
struct list_head rlist; /* reconnect list */
spinlock_t tc_lock; /* protect anything here that is not protected */
__u32 max_cached_dirs;
#ifdef CONFIG_CIFS_FSCACHE
u64 resource_id; /* server resource id */
+ bool fscache_acquired; /* T if we've tried acquiring a cookie */
struct fscache_volume *fscache; /* cookie for share */
+ struct mutex fscache_lock; /* Prevent regetting a cookie */
#endif
struct list_head pending_opens; /* list of incomplete opens */
struct cached_fids *cfids;
/* BB add field for back pointer to sb struct(s)? */
#ifdef CONFIG_CIFS_DFS_UPCALL
- struct list_head dfs_ses_list;
struct delayed_work dfs_cache_work;
#endif
struct delayed_work query_interfaces; /* query interfaces workqueue job */
bool swapfile:1;
bool oplock_break_cancelled:1;
bool status_file_deleted:1; /* file has been deleted */
+ bool offload:1; /* offload final part of _put to a wq */
unsigned int oplock_epoch; /* epoch from the lease break */
__u32 oplock_level; /* oplock/lease level from the lease break */
int count;
struct cifs_search_info srch_inf;
struct work_struct oplock_break; /* work for oplock breaks */
struct work_struct put; /* work for the final part of _put */
+ struct work_struct serverclose; /* work for serverclose */
struct delayed_work deferred;
bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
char *symlink_target;
struct TCP_Server_Info *server;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
- struct list_head dfs_ses_list;
};
static inline void __free_dfs_info_param(struct dfs_info3_param *param)
extern struct workqueue_struct *fileinfo_put_wq;
extern struct workqueue_struct *cifsoplockd_wq;
extern struct workqueue_struct *deferredclose_wq;
+extern struct workqueue_struct *serverclose_wq;
extern __u32 cifs_lock_secret;
extern mempool_t *cifs_sm_req_poolp;
struct kvec ea_iov;
};
+static inline bool cifs_ses_exiting(struct cifs_ses *ses)
+{
+ bool ret;
+
+ spin_lock(&ses->ses_lock);
+ ret = ses->ses_status == SES_EXITING;
+ spin_unlock(&ses->ses_lock);
+ return ret;
+}
+
#endif /* _CIFS_GLOB_H */
struct TCP_Server_Info *primary_server);
extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
int from_reconnect);
-extern void cifs_put_tcon(struct cifs_tcon *tcon);
+extern void cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace);
extern void cifs_release_automount_timer(void);
extern struct cifs_ses *sesInfoAlloc(void);
extern void sesInfoFree(struct cifs_ses *);
-extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled);
-extern void tconInfoFree(struct cifs_tcon *);
+extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled,
+ enum smb3_tcon_ref_trace trace);
+extern void tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace);
extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server,
__u32 *pexpected_response_sequence_number);
return options;
}
-struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon);
-void cifs_put_tcon_super(struct super_block *sb);
int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry);
-/* Put references of @ses and @ses->dfs_root_ses */
+/* Put references of @ses and its children */
static inline void cifs_put_smb_ses(struct cifs_ses *ses)
{
- struct cifs_ses *rses = ses->dfs_root_ses;
+ struct cifs_ses *next;
- __cifs_put_smb_ses(ses);
- if (rses)
- __cifs_put_smb_ses(rses);
+ do {
+ next = ses->dfs_root_ses;
+ __cifs_put_smb_ses(ses);
+ } while ((ses = next));
}
-/* Get an active reference of @ses and @ses->dfs_root_ses.
+/* Get an active reference of @ses and its children.
*
* NOTE: make sure to call this function when incrementing reference count of
* @ses to ensure that any DFS root session attached to it (@ses->dfs_root_ses)
* will also get its reference count incremented.
*
- * cifs_put_smb_ses() will put both references, so call it when you're done.
+ * cifs_put_smb_ses() will put all references, so call it when you're done.
*/
static inline void cifs_smb_ses_inc_refcount(struct cifs_ses *ses)
{
lockdep_assert_held(&cifs_tcp_ses_lock);
- ses->ses_count++;
- if (ses->dfs_root_ses)
- ses->dfs_root_ses->ses_count++;
+ for (; ses; ses = ses->dfs_root_ses)
+ ses->ses_count++;
}
static inline bool dfs_src_pathname_equal(const char *s1, const char *s2)
parm_data->list.EA_flags = 0;
/* we checked above that name len is less than 255 */
parm_data->list.name_len = (__u8)name_len;
- /* EA names are always ASCII */
- if (ea_name)
- strncpy(parm_data->list.name, ea_name, name_len);
- parm_data->list.name[name_len] = '\0';
+ /* EA names are always ASCII and NUL-terminated */
+ strscpy(parm_data->list.name, ea_name ?: "", name_len + 1);
parm_data->list.value_len = cpu_to_le16(ea_value_len);
/* caller ensures that ea_value_len is less than 64K but
we need to ensure that it fits within the smb */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
spin_lock(&ses->chan_lock);
for (i = 0; i < ses->chan_count; i++) {
if (!ses->chans[i].server)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) {
- /* check if iface is still active */
+ spin_lock(&ses->ses_lock);
+ if (ses->ses_status == SES_EXITING) {
+ spin_unlock(&ses->ses_lock);
+ continue;
+ }
+ spin_unlock(&ses->ses_lock);
+
spin_lock(&ses->chan_lock);
if (cifs_ses_get_chan_index(ses, server) ==
CIFS_INVAL_CHAN_INDEX) {
ctx->sectype != ses->sectype)
return 0;
+ if (ctx->dfs_root_ses != ses->dfs_root_ses)
+ return 0;
+
/*
* If an existing session is limited to less channels than
* requested, it should not be reused
}
/* no need to setup directory caching on IPC share, so pass in false */
- tcon = tcon_info_alloc(false);
+ tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_ipc);
if (tcon == NULL)
return -ENOMEM;
if (rc) {
cifs_server_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc);
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc_fail);
goto out;
}
return rc;
}
-/**
- * cifs_free_ipc - helper to release the session IPC tcon
- * @ses: smb session to unmount the IPC from
- *
- * Needs to be called everytime a session is destroyed.
- *
- * On session close, the IPC is closed and the server must release all tcons of the session.
- * No need to send a tree disconnect here.
- *
- * Besides, it will make the server to not close durable and resilient files on session close, as
- * specified in MS-SMB2 3.3.5.6 Receiving an SMB2 LOGOFF Request.
- */
-static int
-cifs_free_ipc(struct cifs_ses *ses)
-{
- struct cifs_tcon *tcon = ses->tcon_ipc;
-
- if (tcon == NULL)
- return 0;
-
- tconInfoFree(tcon);
- ses->tcon_ipc = NULL;
- return 0;
-}
-
static struct cifs_ses *
cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
{
void __cifs_put_smb_ses(struct cifs_ses *ses)
{
struct TCP_Server_Info *server = ses->server;
+ struct cifs_tcon *tcon;
unsigned int xid;
size_t i;
+ bool do_logoff;
int rc;
+ spin_lock(&cifs_tcp_ses_lock);
spin_lock(&ses->ses_lock);
- if (ses->ses_status == SES_EXITING) {
+ cifs_dbg(FYI, "%s: id=0x%llx ses_count=%d ses_status=%u ipc=%s\n",
+ __func__, ses->Suid, ses->ses_count, ses->ses_status,
+ ses->tcon_ipc ? ses->tcon_ipc->tree_name : "none");
+ if (ses->ses_status == SES_EXITING || --ses->ses_count > 0) {
spin_unlock(&ses->ses_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
return;
}
- spin_unlock(&ses->ses_lock);
+ /* ses_count can never go negative */
+ WARN_ON(ses->ses_count < 0);
- cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
- cifs_dbg(FYI,
- "%s: ses ipc: %s\n", __func__, ses->tcon_ipc ? ses->tcon_ipc->tree_name : "NONE");
+ spin_lock(&ses->chan_lock);
+ cifs_chan_clear_need_reconnect(ses, server);
+ spin_unlock(&ses->chan_lock);
- spin_lock(&cifs_tcp_ses_lock);
- if (--ses->ses_count > 0) {
- spin_unlock(&cifs_tcp_ses_lock);
- return;
- }
- spin_lock(&ses->ses_lock);
- if (ses->ses_status == SES_GOOD)
- ses->ses_status = SES_EXITING;
+ do_logoff = ses->ses_status == SES_GOOD && server->ops->logoff;
+ ses->ses_status = SES_EXITING;
+ tcon = ses->tcon_ipc;
+ ses->tcon_ipc = NULL;
spin_unlock(&ses->ses_lock);
spin_unlock(&cifs_tcp_ses_lock);
- /* ses_count can never go negative */
- WARN_ON(ses->ses_count < 0);
-
- spin_lock(&ses->ses_lock);
- if (ses->ses_status == SES_EXITING && server->ops->logoff) {
- spin_unlock(&ses->ses_lock);
- cifs_free_ipc(ses);
+ /*
+ * On session close, the IPC is closed and the server must release all
+ * tcons of the session. No need to send a tree disconnect here.
+ *
+ * Besides, it will make the server to not close durable and resilient
+ * files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an
+ * SMB2 LOGOFF Request.
+ */
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc);
+ if (do_logoff) {
xid = get_xid();
rc = server->ops->logoff(xid, ses);
if (rc)
cifs_server_dbg(VFS, "%s: Session Logoff failure rc=%d\n",
__func__, rc);
_free_xid(xid);
- } else {
- spin_unlock(&ses->ses_lock);
- cifs_free_ipc(ses);
}
spin_lock(&cifs_tcp_ses_lock);
}
++delim;
+ /* BB consider adding support for password2 (Key Rotation) for multiuser in future */
ctx->password = kstrndup(delim, len, GFP_KERNEL);
if (!ctx->password) {
cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n",
kfree(ctx->username);
ctx->username = NULL;
kfree_sensitive(ctx->password);
+ /* no need to free ctx->password2 since not allocated in this path */
ctx->password = NULL;
goto out_key_put;
}
if (!ses->password)
goto get_ses_fail;
}
+ /* ctx->password freed at unmount */
+ if (ctx->password2) {
+ ses->password2 = kstrdup(ctx->password2, GFP_KERNEL);
+ if (!ses->password2)
+ goto get_ses_fail;
+ }
if (ctx->domainname) {
ses->domainName = kstrdup(ctx->domainname, GFP_KERNEL);
if (!ses->domainName)
* need to lock before changing something in the session.
*/
spin_lock(&cifs_tcp_ses_lock);
+ if (ctx->dfs_root_ses)
+ cifs_smb_ses_inc_refcount(ctx->dfs_root_ses);
ses->dfs_root_ses = ctx->dfs_root_ses;
- if (ses->dfs_root_ses)
- ses->dfs_root_ses->ses_count++;
list_add(&ses->smb_ses_list, &server->smb_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
continue;
}
++tcon->tc_count;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_find);
spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
return tcon;
}
void
-cifs_put_tcon(struct cifs_tcon *tcon)
+cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace)
{
unsigned int xid;
struct cifs_ses *ses;
cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count);
spin_lock(&cifs_tcp_ses_lock);
spin_lock(&tcon->tc_lock);
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count - 1, trace);
if (--tcon->tc_count > 0) {
spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
_free_xid(xid);
cifs_fscache_release_super_cookie(tcon);
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free);
cifs_put_smb_ses(ses);
}
nohandlecache = ctx->nohandlecache;
else
nohandlecache = true;
- tcon = tcon_info_alloc(!nohandlecache);
+ tcon = tcon_info_alloc(!nohandlecache, netfs_trace_tcon_ref_new);
if (tcon == NULL) {
rc = -ENOMEM;
goto out_fail;
return tcon;
out_fail:
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free_fail);
return ERR_PTR(rc);
}
}
if (!IS_ERR(tlink_tcon(tlink)))
- cifs_put_tcon(tlink_tcon(tlink));
+ cifs_put_tcon(tlink_tcon(tlink), netfs_trace_tcon_ref_put_tlink);
kfree(tlink);
}
int rc = 0;
if (mnt_ctx->tcon)
- cifs_put_tcon(mnt_ctx->tcon);
+ cifs_put_tcon(mnt_ctx->tcon, netfs_trace_tcon_ref_put_mnt_ctx);
else if (mnt_ctx->ses)
cifs_put_smb_ses(mnt_ctx->ses);
else if (mnt_ctx->server)
cifs_put_tcp_session(mnt_ctx->server, 0);
+ mnt_ctx->ses = NULL;
+ mnt_ctx->tcon = NULL;
+ mnt_ctx->server = NULL;
mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
free_xid(mnt_ctx->xid);
}
bool isdfs;
int rc;
- INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list);
-
rc = dfs_mount_share(&mnt_ctx, &isdfs);
if (rc)
goto error;
return rc;
error:
- dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list);
cifs_mount_put_conns(&mnt_ctx);
return rc;
}
goto error;
rc = cifs_mount_get_tcon(&mnt_ctx);
+ if (!rc) {
+ /*
+ * Prevent superblock from being created with any missing
+ * connections.
+ */
+ if (WARN_ON(!mnt_ctx.server))
+ rc = -EHOSTDOWN;
+ else if (WARN_ON(!mnt_ctx.ses))
+ rc = -EACCES;
+ else if (WARN_ON(!mnt_ctx.tcon))
+ rc = -ENOENT;
+ }
if (rc)
goto error;
}
static struct cifs_tcon *
-cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
+__cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
{
int rc;
struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
struct cifs_ses *ses;
struct cifs_tcon *tcon = NULL;
struct smb3_fs_context *ctx;
+ char *origin_fullpath = NULL;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx == NULL)
ctx->sign = master_tcon->ses->sign;
ctx->seal = master_tcon->seal;
ctx->witness = master_tcon->use_witness;
+ ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses;
rc = cifs_set_vol_auth(ctx, master_tcon->ses);
if (rc) {
goto out;
}
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ spin_lock(&master_tcon->tc_lock);
+ if (master_tcon->origin_fullpath) {
+ spin_unlock(&master_tcon->tc_lock);
+ origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source);
+ if (IS_ERR(origin_fullpath)) {
+ tcon = ERR_CAST(origin_fullpath);
+ origin_fullpath = NULL;
+ cifs_put_smb_ses(ses);
+ goto out;
+ }
+ } else {
+ spin_unlock(&master_tcon->tc_lock);
+ }
+#endif
+
tcon = cifs_get_tcon(ses, ctx);
if (IS_ERR(tcon)) {
cifs_put_smb_ses(ses);
goto out;
}
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (origin_fullpath) {
+ spin_lock(&tcon->tc_lock);
+ tcon->origin_fullpath = origin_fullpath;
+ spin_unlock(&tcon->tc_lock);
+ origin_fullpath = NULL;
+ queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
+ dfs_cache_get_ttl() * HZ);
+ }
+#endif
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (cap_unix(ses))
reset_cifs_unix_caps(0, tcon, NULL, ctx);
out:
kfree(ctx->username);
kfree_sensitive(ctx->password);
+ kfree(origin_fullpath);
kfree(ctx);
return tcon;
}
+static struct cifs_tcon *
+cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
+{
+ struct cifs_tcon *ret;
+
+ cifs_mount_lock();
+ ret = __cifs_construct_tcon(cifs_sb, fsuid);
+ cifs_mount_unlock();
+ return ret;
+}
+
struct cifs_tcon *
cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
{
}
/*
- * Track individual DFS referral servers used by new DFS mount.
- *
- * On success, their lifetime will be shared by final tcon (dfs_ses_list).
- * Otherwise, they will be put by dfs_put_root_smb_sessions() in cifs_mount().
+ * Get an active reference of @ses so that next call to cifs_put_tcon() won't
+ * release it as any new DFS referrals must go through its IPC tcon.
*/
-static int add_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
+static void add_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
{
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
- struct dfs_root_ses *root_ses;
struct cifs_ses *ses = mnt_ctx->ses;
if (ses) {
- root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL);
- if (!root_ses)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&root_ses->list);
-
spin_lock(&cifs_tcp_ses_lock);
cifs_smb_ses_inc_refcount(ses);
spin_unlock(&cifs_tcp_ses_lock);
- root_ses->ses = ses;
- list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list);
}
- /* Select new DFS referral server so that new referrals go through it */
ctx->dfs_root_ses = ses;
- return 0;
}
static inline int parse_dfs_target(struct smb3_fs_context *ctx,
continue;
}
- if (is_refsrv) {
- rc = add_root_smb_session(mnt_ctx);
- if (rc)
- goto out;
- }
+ if (is_refsrv)
+ add_root_smb_session(mnt_ctx);
rc = ref_walk_advance(rw);
if (!rc) {
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
struct cifs_tcon *tcon;
char *origin_fullpath;
+ bool new_tcon = true;
int rc;
origin_fullpath = dfs_get_path(cifs_sb, ctx->source);
return PTR_ERR(origin_fullpath);
rc = dfs_referral_walk(mnt_ctx);
+ if (!rc) {
+ /*
+ * Prevent superblock from being created with any missing
+ * connections.
+ */
+ if (WARN_ON(!mnt_ctx->server))
+ rc = -EHOSTDOWN;
+ else if (WARN_ON(!mnt_ctx->ses))
+ rc = -EACCES;
+ else if (WARN_ON(!mnt_ctx->tcon))
+ rc = -ENOENT;
+ }
if (rc)
goto out;
if (!tcon->origin_fullpath) {
tcon->origin_fullpath = origin_fullpath;
origin_fullpath = NULL;
+ } else {
+ new_tcon = false;
}
spin_unlock(&tcon->tc_lock);
- if (list_empty(&tcon->dfs_ses_list)) {
- list_replace_init(&mnt_ctx->dfs_ses_list, &tcon->dfs_ses_list);
+ if (new_tcon) {
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
dfs_cache_get_ttl() * HZ);
- } else {
- dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list);
}
out:
if (rc)
return rc;
- ctx->dfs_root_ses = mnt_ctx->ses;
/*
* If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally
* try to get an DFS referral (even cached) to determine whether it is an DFS mount.
*isdfs = true;
add_root_smb_session(mnt_ctx);
- return __dfs_mount_share(mnt_ctx);
+ rc = __dfs_mount_share(mnt_ctx);
+ dfs_put_root_smb_sessions(mnt_ctx);
+ return rc;
}
/* Update dfs referral path of superblock */
#define _CIFS_DFS_H
#include "cifsglob.h"
+#include "cifsproto.h"
#include "fs_context.h"
+#include "dfs_cache.h"
#include "cifs_unicode.h"
#include <linux/namei.h>
ref_walk_tit(rw));
}
-struct dfs_root_ses {
- struct list_head list;
- struct cifs_ses *ses;
-};
-
int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref,
struct smb3_fs_context *ctx);
int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs);
{
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+ struct cifs_ses *rses = ctx->dfs_root_ses ?: mnt_ctx->ses;
- return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls,
+ return dfs_cache_find(mnt_ctx->xid, rses, cifs_sb->local_nls,
cifs_remap(cifs_sb), path, ref, tl);
}
-static inline void dfs_put_root_smb_sessions(struct list_head *head)
+/*
+ * cifs_get_smb_ses() already guarantees an active reference of
+ * @ses->dfs_root_ses when a new session is created, so we need to put extra
+ * references of all DFS root sessions that were used across the mount process
+ * in dfs_mount_share().
+ */
+static inline void dfs_put_root_smb_sessions(struct cifs_mount_ctx *mnt_ctx)
{
- struct dfs_root_ses *root, *tmp;
+ const struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+ struct cifs_ses *ses = ctx->dfs_root_ses;
+ struct cifs_ses *cur;
+
+ if (!ses)
+ return;
- list_for_each_entry_safe(root, tmp, head, list) {
- list_del_init(&root->list);
- cifs_put_smb_ses(root->ses);
- kfree(root);
+ for (cur = ses; cur; cur = cur->dfs_root_ses) {
+ if (cur->dfs_root_ses)
+ cifs_put_smb_ses(cur->dfs_root_ses);
}
+ cifs_put_smb_ses(ses);
}
#endif /* _CIFS_DFS_H */
return ret;
}
-/* Refresh dfs referral of tcon and mark it for reconnect if needed */
-static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_refresh)
+/* Refresh dfs referral of @ses and mark it for reconnect if needed */
+static void __refresh_ses_referral(struct cifs_ses *ses, bool force_refresh)
{
struct TCP_Server_Info *server = ses->server;
DFS_CACHE_TGT_LIST(old_tl);
bool needs_refresh = false;
struct cache_entry *ce;
unsigned int xid;
+ char *path = NULL;
int rc = 0;
xid = get_xid();
+ mutex_lock(&server->refpath_lock);
+ if (server->leaf_fullpath) {
+ path = kstrdup(server->leaf_fullpath + 1, GFP_ATOMIC);
+ if (!path)
+ rc = -ENOMEM;
+ }
+ mutex_unlock(&server->refpath_lock);
+ if (!path)
+ goto out;
+
down_read(&htable_rw_lock);
ce = lookup_cache_entry(path);
needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce);
free_xid(xid);
dfs_cache_free_tgts(&old_tl);
dfs_cache_free_tgts(&new_tl);
- return rc;
+ kfree(path);
}
-static int refresh_tcon(struct cifs_tcon *tcon, bool force_refresh)
+static inline void refresh_ses_referral(struct cifs_ses *ses)
{
- struct TCP_Server_Info *server = tcon->ses->server;
- struct cifs_ses *ses = tcon->ses;
+ __refresh_ses_referral(ses, false);
+}
- mutex_lock(&server->refpath_lock);
- if (server->leaf_fullpath)
- __refresh_tcon(server->leaf_fullpath + 1, ses, force_refresh);
- mutex_unlock(&server->refpath_lock);
- return 0;
+static inline void force_refresh_ses_referral(struct cifs_ses *ses)
+{
+ __refresh_ses_referral(ses, true);
}
/**
*/
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
- return refresh_tcon(tcon, true);
+ force_refresh_ses_referral(tcon->ses);
+ return 0;
}
/* Refresh all DFS referrals related to DFS tcon */
void dfs_cache_refresh(struct work_struct *work)
{
- struct TCP_Server_Info *server;
- struct dfs_root_ses *rses;
struct cifs_tcon *tcon;
struct cifs_ses *ses;
tcon = container_of(work, struct cifs_tcon, dfs_cache_work.work);
- ses = tcon->ses;
- server = ses->server;
- mutex_lock(&server->refpath_lock);
- if (server->leaf_fullpath)
- __refresh_tcon(server->leaf_fullpath + 1, ses, false);
- mutex_unlock(&server->refpath_lock);
-
- list_for_each_entry(rses, &tcon->dfs_ses_list, list) {
- ses = rses->ses;
- server = ses->server;
- mutex_lock(&server->refpath_lock);
- if (server->leaf_fullpath)
- __refresh_tcon(server->leaf_fullpath + 1, ses, false);
- mutex_unlock(&server->refpath_lock);
- }
+ for (ses = tcon->ses; ses; ses = ses->dfs_root_ses)
+ refresh_ses_referral(ses);
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
atomic_read(&dfs_cache_ttl) * HZ);
int disposition;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
+ int rdwr_for_fscache = 0;
*oplock = 0;
if (tcon->ses->server->oplocks)
return PTR_ERR(full_path);
}
+ /* If we're caching, we need to be able to fill in around partial writes. */
+ if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY)
+ rdwr_for_fscache = 1;
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open &&
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
desired_access |= GENERIC_READ; /* is this too little? */
if (OPEN_FMODE(oflags) & FMODE_WRITE)
desired_access |= GENERIC_WRITE;
+ if (rdwr_for_fscache == 1)
+ desired_access |= GENERIC_READ;
disposition = FILE_OVERWRITE_IF;
if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
if (!tcon->unix_ext && (mode & S_IWUGO) == 0)
create_options |= CREATE_OPTION_READONLY;
+retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
rc = server->ops->open(xid, &oparms, oplock, buf);
if (rc) {
cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc);
+ if (rc == -EACCES && rdwr_for_fscache == 1) {
+ desired_access &= ~GENERIC_READ;
+ rdwr_for_fscache = 2;
+ goto retry_open;
+ }
goto out;
}
+ if (rdwr_for_fscache == 2)
+ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
goto mknod_out;
}
+ trace_smb3_mknod_enter(xid, tcon->ses->Suid, tcon->tid, full_path);
+
rc = tcon->ses->server->ops->make_node(xid, inode, direntry, tcon,
full_path, mode,
device_number);
mknod_out:
+ if (rc)
+ trace_smb3_mknod_err(xid, tcon->ses->Suid, tcon->tid, rc);
+ else
+ trace_smb3_mknod_done(xid, tcon->ses->Suid, tcon->tid);
+
free_dentry_path(page);
free_xid(xid);
cifs_put_tlink(tlink);
*/
}
-static inline int cifs_convert_flags(unsigned int flags)
+static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
if ((flags & O_ACCMODE) == O_RDONLY)
return GENERIC_READ;
else if ((flags & O_ACCMODE) == O_WRONLY)
- return GENERIC_WRITE;
+ return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
else if ((flags & O_ACCMODE) == O_RDWR) {
/* GENERIC_ALL is too much permission to request
can cause unnecessary access denied on create */
int create_options = CREATE_NOT_DIR;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
+ int rdwr_for_fscache = 0;
if (!server->ops->open)
return -ENOSYS;
- desired_access = cifs_convert_flags(f_flags);
+ /* If we're caching, we need to be able to fill in around partial writes. */
+ if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
+ rdwr_for_fscache = 1;
+
+ desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
/*********************************************************************
* open flag mapping table:
if (f_flags & O_DIRECT)
create_options |= CREATE_NO_BUFFER;
+retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
};
rc = server->ops->open(xid, &oparms, oplock, buf);
- if (rc)
+ if (rc) {
+ if (rc == -EACCES && rdwr_for_fscache == 1) {
+ desired_access = cifs_convert_flags(f_flags, 0);
+ rdwr_for_fscache = 2;
+ goto retry_open;
+ }
return rc;
+ }
+ if (rdwr_for_fscache == 2)
+ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
/* TODO: Add support for calling posix query info but with passing in fid */
if (tcon->unix_ext)
}
static void cifsFileInfo_put_work(struct work_struct *work);
+void serverclose_work(struct work_struct *work);
struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
struct tcon_link *tlink, __u32 oplock,
cfile->tlink = cifs_get_tlink(tlink);
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
INIT_WORK(&cfile->put, cifsFileInfo_put_work);
+ INIT_WORK(&cfile->serverclose, serverclose_work);
INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
mutex_init(&cfile->fh_mutex);
spin_lock_init(&cfile->file_info_lock);
cifsFileInfo_put_final(cifs_file);
}
+void serverclose_work(struct work_struct *work)
+{
+ struct cifsFileInfo *cifs_file = container_of(work,
+ struct cifsFileInfo, serverclose);
+
+ struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
+
+ struct TCP_Server_Info *server = tcon->ses->server;
+ int rc = 0;
+ int retries = 0;
+ int MAX_RETRIES = 4;
+
+ do {
+ if (server->ops->close_getattr)
+ rc = server->ops->close_getattr(0, tcon, cifs_file);
+ else if (server->ops->close)
+ rc = server->ops->close(0, tcon, &cifs_file->fid);
+
+ if (rc == -EBUSY || rc == -EAGAIN) {
+ retries++;
+ msleep(250);
+ }
+ } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
+ );
+
+ if (retries == MAX_RETRIES)
+ pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
+
+ if (cifs_file->offload)
+ queue_work(fileinfo_put_wq, &cifs_file->put);
+ else
+ cifsFileInfo_put_final(cifs_file);
+}
+
/**
* cifsFileInfo_put - release a reference of file priv data
*
struct cifs_fid fid = {};
struct cifs_pending_open open;
bool oplock_break_cancelled;
+ bool serverclose_offloaded = false;
spin_lock(&tcon->open_file_lock);
spin_lock(&cifsi->open_file_lock);
spin_lock(&cifs_file->file_info_lock);
+
+ cifs_file->offload = offload;
if (--cifs_file->count > 0) {
spin_unlock(&cifs_file->file_info_lock);
spin_unlock(&cifsi->open_file_lock);
if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
struct TCP_Server_Info *server = tcon->ses->server;
unsigned int xid;
+ int rc = 0;
xid = get_xid();
if (server->ops->close_getattr)
- server->ops->close_getattr(xid, tcon, cifs_file);
+ rc = server->ops->close_getattr(xid, tcon, cifs_file);
else if (server->ops->close)
- server->ops->close(xid, tcon, &cifs_file->fid);
+ rc = server->ops->close(xid, tcon, &cifs_file->fid);
_free_xid(xid);
+
+ if (rc == -EBUSY || rc == -EAGAIN) {
+ // Server close failed, hence offloading it as an async op
+ queue_work(serverclose_wq, &cifs_file->serverclose);
+ serverclose_offloaded = true;
+ }
}
if (oplock_break_cancelled)
cifs_del_pending_open(&open);
- if (offload)
- queue_work(fileinfo_put_wq, &cifs_file->put);
- else
- cifsFileInfo_put_final(cifs_file);
+ // if serverclose has been offloaded to wq (on failure), it will
+ // handle offloading put as well. If serverclose not offloaded,
+ // we need to handle offloading put here.
+ if (!serverclose_offloaded) {
+ if (offload)
+ queue_work(fileinfo_put_wq, &cifs_file->put);
+ else
+ cifsFileInfo_put_final(cifs_file);
+ }
}
int cifs_open(struct inode *inode, struct file *file)
use_cache:
fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
file->f_mode & FMODE_WRITE);
- if (file->f_flags & O_DIRECT &&
- (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
- file->f_flags & O_APPEND))
- cifs_invalidate_cache(file_inode(file),
- FSCACHE_INVAL_DIO_WRITE);
+ if (!(file->f_flags & O_DIRECT))
+ goto out;
+ if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
+ goto out;
+ cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
out:
free_dentry_path(page);
int disposition = FILE_OPEN;
int create_options = CREATE_NOT_DIR;
struct cifs_open_parms oparms;
+ int rdwr_for_fscache = 0;
xid = get_xid();
mutex_lock(&cfile->fh_mutex);
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
- desired_access = cifs_convert_flags(cfile->f_flags);
+ /* If we're caching, we need to be able to fill in around partial writes. */
+ if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
+ rdwr_for_fscache = 1;
+
+ desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
/* O_SYNC also has bit for O_DSYNC so following check picks up either */
if (cfile->f_flags & O_SYNC)
if (server->ops->get_lease_key)
server->ops->get_lease_key(inode, &cfile->fid);
+retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
/* indicate that we need to relock the file */
oparms.reconnect = true;
}
+ if (rc == -EACCES && rdwr_for_fscache == 1) {
+ desired_access = cifs_convert_flags(cfile->f_flags, 0);
+ rdwr_for_fscache = 2;
+ goto retry_open;
+ }
if (rc) {
mutex_unlock(&cfile->fh_mutex);
goto reopen_error_exit;
}
+ if (rdwr_for_fscache == 2)
+ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
#include "rfc1002pdu.h"
#include "fs_context.h"
-static DEFINE_MUTEX(cifs_mount_mutex);
+DEFINE_MUTEX(cifs_mount_mutex);
static const match_table_t cifs_smb_version_tokens = {
{ Smb_1, SMB1_VERSION_STRING },
fsparam_string("username", Opt_user),
fsparam_string("pass", Opt_pass),
fsparam_string("password", Opt_pass),
+ fsparam_string("password2", Opt_pass2),
fsparam_string("ip", Opt_ip),
fsparam_string("addr", Opt_ip),
fsparam_string("domain", Opt_domain),
new_ctx->nodename = NULL;
new_ctx->username = NULL;
new_ctx->password = NULL;
+ new_ctx->password2 = NULL;
new_ctx->server_hostname = NULL;
new_ctx->domainname = NULL;
new_ctx->UNC = NULL;
DUP_CTX_STR(prepath);
DUP_CTX_STR(username);
DUP_CTX_STR(password);
+ DUP_CTX_STR(password2);
DUP_CTX_STR(server_hostname);
DUP_CTX_STR(UNC);
DUP_CTX_STR(source);
/* set the port that we got earlier */
cifs_set_port((struct sockaddr *)&ctx->dstaddr, ctx->port);
+ if (ctx->uid_specified && !ctx->forceuid_specified) {
+ ctx->override_uid = 1;
+ pr_notice("enabling forceuid mount option implicitly because uid= option is specified\n");
+ }
+
+ if (ctx->gid_specified && !ctx->forcegid_specified) {
+ ctx->override_gid = 1;
+ pr_notice("enabling forcegid mount option implicitly because gid= option is specified\n");
+ }
+
if (ctx->override_uid && !ctx->uid_specified) {
ctx->override_uid = 0;
pr_notice("ignoring forceuid mount option specified with no uid= option\n");
if (err)
return err;
- mutex_lock(&cifs_mount_mutex);
+ cifs_mount_lock();
ret = smb3_get_tree_common(fc);
- mutex_unlock(&cifs_mount_mutex);
+ cifs_mount_unlock();
return ret;
}
else {
kfree_sensitive(ses->password);
ses->password = kstrdup(ctx->password, GFP_KERNEL);
+ kfree_sensitive(ses->password2);
+ ses->password2 = kstrdup(ctx->password2, GFP_KERNEL);
}
STEAL_STRING(cifs_sb, ctx, domainname);
STEAL_STRING(cifs_sb, ctx, nodename);
ctx->override_uid = 0;
else
ctx->override_uid = 1;
+ ctx->forceuid_specified = true;
break;
case Opt_forcegid:
if (result.negated)
ctx->override_gid = 0;
else
ctx->override_gid = 1;
+ ctx->forcegid_specified = true;
break;
case Opt_perm:
if (result.negated)
goto cifs_parse_mount_err;
}
break;
+ case Opt_pass2:
+ kfree_sensitive(ctx->password2);
+ ctx->password2 = NULL;
+ if (strlen(param->string) == 0)
+ break;
+
+ ctx->password2 = kstrdup(param->string, GFP_KERNEL);
+ if (ctx->password2 == NULL) {
+ cifs_errorf(fc, "OOM when copying password2 string\n");
+ goto cifs_parse_mount_err;
+ }
+ break;
case Opt_ip:
if (strlen(param->string) == 0) {
ctx->got_ip = false;
cifs_parse_mount_err:
kfree_sensitive(ctx->password);
ctx->password = NULL;
+ kfree_sensitive(ctx->password2);
+ ctx->password2 = NULL;
return -EINVAL;
}
ctx->username = NULL;
kfree_sensitive(ctx->password);
ctx->password = NULL;
+ kfree_sensitive(ctx->password2);
+ ctx->password2 = NULL;
kfree(ctx->server_hostname);
ctx->server_hostname = NULL;
kfree(ctx->UNC);
Opt_source,
Opt_user,
Opt_pass,
+ Opt_pass2,
Opt_ip,
Opt_domain,
Opt_srcaddr,
};
struct smb3_fs_context {
+ bool forceuid_specified;
+ bool forcegid_specified;
bool uid_specified;
bool cruid_specified;
bool gid_specified;
char *username;
char *password;
+ char *password2;
char *domainname;
char *source;
char *server_hostname;
#define MAX_CACHED_FIDS 16
extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp);
+extern struct mutex cifs_mount_mutex;
+
+static inline void cifs_mount_lock(void)
+{
+ mutex_lock(&cifs_mount_mutex);
+}
+
+static inline void cifs_mount_unlock(void)
+{
+ mutex_unlock(&cifs_mount_mutex);
+}
+
#endif
#include "cifs_fs_sb.h"
#include "cifsproto.h"
+/*
+ * Key for fscache inode. [!] Contents must match comparisons in cifs_find_inode().
+ */
+struct cifs_fscache_inode_key {
+
+ __le64 uniqueid; /* server inode number */
+ __le64 createtime; /* creation time on server */
+ u8 type; /* S_IFMT file type */
+} __packed;
+
static void cifs_fscache_fill_volume_coherency(
struct cifs_tcon *tcon,
struct cifs_fscache_volume_coherency_data *cd)
char *key;
int ret = -ENOMEM;
+ if (tcon->fscache_acquired)
+ return 0;
+
+ mutex_lock(&tcon->fscache_lock);
+ if (tcon->fscache_acquired) {
+ mutex_unlock(&tcon->fscache_lock);
+ return 0;
+ }
+ tcon->fscache_acquired = true;
+
tcon->fscache = NULL;
switch (sa->sa_family) {
case AF_INET:
case AF_INET6:
break;
default:
+ mutex_unlock(&tcon->fscache_lock);
cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
return -EINVAL;
}
sharename = extract_sharename(tcon->tree_name);
if (IS_ERR(sharename)) {
+ mutex_unlock(&tcon->fscache_lock);
cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
return PTR_ERR(sharename);
}
}
pr_err("Cache volume key already in use (%s)\n", key);
vcookie = NULL;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_fscache_collision);
+ } else {
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_fscache_okay);
}
tcon->fscache = vcookie;
kfree(key);
out:
kfree(sharename);
+ mutex_unlock(&tcon->fscache_lock);
return ret;
}
cifs_fscache_fill_volume_coherency(tcon, &cd);
fscache_relinquish_volume(tcon->fscache, &cd, false);
tcon->fscache = NULL;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_fscache_relinq);
}
void cifs_fscache_get_inode_cookie(struct inode *inode)
{
struct cifs_fscache_inode_coherency_data cd;
+ struct cifs_fscache_inode_key key;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ key.uniqueid = cpu_to_le64(cifsi->uniqueid);
+ key.createtime = cpu_to_le64(cifsi->createtime);
+ key.type = (inode->i_mode & S_IFMT) >> 12;
cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd);
cifsi->netfs.cache =
fscache_acquire_cookie(tcon->fscache, 0,
- &cifsi->uniqueid, sizeof(cifsi->uniqueid),
+ &key, sizeof(key),
&cd, sizeof(cd),
i_size_read(&cifsi->netfs.inode));
if (cifsi->netfs.cache)
__cifs_readahead_to_fscache(inode, pos, len);
}
+static inline bool cifs_fscache_enabled(struct inode *inode)
+{
+ return fscache_cookie_enabled(cifs_inode_cookie(inode));
+}
+
#else /* CONFIG_CIFS_FSCACHE */
static inline
void cifs_fscache_fill_coherency(struct inode *inode,
static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {}
static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
+static inline bool cifs_fscache_enabled(struct inode *inode) { return false; }
static inline int cifs_fscache_query_occupancy(struct inode *inode,
pgoff_t first, unsigned int nr_pages,
} else {
cifs_open_info_to_fattr(fattr, data, sb);
}
- if (!rc && fattr->cf_flags & CIFS_FATTR_DELETE_PENDING)
+ if (!rc && *inode &&
+ (fattr->cf_flags & CIFS_FATTR_DELETE_PENDING))
cifs_mark_open_handles_for_deleted_file(*inode, full_path);
break;
case -EREMOTE:
{
struct cifs_fattr *fattr = opaque;
+ /* [!] The compared values must be the same in struct cifs_fscache_inode_key. */
+
/* don't match inode with different uniqueid */
if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
return 0;
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) {
list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) {
- if (ses_it->Suid == out.session_id) {
+ spin_lock(&ses_it->ses_lock);
+ if (ses_it->ses_status != SES_EXITING &&
+ ses_it->Suid == out.session_id) {
ses = ses_it;
/*
* since we are using the session outside the crit
* so increment its refcount
*/
cifs_smb_ses_inc_refcount(ses);
+ spin_unlock(&ses_it->ses_lock);
found = true;
goto search_end;
}
+ spin_unlock(&ses_it->ses_lock);
}
}
search_end:
kfree(buf_to_free->serverDomain);
kfree(buf_to_free->serverNOS);
kfree_sensitive(buf_to_free->password);
+ kfree_sensitive(buf_to_free->password2);
kfree(buf_to_free->user_name);
kfree(buf_to_free->domainName);
kfree_sensitive(buf_to_free->auth_key.response);
}
struct cifs_tcon *
-tcon_info_alloc(bool dir_leases_enabled)
+tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace)
{
struct cifs_tcon *ret_buf;
+ static atomic_t tcon_debug_id;
ret_buf = kzalloc(sizeof(*ret_buf), GFP_KERNEL);
if (!ret_buf)
atomic_inc(&tconInfoAllocCount);
ret_buf->status = TID_NEW;
- ++ret_buf->tc_count;
+ ret_buf->debug_id = atomic_inc_return(&tcon_debug_id);
+ ret_buf->tc_count = 1;
spin_lock_init(&ret_buf->tc_lock);
INIT_LIST_HEAD(&ret_buf->openFileList);
INIT_LIST_HEAD(&ret_buf->tcon_list);
atomic_set(&ret_buf->num_local_opens, 0);
atomic_set(&ret_buf->num_remote_opens, 0);
ret_buf->stats_from_time = ktime_get_real_seconds();
-#ifdef CONFIG_CIFS_DFS_UPCALL
- INIT_LIST_HEAD(&ret_buf->dfs_ses_list);
+#ifdef CONFIG_CIFS_FSCACHE
+ mutex_init(&ret_buf->fscache_lock);
#endif
+ trace_smb3_tcon_ref(ret_buf->debug_id, ret_buf->tc_count, trace);
return ret_buf;
}
void
-tconInfoFree(struct cifs_tcon *tcon)
+tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace)
{
if (tcon == NULL) {
cifs_dbg(FYI, "Null buffer passed to tconInfoFree\n");
return;
}
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, trace);
free_cached_dirs(tcon->cfids);
atomic_dec(&tconInfoAllocCount);
kfree(tcon->nativeFileSystem);
kfree_sensitive(tcon->password);
-#ifdef CONFIG_CIFS_DFS_UPCALL
- dfs_put_root_smb_sessions(&tcon->dfs_ses_list);
-#endif
kfree(tcon->origin_fullpath);
kfree(tcon);
}
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid != buf->Tid)
continue;
cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
}
-static void
+static int
cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid)
{
- CIFSSMBClose(xid, tcon, fid->netfid);
+ return CIFSSMBClose(xid, tcon, fid->netfid);
}
static int
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
cifs_stats_inc(
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
if (rc)
cifs_tcon_dbg(VFS, "Close cancelled mid failed rc:%d\n", rc);
- cifs_put_tcon(tcon);
+ cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_close_fid);
kfree(cancelled);
}
if (tcon->tc_count <= 0) {
struct TCP_Server_Info *server = NULL;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_cancelled_close);
WARN_ONCE(tcon->tc_count < 0, "tcon refcount is negative");
spin_unlock(&cifs_tcp_ses_lock);
return 0;
}
tcon->tc_count++;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_cancelled_close);
spin_unlock(&cifs_tcp_ses_lock);
rc = __smb2_handle_cancelled_cmd(tcon, SMB2_CLOSE_HE, 0,
persistent_fid, volatile_fid);
if (rc)
- cifs_put_tcon(tcon);
+ cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_close);
return rc;
}
rsp->PersistentFileId,
rsp->VolatileFileId);
if (rc)
- cifs_put_tcon(tcon);
+ cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_mid);
return rc;
}
memcpy(cfile->fid.create_guid, fid->create_guid, 16);
}
-static void
+static int
smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid)
{
- SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
+ return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
}
-static void
+static int
smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *cfile)
{
rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, &file_inf);
if (rc)
- return;
+ return rc;
inode = d_inode(cfile->dentry);
/* End of file and Attributes should not have to be updated on close */
spin_unlock(&inode->i_lock);
+ return rc;
}
static int
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) {
spin_lock(&tcon->tc_lock);
tcon = list_first_entry_or_null(&ses->tcon_list,
struct cifs_tcon,
tcon_list);
- if (tcon)
+ if (tcon) {
tcon->tc_count++;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_dfs_refer);
+ }
spin_unlock(&cifs_tcp_ses_lock);
}
/* ipc tcons are not refcounted */
spin_lock(&cifs_tcp_ses_lock);
tcon->tc_count--;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_dec_dfs_refer);
/* tc_count can never go negative */
WARN_ON(tcon->tc_count < 0);
spin_unlock(&cifs_tcp_ses_lock);
strcat(message, "W");
}
if (!new_oplock)
- strncpy(message, "None", sizeof(message));
+ strscpy(message, "None");
cinode->oplock = new_oplock;
cifs_dbg(FYI, "%s Lease granted on inode %p\n", message,
return 0;
}
-int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
- struct dentry *dentry, struct cifs_tcon *tcon,
- const char *full_path, umode_t mode, dev_t dev)
+static int __cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+ struct dentry *dentry, struct cifs_tcon *tcon,
+ const char *full_path, umode_t mode, dev_t dev)
{
- struct cifs_open_info_data buf = {};
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
struct cifs_io_parms io_parms = {};
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_fid fid;
unsigned int bytes_written;
- struct win_dev *pdev;
+ struct win_dev pdev = {};
struct kvec iov[2];
__u32 oplock = server->oplocks ? REQ_OPLOCK : 0;
int rc;
- if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode))
+ switch (mode & S_IFMT) {
+ case S_IFCHR:
+ strscpy(pdev.type, "IntxCHR");
+ pdev.major = cpu_to_le64(MAJOR(dev));
+ pdev.minor = cpu_to_le64(MINOR(dev));
+ break;
+ case S_IFBLK:
+ strscpy(pdev.type, "IntxBLK");
+ pdev.major = cpu_to_le64(MAJOR(dev));
+ pdev.minor = cpu_to_le64(MINOR(dev));
+ break;
+ case S_IFIFO:
+ strscpy(pdev.type, "LnxFIFO");
+ break;
+ default:
return -EPERM;
+ }
- oparms = (struct cifs_open_parms) {
- .tcon = tcon,
- .cifs_sb = cifs_sb,
- .desired_access = GENERIC_WRITE,
- .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR |
- CREATE_OPTION_SPECIAL),
- .disposition = FILE_CREATE,
- .path = full_path,
- .fid = &fid,
- };
+ oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, GENERIC_WRITE,
+ FILE_CREATE, CREATE_NOT_DIR |
+ CREATE_OPTION_SPECIAL, ACL_NO_MODE);
+ oparms.fid = &fid;
- rc = server->ops->open(xid, &oparms, &oplock, &buf);
+ rc = server->ops->open(xid, &oparms, &oplock, NULL);
if (rc)
return rc;
- /*
- * BB Do not bother to decode buf since no local inode yet to put
- * timestamps in, but we can reuse it safely.
- */
- pdev = (struct win_dev *)&buf.fi;
io_parms.pid = current->tgid;
io_parms.tcon = tcon;
- io_parms.length = sizeof(*pdev);
- iov[1].iov_base = pdev;
- iov[1].iov_len = sizeof(*pdev);
- if (S_ISCHR(mode)) {
- memcpy(pdev->type, "IntxCHR", 8);
- pdev->major = cpu_to_le64(MAJOR(dev));
- pdev->minor = cpu_to_le64(MINOR(dev));
- } else if (S_ISBLK(mode)) {
- memcpy(pdev->type, "IntxBLK", 8);
- pdev->major = cpu_to_le64(MAJOR(dev));
- pdev->minor = cpu_to_le64(MINOR(dev));
- } else if (S_ISFIFO(mode)) {
- memcpy(pdev->type, "LnxFIFO", 8);
- }
+ io_parms.length = sizeof(pdev);
+ iov[1].iov_base = &pdev;
+ iov[1].iov_len = sizeof(pdev);
rc = server->ops->sync_write(xid, &fid, &io_parms,
&bytes_written, iov, 1);
server->ops->close(xid, tcon, &fid);
- d_drop(dentry);
- /* FIXME: add code here to set EAs */
- cifs_free_open_info(&buf);
+ return rc;
+}
+
+int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+ struct dentry *dentry, struct cifs_tcon *tcon,
+ const char *full_path, umode_t mode, dev_t dev)
+{
+ struct inode *new = NULL;
+ int rc;
+
+ rc = __cifs_sfu_make_node(xid, inode, dentry, tcon,
+ full_path, mode, dev);
+ if (rc)
+ return rc;
+
+ if (tcon->posix_extensions) {
+ rc = smb311_posix_get_inode_info(&new, full_path, NULL,
+ inode->i_sb, xid);
+ } else if (tcon->unix_ext) {
+ rc = cifs_get_inode_info_unix(&new, full_path,
+ inode->i_sb, xid);
+ } else {
+ rc = cifs_get_inode_info(&new, full_path, NULL,
+ inode->i_sb, xid, NULL);
+ }
+ if (!rc)
+ d_instantiate(dentry, new);
return rc;
}
}
rc = cifs_setup_session(0, ses, server, nls_codepage);
+ if ((rc == -EACCES) || (rc == -EKEYEXPIRED) || (rc == -EKEYREVOKED)) {
+ /*
+ * Try alternate password for next reconnect (key rotation
+ * could be enabled on the server e.g.) if an alternate
+ * password is available and the current password is expired,
+ * but do not swap on non pwd related errors like host down
+ */
+ if (ses->password2)
+ swap(ses->password2, ses->password);
+ }
+
if ((rc == -EACCES) && !tcon->retry) {
mutex_unlock(&ses->session_mutex);
rc = -EHOSTDOWN;
memcpy(&pbuf->network_open_info,
&rsp->network_open_info,
sizeof(pbuf->network_open_info));
+ atomic_dec(&tcon->num_remote_opens);
}
- atomic_dec(&tcon->num_remote_opens);
close_exit:
SMB2_close_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->need_reconnect || tcon->need_reopen_files) {
tcon->tc_count++;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_reconnect_server);
list_add_tail(&tcon->rlist, &tmp_list);
tcon_selected = true;
}
if (tcon->ipc)
cifs_put_smb_ses(tcon->ses);
else
- cifs_put_tcon(tcon);
+ cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_reconnect_server);
}
if (!ses_exist)
goto done;
/* allocate a dummy tcon struct used for reconnect */
- tcon = tcon_info_alloc(false);
+ tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_reconnect_server);
if (!tcon) {
resched = true;
list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) {
list_del_init(&ses->rlist);
cifs_put_smb_ses(ses);
}
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free_reconnect_server);
done:
cifs_dbg(FYI, "Reconnecting tcons and channels finished\n");
if (tcon->tid != tid)
continue;
++tcon->tc_count;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_find_sess_tcon);
return tcon;
}
}
spin_unlock(&server->srv_lock);
if (!is_binding && !server->session_estab) {
- strncpy(shdr->Signature, "BSRSPYL", 8);
+ strscpy(shdr->Signature, "BSRSPYL");
return 0;
}
* Copyright (C) 2018, Microsoft Corporation.
*
* Author(s): Steve French <stfrench@microsoft.com>
+ *
+ * Please use this 3-part article as a reference for writing new tracepoints:
+ * https://lwn.net/Articles/379903/
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cifs
#include <linux/inet.h>
/*
- * Please use this 3-part article as a reference for writing new tracepoints:
- * https://lwn.net/Articles/379903/
+ * Specify enums for tracing information.
*/
+#define smb3_tcon_ref_traces \
+ EM(netfs_trace_tcon_ref_dec_dfs_refer, "DEC DfsRef") \
+ EM(netfs_trace_tcon_ref_free, "FRE ") \
+ EM(netfs_trace_tcon_ref_free_fail, "FRE Fail ") \
+ EM(netfs_trace_tcon_ref_free_ipc, "FRE Ipc ") \
+ EM(netfs_trace_tcon_ref_free_ipc_fail, "FRE Ipc-F ") \
+ EM(netfs_trace_tcon_ref_free_reconnect_server, "FRE Reconn") \
+ EM(netfs_trace_tcon_ref_get_cancelled_close, "GET Cn-Cls") \
+ EM(netfs_trace_tcon_ref_get_dfs_refer, "GET DfsRef") \
+ EM(netfs_trace_tcon_ref_get_find, "GET Find ") \
+ EM(netfs_trace_tcon_ref_get_find_sess_tcon, "GET FndSes") \
+ EM(netfs_trace_tcon_ref_get_reconnect_server, "GET Reconn") \
+ EM(netfs_trace_tcon_ref_new, "NEW ") \
+ EM(netfs_trace_tcon_ref_new_ipc, "NEW Ipc ") \
+ EM(netfs_trace_tcon_ref_new_reconnect_server, "NEW Reconn") \
+ EM(netfs_trace_tcon_ref_put_cancelled_close, "PUT Cn-Cls") \
+ EM(netfs_trace_tcon_ref_put_cancelled_close_fid, "PUT Cn-Fid") \
+ EM(netfs_trace_tcon_ref_put_cancelled_mid, "PUT Cn-Mid") \
+ EM(netfs_trace_tcon_ref_put_mnt_ctx, "PUT MntCtx") \
+ EM(netfs_trace_tcon_ref_put_reconnect_server, "PUT Reconn") \
+ EM(netfs_trace_tcon_ref_put_tlink, "PUT Tlink ") \
+ EM(netfs_trace_tcon_ref_see_cancelled_close, "SEE Cn-Cls") \
+ EM(netfs_trace_tcon_ref_see_fscache_collision, "SEE FV-CO!") \
+ EM(netfs_trace_tcon_ref_see_fscache_okay, "SEE FV-Ok ") \
+ EM(netfs_trace_tcon_ref_see_fscache_relinq, "SEE FV-Rlq") \
+ E_(netfs_trace_tcon_ref_see_umount, "SEE Umount")
+
+#undef EM
+#undef E_
+
+/*
+ * Define those tracing enums.
+ */
+#ifndef __SMB3_DECLARE_TRACE_ENUMS_ONCE_ONLY
+#define __SMB3_DECLARE_TRACE_ENUMS_ONCE_ONLY
+
+#define EM(a, b) a,
+#define E_(a, b) a
+
+enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte);
+
+#undef EM
+#undef E_
+#endif
+
+/*
+ * Export enum symbols via userspace.
+ */
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define E_(a, b) TRACE_DEFINE_ENUM(a);
+
+smb3_tcon_ref_traces;
+
+#undef EM
+#undef E_
+
+/*
+ * Now redefine the EM() and E_() macros to map the enums to the strings that
+ * will be printed in the output.
+ */
+#define EM(a, b) { a, b },
+#define E_(a, b) { a, b }
/* For logging errors in read or write */
DECLARE_EVENT_CLASS(smb3_rw_err_class,
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(delete_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mkdir_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(tdis_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mknod_enter);
DECLARE_EVENT_CLASS(smb3_inf_compound_done_class,
TP_PROTO(unsigned int xid,
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(delete_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mkdir_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(tdis_done);
-
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mknod_done);
DECLARE_EVENT_CLASS(smb3_inf_compound_err_class,
TP_PROTO(unsigned int xid,
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mkdir_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(delete_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(tdis_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mknod_err);
/*
* For logging SMB3 Status code and Command for responses which return errors
DEFINE_SMB3_CREDIT_EVENT(overflow_credits);
DEFINE_SMB3_CREDIT_EVENT(set_credits);
+
+TRACE_EVENT(smb3_tcon_ref,
+ TP_PROTO(unsigned int tcon_debug_id, int ref,
+ enum smb3_tcon_ref_trace trace),
+ TP_ARGS(tcon_debug_id, ref, trace),
+ TP_STRUCT__entry(
+ __field(unsigned int, tcon)
+ __field(int, ref)
+ __field(enum smb3_tcon_ref_trace, trace)
+ ),
+ TP_fast_assign(
+ __entry->tcon = tcon_debug_id;
+ __entry->ref = ref;
+ __entry->trace = trace;
+ ),
+ TP_printk("TC=%08x %s r=%u",
+ __entry->tcon,
+ __print_symbolic(__entry->trace, smb3_tcon_ref_traces),
+ __entry->ref)
+ );
+
+
+#undef EM
+#undef E_
#endif /* _CIFS_TRACE_H */
#undef TRACE_INCLUDE_PATH
__le16 StructureSize; /* 60 */
__le16 Flags;
__le32 Reserved;
- struct_group(network_open_info,
+ struct_group_attr(network_open_info, __packed,
__le64 CreationTime;
__le64 LastAccessTime;
__le64 LastWriteTime;
__u16 force_uid;
__u16 force_gid;
__s8 share_name[KSMBD_REQ_MAX_SHARE_NAME];
- __u32 reserved[112]; /* Reserved room */
+ __u32 reserved[111]; /* Reserved room */
+ __u32 payload_sz;
__u32 veto_list_sz;
__s8 ____payload[];
};
/*
* Share config flags.
*/
-#define KSMBD_SHARE_FLAG_INVALID (0)
-#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0)
-#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1)
-#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2)
-#define KSMBD_SHARE_FLAG_READONLY BIT(3)
-#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4)
-#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5)
-#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6)
-#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7)
-#define KSMBD_SHARE_FLAG_PIPE BIT(8)
-#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9)
-#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10)
-#define KSMBD_SHARE_FLAG_STREAMS BIT(11)
-#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12)
-#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13)
-#define KSMBD_SHARE_FLAG_UPDATE BIT(14)
-#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15)
+#define KSMBD_SHARE_FLAG_INVALID (0)
+#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0)
+#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1)
+#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2)
+#define KSMBD_SHARE_FLAG_READONLY BIT(3)
+#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4)
+#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5)
+#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6)
+#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7)
+#define KSMBD_SHARE_FLAG_PIPE BIT(8)
+#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9)
+#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10)
+#define KSMBD_SHARE_FLAG_STREAMS BIT(11)
+#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12)
+#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13)
+#define KSMBD_SHARE_FLAG_UPDATE BIT(14)
+#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15)
+#define KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY BIT(16)
/*
* Tree connect request flags.
share->name = kstrdup(name, GFP_KERNEL);
if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) {
- share->path = kstrdup(ksmbd_share_config_path(resp),
+ int path_len = PATH_MAX;
+
+ if (resp->payload_sz)
+ path_len = resp->payload_sz - resp->veto_list_sz;
+
+ share->path = kstrndup(ksmbd_share_config_path(resp), path_len,
GFP_KERNEL);
if (share->path)
share->path_sz = strlen(share->path);
int rc;
bool is_chained = false;
- if (conn->ops->allocate_rsp_buf(work))
- return;
-
if (conn->ops->is_transform_hdr &&
conn->ops->is_transform_hdr(work->request_buf)) {
rc = conn->ops->decrypt_req(work);
- if (rc < 0) {
- conn->ops->set_rsp_status(work, STATUS_DATA_ERROR);
- goto send;
- }
-
+ if (rc < 0)
+ return;
work->encrypted = true;
}
+ if (conn->ops->allocate_rsp_buf(work))
+ return;
+
rc = conn->ops->init_rsp_hdr(work);
if (rc) {
/* either uid or tid is not correct */
conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
+ (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
+ conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
}
conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING |
SMB2_GLOBAL_CAP_DIRECTORY_LEASING;
- if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
- (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
- conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
- conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
-
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
if (cmd == SMB2_QUERY_INFO_HE) {
struct smb2_query_info_req *req;
+ if (get_rfc1002_len(work->request_buf) <
+ offsetof(struct smb2_query_info_req, OutputBufferLength))
+ return -EINVAL;
+
req = smb2_get_msg(work->request_buf);
if ((req->InfoType == SMB2_O_INFO_FILE &&
(req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
write_unlock(&sess->tree_conns_lock);
rsp->StructureSize = cpu_to_le16(16);
out_err1:
- rsp->Capabilities = 0;
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE &&
+ test_share_config_flag(share,
+ KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY))
+ rsp->Capabilities = SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY;
+ else
+ rsp->Capabilities = 0;
rsp->Reserved = 0;
/* default manual caching */
rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING;
memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
if (dh_info.type == DURABLE_REQ_V2 || dh_info.type == DURABLE_REQ) {
- if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent)
+ if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent &&
+ test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY))
fp->is_persistent = true;
else
fp->is_durable = true;
if (!file_info->ReplaceIfExists)
flags = RENAME_NOREPLACE;
- smb_break_all_levII_oplock(work, fp, 0);
rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags);
+ if (!rc)
+ smb_break_all_levII_oplock(work, fp, 0);
out:
kfree(new_name);
return rc;
struct hlist_node ipc_table_hlist;
void *response;
+ unsigned int msg_sz;
};
static struct delayed_work ipc_timer_work;
}
memcpy(entry->response, payload, sz);
+ entry->msg_sz = sz;
wake_up_interruptible(&entry->wait);
ret = 0;
break;
return ret;
}
+static int ipc_validate_msg(struct ipc_msg_table_entry *entry)
+{
+ unsigned int msg_sz = entry->msg_sz;
+
+ if (entry->type == KSMBD_EVENT_RPC_REQUEST) {
+ struct ksmbd_rpc_command *resp = entry->response;
+
+ msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz;
+ } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) {
+ struct ksmbd_spnego_authen_response *resp = entry->response;
+
+ msg_sz = sizeof(struct ksmbd_spnego_authen_response) +
+ resp->session_key_len + resp->spnego_blob_len;
+ } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) {
+ struct ksmbd_share_config_response *resp = entry->response;
+
+ if (resp->payload_sz) {
+ if (resp->payload_sz < resp->veto_list_sz)
+ return -EINVAL;
+
+ msg_sz = sizeof(struct ksmbd_share_config_response) +
+ resp->payload_sz;
+ }
+ }
+
+ return entry->msg_sz != msg_sz ? -EINVAL : 0;
+}
+
static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle)
{
struct ipc_msg_table_entry entry;
ret = wait_event_interruptible_timeout(entry.wait,
entry.response != NULL,
IPC_WAIT_TIMEOUT);
+ if (entry.response) {
+ ret = ipc_validate_msg(&entry);
+ if (ret) {
+ kvfree(entry.response);
+ entry.response = NULL;
+ }
+ }
out:
down_write(&ipc_msg_table_lock);
hash_del(&entry.ipc_table_hlist);
goto out4;
}
+ /*
+ * explicitly handle file overwrite case, for compatibility with
+ * filesystems that may not support rename flags (e.g: fuse)
+ */
if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
err = -EEXIST;
goto out4;
}
+ flags &= ~(RENAME_NOREPLACE);
if (old_child == trap) {
err = -EINVAL;
gid_t i_gid;
int err;
+ inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
+ if (inode->i_ino == 0)
+ return -EINVAL;
+
err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid);
if (err)
return err;
i_uid_write(inode, i_uid);
i_gid_write(inode, i_gid);
- inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0);
inode_set_atime(inode, inode_get_mtime_sec(inode), 0);
inode_set_ctime(inode, inode_get_mtime_sec(inode), 0);
return error;
}
-static void fs_bdev_super_get(void *data)
-{
- struct super_block *sb = data;
-
- spin_lock(&sb_lock);
- sb->s_count++;
- spin_unlock(&sb_lock);
-}
-
-static void fs_bdev_super_put(void *data)
-{
- struct super_block *sb = data;
-
- put_super(sb);
-}
-
const struct blk_holder_ops fs_holder_ops = {
.mark_dead = fs_bdev_mark_dead,
.sync = fs_bdev_sync,
.freeze = fs_bdev_freeze,
.thaw = fs_bdev_thaw,
- .get_holder = fs_bdev_super_get,
- .put_holder = fs_bdev_super_put,
};
EXPORT_SYMBOL_GPL(fs_holder_ops);
* writable from userspace even for a read-only block device.
*/
if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
- fput(bdev_file);
+ bdev_fput(bdev_file);
return -EACCES;
}
if (atomic_read(&bdev->bd_fsfreeze_count) > 0) {
if (fc)
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
- fput(bdev_file);
+ bdev_fput(bdev_file);
return -EBUSY;
}
spin_lock(&sb_lock);
generic_shutdown_super(sb);
if (bdev) {
sync_blockdev(bdev);
- fput(sb->s_bdev_file);
+ bdev_fput(sb->s_bdev_file);
}
}
kn = kernfs_find_and_get(kobj->sd, attr->name);
if (kn)
kernfs_break_active_protection(kn);
+ else
+ kobject_put(kobj);
return kn;
}
EXPORT_SYMBOL_GPL(sysfs_break_active_protection);
/**
* lookup_file - look up a file in the tracefs filesystem
+ * @parent_ei: Pointer to the eventfs_inode that represents parent of the file
* @dentry: the dentry to look up
* @mode: the permission that the file should have.
* @attr: saved attributes changed by user
/**
* lookup_dir_entry - look up a dir in the tracefs filesystem
* @dentry: the directory to look up
+ * @pei: Pointer to the parent eventfs_inode if available
* @ei: the eventfs_inode that represents the directory to create
*
* This function will look up a dentry for a directory represented by
/**
* lookup_file_dentry - create a dentry for a file of an eventfs_inode
+ * @dentry: The parent dentry under which the new file's dentry will be created
* @ei: the eventfs_inode that the file will be created under
* @idx: the index into the entry_attrs[] of the @ei
- * @parent: The parent dentry of the created file.
- * @name: The name of the file to create
* @mode: The mode of the file.
* @data: The data to use to set the inode of the file with on open()
* @fops: The fops of the file to be created.
*
- * Create a dentry for a file of an eventfs_inode @ei and place it into the
- * address located at @e_dentry.
+ * This function creates a dentry for a file associated with an
+ * eventfs_inode @ei. It uses the entry attributes specified by @idx,
+ * if available. The file will have the specified @mode and its inode will be
+ * set up with @data upon open. The file operations will be set to @fops.
+ *
+ * Return: Returns a pointer to the newly created file's dentry or an error
+ * pointer.
*/
static struct dentry *
lookup_file_dentry(struct dentry *dentry,
.release = vboxsf_file_release,
.fsync = noop_fsync,
.splice_read = filemap_splice_read,
+ .setlease = simple_nosetlease,
};
const struct inode_operations vboxsf_reg_iops = {
if (!sbi->nls) {
vbg_err("vboxsf: Count not load '%s' nls\n", nls_name);
err = -EINVAL;
- goto fail_free;
+ goto fail_destroy_idr;
}
}
- sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL);
+ sbi->bdi_id = ida_alloc(&vboxsf_bdi_ida, GFP_KERNEL);
if (sbi->bdi_id < 0) {
err = sbi->bdi_id;
goto fail_free;
vboxsf_unmap_folder(sbi->root);
fail_free:
if (sbi->bdi_id >= 0)
- ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+ ida_free(&vboxsf_bdi_ida, sbi->bdi_id);
if (sbi->nls)
unload_nls(sbi->nls);
+fail_destroy_idr:
idr_destroy(&sbi->ino_idr);
kfree(sbi);
return err;
vboxsf_unmap_folder(sbi->root);
if (sbi->bdi_id >= 0)
- ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+ ida_free(&vboxsf_bdi_ida, sbi->bdi_id);
if (sbi->nls)
unload_nls(sbi->nls);
{
const char *in;
char *out;
- size_t out_len;
size_t out_bound_len;
size_t in_bound_len;
in_bound_len = utf8_len;
out = name;
- out_len = 0;
/* Reserve space for terminating 0 */
out_bound_len = name_bound_len - 1;
out += nb;
out_bound_len -= nb;
- out_len += nb;
}
*out = 0;
}
if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit),
- XFS_FSB_TO_B(mp, sbp->sb_width), 0, false))
+ XFS_FSB_TO_B(mp, sbp->sb_width), 0,
+ xfs_buf_daddr(bp) == XFS_SB_DADDR, false))
return -EFSCORRUPTED;
/*
}
/*
- * sunit, swidth, sectorsize(optional with 0) should be all in bytes,
- * so users won't be confused by values in error messages.
+ * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users
+ * won't be confused by values in error messages. This function returns false
+ * if the stripe geometry is invalid and the caller is unable to repair the
+ * stripe configuration later in the mount process.
*/
bool
xfs_validate_stripe_geometry(
__s64 sunit,
__s64 swidth,
int sectorsize,
+ bool may_repair,
bool silent)
{
if (swidth > INT_MAX) {
if (!silent)
xfs_notice(mp,
"stripe width (%lld) is too large", swidth);
- return false;
+ goto check_override;
}
if (sunit > swidth) {
if (!silent)
xfs_notice(mp,
"stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth);
- return false;
+ goto check_override;
}
if (sectorsize && (int)sunit % sectorsize) {
xfs_notice(mp,
"stripe unit (%lld) must be a multiple of the sector size (%d)",
sunit, sectorsize);
- return false;
+ goto check_override;
}
if (sunit && !swidth) {
if (!silent)
xfs_notice(mp,
"invalid stripe unit (%lld) and stripe width of 0", sunit);
- return false;
+ goto check_override;
}
if (!sunit && swidth) {
if (!silent)
xfs_notice(mp,
"invalid stripe width (%lld) and stripe unit of 0", swidth);
- return false;
+ goto check_override;
}
if (sunit && (int)swidth % (int)sunit) {
xfs_notice(mp,
"stripe width (%lld) must be a multiple of the stripe unit (%lld)",
swidth, sunit);
- return false;
+ goto check_override;
}
return true;
+
+check_override:
+ if (!may_repair)
+ return false;
+ /*
+ * During mount, mp->m_dalign will not be set unless the sunit mount
+ * option was set. If it was set, ignore the bad stripe alignment values
+ * and allow the validation and overwrite later in the mount process to
+ * attempt to overwrite the bad stripe alignment values with the values
+ * supplied by mount options.
+ */
+ if (!mp->m_dalign)
+ return false;
+ if (!silent)
+ xfs_notice(mp,
+"Will try to correct with specified mount options sunit (%d) and swidth (%d)",
+ BBTOB(mp->m_dalign), BBTOB(mp->m_swidth));
+ return true;
}
/*
struct xfs_trans *tp, xfs_agnumber_t agno,
struct xfs_buf **bpp);
-extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
- __s64 sunit, __s64 swidth, int sectorsize, bool silent);
+bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
+ __s64 sunit, __s64 swidth, int sectorsize, bool may_repair,
+ bool silent);
uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
struct xfs_scrub *sc,
struct xfs_inode *ip)
{
- if (current->journal_info != NULL) {
- ASSERT(current->journal_info == sc->tp);
-
+ if (sc->tp) {
/*
* If we are in a transaction, we /cannot/ drop the inode
* ourselves, because the VFS will trigger writeback, which
{
struct xfs_writepage_ctx wpc = { };
- /*
- * Writing back data in a transaction context can result in recursive
- * transactions. This is bad, so issue a warning and get out of here.
- */
- if (WARN_ON_ONCE(current->journal_info))
- return 0;
-
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
}
fs_put_dax(btp->bt_daxdev, btp->bt_mount);
/* the main block device is closed by kill_block_super */
if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
- fput(btp->bt_bdev_file);
+ bdev_fput(btp->bt_bdev_file);
kfree(btp);
}
* - Memory shrinkers queued the inactivation worker and it hasn't finished.
* - The queue depth exceeds the maximum allowable percpu backlog.
*
- * Note: If the current thread is running a transaction, we don't ever want to
- * wait for other transactions because that could introduce a deadlock.
+ * Note: If we are in a NOFS context here (e.g. current thread is running a
+ * transaction) the we don't want to block here as inodegc progress may require
+ * filesystem resources we hold to make progress and that could result in a
+ * deadlock. Hence we skip out of here if we are in a scoped NOFS context.
*/
static inline bool
xfs_inodegc_want_flush_work(
unsigned int items,
unsigned int shrinker_hits)
{
- if (current->journal_info)
+ if (current->flags & PF_MEMALLOC_NOFS)
return false;
if (shrinker_hits > 0)
*/
if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
tdp->i_projid != sip->i_projid)) {
- error = -EXDEV;
- goto error_return;
+ /*
+ * Project quota setup skips special files which can
+ * leave inodes in a PROJINHERIT directory without a
+ * project ID set. We need to allow links to be made
+ * to these "project-less" inodes because userspace
+ * expects them to succeed after project ID setup,
+ * but everything else should be rejected.
+ */
+ if (!special_file(VFS_I(sip)->i_mode) ||
+ sip->i_projid != 0) {
+ error = -EXDEV;
+ goto error_return;
+ }
}
if (!resblks) {
mp->m_logdev_targp = mp->m_ddev_targp;
/* Handle won't be used, drop it */
if (logdev_file)
- fput(logdev_file);
+ bdev_fput(logdev_file);
}
return 0;
xfs_free_buftarg(mp->m_ddev_targp);
out_close_rtdev:
if (rtdev_file)
- fput(rtdev_file);
+ bdev_fput(rtdev_file);
out_close_logdev:
if (logdev_file)
- fput(logdev_file);
+ bdev_fput(logdev_file);
return error;
}
xfs_trans_set_context(
struct xfs_trans *tp)
{
- ASSERT(current->journal_info == NULL);
tp->t_pflags = memalloc_nofs_save();
- current->journal_info = tp;
}
static inline void
xfs_trans_clear_context(
struct xfs_trans *tp)
{
- if (current->journal_info == tp) {
- memalloc_nofs_restore(tp->t_pflags);
- current->journal_info = NULL;
- }
+ memalloc_nofs_restore(tp->t_pflags);
}
static inline void
struct xfs_trans *old_tp,
struct xfs_trans *new_tp)
{
- ASSERT(current->journal_info == old_tp);
new_tp->t_pflags = old_tp->t_pflags;
old_tp->t_pflags = 0;
- current->journal_info = new_tp;
}
#endif /* __XFS_TRANS_H__ */
zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
zonefs_zgroup_name(ztype),
zgroup->g_nr_zones,
- zgroup->g_nr_zones > 1 ? "s" : "");
+ str_plural(zgroup->g_nr_zones));
return 0;
}
* acpi_dev_hid_uid_match - Match device by supplied HID and UID
* @adev: ACPI device to match.
* @hid2: Hardware ID of the device.
- * @uid2: Unique ID of the device, pass 0 or NULL to not check _UID.
+ * @uid2: Unique ID of the device, pass NULL to not check _UID.
*
* Matches HID and UID in @adev with given @hid2 and @uid2. Absence of @uid2
* will be treated as a match. If user wants to validate @uid2, it should be
* done before calling this function.
*
- * Returns: %true if matches or @uid2 is 0 or NULL, %false otherwise.
+ * Returns: %true if matches or @uid2 is NULL, %false otherwise.
*/
#define acpi_dev_hid_uid_match(adev, hid2, uid2) \
(acpi_dev_hid_match(adev, hid2) && \
- (!(uid2) || acpi_dev_uid_match(adev, uid2)))
+ /* Distinguish integer 0 from NULL @uid2 */ \
+ (_Generic(uid2, ACPI_STR_TYPES(!(uid2)), default: 0) || \
+ acpi_dev_uid_match(adev, uid2)))
void acpi_dev_clear_dependencies(struct acpi_device *supplier);
bool acpi_dev_ready_for_enumeration(const struct acpi_device *device);
#define io_stop_wc() do { } while (0)
#endif
+/*
+ * Architectures that guarantee an implicit smp_mb() in switch_mm()
+ * can override smp_mb__after_switch_mm.
+ */
+#ifndef smp_mb__after_switch_mm
+# define smp_mb__after_switch_mm() smp_mb()
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_GENERIC_BARRIER_H */
#else /* !CONFIG_BUG */
#ifndef HAVE_ARCH_BUG
-#define BUG() do {} while (1)
+#define BUG() do { \
+ do {} while (1); \
+ unreachable(); \
+} while (0)
#endif
#ifndef HAVE_ARCH_BUG_ON
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef __ASM_GENERIC_EXPORT_H
-#define __ASM_GENERIC_EXPORT_H
-
-/*
- * <asm/export.h> and <asm-generic/export.h> are deprecated.
- * Please include <linux/export.h> directly.
- */
-#include <linux/export.h>
-
-#endif
u32 proximity_info_valid : 1;
} __packed;
-/* Not a union in windows but useful for zeroing */
-union hv_proximity_domain_info {
- struct {
- u32 domain_id;
- struct hv_proximity_domain_flags flags;
- };
- u64 as_uint64;
+struct hv_proximity_domain_info {
+ u32 domain_id;
+ struct hv_proximity_domain_flags flags;
} __packed;
struct hv_lp_startup_status {
} __packed;
/* HvAddLogicalProcessor hypercall */
-struct hv_add_logical_processor_in {
+struct hv_input_add_logical_processor {
u32 lp_index;
u32 apic_id;
- union hv_proximity_domain_info proximity_domain_info;
- u64 flags;
+ struct hv_proximity_domain_info proximity_domain_info;
} __packed;
-struct hv_add_logical_processor_out {
+struct hv_output_add_logical_processor {
struct hv_lp_startup_status startup_status;
} __packed;
u8 padding[3];
u8 subnode_type;
u64 subnode_id;
- union hv_proximity_domain_info proximity_domain_info;
+ struct hv_proximity_domain_info proximity_domain_info;
u64 flags;
} __packed;
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
+#include <acpi/acpi_numa.h>
#include <linux/cpumask.h>
#include <linux/nmi.h>
#include <asm/ptrace.h>
bool hv_isolation_type_snp(void);
bool hv_isolation_type_tdx(void);
+static inline struct hv_proximity_domain_info hv_numa_node_to_pxm_info(int node)
+{
+ struct hv_proximity_domain_info pxm_info = {};
+
+ if (node != NUMA_NO_NODE) {
+ pxm_info.domain_id = node_to_pxm(node);
+ pxm_info.flags.proximity_info_valid = 1;
+ pxm_info.flags.proximity_preferred = 1;
+ }
+
+ return pxm_info;
+}
+
/* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */
static inline int hv_result(u64 status)
{
*/
#define kvm_pmu_update_vcpu_events(vcpu) \
do { \
- if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \
+ if (!has_vhe() && kvm_arm_support_pmu_v3()) \
vcpu->arch.pmu.events = *kvm_get_pmu_events(); \
} while (0)
#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
/* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */
#define BLK_OPEN_RESTRICT_WRITES ((__force blk_mode_t)(1 << 5))
+/* return partition scanning errors */
+#define BLK_OPEN_STRICT_SCAN ((__force blk_mode_t)(1 << 6))
struct gendisk {
/*
* Thaw the file system mounted on the block device.
*/
int (*thaw)(struct block_device *bdev);
-
- /*
- * If needed, get a reference to the holder.
- */
- void (*get_holder)(void *holder);
-
- /*
- * Release the holder.
- */
- void (*put_holder)(void *holder);
};
/*
int bdev_freeze(struct block_device *bdev);
int bdev_thaw(struct block_device *bdev);
+void bdev_fput(struct file *bdev_file);
struct io_comp_batch {
struct request *req_list;
#ifdef __KERNEL__
#include <linux/kernel.h>
#include <linux/types.h>
+bool __init cmdline_has_extra_options(void);
#else /* !__KERNEL__ */
/*
* NOTE: This is only for tools/bootconfig, because tools/bootconfig will
int __init xbc_get_info(int *node_size, size_t *data_size);
/* XBC cleanup data structures */
-void __init xbc_exit(void);
+void __init _xbc_exit(bool early);
+
+static inline void xbc_exit(void)
+{
+ _xbc_exit(false);
+}
/* XBC embedded bootconfig data in kernel */
#ifdef CONFIG_BOOT_CONFIG_EMBED
enum bpf_link_type type;
const struct bpf_link_ops *ops;
struct bpf_prog *prog;
- struct work_struct work;
+ /* rcu is used before freeing, work can be used to schedule that
+ * RCU-based freeing before that, so they never overlap
+ */
+ union {
+ struct rcu_head rcu;
+ struct work_struct work;
+ };
};
struct bpf_link_ops {
void (*release)(struct bpf_link *link);
+ /* deallocate link resources callback, called without RCU grace period
+ * waiting
+ */
void (*dealloc)(struct bpf_link *link);
+ /* deallocate link resources callback, called after RCU grace period;
+ * if underlying BPF program is sleepable we go through tasks trace
+ * RCU GP and then "classic" RCU GP
+ */
+ void (*dealloc_deferred)(struct bpf_link *link);
int (*detach)(struct bpf_link *link);
int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
struct bpf_prog *old_prog);
* Examples include TDX Guest.
*/
CC_ATTR_HOTPLUG_DISABLED,
+
+ /**
+ * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host.
+ *
+ * The host kernel is running with the necessary features
+ * enabled to run SEV-SNP guests.
+ */
+ CC_ATTR_HOST_SEV_SNP,
};
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
* * FALSE - Specified Confidential Computing attribute is not active
*/
bool cc_platform_has(enum cc_attr attr);
+void cc_platform_set(enum cc_attr attr);
+void cc_platform_clear(enum cc_attr attr);
#else /* !CONFIG_ARCH_HAS_CC_PLATFORM */
static inline bool cc_platform_has(enum cc_attr attr) { return false; }
+static inline void cc_platform_set(enum cc_attr attr) { }
+static inline void cc_platform_clear(enum cc_attr attr) { }
#endif /* CONFIG_ARCH_HAS_CC_PLATFORM */
return 0;
}
+static inline int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk)
+{
+ return 0;
+}
+
static inline void clk_rate_exclusive_put(struct clk *clk) {}
#endif
* - When one operand is a null pointer constant (i.e. when x is an integer
* constant expression) and the other is an object pointer (i.e. our
* third operand), the conditional operator returns the type of the
- * object pointer operand (i.e. "int *). Here, within the sizeof(), we
+ * object pointer operand (i.e. "int *"). Here, within the sizeof(), we
* would then get:
* sizeof(*((int *)(...)) == sizeof(int) == 4
* - When one operand is a void pointer (i.e. when x is not an integer
void device_link_remove(void *consumer, struct device *supplier);
void device_links_supplier_sync_state_pause(void);
void device_links_supplier_sync_state_resume(void);
+void device_link_wait_removal(void);
/* Create alias, so I can be autoloaded. */
#define MODULE_ALIAS_CHARDEV(major,minor) \
return dma_fence_is_array(fence) || dma_fence_is_chain(fence);
}
+#define DMA_FENCE_WARN(f, fmt, args...) \
+ do { \
+ struct dma_fence *__ff = (f); \
+ pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\
+ ##args); \
+ } while (0)
+
#endif /* __LINUX_DMA_FENCE_H */
* max utilization to the allowed CPU capacity before calculating
* effective performance.
*/
- max_util = map_util_perf(max_util);
max_util = min(max_util, allowed_cpu_cap);
/*
eth_hw_addr_set(dev, addr);
}
+/**
+ * eth_skb_pkt_type - Assign packet type if destination address does not match
+ * @skb: Assigned a packet type if address does not match @dev address
+ * @dev: Network device used to compare packet address against
+ *
+ * If the destination MAC address of the packet does not match the network
+ * device address, assign an appropriate packet type.
+ */
+static inline void eth_skb_pkt_type(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ const struct ethhdr *eth = eth_hdr(skb);
+
+ if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) {
+ if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
+ if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
+ skb->pkt_type = PACKET_BROADCAST;
+ else
+ skb->pkt_type = PACKET_MULTICAST;
+ } else {
+ skb->pkt_type = PACKET_OTHERHOST;
+ }
+ }
+}
+
/**
* eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame
* @skb: Buffer to pad
#define __QCOM_QSEECOM_H
#include <linux/auxiliary_bus.h>
+#include <linux/dma-mapping.h>
#include <linux/types.h>
#include <linux/firmware/qcom/qcom_scm.h>
u32 app_id;
};
+/**
+ * qseecom_scm_dev() - Get the SCM device associated with the QSEECOM client.
+ * @client: The QSEECOM client device.
+ *
+ * Returns the SCM device under which the provided QSEECOM client device
+ * operates. This function is intended to be used for DMA allocations.
+ */
+static inline struct device *qseecom_scm_dev(struct qseecom_client *client)
+{
+ return client->aux_dev.dev.parent->parent;
+}
+
+/**
+ * qseecom_dma_alloc() - Allocate DMA memory for a QSEECOM client.
+ * @client: The QSEECOM client to allocate the memory for.
+ * @size: The number of bytes to allocate.
+ * @dma_handle: Pointer to where the DMA address should be stored.
+ * @gfp: Allocation flags.
+ *
+ * Wrapper function for dma_alloc_coherent(), allocating DMA memory usable for
+ * TZ/QSEECOM communication. Refer to dma_alloc_coherent() for details.
+ */
+static inline void *qseecom_dma_alloc(struct qseecom_client *client, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp)
+{
+ return dma_alloc_coherent(qseecom_scm_dev(client), size, dma_handle, gfp);
+}
+
+/**
+ * dma_free_coherent() - Free QSEECOM DMA memory.
+ * @client: The QSEECOM client for which the memory has been allocated.
+ * @size: The number of bytes allocated.
+ * @cpu_addr: Virtual memory address to free.
+ * @dma_handle: DMA memory address to free.
+ *
+ * Wrapper function for dma_free_coherent(), freeing memory previously
+ * allocated with qseecom_dma_alloc(). Refer to dma_free_coherent() for
+ * details.
+ */
+static inline void qseecom_dma_free(struct qseecom_client *client, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle)
+{
+ return dma_free_coherent(qseecom_scm_dev(client), size, cpu_addr, dma_handle);
+}
+
/**
* qcom_qseecom_app_send() - Send to and receive data from a given QSEE app.
* @client: The QSEECOM client associated with the target app.
- * @req: Request buffer sent to the app (must be DMA-mappable).
+ * @req: DMA address of the request buffer sent to the app.
* @req_size: Size of the request buffer.
- * @rsp: Response buffer, written to by the app (must be DMA-mappable).
+ * @rsp: DMA address of the response buffer, written to by the app.
* @rsp_size: Size of the response buffer.
*
* Sends a request to the QSEE app associated with the given client and read
*
* Return: Zero on success, nonzero on failure.
*/
-static inline int qcom_qseecom_app_send(struct qseecom_client *client, void *req, size_t req_size,
- void *rsp, size_t rsp_size)
+static inline int qcom_qseecom_app_send(struct qseecom_client *client,
+ dma_addr_t req, size_t req_size,
+ dma_addr_t rsp, size_t rsp_size)
{
return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size);
}
#ifdef CONFIG_QCOM_QSEECOM
int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id);
-int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp,
- size_t rsp_size);
+int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size,
+ dma_addr_t rsp, size_t rsp_size);
#else /* CONFIG_QCOM_QSEECOM */
return -EINVAL;
}
-static inline int qcom_scm_qseecom_app_send(u32 app_id, void *req,
- size_t req_size, void *rsp,
- size_t rsp_size)
+static inline int qcom_scm_qseecom_app_send(u32 app_id,
+ dma_addr_t req, size_t req_size,
+ dma_addr_t rsp, size_t rsp_size)
{
return -EINVAL;
}
return -ENOSYS;
}
-struct framer *framer_get(struct device *dev, const char *con_id)
+static inline struct framer *framer_get(struct device *dev, const char *con_id)
{
return ERR_PTR(-ENOSYS);
}
-void framer_put(struct device *dev, struct framer *framer)
+static inline void framer_put(struct device *dev, struct framer *framer)
{
}
#define FMODE_PWRITE ((__force fmode_t)0x10)
/* File is opened for execution with sys_execve / sys_uselib */
#define FMODE_EXEC ((__force fmode_t)0x20)
+/* File writes are restricted (block device specific) */
+#define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40)
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
#ifndef __LINUX_GFP_TYPES_H
#define __LINUX_GFP_TYPES_H
+#include <linux/bits.h>
+
/* The typedef is in types.h but we want the documentation here */
#if 0
/**
struct gpio_device *gpio_device_find(const void *data,
int (*match)(struct gpio_chip *gc,
const void *data));
-struct gpio_device *gpio_device_find_by_label(const char *label);
-struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode);
struct gpio_device *gpio_device_get(struct gpio_device *gdev);
void gpio_device_put(struct gpio_device *gdev);
int gpio_device_get_base(struct gpio_device *gdev);
const char *gpio_device_get_label(struct gpio_device *gdev);
+struct gpio_device *gpio_device_find_by_label(const char *label);
+struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode);
+
#else /* CONFIG_GPIOLIB */
#include <asm/bug.h>
return NULL;
}
+static inline struct gpio_device *gpio_device_find_by_label(const char *label)
+{
+ WARN_ON(1);
+ return NULL;
+}
+
+static inline struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode)
+{
+ WARN_ON(1);
+ return NULL;
+}
+
static inline int gpiochip_lock_as_irq(struct gpio_chip *gc,
unsigned int offset)
{
#ifndef __LINUX_GPIO_PROPERTY_H
#define __LINUX_GPIO_PROPERTY_H
-#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */
#include <linux/property.h>
#define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \
u32 gpadl_handle;
u32 size;
void *buffer;
+ bool decrypted;
};
struct vmbus_channel {
* later.
* IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers,
* depends on IRQF_PERCPU.
+ * IRQF_COND_ONESHOT - Agree to do IRQF_ONESHOT if already set for a shared
+ * interrupt.
*/
#define IRQF_SHARED 0x00000080
#define IRQF_PROBE_SHARED 0x00000100
#define IRQF_COND_SUSPEND 0x00040000
#define IRQF_NO_AUTOEN 0x00080000
#define IRQF_NO_DEBUG 0x00100000
+#define IRQF_COND_ONESHOT 0x00200000
#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
* A hint to not wake right away but delay until there are enough of
* tw's queued to match the number of CQEs the task is waiting for.
*
- * Must not be used wirh requests generating more than one CQE.
+ * Must not be used with requests generating more than one CQE.
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
*/
IOU_F_TWQ_LAZY_WAKE = 1,
struct io_submit_state submit_state;
- struct io_buffer_list *io_bl;
struct xarray io_bl_xa;
struct io_hash_table cancel_table_locked;
# define lockdep_softirq_enter() do { } while (0)
# define lockdep_softirq_exit() do { } while (0)
# define lockdep_hrtimer_enter(__hrtimer) false
-# define lockdep_hrtimer_exit(__context) do { } while (0)
+# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0)
# define lockdep_posixtimer_enter() do { } while (0)
# define lockdep_posixtimer_exit() do { } while (0)
# define lockdep_irq_work_enter(__work) do { } while (0)
ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */
ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */
+ ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */
ATA_DFLAG_DETACH = (1 << 24),
ATA_DFLAG_DETACHED = (1 << 25),
ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */
* a large folio, it includes the number of times this page is mapped
* as part of that folio.
*
- * The result is undefined for pages which cannot be mapped into userspace.
- * For example SLAB or special types of pages. See function page_has_type().
- * They use this field in struct page differently.
+ * Will report 0 for pages which cannot be mapped into userspace, eg
+ * slab, page tables and similar.
*/
static inline int page_mapcount(struct page *page)
{
int mapcount = atomic_read(&page->_mapcount) + 1;
+ /* Handle page_has_type() pages */
+ if (mapcount < 0)
+ mapcount = 0;
if (unlikely(PageCompound(page)))
mapcount += folio_entire_mapcount(page_folio(page));
*/
#include <linux/vmstat.h>
-static __always_inline void *lowmem_page_address(const struct page *page)
-{
- return page_to_virt(page);
-}
-
#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
#define HASHED_PAGE_VIRTUAL
#endif
void page_address_init(void);
#endif
+static __always_inline void *lowmem_page_address(const struct page *page)
+{
+ return page_to_virt(page);
+}
+
#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
#define page_address(page) lowmem_page_address(page)
#define set_page_address(page, address) do { } while(0)
unsigned long vm_commit_limit(void);
+#ifndef arch_memory_deny_write_exec_supported
+static inline bool arch_memory_deny_write_exec_supported(void)
+{
+ return true;
+}
+#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported
+#endif
+
/*
* Denies creating a writable executable mapping or gaining executable permissions.
*
* build_OID_registry.pl to generate the data for look_up_OID().
*/
enum OID {
+ OID_id_dsa_with_sha1, /* 1.2.840.10030.4.3 */
OID_id_dsa, /* 1.2.840.10040.4.1 */
OID_id_ecPublicKey, /* 1.2.840.10045.2.1 */
OID_id_prime192v1, /* 1.2.840.10045.3.1.1 */
OID_id_prime256v1, /* 1.2.840.10045.3.1.7 */
+ OID_id_ecdsa_with_sha1, /* 1.2.840.10045.4.1 */
OID_id_ecdsa_with_sha224, /* 1.2.840.10045.4.3.1 */
OID_id_ecdsa_with_sha256, /* 1.2.840.10045.4.3.2 */
OID_id_ecdsa_with_sha384, /* 1.2.840.10045.4.3.3 */
/* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */
OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */
+ OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */
OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */
OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */
OID_sha512WithRSAEncryption, /* 1.2.840.113549.1.1.13 */
OID_PKU2U, /* 1.3.5.1.5.2.7 */
OID_Scram, /* 1.3.6.1.5.5.14 */
OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */
+ OID_sha1, /* 1.3.14.3.2.26 */
OID_id_ansip384r1, /* 1.3.132.0.34 */
OID_sha256, /* 2.16.840.1.101.3.4.2.1 */
OID_sha384, /* 2.16.840.1.101.3.4.2.2 */
/* At least one page in this folio has the hwpoison flag set */
PG_has_hwpoisoned = PG_error,
- PG_hugetlb = PG_active,
PG_large_rmappable = PG_workingset, /* anon or file-backed */
};
TESTSETFLAG(uname, lname, policy) \
TESTCLEARFLAG(uname, lname, policy)
+#define FOLIO_TEST_FLAG_FALSE(name) \
+static inline bool folio_test_##name(const struct folio *folio) \
+{ return false; }
+#define FOLIO_SET_FLAG_NOOP(name) \
+static inline void folio_set_##name(struct folio *folio) { }
+#define FOLIO_CLEAR_FLAG_NOOP(name) \
+static inline void folio_clear_##name(struct folio *folio) { }
+#define __FOLIO_SET_FLAG_NOOP(name) \
+static inline void __folio_set_##name(struct folio *folio) { }
+#define __FOLIO_CLEAR_FLAG_NOOP(name) \
+static inline void __folio_clear_##name(struct folio *folio) { }
+#define FOLIO_TEST_SET_FLAG_FALSE(name) \
+static inline bool folio_test_set_##name(struct folio *folio) \
+{ return false; }
+#define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \
+static inline bool folio_test_clear_##name(struct folio *folio) \
+{ return false; }
+
+#define FOLIO_FLAG_FALSE(name) \
+FOLIO_TEST_FLAG_FALSE(name) \
+FOLIO_SET_FLAG_NOOP(name) \
+FOLIO_CLEAR_FLAG_NOOP(name)
+
#define TESTPAGEFLAG_FALSE(uname, lname) \
-static inline bool folio_test_##lname(const struct folio *folio) { return false; } \
+FOLIO_TEST_FLAG_FALSE(lname) \
static inline int Page##uname(const struct page *page) { return 0; }
#define SETPAGEFLAG_NOOP(uname, lname) \
-static inline void folio_set_##lname(struct folio *folio) { } \
+FOLIO_SET_FLAG_NOOP(lname) \
static inline void SetPage##uname(struct page *page) { }
#define CLEARPAGEFLAG_NOOP(uname, lname) \
-static inline void folio_clear_##lname(struct folio *folio) { } \
+FOLIO_CLEAR_FLAG_NOOP(lname) \
static inline void ClearPage##uname(struct page *page) { }
#define __CLEARPAGEFLAG_NOOP(uname, lname) \
-static inline void __folio_clear_##lname(struct folio *folio) { } \
+__FOLIO_CLEAR_FLAG_NOOP(lname) \
static inline void __ClearPage##uname(struct page *page) { }
#define TESTSETFLAG_FALSE(uname, lname) \
-static inline bool folio_test_set_##lname(struct folio *folio) \
-{ return 0; } \
+FOLIO_TEST_SET_FLAG_FALSE(lname) \
static inline int TestSetPage##uname(struct page *page) { return 0; }
#define TESTCLEARFLAG_FALSE(uname, lname) \
-static inline bool folio_test_clear_##lname(struct folio *folio) \
-{ return 0; } \
+FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \
static inline int TestClearPage##uname(struct page *page) { return 0; }
#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \
#define PG_head_mask ((1UL << PG_head))
-#ifdef CONFIG_HUGETLB_PAGE
-int PageHuge(const struct page *page);
-SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
-CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
-
-/**
- * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs
- * @folio: The folio to test.
- *
- * Context: Any context. Caller should have a reference on the folio to
- * prevent it from being turned into a tail page.
- * Return: True for hugetlbfs folios, false for anon folios or folios
- * belonging to other filesystems.
- */
-static inline bool folio_test_hugetlb(const struct folio *folio)
-{
- return folio_test_large(folio) &&
- test_bit(PG_hugetlb, const_folio_flags(folio, 1));
-}
-#else
-TESTPAGEFLAG_FALSE(Huge, hugetlb)
-#endif
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* PageHuge() only returns true for hugetlbfs pages, but not for
TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
#endif
-/*
- * Check if a page is currently marked HWPoisoned. Note that this check is
- * best effort only and inherently racy: there is no way to synchronize with
- * failing hardware.
- */
-static inline bool is_page_hwpoison(struct page *page)
-{
- if (PageHWPoison(page))
- return true;
- return PageHuge(page) && PageHWPoison(compound_head(page));
-}
-
/*
* For pages that are never mapped to userspace (and aren't PageSlab),
* page_type may be used. Because it is initialised to -1, we invert the
* sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
* __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and
- * low bits so that an underflow or overflow of page_mapcount() won't be
+ * low bits so that an underflow or overflow of _mapcount won't be
* mistaken for a page type value.
*/
#define PAGE_TYPE_BASE 0xf0000000
-/* Reserve 0x0000007f to catch underflows of page_mapcount */
+/* Reserve 0x0000007f to catch underflows of _mapcount */
#define PAGE_MAPCOUNT_RESERVE -128
#define PG_buddy 0x00000080
#define PG_offline 0x00000100
#define PG_table 0x00000200
#define PG_guard 0x00000400
+#define PG_hugetlb 0x00000800
#define PageType(page, flag) \
((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
return page_type_has_type(page->page_type);
}
+#define FOLIO_TYPE_OPS(lname, fname) \
+static __always_inline bool folio_test_##fname(const struct folio *folio)\
+{ \
+ return folio_test_type(folio, PG_##lname); \
+} \
+static __always_inline void __folio_set_##fname(struct folio *folio) \
+{ \
+ VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \
+ folio->page.page_type &= ~PG_##lname; \
+} \
+static __always_inline void __folio_clear_##fname(struct folio *folio) \
+{ \
+ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \
+ folio->page.page_type |= PG_##lname; \
+}
+
#define PAGE_TYPE_OPS(uname, lname, fname) \
+FOLIO_TYPE_OPS(lname, fname) \
static __always_inline int Page##uname(const struct page *page) \
{ \
return PageType(page, PG_##lname); \
} \
-static __always_inline int folio_test_##fname(const struct folio *folio)\
-{ \
- return folio_test_type(folio, PG_##lname); \
-} \
static __always_inline void __SetPage##uname(struct page *page) \
{ \
VM_BUG_ON_PAGE(!PageType(page, 0), page); \
page->page_type &= ~PG_##lname; \
} \
-static __always_inline void __folio_set_##fname(struct folio *folio) \
-{ \
- VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \
- folio->page.page_type &= ~PG_##lname; \
-} \
static __always_inline void __ClearPage##uname(struct page *page) \
{ \
VM_BUG_ON_PAGE(!Page##uname(page), page); \
page->page_type |= PG_##lname; \
-} \
-static __always_inline void __folio_clear_##fname(struct folio *folio) \
-{ \
- VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \
- folio->page.page_type |= PG_##lname; \
-} \
+}
/*
* PageBuddy() indicates that the page is free and in the buddy system
*/
PAGE_TYPE_OPS(Guard, guard, guard)
+#ifdef CONFIG_HUGETLB_PAGE
+FOLIO_TYPE_OPS(hugetlb, hugetlb)
+#else
+FOLIO_TEST_FLAG_FALSE(hugetlb)
+#endif
+
+/**
+ * PageHuge - Determine if the page belongs to hugetlbfs
+ * @page: The page to test.
+ *
+ * Context: Any context.
+ * Return: True for hugetlbfs pages, false for anon pages or pages
+ * belonging to other filesystems.
+ */
+static inline bool PageHuge(const struct page *page)
+{
+ return folio_test_hugetlb(page_folio(page));
+}
+
+/*
+ * Check if a page is currently marked HWPoisoned. Note that this check is
+ * best effort only and inherently racy: there is no way to synchronize with
+ * failing hardware.
+ */
+static inline bool is_page_hwpoison(struct page *page)
+{
+ if (PageHWPoison(page))
+ return true;
+ return PageHuge(page) && PageHWPoison(compound_head(page));
+}
+
extern bool is_free_buddy_page(struct page *page);
PAGEFLAG(Isolated, isolated, PF_ANY);
*/
#define PAGE_FLAGS_SECOND \
(0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \
- 1UL << PG_hugetlb | 1UL << PG_large_rmappable)
+ 1UL << PG_large_rmappable)
#define PAGE_FLAGS_PRIVATE \
(1UL << PG_private | 1UL << PG_private_2)
#include <linux/types.h>
-/* 15 pointers + header align the folio_batch structure to a power of two */
-#define PAGEVEC_SIZE 15
+/* 31 pointers + header align the folio_batch structure to a power of two */
+#define PAGEVEC_SIZE 31
struct folio;
/**
* struct peci_device - PECI device
* @dev: device object to register PECI device to the device model
- * @controller: manages the bus segment hosting this PECI device
* @info: PECI device characteristics
* @info.family: device family
* @info.model: device model
if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
&randomize_kstack_offset)) { \
u32 offset = raw_cpu_read(kstack_offset); \
- offset ^= (rand); \
+ offset = ror32(offset, 5) ^ (rand); \
raw_cpu_write(kstack_offset, offset); \
} \
} while (0)
return atomic_read(&rwb->readers) != READER_BIAS;
}
-static inline void rw_base_assert_held_write(const struct rwbase_rt *rwb)
+static __always_inline bool rw_base_is_write_locked(const struct rwbase_rt *rwb)
{
- WARN_ON(atomic_read(&rwb->readers) != WRITER_BIAS);
+ return atomic_read(&rwb->readers) == WRITER_BIAS;
}
static __always_inline bool rw_base_is_contended(const struct rwbase_rt *rwb)
return rw_base_is_locked(&sem->rwbase);
}
-static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
+static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
{
WARN_ON(!rwsem_is_locked(sem));
}
-static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
+static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
{
- rw_base_assert_held_write(sem);
+ WARN_ON(!rw_base_is_write_locked(&sem->rwbase));
}
static __always_inline int rwsem_is_contended(struct rw_semaphore *sem)
/*
* Using folio_mapping() is quite slow because of the actual call
* instruction.
- * We know that secretmem pages are not compound and LRU so we can
+ * We know that secretmem pages are not compound, so we can
* save a couple of cycles here.
*/
- if (folio_test_large(folio) || !folio_test_lru(folio))
+ if (folio_test_large(folio))
return false;
mapping = (struct address_space *)
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
int shmem_unuse(unsigned int type);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
struct mm_struct *mm, unsigned long vm_flags);
+#else
+static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
+ struct mm_struct *mm, unsigned long vm_flags)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_SHMEM
extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
#else
* @list: queue head
* @ll_node: anchor in an llist (eg socket defer_list)
* @sk: Socket we are owned by
- * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in
- * fragmentation management
* @dev: Device we arrived on/are leaving by
* @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL
* @cb: Control buffer. Free for use by every layer. Put private vars here
struct llist_node ll_node;
};
- union {
- struct sock *sk;
- int ip_defrag_offset;
- };
+ struct sock *sk;
union {
ktime_t tstamp;
return 0;
}
+/* Deprecated.
+ * This is unsafe, unless caller checked user provided optlen.
+ * Prefer copy_safe_from_sockptr() instead.
+ */
static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
{
return copy_from_sockptr_offset(dst, src, 0, size);
}
+/**
+ * copy_safe_from_sockptr: copy a struct from sockptr
+ * @dst: Destination address, in kernel space. This buffer must be @ksize
+ * bytes long.
+ * @ksize: Size of @dst struct.
+ * @optval: Source address. (in user or kernel space)
+ * @optlen: Size of @optval data.
+ *
+ * Returns:
+ * * -EINVAL: @optlen < @ksize
+ * * -EFAULT: access to userspace failed.
+ * * 0 : @ksize bytes were copied
+ */
+static inline int copy_safe_from_sockptr(void *dst, size_t ksize,
+ sockptr_t optval, unsigned int optlen)
+{
+ if (optlen < ksize)
+ return -EINVAL;
+ return copy_from_sockptr(dst, optval, ksize);
+}
+
static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
sockptr_t src, size_t usize)
{
union handle_parts {
depot_stack_handle_t handle;
struct {
- /* pool_index is offset by 1 */
- u32 pool_index : DEPOT_POOL_INDEX_BITS;
- u32 offset : DEPOT_OFFSET_BITS;
- u32 extra : STACK_DEPOT_EXTRA_BITS;
+ u32 pool_index_plus_1 : DEPOT_POOL_INDEX_BITS;
+ u32 offset : DEPOT_OFFSET_BITS;
+ u32 extra : STACK_DEPOT_EXTRA_BITS;
};
};
*/
struct svc_rdma_write_info {
struct svcxprt_rdma *wi_rdma;
- struct list_head wi_list;
const struct svc_rdma_chunk *wi_chunk;
struct ib_cqe sc_cqe;
struct xdr_buf sc_hdrbuf;
struct xdr_stream sc_stream;
-
- struct list_head sc_write_info_list;
struct svc_rdma_write_info sc_reply_info;
-
void *sc_xprt_buf;
int sc_page_count;
int sc_cur_sge_no;
extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc,
enum dma_data_direction dir);
-extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt);
extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt);
-extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
- const struct svc_rdma_pcl *write_pcl,
- struct svc_rdma_send_ctxt *sctxt,
- const struct xdr_buf *xdr);
+extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_recv_ctxt *rctxt,
+ const struct xdr_buf *xdr);
extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
const struct svc_rdma_pcl *write_pcl,
const struct svc_rdma_pcl *reply_pcl,
}
#endif /* CONFIG_MIGRATION */
+#ifdef CONFIG_MEMORY_FAILURE
+
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+ return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_HWPOISON;
+}
+
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+ return 0;
+}
+#endif
+
typedef unsigned long pte_marker;
#define PTE_MARKER_UFFD_WP BIT(0)
/*
* A pfn swap entry is a special type of swap entry that always has a pfn stored
- * in the swap offset. They are used to represent unaddressable device memory
- * and to restrict access to a page undergoing migration.
+ * in the swap offset. They can either be used to represent unaddressable device
+ * memory, to restrict access to a page undergoing migration or to represent a
+ * pfn which has been hwpoisoned and unmapped.
*/
static inline bool is_pfn_swap_entry(swp_entry_t entry)
{
BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
return is_migration_entry(entry) || is_device_private_entry(entry) ||
- is_device_exclusive_entry(entry);
+ is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
}
struct page_vma_mapped_walk;
}
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
-#ifdef CONFIG_MEMORY_FAILURE
-
-/*
- * Support for hardware poisoned pages
- */
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
- BUG_ON(!PageLocked(page));
- return swp_entry(SWP_HWPOISON, page_to_pfn(page));
-}
-
-static inline int is_hwpoison_entry(swp_entry_t entry)
-{
- return swp_type(entry) == SWP_HWPOISON;
-}
-
-#else
-
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
- return swp_entry(0, 0);
-}
-
-static inline int is_hwpoison_entry(swp_entry_t swp)
-{
- return 0;
-}
-#endif
-
static inline int non_swap_entry(swp_entry_t entry)
{
return swp_type(entry) >= MAX_SWAPFILES;
*
* @read: returns the current cycle value
* @mask: bitmask for two's complement
- * subtraction of non 64 bit counters,
+ * subtraction of non-64-bit counters,
* see CYCLECOUNTER_MASK() helper macro
* @mult: cycle to nanosecond multiplier
* @shift: cycle to nanosecond divisor (power of two)
};
/**
- * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds
+ * struct timecounter - layer above a &struct cyclecounter which counts nanoseconds
* Contains the state needed by timecounter_read() to detect
* cycle counter wrap around. Initialize with
* timecounter_init(). Also used to convert cycle counts into the
* @cycles: Cycles
* @mask: bit mask for maintaining the 'frac' field
* @frac: pointer to storage for the fractional nanoseconds.
+ *
+ * Returns: cycle counter cycles converted to nanoseconds
*/
static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
u64 cycles, u64 mask, u64 *frac)
/**
* timecounter_adjtime - Shifts the time of the clock.
+ * @tc: The &struct timecounter to adjust
* @delta: Desired change in nanoseconds.
*/
static inline void timecounter_adjtime(struct timecounter *tc, s64 delta)
*
* In other words, keeps track of time since the same epoch as
* the function which generated the initial time stamp.
+ *
+ * Returns: nanoseconds since the initial time stamp
*/
extern u64 timecounter_read(struct timecounter *tc);
*
* This allows conversion of cycle counter values which were generated
* in the past.
+ *
+ * Returns: cycle counter converted to nanoseconds since the initial time stamp
*/
extern u64 timecounter_cyc2time(const struct timecounter *tc,
u64 cycle_tstamp);
const struct timezone *tz);
/*
- * ktime_get() family: read the current time in a multitude of ways,
+ * ktime_get() family - read the current time in a multitude of ways.
*
* The default time reference is CLOCK_MONOTONIC, starting at
* boot time but not counting the time spent in suspend.
* For other references, use the functions with "real", "clocktai",
* "boottime" and "raw" suffixes.
*
- * To get the time in a different format, use the ones wit
+ * To get the time in a different format, use the ones with
* "ns", "ts64" and "seconds" suffix.
*
* See Documentation/core-api/timekeeping.rst for more details.
/**
* ktime_get_real - get the real (wall-) time in ktime_t format
+ *
+ * Returns: real (wall) time in ktime_t format
*/
static inline ktime_t ktime_get_real(void)
{
}
/**
- * ktime_get_boottime - Returns monotonic time since boot in ktime_t format
+ * ktime_get_boottime - Get monotonic time since boot in ktime_t format
*
* This is similar to CLOCK_MONTONIC/ktime_get, but also includes the
* time spent in suspend.
+ *
+ * Returns: monotonic time since boot in ktime_t format
*/
static inline ktime_t ktime_get_boottime(void)
{
}
/**
- * ktime_get_clocktai - Returns the TAI time of day in ktime_t format
+ * ktime_get_clocktai - Get the TAI time of day in ktime_t format
+ *
+ * Returns: the TAI time of day in ktime_t format
*/
static inline ktime_t ktime_get_clocktai(void)
{
/**
* ktime_mono_to_real - Convert monotonic time to clock realtime
+ * @mono: monotonic time to convert
+ *
+ * Returns: time converted to realtime clock
*/
static inline ktime_t ktime_mono_to_real(ktime_t mono)
{
return ktime_mono_to_any(mono, TK_OFFS_REAL);
}
+/**
+ * ktime_get_ns - Get the current time in nanoseconds
+ *
+ * Returns: current time converted to nanoseconds
+ */
static inline u64 ktime_get_ns(void)
{
return ktime_to_ns(ktime_get());
}
+/**
+ * ktime_get_real_ns - Get the current real/wall time in nanoseconds
+ *
+ * Returns: current real time converted to nanoseconds
+ */
static inline u64 ktime_get_real_ns(void)
{
return ktime_to_ns(ktime_get_real());
}
+/**
+ * ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds
+ *
+ * Returns: current boottime converted to nanoseconds
+ */
static inline u64 ktime_get_boottime_ns(void)
{
return ktime_to_ns(ktime_get_boottime());
}
+/**
+ * ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds
+ *
+ * Returns: current TAI time converted to nanoseconds
+ */
static inline u64 ktime_get_clocktai_ns(void)
{
return ktime_to_ns(ktime_get_clocktai());
}
+/**
+ * ktime_get_raw_ns - Get the raw monotonic time in nanoseconds
+ *
+ * Returns: current raw monotonic time converted to nanoseconds
+ */
static inline u64 ktime_get_raw_ns(void)
{
return ktime_to_ns(ktime_get_raw());
extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
-/*
- * struct ktime_timestanps - Simultaneous mono/boot/real timestamps
+/**
+ * struct ktime_timestamps - Simultaneous mono/boot/real timestamps
* @mono: Monotonic timestamp
* @boot: Boottime timestamp
* @real: Realtime timestamp
* @cycles: Clocksource counter value to produce the system times
* @real: Realtime system time
* @raw: Monotonic raw system time
- * @clock_was_set_seq: The sequence number of clock was set events
+ * @cs_id: Clocksource ID
+ * @clock_was_set_seq: The sequence number of clock-was-set events
* @cs_was_changed_seq: The sequence number of clocksource change events
*/
struct system_time_snapshot {
#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
#endif
-/**
+/*
* @TIMER_DEFERRABLE: A deferrable timer will work normally when the
* system is busy, but will not cause a CPU to come out of idle just
* to service it; instead, the timer will be serviced when the CPU
* or not. Callers must ensure serialization wrt. other operations done
* to this timer, eg. interrupt contexts, or other CPUs on SMP.
*
- * return value: 1 if the timer is pending, 0 if not.
+ * Returns: 1 if the timer is pending, 0 if not.
*/
static inline int timer_pending(const struct timer_list * timer)
{
* See timer_delete_sync() for detailed explanation.
*
* Do not use in new code. Use timer_delete_sync() instead.
+ *
+ * Returns:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
*/
static inline int del_timer_sync(struct timer_list *timer)
{
* See timer_delete() for detailed explanation.
*
* Do not use in new code. Use timer_delete() instead.
+ *
+ * Returns:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
*/
static inline int del_timer(struct timer_list *timer)
{
p->v++;
}
-static inline void u64_stats_init(struct u64_stats_sync *syncp)
-{
- seqcount_init(&syncp->seq);
-}
+#define u64_stats_init(syncp) \
+ do { \
+ struct u64_stats_sync *__s = (syncp); \
+ seqcount_init(&__s->seq); \
+ } while (0)
static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
{
#define udp_assign_bit(nr, sk, val) \
assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val)
-#define UDP_MAX_SEGMENTS (1 << 6UL)
+#define UDP_MAX_SEGMENTS (1 << 7UL)
#define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk)
}
}
+DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
+#if IS_ENABLED(CONFIG_IPV6)
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+#endif
+
+static inline bool udp_encap_needed(void)
+{
+ if (static_branch_unlikely(&udp_encap_needed_key))
+ return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (static_branch_unlikely(&udpv6_encap_needed_key))
+ return true;
+#endif
+
+ return false;
+}
+
static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
{
if (!skb_is_gso(skb))
!udp_test_bit(ACCEPT_FRAGLIST, sk))
return true;
+ /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still
+ * land in a tunnel as the socket check in udp_gro_receive cannot be
+ * foolproof.
+ */
+ if (udp_encap_needed() &&
+ READ_ONCE(udp_sk(sk)->encap_rcv) &&
+ !(skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)))
+ return true;
+
return false;
}
/**
* struct virtio_driver - operations for a virtio I/O driver
- * @driver: underlying device driver (populate name and owner).
+ * @driver: underlying device driver (populate name).
* @id_table: the ids serviced by this driver.
* @feature_table: an array of feature numbers supported by this driver.
* @feature_table_size: number of entries in the feature table array.
return container_of(drv, struct virtio_driver, driver);
}
-int register_virtio_driver(struct virtio_driver *drv);
+/* use a macro to avoid include chaining to get THIS_MODULE */
+#define register_virtio_driver(drv) \
+ __register_virtio_driver(drv, THIS_MODULE)
+int __register_virtio_driver(struct virtio_driver *drv, struct module *owner);
void unregister_virtio_driver(struct virtio_driver *drv);
/* module_virtio_driver() - Helper macro for drivers that don't do
refcount_inc(&ifp->refcnt);
}
+static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp)
+{
+ return refcount_inc_not_zero(&ifp->refcnt);
+}
/*
* compute link-local solicited-node multicast address
U_LOCK_NORMAL,
U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */
U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */
+ U_LOCK_GC_LISTENER, /* used for listening socket while determining gc
+ * candidates to close a small race window.
+ */
};
static inline void unix_state_lock_nested(struct sock *sk,
return skb;
}
+static inline int bt_copy_from_sockptr(void *dst, size_t dst_size,
+ sockptr_t src, size_t src_size)
+{
+ if (dst_size > src_size)
+ return -EINVAL;
+
+ return copy_from_sockptr(dst, src, dst_size);
+}
+
int bt_to_errno(u16 code);
__u8 bt_status(int err);
*/
HCI_QUIRK_USE_BDADDR_PROPERTY,
+ /* When this quirk is set, the Bluetooth Device Address provided by
+ * the 'local-bd-address' fwnode property is incorrectly specified in
+ * big-endian order.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_BDADDR_PROPERTY_BROKEN,
+
/* When this quirk is set, the duplicate filtering during
* scanning is based on Bluetooth devices addresses. To allow
* RSSI based updates, restart scanning if needed.
__u8 le_per_adv_data[HCI_MAX_PER_AD_TOT_LEN];
__u16 le_per_adv_data_len;
__u16 le_per_adv_data_offset;
+ __u8 le_adv_phy;
+ __u8 le_adv_sec_phy;
__u8 le_tx_phy;
__u8 le_rx_phy;
__s8 rssi;
enum conn_reasons conn_reason);
struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
u8 dst_type, bool dst_resolved, u8 sec_level,
- u16 conn_timeout, u8 role);
+ u16 conn_timeout, u8 role, u8 phy, u8 sec_phy);
void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status);
struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
u8 sec_level, u8 auth_type,
#define privacy_mode_capable(dev) (use_ll_privacy(dev) && \
(hdev->commands[39] & 0x04))
+#define read_key_size_capable(dev) \
+ ((dev)->commands[20] & 0x10 && \
+ !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks))
+
/* Use enhanced synchronous connection if command is supported and its quirk
* has not been set.
*/
* set this flag to update channels on beacon hints.
* @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link
* of an NSTR mobile AP MLD.
+ * @WIPHY_FLAG_DISABLE_WEXT: disable wireless extensions for this device
*/
enum wiphy_flags {
WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0),
WIPHY_FLAG_4ADDR_STATION = BIT(6),
WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7),
WIPHY_FLAG_IBSS_RSN = BIT(8),
+ WIPHY_FLAG_DISABLE_WEXT = BIT(9),
WIPHY_FLAG_MESH_AUTH = BIT(10),
WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11),
WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12),
void (*delack_handler)(struct timer_list *),
void (*keepalive_handler)(struct timer_list *));
void inet_csk_clear_xmit_timers(struct sock *sk);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk);
static inline void inet_csk_schedule_ack(struct sock *sk)
{
return pskb_network_may_pull(skb, nhlen);
}
+/* Variant of pskb_inet_may_pull().
+ */
+static inline bool skb_vlan_inet_prepare(struct sk_buff *skb)
+{
+ int nhlen = 0, maclen = ETH_HLEN;
+ __be16 type = skb->protocol;
+
+ /* Essentially this is skb_protocol(skb, true)
+ * And we get MAC len.
+ */
+ if (eth_type_vlan(type))
+ type = __vlan_get_protocol(skb, type, &maclen);
+
+ switch (type) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ nhlen = sizeof(struct ipv6hdr);
+ break;
+#endif
+ case htons(ETH_P_IP):
+ nhlen = sizeof(struct iphdr);
+ break;
+ }
+ /* For ETH_P_IPV6/ETH_P_IP we make sure to pull
+ * a base network header in skb->head.
+ */
+ if (!pskb_may_pull(skb, maclen + nhlen))
+ return false;
+
+ skb_set_network_header(skb, maclen);
+ return true;
+}
+
static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
{
const struct ip_tunnel_encap_ops *ops;
* of their QoS TID or other priority field values.
* @IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX: first MLO TX, used mostly internally
* for sequence number assignment
+ * @IEEE80211_TX_CTRL_SCAN_TX: Indicates that this frame is transmitted
+ * due to scanning, not in normal operation on the interface.
* @IEEE80211_TX_CTRL_MLO_LINK: If not @IEEE80211_LINK_UNSPECIFIED, this
* frame should be transmitted on the specific link. This really is
* only relevant for frames that do not have data present, and is
IEEE80211_TX_CTRL_NO_SEQNO = BIT(7),
IEEE80211_TX_CTRL_DONT_REORDER = BIT(8),
IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX = BIT(9),
+ IEEE80211_TX_CTRL_SCAN_TX = BIT(10),
IEEE80211_TX_CTRL_MLO_LINK = 0xf0000000,
};
* for the TX tag
* @needed_tailroom: number of bytes reserved at the end of the sk_buff for the
* TX tag
+ * @rx_uses_md_dst: whether MACsec device offload supports sk_buff md_dst
*/
struct macsec_ops {
/* Device wide */
struct sk_buff *skb);
unsigned int needed_headroom;
unsigned int needed_tailroom;
+ bool rx_uses_md_dst;
};
void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa);
#define COMP_ENTRY_SIZE 64
#define RX_BUFFERS_PER_QUEUE 512
-#define MANA_RX_DATA_ALIGN 64
#define MAX_SEND_BUFFERS_PER_QUEUE 256
int nf_flow_table_offload_init(void);
void nf_flow_table_offload_exit(void);
-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
{
__be16 proto;
return 0;
}
+static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
+{
+ if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
+ return false;
+
+ *inner_proto = __nf_flow_pppoe_proto(skb);
+
+ return true;
+}
+
#define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \
return (void *)priv;
}
+
+/**
+ * enum nft_iter_type - nftables set iterator type
+ *
+ * @NFT_ITER_READ: read-only iteration over set elements
+ * @NFT_ITER_UPDATE: iteration under mutex to update set element state
+ */
+enum nft_iter_type {
+ NFT_ITER_UNSPEC,
+ NFT_ITER_READ,
+ NFT_ITER_UPDATE,
+};
+
struct nft_set;
struct nft_set_iter {
u8 genmask;
+ enum nft_iter_type type:8;
unsigned int count;
unsigned int skip;
int err;
struct qdisc_skb_head q;
struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
+ int owner;
unsigned long state;
unsigned long state2; /* must be written under qdisc spinlock */
struct Qdisc *next_sched;
#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
extern int sysctl_mem_pcpu_rsv;
+static inline void proto_memory_pcpu_drain(struct proto *proto)
+{
+ int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0);
+
+ if (val)
+ atomic_long_add(val, proto->memory_allocated);
+}
+
static inline void
-sk_memory_allocated_add(struct sock *sk, int amt)
+sk_memory_allocated_add(const struct sock *sk, int val)
{
- int local_reserve;
+ struct proto *proto = sk->sk_prot;
- preempt_disable();
- local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) {
- __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
- atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
- }
- preempt_enable();
+ val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val);
+
+ if (unlikely(val >= READ_ONCE(sysctl_mem_pcpu_rsv)))
+ proto_memory_pcpu_drain(proto);
}
static inline void
-sk_memory_allocated_sub(struct sock *sk, int amt)
+sk_memory_allocated_sub(const struct sock *sk, int val)
{
- int local_reserve;
+ struct proto *proto = sk->sk_prot;
- preempt_disable();
- local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) {
- __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
- atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
- }
- preempt_enable();
+ val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val);
+
+ if (unlikely(val <= -READ_ONCE(sysctl_mem_pcpu_rsv)))
+ proto_memory_pcpu_drain(proto);
}
#define SK_ALLOC_PERCPU_COUNTER_BATCH 16
#endif
}
+static inline void sock_not_owned_by_me(const struct sock *sk)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks);
+#endif
+}
+
static inline bool sock_owned_by_user(const struct sock *sk)
{
sock_owned_by_me(sk);
u32 stopped : 1;
u32 copy_mode : 1;
u32 mixed_decrypted : 1;
- u32 msg_ready : 1;
+
+ bool msg_ready;
struct strp_msg stm;
{
if (!compl)
return;
+ if (!compl->tx_timestamp)
+ return;
*compl->tx_timestamp = ops->tmo_fill_timestamp(priv);
}
struct scsi_driver {
struct device_driver gendrv;
+ int (*resume)(struct device *);
void (*rescan)(struct device *);
blk_status_t (*init_command)(struct scsi_cmnd *);
void (*uninit_command)(struct scsi_cmnd *);
#define scsi_template_proc_dir(sht) NULL
#endif
extern void scsi_scan_host(struct Scsi_Host *);
+extern int scsi_resume_device(struct scsi_device *sdev);
extern int scsi_rescan_device(struct scsi_device *sdev);
extern void scsi_remove_host(struct Scsi_Host *);
extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
u32 pphcldpl;
u32 pphcldpu;
+ u32 pplcllpl;
+ u32 pplcllpu;
+
bool decoupled:1;
bool link_locked:1;
bool link_prepared;
u32 bus_id, u8 link_type, u8 vbps, u8 bps,
u8 num_ch, u32 rate, u8 dir, u8 dev_type);
+int intel_nhlt_ssp_device_type(struct device *dev, struct nhlt_acpi_table *nhlt,
+ u8 virtual_bus_id);
+
#else
static inline struct nhlt_acpi_table *intel_nhlt_init(struct device *dev)
return NULL;
}
+static inline int intel_nhlt_ssp_device_type(struct device *dev,
+ struct nhlt_acpi_table *nhlt,
+ u8 virtual_bus_id)
+{
+ return -EINVAL;
+}
+
#endif
#endif
#ifndef __TAS2781_TLV_H__
#define __TAS2781_TLV_H__
-static const DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0);
+static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0);
static const DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0);
#endif
#define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) }
#define __def_pagetype_names \
+ DEF_PAGETYPE_NAME(hugetlb), \
DEF_PAGETYPE_NAME(offline), \
DEF_PAGETYPE_NAME(guard), \
DEF_PAGETYPE_NAME(table), \
__field(unsigned int, timeout)
__field(u32, window_size)
__field(int, len)
- __string(acceptor, data)
+ __string_len(acceptor, data, len)
),
TP_fast_assign(
__entry->timeout = timeout;
__entry->window_size = window_size;
__entry->len = len;
- strncpy(__get_str(acceptor), data, len);
+ __assign_str(acceptor, data);
),
TP_printk("win_size=%u expiry=%lu now=%lu timeout=%u acceptor=%.*s",
#define ETNAVIV_PARAM_GPU_PRODUCT_ID 0x1c
#define ETNAVIV_PARAM_GPU_CUSTOMER_ID 0x1d
#define ETNAVIV_PARAM_GPU_ECO_ID 0x1e
-#define ETNAVIV_PARAM_GPU_NN_CORE_COUNT 0x1f
-#define ETNAVIV_PARAM_GPU_NN_MAD_PER_CORE 0x20
-#define ETNAVIV_PARAM_GPU_TP_CORE_COUNT 0x21
-#define ETNAVIV_PARAM_GPU_ON_CHIP_SRAM_SIZE 0x22
-#define ETNAVIV_PARAM_GPU_AXI_SRAM_SIZE 0x23
#define ETNA_MAX_PIPES 4
KFD_EC_MASK(EC_DEVICE_NEW))
#define KFD_EC_MASK_PROCESS (KFD_EC_MASK(EC_PROCESS_RUNTIME) | \
KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE))
+#define KFD_EC_MASK_PACKET (KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED))
/* Checks for exception code types for KFD search */
+#define KFD_DBG_EC_IS_VALID(ecode) (ecode > EC_NONE && ecode < EC_MAX)
#define KFD_DBG_EC_TYPE_IS_QUEUE(ecode) \
- (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE))
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE))
#define KFD_DBG_EC_TYPE_IS_DEVICE(ecode) \
- (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE))
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE))
#define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \
- (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))
+#define KFD_DBG_EC_TYPE_IS_PACKET(ecode) \
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PACKET))
/* Runtime enable states */
VDPA_ATTR_DEV_FEATURES, /* u64 */
VDPA_ATTR_DEV_BLK_CFG_CAPACITY, /* u64 */
- VDPA_ATTR_DEV_BLK_CFG_SEG_SIZE, /* u32 */
+ VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, /* u32 */
VDPA_ATTR_DEV_BLK_CFG_BLK_SIZE, /* u32 */
VDPA_ATTR_DEV_BLK_CFG_SEG_MAX, /* u32 */
VDPA_ATTR_DEV_BLK_CFG_NUM_QUEUES, /* u16 */
VDPA_ATTR_DEV_BLK_CFG_DISCARD_SEC_ALIGN,/* u32 */
VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEC, /* u32 */
VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEG, /* u32 */
- VDPA_ATTR_DEV_BLK_CFG_READ_ONLY, /* u8 */
- VDPA_ATTR_DEV_BLK_CFG_FLUSH, /* u8 */
+ VDPA_ATTR_DEV_BLK_READ_ONLY, /* u8 */
+ VDPA_ATTR_DEV_BLK_FLUSH, /* u8 */
/* new attributes must be added above here */
VDPA_ATTR_MAX,
/* Get the config size */
#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
-/* Get the count of all virtqueues */
-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
-
-/* Get the number of virtqueue groups. */
-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
-
/* Get the number of address spaces. */
#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \
struct vhost_vring_state)
+
+/* Get the count of all virtqueues */
+#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
+
+/* Get the number of virtqueue groups. */
+#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
+
/* Get the queue size of a specific virtqueue.
* userspace set the vring index in vhost_vring_state.index
* kernel set the queue size in vhost_vring_state.num
*/
-#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \
+#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \
struct vhost_vring_state)
#endif
__u8 mpi_reply_type;
__u8 rsvd1;
__u16 rsvd2;
- __u8 reply_buf[1];
+ __u8 reply_buf[];
};
/**
* @op_runtime_config: called to config Operation and runtime regs Pointers
* @get_outstanding_cqs: called to get outstanding completion queues
* @config_esi: called to config Event Specific Interrupt
+ * @config_scsi_dev: called to configure SCSI device parameters
*/
struct ufs_hba_variant_ops {
const char *name;
#include <vdso/time32.h>
#include <vdso/time64.h>
-#ifdef CONFIG_ARM64
-#include <asm/page-def.h>
-#else
-#include <asm/page.h>
-#endif
-
#ifdef CONFIG_ARCH_HAS_VDSO_DATA
#include <asm/vdso/data.h>
#else
*/
union vdso_data_store {
struct vdso_data data[CS_BASES];
- u8 page[PAGE_SIZE];
+ u8 page[1U << CONFIG_PAGE_SHIFT];
};
/*
if (S_ISREG(mode)) {
int ml = maybe_link();
if (ml >= 0) {
- int openflags = O_WRONLY|O_CREAT;
+ int openflags = O_WRONLY|O_CREAT|O_LARGEFILE;
if (ml != 1)
openflags |= O_TRUNC;
wfile = filp_open(collected, openflags, mode);
printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n",
err);
- file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
+ file = filp_open("/initrd.image", O_WRONLY|O_CREAT|O_LARGEFILE, 0700);
if (IS_ERR(file))
return;
early_param("bootconfig", warn_bootconfig);
+bool __init cmdline_has_extra_options(void)
+{
+ return extra_command_line || extra_init_args;
+}
+
/* Change NUL term back to "=", to make "param" the whole string. */
static void __init repair_env_string(char *param, char *val)
{
if (!saved_command_line)
panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen);
+ len = xlen + strlen(command_line) + 1;
+
static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
if (!static_command_line)
panic("%s: Failed to allocate %zu bytes\n", __func__, len);
static void io_queue_sqe(struct io_kiocb *req);
struct kmem_cache *req_cachep;
+static struct workqueue_struct *iou_wq __ro_after_init;
static int __read_mostly sysctl_io_uring_disabled;
static int __read_mostly sysctl_io_uring_group = -1;
err:
kfree(ctx->cancel_table.hbs);
kfree(ctx->cancel_table_locked.hbs);
- kfree(ctx->io_bl);
xa_destroy(&ctx->io_bl_xa);
kfree(ctx);
return NULL;
err = -EBADFD;
if (!io_file_can_poll(req))
goto fail;
- err = -ECANCELED;
- if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK)
- goto fail;
- return;
+ if (req->file->f_flags & O_NONBLOCK ||
+ req->file->f_mode & FMODE_NOWAIT) {
+ err = -ECANCELED;
+ if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK)
+ goto fail;
+ return;
+ } else {
+ req->flags &= ~REQ_F_APOLL_MULTISHOT;
+ }
}
if (req->flags & REQ_F_FORCE_ASYNC) {
if (__io_cqring_events_user(ctx) >= min_events)
return 0;
- if (sig) {
-#ifdef CONFIG_COMPAT
- if (in_compat_syscall())
- ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
- sigsz);
- else
-#endif
- ret = set_user_sigmask(sig, sigsz);
-
- if (ret)
- return ret;
- }
-
init_waitqueue_func_entry(&iowq.wq, io_wake_function);
iowq.wq.private = current;
INIT_LIST_HEAD(&iowq.wq.entry);
io_napi_adjust_timeout(ctx, &iowq, &ts);
}
+ if (sig) {
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
+ sigsz);
+ else
+#endif
+ ret = set_user_sigmask(sig, sigsz);
+
+ if (ret)
+ return ret;
+ }
+
io_napi_busy_loop(ctx, &iowq);
trace_io_uring_cqring_wait(ctx, min_events);
io_napi_free(ctx);
kfree(ctx->cancel_table.hbs);
kfree(ctx->cancel_table_locked.hbs);
- kfree(ctx->io_bl);
xa_destroy(&ctx->io_bl_xa);
kfree(ctx);
}
* noise and overhead, there's no discernable change in runtime
* over using system_wq.
*/
- queue_work(system_unbound_wq, &ctx->exit_work);
+ queue_work(iou_wq, &ctx->exit_work);
}
static int io_uring_release(struct inode *inode, struct file *file)
ptr = ctx->sq_sqes;
break;
case IORING_OFF_PBUF_RING: {
+ struct io_buffer_list *bl;
unsigned int bgid;
bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
- rcu_read_lock();
- ptr = io_pbuf_get_address(ctx, bgid);
- rcu_read_unlock();
- if (!ptr)
- return ERR_PTR(-EINVAL);
+ bl = io_pbuf_get_bl(ctx, bgid);
+ if (IS_ERR(bl))
+ return bl;
+ ptr = bl->buf_ring;
+ io_put_bl(ctx, bl);
break;
}
default:
io_buf_cachep = KMEM_CACHE(io_buffer,
SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
+ iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64);
+
#ifdef CONFIG_SYSCTL
register_sysctl_init("kernel", kernel_io_uring_disabled_table);
#endif
#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf))
-#define BGID_ARRAY 64
-
/* BIDs are addressed by a 16-bit field in a CQE */
#define MAX_BIDS_PER_BGID (1 << 16)
int inuse;
};
-static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
- struct io_buffer_list *bl,
- unsigned int bgid)
+static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
+ unsigned int bgid)
{
- if (bl && bgid < BGID_ARRAY)
- return &bl[bgid];
-
return xa_load(&ctx->io_bl_xa, bgid);
}
{
lockdep_assert_held(&ctx->uring_lock);
- return __io_buffer_get_list(ctx, ctx->io_bl, bgid);
+ return __io_buffer_get_list(ctx, bgid);
}
static int io_buffer_add_list(struct io_ring_ctx *ctx,
* always under the ->uring_lock, but the RCU lookup from mmap does.
*/
bl->bgid = bgid;
- smp_store_release(&bl->is_ready, 1);
-
- if (bgid < BGID_ARRAY)
- return 0;
-
+ atomic_set(&bl->refs, 1);
return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
}
return ret;
}
-static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
-{
- struct io_buffer_list *bl;
- int i;
-
- bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL);
- if (!bl)
- return -ENOMEM;
-
- for (i = 0; i < BGID_ARRAY; i++) {
- INIT_LIST_HEAD(&bl[i].buf_list);
- bl[i].bgid = i;
- }
-
- smp_store_release(&ctx->io_bl, bl);
- return 0;
-}
-
/*
* Mark the given mapped range as free for reuse
*/
return i;
}
+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+{
+ if (atomic_dec_and_test(&bl->refs)) {
+ __io_remove_buffers(ctx, bl, -1U);
+ kfree_rcu(bl, rcu);
+ }
+}
+
void io_destroy_buffers(struct io_ring_ctx *ctx)
{
struct io_buffer_list *bl;
struct list_head *item, *tmp;
struct io_buffer *buf;
unsigned long index;
- int i;
-
- for (i = 0; i < BGID_ARRAY; i++) {
- if (!ctx->io_bl)
- break;
- __io_remove_buffers(ctx, &ctx->io_bl[i], -1U);
- }
xa_for_each(&ctx->io_bl_xa, index, bl) {
xa_erase(&ctx->io_bl_xa, bl->bgid);
- __io_remove_buffers(ctx, bl, -1U);
- kfree_rcu(bl, rcu);
+ io_put_bl(ctx, bl);
}
/*
io_ring_submit_lock(ctx, issue_flags);
- if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) {
- ret = io_init_bl_list(ctx);
- if (ret)
- goto err;
- }
-
bl = io_buffer_get_list(ctx, p->bgid);
if (unlikely(!bl)) {
bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
if (ret) {
/*
* Doesn't need rcu free as it was never visible, but
- * let's keep it consistent throughout. Also can't
- * be a lower indexed array group, as adding one
- * where lookup failed cannot happen.
+ * let's keep it consistent throughout.
*/
- if (p->bgid >= BGID_ARRAY)
- kfree_rcu(bl, rcu);
- else
- WARN_ON_ONCE(1);
+ kfree_rcu(bl, rcu);
goto err;
}
}
if (reg.ring_entries >= 65536)
return -EINVAL;
- if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
- int ret = io_init_bl_list(ctx);
- if (ret)
- return ret;
- }
-
bl = io_buffer_get_list(ctx, reg.bgid);
if (bl) {
/* if mapped buffer ring OR classic exists, don't allow */
if (!bl->is_buf_ring)
return -EINVAL;
- __io_remove_buffers(ctx, bl, -1U);
- if (bl->bgid >= BGID_ARRAY) {
- xa_erase(&ctx->io_bl_xa, bl->bgid);
- kfree_rcu(bl, rcu);
- }
+ xa_erase(&ctx->io_bl_xa, bl->bgid);
+ io_put_bl(ctx, bl);
return 0;
}
return 0;
}
-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
+struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+ unsigned long bgid)
{
struct io_buffer_list *bl;
+ bool ret;
- bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid);
-
- if (!bl || !bl->is_mmap)
- return NULL;
/*
- * Ensure the list is fully setup. Only strictly needed for RCU lookup
- * via mmap, and in that case only for the array indexed groups. For
- * the xarray lookups, it's either visible and ready, or not at all.
+ * We have to be a bit careful here - we're inside mmap and cannot grab
+ * the uring_lock. This means the buffer_list could be simultaneously
+ * going away, if someone is trying to be sneaky. Look it up under rcu
+ * so we know it's not going away, and attempt to grab a reference to
+ * it. If the ref is already zero, then fail the mapping. If successful,
+ * the caller will call io_put_bl() to drop the the reference at at the
+ * end. This may then safely free the buffer_list (and drop the pages)
+ * at that point, vm_insert_pages() would've already grabbed the
+ * necessary vma references.
*/
- if (!smp_load_acquire(&bl->is_ready))
- return NULL;
-
- return bl->buf_ring;
+ rcu_read_lock();
+ bl = xa_load(&ctx->io_bl_xa, bgid);
+ /* must be a mmap'able buffer ring and have pages */
+ ret = false;
+ if (bl && bl->is_mmap)
+ ret = atomic_inc_not_zero(&bl->refs);
+ rcu_read_unlock();
+
+ if (ret)
+ return bl;
+
+ return ERR_PTR(-EINVAL);
}
/*
__u16 head;
__u16 mask;
+ atomic_t refs;
+
/* ring mapped provided buffers */
__u8 is_buf_ring;
/* ring mapped provided buffers, but mmap'ed by application */
__u8 is_mmap;
- /* bl is visible from an RCU point of view for lookup */
- __u8 is_ready;
};
struct io_buffer {
bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid);
+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+ unsigned long bgid);
static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
{
if (req_has_async_data(req)) {
kmsg = req->async_data;
+ kmsg->msg.msg_control_user = sr->msg_control;
} else {
ret = io_sendmsg_copy_hdr(req, &iomsg);
if (ret)
ret = __io_read(req, issue_flags);
+ /*
+ * If the file doesn't support proper NOWAIT, then disable multishot
+ * and stay in single shot mode.
+ */
+ if (!io_file_supports_nowait(req))
+ req->flags &= ~REQ_F_APOLL_MULTISHOT;
+
/*
* If we get -EAGAIN, recycle our buffer and just let normal poll
* handling arm it.
/*
* Any successful return value will keep the multishot read armed.
*/
- if (ret > 0) {
+ if (ret > 0 && req->flags & REQ_F_APOLL_MULTISHOT) {
/*
* Put our buffer and post a CQE. If we fail to post a CQE, then
* jump to the termination path. This request is then done.
# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details
cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
-CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
+CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
/* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */
#define GUARD_SZ (1ull << sizeof(((struct bpf_insn *)0)->off) * 8)
-#define KERN_VM_SZ ((1ull << 32) + GUARD_SZ)
+#define KERN_VM_SZ (SZ_4G + GUARD_SZ)
struct bpf_arena {
struct bpf_map map;
return ERR_PTR(-EINVAL);
vm_range = (u64)attr->max_entries * PAGE_SIZE;
- if (vm_range > (1ull << 32))
+ if (vm_range > SZ_4G)
return ERR_PTR(-E2BIG);
if ((attr->map_extra >> 32) != ((attr->map_extra + vm_range - 1) >> 32))
if (pgoff)
return -EINVAL;
- if (len > (1ull << 32))
+ if (len > SZ_4G)
return -E2BIG;
/* if user_vm_start was specified at arena creation time */
if (WARN_ON_ONCE(arena->user_vm_start))
/* checks at map creation time should prevent this */
return -EFAULT;
- return round_up(ret, 1ull << 32);
+ return round_up(ret, SZ_4G);
}
static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
return -EBUSY;
/* Earlier checks should prevent this */
- if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > (1ull << 32) || vma->vm_pgoff))
+ if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > SZ_4G || vma->vm_pgoff))
return -EFAULT;
if (remember_vma(arena, vma))
if (uaddr & ~PAGE_MASK)
return 0;
pgoff = compute_pgoff(arena, uaddr);
- if (pgoff + page_cnt > page_cnt_max)
+ if (pgoff > page_cnt_max - page_cnt)
/* requested address will be outside of user VMA */
return 0;
}
goto out;
uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
- /* Earlier checks make sure that uaddr32 + page_cnt * PAGE_SIZE will not overflow 32-bit */
+ /* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
+ * will not overflow 32-bit. Lower 32-bit need to represent
+ * contiguous user address range.
+ * Map these pages at kern_vm_start base.
+ * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
+ * lower 32-bit and it's ok.
+ */
ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32,
kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages);
if (ret) {
if (!page)
continue;
if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */
+ /* Optimization for the common case of page_cnt==1:
+ * If page wasn't mapped into some user vma there
+ * is no need to call zap_pages which is slow. When
+ * page_cnt is big it's faster to do the batched zap.
+ */
zap_pages(arena, full_uaddr, 1);
vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE);
__free_page(page);
return -EOPNOTSUPP;
}
+/* Called from syscall */
+static int bloom_map_alloc_check(union bpf_attr *attr)
+{
+ if (attr->value_size > KMALLOC_MAX_SIZE)
+ /* if value_size is bigger, the user space won't be able to
+ * access the elements.
+ */
+ return -E2BIG;
+
+ return 0;
+}
+
static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
{
u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits;
BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter)
const struct bpf_map_ops bloom_filter_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
+ .map_alloc_check = bloom_map_alloc_check,
.map_alloc = bloom_map_alloc,
.map_free = bloom_map_free,
.map_get_next_key = bloom_map_get_next_key,
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(generic_btf_ids)
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
atomic64_inc(&link->refcnt);
}
+static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
+{
+ struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
+
+ /* free bpf_link and its containing memory */
+ link->ops->dealloc_deferred(link);
+}
+
+static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
+{
+ if (rcu_trace_implies_rcu_gp())
+ bpf_link_defer_dealloc_rcu_gp(rcu);
+ else
+ call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
+}
+
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
+ bool sleepable = false;
+
bpf_link_free_id(link->id);
if (link->prog) {
+ sleepable = link->prog->sleepable;
/* detach BPF program, clean up used resources */
link->ops->release(link);
bpf_prog_put(link->prog);
}
- /* free bpf_link and its containing memory */
- link->ops->dealloc(link);
+ if (link->ops->dealloc_deferred) {
+ /* schedule BPF link deallocation; if underlying BPF program
+ * is sleepable, we need to first wait for RCU tasks trace
+ * sync, then go through "classic" RCU grace period
+ */
+ if (sleepable)
+ call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+ else
+ call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
+ }
+ if (link->ops->dealloc)
+ link->ops->dealloc(link);
}
static void bpf_link_put_deferred(struct work_struct *work)
static const struct bpf_link_ops bpf_raw_tp_link_lops = {
.release = bpf_raw_tp_link_release,
- .dealloc = bpf_raw_tp_link_dealloc,
+ .dealloc_deferred = bpf_raw_tp_link_dealloc,
.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
.fill_link_info = bpf_raw_tp_link_fill_link_info,
};
return reg->type == PTR_TO_FLOW_KEYS;
}
+static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
+{
+ const struct bpf_reg_state *reg = reg_state(env, regno);
+
+ return reg->type == PTR_TO_ARENA;
+}
+
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
err = check_stack_slot_within_bounds(env, min_off, state, type);
if (!err && max_off > 0)
err = -EINVAL; /* out of stack access into non-negative offsets */
+ if (!err && access_size < 0)
+ /* access_size should not be negative (or overflow an int); others checks
+ * along the way should have prevented such an access.
+ */
+ err = -EFAULT; /* invalid negative access size; integer overflow? */
if (err) {
if (tnum_is_const(reg->var_off)) {
if (is_ctx_reg(env, insn->dst_reg) ||
is_pkt_reg(env, insn->dst_reg) ||
is_flow_key_reg(env, insn->dst_reg) ||
- is_sk_reg(env, insn->dst_reg)) {
+ is_sk_reg(env, insn->dst_reg) ||
+ is_arena_reg(env, insn->dst_reg)) {
verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
insn->dst_reg,
reg_type_str(env, reg_state(env, insn->dst_reg)->type));
verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
return -EINVAL;
}
+ if (!env->prog->aux->arena) {
+ verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
+ return -EINVAL;
+ }
} else {
if ((insn->off != 0 && insn->off != 8 && insn->off != 16 &&
insn->off != 32) || insn->imm) {
if (insn->imm) {
/* off == BPF_ADDR_SPACE_CAST */
mark_reg_unknown(env, regs, insn->dst_reg);
- if (insn->imm == 1) /* cast from as(1) to as(0) */
+ if (insn->imm == 1) { /* cast from as(1) to as(0) */
dst_reg->type = PTR_TO_ARENA;
+ /* PTR_TO_ARENA is 32-bit */
+ dst_reg->subreg_def = env->insn_idx + 1;
+ }
} else if (insn->off == 0) {
/* case: R1 = R2
* copy register state to dest reg
}
if (!env->prog->jit_requested) {
verbose(env, "JIT is required to use arena\n");
+ fdput(f);
return -EOPNOTSUPP;
}
if (!bpf_jit_supports_arena()) {
verbose(env, "JIT doesn't support arena\n");
+ fdput(f);
return -EOPNOTSUPP;
}
env->prog->aux->arena = (void *)map;
if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
verbose(env, "arena's user address must be set via map_extra or mmap()\n");
+ fdput(f);
return -EINVAL;
}
}
(((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
/* convert to 32-bit mov that clears upper 32-bit */
insn->code = BPF_ALU | BPF_MOV | BPF_X;
- /* clear off, so it's a normal 'wX = wY' from JIT pov */
+ /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
insn->off = 0;
+ insn->imm = 0;
} /* cast from as(0) to as(1) should be handled by JIT */
goto next_insn;
}
CONFIG_UBSAN_TRAP=y
CONFIG_UBSAN_BOUNDS=y
# CONFIG_UBSAN_SHIFT is not set
-# CONFIG_UBSAN_DIV_ZERO
-# CONFIG_UBSAN_UNREACHABLE
-# CONFIG_UBSAN_BOOL
-# CONFIG_UBSAN_ENUM
-# CONFIG_UBSAN_ALIGNMENT
+# CONFIG_UBSAN_DIV_ZERO is not set
+# CONFIG_UBSAN_UNREACHABLE is not set
+# CONFIG_UBSAN_SIGNED_WRAP is not set
+# CONFIG_UBSAN_BOOL is not set
+# CONFIG_UBSAN_ENUM is not set
+# CONFIG_UBSAN_ALIGNMENT is not set
# Sampling-based heap out-of-bounds and use-after-free detection.
CONFIG_KFENCE=y
};
static enum cpu_mitigations cpu_mitigations __ro_after_init =
- CPU_MITIGATIONS_AUTO;
+ IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO :
+ CPU_MITIGATIONS_OFF;
static int __init mitigations_parse_cmdline(char *arg)
{
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
+#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
insert_resource(&iomem_resource, &crashk_low_res);
+#endif
#endif
return 0;
}
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
+#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
+ insert_resource(&iomem_resource, &crashk_res);
+#endif
}
+#ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
static __init int insert_crashkernel_resources(void)
{
if (crashk_res.start < crashk_res.end)
}
early_initcall(insert_crashkernel_resources);
#endif
+#endif
* @alloc_size: Size of the allocated buffer.
* @list: The free list describing the number of free entries available
* from each index.
+ * @pad_slots: Number of preceding padding slots. Valid only in the first
+ * allocated non-padding slot.
*/
struct io_tlb_slot {
phys_addr_t orig_addr;
size_t alloc_size;
- unsigned int list;
+ unsigned short list;
+ unsigned short pad_slots;
};
static bool swiotlb_force_bounce;
mem->nslabs - i);
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}
memset(vaddr, 0, bytes);
#endif
}
-/*
- * Return the offset into a iotlb slot required to keep the device happy.
+/**
+ * swiotlb_align_offset() - Get required offset into an IO TLB allocation.
+ * @dev: Owning device.
+ * @align_mask: Allocation alignment mask.
+ * @addr: DMA address.
+ *
+ * Return the minimum offset from the start of an IO TLB allocation which is
+ * required for a given buffer address and allocation alignment to keep the
+ * device happy.
+ *
+ * First, the address bits covered by min_align_mask must be identical in the
+ * original address and the bounce buffer address. High bits are preserved by
+ * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra
+ * padding bytes before the bounce buffer.
+ *
+ * Second, @align_mask specifies which bits of the first allocated slot must
+ * be zero. This may require allocating additional padding slots, and then the
+ * offset (in bytes) from the first such padding slot is returned.
*/
-static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+static unsigned int swiotlb_align_offset(struct device *dev,
+ unsigned int align_mask, u64 addr)
{
- return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+ return addr & dma_get_min_align_mask(dev) &
+ (align_mask | (IO_TLB_SIZE - 1));
}
/*
size_t alloc_size = mem->slots[index].alloc_size;
unsigned long pfn = PFN_DOWN(orig_addr);
unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
- unsigned int tlb_offset, orig_addr_offset;
+ int tlb_offset;
if (orig_addr == INVALID_PHYS_ADDR)
return;
- tlb_offset = tlb_addr & (IO_TLB_SIZE - 1);
- orig_addr_offset = swiotlb_align_offset(dev, orig_addr);
- if (tlb_offset < orig_addr_offset) {
- dev_WARN_ONCE(dev, 1,
- "Access before mapping start detected. orig offset %u, requested offset %u.\n",
- orig_addr_offset, tlb_offset);
- return;
- }
-
- tlb_offset -= orig_addr_offset;
- if (tlb_offset > alloc_size) {
- dev_WARN_ONCE(dev, 1,
- "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n",
- alloc_size, size, tlb_offset);
- return;
- }
+ /*
+ * It's valid for tlb_offset to be negative. This can happen when the
+ * "offset" returned by swiotlb_align_offset() is non-zero, and the
+ * tlb_addr is pointing within the first "offset" bytes of the second
+ * or subsequent slots of the allocated swiotlb area. While it's not
+ * valid for tlb_addr to be pointing within the first "offset" bytes
+ * of the first slot, there's no way to check for such an error since
+ * this function can't distinguish the first slot from the second and
+ * subsequent slots.
+ */
+ tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
+ swiotlb_align_offset(dev, 0, orig_addr);
orig_addr += tlb_offset;
alloc_size -= tlb_offset;
unsigned long max_slots = get_max_slots(boundary_mask);
unsigned int iotlb_align_mask = dma_get_min_align_mask(dev);
unsigned int nslots = nr_slots(alloc_size), stride;
- unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr);
unsigned int index, slots_checked, count = 0, i;
unsigned long flags;
unsigned int slot_base;
unsigned long attrs)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
- unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int offset;
struct io_tlb_pool *pool;
unsigned int i;
int index;
phys_addr_t tlb_addr;
+ unsigned short pad_slots;
if (!mem || !mem->nslabs) {
dev_warn_ratelimited(dev,
return (phys_addr_t)DMA_MAPPING_ERROR;
}
+ offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr);
index = swiotlb_find_slots(dev, orig_addr,
alloc_size + offset, alloc_align_mask, &pool);
if (index == -1) {
* This is needed when we sync the memory. Then we sync the buffer if
* needed.
*/
+ pad_slots = offset >> IO_TLB_SHIFT;
+ offset &= (IO_TLB_SIZE - 1);
+ index += pad_slots;
+ pool->slots[index].pad_slots = pad_slots;
for (i = 0; i < nr_slots(alloc_size + offset); i++)
pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
tlb_addr = slot_addr(pool->start, index) + offset;
{
struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
unsigned long flags;
- unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
- int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
- int nslots = nr_slots(mem->slots[index].alloc_size + offset);
- int aindex = index / mem->area_nslabs;
- struct io_tlb_area *area = &mem->areas[aindex];
+ unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr);
+ int index, nslots, aindex;
+ struct io_tlb_area *area;
int count, i;
+ index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+ index -= mem->slots[index].pad_slots;
+ nslots = nr_slots(mem->slots[index].alloc_size + offset);
+ aindex = index / mem->area_nslabs;
+ area = &mem->areas[aindex];
+
/*
* Return the buffer to the free list by setting the corresponding
* entries to indicate the number of contiguous entries available.
mem->slots[i].list = ++count;
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}
/*
static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
const char *dirname)
{
- atomic_long_set(&mem->total_used, 0);
- atomic_long_set(&mem->used_hiwater, 0);
-
mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
if (!mem->nslabs)
return;
debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
&fops_io_tlb_hiwater);
#ifdef CONFIG_SWIOTLB_DYNAMIC
- atomic_long_set(&mem->transient_nslabs, 0);
debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs,
mem, &fops_io_tlb_transient_used);
#endif
} else if (anon_vma_fork(tmp, mpnt))
goto fail_nomem_anon_vma_fork;
vm_flags_clear(tmp, VM_LOCKED_MASK);
+ /*
+ * Copy/update hugetlb private vma information.
+ */
+ if (is_vm_hugetlb_page(tmp))
+ hugetlb_dup_vma_private(tmp);
+
+ /*
+ * Link the vma into the MT. After using __mt_dup(), memory
+ * allocation is not necessary here, so it cannot fail.
+ */
+ vma_iter_bulk_store(&vmi, tmp);
+
+ mm->map_count++;
+
+ if (tmp->vm_ops && tmp->vm_ops->open)
+ tmp->vm_ops->open(tmp);
+
file = tmp->vm_file;
if (file) {
struct address_space *mapping = file->f_mapping;
i_mmap_unlock_write(mapping);
}
- /*
- * Copy/update hugetlb private vma information.
- */
- if (is_vm_hugetlb_page(tmp))
- hugetlb_dup_vma_private(tmp);
-
- /*
- * Link the vma into the MT. After using __mt_dup(), memory
- * allocation is not necessary here, so it cannot fail.
- */
- vma_iter_bulk_store(&vmi, tmp);
-
- mm->map_count++;
if (!(tmp->vm_flags & VM_WIPEONFORK))
retval = copy_page_range(tmp, mpnt);
- if (tmp->vm_ops && tmp->vm_ops->open)
- tmp->vm_ops->open(tmp);
-
if (retval) {
mpnt = vma_next(&vmi);
goto loop_out;
}
if (!((old->flags & new->flags) & IRQF_SHARED) ||
- (oldtype != (new->flags & IRQF_TRIGGER_MASK)) ||
- ((old->flags ^ new->flags) & IRQF_ONESHOT))
+ (oldtype != (new->flags & IRQF_TRIGGER_MASK)))
+ goto mismatch;
+
+ if ((old->flags & IRQF_ONESHOT) &&
+ (new->flags & IRQF_COND_ONESHOT))
+ new->flags |= IRQF_ONESHOT;
+ else if ((old->flags ^ new->flags) & IRQF_ONESHOT)
goto mismatch;
/* All handlers must agree on per-cpuness */
jump_label_lock();
preempt_disable();
- /* Ensure it is not in reserved area nor out of text */
- if (!(core_kernel_text((unsigned long) p->addr) ||
- is_module_text_address((unsigned long) p->addr)) ||
- in_gate_area_no_mm((unsigned long) p->addr) ||
+ /* Ensure the address is in a text area, and find a module if exists. */
+ *probed_mod = NULL;
+ if (!core_kernel_text((unsigned long) p->addr)) {
+ *probed_mod = __module_text_address((unsigned long) p->addr);
+ if (!(*probed_mod)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ /* Ensure it is not in reserved area. */
+ if (in_gate_area_no_mm((unsigned long) p->addr) ||
within_kprobe_blacklist((unsigned long) p->addr) ||
jump_label_text_reserved(p->addr, p->addr) ||
static_call_text_reserved(p->addr, p->addr) ||
goto out;
}
- /* Check if 'p' is probing a module. */
- *probed_mod = __module_text_address((unsigned long) p->addr);
+ /* Get module refcount and reject __init functions for loaded modules. */
if (*probed_mod) {
/*
* We must hold a refcount of the probed module while updating
possible to load a signed module containing the algorithm to check
the signature on that module.
+config MODULE_SIG_SHA1
+ bool "Sign modules with SHA-1"
+ select CRYPTO_SHA1
+
config MODULE_SIG_SHA256
bool "Sign modules with SHA-256"
select CRYPTO_SHA256
config MODULE_SIG_HASH
string
depends on MODULE_SIG || IMA_APPRAISE_MODSIG
+ default "sha1" if MODULE_SIG_SHA1
default "sha256" if MODULE_SIG_SHA256
default "sha384" if MODULE_SIG_SHA384
default "sha512" if MODULE_SIG_SHA512
swait_event_exclusive(s2idle_wait_head,
s2idle_state == S2IDLE_STATE_WAKE);
+ /*
+ * Kick all CPUs to ensure that they resume their timers and restore
+ * consistent system state.
+ */
+ wake_up_all_idle_cpus();
+
cpus_read_unlock();
raw_spin_lock_irq(&s2idle_lock);
*/
mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
+ /*
+ * Update @console_may_schedule for trylock because the previous
+ * owner may have been schedulable.
+ */
+ console_may_schedule = 0;
+
return 1;
}
# include <asm/paravirt_api_clock.h>
#endif
+#include <asm/barrier.h>
+
#include "cpupri.h"
#include "cpudeadline.h"
* between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
* Provide it here.
*/
- if (!prev->mm) // from kernel
+ if (!prev->mm) { // from kernel
smp_mb();
- /*
- * user -> user transition guarantees a memory barrier through
- * switch_mm() when current->mm changes. If current->mm is
- * unchanged, no barrier is needed.
- */
+ } else { // from user
+ /*
+ * user->user transition relies on an implicit
+ * memory barrier in switch_mm() when
+ * current->mm changes. If the architecture
+ * switch_mm() does not have an implicit memory
+ * barrier, it is emitted here. If current->mm
+ * is unchanged, no barrier is needed.
+ */
+ smp_mb__after_switch_mm();
+ }
}
if (prev->mm_cid_active) {
mm_cid_snapshot_time(rq, prev->mm);
if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
return -EINVAL;
- /* PARISC cannot allow mdwe as it needs writable stacks */
- if (IS_ENABLED(CONFIG_PARISC))
+ /*
+ * EOPNOTSUPP might be more appropriate here in principle, but
+ * existing userspace depends on EINVAL specifically.
+ */
+ if (!arch_memory_deny_write_exec_supported())
return -EINVAL;
current_bits = get_current_mdwe();
goto out;
}
pccontext->clk = clk;
- fp->private_data = pccontext;
- if (clk->ops.open)
+ if (clk->ops.open) {
err = clk->ops.open(pccontext, fp->f_mode);
- else
- err = 0;
-
- if (!err) {
- get_device(clk->dev);
+ if (err) {
+ kfree(pccontext);
+ goto out;
+ }
}
+
+ fp->private_data = pccontext;
+ get_device(clk->dev);
+ err = 0;
out:
up_read(&clk->rwsem);
return err;
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
*/
static void tick_periodic(int cpu)
{
- if (tick_do_timer_cpu == cpu) {
+ if (READ_ONCE(tick_do_timer_cpu) == cpu) {
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
- if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
- tick_do_timer_cpu = cpu;
+ if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
tick_next_period = ktime_get();
#ifdef CONFIG_NO_HZ_FULL
/*
!tick_nohz_full_cpu(cpu)) {
tick_take_do_timer_from_boot();
tick_do_timer_boot_cpu = -1;
- WARN_ON(tick_do_timer_cpu != cpu);
+ WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
#endif
}
int tick_cpu_dying(unsigned int dying_cpu)
{
/*
- * If the current CPU is the timekeeper, it's the only one that
- * can safely hand over its duty. Also all online CPUs are in
- * stop machine, guaranteed not to be idle, therefore it's safe
- * to pick any online successor.
+ * If the current CPU is the timekeeper, it's the only one that can
+ * safely hand over its duty. Also all online CPUs are in stop
+ * machine, guaranteed not to be idle, therefore there is no
+ * concurrency and it's safe to pick any online successor.
*/
if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);
*
* Started by: Thomas Gleixner and Ingo Molnar
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
- int cpu = smp_processor_id();
+ int tick_cpu, cpu = smp_processor_id();
/*
* Check if the do_timer duty was dropped. We don't care about
* If nohz_full is enabled, this should not happen because the
* 'tick_do_timer_cpu' CPU never relinquishes.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
- unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON_ONCE(tick_nohz_full_running);
#endif
- tick_do_timer_cpu = cpu;
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
+ tick_cpu = cpu;
}
/* Check if jiffies need an update */
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
tick_do_update_jiffies64(now);
/*
* timers, workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
- if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
+ if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
return false;
return true;
}
/**
* tick_nohz_update_jiffies - update jiffies when idle was interrupted
+ * @now: current ktime_t
*
* Called from interrupt entry when the CPU was idle
*
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
- * This function returns -1 if NOHZ is not enabled.
+ * Return: -1 if NOHZ is not enabled, else total idle time of the @cpu
*/
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
- * This function returns -1 if NOHZ is not enabled.
+ * Return: -1 if NOHZ is not enabled, else total iowait time of @cpu
*/
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
{
u64 basemono, next_tick, delta, expires;
unsigned long basejiff;
+ int tick_cpu;
basemono = get_jiffies_update(&basejiff);
ts->last_jiffies = basejiff;
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
- if (cpu != tick_do_timer_cpu &&
- (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
- !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu != cpu &&
+ (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
delta = KTIME_MAX;
/* Calculate the next expiry time */
unsigned long basejiff = ts->last_jiffies;
u64 basemono = ts->timer_expires_base;
bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
+ int tick_cpu;
u64 expires;
/* Make sure we won't be trying to stop it twice in a row. */
* do_timer() never gets invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last.
*/
- if (cpu == tick_do_timer_cpu) {
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu == cpu) {
+ WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
- } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+ } else if (tick_cpu != TICK_DO_TIMER_NONE) {
tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
}
return false;
if (tick_nohz_full_enabled()) {
+ int tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
/*
* Keep the tick alive to guarantee timekeeping progression
* if there are full dynticks CPUs around
*/
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
return false;
/* Should not happen for nohz-full */
- if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+ if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE))
return false;
}
/**
* tick_nohz_idle_got_tick - Check whether or not the tick handler has run
+ *
+ * Return: %true if the tick handler has run, otherwise %false
*/
bool tick_nohz_idle_got_tick(void)
{
* stopped, it returns the next hrtimer.
*
* Called from power state control code with interrupts disabled
+ *
+ * Return: the next expiration time
*/
ktime_t tick_nohz_get_next_hrtimer(void)
{
* The return value of this function and/or the value returned by it through the
* @delta_next pointer can be negative which must be taken into account by its
* callers.
+ *
+ * Return: the expected length of the current sleep
*/
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
{
/**
* tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
* for a particular CPU.
+ * @cpu: target CPU number
*
* Called from the schedutil frequency scaling governor in scheduler context.
+ *
+ * Return: the current idle calls counter value for @cpu
*/
unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
{
* tick_nohz_get_idle_calls - return the current idle calls counter value
*
* Called from the schedutil frequency scaling governor in scheduler context.
+ *
+ * Return: the current idle calls counter value for the current CPU
*/
unsigned long tick_nohz_get_idle_calls(void)
{
/**
* tick_setup_sched_timer - setup the tick emulation timer
- * @mode: tick_nohz_mode to setup for
+ * @hrtimer: whether to use the hrtimer or not
*/
void tick_setup_sched_timer(bool hrtimer)
{
* @next_tick: Next tick to be fired when in dynticks mode.
* @idle_jiffies: jiffies at the entry to idle for idle time accounting
* @idle_waketime: Time when the idle was interrupted
+ * @idle_sleeptime_seq: sequence counter for data consistency
* @idle_entrytime: Time when the idle call was entered
- * @nohz_mode: Mode - one state of tick_nohz_mode
* @last_jiffies: Base jiffies snapshot when next event was last computed
* @timer_expires_base: Base time clock monotonic for @timer_expires
* @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)
/*
* The timer wheel has LVL_DEPTH array levels. Each level provides an array of
- * LVL_SIZE buckets. Each level is driven by its own clock and therefor each
+ * LVL_SIZE buckets. Each level is driven by its own clock and therefore each
* level has a different granularity.
*
- * The level granularity is: LVL_CLK_DIV ^ lvl
+ * The level granularity is: LVL_CLK_DIV ^ level
* The level clock frequency is: HZ / (LVL_CLK_DIV ^ level)
*
* The array level of a newly armed timer depends on the relative expiry
* time. The farther the expiry time is away the higher the array level and
- * therefor the granularity becomes.
+ * therefore the granularity becomes.
*
* Contrary to the original timer wheel implementation, which aims for 'exact'
* expiry of the timers, this implementation removes the need for recascading
* struct timer_base - Per CPU timer base (number of base depends on config)
* @lock: Lock protecting the timer_base
* @running_timer: When expiring timers, the lock is dropped. To make
- * sure not to race agains deleting/modifying a
+ * sure not to race against deleting/modifying a
* currently running timer, the pointer is set to the
* timer, which expires at the moment. If no timer is
* running, the pointer is NULL.
}
/*
- * fixup_init is called when:
+ * timer_fixup_init is called when:
* - an active object is initialized
*/
static bool timer_fixup_init(void *addr, enum debug_obj_state state)
}
/*
- * fixup_activate is called when:
+ * timer_fixup_activate is called when:
* - an active object is activated
* - an unknown non-static object is activated
*/
}
/*
- * fixup_free is called when:
+ * timer_fixup_free is called when:
* - an active object is freed
*/
static bool timer_fixup_free(void *addr, enum debug_obj_state state)
}
/*
- * fixup_assert_init is called when:
+ * timer_fixup_assert_init is called when:
* - an untracked/uninit-ed object is found
*/
static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
* @key: lockdep class key of the fake lock used for tracking timer
* sync lock dependencies
*
- * init_timer_key() must be done to a timer prior calling *any* of the
+ * init_timer_key() must be done to a timer prior to calling *any* of the
* other timer functions.
*/
void init_timer_key(struct timer_list *timer,
* If @shutdown is set then the lock has to be taken whether the
* timer is pending or not to protect against a concurrent rearm
* which might hit between the lockless pending check and the lock
- * aquisition. By taking the lock it is ensured that such a newly
+ * acquisition. By taking the lock it is ensured that such a newly
* enqueued timer is dequeued and cannot end up with
* timer->function == NULL in the expiry code.
*
/*
* When timer base is not set idle, undo the effect of
- * tmigr_cpu_deactivate() to prevent inconsitent states - active
+ * tmigr_cpu_deactivate() to prevent inconsistent states - active
* timer base but inactive timer migration hierarchy.
*
* When timer base was already marked idle, nothing will be
first_childevt = evt = data->evt;
+ /*
+ * Walking the hierarchy is required in any case when a
+ * remote expiry was done before. This ensures to not lose
+ * already queued events in non active groups (see section
+ * "Required event and timerqueue update after a remote
+ * expiry" in the documentation at the top).
+ *
+ * The two call sites which are executed without a remote expiry
+ * before, are not prevented from propagating changes through
+ * the hierarchy by the return:
+ * - When entering this path by tmigr_new_timer(), @evt->ignore
+ * is never set.
+ * - tmigr_inactive_up() takes care of the propagation by
+ * itself and ignores the return value. But an immediate
+ * return is possible if there is a parent, sparing group
+ * locking at this level, because the upper walking call to
+ * the parent will take care about removing this event from
+ * within the group and update next_expiry accordingly.
+ *
+ * However if there is no parent, ie: the hierarchy has only a
+ * single level so @group is the top level group, make sure the
+ * first event information of the group is updated properly and
+ * also handled properly, so skip this fast return path.
+ */
+ if (evt->ignore && !remote && group->parent)
+ return true;
+
raw_spin_lock(&group->lock);
childstate.state = 0;
* queue when the expiry time changed only or when it could be ignored.
*/
if (timerqueue_node_queued(&evt->nextevt)) {
- if ((evt->nextevt.expires == nextexp) && !evt->ignore)
+ if ((evt->nextevt.expires == nextexp) && !evt->ignore) {
+ /* Make sure not to miss a new CPU event with the same expiry */
+ evt->cpu = first_childevt->cpu;
goto check_toplvl;
+ }
if (!timerqueue_del(&group->events, &evt->nextevt))
WRITE_ONCE(group->next_expiry, KTIME_MAX);
config FTRACE_RECORD_RECURSION_SIZE
int "Max number of recursed functions to record"
- default 128
+ default 128
depends on FTRACE_RECORD_RECURSION
help
This defines the limit of number of functions that can be
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
.release = bpf_kprobe_multi_link_release,
- .dealloc = bpf_kprobe_multi_link_dealloc,
+ .dealloc_deferred = bpf_kprobe_multi_link_dealloc,
.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
};
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
+ if (umulti_link->task)
+ put_task_struct(umulti_link->task);
+ path_put(&umulti_link->path);
}
static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
struct bpf_uprobe_multi_link *umulti_link;
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
- if (umulti_link->task)
- put_task_struct(umulti_link->task);
- path_put(&umulti_link->path);
kvfree(umulti_link->uprobes);
kfree(umulti_link);
}
static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
.release = bpf_uprobe_multi_link_release,
- .dealloc = bpf_uprobe_multi_link_dealloc,
+ .dealloc_deferred = bpf_uprobe_multi_link_dealloc,
.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
};
old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
- local_inc(&cpu_buffer->pages_touched);
/*
* Just make sure we have seen our old_write and synchronize
* with any interrupts that come in.
*/
local_set(&next_page->page->commit, 0);
- /* Again, either we update tail_page or an interrupt does */
- (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
+ /* Either we update tail_page or an interrupt does */
+ if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page))
+ local_inc(&cpu_buffer->pages_touched);
}
}
return 0;
}
+#ifdef CONFIG_PERF_EVENTS
static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}
+#endif
static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
.release = seq_release,
};
+#ifdef CONFIG_PERF_EVENTS
static const struct file_operations ftrace_event_id_fops = {
.read = event_id_read,
.llseek = default_llseek,
};
+#endif
static const struct file_operations ftrace_event_filter_fops = {
.open = tracing_open_file_tr,
void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs)
{
struct probe_entry_arg *earg = tp->entry_arg;
- unsigned long val;
+ unsigned long val = 0;
int i;
if (!earg)
VMCOREINFO_NUMBER(PG_head_mask);
#define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy)
VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
-#ifdef CONFIG_HUGETLB_PAGE
- VMCOREINFO_NUMBER(PG_hugetlb);
+#define PAGE_HUGETLB_MAPCOUNT_VALUE (~PG_hugetlb)
+ VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE);
#define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline)
VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
-#endif
#ifdef CONFIG_KALLSYMS
VMCOREINFO_SYMBOL(kallsyms_names);
return memblock_alloc(size, SMP_CACHE_BYTES);
}
-static inline void __init xbc_free_mem(void *addr, size_t size)
+static inline void __init xbc_free_mem(void *addr, size_t size, bool early)
{
- memblock_free(addr, size);
+ if (early)
+ memblock_free(addr, size);
+ else if (addr)
+ memblock_free_late(__pa(addr), size);
}
#else /* !__KERNEL__ */
return malloc(size);
}
-static inline void xbc_free_mem(void *addr, size_t size)
+static inline void xbc_free_mem(void *addr, size_t size, bool early)
{
free(addr);
}
}
/**
- * xbc_exit() - Clean up all parsed bootconfig
+ * _xbc_exit() - Clean up all parsed bootconfig
+ * @early: Set true if this is called before budy system is initialized.
*
* This clears all data structures of parsed bootconfig on memory.
* If you need to reuse xbc_init() with new boot config, you can
* use this.
*/
-void __init xbc_exit(void)
+void __init _xbc_exit(bool early)
{
- xbc_free_mem(xbc_data, xbc_data_size);
+ xbc_free_mem(xbc_data, xbc_data_size, early);
xbc_data = NULL;
xbc_data_size = 0;
xbc_node_num = 0;
- xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX);
+ xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX, early);
xbc_nodes = NULL;
brace_index = 0;
}
if (!xbc_nodes) {
if (emsg)
*emsg = "Failed to allocate bootconfig nodes";
- xbc_exit();
+ _xbc_exit(true);
return -ENOMEM;
}
memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX);
*epos = xbc_err_pos;
if (emsg)
*emsg = xbc_err_msg;
- xbc_exit();
+ _xbc_exit(true);
} else
ret = xbc_node_num;
static void test_csum_ipv6_magic(struct kunit *test)
{
-#if defined(CONFIG_NET)
const struct in6_addr *saddr;
const struct in6_addr *daddr;
unsigned int len;
unsigned char proto;
__wsum csum;
+ if (!IS_ENABLED(CONFIG_NET))
+ return;
+
const int daddr_offset = sizeof(struct in6_addr);
const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr);
const int proto_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) +
CHECK_EQ(to_sum16(expected_csum_ipv6_magic[i]),
csum_ipv6_magic(saddr, daddr, len, proto, csum));
}
-#endif /* !CONFIG_NET */
}
static struct kunit_case __refdata checksum_test_cases[] = {
stack = current_pool + pool_offset;
/* Pre-initialize handle once. */
- stack->handle.pool_index = pool_index + 1;
+ stack->handle.pool_index_plus_1 = pool_index + 1;
stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
stack->handle.extra = 0;
INIT_LIST_HEAD(&stack->hash_list);
const int pools_num_cached = READ_ONCE(pools_num);
union handle_parts parts = { .handle = handle };
void *pool;
- u32 pool_index = parts.pool_index - 1;
+ u32 pool_index = parts.pool_index_plus_1 - 1;
size_t offset = parts.offset << DEPOT_STACK_ALIGN;
struct stack_record *stack;
/*
* Zero out zone modifiers, as we don't have specific zone
* requirements. Keep the flags related to allocation in atomic
- * contexts and I/O.
+ * contexts, I/O, nolockdep.
*/
alloc_flags &= ~GFP_ZONEMASK;
- alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
+ alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP);
alloc_flags |= __GFP_NOWARN;
page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER);
if (page)
};
/* Excluded because they Oops the module. */
-static const test_ubsan_fp skip_ubsan_array[] = {
+static __used const test_ubsan_fp skip_ubsan_array[] = {
test_ubsan_divrem_overflow,
};
case ubsan_shift_out_of_bounds:
return "UBSAN: shift out of bounds";
#endif
-#ifdef CONFIG_UBSAN_DIV_ZERO
+#if defined(CONFIG_UBSAN_DIV_ZERO) || defined(CONFIG_UBSAN_SIGNED_WRAP)
/*
- * SanitizerKind::IntegerDivideByZero emits
+ * SanitizerKind::IntegerDivideByZero and
+ * SanitizerKind::SignedIntegerOverflow emit
* SanitizerHandler::DivremOverflow.
*/
case ubsan_divrem_overflow:
return "UBSAN: alignment assumption";
case ubsan_type_mismatch:
return "UBSAN: type mismatch";
+#endif
+#ifdef CONFIG_UBSAN_SIGNED_WRAP
+ /*
+ * SanitizerKind::SignedIntegerOverflow emits
+ * SanitizerHandler::AddOverflow, SanitizerHandler::SubOverflow,
+ * or SanitizerHandler::MulOverflow.
+ */
+ case ubsan_add_overflow:
+ return "UBSAN: integer addition overflow";
+ case ubsan_sub_overflow:
+ return "UBSAN: integer subtraction overflow";
+ case ubsan_mul_overflow:
+ return "UBSAN: integer multiplication overflow";
#endif
default:
return "UBSAN: unrecognized failure code";
KCOV_INSTRUMENT_vmstat.o := n
KCOV_INSTRUMENT_failslab.o := n
-CFLAGS_init-mm.o += $(call cc-disable-warning, override-init)
-CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides)
+CFLAGS_init-mm.o += -Wno-override-init
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \
/* shmem file - in swap cache */
swp_entry_t swp = radix_to_swp_entry(folio);
+ /* swapin error results in poisoned entry */
+ if (non_swap_entry(swp))
+ goto resched;
+
+ /*
+ * Getting a swap entry from the shmem
+ * inode means we beat
+ * shmem_unuse(). rcu_read_lock()
+ * ensures swapoff waits for us before
+ * freeing the swapper space. However,
+ * we can race with swapping and
+ * invalidation, so there might not be
+ * a shadow in the swapcache (yet).
+ */
shadow = get_shadow_from_swap_cache(swp);
+ if (!shadow)
+ goto resched;
}
#endif
if (workingset_test_recent(shadow, true, &workingset))
/* first iteration or cross vma bound */
if (!vma || start >= vma->vm_end) {
+ /*
+ * MADV_POPULATE_(READ|WRITE) wants to handle VMA
+ * lookups+error reporting differently.
+ */
+ if (gup_flags & FOLL_MADV_POPULATE) {
+ vma = vma_lookup(mm, start);
+ if (!vma) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (check_vma_flags(vma, gup_flags)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ goto retry;
+ }
vma = gup_vma_lookup(mm, start);
if (!vma && in_gate_area(mm, start)) {
ret = get_gate_page(mm, start & PAGE_MASK,
if (vma->vm_flags & VM_LOCKONFAULT)
return nr_pages;
+ /* ... similarly, we've never faulted in PROT_NONE pages */
+ if (!vma_is_accessible(vma))
+ return -EFAULT;
+
gup_flags = FOLL_TOUCH;
/*
* We want to touch writable mappings with a write fault in order
* to break COW, except for shared mappings because these don't COW
* and we would not want to dirty them for nothing.
+ *
+ * Otherwise, do a read fault, and use FOLL_FORCE in case it's not
+ * readable (ie write-only or executable).
*/
if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
gup_flags |= FOLL_WRITE;
-
- /*
- * We want mlock to succeed for regions that have any permissions
- * other than PROT_NONE.
- */
- if (vma_is_accessible(vma))
+ else
gup_flags |= FOLL_FORCE;
if (locked)
}
/*
- * faultin_vma_page_range() - populate (prefault) page tables inside the
- * given VMA range readable/writable
+ * faultin_page_range() - populate (prefault) page tables inside the
+ * given range readable/writable
*
* This takes care of mlocking the pages, too, if VM_LOCKED is set.
*
- * @vma: target vma
+ * @mm: the mm to populate page tables in
* @start: start address
* @end: end address
* @write: whether to prefault readable or writable
* @locked: whether the mmap_lock is still held
*
- * Returns either number of processed pages in the vma, or a negative error
- * code on error (see __get_user_pages()).
+ * Returns either number of processed pages in the MM, or a negative error
+ * code on error (see __get_user_pages()). Note that this function reports
+ * errors related to VMAs, such as incompatible mappings, as expected by
+ * MADV_POPULATE_(READ|WRITE).
*
- * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
- * covered by the VMA. If it's released, *@locked will be set to 0.
+ * The range must be page-aligned.
+ *
+ * mm->mmap_lock must be held. If it's released, *@locked will be set to 0.
*/
-long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, bool write, int *locked)
+long faultin_page_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool write, int *locked)
{
- struct mm_struct *mm = vma->vm_mm;
unsigned long nr_pages = (end - start) / PAGE_SIZE;
int gup_flags;
long ret;
VM_BUG_ON(!PAGE_ALIGNED(start));
VM_BUG_ON(!PAGE_ALIGNED(end));
- VM_BUG_ON_VMA(start < vma->vm_start, vma);
- VM_BUG_ON_VMA(end > vma->vm_end, vma);
mmap_assert_locked(mm);
/*
* a poisoned page.
* !FOLL_FORCE: Require proper access permissions.
*/
- gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE;
+ gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE |
+ FOLL_MADV_POPULATE;
if (write)
gup_flags |= FOLL_WRITE;
- /*
- * We want to report -EINVAL instead of -EFAULT for any permission
- * problems or incompatible mappings.
- */
- if (check_vma_flags(vma, gup_flags))
- return -EINVAL;
-
- ret = __get_user_pages(mm, start, nr_pages, gup_flags,
- NULL, locked);
+ ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked,
+ gup_flags);
lru_add_drain();
return ret;
}
goto unlock_ptls;
}
- folio_move_anon_rmap(src_folio, dst_vma);
- WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
-
src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd);
/* Folio got pinned from under us. Put it back and fail the move. */
if (folio_maybe_dma_pinned(src_folio)) {
goto unlock_ptls;
}
+ folio_move_anon_rmap(src_folio, dst_vma);
+ WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+
_dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
/* Follow mremap() behavior and treat the entry dirty after the move */
_dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma);
{
lockdep_assert_held(&hugetlb_lock);
- folio_clear_hugetlb(folio);
+ __folio_clear_hugetlb(folio);
}
/*
h->surplus_huge_pages_node[nid]++;
}
- folio_set_hugetlb(folio);
+ __folio_set_hugetlb(folio);
folio_change_private(folio, NULL);
/*
* We have to set hugetlb_vmemmap_optimized again as above
* If vmemmap pages were allocated above, then we need to clear the
* hugetlb destructor under the hugetlb lock.
*/
- if (clear_dtor) {
+ if (folio_test_hugetlb(folio)) {
spin_lock_irq(&hugetlb_lock);
__clear_hugetlb_destructor(h, folio);
spin_unlock_irq(&hugetlb_lock);
static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
{
- folio_set_hugetlb(folio);
+ __folio_set_hugetlb(folio);
INIT_LIST_HEAD(&folio->lru);
hugetlb_set_folio_subpool(folio, NULL);
set_hugetlb_cgroup(folio, NULL);
return __prep_compound_gigantic_folio(folio, order, true);
}
-/*
- * PageHuge() only returns true for hugetlbfs pages, but not for normal or
- * transparent huge pages. See the PageTransHuge() documentation for more
- * details.
- */
-int PageHuge(const struct page *page)
-{
- const struct folio *folio;
-
- if (!PageCompound(page))
- return 0;
- folio = page_folio(page);
- return folio_test_hugetlb(folio);
-}
-EXPORT_SYMBOL_GPL(PageHuge);
-
/*
* Find and lock address space (mapping) in write mode.
*
rsv_adjust = hugepage_subpool_put_pages(spool, 1);
hugetlb_acct_memory(h, -rsv_adjust);
- if (deferred_reserve)
+ if (deferred_reserve) {
+ spin_lock_irq(&hugetlb_lock);
hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
pages_per_huge_page(h), folio);
+ spin_unlock_irq(&hugetlb_lock);
+ }
}
if (!memcg_charge_ret)
VM_UFFD_MISSING);
}
+ if (!(vma->vm_flags & VM_MAYSHARE)) {
+ ret = vmf_anon_prepare(vmf);
+ if (unlikely(ret))
+ goto out;
+ }
+
folio = alloc_hugetlb_folio(vma, haddr, 0);
if (IS_ERR(folio)) {
/*
*/
restore_reserve_on_error(h, vma, haddr, folio);
folio_put(folio);
+ ret = VM_FAULT_SIGBUS;
goto out;
}
new_pagecache_folio = true;
} else {
folio_lock(folio);
-
- ret = vmf_anon_prepare(vmf);
- if (unlikely(ret))
- goto backout_unlocked;
anon_rmap = 1;
}
} else {
if (!pte_same(pte, newpte))
set_huge_pte_at(mm, address, ptep, newpte, psize);
} else if (unlikely(is_pte_marker(pte))) {
- /* No other markers apply for now. */
- WARN_ON_ONCE(!pte_marker_uffd_wp(pte));
- if (uffd_wp_resolve)
+ /*
+ * Do nothing on a poison marker; page is
+ * corrupted, permissons do not apply. Here
+ * pte_marker_uffd_wp()==true implies !poison
+ * because they're mutual exclusive.
+ */
+ if (pte_marker_uffd_wp(pte) && uffd_wp_resolve)
/* Safe to modify directly (non-present->none). */
huge_pte_clear(mm, address, ptep, psize);
} else if (!huge_pte_none(pte)) {
void unmap_mapping_folio(struct folio *folio);
extern long populate_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *locked);
-extern long faultin_vma_page_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- bool write, int *locked);
+extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool write, int *locked);
extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
unsigned long bytes);
FOLL_FAST_ONLY = 1 << 20,
/* allow unlocking the mmap lock */
FOLL_UNLOCKABLE = 1 << 21,
+ /* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */
+ FOLL_MADV_POPULATE = 1 << 22,
};
#define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
- FOLL_FAST_ONLY | FOLL_UNLOCKABLE)
+ FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \
+ FOLL_MADV_POPULATE)
/*
* Indicates for which pages that are write-protected in the page table,
{
const bool write = behavior == MADV_POPULATE_WRITE;
struct mm_struct *mm = vma->vm_mm;
- unsigned long tmp_end;
int locked = 1;
long pages;
*prev = vma;
while (start < end) {
- /*
- * We might have temporarily dropped the lock. For example,
- * our VMA might have been split.
- */
- if (!vma || start >= vma->vm_end) {
- vma = vma_lookup(mm, start);
- if (!vma)
- return -ENOMEM;
- }
-
- tmp_end = min_t(unsigned long, end, vma->vm_end);
/* Populate (prefault) page tables readable/writable. */
- pages = faultin_vma_page_range(vma, start, tmp_end, write,
- &locked);
+ pages = faultin_page_range(mm, start, end, write, &locked);
if (!locked) {
mmap_read_lock(mm);
locked = 1;
pr_warn_once("%s: unhandled return value: %ld\n",
__func__, pages);
fallthrough;
- case -ENOMEM:
+ case -ENOMEM: /* No VMA or out of memory. */
return -ENOMEM;
}
}
{
int ret;
- zone_pcp_disable(page_zone(page));
+ /*
+ * zone_pcp_disable() can't be used here. It will
+ * hold pcp_batch_high_lock and dissolve_free_huge_page() might hold
+ * cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap
+ * optimization is enabled. This will break current lock dependency
+ * chain and leads to deadlock.
+ * Disabling pcp before dissolving the page was a deterministic
+ * approach because we made sure that those pages cannot end up in any
+ * PCP list. Draining PCP lists expels those pages to the buddy system,
+ * but nothing guarantees that those pages do not get back to a PCP
+ * queue if we need to refill those.
+ */
ret = dissolve_free_huge_page(page);
- if (!ret)
+ if (!ret) {
+ drain_all_pages(page_zone(page));
ret = take_page_off_buddy(page);
- zone_pcp_enable(page_zone(page));
+ }
return ret;
}
ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
arch_check_zapped_pte(vma, ptent);
tlb_remove_tlb_entry(tlb, pte, addr);
- VM_WARN_ON_ONCE(userfaultfd_wp(vma));
+ if (userfaultfd_pte_wp(vma, ptent))
+ zap_install_uffd_wp_if_needed(vma, addr, pte, 1,
+ details, ptent);
ksm_might_unmap_zero_page(mm, ptent);
return 1;
}
goto out;
pte = ptep_get(ptep);
+ /* Never return PFNs of anon folios in COW mappings. */
+ if (vm_normal_folio(vma, address, pte))
+ goto unlock;
+
if ((flags & FOLL_WRITE) && !pte_write(pte))
goto unlock;
static void init_early_allocated_pages(void);
+static inline void set_current_in_page_owner(void)
+{
+ /*
+ * Avoid recursion.
+ *
+ * We might need to allocate more memory from page_owner code, so make
+ * sure to signal it in order to avoid recursion.
+ */
+ current->in_page_owner = 1;
+}
+
+static inline void unset_current_in_page_owner(void)
+{
+ current->in_page_owner = 0;
+}
+
static int __init early_page_owner_param(char *buf)
{
int ret = kstrtobool(buf, &page_owner_enabled);
register_dummy_stack();
register_failure_stack();
register_early_stack();
- static_branch_enable(&page_owner_inited);
init_early_allocated_pages();
/* Initialize dummy and failure stacks and link them to stack_list */
dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle);
refcount_set(&failure_stack.stack_record->count, 1);
dummy_stack.next = &failure_stack;
stack_list = &dummy_stack;
+ static_branch_enable(&page_owner_inited);
}
struct page_ext_operations page_owner_ops = {
depot_stack_handle_t handle;
unsigned int nr_entries;
- /*
- * Avoid recursion.
- *
- * Sometimes page metadata allocation tracking requires more
- * memory to be allocated:
- * - when new stack trace is saved to stack depot
- */
if (current->in_page_owner)
return dummy_handle;
- current->in_page_owner = 1;
+ set_current_in_page_owner();
nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
handle = stack_depot_save(entries, nr_entries, flags);
if (!handle)
handle = failure_handle;
+ unset_current_in_page_owner();
- current->in_page_owner = 0;
return handle;
}
gfp_mask &= (GFP_ATOMIC | GFP_KERNEL);
gfp_mask |= __GFP_NOWARN;
+ set_current_in_page_owner();
stack = kmalloc(sizeof(*stack), gfp_mask);
- if (!stack)
+ if (!stack) {
+ unset_current_in_page_owner();
return;
+ }
+ unset_current_in_page_owner();
stack->stack_record = stack_record;
stack->next = NULL;
spin_unlock_irqrestore(&stack_list_lock, flags);
}
-static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
+static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask,
+ int nr_base_pages)
{
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
/* Add the new stack_record to our list */
add_stack_record_to_list(stack_record, gfp_mask);
}
- refcount_inc(&stack_record->count);
+ refcount_add(nr_base_pages, &stack_record->count);
}
-static void dec_stack_record_count(depot_stack_handle_t handle)
+static void dec_stack_record_count(depot_stack_handle_t handle,
+ int nr_base_pages)
{
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
- if (stack_record)
- refcount_dec(&stack_record->count);
+ if (!stack_record)
+ return;
+
+ if (refcount_sub_and_test(nr_base_pages, &stack_record->count))
+ pr_warn("%s: refcount went to 0 for %u handle\n", __func__,
+ handle);
}
-void __reset_page_owner(struct page *page, unsigned short order)
+static inline void __update_page_owner_handle(struct page_ext *page_ext,
+ depot_stack_handle_t handle,
+ unsigned short order,
+ gfp_t gfp_mask,
+ short last_migrate_reason, u64 ts_nsec,
+ pid_t pid, pid_t tgid, char *comm)
{
int i;
+ struct page_owner *page_owner;
+
+ for (i = 0; i < (1 << order); i++) {
+ page_owner = get_page_owner(page_ext);
+ page_owner->handle = handle;
+ page_owner->order = order;
+ page_owner->gfp_mask = gfp_mask;
+ page_owner->last_migrate_reason = last_migrate_reason;
+ page_owner->pid = pid;
+ page_owner->tgid = tgid;
+ page_owner->ts_nsec = ts_nsec;
+ strscpy(page_owner->comm, comm,
+ sizeof(page_owner->comm));
+ __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+static inline void __update_page_owner_free_handle(struct page_ext *page_ext,
+ depot_stack_handle_t handle,
+ unsigned short order,
+ pid_t pid, pid_t tgid,
+ u64 free_ts_nsec)
+{
+ int i;
+ struct page_owner *page_owner;
+
+ for (i = 0; i < (1 << order); i++) {
+ page_owner = get_page_owner(page_ext);
+ /* Only __reset_page_owner() wants to clear the bit */
+ if (handle) {
+ __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+ page_owner->free_handle = handle;
+ }
+ page_owner->free_ts_nsec = free_ts_nsec;
+ page_owner->free_pid = current->pid;
+ page_owner->free_tgid = current->tgid;
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+void __reset_page_owner(struct page *page, unsigned short order)
+{
struct page_ext *page_ext;
depot_stack_handle_t handle;
depot_stack_handle_t alloc_handle;
alloc_handle = page_owner->handle;
handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
- for (i = 0; i < (1 << order); i++) {
- __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
- page_owner->free_handle = handle;
- page_owner->free_ts_nsec = free_ts_nsec;
- page_owner->free_pid = current->pid;
- page_owner->free_tgid = current->tgid;
- page_ext = page_ext_next(page_ext);
- page_owner = get_page_owner(page_ext);
- }
+ __update_page_owner_free_handle(page_ext, handle, order, current->pid,
+ current->tgid, free_ts_nsec);
page_ext_put(page_ext);
+
if (alloc_handle != early_handle)
/*
* early_handle is being set as a handle for all those
* the machinery is not ready yet, we cannot decrement
* their refcount either.
*/
- dec_stack_record_count(alloc_handle);
-}
-
-static inline void __set_page_owner_handle(struct page_ext *page_ext,
- depot_stack_handle_t handle,
- unsigned short order, gfp_t gfp_mask)
-{
- struct page_owner *page_owner;
- int i;
- u64 ts_nsec = local_clock();
-
- for (i = 0; i < (1 << order); i++) {
- page_owner = get_page_owner(page_ext);
- page_owner->handle = handle;
- page_owner->order = order;
- page_owner->gfp_mask = gfp_mask;
- page_owner->last_migrate_reason = -1;
- page_owner->pid = current->pid;
- page_owner->tgid = current->tgid;
- page_owner->ts_nsec = ts_nsec;
- strscpy(page_owner->comm, current->comm,
- sizeof(page_owner->comm));
- __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
- __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
-
- page_ext = page_ext_next(page_ext);
- }
+ dec_stack_record_count(alloc_handle, 1 << order);
}
noinline void __set_page_owner(struct page *page, unsigned short order,
gfp_t gfp_mask)
{
struct page_ext *page_ext;
+ u64 ts_nsec = local_clock();
depot_stack_handle_t handle;
handle = save_stack(gfp_mask);
page_ext = page_ext_get(page);
if (unlikely(!page_ext))
return;
- __set_page_owner_handle(page_ext, handle, order, gfp_mask);
+ __update_page_owner_handle(page_ext, handle, order, gfp_mask, -1,
+ current->pid, current->tgid, ts_nsec,
+ current->comm);
page_ext_put(page_ext);
- inc_stack_record_count(handle, gfp_mask);
+ inc_stack_record_count(handle, gfp_mask, 1 << order);
}
void __set_page_owner_migrate_reason(struct page *page, int reason)
void __folio_copy_owner(struct folio *newfolio, struct folio *old)
{
+ int i;
struct page_ext *old_ext;
struct page_ext *new_ext;
- struct page_owner *old_page_owner, *new_page_owner;
+ struct page_owner *old_page_owner;
+ struct page_owner *new_page_owner;
+ depot_stack_handle_t migrate_handle;
old_ext = page_ext_get(&old->page);
if (unlikely(!old_ext))
old_page_owner = get_page_owner(old_ext);
new_page_owner = get_page_owner(new_ext);
- new_page_owner->order = old_page_owner->order;
- new_page_owner->gfp_mask = old_page_owner->gfp_mask;
- new_page_owner->last_migrate_reason =
- old_page_owner->last_migrate_reason;
- new_page_owner->handle = old_page_owner->handle;
- new_page_owner->pid = old_page_owner->pid;
- new_page_owner->tgid = old_page_owner->tgid;
- new_page_owner->free_pid = old_page_owner->free_pid;
- new_page_owner->free_tgid = old_page_owner->free_tgid;
- new_page_owner->ts_nsec = old_page_owner->ts_nsec;
- new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
- strcpy(new_page_owner->comm, old_page_owner->comm);
-
+ migrate_handle = new_page_owner->handle;
+ __update_page_owner_handle(new_ext, old_page_owner->handle,
+ old_page_owner->order, old_page_owner->gfp_mask,
+ old_page_owner->last_migrate_reason,
+ old_page_owner->ts_nsec, old_page_owner->pid,
+ old_page_owner->tgid, old_page_owner->comm);
/*
- * We don't clear the bit on the old folio as it's going to be freed
- * after migration. Until then, the info can be useful in case of
- * a bug, and the overall stats will be off a bit only temporarily.
- * Also, migrate_misplaced_transhuge_page() can still fail the
- * migration and then we want the old folio to retain the info. But
- * in that case we also don't need to explicitly clear the info from
- * the new page, which will be freed.
+ * Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio
+ * will be freed after migration. Keep them until then as they may be
+ * useful.
*/
- __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
- __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
+ __update_page_owner_free_handle(new_ext, 0, old_page_owner->order,
+ old_page_owner->free_pid,
+ old_page_owner->free_tgid,
+ old_page_owner->free_ts_nsec);
+ /*
+ * We linked the original stack to the new folio, we need to do the same
+ * for the new one and the old folio otherwise there will be an imbalance
+ * when subtracting those pages from the stack.
+ */
+ for (i = 0; i < (1 << new_page_owner->order); i++) {
+ old_page_owner->handle = migrate_handle;
+ old_ext = page_ext_next(old_ext);
+ old_page_owner = get_page_owner(old_ext);
+ }
+
page_ext_put(new_ext);
page_ext_put(old_ext);
}
goto ext_put_continue;
/* Found early allocated page */
- __set_page_owner_handle(page_ext, early_handle,
- 0, 0);
+ __update_page_owner_handle(page_ext, early_handle, 0, 0,
+ -1, local_clock(), current->pid,
+ current->tgid, current->comm);
count++;
ext_put_continue:
page_ext_put(page_ext);
* value of stack_list.
*/
stack = smp_load_acquire(&stack_list);
+ m->private = stack;
} else {
stack = m->private;
- stack = stack->next;
}
- m->private = stack;
-
return stack;
}
return stack;
}
-static unsigned long page_owner_stack_threshold;
+static unsigned long page_owner_pages_threshold;
static int stack_print(struct seq_file *m, void *v)
{
- int i, stack_count;
+ int i, nr_base_pages;
struct stack *stack = v;
unsigned long *entries;
unsigned long nr_entries;
nr_entries = stack_record->size;
entries = stack_record->entries;
- stack_count = refcount_read(&stack_record->count) - 1;
+ nr_base_pages = refcount_read(&stack_record->count) - 1;
- if (stack_count < 1 || stack_count < page_owner_stack_threshold)
+ if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold)
return 0;
for (i = 0; i < nr_entries; i++)
seq_printf(m, " %pS\n", (void *)entries[i]);
- seq_printf(m, "stack_count: %d\n\n", stack_count);
+ seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages);
return 0;
}
static int page_owner_threshold_get(void *data, u64 *val)
{
- *val = READ_ONCE(page_owner_stack_threshold);
+ *val = READ_ONCE(page_owner_pages_threshold);
return 0;
}
static int page_owner_threshold_set(void *data, u64 val)
{
- WRITE_ONCE(page_owner_stack_threshold, val);
+ WRITE_ONCE(page_owner_pages_threshold, val);
return 0;
}
#define shmem_huge SHMEM_HUGE_DENY
-bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
- struct mm_struct *mm, unsigned long vm_flags)
-{
- return false;
-}
-
static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
struct shrink_control *sc, unsigned long nr_to_split)
{
static int shmem_get_next_id(struct super_block *sb, struct kqid *qid)
{
struct mem_dqinfo *info = sb_dqinfo(sb, qid->type);
- struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node;
+ struct rb_node *node;
qid_t id = from_kqid(&init_user_ns, *qid);
struct quota_info *dqopt = sb_dqopt(sb);
struct quota_id *entry = NULL;
return -ESRCH;
down_read(&dqopt->dqio_sem);
+ node = ((struct rb_root *)info->dqi_priv)->rb_node;
while (node) {
entry = rb_entry(node, struct quota_id, node);
static int shmem_acquire_dquot(struct dquot *dquot)
{
struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
- struct rb_node **n = &((struct rb_root *)info->dqi_priv)->rb_node;
+ struct rb_node **n;
struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info;
struct rb_node *parent = NULL, *new_node = NULL;
struct quota_id *new_entry, *entry;
mutex_lock(&dquot->dq_lock);
down_write(&dqopt->dqio_sem);
+ n = &((struct rb_root *)info->dqi_priv)->rb_node;
+
while (*n) {
parent = *n;
entry = rb_entry(parent, struct quota_id, node);
static int shmem_release_dquot(struct dquot *dquot)
{
struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
- struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node;
+ struct rb_node *node;
qid_t id = from_kqid(&init_user_ns, dquot->dq_id);
struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
struct quota_id *entry = NULL;
goto out_dqlock;
down_write(&dqopt->dqio_sem);
+ node = ((struct rb_root *)info->dqi_priv)->rb_node;
while (node) {
entry = rb_entry(node, struct quota_id, node);
*/
down_read(&(*dst_vmap)->vm_lock->lock);
if (*dst_vmap != *src_vmap)
- down_read(&(*src_vmap)->vm_lock->lock);
+ down_read_nested(&(*src_vmap)->vm_lock->lock,
+ SINGLE_DEPTH_NESTING);
}
mmap_read_unlock(mm);
return err;
return atomic_long_read(&nr_vmalloc_pages);
}
+static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
+{
+ struct rb_node *n = root->rb_node;
+
+ addr = (unsigned long)kasan_reset_tag((void *)addr);
+
+ while (n) {
+ struct vmap_area *va;
+
+ va = rb_entry(n, struct vmap_area, rb_node);
+ if (addr < va->va_start)
+ n = n->rb_left;
+ else if (addr >= va->va_end)
+ n = n->rb_right;
+ else
+ return va;
+ }
+
+ return NULL;
+}
+
/* Look up the first VA which satisfies addr < va_end, NULL if none. */
static struct vmap_area *
__find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root)
static struct vmap_node *
find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va)
{
- struct vmap_node *vn, *va_node = NULL;
- struct vmap_area *va_lowest;
+ unsigned long va_start_lowest;
+ struct vmap_node *vn;
int i;
- for (i = 0; i < nr_vmap_nodes; i++) {
+repeat:
+ for (i = 0, va_start_lowest = 0; i < nr_vmap_nodes; i++) {
vn = &vmap_nodes[i];
spin_lock(&vn->busy.lock);
- va_lowest = __find_vmap_area_exceed_addr(addr, &vn->busy.root);
- if (va_lowest) {
- if (!va_node || va_lowest->va_start < (*va)->va_start) {
- if (va_node)
- spin_unlock(&va_node->busy.lock);
-
- *va = va_lowest;
- va_node = vn;
- continue;
- }
- }
+ *va = __find_vmap_area_exceed_addr(addr, &vn->busy.root);
+
+ if (*va)
+ if (!va_start_lowest || (*va)->va_start < va_start_lowest)
+ va_start_lowest = (*va)->va_start;
spin_unlock(&vn->busy.lock);
}
- return va_node;
-}
-
-static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
-{
- struct rb_node *n = root->rb_node;
+ /*
+ * Check if found VA exists, it might have gone away. In this case we
+ * repeat the search because a VA has been removed concurrently and we
+ * need to proceed to the next one, which is a rare case.
+ */
+ if (va_start_lowest) {
+ vn = addr_to_node(va_start_lowest);
- addr = (unsigned long)kasan_reset_tag((void *)addr);
+ spin_lock(&vn->busy.lock);
+ *va = __find_vmap_area(va_start_lowest, &vn->busy.root);
- while (n) {
- struct vmap_area *va;
+ if (*va)
+ return vn;
- va = rb_entry(n, struct vmap_area, rb_node);
- if (addr < va->va_start)
- n = n->rb_left;
- else if (addr >= va->va_end)
- n = n->rb_right;
- else
- return va;
+ spin_unlock(&vn->busy.lock);
+ goto repeat;
}
return NULL;
struct vmap_area *va;
int i, j;
+ if (unlikely(!vmap_initialized))
+ return NULL;
+
/*
* An addr_to_node_id(addr) converts an address to a node index
* where a VA is located. If VA spans several zones and passed
mutex_lock(&acomp_ctx->mutex);
src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
- if (acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) {
+ /*
+ * If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer
+ * to do crypto_acomp_decompress() which might sleep. In such cases, we must
+ * resort to copying the buffer to a temporary one.
+ * Meanwhile, zpool_map_handle() might return a non-linearly mapped buffer,
+ * such as a kmap address of high memory or even ever a vmap address.
+ * However, sg_init_one is only equipped to handle linearly mapped low memory.
+ * In such cases, we also must copy the buffer to a temporary and lowmem one.
+ */
+ if ((acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) ||
+ !virt_addr_valid(src)) {
memcpy(acomp_ctx->buffer, src, entry->length);
src = acomp_ctx->buffer;
zpool_unmap_handle(zpool, entry->handle);
BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
mutex_unlock(&acomp_ctx->mutex);
- if (!acomp_ctx->is_sleepable || zpool_can_sleep_mapped(zpool))
+ if (src != acomp_ctx->buffer)
zpool_unmap_handle(zpool, entry->handle);
}
if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg))
return 0;
-#ifdef CONFIG_MEMCG_KMEM
- mem_cgroup_flush_stats(memcg);
- nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
- nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
-#else
- /* use pool stats instead of memcg stats */
- nr_backing = zswap_pool_total_size >> PAGE_SHIFT;
- nr_stored = atomic_read(&zswap_nr_stored);
-#endif
+ /*
+ * The shrinker resumes swap writeback, which will enter block
+ * and may enter fs. XXX: Harmonize with vmscan.c __GFP_FS
+ * rules (may_enter_fs()), which apply on a per-folio basis.
+ */
+ if (!gfp_has_io_fs(sc->gfp_mask))
+ return 0;
+
+ /*
+ * For memcg, use the cgroup-wide ZSWAP stats since we don't
+ * have them per-node and thus per-lruvec. Careful if memcg is
+ * runtime-disabled: we can get sc->memcg == NULL, which is ok
+ * for the lruvec, but not for memcg_page_state().
+ *
+ * Without memcg, use the zswap pool-wide metrics.
+ */
+ if (!mem_cgroup_disabled()) {
+ mem_cgroup_flush_stats(memcg);
+ nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
+ nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
+ } else {
+ nr_backing = zswap_pool_total_size >> PAGE_SHIFT;
+ nr_stored = atomic_read(&zswap_nr_stored);
+ }
if (!nr_stored)
return 0;
swp_entry_t swp = folio->swap;
pgoff_t offset = swp_offset(swp);
struct page *page = &folio->page;
+ bool swapcache = folio_test_swapcache(folio);
struct zswap_tree *tree = swap_zswap_tree(swp);
struct zswap_entry *entry;
u8 *dst;
spin_unlock(&tree->lock);
return false;
}
- zswap_rb_erase(&tree->rbroot, entry);
+ /*
+ * When reading into the swapcache, invalidate our entry. The
+ * swapcache can be the authoritative owner of the page and
+ * its mappings, and the pressure that results from having two
+ * in-memory copies outweighs any benefits of caching the
+ * compression work.
+ *
+ * (Most swapins go through the swapcache. The notable
+ * exception is the singleton fault on SWP_SYNCHRONOUS_IO
+ * files, which reads into a private page and may free it if
+ * the fault fails. We remain the primary owner of the entry.)
+ */
+ if (swapcache)
+ zswap_rb_erase(&tree->rbroot, entry);
spin_unlock(&tree->lock);
if (entry->length)
if (entry->objcg)
count_objcg_event(entry->objcg, ZSWPIN);
- zswap_entry_free(entry);
-
- folio_mark_dirty(folio);
+ if (swapcache) {
+ zswap_entry_free(entry);
+ folio_mark_dirty(folio);
+ }
return true;
}
received = rsize;
}
- p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
+ p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received);
if (non_zc) {
int n = copy_to_iter(dataptr, received, to);
int total = 0;
*err = 0;
- p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n",
- fid->fid, offset, iov_iter_count(from));
-
while (iov_iter_count(from)) {
int count = iov_iter_count(from);
int rsize = fid->iounit;
if (count < rsize)
rsize = count;
+ p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n",
+ fid->fid, offset, rsize, count);
+
/* Don't bother zerocopy for small IO (< 1024) */
if (clnt->trans_mod->zc_request && rsize > 1024) {
req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0,
written = rsize;
}
- p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
+ p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written);
p9_req_put(clnt, req);
iov_iter_revert(from, count - written - iov_iter_count(from));
* @unsent_req_list: accounting for requests that haven't been sent
* @rreq: read request
* @wreq: write request
- * @req: current request being processed (if any)
* @tmp_buf: temporary buffer to read in header
* @rc: temporary fcall for reading current frame
* @wpos: write position for current frame
s->ax25_dev = NULL;
if (sk->sk_socket) {
netdev_put(ax25_dev->dev,
- &ax25_dev->dev_tracker);
+ &s->dev_tracker);
ax25_dev_put(ax25_dev);
}
ax25_cb_del(s);
spin_lock_bh(&ax25_dev_lock);
#ifdef CONFIG_AX25_DAMA_SLAVE
- ax25_ds_del_timer(ax25_dev);
+ timer_shutdown_sync(&ax25_dev->dama.slave_timer);
#endif
/*
spin_lock_bh(&bat_priv->tt.commit_lock);
- while (true) {
+ while (timeout) {
table_size = batadv_tt_local_table_transmit_size(bat_priv);
if (packet_size_max >= table_size)
break;
struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
u8 dst_type, bool dst_resolved, u8 sec_level,
- u16 conn_timeout, u8 role)
+ u16 conn_timeout, u8 role, u8 phy, u8 sec_phy)
{
struct hci_conn *conn;
struct smp_irk *irk;
conn->dst_type = dst_type;
conn->sec_level = BT_SECURITY_LOW;
conn->conn_timeout = conn_timeout;
+ conn->le_adv_phy = phy;
+ conn->le_adv_sec_phy = sec_phy;
err = hci_connect_le_sync(hdev, conn);
if (err) {
le = hci_connect_le(hdev, dst, dst_type, false,
BT_SECURITY_LOW,
HCI_LE_CONN_TIMEOUT,
- HCI_ROLE_SLAVE);
+ HCI_ROLE_SLAVE, 0, 0);
else
le = hci_connect_le_scan(hdev, dst, dst_type,
BT_SECURITY_LOW,
cancel_delayed_work_sync(&hdev->ncmd_timer);
atomic_set(&hdev->cmd_cnt, 1);
- hci_cmd_sync_cancel_sync(hdev, -err);
+ hci_cmd_sync_cancel_sync(hdev, err);
}
/* Suspend HCI device */
return 0;
/* Cancel potentially blocking sync operation before suspend */
- hci_cancel_cmd_sync(hdev, -EHOSTDOWN);
+ hci_cancel_cmd_sync(hdev, EHOSTDOWN);
hci_req_sync_lock(hdev);
ret = hci_suspend_sync(hdev);
err = hci_send_frame(hdev, skb);
if (err < 0) {
- hci_cmd_sync_cancel_sync(hdev, err);
+ hci_cmd_sync_cancel_sync(hdev, -err);
return;
}
{
struct hci_dev *hdev = data;
- if (val == 0 || val > hdev->conn_info_max_age)
+ hci_dev_lock(hdev);
+ if (val == 0 || val > hdev->conn_info_max_age) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->conn_info_min_age = val;
hci_dev_unlock(hdev);
{
struct hci_dev *hdev = data;
- if (val == 0 || val < hdev->conn_info_min_age)
+ hci_dev_lock(hdev);
+ if (val == 0 || val < hdev->conn_info_min_age) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->conn_info_max_age = val;
hci_dev_unlock(hdev);
{
struct hci_dev *hdev = data;
- if (val == 0 || val % 2 || val > hdev->sniff_max_interval)
+ hci_dev_lock(hdev);
+ if (val == 0 || val % 2 || val > hdev->sniff_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->sniff_min_interval = val;
hci_dev_unlock(hdev);
{
struct hci_dev *hdev = data;
- if (val == 0 || val % 2 || val < hdev->sniff_min_interval)
+ hci_dev_lock(hdev);
+ if (val == 0 || val % 2 || val < hdev->sniff_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->sniff_max_interval = val;
hci_dev_unlock(hdev);
{
struct hci_dev *hdev = data;
- if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_conn_min_interval = val;
hci_dev_unlock(hdev);
{
struct hci_dev *hdev = data;
- if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_conn_max_interval = val;
hci_dev_unlock(hdev);
{
struct hci_dev *hdev = data;
- if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_adv_min_interval = val;
hci_dev_unlock(hdev);
{
struct hci_dev *hdev = data;
- if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_adv_max_interval = val;
hci_dev_unlock(hdev);
if (test_bit(HCI_ENCRYPT, &hdev->flags))
set_bit(HCI_CONN_ENCRYPT, &conn->flags);
+ /* "Link key request" completed ahead of "connect request" completes */
+ if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) &&
+ ev->link_type == ACL_LINK) {
+ struct link_key *key;
+ struct hci_cp_read_enc_key_size cp;
+
+ key = hci_find_link_key(hdev, &ev->bdaddr);
+ if (key) {
+ set_bit(HCI_CONN_ENCRYPT, &conn->flags);
+
+ if (!read_key_size_capable(hdev)) {
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ } else {
+ cp.handle = cpu_to_le16(conn->handle);
+ if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE,
+ sizeof(cp), &cp)) {
+ bt_dev_err(hdev, "sending read key size failed");
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ }
+ }
+
+ hci_encrypt_cfm(conn, ev->status);
+ }
+ }
+
/* Get remote features */
if (conn->type == ACL_LINK) {
struct hci_cp_read_remote_features cp;
* controller really supports it. If it doesn't, assume
* the default size (16).
*/
- if (!(hdev->commands[20] & 0x10) ||
- test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) {
+ if (!read_key_size_capable(hdev)) {
conn->enc_key_size = HCI_LINK_KEY_SIZE;
goto notify;
}
static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
bdaddr_t *addr,
u8 addr_type, bool addr_resolved,
- u8 adv_type)
+ u8 adv_type, u8 phy, u8 sec_phy)
{
struct hci_conn *conn;
struct hci_conn_params *params;
conn = hci_connect_le(hdev, addr, addr_type, addr_resolved,
BT_SECURITY_LOW, hdev->def_le_autoconnect_timeout,
- HCI_ROLE_MASTER);
+ HCI_ROLE_MASTER, phy, sec_phy);
if (!IS_ERR(conn)) {
/* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned
* by higher layer that tried to connect, if no then
static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
u8 bdaddr_type, bdaddr_t *direct_addr,
- u8 direct_addr_type, s8 rssi, u8 *data, u8 len,
- bool ext_adv, bool ctl_time, u64 instant)
+ u8 direct_addr_type, u8 phy, u8 sec_phy, s8 rssi,
+ u8 *data, u8 len, bool ext_adv, bool ctl_time,
+ u64 instant)
{
struct discovery_state *d = &hdev->discovery;
struct smp_irk *irk;
* for advertising reports) and is already verified to be RPA above.
*/
conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved,
- type);
+ type, phy, sec_phy);
if (!ext_adv && conn && type == LE_ADV_IND &&
len <= max_adv_len(hdev)) {
/* Store report for later inclusion by
if (info->length <= max_adv_len(hdev)) {
rssi = info->data[info->length];
process_adv_report(hdev, info->type, &info->bdaddr,
- info->bdaddr_type, NULL, 0, rssi,
+ info->bdaddr_type, NULL, 0,
+ HCI_ADV_PHY_1M, 0, rssi,
info->data, info->length, false,
false, instant);
} else {
if (legacy_evt_type != LE_ADV_INVALID) {
process_adv_report(hdev, legacy_evt_type, &info->bdaddr,
info->bdaddr_type, NULL, 0,
+ info->primary_phy,
+ info->secondary_phy,
info->rssi, info->data, info->length,
!(evt_type & LE_EXT_ADV_LEGACY_PDU),
false, instant);
process_adv_report(hdev, info->type, &info->bdaddr,
info->bdaddr_type, &info->direct_addr,
- info->direct_addr_type, info->rssi, NULL, 0,
- false, false, instant);
+ info->direct_addr_type, HCI_ADV_PHY_1M, 0,
+ info->rssi, NULL, 0, false, false, instant);
}
hci_dev_unlock(hdev);
if (hdev->req_status == HCI_REQ_PEND) {
hdev->req_result = result;
hdev->req_status = HCI_REQ_DONE;
- if (skb)
+ if (skb) {
+ kfree_skb(hdev->req_skb);
hdev->req_skb = skb_get(skb);
+ }
wake_up_interruptible(&hdev->req_wait_q);
}
}
switch (optname) {
case HCI_DATA_DIR:
- if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ if (err)
break;
- }
if (opt)
hci_pi(sk)->cmsg_mask |= HCI_CMSG_DIR;
break;
case HCI_TIME_STAMP:
- if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ if (err)
break;
- }
if (opt)
hci_pi(sk)->cmsg_mask |= HCI_CMSG_TSTAMP;
uf.event_mask[1] = *((u32 *) f->event_mask + 1);
}
- len = min_t(unsigned int, len, sizeof(uf));
- if (copy_from_sockptr(&uf, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&uf, sizeof(uf), optval, len);
+ if (err)
break;
- }
if (!capable(CAP_NET_RAW)) {
uf.type_mask &= hci_sec_filter.type_mask;
goto done;
}
- if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ if (err)
break;
- }
hci_pi(sk)->mtu = opt;
break;
bt_dev_dbg(hdev, "err 0x%2.2x", err);
if (hdev->req_status == HCI_REQ_PEND) {
- hdev->req_result = err;
+ /* req_result is __u32 so error must be positive to be properly
+ * propagated.
+ */
+ hdev->req_result = err < 0 ? -err : err;
hdev->req_status = HCI_REQ_CANCELED;
wake_up_interruptible(&hdev->req_wait_q);
if (qos->bcast.in.phy & BT_ISO_PHY_CODED) {
cp->scanning_phys |= LE_SCAN_PHY_CODED;
hci_le_scan_phy_params(phy, type,
- interval,
- window);
+ interval * 3,
+ window * 3);
num_phy++;
phy++;
}
if (scan_coded(hdev)) {
cp->scanning_phys |= LE_SCAN_PHY_CODED;
- hci_le_scan_phy_params(phy, type, interval, window);
+ hci_le_scan_phy_params(phy, type, interval * 3, window * 3);
num_phy++;
phy++;
}
if (ret < 0 || !bacmp(&ba, BDADDR_ANY))
return;
- bacpy(&hdev->public_addr, &ba);
+ if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks))
+ baswap(&hdev->public_addr, &ba);
+ else
+ bacpy(&hdev->public_addr, &ba);
}
struct hci_init_stage {
plen = sizeof(*cp);
- if (scan_1m(hdev)) {
+ if (scan_1m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_1M ||
+ conn->le_adv_sec_phy == HCI_ADV_PHY_1M)) {
cp->phys |= LE_SCAN_PHY_1M;
set_ext_conn_params(conn, p);
plen += sizeof(*p);
}
- if (scan_2m(hdev)) {
+ if (scan_2m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_2M ||
+ conn->le_adv_sec_phy == HCI_ADV_PHY_2M)) {
cp->phys |= LE_SCAN_PHY_2M;
set_ext_conn_params(conn, p);
plen += sizeof(*p);
}
- if (scan_coded(hdev)) {
+ if (scan_coded(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_CODED ||
+ conn->le_adv_sec_phy == HCI_ADV_PHY_CODED)) {
cp->phys |= LE_SCAN_PHY_CODED;
set_ext_conn_params(conn, p);
static bool check_bcast_qos(struct bt_iso_qos *qos)
{
- if (qos->bcast.sync_factor == 0x00)
- return false;
+ if (!qos->bcast.sync_factor)
+ qos->bcast.sync_factor = 0x01;
if (qos->bcast.packing > 0x01)
return false;
if (qos->bcast.skip > 0x01f3)
return false;
+ if (!qos->bcast.sync_timeout)
+ qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
if (qos->bcast.sync_timeout < 0x000a || qos->bcast.sync_timeout > 0x4000)
return false;
if (qos->bcast.mse > 0x1f)
return false;
+ if (!qos->bcast.timeout)
+ qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
if (qos->bcast.timeout < 0x000a || qos->bcast.timeout > 0x4000)
return false;
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
- int len, err = 0;
+ int err = 0;
struct bt_iso_qos qos = default_qos;
u32 opt;
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
break;
case BT_PKT_STATUS:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
break;
}
- len = min_t(unsigned int, sizeof(qos), optlen);
-
- if (copy_from_sockptr(&qos, optval, len)) {
- err = -EFAULT;
- break;
- }
-
- if (len == sizeof(qos.ucast) && !check_ucast_qos(&qos)) {
- err = -EINVAL;
+ err = bt_copy_from_sockptr(&qos, sizeof(qos), optval, optlen);
+ if (err)
break;
- }
iso_pi(sk)->qos = qos;
iso_pi(sk)->qos_user_set = true;
}
if (optlen > sizeof(iso_pi(sk)->base)) {
- err = -EOVERFLOW;
+ err = -EINVAL;
break;
}
- len = min_t(unsigned int, sizeof(iso_pi(sk)->base), optlen);
-
- if (copy_from_sockptr(iso_pi(sk)->base, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(iso_pi(sk)->base, optlen, optval,
+ optlen);
+ if (err)
break;
- }
- iso_pi(sk)->base_len = len;
+ iso_pi(sk)->base_len = optlen;
break;
return -EPROTO;
hci_dev_lock(hdev);
- if (hci_dev_test_flag(hdev, HCI_MGMT) &&
- !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
+ if (hci_dev_test_flag(hdev, HCI_MGMT))
mgmt_device_connected(hdev, hcon, NULL, 0);
hci_dev_unlock(hdev);
if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
hcon = hci_connect_le(hdev, dst, dst_type, false,
chan->sec_level, timeout,
- HCI_ROLE_SLAVE);
+ HCI_ROLE_SLAVE, 0, 0);
else
hcon = hci_connect_le_scan(hdev, dst, dst_type,
chan->sec_level, timeout,
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
struct l2cap_options opts;
struct l2cap_conninfo cinfo;
- int len, err = 0;
+ int err = 0;
+ size_t len;
u32 opt;
BT_DBG("sk %p", sk);
BT_DBG("mode 0x%2.2x", chan->mode);
- len = min_t(unsigned int, len, sizeof(opts));
+ len = min(len, sizeof(opts));
if (copy_to_user(optval, (char *) &opts, len))
err = -EFAULT;
cinfo.hci_handle = chan->conn->hcon->handle;
memcpy(cinfo.dev_class, chan->conn->hcon->dev_class, 3);
- len = min_t(unsigned int, len, sizeof(cinfo));
+ len = min(len, sizeof(cinfo));
if (copy_to_user(optval, (char *) &cinfo, len))
err = -EFAULT;
struct sock *sk = sock->sk;
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
struct l2cap_options opts;
- int len, err = 0;
+ int err = 0;
u32 opt;
BT_DBG("sk %p", sk);
opts.max_tx = chan->max_tx;
opts.txwin_size = chan->tx_win;
- len = min_t(unsigned int, sizeof(opts), optlen);
- if (copy_from_sockptr(&opts, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opts, sizeof(opts), optval, optlen);
+ if (err)
break;
- }
if (opts.txwin_size > L2CAP_DEFAULT_EXT_WINDOW) {
err = -EINVAL;
break;
case L2CAP_LM:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt & L2CAP_LM_FIPS) {
err = -EINVAL;
struct bt_security sec;
struct bt_power pwr;
struct l2cap_conn *conn;
- int len, err = 0;
+ int err = 0;
u32 opt;
u16 mtu;
u8 mode;
sec.level = BT_SECURITY_LOW;
- len = min_t(unsigned int, sizeof(sec), optlen);
- if (copy_from_sockptr(&sec, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+ if (err)
break;
- }
if (sec.level < BT_SECURITY_LOW ||
sec.level > BT_SECURITY_FIPS) {
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt) {
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
break;
case BT_FLUSHABLE:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt > BT_FLUSHABLE_ON) {
err = -EINVAL;
pwr.force_active = BT_POWER_FORCE_ACTIVE_ON;
- len = min_t(unsigned int, sizeof(pwr), optlen);
- if (copy_from_sockptr(&pwr, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&pwr, sizeof(pwr), optval, optlen);
+ if (err)
break;
- }
if (pwr.force_active)
set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
break;
case BT_CHANNEL_POLICY:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
err = -EOPNOTSUPP;
break;
break;
}
- if (copy_from_sockptr(&mtu, optval, sizeof(u16))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&mtu, sizeof(mtu), optval, optlen);
+ if (err)
break;
- }
if (chan->mode == L2CAP_MODE_EXT_FLOWCTL &&
sk->sk_state == BT_CONNECTED)
break;
}
- if (copy_from_sockptr(&mode, optval, sizeof(u8))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&mode, sizeof(mode), optval, optlen);
+ if (err)
break;
- }
BT_DBG("mode %u", mode);
goto failed;
}
- err = hci_cmd_sync_queue(hdev, add_uuid_sync, cmd, mgmt_class_complete);
+ /* MGMT_OP_ADD_UUID don't require adapter the UP/Running so use
+ * hci_cmd_sync_submit instead of hci_cmd_sync_queue.
+ */
+ err = hci_cmd_sync_submit(hdev, add_uuid_sync, cmd,
+ mgmt_class_complete);
if (err < 0) {
mgmt_pending_free(cmd);
goto failed;
goto unlock;
}
- err = hci_cmd_sync_queue(hdev, remove_uuid_sync, cmd,
- mgmt_class_complete);
+ /* MGMT_OP_REMOVE_UUID don't require adapter the UP/Running so use
+ * hci_cmd_sync_submit instead of hci_cmd_sync_queue.
+ */
+ err = hci_cmd_sync_submit(hdev, remove_uuid_sync, cmd,
+ mgmt_class_complete);
if (err < 0)
mgmt_pending_free(cmd);
goto unlock;
}
- err = hci_cmd_sync_queue(hdev, set_class_sync, cmd,
- mgmt_class_complete);
+ /* MGMT_OP_SET_DEV_CLASS don't require adapter the UP/Running so use
+ * hci_cmd_sync_submit instead of hci_cmd_sync_queue.
+ */
+ err = hci_cmd_sync_submit(hdev, set_class_sync, cmd,
+ mgmt_class_complete);
if (err < 0)
mgmt_pending_free(cmd);
goto unlock;
}
- err = hci_cmd_sync_queue(hdev, mgmt_remove_adv_monitor_sync, cmd,
- mgmt_remove_adv_monitor_complete);
+ err = hci_cmd_sync_submit(hdev, mgmt_remove_adv_monitor_sync, cmd,
+ mgmt_remove_adv_monitor_complete);
if (err) {
mgmt_pending_remove(cmd);
switch (optname) {
case RFCOMM_LM:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+ if (bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen)) {
err = -EFAULT;
break;
}
struct sock *sk = sock->sk;
struct bt_security sec;
int err = 0;
- size_t len;
u32 opt;
BT_DBG("sk %p", sk);
sec.level = BT_SECURITY_LOW;
- len = min_t(unsigned int, sizeof(sec), optlen);
- if (copy_from_sockptr(&sec, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+ if (err)
break;
- }
if (sec.level > BT_SECURITY_HIGH) {
err = -EINVAL;
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
- int len, err = 0;
+ int err = 0;
struct bt_voice voice;
u32 opt;
struct bt_codecs *codecs;
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
voice.setting = sco_pi(sk)->setting;
- len = min_t(unsigned int, sizeof(voice), optlen);
- if (copy_from_sockptr(&voice, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&voice, sizeof(voice), optval,
+ optlen);
+ if (err)
break;
- }
/* Explicitly check for these values */
if (voice.setting != BT_VOICE_TRANSPARENT &&
break;
case BT_PKT_STATUS:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
break;
}
- if (copy_from_sockptr(buffer, optval, optlen)) {
+ err = bt_copy_from_sockptr(buffer, optlen, optval, optlen);
+ if (err) {
hci_dev_put(hdev);
- err = -EFAULT;
break;
}
struct sock *sk = sock->sk;
struct sco_options opts;
struct sco_conninfo cinfo;
- int len, err = 0;
+ int err = 0;
+ size_t len;
BT_DBG("sk %p", sk);
BT_DBG("mtu %u", opts.mtu);
- len = min_t(unsigned int, len, sizeof(opts));
+ len = min(len, sizeof(opts));
if (copy_to_user(optval, (char *)&opts, len))
err = -EFAULT;
cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle;
memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3);
- len = min_t(unsigned int, len, sizeof(cinfo));
+ len = min(len, sizeof(cinfo));
if (copy_to_user(optval, (char *)&cinfo, len))
err = -EFAULT;
return netif_receive_skb(skb);
}
-static int br_pass_frame_up(struct sk_buff *skb)
+static int br_pass_frame_up(struct sk_buff *skb, bool promisc)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
struct net_bridge *br = netdev_priv(brdev);
br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
BR_MCAST_DIR_TX);
+ BR_INPUT_SKB_CB(skb)->promisc = promisc;
+
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
dev_net(indev), NULL, skb, indev, NULL,
br_netif_receive_skb);
struct net_bridge_mcast *brmctx;
struct net_bridge_vlan *vlan;
struct net_bridge *br;
+ bool promisc;
u16 vid = 0;
u8 state;
if (p->flags & BR_LEARNING)
br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
- local_rcv = !!(br->dev->flags & IFF_PROMISC);
+ promisc = !!(br->dev->flags & IFF_PROMISC);
+ local_rcv = promisc;
+
if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
/* by definition the broadcast is also a multicast address */
if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) {
unsigned long now = jiffies;
if (test_bit(BR_FDB_LOCAL, &dst->flags))
- return br_pass_frame_up(skb);
+ return br_pass_frame_up(skb, false);
if (now != dst->used)
dst->used = now;
}
if (local_rcv)
- return br_pass_frame_up(skb);
+ return br_pass_frame_up(skb, promisc);
out:
return 0;
goto forward;
}
+ BR_INPUT_SKB_CB(skb)->promisc = false;
+
/* The else clause should be hit when nf_hook():
* - returns < 0 (drop/error)
* - returns = 0 (stolen/nf_queue)
struct sk_buff *skb,
const struct nf_hook_state *state)
{
+ bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
struct nf_conntrack *nfct = skb_nfct(skb);
const struct nf_ct_hook *ct_hook;
struct nf_conn *ct;
int ret;
+ if (promisc) {
+ nf_reset_ct(skb);
+ return NF_ACCEPT;
+ }
+
if (!nfct || skb->pkt_type == PACKET_HOST)
return NF_ACCEPT;
{
u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED;
- return br_info_notify(event, br, port, filter);
+ br_info_notify(event, br, port, filter);
}
/*
#endif
u8 proxyarp_replied:1;
u8 src_port_isolated:1;
+ u8 promisc:1;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
u8 vlan_filtered:1;
#endif
struct ebt_table_info *newinfo;
struct ebt_replace tmp;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
{
struct ebt_replace hlp;
+ if (len < sizeof(hlp))
+ return -EINVAL;
if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;
{
struct compat_ebt_replace hlp;
+ if (len < sizeof(hlp))
+ return -EINVAL;
if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;
static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- enum ip_conntrack_info ctinfo;
+ bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
+ struct nf_conntrack *nfct = skb_nfct(skb);
struct nf_conn *ct;
- if (skb->pkt_type == PACKET_HOST)
+ if (promisc) {
+ nf_reset_ct(skb);
+ return NF_ACCEPT;
+ }
+
+ if (!nfct || skb->pkt_type == PACKET_HOST)
return NF_ACCEPT;
/* nf_conntrack_confirm() cannot handle concurrent clones,
* this happens for broad/multicast frames with e.g. macvlan on top
* of the bridge device.
*/
- ct = nf_ct_get(skb, &ctinfo);
- if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
return NF_ACCEPT;
/* let inet prerouting call conntrack again */
* PP consumers must pay attention to run APIs in the appropriate context
* (e.g. NAPI context).
*/
-static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool);
+static DEFINE_PER_CPU(struct page_pool *, system_page_pool);
#ifdef CONFIG_LOCKDEP
/*
return rc;
}
+ if (unlikely(READ_ONCE(q->owner) == smp_processor_id())) {
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_RECLASSIFY_LOOP);
+ return NET_XMIT_DROP;
+ }
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
+ WRITE_ONCE(q->owner, smp_processor_id());
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
+ WRITE_ONCE(q->owner, -1);
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
}
merge:
- /* sk owenrship - if any - completely transferred to the aggregated packet */
+ /* sk ownership - if any - completely transferred to the aggregated packet */
skb->destructor = NULL;
+ skb->sk = NULL;
delta_truesize = skb->truesize;
if (offset > headlen) {
unsigned int eat = offset - headlen;
unsigned long flags;
struct sk_buff_head *list = &sk->sk_receive_queue;
- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
+ if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
atomic_inc(&sk->sk_drops);
trace_sock_rcvqueue_full(sk, skb);
return -ENOMEM;
skb->dev = NULL;
- if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
+ if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
atomic_inc(&sk->sk_drops);
goto discard_and_relse;
}
struct sock *sk;
int err = 0;
+ if (irqs_disabled())
+ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
struct bpf_shtab_elem *elem;
int ret = -ENOENT;
+ if (irqs_disabled())
+ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
eth = (struct ethhdr *)skb->data;
skb_pull_inline(skb, ETH_HLEN);
- if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
- dev->dev_addr))) {
- if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
- if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
- skb->pkt_type = PACKET_BROADCAST;
- else
- skb->pkt_type = PACKET_MULTICAST;
- } else {
- skb->pkt_type = PACKET_OTHERHOST;
- }
- }
+ eth_skb_pkt_type(skb, dev);
/*
* Some variants of DSA tagging don't have an ethertype field
{
struct hsr_priv *hsr;
struct hsr_port *port;
- char designation;
+ const char *designation = NULL;
hsr = netdev_priv(dev);
- designation = '\0';
hsr_for_each_port(hsr, port) {
if (port->type == HSR_PT_MASTER)
continue;
switch (port->type) {
case HSR_PT_SLAVE_A:
- designation = 'A';
+ designation = "Slave A";
break;
case HSR_PT_SLAVE_B:
- designation = 'B';
+ designation = "Slave B";
break;
default:
- designation = '?';
+ designation = "Unknown";
}
if (!is_slave_up(port->dev))
- netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a fully working HSR network\n",
+ netdev_warn(dev, "%s (%s) is not up; please bring it up to get a fully working HSR network\n",
designation, port->dev->name);
}
- if (designation == '\0')
+ if (!designation)
netdev_warn(dev, "No slave devices configured\n");
return 0;
netdev_update_features(master->dev);
dev_set_mtu(master->dev, hsr_get_max_mtu(hsr));
netdev_rx_handler_unregister(port->dev);
- dev_set_promiscuity(port->dev, -1);
+ if (!port->hsr->fwd_offloaded)
+ dev_set_promiscuity(port->dev, -1);
netdev_upper_dev_unlink(port->dev, master->dev);
}
e++;
}
}
+
+ /* Don't let NLM_DONE coalesce into a message, even if it could.
+ * Some user space expects NLM_DONE in a separate recv().
+ */
+ err = skb->len;
out:
cb->args[1] = e;
#include <net/inet_common.h>
#include <net/ip_fib.h>
#include <net/l3mdev.h>
+#include <net/addrconf.h>
/*
* Build xmit assembly blocks
struct icmp_ext_hdr *ext_hdr, _ext_hdr;
struct icmp_ext_echo_iio *iio, _iio;
struct net *net = dev_net(skb->dev);
+ struct inet6_dev *in6_dev;
+ struct in_device *in_dev;
struct net_device *dev;
char buff[IFNAMSIZ];
u16 ident_len;
/* Fill bits in reply message */
if (dev->flags & IFF_UP)
status |= ICMP_EXT_ECHOREPLY_ACTIVE;
- if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list)
+
+ in_dev = __in_dev_get_rcu(dev);
+ if (in_dev && rcu_access_pointer(in_dev->ifa_list))
status |= ICMP_EXT_ECHOREPLY_IPV4;
- if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list))
+
+ in6_dev = __in6_dev_get(dev);
+ if (in6_dev && !list_empty(&in6_dev->addr_list))
status |= ICMP_EXT_ECHOREPLY_IPV6;
+
dev_put(dev);
icmphdr->un.echo.sequence |= htons(status);
return true;
kuid_t sk_uid, bool relax,
bool reuseport_cb_ok, bool reuseport_ok)
{
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- return false;
+ if (ipv6_only_sock(sk2)) {
+ if (sk->sk_family == AF_INET)
+ return false;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ return false;
+#endif
+ }
return inet_bind_conflict(sk, sk2, sk_uid, relax,
reuseport_cb_ok, reuseport_ok);
struct sock_reuseport *reuseport_cb;
struct inet_bind_hashbucket *head2;
struct inet_bind2_bucket *tb2;
+ bool conflict = false;
bool reuseport_cb_ok;
rcu_read_lock();
spin_lock(&head2->lock);
- inet_bind_bucket_for_each(tb2, &head2->chain)
- if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
- break;
+ inet_bind_bucket_for_each(tb2, &head2->chain) {
+ if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
+ continue;
- if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok)) {
- spin_unlock(&head2->lock);
- return true;
+ if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok))
+ continue;
+
+ conflict = true;
+ break;
}
spin_unlock(&head2->lock);
- return false;
+
+ return conflict;
}
/*
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ /* ongoing timer handlers need to acquire socket lock. */
+ sock_not_owned_by_me(sk);
+
+ icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+
+ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
+ sk_stop_timer_sync(sk, &sk->sk_timer);
+}
+
void inet_csk_delete_keepalive_timer(struct sock *sk)
{
sk_stop_timer(sk, &sk->sk_timer);
#include <net/ip.h>
#include <net/ipv6.h>
+#include "../core/sock_destructor.h"
+
/* Use skb->cb to track consecutive/adjacent fragments coming at
* the end of the queue. Nodes in the rb-tree queue will
* contain "runs" of one or more adjacent fragments.
};
struct sk_buff *next_frag;
int frag_run_len;
+ int ip_defrag_offset;
};
#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
*/
if (!last)
fragrun_create(q, skb); /* First fragment. */
- else if (last->ip_defrag_offset + last->len < end) {
+ else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) {
/* This is the common case: skb goes to the end. */
/* Detect and discard overlaps. */
- if (offset < last->ip_defrag_offset + last->len)
+ if (offset < FRAG_CB(last)->ip_defrag_offset + last->len)
return IPFRAG_OVERLAP;
- if (offset == last->ip_defrag_offset + last->len)
+ if (offset == FRAG_CB(last)->ip_defrag_offset + last->len)
fragrun_append_to_last(q, skb);
else
fragrun_create(q, skb);
parent = *rbn;
curr = rb_to_skb(parent);
- curr_run_end = curr->ip_defrag_offset +
+ curr_run_end = FRAG_CB(curr)->ip_defrag_offset +
FRAG_CB(curr)->frag_run_len;
- if (end <= curr->ip_defrag_offset)
+ if (end <= FRAG_CB(curr)->ip_defrag_offset)
rbn = &parent->rb_left;
else if (offset >= curr_run_end)
rbn = &parent->rb_right;
- else if (offset >= curr->ip_defrag_offset &&
+ else if (offset >= FRAG_CB(curr)->ip_defrag_offset &&
end <= curr_run_end)
return IPFRAG_DUP;
else
rb_insert_color(&skb->rbnode, &q->rb_fragments);
}
- skb->ip_defrag_offset = offset;
+ FRAG_CB(skb)->ip_defrag_offset = offset;
return IPFRAG_OK;
}
struct sk_buff *parent)
{
struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
- struct sk_buff **nextp;
+ void (*destructor)(struct sk_buff *);
+ unsigned int orig_truesize = 0;
+ struct sk_buff **nextp = NULL;
+ struct sock *sk = skb->sk;
int delta;
+ if (sk && is_skb_wmem(skb)) {
+ /* TX: skb->sk might have been passed as argument to
+ * dst->output and must remain valid until tx completes.
+ *
+ * Move sk to reassembled skb and fix up wmem accounting.
+ */
+ orig_truesize = skb->truesize;
+ destructor = skb->destructor;
+ }
+
if (head != skb) {
fp = skb_clone(skb, GFP_ATOMIC);
- if (!fp)
- return NULL;
+ if (!fp) {
+ head = skb;
+ goto out_restore_sk;
+ }
FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
if (RB_EMPTY_NODE(&skb->rbnode))
FRAG_CB(parent)->next_frag = fp;
&q->rb_fragments);
if (q->fragments_tail == skb)
q->fragments_tail = fp;
+
+ if (orig_truesize) {
+ /* prevent skb_morph from releasing sk */
+ skb->sk = NULL;
+ skb->destructor = NULL;
+ }
skb_morph(skb, head);
FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
rb_replace_node(&head->rbnode, &skb->rbnode,
consume_skb(head);
head = skb;
}
- WARN_ON(head->ip_defrag_offset != 0);
+ WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0);
delta = -head->truesize;
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
- return NULL;
+ goto out_restore_sk;
delta += head->truesize;
if (delta)
clone = alloc_skb(0, GFP_ATOMIC);
if (!clone)
- return NULL;
+ goto out_restore_sk;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_frag_list_init(head);
for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
nextp = &skb_shinfo(head)->frag_list;
}
+out_restore_sk:
+ if (orig_truesize) {
+ int ts_delta = head->truesize - orig_truesize;
+
+ /* if this reassembled skb is fragmented later,
+ * fraglist skbs will get skb->sk assigned from head->sk,
+ * and each frag skb will be released via sock_wfree.
+ *
+ * Update sk_wmem_alloc.
+ */
+ head->sk = sk;
+ head->destructor = destructor;
+ refcount_add(ts_delta, &sk->sk_wmem_alloc);
+ }
+
return nextp;
}
EXPORT_SYMBOL(inet_frag_reasm_prepare);
void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
void *reasm_data, bool try_coalesce)
{
+ struct sock *sk = is_skb_wmem(head) ? head->sk : NULL;
+ const unsigned int head_truesize = head->truesize;
struct sk_buff **nextp = reasm_data;
struct rb_node *rbn;
struct sk_buff *fp;
head->prev = NULL;
head->tstamp = q->stamp;
head->mono_delivery_time = q->mono_delivery_time;
+
+ if (sk)
+ refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(inet_frag_reasm_finish);
}
skb_dst_drop(skb);
+ skb_orphan(skb);
return -EINPROGRESS;
insert_error:
struct ipq *qp;
__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
- skb_orphan(skb);
/* Lookup (or create) queue header */
qp = ip_find(net, ip_hdr(skb), user, vif);
tpi->flags | TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
} else {
+ if (unlikely(!pskb_may_pull(skb,
+ gre_hdr_len + sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
ver = ershdr->ver;
+ iph = ip_hdr(skb);
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
config IP_NF_ARPFILTER
tristate "arptables-legacy packet filtering support"
select IP_NF_ARPTABLES
+ select NETFILTER_FAMILY_ARP
depends on NETFILTER_XTABLES
help
ARP packet filtering defines a table `filter', which has a series of
void *loc_cpu_entry;
struct arpt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
void *loc_cpu_entry;
struct arpt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
void *loc_cpu_entry;
struct ipt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
void *loc_cpu_entry;
struct ipt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
struct net *net = nh->net;
int err;
- if (nexthop_notifiers_is_empty(net))
+ if (nexthop_notifiers_is_empty(net)) {
+ *hw_stats_used = false;
return 0;
+ }
err = nh_notifier_grp_hw_stats_init(&info, nh);
if (err)
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
peer->rate_last = jiffies;
++peer->n_redirects;
-#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (log_martians &&
+ if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians &&
peer->n_redirects == ip_rt_redirect_number)
net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
&ip_hdr(skb)->saddr, inet_iif(skb),
&ip_hdr(skb)->daddr, &gw);
-#endif
}
out_put_peer:
inet_putpeer(peer);
int err = -EINVAL;
u32 tag = 0;
+ if (!in_dev)
+ return -EINVAL;
+
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
lock_sock(sk);
__tcp_close(sk, timeout);
release_sock(sk);
+ if (!sk->sk_net_refcnt)
+ inet_csk_clear_xmit_timers_sync(sk);
sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_ao_info *ao_info;
+ struct hlist_node *next;
union tcp_ao_addr *addr;
struct tcp_ao_key *key;
int family, l3index;
l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
sk->sk_bound_dev_if);
- hlist_for_each_entry_rcu(key, &ao_info->head, node) {
+ hlist_for_each_entry_safe(key, next, &ao_info->head, node) {
if (!tcp_ao_key_cmp(key, l3index, addr, key->prefixlen, family, -1, -1))
continue;
}
DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+EXPORT_SYMBOL(udp_encap_needed_key);
+
+#if IS_ENABLED(CONFIG_IPV6)
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+EXPORT_SYMBOL(udpv6_encap_needed_key);
+#endif
+
void udp_encap_enable(void)
{
static_branch_inc(&udp_encap_needed_key);
if (msg->msg_controllen) {
err = udp_cmsg_send(sk, msg, &ipc.gso_size);
- if (err > 0)
+ if (err > 0) {
err = ip_cmsg_send(sk, msg, &ipc,
sk->sk_family == AF_INET6);
+ connected = 0;
+ }
if (unlikely(err < 0)) {
kfree(ipc.opt);
return err;
}
if (ipc.opt)
free = 1;
- connected = 0;
}
if (!ipc.opt) {
struct ip_options_rcu *inet_opt;
NAPI_GRO_CB(p)->count++;
p->data_len += skb->len;
- /* sk owenrship - if any - completely transferred to the aggregated packet */
+ /* sk ownership - if any - completely transferred to the aggregated packet */
skb->destructor = NULL;
+ skb->sk = NULL;
p->truesize += skb->truesize;
p->len += skb->len;
unsigned int off = skb_gro_offset(skb);
int flush = 1;
- /* we can do L4 aggregation only if the packet can't land in a tunnel
- * otherwise we could corrupt the inner stream
+ /* We can do L4 aggregation only if the packet can't land in a tunnel
+ * otherwise we could corrupt the inner stream. Detecting such packets
+ * cannot be foolproof and the aggregation might still happen in some
+ * cases. Such packets should be caught in udp_unexpected_gso later.
*/
NAPI_GRO_CB(skb)->is_flist = 0;
if (!sk || !udp_sk(sk)->gro_receive) {
+ /* If the packet was locally encapsulated in a UDP tunnel that
+ * wasn't detected above, do not GRO.
+ */
+ if (skb->encapsulation)
+ goto out;
+
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1;
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);
return 0;
}
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
- result = ifp;
- in6_ifa_hold(ifp);
- break;
+ if (in6_ifa_hold_safe(ifp)) {
+ result = ifp;
+ break;
+ }
}
}
}
err = 0;
if (fillargs.ifindex) {
- err = -ENODEV;
dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
- if (!dev)
+ if (!dev) {
+ err = -ENODEV;
goto done;
+ }
idev = __in6_dev_get(dev);
if (idev)
err = in6_dump_addrs(idev, skb, cb,
if (!w) {
/* New dump:
*
- * 1. hook callback destructor.
- */
- cb->args[3] = (long)cb->done;
- cb->done = fib6_dump_done;
-
- /*
- * 2. allocate and initialize walker.
+ * 1. allocate and initialize walker.
*/
w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (!w)
return -ENOMEM;
w->func = fib6_dump_node;
cb->args[2] = (long)w;
+
+ /* 2. hook callback destructor.
+ */
+ cb->args[3] = (long)cb->done;
+ cb->done = fib6_dump_done;
+
}
arg.skb = skb;
struct nl_info *info, struct netlink_ext_ack *extack)
{
struct fib6_table *table = rt->fib6_table;
- struct fib6_node *fn, *pn = NULL;
+ struct fib6_node *fn;
+#ifdef CONFIG_IPV6_SUBTREES
+ struct fib6_node *pn = NULL;
+#endif
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
goto out;
}
+#ifdef CONFIG_IPV6_SUBTREES
pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
if (rt->fib6_src.plen) {
struct fib6_node *sn;
struct ip6_tnl *tunnel;
u8 ver;
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ipv6h = ipv6_hdr(skb);
ershdr = (struct erspan_base_hdr *)skb->data;
ver = ershdr->ver;
void *loc_cpu_entry;
struct ip6t_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
void *loc_cpu_entry;
struct ip6t_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
}
skb_dst_drop(skb);
+ skb_orphan(skb);
return -EINPROGRESS;
insert_error:
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
- skb_orphan(skb);
fq = fq_find(net, fhdr->identification, user, hdr,
skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
goto try_again;
}
-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
static_branch_inc(&udpv6_encap_needed_key);
ipc6.opt = opt;
err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
- if (err > 0)
+ if (err > 0) {
err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6,
&ipc6);
+ connected = false;
+ }
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
}
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
- connected = false;
}
if (!opt) {
opt = txopt_get(np);
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);
return 0;
}
}
if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
- sta->sdata->u.vlan.sta) {
- ieee80211_clear_fast_rx(sta);
+ sta->sdata->u.vlan.sta)
RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL);
- }
if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
ieee80211_vif_dec_num_mcast(sta->sdata);
sta->sdata = vlansdata;
+ ieee80211_check_fast_rx(sta);
ieee80211_check_fast_xmit(sta);
if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *curr_ctx = NULL;
+ bool new_idle;
int ret = 0;
if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN))
out:
rcu_assign_pointer(link->conf->chanctx_conf, conf);
- sdata->vif.cfg.idle = !conf;
-
if (curr_ctx && ieee80211_chanctx_num_assigned(local, curr_ctx) > 0) {
ieee80211_recalc_chanctx_chantype(local, curr_ctx);
ieee80211_recalc_smps_chanctx(local, curr_ctx);
ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL);
}
- if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
- sdata->vif.type != NL80211_IFTYPE_MONITOR)
- ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_IDLE);
+ if (conf) {
+ new_idle = false;
+ } else {
+ struct ieee80211_link_data *tmp;
+
+ new_idle = true;
+ for_each_sdata_link(local, tmp) {
+ if (rcu_access_pointer(tmp->conf->chanctx_conf)) {
+ new_idle = false;
+ break;
+ }
+ }
+ }
+
+ if (new_idle != sdata->vif.cfg.idle) {
+ sdata->vif.cfg.idle = new_idle;
+
+ if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
+ sdata->vif.type != NL80211_IFTYPE_MONITOR)
+ ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_IDLE);
+ }
ieee80211_check_fast_xmit_iface(sdata);
_sdata_dbg(print, sdata, "[link %d] " fmt, \
link_id, ##__VA_ARGS__); \
else \
- _sdata_dbg(1, sdata, fmt, ##__VA_ARGS__); \
+ _sdata_dbg(print, sdata, fmt, ##__VA_ARGS__); \
} while (0)
#define link_dbg(link, fmt, ...) \
_link_id_dbg(1, (link)->sdata, (link)->link_id, \
};
/**
- * enum ieee80211_corrupt_data_flags - BSS data corruption flags
+ * enum ieee80211_bss_corrupt_data_flags - BSS data corruption flags
* @IEEE80211_BSS_CORRUPT_BEACON: last beacon frame received was corrupted
* @IEEE80211_BSS_CORRUPT_PROBE_RESP: last probe response received was corrupted
*
};
/**
- * enum ieee80211_valid_data_flags - BSS valid data flags
+ * enum ieee80211_bss_valid_data_flags - BSS valid data flags
* @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE
* @IEEE80211_BSS_VALID_RATES: Supported rates were gathered from non-corrupt IE
* @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE
struct sk_buff *skb, u32 ctrl_flags)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+ struct ieee80211_mesh_fast_tx_key key = {
+ .type = MESH_FAST_TX_TYPE_LOCAL
+ };
struct ieee80211_mesh_fast_tx *entry;
struct ieee80211s_hdr *meshhdr;
u8 sa[ETH_ALEN] __aligned(2);
return false;
}
- entry = mesh_fast_tx_get(sdata, skb->data);
+ ether_addr_copy(key.addr, skb->data);
+ if (!ether_addr_equal(skb->data + ETH_ALEN, sdata->vif.addr))
+ key.type = MESH_FAST_TX_TYPE_PROXIED;
+ entry = mesh_fast_tx_get(sdata, &key);
if (!entry)
return false;
#define MESH_FAST_TX_CACHE_THRESHOLD_SIZE 384
#define MESH_FAST_TX_CACHE_TIMEOUT 8000 /* msecs */
+/**
+ * enum ieee80211_mesh_fast_tx_type - cached mesh fast tx entry type
+ *
+ * @MESH_FAST_TX_TYPE_LOCAL: tx from the local vif address as SA
+ * @MESH_FAST_TX_TYPE_PROXIED: local tx with a different SA (e.g. bridged)
+ * @MESH_FAST_TX_TYPE_FORWARDED: forwarded from a different mesh point
+ * @NUM_MESH_FAST_TX_TYPE: number of entry types
+ */
+enum ieee80211_mesh_fast_tx_type {
+ MESH_FAST_TX_TYPE_LOCAL,
+ MESH_FAST_TX_TYPE_PROXIED,
+ MESH_FAST_TX_TYPE_FORWARDED,
+
+ /* must be last */
+ NUM_MESH_FAST_TX_TYPE
+};
+
+
+/**
+ * struct ieee80211_mesh_fast_tx_key - cached mesh fast tx entry key
+ *
+ * @addr: The Ethernet DA for this entry
+ * @type: cache entry type
+ */
+struct ieee80211_mesh_fast_tx_key {
+ u8 addr[ETH_ALEN] __aligned(2);
+ u16 type;
+};
+
/**
* struct ieee80211_mesh_fast_tx - cached mesh fast tx entry
* @rhash: rhashtable pointer
- * @addr_key: The Ethernet DA which is the key for this entry
+ * @key: the lookup key for this cache entry
* @fast_tx: base fast_tx data
* @hdr: cached mesh and rfc1042 headers
* @hdrlen: length of mesh + rfc1042
*/
struct ieee80211_mesh_fast_tx {
struct rhash_head rhash;
- u8 addr_key[ETH_ALEN] __aligned(2);
+ struct ieee80211_mesh_fast_tx_key key;
struct ieee80211_fast_tx fast_tx;
u8 hdr[sizeof(struct ieee80211s_hdr) + sizeof(rfc1042_header)];
bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt);
struct ieee80211_mesh_fast_tx *
-mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, const u8 *addr);
+mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mesh_fast_tx_key *key);
bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 ctrl_flags);
void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata,
static const struct rhashtable_params fast_tx_rht_params = {
.nelem_hint = 10,
.automatic_shrinking = true,
- .key_len = ETH_ALEN,
- .key_offset = offsetof(struct ieee80211_mesh_fast_tx, addr_key),
+ .key_len = sizeof_field(struct ieee80211_mesh_fast_tx, key),
+ .key_offset = offsetof(struct ieee80211_mesh_fast_tx, key),
.head_offset = offsetof(struct ieee80211_mesh_fast_tx, rhash),
.hashfn = mesh_table_hash,
};
}
struct ieee80211_mesh_fast_tx *
-mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, const u8 *addr)
+mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mesh_fast_tx_key *key)
{
struct ieee80211_mesh_fast_tx *entry;
struct mesh_tx_cache *cache;
cache = &sdata->u.mesh.tx_cache;
- entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params);
+ entry = rhashtable_lookup(&cache->rht, key, fast_tx_rht_params);
if (!entry)
return NULL;
if (!(entry->mpath->flags & MESH_PATH_ACTIVE) ||
mpath_expired(entry->mpath)) {
spin_lock_bh(&cache->walk_lock);
- entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params);
+ entry = rhashtable_lookup(&cache->rht, key, fast_tx_rht_params);
if (entry)
mesh_fast_tx_entry_free(cache, entry);
spin_unlock_bh(&cache->walk_lock);
if (!sta)
return;
+ build.key.type = MESH_FAST_TX_TYPE_LOCAL;
if ((meshhdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6) {
/* This is required to keep the mppath alive */
mppath = mpp_path_lookup(sdata, meshhdr->eaddr1);
if (!mppath)
return;
build.mppath = mppath;
+ if (!ether_addr_equal(meshhdr->eaddr2, sdata->vif.addr))
+ build.key.type = MESH_FAST_TX_TYPE_PROXIED;
} else if (ieee80211_has_a4(hdr->frame_control)) {
mppath = mpath;
} else {
return;
}
+ if (!ether_addr_equal(hdr->addr4, sdata->vif.addr))
+ build.key.type = MESH_FAST_TX_TYPE_FORWARDED;
+
/* rate limit, in case fast xmit can't be enabled */
if (mppath->fast_tx_check == jiffies)
return;
}
}
- memcpy(build.addr_key, mppath->dst, ETH_ALEN);
+ memcpy(build.key.addr, mppath->dst, ETH_ALEN);
build.timestamp = jiffies;
build.fast_tx.band = info->band;
build.fast_tx.da_offs = offsetof(struct ieee80211_hdr, addr3);
const u8 *addr)
{
struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache;
+ struct ieee80211_mesh_fast_tx_key key = {};
struct ieee80211_mesh_fast_tx *entry;
+ int i;
+ ether_addr_copy(key.addr, addr);
spin_lock_bh(&cache->walk_lock);
- entry = rhashtable_lookup_fast(&cache->rht, addr, fast_tx_rht_params);
- if (entry)
- mesh_fast_tx_entry_free(cache, entry);
+ for (i = 0; i < NUM_MESH_FAST_TX_TYPE; i++) {
+ key.type = i;
+ entry = rhashtable_lookup_fast(&cache->rht, &key, fast_tx_rht_params);
+ if (entry)
+ mesh_fast_tx_entry_free(cache, entry);
+ }
spin_unlock_bh(&cache->walk_lock);
}
.from_ap = true,
.start = ies->data,
.len = ies->len,
- .mode = conn->mode,
};
struct ieee802_11_elems *elems;
struct ieee80211_supported_band *sband;
int ret;
again:
+ parse_params.mode = conn->mode;
elems = ieee802_11_parse_elems_full(&parse_params);
if (!elems)
return ERR_PTR(-ENOMEM);
ap_mode = ieee80211_determine_ap_chan(sdata, channel, bss->vht_cap_info,
elems, false, conn, &ap_chandef);
- mlme_link_id_dbg(sdata, link_id, "determined AP %pM to be %s\n",
- cbss->bssid, ieee80211_conn_mode_str(ap_mode));
-
/* this should be impossible since parsing depends on our mode */
if (WARN_ON(ap_mode > conn->mode)) {
ret = -EINVAL;
goto free;
}
+ if (conn->mode != ap_mode) {
+ conn->mode = ap_mode;
+ kfree(elems);
+ goto again;
+ }
+
+ mlme_link_id_dbg(sdata, link_id, "determined AP %pM to be %s\n",
+ cbss->bssid, ieee80211_conn_mode_str(ap_mode));
+
sband = sdata->local->hw.wiphy->bands[channel->band];
switch (channel->band) {
break;
}
- conn->mode = ap_mode;
chanreq->oper = ap_chandef;
/* wider-bandwidth OFDMA is only done in EHT */
}
/* the mode can only decrease, so this must terminate */
- if (ap_mode != conn->mode)
+ if (ap_mode != conn->mode) {
+ kfree(elems);
goto again;
+ }
mlme_link_id_dbg(sdata, link_id,
"connecting with %s mode, max bandwidth %d MHz\n",
*/
if (control &
IEEE80211_MLE_STA_RECONF_CONTROL_AP_REM_TIMER_PRESENT)
- link_removal_timeout[link_id] = le16_to_cpu(*(__le16 *)pos);
+ link_removal_timeout[link_id] = get_unaligned_le16(pos);
}
removed_links &= sdata->vif.valid_links;
continue;
}
- link_delay = link_conf->beacon_int *
- link_removal_timeout[link_id];
+ if (link_removal_timeout[link_id] < 1)
+ link_delay = 0;
+ else
+ link_delay = link_conf->beacon_int *
+ (link_removal_timeout[link_id] - 1);
if (!delay)
delay = link_delay;
}
if (sdata->vif.active_links != active_links) {
+ /* usable links are affected when active_links are changed,
+ * so notify the driver about the status change
+ */
+ changed |= BSS_CHANGED_MLD_VALID_LINKS;
+ active_links &= sdata->vif.active_links;
+ if (!active_links)
+ active_links =
+ BIT(__ffs(sdata->vif.valid_links &
+ ~dormant_links));
ret = ieee80211_set_active_links(&sdata->vif, active_links);
if (ret) {
sdata_info(sdata, "Failed to set TTLM active links\n");
goto out;
}
- changed |= BSS_CHANGED_MLD_VALID_LINKS;
sdata->vif.suspended_links = suspended_links;
if (sdata->vif.suspended_links)
changed |= BSS_CHANGED_MLD_TTLM;
link->u.mgd.dtim_period = elems->dtim_period;
link->u.mgd.have_beacon = true;
ifmgd->assoc_data->need_beacon = false;
- if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
+ if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY) &&
+ !ieee80211_is_s1g_beacon(hdr->frame_control)) {
link->conf->sync_tsf =
le64_to_cpu(mgmt->u.beacon.timestamp);
link->conf->sync_device_ts =
sdata_info(sdata,
"failed to insert STA entry for the AP (error %d)\n",
err);
- goto out_err;
+ goto out_release_chan;
}
} else
WARN_ON_ONCE(!ether_addr_equal(link->u.mgd.bssid, cbss->bssid));
return 0;
+out_release_chan:
+ ieee80211_link_release_channel(link);
out_err:
- ieee80211_link_release_channel(&sdata->deflink);
ieee80211_vif_set_links(sdata, 0, 0);
return err;
}
struct ieee80211_sub_if_data *sdata;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_supported_band *sband;
+ u32 mask = ~0;
rate_control_fill_sta_table(sta, info, dest, max_rates);
if (ieee80211_is_tx_data(skb))
rate_control_apply_mask(sdata, sta, sband, dest, max_rates);
+ if (!(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX))
+ mask = sdata->rc_rateidx_mask[info->band];
+
if (dest[0].idx < 0)
__rate_control_send_low(&sdata->local->hw, sband, sta, info,
- sdata->rc_rateidx_mask[info->band]);
+ mask);
if (sta)
rate_fixup_ratelist(vif, sband, info, dest, max_rates);
struct sk_buff *skb, int hdrlen)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- struct ieee80211_mesh_fast_tx *entry = NULL;
+ struct ieee80211_mesh_fast_tx_key key = {
+ .type = MESH_FAST_TX_TYPE_FORWARDED
+ };
+ struct ieee80211_mesh_fast_tx *entry;
struct ieee80211s_hdr *mesh_hdr;
struct tid_ampdu_tx *tid_tx;
struct sta_info *sta;
mesh_hdr = (struct ieee80211s_hdr *)(skb->data + sizeof(eth));
if ((mesh_hdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6)
- entry = mesh_fast_tx_get(sdata, mesh_hdr->eaddr1);
+ ether_addr_copy(key.addr, mesh_hdr->eaddr1);
else if (!(mesh_hdr->flags & MESH_FLAGS_AE))
- entry = mesh_fast_tx_get(sdata, skb->data);
+ ether_addr_copy(key.addr, skb->data);
+ else
+ return false;
+
+ entry = mesh_fast_tx_get(sdata, &key);
if (!entry)
return false;
}
break;
case WLAN_CATEGORY_PROTECTED_EHT:
+ if (len < offsetofend(typeof(*mgmt),
+ u.action.u.ttlm_req.action_code))
+ break;
+
switch (mgmt->u.action.u.ttlm_req.action_code) {
case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ:
if (sdata->vif.type != NL80211_IFTYPE_STATION)
cpu_to_le16(IEEE80211_SN_TO_SEQ(sn));
}
IEEE80211_SKB_CB(skb)->flags |= tx_flags;
+ IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_SCAN_TX;
ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band);
}
}
txrc.bss_conf = &tx->sdata->vif.bss_conf;
txrc.skb = tx->skb;
txrc.reported_rate.idx = -1;
- txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band];
- if (tx->sdata->rc_has_mcs_mask[info->band])
- txrc.rate_idx_mcs_mask =
- tx->sdata->rc_rateidx_mcs_mask[info->band];
+ if (unlikely(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) {
+ txrc.rate_idx_mask = ~0;
+ } else {
+ txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band];
+
+ if (tx->sdata->rc_has_mcs_mask[info->band])
+ txrc.rate_idx_mcs_mask =
+ tx->sdata->rc_rateidx_mcs_mask[info->band];
+ }
txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP ||
tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
mptcp_set_state(newsk, TCP_CLOSE);
}
} else {
- MPTCP_INC_STATS(sock_net(ssk),
- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
tcpfallback:
newsk->sk_kern_sock = kern;
lock_sock(newsk);
struct mptcp_subflow_context *subflow;
int space, cap;
+ /* bpf can land here with a wrong sk type */
+ if (sk->sk_protocol == IPPROTO_TCP)
+ return -EINVAL;
+
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
else
return child;
fallback:
+ if (fallback)
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
mptcp_subflow_drop_ctx(child);
return child;
}
if (sctph->source != cp->vport || payload_csum ||
skb->ip_summed == CHECKSUM_PARTIAL) {
sctph->source = cp->vport;
- sctp_nat_csum(skb, sctph, sctphoff);
+ if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+ sctp_nat_csum(skb, sctph, sctphoff);
} else {
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
(skb->ip_summed == CHECKSUM_PARTIAL &&
!(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
sctph->dest = cp->dport;
- sctp_nat_csum(skb, sctph, sctphoff);
+ if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+ sctp_nat_csum(skb, sctph, sctphoff);
} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
proto = veth->h_vlan_encapsulated_proto;
break;
case htons(ETH_P_PPP_SES):
- proto = nf_flow_pppoe_proto(skb);
+ if (!nf_flow_pppoe_proto(skb, &proto))
+ return NF_ACCEPT;
break;
default:
proto = skb->protocol;
tuple->encap[i].proto = skb->protocol;
break;
case htons(ETH_P_PPP_SES):
- phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+ phdr = (struct pppoe_hdr *)skb_network_header(skb);
tuple->encap[i].id = ntohs(phdr->sid);
tuple->encap[i].proto = skb->protocol;
break;
return NF_STOLEN;
}
-static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
u32 *offset)
{
struct vlan_ethhdr *veth;
+ __be16 inner_proto;
switch (skb->protocol) {
case htons(ETH_P_8021Q):
}
break;
case htons(ETH_P_PPP_SES):
- if (nf_flow_pppoe_proto(skb) == proto) {
+ if (nf_flow_pppoe_proto(skb, &inner_proto) &&
+ inner_proto == proto) {
*offset += PPPOE_SES_HLEN;
return true;
}
skb_reset_network_header(skb);
break;
case htons(ETH_P_PPP_SES):
- skb->protocol = nf_flow_pppoe_proto(skb);
+ skb->protocol = __nf_flow_pppoe_proto(skb);
skb_pull(skb, PPPOE_SES_HLEN);
skb_reset_network_header(skb);
break;
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv)
{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
+ nft_set_elem_change_active(ctx->net, set, ext);
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
return 0;
if (!nft_set_elem_active(ext, genmask))
continue;
+ nft_set_elem_change_active(ctx->net, set, ext);
nft_setelem_data_deactivate(ctx->net, set, catchall->elem);
break;
}
{
struct nft_set_iter iter = {
.genmask = nft_genmask_next(ctx->net),
+ .type = NFT_ITER_UPDATE,
.fn = nft_mapelem_deactivate,
};
__NFT_TABLE_F_WAS_AWAKEN | \
__NFT_TABLE_F_WAS_ORPHAN)
+static bool nft_table_pending_update(const struct nft_ctx *ctx)
+{
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ struct nft_trans *trans;
+
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return true;
+
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
+ if (trans->ctx.table == ctx->table &&
+ ((trans->msg_type == NFT_MSG_NEWCHAIN &&
+ nft_trans_chain_update(trans)) ||
+ (trans->msg_type == NFT_MSG_DELCHAIN &&
+ nft_is_base_chain(trans->ctx.chain))))
+ return true;
+ }
+
+ return false;
+}
+
static int nf_tables_updtable(struct nft_ctx *ctx)
{
struct nft_trans *trans;
return -EOPNOTSUPP;
/* No dormant off/on/off/on games in single transaction */
- if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ if (nft_table_pending_update(ctx))
return -EINVAL;
trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
struct nft_stats __percpu *stats = NULL;
struct nft_chain_hook hook = {};
+ if (table->flags & __NFT_TABLE_F_UPDATE)
+ return -EINVAL;
+
if (flags & NFT_CHAIN_BINDING)
return -EOPNOTSUPP;
}
}
+ if (table->flags & __NFT_TABLE_F_UPDATE &&
+ !list_empty(&hook.list)) {
+ NL_SET_BAD_ATTR(extack, attr);
+ err = -EOPNOTSUPP;
+ goto err_hooks;
+ }
+
if (!(table->flags & NFT_TABLE_F_DORMANT) &&
nft_is_base_chain(chain) &&
!list_empty(&hook.list)) {
struct nft_trans *trans;
int err;
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return -EOPNOTSUPP;
+
err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook,
ctx->family, chain->flags, extack);
if (err < 0)
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (nla[NFTA_CHAIN_HOOK]) {
- if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN ||
+ chain->flags & NFT_CHAIN_HW_OFFLOAD)
return -EOPNOTSUPP;
if (nft_is_base_chain(chain)) {
{
const struct nft_expr_type *type, *candidate = NULL;
- list_for_each_entry(type, &nf_tables_expressions, list) {
+ list_for_each_entry_rcu(type, &nf_tables_expressions, list) {
if (!nla_strcmp(nla, type->name)) {
if (!type->family && !candidate)
candidate = type;
if (nla == NULL)
return ERR_PTR(-EINVAL);
+ rcu_read_lock();
type = __nft_expr_type_get(family, nla);
- if (type != NULL && try_module_get(type->owner))
+ if (type != NULL && try_module_get(type->owner)) {
+ rcu_read_unlock();
return type;
+ }
+ rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
const struct nft_data *data;
int err;
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
return 0;
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
{
- u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_iter dummy_iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ };
struct nft_set_elem_catchall *catchall;
+
struct nft_set_ext *ext;
int ret = 0;
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask))
+ if (!nft_set_elem_active(ext, dummy_iter.genmask))
continue;
- ret = nft_setelem_validate(ctx, set, NULL, catchall->elem);
+ ret = nft_setelem_validate(ctx, set, &dummy_iter, catchall->elem);
if (ret < 0)
return ret;
}
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
return nft_setelem_data_validate(ctx, set, elem_priv);
}
}
iter.genmask = nft_genmask_next(ctx->net);
+ iter.type = NFT_ITER_UPDATE;
iter.skip = 0;
iter.count = 0;
iter.err = 0;
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv)
{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+ /* called from abort path, reverse check to undo changes. */
+ if (nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
+ nft_clear(ctx->net, ext);
nft_setelem_data_activate(ctx->net, set, elem_priv);
return 0;
if (!nft_set_elem_active(ext, genmask))
continue;
+ nft_clear(ctx->net, ext);
nft_setelem_data_activate(ctx->net, set, catchall->elem);
break;
}
{
struct nft_set_iter iter = {
.genmask = nft_genmask_next(ctx->net),
+ .type = NFT_ITER_UPDATE,
.fn = nft_mapelem_activate,
};
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_set_dump_args *args;
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext))
return 0;
args.skb = skb;
args.reset = dump_ctx->reset;
args.iter.genmask = nft_genmask_cur(net);
+ args.iter.type = NFT_ITER_READ;
args.iter.skip = cb->args[0];
args.iter.count = 0;
args.iter.err = 0;
struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_setelem_is_catchall(set, elem_priv)) {
- nft_set_elem_change_active(net, set, ext);
+ nft_clear(net, ext);
} else {
set->ops->activate(net, set, elem_priv);
}
}
}
+static int nft_setelem_active_next(const struct net *net,
+ const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+ u8 genmask = nft_genmask_next(net);
+
+ return nft_set_elem_active(ext, genmask);
+}
+
static void nft_setelem_data_activate(const struct net *net,
const struct nft_set *set,
struct nft_elem_priv *elem_priv)
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_trans *trans;
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
sizeof(struct nft_trans_elem), GFP_ATOMIC);
if (!trans)
{
struct nft_set_iter iter = {
.genmask = genmask,
+ .type = NFT_ITER_UPDATE,
.fn = nft_setelem_flush,
};
{
const struct nft_object_type *type;
- list_for_each_entry(type, &nf_tables_objects, list) {
+ list_for_each_entry_rcu(type, &nf_tables_objects, list) {
if (type->family != NFPROTO_UNSPEC &&
type->family != family)
continue;
{
const struct nft_object_type *type;
+ rcu_read_lock();
type = __nft_obj_type_get(objtype, family);
- if (type != NULL && try_module_get(type->owner))
+ if (type != NULL && try_module_get(type->owner)) {
+ rcu_read_unlock();
return type;
+ }
+ rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
return err;
}
+/* call under rcu_read_lock */
static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
{
const struct nf_flowtable_type *type;
- list_for_each_entry(type, &nf_tables_flowtables, list) {
+ list_for_each_entry_rcu(type, &nf_tables_flowtables, list) {
if (family == type->family)
return type;
}
{
const struct nf_flowtable_type *type;
+ rcu_read_lock();
type = __nft_flowtable_type_get(family);
- if (type != NULL && try_module_get(type->owner))
+ if (type != NULL && try_module_get(type->owner)) {
+ rcu_read_unlock();
return type;
+ }
+ rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (nft_trans_chain_update(trans)) {
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
&nft_trans_chain_hooks(trans));
- nft_netdev_unregister_hooks(net,
- &nft_trans_chain_hooks(trans),
- true);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_netdev_unregister_hooks(net,
+ &nft_trans_chain_hooks(trans),
+ true);
+ }
} else {
nft_chain_del(trans->ctx.chain);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
struct nft_trans *trans, *next;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
+ int err = 0;
if (action == NFNL_ABORT_VALIDATE &&
nf_tables_validate(net) < 0)
- return -EAGAIN;
+ err = -EAGAIN;
list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
list) {
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
- nft_netdev_unregister_hooks(net,
- &nft_trans_chain_hooks(trans),
- true);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_netdev_unregister_hooks(net,
+ &nft_trans_chain_hooks(trans),
+ true);
+ }
free_percpu(nft_trans_chain_stats(trans));
kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
case NFT_MSG_DESTROYSETELEM:
te = (struct nft_trans_elem *)trans->data;
- nft_setelem_data_activate(net, te->set, te->elem_priv);
- nft_setelem_activate(net, te->set, te->elem_priv);
+ if (!nft_setelem_active_next(net, te->set, te->elem_priv)) {
+ nft_setelem_data_activate(net, te->set, te->elem_priv);
+ nft_setelem_activate(net, te->set, te->elem_priv);
+ }
if (!nft_setelem_is_catchall(te->set, te->elem_priv))
te->set->ndeact--;
nf_tables_abort_release(trans);
}
- if (action == NFNL_ABORT_AUTOLOAD)
- nf_tables_module_autoload(net);
- else
- nf_tables_module_autoload_cleanup(net);
-
- return 0;
+ return err;
}
static int nf_tables_abort(struct net *net, struct sk_buff *skb,
gc_seq = nft_gc_seq_begin(nft_net);
ret = __nf_tables_abort(net, action);
nft_gc_seq_end(nft_net, gc_seq);
+
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+
+ /* module autoload needs to happen after GC sequence update because it
+ * temporarily releases and grabs mutex again.
+ */
+ if (action == NFNL_ABORT_AUTOLOAD)
+ nf_tables_module_autoload(net);
+ else
+ nf_tables_module_autoload_cleanup(net);
+
mutex_unlock(&nft_net->commit_mutex);
return ret;
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
return 0;
continue;
iter.genmask = nft_genmask_next(ctx->net);
+ iter.type = NFT_ITER_UPDATE;
iter.skip = 0;
iter.count = 0;
iter.err = 0;
gc_seq = nft_gc_seq_begin(nft_net);
- if (!list_empty(&nft_net->commit_list) ||
- !list_empty(&nft_net->module_list))
- __nf_tables_abort(net, NFNL_ABORT_NONE);
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+
+ if (!list_empty(&nft_net->module_list))
+ nf_tables_module_autoload_cleanup(net);
__nft_release_tables(net);
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
nft_chain_route_fini();
+ nf_tables_trans_destroy_flush_work();
unregister_pernet_subsys(&nf_tables_net_ops);
cancel_work_sync(&trans_gc_work);
cancel_work_sync(&trans_destroy_work);
return;
if (n > 1) {
- nf_unregister_net_hook(ctx->net, &found->ops);
+ if (!(ctx->chain->table->flags & NFT_TABLE_F_DORMANT))
+ nf_unregister_net_hook(ctx->net, &found->ops);
+
list_del_rcu(&found->list);
kfree_rcu(found, rcu);
return;
return 0;
iter.genmask = nft_genmask_next(ctx->net);
+ iter.type = NFT_ITER_UPDATE;
iter.skip = 0;
iter.count = 0;
iter.err = 0;
nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
/* Enter 11 state. */
priv->bitmap[idx] |= (genmask << off);
- nft_set_elem_change_active(net, set, &be->ext);
+ nft_clear(net, &be->ext);
}
static void nft_bitmap_flush(const struct net *net,
list_for_each_entry_rcu(be, &priv->list, head) {
if (iter->count < iter->skip)
goto cont;
- if (!nft_set_elem_active(&be->ext, iter->genmask))
- goto cont;
iter->err = iter->fn(ctx, set, iter, &be->priv);
{
struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
- nft_set_elem_change_active(net, set, &he->ext);
+ nft_clear(net, &he->ext);
}
static void nft_rhash_flush(const struct net *net,
if (iter->count < iter->skip)
goto cont;
- if (!nft_set_elem_active(&he->ext, iter->genmask))
- goto cont;
iter->err = iter->fn(ctx, set, iter, &he->priv);
if (iter->err < 0)
{
struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
- nft_set_elem_change_active(net, set, &he->ext);
+ nft_clear(net, &he->ext);
}
static void nft_hash_flush(const struct net *net,
hlist_for_each_entry_rcu(he, &priv->table[i], node) {
if (iter->count < iter->skip)
goto cont;
- if (!nft_set_elem_active(&he->ext, iter->genmask))
- goto cont;
iter->err = iter->fn(ctx, set, iter, &he->priv);
if (iter->err < 0)
{
struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
- nft_set_elem_change_active(net, set, &e->ext);
+ nft_clear(net, &e->ext);
}
/**
rules_fx = rules_f0;
nft_pipapo_for_each_field(f, i, m) {
+ bool last = i == m->field_count - 1;
+
if (!pipapo_match_field(f, start, rules_fx,
match_start, match_end))
break;
match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
- }
- if (i == m->field_count) {
- priv->dirty = true;
- pipapo_drop(m, rulemap);
- return;
+ if (last && f->mt[rulemap[i].to].e == e) {
+ priv->dirty = true;
+ pipapo_drop(m, rulemap);
+ return;
+ }
}
first_rule += rules_f0;
}
+
+ WARN_ON_ONCE(1); /* elem_priv not found */
}
/**
struct nft_set_iter *iter)
{
struct nft_pipapo *priv = nft_set_priv(set);
- struct net *net = read_pnet(&set->net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
unsigned int i, r;
+ WARN_ON_ONCE(iter->type != NFT_ITER_READ &&
+ iter->type != NFT_ITER_UPDATE);
+
rcu_read_lock();
- if (iter->genmask == nft_genmask_cur(net))
+ if (iter->type == NFT_ITER_READ)
m = rcu_dereference(priv->match);
else
m = priv->clone;
e = f->mt[r].e;
- if (!nft_set_elem_active(&e->ext, iter->genmask))
- goto cont;
-
iter->err = iter->fn(ctx, set, iter, &e->priv);
if (iter->err < 0)
goto out;
{
struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
- nft_set_elem_change_active(net, set, &rbe->ext);
+ nft_clear(net, &rbe->ext);
}
static void nft_rbtree_flush(const struct net *net,
if (iter->count < iter->skip)
goto cont;
- if (!nft_set_elem_active(&rbe->ext, iter->genmask))
- goto cont;
iter->err = iter->fn(ctx, set, iter, &rbe->priv);
if (iter->err < 0) {
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt > LLCP_MAX_RW) {
err = -EINVAL;
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt > LLCP_MAX_MIUX) {
err = -EINVAL;
nfc_send_to_raw_sock(ndev->nfc_dev, skb,
RAW_PAYLOAD_NCI, NFC_DIRECTION_RX);
+ if (!nci_plen(skb->data)) {
+ kfree_skb(skb);
+ break;
+ }
+
/* Process frame */
switch (nci_mt(skb->data)) {
case NCI_MT_RSP_PKT:
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
- pr_info_ratelimited("Failed to associated timeout "
- "policy `%s'\n", ct_info.timeout);
+ OVS_NLERR(log,
+ "Failed to associated timeout policy '%s'",
+ ct_info.timeout);
else
ct_info.nf_ct_timeout = rcu_dereference(
nf_ct_timeout_find(ct_info.ct)->timeout);
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
struct hlist_head *head = &info->limits[i];
struct ovs_ct_limit *ct_limit;
+ struct hlist_node *next;
- hlist_for_each_entry_rcu(ct_limit, head, hlist_node,
- lockdep_ovsl_is_held())
+ hlist_for_each_entry_safe(ct_limit, next, head, hlist_node)
kfree_rcu(ct_limit, rcu);
}
kfree(info->limits);
}
ret = PTR_ERR(trans_private);
/* Trigger connection so that its ready for the next retry */
- if (ret == -ENODEV)
+ if (ret == -ENODEV && cp)
rds_conn_connect_if_down(cp->cp_conn);
goto out;
}
struct tcf_skbmod *d = to_skbmod(a);
unsigned char *b = skb_tail_pointer(skb);
struct tcf_skbmod_params *p;
- struct tc_skbmod opt = {
- .index = d->tcf_index,
- .refcnt = refcount_read(&d->tcf_refcnt) - ref,
- .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
- };
+ struct tc_skbmod opt;
struct tcf_t t;
+ memset(&opt, 0, sizeof(opt));
+ opt.index = d->tcf_index;
+ opt.refcnt = refcount_read(&d->tcf_refcnt) - ref,
+ opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind;
spin_lock_bh(&d->tcf_lock);
opt.action = d->tcf_action;
p = rcu_dereference_protected(d->skbmod_p,
notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
!qdisc_is_offloaded);
/* TODO: perform the search on a per txq basis */
- sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
+ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
WARN_ON_ONCE(parentid != TC_H_ROOT);
break;
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
+ sch->owner = -1;
netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL);
refcount_set(&sch->refcnt, 1);
* Caller provides the truncation length of the output token (h) in
* cksumout.len.
*
- * Note that for RPCSEC, the "initial cipher state" is always all zeroes.
- *
* Return values:
* %GSS_S_COMPLETE: Digest computed, @cksumout filled in
* %GSS_S_FAILURE: Call failed
int body_offset, struct xdr_netobj *cksumout)
{
unsigned int ivsize = crypto_sync_skcipher_ivsize(cipher);
- static const u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
struct ahash_request *req;
struct scatterlist sg[1];
+ u8 *iv, *checksumdata;
int err = -ENOMEM;
- u8 *checksumdata;
checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL);
if (!checksumdata)
return GSS_S_FAILURE;
+ /* For RPCSEC, the "initial cipher state" is always all zeroes. */
+ iv = kzalloc(ivsize, GFP_KERNEL);
+ if (!iv)
+ goto out_free_mem;
req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
- goto out_free_cksumdata;
+ goto out_free_mem;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
err = crypto_ahash_init(req);
if (err)
out_free_ahash:
ahash_request_free(req);
-out_free_cksumdata:
+out_free_mem:
+ kfree(iv);
kfree_sensitive(checksumdata);
return err ? GSS_S_FAILURE : GSS_S_COMPLETE;
}
* MSG_SPLICE_PAGES is used exclusively to reduce the number of
* copy operations in this path. Therefore the caller must ensure
* that the pages backing @xdr are unchanging.
- *
- * Note that the send is non-blocking. The caller has incremented
- * the reference count on each page backing the RPC message, and
- * the network layer will "put" these pages when transmission is
- * complete.
- *
- * This is safe for our RPC services because the memory backing
- * the head and tail components is never kmalloc'd. These always
- * come from pages in the svc_rqst::rq_pages array.
*/
static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
rpc_fraghdr marker, unsigned int *sentp)
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
1 + count, sizeof(marker) + rqstp->rq_res.len);
ret = sock_sendmsg(svsk->sk_sock, &msg);
+ page_frag_free(buf);
if (ret < 0)
return ret;
*sentp += ret;
queue_work(svcrdma_wq, &info->wi_work);
}
-/**
- * svc_rdma_write_chunk_release - Release Write chunk I/O resources
- * @rdma: controlling transport
- * @ctxt: Send context that is being released
- */
-void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt)
-{
- struct svc_rdma_write_info *info;
- struct svc_rdma_chunk_ctxt *cc;
-
- while (!list_empty(&ctxt->sc_write_info_list)) {
- info = list_first_entry(&ctxt->sc_write_info_list,
- struct svc_rdma_write_info, wi_list);
- list_del(&info->wi_list);
-
- cc = &info->wi_cc;
- svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
- svc_rdma_write_info_free(info);
- }
-}
-
/**
* svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
* @rdma: controlling transport
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
+ struct svc_rdma_write_info *info =
+ container_of(cc, struct svc_rdma_write_info, wi_cc);
switch (wc->status) {
case IB_WC_SUCCESS:
trace_svcrdma_wc_write(&cc->cc_cid);
- return;
+ break;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
break;
trace_svcrdma_wc_write_err(wc, &cc->cc_cid);
}
- /* The RDMA Write has flushed, so the client won't get
- * some of the outgoing RPC message. Signal the loss
- * to the client by closing the connection.
- */
- svc_xprt_deferred_close(&rdma->sc_xprt);
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
+
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ svc_xprt_deferred_close(&rdma->sc_xprt);
+
+ svc_rdma_write_info_free(info);
}
/**
return xdr->len;
}
-/* Link Write WRs for @chunk onto @sctxt's WR chain.
- */
-static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *sctxt,
- const struct svc_rdma_chunk *chunk,
- const struct xdr_buf *xdr)
+static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_chunk *chunk,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_write_info *info;
struct svc_rdma_chunk_ctxt *cc;
- struct ib_send_wr *first_wr;
struct xdr_buf payload;
- struct list_head *pos;
- struct ib_cqe *cqe;
int ret;
if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position,
if (ret != payload.len)
goto out_err;
- ret = -EINVAL;
- if (unlikely(cc->cc_sqecount > rdma->sc_sq_depth))
- goto out_err;
-
- first_wr = sctxt->sc_wr_chain;
- cqe = &cc->cc_cqe;
- list_for_each(pos, &cc->cc_rwctxts) {
- struct svc_rdma_rw_ctxt *rwc;
-
- rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
- first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
- rdma->sc_port_num, cqe, first_wr);
- cqe = NULL;
- }
- sctxt->sc_wr_chain = first_wr;
- sctxt->sc_sqecount += cc->cc_sqecount;
- list_add(&info->wi_list, &sctxt->sc_write_info_list);
-
trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
+ ret = svc_rdma_post_chunk_ctxt(rdma, cc);
+ if (ret < 0)
+ goto out_err;
return 0;
out_err:
}
/**
- * svc_rdma_prepare_write_list - Construct WR chain for sending Write list
+ * svc_rdma_send_write_list - Send all chunks on the Write list
* @rdma: controlling RDMA transport
- * @write_pcl: Write list provisioned by the client
- * @sctxt: Send WR resources
+ * @rctxt: Write list provisioned by the client
* @xdr: xdr_buf containing an RPC Reply message
*
* Returns zero on success, or a negative errno if one or more
* Write chunks could not be sent.
*/
-int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
- const struct svc_rdma_pcl *write_pcl,
- struct svc_rdma_send_ctxt *sctxt,
- const struct xdr_buf *xdr)
+int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_recv_ctxt *rctxt,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_chunk *chunk;
int ret;
- pcl_for_each_chunk(chunk, write_pcl) {
+ pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) {
if (!chunk->ch_payload_length)
break;
- ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr);
+ ret = svc_rdma_send_write_chunk(rdma, chunk, xdr);
if (ret < 0)
return ret;
}
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
ctxt->sc_cqe.done = svc_rdma_wc_send;
- INIT_LIST_HEAD(&ctxt->sc_write_info_list);
ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
rdma->sc_max_req_size);
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
- svc_rdma_write_chunk_release(rdma, ctxt);
svc_rdma_reply_chunk_release(rdma, ctxt);
if (ctxt->sc_page_count)
if (!p)
goto put_ctxt;
- ret = svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt,
- &rqstp->rq_res);
+ ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res);
if (ret < 0)
goto put_ctxt;
static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx)
{
- return ctx->strp.msg_ready;
+ return READ_ONCE(ctx->strp.msg_ready);
}
static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx)
if (strp->stm.full_len && strp->stm.full_len == skb->len) {
desc->count = 0;
- strp->msg_ready = 1;
+ WRITE_ONCE(strp->msg_ready, 1);
tls_rx_msg_ready(strp);
}
if (!tls_strp_check_queue_ok(strp))
return tls_strp_read_copy(strp, false);
- strp->msg_ready = 1;
+ WRITE_ONCE(strp->msg_ready, 1);
tls_rx_msg_ready(strp);
return 0;
else
tls_strp_flush_anchor_copy(strp);
- strp->msg_ready = 0;
+ WRITE_ONCE(strp->msg_ready, 0);
memset(&strp->stm, 0, sizeof(strp->stm));
tls_strp_check_rcv(strp);
if (unlikely(flags & MSG_ERRQUEUE))
return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
- psock = sk_psock_get(sk);
err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT);
if (err < 0)
return err;
+ psock = sk_psock_get(sk);
bpf_strp_enabled = sk_psock_strp_enabled(psock);
/* If crypto failed the connection is broken */
}
/* Drain records from the rx_list & copy if required */
- if (is_peek || is_kvec)
+ if (is_peek)
err = process_rx_list(ctx, msg, &control, copied + peeked,
decrypted - peeked, is_peek, NULL);
else
err = process_rx_list(ctx, msg, &control, 0,
async_copy_bytes, is_peek, NULL);
+
+ /* we could have copied less than we wanted, and possibly nothing */
+ decrypted += max(err, 0) - async_copy_bytes;
}
copied += decrypted;
WRITE_ONCE(u->oob_skb, NULL);
consume_skb(skb);
}
- } else if (!(flags & MSG_PEEK)) {
+ } else if (flags & MSG_PEEK) {
+ skb = NULL;
+ } else {
skb_unlink(skb, &sk->sk_receive_queue);
- consume_skb(skb);
+ WRITE_ONCE(u->oob_skb, NULL);
+ if (!WARN_ON_ONCE(skb_unref(skb)))
+ kfree_skb(skb);
skb = skb_peek(&sk->sk_receive_queue);
}
}
last = skb = skb_peek(&sk->sk_receive_queue);
last_len = last ? last->len : 0;
+again:
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
if (skb) {
skb = manage_oob(skb, sk, flags, copied);
- if (!skb) {
+ if (!skb && copied) {
unix_state_unlock(sk);
- if (copied)
- break;
- goto redo;
+ break;
}
}
#endif
-again:
if (skb == NULL) {
if (copied >= target)
goto unlock;
* receive queues. Other, non candidate sockets _can_ be
* added to queue, so we must make sure only to touch
* candidates.
+ *
+ * Embryos, though never candidates themselves, affect which
+ * candidates are reachable by the garbage collector. Before
+ * being added to a listener's queue, an embryo may already
+ * receive data carrying SCM_RIGHTS, potentially making the
+ * passed socket a candidate that is not yet reachable by the
+ * collector. It becomes reachable once the embryo is
+ * enqueued. Therefore, we must ensure that no SCM-laden
+ * embryo appears in a (candidate) listener's queue between
+ * consecutive scan_children() calls.
*/
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+ struct sock *sk = &u->sk;
long total_refs;
- total_refs = file_count(u->sk.sk_socket->file);
+ total_refs = file_count(sk->sk_socket->file);
WARN_ON_ONCE(!u->inflight);
WARN_ON_ONCE(total_refs < u->inflight);
list_move_tail(&u->link, &gc_candidates);
__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+
+ if (sk->sk_state == TCP_LISTEN) {
+ unix_state_lock_nested(sk, U_LOCK_GC_LISTENER);
+ unix_state_unlock(sk);
+ }
}
}
if (!skb)
break;
- virtio_transport_deliver_tap_pkt(skb);
reply = virtio_vsock_skb_reply(skb);
sgs = vsock->out_sgs;
sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
break;
}
+ virtio_transport_deliver_tap_pkt(skb);
+
if (reply) {
struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
int val;
error:
for (i = 0; i < new_coalesce.n_rules; i++) {
tmp_rule = &new_coalesce.rules[i];
+ if (!tmp_rule)
+ continue;
for (j = 0; j < tmp_rule->n_patterns; j++)
kfree(tmp_rule->patterns[j].mask);
kfree(tmp_rule->patterns);
TRACE_EVENT(rdev_dump_mpp,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx,
u8 *dst, u8 *mpp),
- TP_ARGS(wiphy, netdev, _idx, mpp, dst),
+ TP_ARGS(wiphy, netdev, _idx, dst, mpp),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
DECLARE_EVENT_CLASS(tx_rx_evt,
TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx),
- TP_ARGS(wiphy, rx, tx),
+ TP_ARGS(wiphy, tx, rx),
TP_STRUCT__entry(
WIPHY_ENTRY
__field(u32, tx)
DEFINE_EVENT(tx_rx_evt, rdev_set_antenna,
TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx),
- TP_ARGS(wiphy, rx, tx)
+ TP_ARGS(wiphy, tx, rx)
);
DECLARE_EVENT_CLASS(wiphy_netdev_id_evt,
* Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com>
* Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2024 Intel Corporation
*
* (As all part of the Linux kernel, this file is GPL)
*/
dev->ieee80211_ptr->wiphy->wext &&
dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) {
wireless_warn_cfg80211_wext();
- if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)
+ if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO |
+ WIPHY_FLAG_DISABLE_WEXT))
return NULL;
return dev->ieee80211_ptr->wiphy->wext->get_wireless_stats(dev);
}
#ifdef CONFIG_CFG80211_WEXT
if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy) {
wireless_warn_cfg80211_wext();
- if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)
+ if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO |
+ WIPHY_FLAG_DISABLE_WEXT))
return NULL;
handlers = dev->ieee80211_ptr->wiphy->wext;
}
struct xsk_queue **q;
int entries;
+ if (optlen < sizeof(entries))
+ return -EINVAL;
if (copy_from_sockptr(&entries, optval, sizeof(entries)))
return -EFAULT;
KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
+KBUILD_CFLAGS += -Wno-override-init # alias for -Wno-initializer-overrides in clang
+
ifdef CONFIG_CC_IS_CLANG
# Clang before clang-16 would warn on default argument promotions.
ifneq ($(call clang-min-version, 160000),y)
KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
-ifdef CONFIG_CC_IS_CLANG
-KBUILD_CFLAGS += -Winitializer-overrides
-endif
-
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
else
KBUILD_CFLAGS += -Wno-type-limits
KBUILD_CFLAGS += -Wno-shift-negative-value
-ifdef CONFIG_CC_IS_CLANG
-KBUILD_CFLAGS += -Wno-initializer-overrides
-else
+ifdef CONFIG_CC_IS_GCC
KBUILD_CFLAGS += -Wno-maybe-uninitialized
endif
part-of-module = y
quiet_cmd_cc_o_c = CC [M] $@
- cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV), $(c_flags)) -c -o $@ $<
+ cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV) $(CFLAGS_KCSAN), $(c_flags)) -c -o $@ $<
%.mod.o: %.mod.c FORCE
$(call if_changed_dep,cc_o_c)
version = version.stdout.decode().rstrip()
except:
try:
- version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot,
- capture_output=True, check=True)
+ version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'],
+ cwd=linuxRoot, capture_output=True, check=True)
version = version.stdout.decode().rstrip()
except:
return 'Linux'
return false;
if (STRING_EQUAL(section, ".entry.text"))
return false;
+ if (STRING_EQUAL(section, ".head.text"))
+ return false;
}
return track_frame_size >= 0;
continue;
}
sym_set_tristate_value(child->sym, yes);
- for (child = child->list; child; child = child->next) {
- indent += 2;
- conf(child);
- indent -= 2;
- }
return 1;
}
}
struct property *menu_add_prompt(enum prop_type type, char *prompt, struct expr *dep);
void menu_add_expr(enum prop_type type, struct expr *expr, struct expr *dep);
void menu_add_symbol(enum prop_type type, struct symbol *sym, struct expr *dep);
-void menu_finalize(struct menu *parent);
+void menu_finalize(void);
void menu_set_type(int type);
extern struct menu rootmenu;
}
do_resize:
- if (getmaxy(stdscr) < (height + CHECKLIST_HEIGTH_MIN))
+ if (getmaxy(stdscr) < (height + CHECKLIST_HEIGHT_MIN))
return -ERRDISPLAYTOOSMALL;
if (getmaxx(stdscr) < (width + CHECKLIST_WIDTH_MIN))
return -ERRDISPLAYTOOSMALL;
int on_key_resize(void);
/* minimum (re)size values */
-#define CHECKLIST_HEIGTH_MIN 6 /* For dialog_checklist() */
+#define CHECKLIST_HEIGHT_MIN 6 /* For dialog_checklist() */
#define CHECKLIST_WIDTH_MIN 6
-#define INPUTBOX_HEIGTH_MIN 2 /* For dialog_inputbox() */
+#define INPUTBOX_HEIGHT_MIN 2 /* For dialog_inputbox() */
#define INPUTBOX_WIDTH_MIN 2
-#define MENUBOX_HEIGTH_MIN 15 /* For dialog_menu() */
+#define MENUBOX_HEIGHT_MIN 15 /* For dialog_menu() */
#define MENUBOX_WIDTH_MIN 65
-#define TEXTBOX_HEIGTH_MIN 8 /* For dialog_textbox() */
+#define TEXTBOX_HEIGHT_MIN 8 /* For dialog_textbox() */
#define TEXTBOX_WIDTH_MIN 8
-#define YESNO_HEIGTH_MIN 4 /* For dialog_yesno() */
+#define YESNO_HEIGHT_MIN 4 /* For dialog_yesno() */
#define YESNO_WIDTH_MIN 4
-#define WINDOW_HEIGTH_MIN 19 /* For init_dialog() */
+#define WINDOW_HEIGHT_MIN 19 /* For init_dialog() */
#define WINDOW_WIDTH_MIN 80
int init_dialog(const char *backtitle);
strcpy(instr, init);
do_resize:
- if (getmaxy(stdscr) <= (height - INPUTBOX_HEIGTH_MIN))
+ if (getmaxy(stdscr) <= (height - INPUTBOX_HEIGHT_MIN))
return -ERRDISPLAYTOOSMALL;
if (getmaxx(stdscr) <= (width - INPUTBOX_WIDTH_MIN))
return -ERRDISPLAYTOOSMALL;
do_resize:
height = getmaxy(stdscr);
width = getmaxx(stdscr);
- if (height < MENUBOX_HEIGTH_MIN || width < MENUBOX_WIDTH_MIN)
+ if (height < MENUBOX_HEIGHT_MIN || width < MENUBOX_WIDTH_MIN)
return -ERRDISPLAYTOOSMALL;
height -= 4;
do_resize:
getmaxyx(stdscr, height, width);
- if (height < TEXTBOX_HEIGTH_MIN || width < TEXTBOX_WIDTH_MIN)
+ if (height < TEXTBOX_HEIGHT_MIN || width < TEXTBOX_WIDTH_MIN)
return -ERRDISPLAYTOOSMALL;
if (initial_height != 0)
height = initial_height;
getyx(stdscr, saved_y, saved_x);
getmaxyx(stdscr, height, width);
- if (height < WINDOW_HEIGTH_MIN || width < WINDOW_WIDTH_MIN) {
+ if (height < WINDOW_HEIGHT_MIN || width < WINDOW_WIDTH_MIN) {
endwin();
return -ERRDISPLAYTOOSMALL;
}
WINDOW *dialog;
do_resize:
- if (getmaxy(stdscr) < (height + YESNO_HEIGTH_MIN))
+ if (getmaxy(stdscr) < (height + YESNO_HEIGHT_MIN))
return -ERRDISPLAYTOOSMALL;
if (getmaxx(stdscr) < (width + YESNO_WIDTH_MIN))
return -ERRDISPLAYTOOSMALL;
dialog_clear();
res = dialog_checklist(prompt ? prompt : "Main Menu",
radiolist_instructions,
- MENUBOX_HEIGTH_MIN,
+ MENUBOX_HEIGHT_MIN,
MENUBOX_WIDTH_MIN,
- CHECKLIST_HEIGTH_MIN);
+ CHECKLIST_HEIGHT_MIN);
selected = item_activate_selected();
switch (res) {
case 0:
}
}
-void menu_finalize(struct menu *parent)
+static void _menu_finalize(struct menu *parent, bool inside_choice)
{
struct menu *menu, *last_menu;
struct symbol *sym;
* and propagate parent dependencies before moving on.
*/
- if (sym && sym_is_choice(sym)) {
+ bool is_choice = false;
+
+ if (sym && sym_is_choice(sym))
+ is_choice = true;
+
+ if (is_choice) {
if (sym->type == S_UNKNOWN) {
/* find the first choice value to find out choice type */
current_entry = parent;
}
}
- if (sym && sym_is_choice(sym))
+ if (is_choice)
expr_free(parentdep);
/*
* moving on
*/
for (menu = parent->list; menu; menu = menu->next)
- menu_finalize(menu);
- } else if (sym) {
+ _menu_finalize(menu, is_choice);
+ } else if (!inside_choice && sym) {
/*
* Automatic submenu creation. If sym is a symbol and A, B, C,
* ... are consecutive items (symbols, menus, ifs, etc.) that
/* Superset, put in submenu */
expr_free(dep2);
next:
- menu_finalize(menu);
+ _menu_finalize(menu, false);
menu->parent = parent;
last_menu = menu;
}
}
}
+void menu_finalize(void)
+{
+ _menu_finalize(&rootmenu, false);
+}
+
bool menu_has_prompt(struct menu *menu)
{
if (!menu->prompt)
menu_add_prompt(P_MENU, "Main menu", NULL);
}
- menu_finalize(&rootmenu);
+ menu_finalize();
menu = &rootmenu;
while (menu) {
save_struct_actual($2);
push_parameter($2, "$type $1", $arg, $file, $declaration_name);
- } elsif ($param =~ m/(.*?):(\d+)/) {
+ } elsif ($param =~ m/(.*?):(\w+)/) {
if ($type ne "") { # skip unnamed bit-fields
save_struct_actual($1);
push_parameter($1, "$type:$2", $arg, $file, $declaration_name)
static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
{
+ Elf_Sym *new_sym;
+
/* If the supplied symbol has a valid name, return it */
if (is_valid_name(elf, sym))
return sym;
* Strive to find a better symbol name, but the resulting name may not
* match the symbol referenced in the original code.
*/
- return symsearch_find_nearest(elf, addr, get_secindex(elf, sym),
- true, 20);
+ new_sym = symsearch_find_nearest(elf, addr, get_secindex(elf, sym),
+ true, 20);
+ return new_sym ? new_sym : sym;
}
static bool is_executable_section(struct elf_info *elf, unsigned int secndx)
EXPORT_SYMBOL(security_path_mknod);
/**
- * security_path_post_mknod() - Update inode security field after file creation
+ * security_path_post_mknod() - Update inode security after reg file creation
* @idmap: idmap of the mount
* @dentry: new file
*
- * Update inode security field after a file has been created.
+ * Update inode security field after a regular file has been created.
*/
void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry)
{
.kill_sb = sel_kill_sb,
};
-static struct vfsmount *selinuxfs_mount __ro_after_init;
struct path selinux_null __ro_after_init;
static int __init init_sel_fs(void)
return err;
}
- selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type);
- if (IS_ERR(selinuxfs_mount)) {
+ selinux_null.mnt = kern_mount(&sel_fs_type);
+ if (IS_ERR(selinux_null.mnt)) {
pr_err("selinuxfs: could not mount!\n");
- err = PTR_ERR(selinuxfs_mount);
- selinuxfs_mount = NULL;
+ err = PTR_ERR(selinux_null.mnt);
+ selinux_null.mnt = NULL;
+ return err;
}
+
selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root,
&null_name);
if (IS_ERR(selinux_null.dentry)) {
pr_err("selinuxfs: could not lookup null!\n");
err = PTR_ERR(selinux_null.dentry);
selinux_null.dentry = NULL;
+ return err;
}
return err;
struct device_node *child, *sound = NULL;
struct resource *r;
int i, layout = 0, rlen, ok = force;
- char node_name[6];
+ char node_name[8];
static const char *rnames[] = { "i2sbus: %pOFn (control)",
"i2sbus: %pOFn (tx)",
"i2sbus: %pOFn (rx)" };
midi1->note.group = midi2->note.group;
midi1->note.status = midi2->note.status;
midi1->note.channel = midi2->note.channel;
- switch (midi2->note.status << 4) {
+ switch (midi2->note.status) {
case UMP_MSG_STATUS_NOTE_ON:
case UMP_MSG_STATUS_NOTE_OFF:
midi1->note.note = midi2->note.note;
return NULL;
}
EXPORT_SYMBOL(intel_nhlt_get_endpoint_blob);
+
+int intel_nhlt_ssp_device_type(struct device *dev, struct nhlt_acpi_table *nhlt,
+ u8 virtual_bus_id)
+{
+ struct nhlt_endpoint *epnt;
+ int i;
+
+ if (!nhlt)
+ return -EINVAL;
+
+ epnt = (struct nhlt_endpoint *)nhlt->desc;
+ for (i = 0; i < nhlt->endpoint_count; i++) {
+ /* for SSP link the virtual bus id is the SSP port number */
+ if (epnt->linktype == NHLT_LINK_SSP &&
+ epnt->virtual_bus_id == virtual_bus_id) {
+ dev_dbg(dev, "SSP%d: dev_type=%d\n", virtual_bus_id,
+ epnt->device_type);
+ return epnt->device_type;
+ }
+
+ epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length);
+ }
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL(intel_nhlt_ssp_device_type);
dmasound_deinit();
}
-static struct platform_driver amiga_audio_driver = {
+/*
+ * amiga_audio_remove() lives in .exit.text. For drivers registered via
+ * module_platform_driver_probe() this is ok because they cannot get unbound at
+ * runtime. So mark the driver struct with __refdata to prevent modpost
+ * triggering a section mismatch warning.
+ */
+static struct platform_driver amiga_audio_driver __refdata = {
.remove_new = __exit_p(amiga_audio_remove),
.driver = {
.name = "amiga-audio",
/* check if sample is finished playing (non-looping only) */
if (bp != best + V_OFF && bp != best + V_FREE &&
(vp->reg.sample_mode & SNDRV_SFNT_SAMPLE_SINGLESHOT)) {
- val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch) - 64;
+ val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch);
if (val >= vp->reg.loopstart)
bp = best + V_OFF;
}
map = (hw->silent_page.addr << hw->address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0);
- addr = vp->reg.start + 64;
+ addr = vp->reg.start;
temp = vp->reg.parm.filterQ;
ccca = (temp << 28) | addr;
if (vp->apitch < 0xe400)
/* Q & current address (Q 4bit value, MSB) */
CCCA, ccca,
- /* cache */
- CCR, REG_VAL_PUT(CCR_CACHEINVALIDSIZE, 64),
-
/* reset volume */
VTFT, vtarget | vp->ftarget,
CVCF, vtarget | CVCF_CURRENTFILTER_MASK,
{ "10431F12", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
{ "10431F1F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 },
{ "10431F62", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
+ { "10433A60", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
{ "17AA386F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 },
+ { "17AA3877", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
+ { "17AA3878", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
{ "17AA38A9", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 2, -1, 0, 0, 0 },
{ "17AA38AB", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 2, -1, 0, 0, 0 },
{ "17AA38B4", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
{ "CSC3551", "10431F12", generic_dsd_config },
{ "CSC3551", "10431F1F", generic_dsd_config },
{ "CSC3551", "10431F62", generic_dsd_config },
+ { "CSC3551", "10433A60", generic_dsd_config },
{ "CSC3551", "17AA386F", generic_dsd_config },
+ { "CSC3551", "17AA3877", generic_dsd_config },
+ { "CSC3551", "17AA3878", generic_dsd_config },
{ "CSC3551", "17AA38A9", generic_dsd_config },
{ "CSC3551", "17AA38AB", generic_dsd_config },
{ "CSC3551", "17AA38B4", generic_dsd_config },
goto err;
}
- dev_dbg(cs35l56->base.dev, "DSP system name: '%s', amp name: '%s'\n",
- cs35l56->system_name, cs35l56->amp_name);
+ dev_info(cs35l56->base.dev, "DSP system name: '%s', amp name: '%s'\n",
+ cs35l56->system_name, cs35l56->amp_name);
regmap_multi_reg_write(cs35l56->base.regmap, cs35l56_hda_dai_config,
ARRAY_SIZE(cs35l56_hda_dai_config));
pm_runtime_mark_last_busy(cs35l56->base.dev);
pm_runtime_enable(cs35l56->base.dev);
+ cs35l56->base.init_done = true;
+
ret = component_add(cs35l56->base.dev, &cs35l56_hda_comp_ops);
if (ret) {
dev_err(cs35l56->base.dev, "Register component failed: %d\n", ret);
goto pm_err;
}
- cs35l56->base.init_done = true;
-
return 0;
pm_err:
{}
};
+static const struct acpi_device_id cs35l56_acpi_hda_match[] = {
+ { "CSC3554", 0 },
+ { "CSC3556", 0 },
+ { "CSC3557", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match);
+
static struct i2c_driver cs35l56_hda_i2c_driver = {
.driver = {
- .name = "cs35l56-hda",
- .pm = &cs35l56_hda_pm_ops,
+ .name = "cs35l56-hda",
+ .acpi_match_table = cs35l56_acpi_hda_match,
+ .pm = &cs35l56_hda_pm_ops,
},
.id_table = cs35l56_hda_i2c_id,
.probe = cs35l56_hda_i2c_probe,
{}
};
+static const struct acpi_device_id cs35l56_acpi_hda_match[] = {
+ { "CSC3554", 0 },
+ { "CSC3556", 0 },
+ { "CSC3557", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match);
+
static struct spi_driver cs35l56_hda_spi_driver = {
.driver = {
- .name = "cs35l56-hda",
- .pm = &cs35l56_hda_pm_ops,
+ .name = "cs35l56-hda",
+ .acpi_match_table = cs35l56_acpi_hda_match,
+ .pm = &cs35l56_hda_pm_ops,
},
.id_table = cs35l56_hda_spi_id,
.probe = cs35l56_hda_spi_probe,
comp_generic_fixup(cdc, action, "i2c", "CLSA0101", "-%s:00-cs35l41-hda.%d", 2);
}
+static void cs35l56_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+ comp_generic_fixup(cdc, action, "i2c", "CSC3556", "-%s:00-cs35l56-hda.%d", 2);
+}
+
+static void cs35l56_fixup_i2c_four(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+ comp_generic_fixup(cdc, action, "i2c", "CSC3556", "-%s:00-cs35l56-hda.%d", 4);
+}
+
+static void cs35l56_fixup_spi_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+ comp_generic_fixup(cdc, action, "spi", "CSC3556", "-%s:00-cs35l56-hda.%d", 2);
+}
+
static void cs35l56_fixup_spi_four(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
{
comp_generic_fixup(cdc, action, "spi", "CSC3556", "-%s:00-cs35l56-hda.%d", 4);
}
+static void alc285_fixup_asus_ga403u(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+ /*
+ * The same SSID has been re-used in different hardware, they have
+ * different codecs and the newer GA403U has a ALC285.
+ */
+ if (cdc->core.vendor_id == 0x10ec0285)
+ cs35l56_fixup_i2c_two(cdc, fix, action);
+ else
+ alc_fixup_inv_dmic(cdc, fix, action);
+}
+
static void tas2781_fixup_i2c(struct hda_codec *cdc,
const struct hda_fixup *fix, int action)
{
ALC256_FIXUP_ACER_SFG16_MICMUTE_LED,
ALC256_FIXUP_HEADPHONE_AMP_VOL,
ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX,
+ ALC285_FIXUP_CS35L56_SPI_2,
+ ALC285_FIXUP_CS35L56_I2C_2,
+ ALC285_FIXUP_CS35L56_I2C_4,
+ ALC285_FIXUP_ASUS_GA403U,
+ ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC,
+ ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1,
+ ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC,
+ ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1
};
/* A special fixup for Lenovo C940 and Yoga Duet 7;
.type = HDA_FIXUP_FUNC,
.v.func = alc245_fixup_hp_spectre_x360_eu0xxx,
},
+ [ALC285_FIXUP_CS35L56_SPI_2] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l56_fixup_spi_two,
+ },
+ [ALC285_FIXUP_CS35L56_I2C_2] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l56_fixup_i2c_two,
+ },
+ [ALC285_FIXUP_CS35L56_I2C_4] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l56_fixup_i2c_four,
+ },
+ [ALC285_FIXUP_ASUS_GA403U] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_asus_ga403u,
+ },
+ [ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x03a11050 },
+ { 0x1b, 0x03a11c30 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1
+ },
+ [ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_speaker2_to_dac1,
+ .chained = true,
+ .chain_id = ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC,
+ },
+ [ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x03a11050 },
+ { 0x1b, 0x03a11c30 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC285_FIXUP_CS35L56_SPI_2
+ },
+ [ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_speaker2_to_dac1,
+ .chained = true,
+ .chain_id = ALC285_FIXUP_ASUS_GA403U,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8cde, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
- SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
+ SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
+ SND_PCI_QUIRK(0x1043, 0x1c63, "ASUS GU605M", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1),
SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
SND_PCI_QUIRK(0x1043, 0x1da2, "ASUS UP6502ZA/ZD", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x1df3, "ASUS UM5606", ALC285_FIXUP_CS35L56_I2C_4),
SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502),
SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS),
SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS),
+ SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_CS35L56_I2C_2),
+ SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_CS35L56_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x10ec, 0x12f6, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
- SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE),
+ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x152d, 0x1082, "Quanta NL3", ALC269_FIXUP_LIFEBOOK),
+ SND_PCI_QUIRK(0x152d, 0x1262, "Huawei NBLB-WAX9N", ALC2XX_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1558, 0x0353, "Clevo V35[05]SN[CDE]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x1325, "Clevo N15[01][CW]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x17aa, 0x222e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460),
SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
+ SND_PCI_QUIRK(0x17aa, 0x2234, "Thinkpad ICE-1", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
SND_PCI_QUIRK(0x17aa, 0x386f, "Legion 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C),
+ SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x3878, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x3881, "YB9 dual power mode2 YC", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38a8, "Y780P AMD VECO dual", ALC287_FIXUP_TAS2781_I2C),
- SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
+ SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
struct snd_kcontrol *dsp_prog_ctl;
struct snd_kcontrol *dsp_conf_ctl;
struct snd_kcontrol *prof_ctl;
- struct snd_kcontrol *snd_ctls[3];
+ struct snd_kcontrol *snd_ctls[2];
};
static int tas2781_get_i2c_res(struct acpi_resource *ares, void *data)
pm_runtime_put_autosuspend(dev);
break;
default:
- dev_dbg(tas_hda->dev, "Playback action not supported: %d\n",
- action);
break;
}
}
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&tas_priv->codec_lock);
+
ucontrol->value.integer.value[0] = tas_priv->rcabin.profile_cfg_id;
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
+ __func__, kcontrol->id.name, tas_priv->rcabin.profile_cfg_id);
+
+ mutex_unlock(&tas_priv->codec_lock);
+
return 0;
}
val = clamp(nr_profile, 0, max);
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
+ __func__, kcontrol->id.name,
+ tas_priv->rcabin.profile_cfg_id, val);
+
if (tas_priv->rcabin.profile_cfg_id != val) {
tas_priv->rcabin.profile_cfg_id = val;
ret = 1;
}
+ mutex_unlock(&tas_priv->codec_lock);
+
return ret;
}
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&tas_priv->codec_lock);
+
ucontrol->value.integer.value[0] = tas_priv->cur_prog;
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
+ __func__, kcontrol->id.name, tas_priv->cur_prog);
+
+ mutex_unlock(&tas_priv->codec_lock);
+
return 0;
}
val = clamp(nr_program, 0, max);
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
+ __func__, kcontrol->id.name, tas_priv->cur_prog, val);
+
if (tas_priv->cur_prog != val) {
tas_priv->cur_prog = val;
ret = 1;
}
+ mutex_unlock(&tas_priv->codec_lock);
+
return ret;
}
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&tas_priv->codec_lock);
+
ucontrol->value.integer.value[0] = tas_priv->cur_conf;
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
+ __func__, kcontrol->id.name, tas_priv->cur_conf);
+
+ mutex_unlock(&tas_priv->codec_lock);
+
return 0;
}
val = clamp(nr_config, 0, max);
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
+ __func__, kcontrol->id.name, tas_priv->cur_conf, val);
+
if (tas_priv->cur_conf != val) {
tas_priv->cur_conf = val;
ret = 1;
}
+ mutex_unlock(&tas_priv->codec_lock);
+
return ret;
}
-/*
- * tas2781_digital_getvol - get the volum control
- * @kcontrol: control pointer
- * @ucontrol: User data
- * Customer Kcontrol for tas2781 is primarily for regmap booking, paging
- * depends on internal regmap mechanism.
- * tas2781 contains book and page two-level register map, especially
- * book switching will set the register BXXP00R7F, after switching to the
- * correct book, then leverage the mechanism for paging to access the
- * register.
- */
-static int tas2781_digital_getvol(struct snd_kcontrol *kcontrol,
+static int tas2781_amp_getvol(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
+ int ret;
- return tasdevice_digital_getvol(tas_priv, ucontrol, mc);
-}
+ mutex_lock(&tas_priv->codec_lock);
-static int tas2781_amp_getvol(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- struct soc_mixer_control *mc =
- (struct soc_mixer_control *)kcontrol->private_value;
+ ret = tasdevice_amp_getvol(tas_priv, ucontrol, mc);
- return tasdevice_amp_getvol(tas_priv, ucontrol, mc);
-}
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %ld\n",
+ __func__, kcontrol->id.name, ucontrol->value.integer.value[0]);
-static int tas2781_digital_putvol(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- struct soc_mixer_control *mc =
- (struct soc_mixer_control *)kcontrol->private_value;
+ mutex_unlock(&tas_priv->codec_lock);
- /* The check of the given value is in tasdevice_digital_putvol. */
- return tasdevice_digital_putvol(tas_priv, ucontrol, mc);
+ return ret;
}
static int tas2781_amp_putvol(struct snd_kcontrol *kcontrol,
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
+ int ret;
+
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: -> %ld\n",
+ __func__, kcontrol->id.name, ucontrol->value.integer.value[0]);
/* The check of the given value is in tasdevice_amp_putvol. */
- return tasdevice_amp_putvol(tas_priv, ucontrol, mc);
+ ret = tasdevice_amp_putvol(tas_priv, ucontrol, mc);
+
+ mutex_unlock(&tas_priv->codec_lock);
+
+ return ret;
}
static int tas2781_force_fwload_get(struct snd_kcontrol *kcontrol,
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&tas_priv->codec_lock);
+
ucontrol->value.integer.value[0] = (int)tas_priv->force_fwload_status;
- dev_dbg(tas_priv->dev, "%s : Force FWload %s\n", __func__,
- tas_priv->force_fwload_status ? "ON" : "OFF");
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
+ __func__, kcontrol->id.name, tas_priv->force_fwload_status);
+
+ mutex_unlock(&tas_priv->codec_lock);
return 0;
}
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
bool change, val = (bool)ucontrol->value.integer.value[0];
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
+ __func__, kcontrol->id.name,
+ tas_priv->force_fwload_status, val);
+
if (tas_priv->force_fwload_status == val)
change = false;
else {
change = true;
tas_priv->force_fwload_status = val;
}
- dev_dbg(tas_priv->dev, "%s : Force FWload %s\n", __func__,
- tas_priv->force_fwload_status ? "ON" : "OFF");
+
+ mutex_unlock(&tas_priv->codec_lock);
return change;
}
ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Gain", TAS2781_AMP_LEVEL,
1, 0, 20, 0, tas2781_amp_getvol,
tas2781_amp_putvol, amp_vol_tlv),
- ACARD_SINGLE_RANGE_EXT_TLV("Speaker Digital Gain", TAS2781_DVC_LVL,
- 0, 0, 200, 1, tas2781_digital_getvol,
- tas2781_digital_putvol, dvc_tlv),
ACARD_SINGLE_BOOL_EXT("Speaker Force Firmware Load", 0,
tas2781_force_fwload_get, tas2781_force_fwload_put),
};
static void tas2781_apply_calib(struct tasdevice_priv *tas_priv)
{
static const unsigned char page_array[CALIB_MAX] = {
- 0x17, 0x18, 0x18, 0x0d, 0x18
+ 0x17, 0x18, 0x18, 0x13, 0x18,
};
static const unsigned char rgno_array[CALIB_MAX] = {
- 0x74, 0x0c, 0x14, 0x3c, 0x7c
+ 0x74, 0x0c, 0x14, 0x70, 0x7c,
};
unsigned char *data;
int i, j, rc;
dreamcastcard->clicks++;
if (unlikely(dreamcastcard->clicks >= AICA_PERIOD_NUMBER))
dreamcastcard->clicks %= AICA_PERIOD_NUMBER;
- mod_timer(&dreamcastcard->timer, jiffies + 1);
+ if (snd_pcm_running(dreamcastcard->substream))
+ mod_timer(&dreamcastcard->timer, jiffies + 1);
}
}
/*timer function - so cannot sleep */
int play_period;
struct snd_pcm_runtime *runtime;
+ if (!snd_pcm_running(substream))
+ return;
runtime = substream->runtime;
dreamcastcard = substream->pcm->private_data;
/* Have we played out an additional period? */
return 0;
}
+static int snd_aicapcm_pcm_sync_stop(struct snd_pcm_substream *substream)
+{
+ struct snd_card_aica *dreamcastcard = substream->pcm->private_data;
+
+ del_timer_sync(&dreamcastcard->timer);
+ cancel_work_sync(&dreamcastcard->spu_dma_work);
+ return 0;
+}
+
static int snd_aicapcm_pcm_close(struct snd_pcm_substream
*substream)
{
struct snd_card_aica *dreamcastcard = substream->pcm->private_data;
- flush_work(&(dreamcastcard->spu_dma_work));
- del_timer(&dreamcastcard->timer);
dreamcastcard->substream = NULL;
kfree(dreamcastcard->channel);
spu_disable();
.prepare = snd_aicapcm_pcm_prepare,
.trigger = snd_aicapcm_pcm_trigger,
.pointer = snd_aicapcm_pcm_pointer,
+ .sync_stop = snd_aicapcm_pcm_sync_stop,
};
/* TO DO: set up to handle more than one pcm instance */
goto unregister_dmic_dev;
}
- acp_init(chip);
+ ret = acp_init(chip);
+ if (ret)
+ goto unregister_dmic_dev;
+
res = devm_kcalloc(&pci->dev, num_res, sizeof(struct resource), GFP_KERNEL);
if (!res) {
ret = -ENOMEM;
}
}
- if (flag == FLAG_AMD_LEGACY_ONLY_DMIC) {
- ret = check_acp_pdm(pci, chip);
- if (ret < 0)
- goto skip_pdev_creation;
- }
+ ret = check_acp_pdm(pci, chip);
+ if (ret < 0)
+ goto skip_pdev_creation;
chip->flag = flag;
memset(&pdevinfo, 0, sizeof(pdevinfo));
dev_dbg(dsp->dev, "Calibration: Ambient=%#x, Status=%#x, CalR=%d\n",
data->calAmbient, data->calStatus, data->calR);
+ if (list_empty(&dsp->ctl_list)) {
+ dev_info(dsp->dev, "Calibration disabled due to missing firmware controls\n");
+ return -ENOENT;
+ }
+
ret = cs_amp_write_cal_coeff(dsp, controls, controls->ambient, data->calAmbient);
if (ret)
return ret;
static int cs42l43_codec_suspend(struct device *dev)
{
- struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+ struct cs42l43 *cs42l43 = priv->core;
disable_irq(cs42l43->irq);
static int cs42l43_codec_suspend_noirq(struct device *dev)
{
- struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+ struct cs42l43 *cs42l43 = priv->core;
enable_irq(cs42l43->irq);
static int cs42l43_codec_resume(struct device *dev)
{
- struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+ struct cs42l43 *cs42l43 = priv->core;
enable_irq(cs42l43->irq);
static int cs42l43_codec_resume_noirq(struct device *dev)
{
- struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+ struct cs42l43 *cs42l43 = priv->core;
disable_irq(cs42l43->irq);
{125, 48000, 6000000, 0x04, 0x04, 0x1F, 0x2D, 0x8A, 0x0A, 0x27, 0x27},
{128, 8000, 1024000, 0x60, 0x00, 0x05, 0x75, 0x8A, 0x1B, 0x1F, 0x7F},
- {128, 16000, 2048000, 0x20, 0x00, 0x31, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
- {128, 44100, 5644800, 0xE0, 0x00, 0x01, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
- {128, 48000, 6144000, 0xE0, 0x00, 0x01, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
+ {128, 16000, 2048000, 0x20, 0x00, 0x31, 0x35, 0x08, 0x19, 0x1F, 0x3F},
+ {128, 44100, 5644800, 0xE0, 0x00, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
+ {128, 48000, 6144000, 0xE0, 0x00, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
{144, 8000, 1152000, 0x20, 0x00, 0x03, 0x35, 0x8A, 0x1B, 0x23, 0x47},
{144, 16000, 2304000, 0x20, 0x00, 0x11, 0x35, 0x8A, 0x1B, 0x23, 0x47},
{192, 8000, 1536000, 0x60, 0x02, 0x0D, 0x75, 0x8A, 0x1B, 0x1F, 0x7F},
{200, 48000, 9600000, 0x04, 0x04, 0x0F, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
{250, 48000, 12000000, 0x04, 0x04, 0x0F, 0x2D, 0xCA, 0x0A, 0x27, 0x27},
- {256, 8000, 2048000, 0x60, 0x00, 0x31, 0x35, 0x8A, 0x1B, 0x1F, 0x7F},
- {256, 16000, 4096000, 0x20, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
- {256, 44100, 11289600, 0xE0, 0x00, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
- {256, 48000, 12288000, 0xE0, 0x00, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
+ {256, 8000, 2048000, 0x60, 0x00, 0x31, 0x35, 0x08, 0x19, 0x1F, 0x7F},
+ {256, 16000, 4096000, 0x20, 0x00, 0x01, 0x35, 0x08, 0x19, 0x1F, 0x3F},
+ {256, 44100, 11289600, 0xE0, 0x01, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
+ {256, 48000, 12288000, 0xE0, 0x01, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
{288, 8000, 2304000, 0x20, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x23, 0x47},
{384, 8000, 3072000, 0x60, 0x02, 0x05, 0x75, 0x8A, 0x1B, 0x1F, 0x7F},
{384, 16000, 6144000, 0x20, 0x02, 0x03, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
{400, 48000, 19200000, 0xE4, 0x04, 0x35, 0x6d, 0xCA, 0x0A, 0x1F, 0x1F},
{500, 48000, 24000000, 0xF8, 0x04, 0x3F, 0x6D, 0xCA, 0x0A, 0x1F, 0x1F},
- {512, 8000, 4096000, 0x60, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x7F},
- {512, 16000, 8192000, 0x20, 0x00, 0x30, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
- {512, 44100, 22579200, 0xE0, 0x00, 0x00, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
- {512, 48000, 24576000, 0xE0, 0x00, 0x00, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
+ {512, 8000, 4096000, 0x60, 0x00, 0x01, 0x08, 0x19, 0x1B, 0x1F, 0x7F},
+ {512, 16000, 8192000, 0x20, 0x00, 0x30, 0x35, 0x08, 0x19, 0x1F, 0x3F},
+ {512, 44100, 22579200, 0xE0, 0x00, 0x00, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
+ {512, 48000, 24576000, 0xE0, 0x00, 0x00, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
{768, 8000, 6144000, 0x60, 0x02, 0x11, 0x35, 0x8A, 0x1B, 0x1F, 0x7F},
{768, 16000, 12288000, 0x20, 0x02, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
{768, 32000, 24576000, 0xE0, 0x02, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
dev_dbg(comp->dev, "Report hp remove event\n");
snd_soc_jack_report(es8326->jack, 0, SND_JACK_HEADSET);
/* mute adc when mic path switch */
- regmap_write(es8326->regmap, ES8326_ADC_SCALE, 0x33);
regmap_write(es8326->regmap, ES8326_ADC1_SRC, 0x44);
regmap_write(es8326->regmap, ES8326_ADC2_SRC, 0x66);
es8326->hp = 0;
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x0a);
regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x03);
+ regmap_write(es8326->regmap, ES8326_INT_SOURCE, ES8326_INT_SRC_PIN9);
/*
* Inverted HPJACK_POL bit to trigger one IRQ to double check HP Removal event
*/
* set auto-check mode, then restart jack_detect_work after 400ms.
* Don't report jack status.
*/
+ regmap_write(es8326->regmap, ES8326_INT_SOURCE,
+ (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
es8326_enable_micbias(es8326->component);
usleep_range(50000, 70000);
snd_soc_jack_report(es8326->jack,
SND_JACK_HEADSET, SND_JACK_HEADSET);
- regmap_write(es8326->regmap, ES8326_ADC_SCALE, 0x33);
regmap_update_bits(es8326->regmap, ES8326_PGA_PDN,
0x08, 0x08);
regmap_update_bits(es8326->regmap, ES8326_PGAGAIN,
regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E);
regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0);
usleep_range(10000, 15000);
- regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xe9);
+ regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xd9);
regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0xcb);
/* set headphone default type and detect pin */
regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x83);
es8326_enable_micbias(es8326->component);
usleep_range(50000, 70000);
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00);
- regmap_write(es8326->regmap, ES8326_INT_SOURCE,
- (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
+ regmap_write(es8326->regmap, ES8326_INT_SOURCE, ES8326_INT_SRC_PIN9);
regmap_write(es8326->regmap, ES8326_INTOUT_IO,
es8326->interrupt_clk);
regmap_write(es8326->regmap, ES8326_SDINOUT1_IO,
es8326->hp = 0;
es8326->hpl_vol = 0x03;
es8326->hpr_vol = 0x03;
+
+ es8326_irq(es8326->irq, es8326);
return 0;
}
cancel_delayed_work_sync(&es8326->jack_detect_work);
es8326_disable_micbias(component);
es8326->calibrated = false;
+ regmap_write(es8326->regmap, ES8326_CLK_MUX, 0x2d);
+ regmap_write(es8326->regmap, ES8326_DAC2HPMIX, 0x00);
+ regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b);
regmap_write(es8326->regmap, ES8326_CLK_CTL, ES8326_CLK_OFF);
regcache_cache_only(es8326->regmap, true);
regcache_mark_dirty(es8326->regmap);
#define ES8326_MUTE (3 << 0)
/* ES8326_CLK_CTL */
-#define ES8326_CLK_ON (0x7e << 0)
+#define ES8326_CLK_ON (0x7f << 0)
#define ES8326_CLK_OFF (0 << 0)
/* ES8326_CLK_INV */
retval = sdw_stream_add_slave(rt1316->sdw_slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (rt1316->bq_params_cnt) {
rt1316->bq_params = devm_kzalloc(dev, rt1316->bq_params_cnt, GFP_KERNEL);
if (!rt1316->bq_params) {
- dev_err(dev, "Could not allocate bq_params memory\n");
+ dev_err(dev, "%s: Could not allocate bq_params memory\n", __func__);
ret = -ENOMEM;
} else {
ret = device_property_read_u8_array(dev, "realtek,bq-params", rt1316->bq_params, rt1316->bq_params_cnt);
if (ret < 0)
- dev_err(dev, "Could not read list of realtek,bq-params\n");
+ dev_err(dev, "%s: Could not read list of realtek,bq-params\n", __func__);
}
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT1316_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
retval = sdw_stream_add_slave(rt1318->sdw_slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
sampling_rate = RT1318_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT1318_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
return -ETIMEDOUT;
}
retval = sdw_stream_add_slave(rt5682->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
&rt5682_sdw_indirect_regmap);
if (IS_ERR(rt5682->regmap)) {
ret = PTR_ERR(rt5682->regmap);
- dev_err(dev, "Failed to allocate register map: %d\n",
- ret);
+ dev_err(dev, "%s: Failed to allocate register map: %d\n",
+ __func__, ret);
return ret;
}
}
if (val != DEVICE_ID) {
- dev_err(dev, "Device with ID register %x is not rt5682\n", val);
+ dev_err(dev, "%s: Device with ID register %x is not rt5682\n", __func__, val);
ret = -ENODEV;
goto err_nodev;
}
ret = rt5682_clock_config(&slave->dev);
if (ret < 0)
- dev_err(&slave->dev, "Invalid clk config");
+ dev_err(&slave->dev, "%s: Invalid clk config", __func__);
return ret;
}
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt5682->disable_irq_lock);
if (rt5682->disable_irq == true) {
- mutex_lock(&rt5682->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
rt5682->disable_irq = false;
- mutex_unlock(&rt5682->disable_irq_lock);
}
+ mutex_unlock(&rt5682->disable_irq_lock);
goto regmap_sync;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT5682_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
ret = regmap_write(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
*value = 0;
ret = regmap_read(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
port_config.num += 2;
break;
default:
- dev_err(component->dev, "Invalid DAI id %d\n", dai->id);
+ dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id);
return -EINVAL;
}
retval = sdw_stream_add_slave(rt700->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
/* bit 3:0 Number of Channel */
val |= (params_channels(params) - 1);
} else {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt711->disable_irq_lock);
if (rt711->disable_irq == true) {
- mutex_lock(&rt711->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
rt711->disable_irq = false;
- mutex_unlock(&rt711->disable_irq_lock);
}
+ mutex_unlock(&rt711->disable_irq_lock);
goto regmap_sync;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT711_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt711->slave->dev,
- "Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt711->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
retval = sdw_stream_add_slave(rt711->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (params_channels(params) > 16) {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
sampling_rate = RT711_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
ret = rt711_clock_config(&slave->dev);
if (ret < 0)
- dev_err(&slave->dev, "Invalid clk config");
+ dev_err(&slave->dev, "%s: Invalid clk config", __func__);
return ret;
}
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt711->disable_irq_lock);
if (rt711->disable_irq == true) {
- mutex_lock(&rt711->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
rt711->disable_irq = false;
- mutex_unlock(&rt711->disable_irq_lock);
}
+ mutex_unlock(&rt711->disable_irq_lock);
goto regmap_sync;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT711_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
return -ETIMEDOUT;
}
ret = regmap_write(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
*value = 0;
ret = regmap_read(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
RT711_HP_JD_FINAL_RESULT_CTL_JD12);
break;
default:
- dev_warn(rt711->component->dev, "Wrong JD source\n");
+ dev_warn(rt711->component->dev, "%s: Wrong JD source\n", __func__);
break;
}
retval = sdw_stream_add_slave(rt711->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
/* bit 3:0 Number of Channel */
val |= (params_channels(params) - 1);
} else {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt712->slave->dev,
- "Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt712->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
for (i = 0; i < p->count; i++) {
err = regmap_write(rt712->mbq_regmap, p->reg_base + i, gain_val[i]);
if (err < 0)
- dev_err(&rt712->slave->dev, "0x%08x can't be set\n", p->reg_base + i);
+ dev_err(&rt712->slave->dev, "%s: 0x%08x can't be set\n",
+ __func__, p->reg_base + i);
}
return changed;
retval = sdw_stream_add_slave(rt712->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (params_channels(params) > 4) {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
sampling_rate = RT712_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT712_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n",
+ __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt712->disable_irq_lock);
if (rt712->disable_irq == true) {
- mutex_lock(&rt712->disable_irq_lock);
+
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
rt712->disable_irq = false;
- mutex_unlock(&rt712->disable_irq_lock);
}
+ mutex_unlock(&rt712->disable_irq_lock);
goto regmap_sync;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT712_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt712->slave->dev,
- "Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt712->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
retval = sdw_stream_add_slave(rt712->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (params_channels(params) > 16) {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
sampling_rate = RT712_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
sampling_rate);
break;
default:
- dev_err(component->dev, "Wrong DAI id\n");
+ dev_err(component->dev, "%s: Wrong DAI id\n", __func__);
return -EINVAL;
}
time = wait_for_completion_timeout(&slave->enumeration_complete,
msecs_to_jiffies(RT715_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Enumeration not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Enumeration not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt715->slave->dev,
- "Failed to set private value: %08x <= %04x %d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %08x <= %04x %d\n",
+ __func__, addr, value, ret);
return ret;
}
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt715->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
mc->shift);
ret = regmap_write(rt715->mbq_regmap, mc->reg + i, gain_val);
if (ret != 0) {
- dev_err(component->dev, "Failed to write 0x%x=0x%x\n",
- mc->reg + i, gain_val);
+ dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n",
+ __func__, mc->reg + i, gain_val);
return ret;
}
}
ret = regmap_write(rt715->mbq_regmap, reg_base + i,
gain_val);
if (ret != 0) {
- dev_err(component->dev, "Failed to write 0x%x=0x%x\n",
- reg_base + i, gain_val);
+ dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n",
+ __func__, reg_base + i, gain_val);
return ret;
}
}
reg = i < 7 ? reg_base + i : (reg_base - 1) | BIT(15);
ret = regmap_write(rt715->mbq_regmap, reg, gain_val);
if (ret != 0) {
- dev_err(component->dev, "Failed to write 0x%x=0x%x\n",
- reg, gain_val);
+ dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n",
+ __func__, reg, gain_val);
return ret;
}
}
for (i = 0; i < 2; i++) {
ret = regmap_read(rt715->mbq_regmap, mc->reg + i, &val);
if (ret < 0) {
- dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
- mc->reg + i, ret);
+ dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+ __func__, mc->reg + i, ret);
return ret;
}
ucontrol->value.integer.value[i] = rt715_sdca_get_gain(val, mc->shift);
for (i = 0; i < 4; i++) {
ret = regmap_read(rt715->mbq_regmap, reg_base + i, &val);
if (ret < 0) {
- dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
- reg_base + i, ret);
+ dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+ __func__, reg_base + i, ret);
return ret;
}
ucontrol->value.integer.value[i] = rt715_sdca_get_gain(val, gain_sft);
for (i = 0; i < 8; i += 2) {
ret = regmap_read(rt715->mbq_regmap, reg_base + i, &val_l);
if (ret < 0) {
- dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
- reg_base + i, ret);
+ dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+ __func__, reg_base + i, ret);
return ret;
}
ucontrol->value.integer.value[i] = (val_l >> gain_sft) / 10;
reg = (i == 6) ? (reg_base - 1) | BIT(15) : reg_base + 1 + i;
ret = regmap_read(rt715->mbq_regmap, reg, &val_r);
if (ret < 0) {
- dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
- reg, ret);
+ dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+ __func__, reg, ret);
return ret;
}
ucontrol->value.integer.value[i + 1] = (val_r >> gain_sft) / 10;
0xaf00);
break;
default:
- dev_err(component->dev, "Invalid DAI id %d\n", dai->id);
+ dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id);
return -EINVAL;
}
retval = sdw_stream_add_slave(rt715->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(component->dev, "Unable to configure port, retval:%d\n",
- retval);
+ dev_err(component->dev, "%s: Unable to configure port, retval:%d\n",
+ __func__, retval);
return retval;
}
val = 0xf;
break;
default:
- dev_err(component->dev, "Unsupported sample rate %d\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Unsupported sample rate %d\n",
+ __func__, params_rate(params));
return -EINVAL;
}
ret = rt715_clock_config(&slave->dev);
if (ret < 0)
- dev_err(&slave->dev, "Invalid clk config");
+ dev_err(&slave->dev, "%s: Invalid clk config", __func__);
return 0;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT715_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
ret = regmap_write(regmap, addr, value);
if (ret < 0) {
- pr_err("Failed to set private value: %08x <= %04x %d\n",
- addr, value, ret);
+ pr_err("%s: Failed to set private value: %08x <= %04x %d\n",
+ __func__, addr, value, ret);
}
return ret;
ret = regmap_write(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
*value = 0;
ret = regmap_read(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
rt715_index_write(rt715->regmap, RT715_SDW_INPUT_SEL, 0xa000);
break;
default:
- dev_err(component->dev, "Invalid DAI id %d\n", dai->id);
+ dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id);
return -EINVAL;
}
retval = sdw_stream_add_slave(rt715->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
val |= 0x0 << 8;
break;
default:
- dev_err(component->dev, "Unsupported sample rate %d\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Unsupported sample rate %d\n",
+ __func__, params_rate(params));
return -EINVAL;
}
/* bit 3:0 Number of Channel */
val |= (params_channels(params) - 1);
} else {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt722->disable_irq_lock);
if (rt722->disable_irq == true) {
- mutex_lock(&rt722->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_6);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
rt722->disable_irq = false;
- mutex_unlock(&rt722->disable_irq_lock);
}
+ mutex_unlock(&rt722->disable_irq_lock);
goto regmap_sync;
}
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt722->slave->dev,
- "Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt722->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
for (i = 0; i < p->count; i++) {
err = regmap_write(rt722->mbq_regmap, p->reg_base + i, gain_val[i]);
if (err < 0)
- dev_err(&rt722->slave->dev, "%#08x can't be set\n", p->reg_base + i);
+ dev_err(&rt722->slave->dev, "%s: %#08x can't be set\n",
+ __func__, p->reg_base + i);
}
return changed;
retval = sdw_stream_add_slave(rt722->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (params_channels(params) > 16) {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
sampling_rate = RT722_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type,
unsigned int alg, void *buf, size_t len)
{
- struct cs_dsp_coeff_ctl *cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
+ struct cs_dsp_coeff_ctl *cs_ctl;
struct wm_coeff_ctl *ctl;
int ret;
mutex_lock(&dsp->cs_dsp.pwr_lock);
+ cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len);
mutex_unlock(&dsp->cs_dsp.pwr_lock);
module_platform_driver(avs_da7219_driver);
+MODULE_DESCRIPTION("Intel da7219 machine driver");
MODULE_AUTHOR("Cezary Rojewski <cezary.rojewski@intel.com>");
MODULE_LICENSE("GPL");
module_platform_driver(avs_dmic_driver);
+MODULE_DESCRIPTION("Intel DMIC machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_es8336_driver);
+MODULE_DESCRIPTION("Intel es8336 machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_i2s_test_driver);
+MODULE_DESCRIPTION("Intel i2s test machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_max98357a_driver)
+MODULE_DESCRIPTION("Intel max98357a machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_max98373_driver)
+MODULE_DESCRIPTION("Intel max98373 machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_max98927_driver)
+MODULE_DESCRIPTION("Intel max98927 machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_nau8825_driver)
+MODULE_DESCRIPTION("Intel nau8825 machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_probe_mb_driver);
+MODULE_DESCRIPTION("Intel probe machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_rt274_driver);
+MODULE_DESCRIPTION("Intel rt274 machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_rt286_driver);
+MODULE_DESCRIPTION("Intel rt286 machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_rt298_driver);
+MODULE_DESCRIPTION("Intel rt298 machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_rt5514_driver);
+MODULE_DESCRIPTION("Intel rt5514 machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_rt5663_driver);
+MODULE_DESCRIPTION("Intel rt5663 machine driver");
MODULE_LICENSE("GPL");
module_platform_driver(avs_rt5682_driver)
+MODULE_DESCRIPTION("Intel rt5682 machine driver");
MODULE_AUTHOR("Cezary Rojewski <cezary.rojewski@intel.com>");
MODULE_LICENSE("GPL");
module_platform_driver(avs_ssm4567_driver)
+MODULE_DESCRIPTION("Intel ssm4567 machine driver");
MODULE_LICENSE("GPL");
int max = mc->max;
int min = mc->min;
int sign_bit = mc->sign_bit;
- unsigned int mask = (1 << fls(max)) - 1;
+ unsigned int mask = (1ULL << fls(max)) - 1;
unsigned int invert = mc->invert;
int val;
int ret;
goto unregister_dev;
}
+ ret = acp_init(sdev);
+ if (ret < 0)
+ goto free_smn_dev;
+
sdev->ipc_irq = pci->irq;
ret = request_threaded_irq(sdev->ipc_irq, acp_irq_handler, acp_irq_thread,
IRQF_SHARED, "AudioDSP", sdev);
goto free_smn_dev;
}
- ret = acp_init(sdev);
- if (ret < 0)
- goto free_ipc_irq;
-
/* scan SoundWire capabilities exposed by DSDT */
ret = acp_sof_scan_sdw_devices(sdev, chip->sdw_acpi_dev_addr);
if (ret < 0) {
ret = snd_sof_probe(sdev);
if (ret < 0) {
dev_err(sdev->dev, "failed to probe DSP %d\n", ret);
- sof_ops_free(sdev);
- return ret;
+ goto err_sof_probe;
}
/* check machine info */
ret = validate_sof_ops(sdev);
if (ret < 0) {
snd_sof_remove(sdev);
+ snd_sof_remove_late(sdev);
return ret;
}
}
+ return 0;
+
err_machine_check:
- if (ret) {
- snd_sof_remove(sdev);
- sof_ops_free(sdev);
- }
+ snd_sof_remove(sdev);
+err_sof_probe:
+ snd_sof_remove_late(sdev);
+ sof_ops_free(sdev);
return ret;
}
.pcm_pointer = hda_dsp_pcm_pointer,
.pcm_ack = hda_dsp_pcm_ack,
+ .get_dai_frame_counter = hda_dsp_get_stream_llp,
+ .get_host_byte_counter = hda_dsp_get_stream_ldp,
+
/* firmware loading */
.load_firmware = snd_sof_load_firmware_raw,
#include <sound/pcm_params.h>
#include <sound/hdaudio_ext.h>
+#include <sound/hda_register.h>
#include <sound/hda-mlink.h>
#include <sound/sof/ipc4/header.h>
#include <uapi/sound/sof/header.h>
case SNDRV_PCM_TRIGGER_STOP:
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
snd_hdac_ext_stream_clear(hext_stream);
+
+ /*
+ * Save the LLP registers in case the stream is
+ * restarting due PAUSE_RELEASE, or START without a pcm
+ * close/open since in this case the LLP register is not reset
+ * to 0 and the delay calculation will return with invalid
+ * results.
+ */
+ hext_stream->pplcllpl = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPL);
+ hext_stream->pplcllpu = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPU);
break;
default:
dev_err(sdev->dev, "unknown trigger command %d\n", cmd);
struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata;
const struct sof_intel_dsp_desc *chip = hda->desc;
struct hdac_bus *bus = sof_to_bus(sdev);
+ bool imr_lost = false;
int ret, j;
/*
- * The memory used for IMR boot loses its content in deeper than S3 state
- * We must not try IMR boot on next power up (as it will fail).
- *
+ * The memory used for IMR boot loses its content in deeper than S3
+ * state on CAVS platforms.
+ * On ACE platforms due to the system architecture the IMR content is
+ * lost at S3 state already, they are tailored for s2idle use.
+ * We must not try IMR boot on next power up in these cases as it will
+ * fail.
+ */
+ if (sdev->system_suspend_target > SOF_SUSPEND_S3 ||
+ (chip->hw_ip_version >= SOF_INTEL_ACE_1_0 &&
+ sdev->system_suspend_target == SOF_SUSPEND_S3))
+ imr_lost = true;
+
+ /*
* In case of firmware crash or boot failure set the skip_imr_boot to true
* as well in order to try to re-load the firmware to do a 'cold' boot.
*/
- if (sdev->system_suspend_target > SOF_SUSPEND_S3 ||
- sdev->fw_state == SOF_FW_CRASHED ||
+ if (imr_lost || sdev->fw_state == SOF_FW_CRASHED ||
sdev->fw_state == SOF_FW_BOOT_FAILED)
hda->skip_imr_boot = true;
snd_pcm_hw_constraint_mask64(substream->runtime, SNDRV_PCM_HW_PARAM_FORMAT,
SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S32);
+ /*
+ * The dsp_max_burst_size_in_ms is the length of the maximum burst size
+ * of the host DMA in the ALSA buffer.
+ *
+ * On playback start the DMA will transfer dsp_max_burst_size_in_ms
+ * amount of data in one initial burst to fill up the host DMA buffer.
+ * Consequent DMA burst sizes are shorter and their length can vary.
+ * To make sure that userspace allocate large enough ALSA buffer we need
+ * to place a constraint on the buffer time.
+ *
+ * On capture the DMA will transfer 1ms chunks.
+ *
+ * Exact dsp_max_burst_size_in_ms constraint is racy, so set the
+ * constraint to a minimum of 2x dsp_max_burst_size_in_ms.
+ */
+ if (spcm->stream[direction].dsp_max_burst_size_in_ms)
+ snd_pcm_hw_constraint_minmax(substream->runtime,
+ SNDRV_PCM_HW_PARAM_BUFFER_TIME,
+ spcm->stream[direction].dsp_max_burst_size_in_ms * USEC_PER_MSEC * 2,
+ UINT_MAX);
+
/* binding pcm substream to hda stream */
substream->runtime->private_data = &dsp_stream->hstream;
+
+ /*
+ * Reset the llp cache values (they are used for LLP compensation in
+ * case the counter is not reset)
+ */
+ dsp_stream->pplcllpl = 0;
+ dsp_stream->pplcllpu = 0;
+
return 0;
}
return pos;
}
+
+#define merge_u64(u32_u, u32_l) (((u64)(u32_u) << 32) | (u32_l))
+
+/**
+ * hda_dsp_get_stream_llp - Retrieve the LLP (Linear Link Position) of the stream
+ * @sdev: SOF device
+ * @component: ASoC component
+ * @substream: PCM substream
+ *
+ * Returns the raw Linear Link Position value
+ */
+u64 hda_dsp_get_stream_llp(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ struct hdac_stream *hstream = substream->runtime->private_data;
+ struct hdac_ext_stream *hext_stream = stream_to_hdac_ext_stream(hstream);
+ u32 llp_l, llp_u;
+
+ /*
+ * The pplc_addr have been calculated during probe in
+ * hda_dsp_stream_init():
+ * pplc_addr = sdev->bar[HDA_DSP_PP_BAR] +
+ * SOF_HDA_PPLC_BASE +
+ * SOF_HDA_PPLC_MULTI * total_stream +
+ * SOF_HDA_PPLC_INTERVAL * stream_index
+ *
+ * Use this pre-calculated address to avoid repeated re-calculation.
+ */
+ llp_l = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPL);
+ llp_u = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPU);
+
+ /* Compensate the LLP counter with the saved offset */
+ if (hext_stream->pplcllpl || hext_stream->pplcllpu)
+ return merge_u64(llp_u, llp_l) -
+ merge_u64(hext_stream->pplcllpu, hext_stream->pplcllpl);
+
+ return merge_u64(llp_u, llp_l);
+}
+
+/**
+ * hda_dsp_get_stream_ldp - Retrieve the LDP (Linear DMA Position) of the stream
+ * @sdev: SOF device
+ * @component: ASoC component
+ * @substream: PCM substream
+ *
+ * Returns the raw Linear Link Position value
+ */
+u64 hda_dsp_get_stream_ldp(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ struct hdac_stream *hstream = substream->runtime->private_data;
+ struct hdac_ext_stream *hext_stream = stream_to_hdac_ext_stream(hstream);
+ u32 ldp_l, ldp_u;
+
+ /*
+ * The pphc_addr have been calculated during probe in
+ * hda_dsp_stream_init():
+ * pphc_addr = sdev->bar[HDA_DSP_PP_BAR] +
+ * SOF_HDA_PPHC_BASE +
+ * SOF_HDA_PPHC_INTERVAL * stream_index
+ *
+ * Use this pre-calculated address to avoid repeated re-calculation.
+ */
+ ldp_l = readl(hext_stream->pphc_addr + AZX_REG_PPHCLDPL);
+ ldp_u = readl(hext_stream->pphc_addr + AZX_REG_PPHCLDPU);
+
+ return ((u64)ldp_u << 32) | ldp_l;
+}
snd_pcm_uframes_t hda_dsp_stream_get_position(struct hdac_stream *hstream,
int direction, bool can_sleep);
+u64 hda_dsp_get_stream_llp(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream);
+u64 hda_dsp_get_stream_ldp(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream);
struct hdac_ext_stream *
hda_dsp_stream_get(struct snd_sof_dev *sdev, int direction, u32 flags);
};
/* this helps allows the DSP to setup DMIC/SSP */
-static int hdac_bus_offload_dmic_ssp(struct hdac_bus *bus)
+static int hdac_bus_offload_dmic_ssp(struct hdac_bus *bus, bool enable)
{
int ret;
- ret = hdac_bus_eml_enable_offload(bus, true, AZX_REG_ML_LEPTR_ID_INTEL_SSP, true);
+ ret = hdac_bus_eml_enable_offload(bus, true,
+ AZX_REG_ML_LEPTR_ID_INTEL_SSP, enable);
if (ret < 0)
return ret;
- ret = hdac_bus_eml_enable_offload(bus, true, AZX_REG_ML_LEPTR_ID_INTEL_DMIC, true);
+ ret = hdac_bus_eml_enable_offload(bus, true,
+ AZX_REG_ML_LEPTR_ID_INTEL_DMIC, enable);
if (ret < 0)
return ret;
if (ret < 0)
return ret;
- return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev));
+ return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true);
+}
+
+static void lnl_hda_dsp_remove(struct snd_sof_dev *sdev)
+{
+ int ret;
+
+ ret = hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), false);
+ if (ret < 0)
+ dev_warn(sdev->dev,
+ "Failed to disable offload for DMIC/SSP: %d\n", ret);
+
+ hda_dsp_remove(sdev);
}
static int lnl_hda_dsp_resume(struct snd_sof_dev *sdev)
if (ret < 0)
return ret;
- return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev));
+ return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true);
}
static int lnl_hda_dsp_runtime_resume(struct snd_sof_dev *sdev)
if (ret < 0)
return ret;
- return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev));
+ return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true);
}
static int lnl_dsp_post_fw_run(struct snd_sof_dev *sdev)
/* common defaults */
memcpy(&sof_lnl_ops, &sof_hda_common_ops, sizeof(struct snd_sof_dsp_ops));
- /* probe */
- if (!sdev->dspless_mode_selected)
+ /* probe/remove */
+ if (!sdev->dspless_mode_selected) {
sof_lnl_ops.probe = lnl_hda_dsp_probe;
+ sof_lnl_ops.remove = lnl_hda_dsp_remove;
+ }
/* shutdown */
sof_lnl_ops.shutdown = hda_dsp_shutdown;
sof_lnl_ops.runtime_resume = lnl_hda_dsp_runtime_resume;
}
- sof_lnl_ops.get_stream_position = mtl_dsp_get_stream_hda_link_position;
-
/* dsp core get/put */
sof_lnl_ops.core_get = mtl_dsp_core_get;
sof_lnl_ops.core_put = mtl_dsp_core_put;
return mtl_enable_interrupts(sdev, false);
}
-u64 mtl_dsp_get_stream_hda_link_position(struct snd_sof_dev *sdev,
- struct snd_soc_component *component,
- struct snd_pcm_substream *substream)
-{
- struct hdac_stream *hstream = substream->runtime->private_data;
- u32 llp_l, llp_u;
-
- llp_l = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, MTL_PPLCLLPL(hstream->index));
- llp_u = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, MTL_PPLCLLPU(hstream->index));
- return ((u64)llp_u << 32) | llp_l;
-}
-
int mtl_dsp_core_get(struct snd_sof_dev *sdev, int core)
{
const struct sof_ipc_pm_ops *pm_ops = sdev->ipc->ops->pm;
sof_mtl_ops.core_get = mtl_dsp_core_get;
sof_mtl_ops.core_put = mtl_dsp_core_put;
- sof_mtl_ops.get_stream_position = mtl_dsp_get_stream_hda_link_position;
-
sdev->private = kzalloc(sizeof(struct sof_ipc4_fw_data), GFP_KERNEL);
if (!sdev->private)
return -ENOMEM;
* Copyright(c) 2020-2022 Intel Corporation. All rights reserved.
*/
-/* HDA Registers */
-#define MTL_PPLCLLPL_BASE 0x948
-#define MTL_PPLCLLPU_STRIDE 0x10
-#define MTL_PPLCLLPL(x) (MTL_PPLCLLPL_BASE + (x) * MTL_PPLCLLPU_STRIDE)
-#define MTL_PPLCLLPU(x) (MTL_PPLCLLPL_BASE + 0x4 + (x) * MTL_PPLCLLPU_STRIDE)
-
/* DSP Registers */
#define MTL_HFDSSCS 0x1000
#define MTL_HFDSSCS_SPA_MASK BIT(16)
void mtl_ipc_dump(struct snd_sof_dev *sdev);
-u64 mtl_dsp_get_stream_hda_link_position(struct snd_sof_dev *sdev,
- struct snd_soc_component *component,
- struct snd_pcm_substream *substream);
-
int mtl_dsp_core_get(struct snd_sof_dev *sdev, int core);
int mtl_dsp_core_put(struct snd_sof_dev *sdev, int core);
#include <linux/debugfs.h>
#include <linux/sched/signal.h>
+#include <linux/sched/clock.h>
#include <sound/sof/ipc4/header.h>
#include "sof-priv.h"
#include "ipc4-priv.h"
const struct sof_ipc_ops *iops = sdev->ipc->ops;
struct sof_ipc4_msg msg;
u64 system_time;
- ktime_t kt;
int ret;
if (priv->mtrace_state != SOF_MTRACE_DISABLED)
msg.primary |= SOF_IPC4_MOD_INSTANCE(SOF_IPC4_MOD_INIT_BASEFW_INSTANCE_ID);
msg.extension = SOF_IPC4_MOD_EXT_MSG_PARAM_ID(SOF_IPC4_FW_PARAM_SYSTEM_TIME);
- /* The system time is in usec, UTC, epoch is 1601-01-01 00:00:00 */
- kt = ktime_add_us(ktime_get_real(), FW_EPOCH_DELTA * USEC_PER_SEC);
- system_time = ktime_to_us(kt);
+ /*
+ * local_clock() is used to align with dmesg, so both kernel and firmware logs have
+ * the same base and a minor delta due to the IPC. system time is in us format but
+ * local_clock() returns the time in ns, so convert to ns.
+ */
+ system_time = div64_u64(local_clock(), NSEC_PER_USEC);
msg.data_size = sizeof(system_time);
msg.data_ptr = &system_time;
ret = iops->set_get_data(sdev, &msg, msg.data_size, true);
#include "ipc4-topology.h"
#include "ipc4-fw-reg.h"
+/**
+ * struct sof_ipc4_timestamp_info - IPC4 timestamp info
+ * @host_copier: the host copier of the pcm stream
+ * @dai_copier: the dai copier of the pcm stream
+ * @stream_start_offset: reported by fw in memory window (converted to frames)
+ * @stream_end_offset: reported by fw in memory window (converted to frames)
+ * @llp_offset: llp offset in memory window
+ * @boundary: wrap boundary should be used for the LLP frame counter
+ * @delay: Calculated and stored in pointer callback. The stored value is
+ * returned in the delay callback.
+ */
+struct sof_ipc4_timestamp_info {
+ struct sof_ipc4_copier *host_copier;
+ struct sof_ipc4_copier *dai_copier;
+ u64 stream_start_offset;
+ u64 stream_end_offset;
+ u32 llp_offset;
+
+ u64 boundary;
+ snd_pcm_sframes_t delay;
+};
+
static int sof_ipc4_set_multi_pipeline_state(struct snd_sof_dev *sdev, u32 state,
struct ipc4_pipeline_set_state_data *trigger_list)
{
}
/* return if this is the final state */
- if (state == SOF_IPC4_PIPE_PAUSED)
+ if (state == SOF_IPC4_PIPE_PAUSED) {
+ struct sof_ipc4_timestamp_info *time_info;
+
+ /*
+ * Invalidate the stream_start_offset to make sure that it is
+ * going to be updated if the stream resumes
+ */
+ time_info = spcm->stream[substream->stream].private;
+ if (time_info)
+ time_info->stream_start_offset = SOF_IPC4_INVALID_STREAM_POSITION;
+
goto free;
+ }
skip_pause_transition:
/* else set the RUNNING/RESET state in the DSP */
ret = sof_ipc4_set_multi_pipeline_state(sdev, state, trigger_list);
/* determine the pipeline state */
switch (cmd) {
- case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
- state = SOF_IPC4_PIPE_PAUSED;
- break;
case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
case SNDRV_PCM_TRIGGER_RESUME:
case SNDRV_PCM_TRIGGER_START:
state = SOF_IPC4_PIPE_RUNNING;
break;
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
case SNDRV_PCM_TRIGGER_SUSPEND:
case SNDRV_PCM_TRIGGER_STOP:
state = SOF_IPC4_PIPE_PAUSED;
if (abi_version < SOF_IPC4_FW_REGS_ABI_VER)
support_info = false;
+ /* For delay reporting the get_host_byte_counter callback is needed */
+ if (!sof_ops(sdev) || !sof_ops(sdev)->get_host_byte_counter)
+ support_info = false;
+
for_each_pcm_streams(stream) {
pipeline_list = &spcm->stream[stream].pipeline_list;
struct sof_ipc4_copier *host_copier = time_info->host_copier;
struct sof_ipc4_copier *dai_copier = time_info->dai_copier;
struct sof_ipc4_pipeline_registers ppl_reg;
- u64 stream_start_position;
u32 dai_sample_size;
u32 ch, node_index;
u32 offset;
if (ppl_reg.stream_start_offset == SOF_IPC4_INVALID_STREAM_POSITION)
return -EINVAL;
- stream_start_position = ppl_reg.stream_start_offset;
ch = dai_copier->data.out_format.fmt_cfg;
ch = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(ch);
dai_sample_size = (dai_copier->data.out_format.bit_depth >> 3) * ch;
- /* convert offset to sample count */
- do_div(stream_start_position, dai_sample_size);
- time_info->stream_start_offset = stream_start_position;
+
+ /* convert offsets to frame count */
+ time_info->stream_start_offset = ppl_reg.stream_start_offset;
+ do_div(time_info->stream_start_offset, dai_sample_size);
+ time_info->stream_end_offset = ppl_reg.stream_end_offset;
+ do_div(time_info->stream_end_offset, dai_sample_size);
+
+ /*
+ * Calculate the wrap boundary need to be used for delay calculation
+ * The host counter is in bytes, it will wrap earlier than the frames
+ * based link counter.
+ */
+ time_info->boundary = div64_u64(~((u64)0),
+ frames_to_bytes(substream->runtime, 1));
+ /* Initialize the delay value to 0 (no delay) */
+ time_info->delay = 0;
return 0;
}
-static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component,
- struct snd_pcm_substream *substream)
+static int sof_ipc4_pcm_pointer(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream,
+ snd_pcm_uframes_t *pointer)
{
struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component);
struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
struct sof_ipc4_timestamp_info *time_info;
struct sof_ipc4_llp_reading_slot llp;
- snd_pcm_uframes_t head_ptr, tail_ptr;
+ snd_pcm_uframes_t head_cnt, tail_cnt;
struct snd_sof_pcm_stream *stream;
+ u64 dai_cnt, host_cnt, host_ptr;
struct snd_sof_pcm *spcm;
- u64 tmp_ptr;
int ret;
spcm = snd_sof_find_spcm_dai(component, rtd);
if (!spcm)
- return 0;
+ return -EOPNOTSUPP;
stream = &spcm->stream[substream->stream];
time_info = stream->private;
if (!time_info)
- return 0;
+ return -EOPNOTSUPP;
/*
* stream_start_offset is updated to memory window by FW based on
if (time_info->stream_start_offset == SOF_IPC4_INVALID_STREAM_POSITION) {
ret = sof_ipc4_get_stream_start_offset(sdev, substream, stream, time_info);
if (ret < 0)
- return 0;
+ return -EOPNOTSUPP;
}
+ /* For delay calculation we need the host counter */
+ host_cnt = snd_sof_pcm_get_host_byte_counter(sdev, component, substream);
+ host_ptr = host_cnt;
+
+ /* convert the host_cnt to frames */
+ host_cnt = div64_u64(host_cnt, frames_to_bytes(substream->runtime, 1));
+
/*
- * HDaudio links don't support the LLP counter reported by firmware
- * the link position is read directly from hardware registers.
+ * If the LLP counter is not reported by firmware in the SRAM window
+ * then read the dai (link) counter via host accessible means if
+ * available.
*/
if (!time_info->llp_offset) {
- tmp_ptr = snd_sof_pcm_get_stream_position(sdev, component, substream);
- if (!tmp_ptr)
- return 0;
+ dai_cnt = snd_sof_pcm_get_dai_frame_counter(sdev, component, substream);
+ if (!dai_cnt)
+ return -EOPNOTSUPP;
} else {
sof_mailbox_read(sdev, time_info->llp_offset, &llp, sizeof(llp));
- tmp_ptr = ((u64)llp.reading.llp_u << 32) | llp.reading.llp_l;
+ dai_cnt = ((u64)llp.reading.llp_u << 32) | llp.reading.llp_l;
}
+ dai_cnt += time_info->stream_end_offset;
- /* In two cases dai dma position is not accurate
+ /* In two cases dai dma counter is not accurate
* (1) dai pipeline is started before host pipeline
- * (2) multiple streams mixed into one. Each stream has the same dai dma position
+ * (2) multiple streams mixed into one. Each stream has the same dai dma
+ * counter
*
- * Firmware calculates correct stream_start_offset for all cases including above two.
- * Driver subtracts stream_start_offset from dai dma position to get accurate one
+ * Firmware calculates correct stream_start_offset for all cases
+ * including above two.
+ * Driver subtracts stream_start_offset from dai dma counter to get
+ * accurate one
*/
- tmp_ptr -= time_info->stream_start_offset;
- /* Calculate the delay taking into account that both pointer can wrap */
- div64_u64_rem(tmp_ptr, substream->runtime->boundary, &tmp_ptr);
+ /*
+ * On stream start the dai counter might not yet have reached the
+ * stream_start_offset value which means that no frames have left the
+ * DSP yet from the audio stream (on playback, capture streams have
+ * offset of 0 as we start capturing right away).
+ * In this case we need to adjust the distance between the counters by
+ * increasing the host counter by (offset - dai_counter).
+ * Otherwise the dai_counter needs to be adjusted to reflect the number
+ * of valid frames passed on the DAI side.
+ *
+ * The delay is the difference between the counters on the two
+ * sides of the DSP.
+ */
+ if (dai_cnt < time_info->stream_start_offset) {
+ host_cnt += time_info->stream_start_offset - dai_cnt;
+ dai_cnt = 0;
+ } else {
+ dai_cnt -= time_info->stream_start_offset;
+ }
+
+ /* Wrap the dai counter at the boundary where the host counter wraps */
+ div64_u64_rem(dai_cnt, time_info->boundary, &dai_cnt);
+
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
- head_ptr = substream->runtime->status->hw_ptr;
- tail_ptr = tmp_ptr;
+ head_cnt = host_cnt;
+ tail_cnt = dai_cnt;
} else {
- head_ptr = tmp_ptr;
- tail_ptr = substream->runtime->status->hw_ptr;
+ head_cnt = dai_cnt;
+ tail_cnt = host_cnt;
+ }
+
+ if (head_cnt < tail_cnt) {
+ time_info->delay = time_info->boundary - tail_cnt + head_cnt;
+ goto out;
}
- if (head_ptr < tail_ptr)
- return substream->runtime->boundary - tail_ptr + head_ptr;
+ time_info->delay = head_cnt - tail_cnt;
+
+out:
+ /*
+ * Convert the host byte counter to PCM pointer which wraps in buffer
+ * and it is in frames
+ */
+ div64_u64_rem(host_ptr, snd_pcm_lib_buffer_bytes(substream), &host_ptr);
+ *pointer = bytes_to_frames(substream->runtime, host_ptr);
+
+ return 0;
+}
+
+static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
+ struct sof_ipc4_timestamp_info *time_info;
+ struct snd_sof_pcm_stream *stream;
+ struct snd_sof_pcm *spcm;
+
+ spcm = snd_sof_find_spcm_dai(component, rtd);
+ if (!spcm)
+ return 0;
+
+ stream = &spcm->stream[substream->stream];
+ time_info = stream->private;
+ /*
+ * Report the stored delay value calculated in the pointer callback.
+ * In the unlikely event that the calculation was skipped/aborted, the
+ * default 0 delay returned.
+ */
+ if (time_info)
+ return time_info->delay;
+
+ /* No delay information available, report 0 as delay */
+ return 0;
- return head_ptr - tail_ptr;
}
const struct sof_ipc_pcm_ops ipc4_pcm_ops = {
.dai_link_fixup = sof_ipc4_pcm_dai_link_fixup,
.pcm_setup = sof_ipc4_pcm_setup,
.pcm_free = sof_ipc4_pcm_free,
+ .pointer = sof_ipc4_pcm_pointer,
.delay = sof_ipc4_pcm_delay,
.ipc_first_on_start = true,
.platform_stop_during_hw_free = true,
struct mutex pipeline_state_mutex; /* protect pipeline triggers, ref counts and states */
};
-/**
- * struct sof_ipc4_timestamp_info - IPC4 timestamp info
- * @host_copier: the host copier of the pcm stream
- * @dai_copier: the dai copier of the pcm stream
- * @stream_start_offset: reported by fw in memory window
- * @llp_offset: llp offset in memory window
- */
-struct sof_ipc4_timestamp_info {
- struct sof_ipc4_copier *host_copier;
- struct sof_ipc4_copier *dai_copier;
- u64 stream_start_offset;
- u32 llp_offset;
-};
-
extern const struct sof_ipc_fw_loader_ops ipc4_loader_ops;
extern const struct sof_ipc_tplg_ops ipc4_tplg_ops;
extern const struct sof_ipc_tplg_control_ops tplg_ipc4_control_ops;
struct sof_ipc4_available_audio_format *available_fmt;
struct snd_soc_component *scomp = swidget->scomp;
struct sof_ipc4_copier *ipc4_copier;
+ struct snd_sof_pcm *spcm;
int node_type = 0;
- int ret;
+ int ret, dir;
ipc4_copier = kzalloc(sizeof(*ipc4_copier), GFP_KERNEL);
if (!ipc4_copier)
}
dev_dbg(scomp->dev, "host copier '%s' node_type %u\n", swidget->widget->name, node_type);
+ spcm = snd_sof_find_spcm_comp(scomp, swidget->comp_id, &dir);
+ if (!spcm)
+ goto skip_gtw_cfg;
+
+ if (dir == SNDRV_PCM_STREAM_PLAYBACK) {
+ struct snd_sof_pcm_stream *sps = &spcm->stream[dir];
+
+ sof_update_ipc_object(scomp, &sps->dsp_max_burst_size_in_ms,
+ SOF_COPIER_DEEP_BUFFER_TOKENS,
+ swidget->tuples,
+ swidget->num_tuples, sizeof(u32), 1);
+ /* Set default DMA buffer size if it is not specified in topology */
+ if (!sps->dsp_max_burst_size_in_ms)
+ sps->dsp_max_burst_size_in_ms = SOF_IPC4_MIN_DMA_BUFFER_SIZE;
+ } else {
+ /* Capture data is copied from DSP to host in 1ms bursts */
+ spcm->stream[dir].dsp_max_burst_size_in_ms = 1;
+ }
+
skip_gtw_cfg:
ipc4_copier->gtw_attr = kzalloc(sizeof(*ipc4_copier->gtw_attr), GFP_KERNEL);
if (!ipc4_copier->gtw_attr) {
int sample_rate, channel_count;
int bit_depth, ret;
u32 nhlt_type;
+ int dev_type = 0;
/* convert to NHLT type */
switch (linktype) {
&bit_depth);
if (ret < 0)
return ret;
+
+ /*
+ * We need to know the type of the external device attached to a SSP
+ * port to retrieve the blob from NHLT. However, device type is not
+ * specified in topology.
+ * Query the type for the port and then pass that information back
+ * to the blob lookup function.
+ */
+ dev_type = intel_nhlt_ssp_device_type(sdev->dev, ipc4_data->nhlt,
+ dai_index);
+ if (dev_type < 0)
+ return dev_type;
break;
default:
return 0;
}
- dev_dbg(sdev->dev, "dai index %d nhlt type %d direction %d\n",
- dai_index, nhlt_type, dir);
+ dev_dbg(sdev->dev, "dai index %d nhlt type %d direction %d dev type %d\n",
+ dai_index, nhlt_type, dir, dev_type);
/* find NHLT blob with matching params */
cfg = intel_nhlt_get_endpoint_blob(sdev->dev, ipc4_data->nhlt, dai_index, nhlt_type,
bit_depth, bit_depth, channel_count, sample_rate,
- dir, 0);
+ dir, dev_type);
if (!cfg) {
dev_err(sdev->dev,
return 0;
}
-static inline u64 snd_sof_pcm_get_stream_position(struct snd_sof_dev *sdev,
- struct snd_soc_component *component,
- struct snd_pcm_substream *substream)
+static inline u64
+snd_sof_pcm_get_dai_frame_counter(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
{
- if (sof_ops(sdev) && sof_ops(sdev)->get_stream_position)
- return sof_ops(sdev)->get_stream_position(sdev, component, substream);
+ if (sof_ops(sdev) && sof_ops(sdev)->get_dai_frame_counter)
+ return sof_ops(sdev)->get_dai_frame_counter(sdev, component,
+ substream);
+
+ return 0;
+}
+
+static inline u64
+snd_sof_pcm_get_host_byte_counter(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ if (sof_ops(sdev) && sof_ops(sdev)->get_host_byte_counter)
+ return sof_ops(sdev)->get_host_byte_counter(sdev, component,
+ substream);
return 0;
}
{
struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component);
+ const struct sof_ipc_pcm_ops *pcm_ops = sof_ipc_get_ops(sdev, pcm);
struct snd_sof_pcm *spcm;
snd_pcm_uframes_t host, dai;
+ int ret = -EOPNOTSUPP;
/* nothing to do for BE */
if (rtd->dai_link->no_pcm)
return 0;
+ if (pcm_ops && pcm_ops->pointer)
+ ret = pcm_ops->pointer(component, substream, &host);
+
+ if (ret != -EOPNOTSUPP)
+ return ret ? ret : host;
+
/* use dsp ops pointer callback directly if set */
if (sof_ops(sdev)->pcm_pointer)
return sof_ops(sdev)->pcm_pointer(sdev, substream);
* additional memory in the SOF PCM stream structure
* @pcm_free: Function pointer for PCM free that can be used for freeing any
* additional memory in the SOF PCM stream structure
- * @delay: Function pointer for pcm delay calculation
+ * @pointer: Function pointer for pcm pointer
+ * Note: the @pointer callback may return -EOPNOTSUPP which should be
+ * handled in a same way as if the callback is not provided
+ * @delay: Function pointer for pcm delay reporting
* @reset_hw_params_during_stop: Flag indicating whether the hw_params should be reset during the
* STOP pcm trigger
* @ipc_first_on_start: Send IPC before invoking platform trigger during
int (*dai_link_fixup)(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params);
int (*pcm_setup)(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm);
void (*pcm_free)(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm);
+ int (*pointer)(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream,
+ snd_pcm_uframes_t *pointer);
snd_pcm_sframes_t (*delay)(struct snd_soc_component *component,
struct snd_pcm_substream *substream);
bool reset_hw_params_during_stop;
struct work_struct period_elapsed_work;
struct snd_soc_dapm_widget_list *list; /* list of connected DAPM widgets */
bool d0i3_compatible; /* DSP can be in D0I3 when this pcm is opened */
+ unsigned int dsp_max_burst_size_in_ms; /* The maximum size of the host DMA burst in ms */
/*
* flag to indicate that the DSP pipelines should be kept
* active or not while suspending the stream
int (*pcm_ack)(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream); /* optional */
/*
- * optional callback to retrieve the link DMA position for the substream
- * when the position is not reported in the shared SRAM windows but
- * instead from a host-accessible hardware counter.
+ * optional callback to retrieve the number of frames left/arrived from/to
+ * the DSP on the DAI side (link/codec/DMIC/etc).
+ *
+ * The callback is used when the firmware does not provide this information
+ * via the shared SRAM window and it can be retrieved by host.
*/
- u64 (*get_stream_position)(struct snd_sof_dev *sdev,
- struct snd_soc_component *component,
- struct snd_pcm_substream *substream); /* optional */
+ u64 (*get_dai_frame_counter)(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream); /* optional */
+
+ /*
+ * Optional callback to retrieve the number of bytes left/arrived from/to
+ * the DSP on the host side (bytes between host ALSA buffer and DSP).
+ *
+ * The callback is needed for ALSA delay reporting.
+ */
+ u64 (*get_host_byte_counter)(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream); /* optional */
/* host read DSP stream data */
int (*ipc_msg_data)(struct snd_sof_dev *sdev,
struct urb *urb;
/* create message: */
- msg = kmalloc(sizeof(struct message), GFP_ATOMIC);
+ msg = kzalloc(sizeof(struct message), GFP_ATOMIC);
if (msg == NULL)
return -ENOMEM;
int ret;
/* initialize USB buffers: */
- line6->buffer_listen = kmalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL);
+ line6->buffer_listen = kzalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL);
if (!line6->buffer_listen)
return -ENOMEM;
return -ENOMEM;
if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI) {
- line6->buffer_message = kmalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL);
+ line6->buffer_message = kzalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL);
if (!line6->buffer_message)
return -ENOMEM;
@echo ''
@echo ' acpi - ACPI tools'
@echo ' bpf - misc BPF tools'
- @echo ' cgroup - cgroup tools'
@echo ' counter - counter tools'
@echo ' cpupower - a tool for all things x86 CPU power'
@echo ' debugging - tools for debugging'
cpupower: FORCE
$(call descend,power/$@)
-cgroup counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
+counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
$(call descend,$@)
bpf/%: FORCE
kvm_stat: FORCE
$(call descend,kvm/$@)
-all: acpi cgroup counter cpupower gpio hv firewire \
+all: acpi counter cpupower gpio hv firewire \
perf selftests bootconfig spi turbostat usb \
virtio mm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
cpupower_install:
$(call descend,power/$(@:_install=),install)
-cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
+counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
$(call descend,$(@:_install=),install)
selftests_install:
kvm_stat_install:
$(call descend,kvm/$(@:_install=),install)
-install: acpi_install cgroup_install counter_install cpupower_install gpio_install \
+install: acpi_install counter_install cpupower_install gpio_install \
hv_install firewire_install iio_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install mm_install bpf_install x86_energy_perf_policy_install \
cpupower_clean:
$(call descend,power/cpupower,clean)
-cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
+counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
$(call descend,$(@:_clean=),clean)
libapi_clean:
build_clean:
$(call descend,build,clean)
-clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_clean \
+clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean \
#define ARM_CPU_IMP_HISI 0x48
#define ARM_CPU_IMP_APPLE 0x61
#define ARM_CPU_IMP_AMPERE 0xC0
+#define ARM_CPU_IMP_MICROSOFT 0x6D
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
#define AMPERE_CPU_PART_AMPERE1 0xAC3
+#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */
+
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
+#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
#include <asm/ptrace.h>
#include <asm/sve_context.h>
-#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_READONLY_MEM
#define __KVM_HAVE_VCPU_EVENTS
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
-#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
- KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
+ KVM_ARM_DEVICE_TYPE_SHIFT)
#define KVM_ARM_DEVICE_ID_SHIFT 16
-#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
- KVM_ARM_DEVICE_ID_SHIFT)
+#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
+ KVM_ARM_DEVICE_ID_SHIFT)
/* Supported device IDs */
#define KVM_ARM_DEVICE_VGIC_V2 0
__u64 device_irq_level;
};
+/* Bits for run->s.regs.device_irq_level */
+#define KVM_ARM_DEV_EL1_VTIMER (1 << 0)
+#define KVM_ARM_DEV_EL1_PTIMER (1 << 1)
+#define KVM_ARM_DEV_PMU (1 << 2)
+
/*
* PMU filter structure. Describe a range of events with a particular
* action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
#define __KVM_HAVE_PPC_SMT
#define __KVM_HAVE_IRQCHIP
#define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_GUEST_DEBUG
/* Not always available, but if it is, this is the correct offset. */
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_XIVE_TIMA_PAGE_OFFSET 0
#define KVM_XIVE_ESB_PAGE_OFFSET 4
+/* for KVM_PPC_GET_PVINFO */
+
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
+
+struct kvm_ppc_pvinfo {
+ /* out */
+ __u32 flags;
+ __u32 hcall[4];
+ __u8 pad[108];
+};
+
+/* for KVM_PPC_GET_SMMU_INFO */
+#define KVM_PPC_PAGE_SIZES_MAX_SZ 8
+
+struct kvm_ppc_one_page_size {
+ __u32 page_shift; /* Page shift (or 0) */
+ __u32 pte_enc; /* Encoding in the HPTE (>>12) */
+};
+
+struct kvm_ppc_one_seg_page_size {
+ __u32 page_shift; /* Base page shift of segment (or 0) */
+ __u32 slb_enc; /* SLB encoding for BookS */
+ struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+#define KVM_PPC_PAGE_SIZES_REAL 0x00000001
+#define KVM_PPC_1T_SEGMENTS 0x00000002
+#define KVM_PPC_NO_HASH 0x00000004
+
+struct kvm_ppc_smmu_info {
+ __u64 flags;
+ __u32 slb_size;
+ __u16 data_keys; /* # storage keys supported for data */
+ __u16 instr_keys; /* # storage keys supported for instructions */
+ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
+struct kvm_ppc_resize_hpt {
+ __u64 flags;
+ __u32 shift;
+ __u32 pad;
+};
+
#endif /* __LINUX_KVM_POWERPC_H */
#include <linux/types.h>
#define __KVM_S390
-#define __KVM_HAVE_GUEST_DEBUG
+
+struct kvm_s390_skeys {
+ __u64 start_gfn;
+ __u64 count;
+ __u64 skeydata_addr;
+ __u32 flags;
+ __u32 reserved[9];
+};
+
+#define KVM_S390_CMMA_PEEK (1 << 0)
+
+/**
+ * kvm_s390_cmma_log - Used for CMMA migration.
+ *
+ * Used both for input and output.
+ *
+ * @start_gfn: Guest page number to start from.
+ * @count: Size of the result buffer.
+ * @flags: Control operation mode via KVM_S390_CMMA_* flags
+ * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
+ * pages are still remaining.
+ * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
+ * in the PGSTE.
+ * @values: Pointer to the values buffer.
+ *
+ * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
+ */
+struct kvm_s390_cmma_log {
+ __u64 start_gfn;
+ __u32 count;
+ __u32 flags;
+ union {
+ __u64 remaining;
+ __u64 mask;
+ };
+ __u64 values;
+};
+
+#define KVM_S390_RESET_POR 1
+#define KVM_S390_RESET_CLEAR 2
+#define KVM_S390_RESET_SUBSYSTEM 4
+#define KVM_S390_RESET_CPU_INIT 8
+#define KVM_S390_RESET_IPL 16
+
+/* for KVM_S390_MEM_OP */
+struct kvm_s390_mem_op {
+ /* in */
+ __u64 gaddr; /* the guest address */
+ __u64 flags; /* flags */
+ __u32 size; /* amount of bytes */
+ __u32 op; /* type of operation */
+ __u64 buf; /* buffer in userspace */
+ union {
+ struct {
+ __u8 ar; /* the access register number */
+ __u8 key; /* access key, ignored if flag unset */
+ __u8 pad1[6]; /* ignored */
+ __u64 old_addr; /* ignored if cmpxchg flag unset */
+ };
+ __u32 sida_offset; /* offset into the sida */
+ __u8 reserved[32]; /* ignored */
+ };
+};
+/* types for kvm_s390_mem_op->op */
+#define KVM_S390_MEMOP_LOGICAL_READ 0
+#define KVM_S390_MEMOP_LOGICAL_WRITE 1
+#define KVM_S390_MEMOP_SIDA_READ 2
+#define KVM_S390_MEMOP_SIDA_WRITE 3
+#define KVM_S390_MEMOP_ABSOLUTE_READ 4
+#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
+#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6
+
+/* flags for kvm_s390_mem_op->flags */
+#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
+#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
+#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2)
+
+/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */
+#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0)
+#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1)
+
+struct kvm_s390_psw {
+ __u64 mask;
+ __u64 addr;
+};
+
+/* valid values for type in kvm_s390_interrupt */
+#define KVM_S390_SIGP_STOP 0xfffe0000u
+#define KVM_S390_PROGRAM_INT 0xfffe0001u
+#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u
+#define KVM_S390_RESTART 0xfffe0003u
+#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u
+#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u
+#define KVM_S390_MCHK 0xfffe1000u
+#define KVM_S390_INT_CLOCK_COMP 0xffff1004u
+#define KVM_S390_INT_CPU_TIMER 0xffff1005u
+#define KVM_S390_INT_VIRTIO 0xffff2603u
+#define KVM_S390_INT_SERVICE 0xffff2401u
+#define KVM_S390_INT_EMERGENCY 0xffff1201u
+#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u
+/* Anything below 0xfffe0000u is taken by INT_IO */
+#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \
+ (((schid)) | \
+ ((ssid) << 16) | \
+ ((cssid) << 18) | \
+ ((ai) << 26))
+#define KVM_S390_INT_IO_MIN 0x00000000u
+#define KVM_S390_INT_IO_MAX 0xfffdffffu
+#define KVM_S390_INT_IO_AI_MASK 0x04000000u
+
+
+struct kvm_s390_interrupt {
+ __u32 type;
+ __u32 parm;
+ __u64 parm64;
+};
+
+struct kvm_s390_io_info {
+ __u16 subchannel_id;
+ __u16 subchannel_nr;
+ __u32 io_int_parm;
+ __u32 io_int_word;
+};
+
+struct kvm_s390_ext_info {
+ __u32 ext_params;
+ __u32 pad;
+ __u64 ext_params2;
+};
+
+struct kvm_s390_pgm_info {
+ __u64 trans_exc_code;
+ __u64 mon_code;
+ __u64 per_address;
+ __u32 data_exc_code;
+ __u16 code;
+ __u16 mon_class_nr;
+ __u8 per_code;
+ __u8 per_atmid;
+ __u8 exc_access_id;
+ __u8 per_access_id;
+ __u8 op_access_id;
+#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01
+#define KVM_S390_PGM_FLAGS_ILC_0 0x02
+#define KVM_S390_PGM_FLAGS_ILC_1 0x04
+#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06
+#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08
+ __u8 flags;
+ __u8 pad[2];
+};
+
+struct kvm_s390_prefix_info {
+ __u32 address;
+};
+
+struct kvm_s390_extcall_info {
+ __u16 code;
+};
+
+struct kvm_s390_emerg_info {
+ __u16 code;
+};
+
+#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01
+struct kvm_s390_stop_info {
+ __u32 flags;
+};
+
+struct kvm_s390_mchk_info {
+ __u64 cr14;
+ __u64 mcic;
+ __u64 failing_storage_address;
+ __u32 ext_damage_code;
+ __u32 pad;
+ __u8 fixed_logout[16];
+};
+
+struct kvm_s390_irq {
+ __u64 type;
+ union {
+ struct kvm_s390_io_info io;
+ struct kvm_s390_ext_info ext;
+ struct kvm_s390_pgm_info pgm;
+ struct kvm_s390_emerg_info emerg;
+ struct kvm_s390_extcall_info extcall;
+ struct kvm_s390_prefix_info prefix;
+ struct kvm_s390_stop_info stop;
+ struct kvm_s390_mchk_info mchk;
+ char reserved[64];
+ } u;
+};
+
+struct kvm_s390_irq_state {
+ __u64 buf;
+ __u32 flags; /* will stay unused for compatibility reasons */
+ __u32 len;
+ __u32 reserved[4]; /* will stay unused for compatibility reasons */
+};
+
+struct kvm_s390_ucas_mapping {
+ __u64 user_addr;
+ __u64 vcpu_addr;
+ __u64 length;
+};
+
+struct kvm_s390_pv_sec_parm {
+ __u64 origin;
+ __u64 length;
+};
+
+struct kvm_s390_pv_unp {
+ __u64 addr;
+ __u64 size;
+ __u64 tweak;
+};
+
+enum pv_cmd_dmp_id {
+ KVM_PV_DUMP_INIT,
+ KVM_PV_DUMP_CONFIG_STOR_STATE,
+ KVM_PV_DUMP_COMPLETE,
+ KVM_PV_DUMP_CPU,
+};
+
+struct kvm_s390_pv_dmp {
+ __u64 subcmd;
+ __u64 buff_addr;
+ __u64 buff_len;
+ __u64 gaddr; /* For dump storage state */
+ __u64 reserved[4];
+};
+
+enum pv_cmd_info_id {
+ KVM_PV_INFO_VM,
+ KVM_PV_INFO_DUMP,
+};
+
+struct kvm_s390_pv_info_dump {
+ __u64 dump_cpu_buffer_len;
+ __u64 dump_config_mem_buffer_per_1m;
+ __u64 dump_config_finalize_len;
+};
+
+struct kvm_s390_pv_info_vm {
+ __u64 inst_calls_list[4];
+ __u64 max_cpus;
+ __u64 max_guests;
+ __u64 max_guest_addr;
+ __u64 feature_indication;
+};
+
+struct kvm_s390_pv_info_header {
+ __u32 id;
+ __u32 len_max;
+ __u32 len_written;
+ __u32 reserved;
+};
+
+struct kvm_s390_pv_info {
+ struct kvm_s390_pv_info_header header;
+ union {
+ struct kvm_s390_pv_info_dump dump;
+ struct kvm_s390_pv_info_vm vm;
+ };
+};
+
+enum pv_cmd_id {
+ KVM_PV_ENABLE,
+ KVM_PV_DISABLE,
+ KVM_PV_SET_SEC_PARMS,
+ KVM_PV_UNPACK,
+ KVM_PV_VERIFY,
+ KVM_PV_PREP_RESET,
+ KVM_PV_UNSHARE_ALL,
+ KVM_PV_INFO,
+ KVM_PV_DUMP,
+ KVM_PV_ASYNC_CLEANUP_PREPARE,
+ KVM_PV_ASYNC_CLEANUP_PERFORM,
+};
+
+struct kvm_pv_cmd {
+ __u32 cmd; /* Command to be executed */
+ __u16 rc; /* Ultravisor return code */
+ __u16 rrc; /* Ultravisor return reason code */
+ __u64 data; /* Data or address */
+ __u32 flags; /* flags for future extensions. Must be 0 for now */
+ __u32 reserved[3];
+};
+
+struct kvm_s390_zpci_op {
+ /* in */
+ __u32 fh; /* target device */
+ __u8 op; /* operation to perform */
+ __u8 pad[3];
+ union {
+ /* for KVM_S390_ZPCIOP_REG_AEN */
+ struct {
+ __u64 ibv; /* Guest addr of interrupt bit vector */
+ __u64 sb; /* Guest addr of summary bit */
+ __u32 flags;
+ __u32 noi; /* Number of interrupts */
+ __u8 isc; /* Guest interrupt subclass */
+ __u8 sbo; /* Offset of guest summary bit vector */
+ __u16 pad;
+ } reg_aen;
+ __u64 reserved[8];
+ } u;
+};
+
+/* types for kvm_s390_zpci_op->op */
+#define KVM_S390_ZPCIOP_REG_AEN 0
+#define KVM_S390_ZPCIOP_DEREG_AEN 1
+
+/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
+#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
/* Device control API: s390-specific devices */
#define KVM_DEV_FLIC_GET_ALL_IRQS 1
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 21 /* N 32-bit words worth of info */
+#define NCAPINTS 22 /* N 32-bit words worth of info */
#define NBUGINTS 2 /* N 32-bit bug flags */
/*
#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-
-/* CPU types for specific tunings: */
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */
+#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+
/*
* BUG word(s)
*/
/* BUG word 2 */
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
#endif /* _ASM_X86_CPUFEATURES_H */
# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31))
#endif
+#ifdef CONFIG_KVM_AMD_SEV
+#define DISABLE_SEV_SNP 0
+#else
+#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
DISABLE_ENQCMD)
#define DISABLED_MASK17 0
#define DISABLED_MASK18 (DISABLE_IBT)
-#define DISABLED_MASK19 0
+#define DISABLED_MASK19 (DISABLE_SEV_SNP)
#define DISABLED_MASK20 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define DISABLED_MASK21 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
#define HYPERVISOR_CALLBACK_VECTOR 0xf3
/* Vector for KVM to deliver posted interrupt IPI */
-#if IS_ENABLED(CONFIG_KVM)
#define POSTED_INTR_VECTOR 0xf2
#define POSTED_INTR_WAKEUP_VECTOR 0xf1
#define POSTED_INTR_NESTED_VECTOR 0xf0
-#endif
#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef
* CPU is not vulnerable to Gather
* Data Sampling (GDS).
*/
+#define ARCH_CAP_RFDS_NO BIT(27) /*
+ * Not susceptible to Register
+ * File Data Sampling.
+ */
+#define ARCH_CAP_RFDS_CLEAR BIT(28) /*
+ * VERW clears CPU Register
+ * File.
+ */
#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
* IA32_XAPIC_DISABLE_STATUS MSR
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
-#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
-#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
+#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
-
-/* SNP feature bits enabled by the hypervisor */
-#define MSR_AMD64_SNP_VTOM BIT_ULL(3)
-#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(4)
-#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(5)
-#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(6)
-#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(7)
-#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(8)
-#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(9)
-#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(10)
-#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(11)
-#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(12)
-#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(14)
-#define MSR_AMD64_SNP_VMSA_REG_PROTECTION BIT_ULL(16)
-#define MSR_AMD64_SNP_SMT_PROTECTION BIT_ULL(17)
-
-/* SNP feature bits reserved for future use. */
-#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
-#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
-#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, 18)
+#define MSR_AMD64_SNP_VTOM_BIT 3
+#define MSR_AMD64_SNP_VTOM BIT_ULL(MSR_AMD64_SNP_VTOM_BIT)
+#define MSR_AMD64_SNP_REFLECT_VC_BIT 4
+#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(MSR_AMD64_SNP_REFLECT_VC_BIT)
+#define MSR_AMD64_SNP_RESTRICTED_INJ_BIT 5
+#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(MSR_AMD64_SNP_RESTRICTED_INJ_BIT)
+#define MSR_AMD64_SNP_ALT_INJ_BIT 6
+#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(MSR_AMD64_SNP_ALT_INJ_BIT)
+#define MSR_AMD64_SNP_DEBUG_SWAP_BIT 7
+#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(MSR_AMD64_SNP_DEBUG_SWAP_BIT)
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT 8
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT)
+#define MSR_AMD64_SNP_BTB_ISOLATION_BIT 9
+#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(MSR_AMD64_SNP_BTB_ISOLATION_BIT)
+#define MSR_AMD64_SNP_VMPL_SSS_BIT 10
+#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(MSR_AMD64_SNP_VMPL_SSS_BIT)
+#define MSR_AMD64_SNP_SECURE_TSC_BIT 11
+#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(MSR_AMD64_SNP_SECURE_TSC_BIT)
+#define MSR_AMD64_SNP_VMGEXIT_PARAM_BIT 12
+#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(MSR_AMD64_SNP_VMGEXIT_PARAM_BIT)
+#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
+#define MSR_AMD64_SNP_IBS_VIRT_BIT 14
+#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(MSR_AMD64_SNP_IBS_VIRT_BIT)
+#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
+#define MSR_AMD64_SNP_VMSA_REG_PROT_BIT 16
+#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT)
+#define MSR_AMD64_SNP_SMT_PROT_BIT 17
+#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 18
+#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+#define MSR_AMD64_RMP_BASE 0xc0010132
+#define MSR_AMD64_RMP_END 0xc0010133
+
/* AMD Collaborative Processor Performance Control MSRs */
#define MSR_AMD_CPPC_CAP1 0xc00102b0
#define MSR_AMD_CPPC_ENABLE 0xc00102b1
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
#define MSR_AMD64_SYSCFG 0xc0010010
-#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24
+#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
+#define MSR_AMD64_SYSCFG_MFDM_BIT 19
+#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
+
#define MSR_K8_INT_PENDING_MSG 0xc0010055
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
#define REQUIRED_MASK20 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define REQUIRED_MASK21 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
*
*/
+#include <linux/const.h>
+#include <linux/bits.h>
#include <linux/types.h>
#include <linux/ioctl.h>
#include <linux/stddef.h>
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
-#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_MSIX
#define __KVM_HAVE_MCE
#define __KVM_HAVE_PIT_STATE2
#define __KVM_HAVE_DEBUGREGS
#define __KVM_HAVE_XSAVE
#define __KVM_HAVE_XCRS
-#define __KVM_HAVE_READONLY_MEM
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
#define KVM_PMU_EVENT_ALLOW 0
#define KVM_PMU_EVENT_DENY 1
-#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0)
+#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0)
#define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS)
+/* for KVM_CAP_MCE */
+struct kvm_x86_mce {
+ __u64 status;
+ __u64 addr;
+ __u64 misc;
+ __u64 mcg_status;
+ __u8 bank;
+ __u8 pad1[7];
+ __u64 pad2[3];
+};
+
+/* for KVM_CAP_XEN_HVM */
+#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8)
+
+struct kvm_xen_hvm_config {
+ __u32 flags;
+ __u32 msr;
+ __u64 blob_addr_32;
+ __u64 blob_addr_64;
+ __u8 blob_size_32;
+ __u8 blob_size_64;
+ __u8 pad2[30];
+};
+
+struct kvm_xen_hvm_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u8 long_mode;
+ __u8 vector;
+ __u8 runstate_update_flag;
+ union {
+ __u64 gfn;
+#define KVM_XEN_INVALID_GFN ((__u64)-1)
+ __u64 hva;
+ } shared_info;
+ struct {
+ __u32 send_port;
+ __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
+ __u32 flags;
+#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0)
+#define KVM_XEN_EVTCHN_UPDATE (1 << 1)
+#define KVM_XEN_EVTCHN_RESET (1 << 2)
+ /*
+ * Events sent by the guest are either looped back to
+ * the guest itself (potentially on a different port#)
+ * or signalled via an eventfd.
+ */
+ union {
+ struct {
+ __u32 port;
+ __u32 vcpu;
+ __u32 priority;
+ } port;
+ struct {
+ __u32 port; /* Zero for eventfd */
+ __s32 fd;
+ } eventfd;
+ __u32 padding[4];
+ } deliver;
+ } evtchn;
+ __u32 xen_version;
+ __u64 pad[8];
+ } u;
+};
+
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
+#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
+#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6
+
+struct kvm_xen_vcpu_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u64 gpa;
+#define KVM_XEN_INVALID_GPA ((__u64)-1)
+ __u64 hva;
+ __u64 pad[8];
+ struct {
+ __u64 state;
+ __u64 state_entry_time;
+ __u64 time_running;
+ __u64 time_runnable;
+ __u64 time_blocked;
+ __u64 time_offline;
+ } runstate;
+ __u32 vcpu_id;
+ struct {
+ __u32 port;
+ __u32 priority;
+ __u64 expires_ns;
+ } timer;
+ __u8 vector;
+ } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6
+#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7
+#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+ /* Guest initialization commands */
+ KVM_SEV_INIT = 0,
+ KVM_SEV_ES_INIT,
+ /* Guest launch commands */
+ KVM_SEV_LAUNCH_START,
+ KVM_SEV_LAUNCH_UPDATE_DATA,
+ KVM_SEV_LAUNCH_UPDATE_VMSA,
+ KVM_SEV_LAUNCH_SECRET,
+ KVM_SEV_LAUNCH_MEASURE,
+ KVM_SEV_LAUNCH_FINISH,
+ /* Guest migration commands (outgoing) */
+ KVM_SEV_SEND_START,
+ KVM_SEV_SEND_UPDATE_DATA,
+ KVM_SEV_SEND_UPDATE_VMSA,
+ KVM_SEV_SEND_FINISH,
+ /* Guest migration commands (incoming) */
+ KVM_SEV_RECEIVE_START,
+ KVM_SEV_RECEIVE_UPDATE_DATA,
+ KVM_SEV_RECEIVE_UPDATE_VMSA,
+ KVM_SEV_RECEIVE_FINISH,
+ /* Guest status and debug commands */
+ KVM_SEV_GUEST_STATUS,
+ KVM_SEV_DBG_DECRYPT,
+ KVM_SEV_DBG_ENCRYPT,
+ /* Guest certificates commands */
+ KVM_SEV_CERT_EXPORT,
+ /* Attestation report */
+ KVM_SEV_GET_ATTESTATION_REPORT,
+ /* Guest Migration Extension */
+ KVM_SEV_SEND_CANCEL,
+
+ KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+ __u32 id;
+ __u32 pad0;
+ __u64 data;
+ __u32 error;
+ __u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 dh_uaddr;
+ __u32 dh_len;
+ __u32 pad0;
+ __u64 session_uaddr;
+ __u32 session_len;
+ __u32 pad1;
+};
+
+struct kvm_sev_launch_update_data {
+ __u64 uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+
+struct kvm_sev_launch_secret {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u32 pad0;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u32 pad1;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+ __u32 pad2;
+};
+
+struct kvm_sev_launch_measure {
+ __u64 uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+struct kvm_sev_guest_status {
+ __u32 handle;
+ __u32 policy;
+ __u32 state;
+};
+
+struct kvm_sev_dbg {
+ __u64 src_uaddr;
+ __u64 dst_uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+struct kvm_sev_attestation_report {
+ __u8 mnonce[16];
+ __u64 uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+struct kvm_sev_send_start {
+ __u32 policy;
+ __u32 pad0;
+ __u64 pdh_cert_uaddr;
+ __u32 pdh_cert_len;
+ __u32 pad1;
+ __u64 plat_certs_uaddr;
+ __u32 plat_certs_len;
+ __u32 pad2;
+ __u64 amd_certs_uaddr;
+ __u32 amd_certs_len;
+ __u32 pad3;
+ __u64 session_uaddr;
+ __u32 session_len;
+ __u32 pad4;
+};
+
+struct kvm_sev_send_update_data {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u32 pad0;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u32 pad1;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+ __u32 pad2;
+};
+
+struct kvm_sev_receive_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 pdh_uaddr;
+ __u32 pdh_len;
+ __u32 pad0;
+ __u64 session_uaddr;
+ __u32 session_len;
+ __u32 pad1;
+};
+
+struct kvm_sev_receive_update_data {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u32 pad0;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u32 pad1;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+ __u32 pad2;
+};
+
+#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+
+struct kvm_hyperv_eventfd {
+ __u32 conn_id;
+ __s32 fd;
+ __u32 flags;
+ __u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
+#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
+
/*
* Masked event layout.
* Bits Description
((__u64)(!!(exclude)) << 55))
#define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \
- (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32))
-#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56))
-#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8))
-#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55))
+ (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8))
+#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55))
#define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56)
/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
/* x86-specific KVM_EXIT_HYPERCALL flags. */
-#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
+#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0)
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_SW_PROTECTED_VM 1
int i, n;
/* recognize hard coded LLVM section name */
- if (strcmp(sec_name, ".arena.1") == 0) {
+ if (strcmp(sec_name, ".addr_space.1") == 0) {
/* this is the name to use in skeleton */
snprintf(buf, buf_sz, "arena");
return true;
DNS
};
+enum {
+ IPV4 = 1,
+ IPV6,
+ IP_TYPE_MAX
+};
+
static int in_hand_shake;
static char *os_name = "";
#define MAX_FILE_NAME 100
#define ENTRIES_PER_BLOCK 50
+/*
+ * Change this entry if the number of addresses increases in future
+ */
+#define MAX_IP_ENTRIES 64
+#define OUTSTR_BUF_SIZE ((INET6_ADDRSTRLEN + 1) * MAX_IP_ENTRIES)
struct kvp_record {
char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
return 0;
}
+int ip_version_check(const char *input_addr)
+{
+ struct in6_addr addr;
+
+ if (inet_pton(AF_INET, input_addr, &addr))
+ return IPV4;
+ else if (inet_pton(AF_INET6, input_addr, &addr))
+ return IPV6;
+
+ return -EINVAL;
+}
+
/*
* Only IPv4 subnet strings needs to be converted to plen
* For IPv6 the subnet is already privided in plen format
return plen;
}
+static int process_dns_gateway_nm(FILE *f, char *ip_string, int type,
+ int ip_sec)
+{
+ char addr[INET6_ADDRSTRLEN], *output_str;
+ int ip_offset = 0, error = 0, ip_ver;
+ char *param_name;
+
+ if (type == DNS)
+ param_name = "dns";
+ else if (type == GATEWAY)
+ param_name = "gateway";
+ else
+ return -EINVAL;
+
+ output_str = (char *)calloc(OUTSTR_BUF_SIZE, sizeof(char));
+ if (!output_str)
+ return -ENOMEM;
+
+ while (1) {
+ memset(addr, 0, sizeof(addr));
+
+ if (!parse_ip_val_buffer(ip_string, &ip_offset, addr,
+ (MAX_IP_ADDR_SIZE * 2)))
+ break;
+
+ ip_ver = ip_version_check(addr);
+ if (ip_ver < 0)
+ continue;
+
+ if ((ip_ver == IPV4 && ip_sec == IPV4) ||
+ (ip_ver == IPV6 && ip_sec == IPV6)) {
+ /*
+ * do a bound check to avoid out-of bound writes
+ */
+ if ((OUTSTR_BUF_SIZE - strlen(output_str)) >
+ (strlen(addr) + 1)) {
+ strncat(output_str, addr,
+ OUTSTR_BUF_SIZE -
+ strlen(output_str) - 1);
+ strncat(output_str, ",",
+ OUTSTR_BUF_SIZE -
+ strlen(output_str) - 1);
+ }
+ } else {
+ continue;
+ }
+ }
+
+ if (strlen(output_str)) {
+ /*
+ * This is to get rid of that extra comma character
+ * in the end of the string
+ */
+ output_str[strlen(output_str) - 1] = '\0';
+ error = fprintf(f, "%s=%s\n", param_name, output_str);
+ }
+
+ free(output_str);
+ return error;
+}
+
static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
- int is_ipv6)
+ int ip_sec)
{
char addr[INET6_ADDRSTRLEN];
char subnet_addr[INET6_ADDRSTRLEN];
- int error, i = 0;
+ int error = 0, i = 0;
int ip_offset = 0, subnet_offset = 0;
- int plen;
+ int plen, ip_ver;
memset(addr, 0, sizeof(addr));
memset(subnet_addr, 0, sizeof(subnet_addr));
subnet_addr,
(MAX_IP_ADDR_SIZE *
2))) {
- if (!is_ipv6)
+ ip_ver = ip_version_check(addr);
+ if (ip_ver < 0)
+ continue;
+
+ if (ip_ver == IPV4 && ip_sec == IPV4)
plen = kvp_subnet_to_plen((char *)subnet_addr);
- else
+ else if (ip_ver == IPV6 && ip_sec == IPV6)
plen = atoi(subnet_addr);
+ else
+ continue;
if (plen < 0)
return plen;
memset(subnet_addr, 0, sizeof(subnet_addr));
}
- return 0;
+ return error;
}
static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
{
- int error = 0;
+ int error = 0, ip_ver;
char if_filename[PATH_MAX];
char nm_filename[PATH_MAX];
FILE *ifcfg_file, *nmfile;
char cmd[PATH_MAX];
- int is_ipv6 = 0;
char *mac_addr;
int str_len;
if (error)
goto setval_error;
- if (new_val->addr_family & ADDR_FAMILY_IPV6) {
- error = fprintf(nmfile, "\n[ipv6]\n");
- if (error < 0)
- goto setval_error;
- is_ipv6 = 1;
- } else {
- error = fprintf(nmfile, "\n[ipv4]\n");
- if (error < 0)
- goto setval_error;
- }
-
/*
* Now we populate the keyfile format
+ *
+ * The keyfile format expects the IPv6 and IPv4 configuration in
+ * different sections. Therefore we iterate through the list twice,
+ * once to populate the IPv4 section and the next time for IPv6
*/
+ ip_ver = IPV4;
+ do {
+ if (ip_ver == IPV4) {
+ error = fprintf(nmfile, "\n[ipv4]\n");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = fprintf(nmfile, "\n[ipv6]\n");
+ if (error < 0)
+ goto setval_error;
+ }
- if (new_val->dhcp_enabled) {
- error = kvp_write_file(nmfile, "method", "", "auto");
- if (error < 0)
- goto setval_error;
- } else {
- error = kvp_write_file(nmfile, "method", "", "manual");
+ /*
+ * Write the configuration for ipaddress, netmask, gateway and
+ * name services
+ */
+ error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
+ (char *)new_val->sub_net,
+ ip_ver);
if (error < 0)
goto setval_error;
- }
- /*
- * Write the configuration for ipaddress, netmask, gateway and
- * name services
- */
- error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
- (char *)new_val->sub_net, is_ipv6);
- if (error < 0)
- goto setval_error;
+ /*
+ * As dhcp_enabled is only valid for ipv4, we do not set dhcp
+ * methods for ipv6 based on dhcp_enabled flag.
+ *
+ * For ipv4, set method to manual only when dhcp_enabled is
+ * false and specific ipv4 addresses are configured. If neither
+ * dhcp_enabled is true and no ipv4 addresses are configured,
+ * set method to 'disabled'.
+ *
+ * For ipv6, set method to manual when we configure ipv6
+ * addresses. Otherwise set method to 'auto' so that SLAAC from
+ * RA may be used.
+ */
+ if (ip_ver == IPV4) {
+ if (new_val->dhcp_enabled) {
+ error = kvp_write_file(nmfile, "method", "",
+ "auto");
+ if (error < 0)
+ goto setval_error;
+ } else if (error) {
+ error = kvp_write_file(nmfile, "method", "",
+ "manual");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = kvp_write_file(nmfile, "method", "",
+ "disabled");
+ if (error < 0)
+ goto setval_error;
+ }
+ } else if (ip_ver == IPV6) {
+ if (error) {
+ error = kvp_write_file(nmfile, "method", "",
+ "manual");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = kvp_write_file(nmfile, "method", "",
+ "auto");
+ if (error < 0)
+ goto setval_error;
+ }
+ }
- /* we do not want ipv4 addresses in ipv6 section and vice versa */
- if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) {
- error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
+ error = process_dns_gateway_nm(nmfile,
+ (char *)new_val->gate_way,
+ GATEWAY, ip_ver);
if (error < 0)
goto setval_error;
- }
- if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) {
- error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
+ error = process_dns_gateway_nm(nmfile,
+ (char *)new_val->dns_addr, DNS,
+ ip_ver);
if (error < 0)
goto setval_error;
- }
+
+ ip_ver++;
+ } while (ip_ver < IP_TYPE_MAX);
+
fclose(nmfile);
fclose(ifcfg_file);
#include <asm/types.h>
/**
- * __fls - find last (most-significant) set bit in a long word
+ * generic___fls - find last (most-significant) set bit in a long word
* @word: the word to search
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long __fls(unsigned long word)
+static __always_inline unsigned long generic___fls(unsigned long word)
{
int num = BITS_PER_LONG - 1;
return num;
}
+#ifndef __HAVE_ARCH___FLS
+#define __fls(word) generic___fls(word)
+#endif
+
#endif /* _ASM_GENERIC_BITOPS___FLS_H_ */
#define _ASM_GENERIC_BITOPS_FLS_H_
/**
- * fls - find last (most-significant) bit set
+ * generic_fls - find last (most-significant) bit set
* @x: the word to search
*
* This is defined the same way as ffs.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static __always_inline int fls(unsigned int x)
+static __always_inline int generic_fls(unsigned int x)
{
int r = 32;
return r;
}
+#ifndef __HAVE_ARCH_FLS
+#define fls(x) generic_fls(x)
+#endif
+
#endif /* _ASM_GENERIC_BITOPS_FLS_H_ */
#ifndef _LINUX_BTF_IDS_H
#define _LINUX_BTF_IDS_H
+#include <linux/types.h> /* for u32 */
+
struct btf_id_set {
u32 cnt;
u32 ids[];
#include <linux/build_bug.h>
#include <linux/compiler.h>
#include <linux/math.h>
+#include <linux/panic.h>
#include <endian.h>
#include <byteswap.h>
{
}
+static inline int early_pfn_to_nid(unsigned long pfn)
+{
+ return 0;
+}
+
#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_PANIC_H
+#define _TOOLS_LINUX_PANIC_H
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static inline void panic(const char *fmt, ...)
+{
+ va_list argp;
+
+ va_start(argp, fmt);
+ vfprintf(stderr, fmt, argp);
+ va_end(argp);
+ exit(-1);
+}
+
+#endif
* - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions)
* - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
* - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info)
+ * - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct drm_i915_query_guc_submission_version)
*/
__u64 query_id;
#define DRM_I915_QUERY_TOPOLOGY_INFO 1
#define DRM_I915_QUERY_MEMORY_REGIONS 4
#define DRM_I915_QUERY_HWCONFIG_BLOB 5
#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6
+#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION 7
/* Must be kept compact -- no holes and well documented */
/**
struct drm_i915_memory_region_info regions[];
};
+/**
+ * struct drm_i915_query_guc_submission_version - query GuC submission interface version
+ */
+struct drm_i915_query_guc_submission_version {
+ /** @branch: Firmware branch version. */
+ __u32 branch;
+ /** @major: Firmware major version. */
+ __u32 major;
+ /** @minor: Firmware minor version. */
+ __u32 minor;
+ /** @patch: Firmware patch version. */
+ __u32 patch;
+};
+
/**
* DOC: GuC HWCONFIG blob uAPI
*
__u64 minlen;
};
+/*
+ * We include a length field because some filesystems (vfat) have an identifier
+ * that we do want to expose as a UUID, but doesn't have the standard length.
+ *
+ * We use a fixed size buffer beacuse this interface will, by fiat, never
+ * support "UUIDs" longer than 16 bytes; we don't want to force all downstream
+ * users to have to deal with that.
+ */
+struct fsuuid2 {
+ __u8 len;
+ __u8 uuid[16];
+};
+
+struct fs_sysfs_path {
+ __u8 len;
+ __u8 name[128];
+};
+
/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME 0
#define FILE_DEDUPE_RANGE_DIFFERS 1
#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
#define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX])
#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX])
+/* Returns the external filesystem UUID, the same one blkid returns */
+#define FS_IOC_GETFSUUID _IOR(0x15, 0, struct fsuuid2)
+/*
+ * Returns the path component under /sys/fs/ that refers to this filesystem;
+ * also /sys/kernel/debug/ for filesystems with debugfs exports
+ */
+#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path)
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
/* per-IO O_APPEND */
#define RWF_APPEND ((__force __kernel_rwf_t)0x00000010)
+/* per-IO negation of O_APPEND */
+#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND)
+ RWF_APPEND | RWF_NOAPPEND)
/* Pagemap ioctl */
#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
#define KVM_API_VERSION 12
+/*
+ * Backwards-compatible definitions.
+ */
+#define __KVM_HAVE_GUEST_DEBUG
+
/* for KVM_SET_USER_MEMORY_REGION */
struct kvm_userspace_memory_region {
__u32 slot;
#define KVM_PIT_SPEAKER_DUMMY 1
-struct kvm_s390_skeys {
- __u64 start_gfn;
- __u64 count;
- __u64 skeydata_addr;
- __u32 flags;
- __u32 reserved[9];
-};
-
-#define KVM_S390_CMMA_PEEK (1 << 0)
-
-/**
- * kvm_s390_cmma_log - Used for CMMA migration.
- *
- * Used both for input and output.
- *
- * @start_gfn: Guest page number to start from.
- * @count: Size of the result buffer.
- * @flags: Control operation mode via KVM_S390_CMMA_* flags
- * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
- * pages are still remaining.
- * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
- * in the PGSTE.
- * @values: Pointer to the values buffer.
- *
- * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
- */
-struct kvm_s390_cmma_log {
- __u64 start_gfn;
- __u32 count;
- __u32 flags;
- union {
- __u64 remaining;
- __u64 mask;
- };
- __u64 values;
-};
-
struct kvm_hyperv_exit {
#define KVM_EXIT_HYPERV_SYNIC 1
#define KVM_EXIT_HYPERV_HCALL 2
__u32 ipb;
} s390_sieic;
/* KVM_EXIT_S390_RESET */
-#define KVM_S390_RESET_POR 1
-#define KVM_S390_RESET_CLEAR 2
-#define KVM_S390_RESET_SUBSYSTEM 4
-#define KVM_S390_RESET_CPU_INIT 8
-#define KVM_S390_RESET_IPL 16
__u64 s390_reset_flags;
/* KVM_EXIT_S390_UCONTROL */
struct {
__u8 pad[5];
};
-/* for KVM_S390_MEM_OP */
-struct kvm_s390_mem_op {
- /* in */
- __u64 gaddr; /* the guest address */
- __u64 flags; /* flags */
- __u32 size; /* amount of bytes */
- __u32 op; /* type of operation */
- __u64 buf; /* buffer in userspace */
- union {
- struct {
- __u8 ar; /* the access register number */
- __u8 key; /* access key, ignored if flag unset */
- __u8 pad1[6]; /* ignored */
- __u64 old_addr; /* ignored if cmpxchg flag unset */
- };
- __u32 sida_offset; /* offset into the sida */
- __u8 reserved[32]; /* ignored */
- };
-};
-/* types for kvm_s390_mem_op->op */
-#define KVM_S390_MEMOP_LOGICAL_READ 0
-#define KVM_S390_MEMOP_LOGICAL_WRITE 1
-#define KVM_S390_MEMOP_SIDA_READ 2
-#define KVM_S390_MEMOP_SIDA_WRITE 3
-#define KVM_S390_MEMOP_ABSOLUTE_READ 4
-#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
-#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6
-
-/* flags for kvm_s390_mem_op->flags */
-#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
-#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
-#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2)
-
-/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */
-#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0)
-#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1)
-
/* for KVM_INTERRUPT */
struct kvm_interrupt {
/* in */
__u32 mp_state;
};
-struct kvm_s390_psw {
- __u64 mask;
- __u64 addr;
-};
-
-/* valid values for type in kvm_s390_interrupt */
-#define KVM_S390_SIGP_STOP 0xfffe0000u
-#define KVM_S390_PROGRAM_INT 0xfffe0001u
-#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u
-#define KVM_S390_RESTART 0xfffe0003u
-#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u
-#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u
-#define KVM_S390_MCHK 0xfffe1000u
-#define KVM_S390_INT_CLOCK_COMP 0xffff1004u
-#define KVM_S390_INT_CPU_TIMER 0xffff1005u
-#define KVM_S390_INT_VIRTIO 0xffff2603u
-#define KVM_S390_INT_SERVICE 0xffff2401u
-#define KVM_S390_INT_EMERGENCY 0xffff1201u
-#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u
-/* Anything below 0xfffe0000u is taken by INT_IO */
-#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \
- (((schid)) | \
- ((ssid) << 16) | \
- ((cssid) << 18) | \
- ((ai) << 26))
-#define KVM_S390_INT_IO_MIN 0x00000000u
-#define KVM_S390_INT_IO_MAX 0xfffdffffu
-#define KVM_S390_INT_IO_AI_MASK 0x04000000u
-
-
-struct kvm_s390_interrupt {
- __u32 type;
- __u32 parm;
- __u64 parm64;
-};
-
-struct kvm_s390_io_info {
- __u16 subchannel_id;
- __u16 subchannel_nr;
- __u32 io_int_parm;
- __u32 io_int_word;
-};
-
-struct kvm_s390_ext_info {
- __u32 ext_params;
- __u32 pad;
- __u64 ext_params2;
-};
-
-struct kvm_s390_pgm_info {
- __u64 trans_exc_code;
- __u64 mon_code;
- __u64 per_address;
- __u32 data_exc_code;
- __u16 code;
- __u16 mon_class_nr;
- __u8 per_code;
- __u8 per_atmid;
- __u8 exc_access_id;
- __u8 per_access_id;
- __u8 op_access_id;
-#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01
-#define KVM_S390_PGM_FLAGS_ILC_0 0x02
-#define KVM_S390_PGM_FLAGS_ILC_1 0x04
-#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06
-#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08
- __u8 flags;
- __u8 pad[2];
-};
-
-struct kvm_s390_prefix_info {
- __u32 address;
-};
-
-struct kvm_s390_extcall_info {
- __u16 code;
-};
-
-struct kvm_s390_emerg_info {
- __u16 code;
-};
-
-#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01
-struct kvm_s390_stop_info {
- __u32 flags;
-};
-
-struct kvm_s390_mchk_info {
- __u64 cr14;
- __u64 mcic;
- __u64 failing_storage_address;
- __u32 ext_damage_code;
- __u32 pad;
- __u8 fixed_logout[16];
-};
-
-struct kvm_s390_irq {
- __u64 type;
- union {
- struct kvm_s390_io_info io;
- struct kvm_s390_ext_info ext;
- struct kvm_s390_pgm_info pgm;
- struct kvm_s390_emerg_info emerg;
- struct kvm_s390_extcall_info extcall;
- struct kvm_s390_prefix_info prefix;
- struct kvm_s390_stop_info stop;
- struct kvm_s390_mchk_info mchk;
- char reserved[64];
- } u;
-};
-
-struct kvm_s390_irq_state {
- __u64 buf;
- __u32 flags; /* will stay unused for compatibility reasons */
- __u32 len;
- __u32 reserved[4]; /* will stay unused for compatibility reasons */
-};
-
/* for KVM_SET_GUEST_DEBUG */
#define KVM_GUESTDBG_ENABLE 0x00000001
__u8 pad[64];
};
-/* for KVM_PPC_GET_PVINFO */
-
-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
-
-struct kvm_ppc_pvinfo {
- /* out */
- __u32 flags;
- __u32 hcall[4];
- __u8 pad[108];
-};
-
-/* for KVM_PPC_GET_SMMU_INFO */
-#define KVM_PPC_PAGE_SIZES_MAX_SZ 8
-
-struct kvm_ppc_one_page_size {
- __u32 page_shift; /* Page shift (or 0) */
- __u32 pte_enc; /* Encoding in the HPTE (>>12) */
-};
-
-struct kvm_ppc_one_seg_page_size {
- __u32 page_shift; /* Base page shift of segment (or 0) */
- __u32 slb_enc; /* SLB encoding for BookS */
- struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
-};
-
-#define KVM_PPC_PAGE_SIZES_REAL 0x00000001
-#define KVM_PPC_1T_SEGMENTS 0x00000002
-#define KVM_PPC_NO_HASH 0x00000004
-
-struct kvm_ppc_smmu_info {
- __u64 flags;
- __u32 slb_size;
- __u16 data_keys; /* # storage keys supported for data */
- __u16 instr_keys; /* # storage keys supported for instructions */
- struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
-};
-
-/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
-struct kvm_ppc_resize_hpt {
- __u64 flags;
- __u32 shift;
- __u32 pad;
-};
-
#define KVMIO 0xAE
/* machine type bits, to be used as argument to KVM_CREATE_VM */
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
#define KVM_CAP_USER_NMI 22
-#ifdef __KVM_HAVE_GUEST_DEBUG
#define KVM_CAP_SET_GUEST_DEBUG 23
-#endif
#ifdef __KVM_HAVE_PIT
#define KVM_CAP_REINJECT_CONTROL 24
#endif
#define KVM_CAP_GUEST_MEMFD 234
#define KVM_CAP_VM_TYPES 235
-#ifdef KVM_CAP_IRQ_ROUTING
-
struct kvm_irq_routing_irqchip {
__u32 irqchip;
__u32 pin;
struct kvm_irq_routing_entry entries[];
};
-#endif
-
-#ifdef KVM_CAP_MCE
-/* x86 MCE */
-struct kvm_x86_mce {
- __u64 status;
- __u64 addr;
- __u64 misc;
- __u64 mcg_status;
- __u8 bank;
- __u8 pad1[7];
- __u64 pad2[3];
-};
-#endif
-
-#ifdef KVM_CAP_XEN_HVM
-#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
-#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
-#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
-#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
-#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
-#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
-#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
-#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
-
-struct kvm_xen_hvm_config {
- __u32 flags;
- __u32 msr;
- __u64 blob_addr_32;
- __u64 blob_addr_64;
- __u8 blob_size_32;
- __u8 blob_size_64;
- __u8 pad2[30];
-};
-#endif
-
#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
/*
* Available with KVM_CAP_IRQFD_RESAMPLE
struct kvm_userspace_memory_region2)
/* enable ucontrol for s390 */
-struct kvm_s390_ucas_mapping {
- __u64 user_addr;
- __u64 vcpu_addr;
- __u64 length;
-};
#define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping)
#define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping)
#define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long)
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
-struct kvm_s390_pv_sec_parm {
- __u64 origin;
- __u64 length;
-};
-
-struct kvm_s390_pv_unp {
- __u64 addr;
- __u64 size;
- __u64 tweak;
-};
-
-enum pv_cmd_dmp_id {
- KVM_PV_DUMP_INIT,
- KVM_PV_DUMP_CONFIG_STOR_STATE,
- KVM_PV_DUMP_COMPLETE,
- KVM_PV_DUMP_CPU,
-};
-
-struct kvm_s390_pv_dmp {
- __u64 subcmd;
- __u64 buff_addr;
- __u64 buff_len;
- __u64 gaddr; /* For dump storage state */
- __u64 reserved[4];
-};
-
-enum pv_cmd_info_id {
- KVM_PV_INFO_VM,
- KVM_PV_INFO_DUMP,
-};
-
-struct kvm_s390_pv_info_dump {
- __u64 dump_cpu_buffer_len;
- __u64 dump_config_mem_buffer_per_1m;
- __u64 dump_config_finalize_len;
-};
-
-struct kvm_s390_pv_info_vm {
- __u64 inst_calls_list[4];
- __u64 max_cpus;
- __u64 max_guests;
- __u64 max_guest_addr;
- __u64 feature_indication;
-};
-
-struct kvm_s390_pv_info_header {
- __u32 id;
- __u32 len_max;
- __u32 len_written;
- __u32 reserved;
-};
-
-struct kvm_s390_pv_info {
- struct kvm_s390_pv_info_header header;
- union {
- struct kvm_s390_pv_info_dump dump;
- struct kvm_s390_pv_info_vm vm;
- };
-};
-
-enum pv_cmd_id {
- KVM_PV_ENABLE,
- KVM_PV_DISABLE,
- KVM_PV_SET_SEC_PARMS,
- KVM_PV_UNPACK,
- KVM_PV_VERIFY,
- KVM_PV_PREP_RESET,
- KVM_PV_UNSHARE_ALL,
- KVM_PV_INFO,
- KVM_PV_DUMP,
- KVM_PV_ASYNC_CLEANUP_PREPARE,
- KVM_PV_ASYNC_CLEANUP_PERFORM,
-};
-
-struct kvm_pv_cmd {
- __u32 cmd; /* Command to be executed */
- __u16 rc; /* Ultravisor return code */
- __u16 rrc; /* Ultravisor return reason code */
- __u64 data; /* Data or address */
- __u32 flags; /* flags for future extensions. Must be 0 for now */
- __u32 reserved[3];
-};
-
/* Available with KVM_CAP_S390_PROTECTED */
#define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd)
#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr)
#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr)
-struct kvm_xen_hvm_attr {
- __u16 type;
- __u16 pad[3];
- union {
- __u8 long_mode;
- __u8 vector;
- __u8 runstate_update_flag;
- struct {
- __u64 gfn;
-#define KVM_XEN_INVALID_GFN ((__u64)-1)
- } shared_info;
- struct {
- __u32 send_port;
- __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
- __u32 flags;
-#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0)
-#define KVM_XEN_EVTCHN_UPDATE (1 << 1)
-#define KVM_XEN_EVTCHN_RESET (1 << 2)
- /*
- * Events sent by the guest are either looped back to
- * the guest itself (potentially on a different port#)
- * or signalled via an eventfd.
- */
- union {
- struct {
- __u32 port;
- __u32 vcpu;
- __u32 priority;
- } port;
- struct {
- __u32 port; /* Zero for eventfd */
- __s32 fd;
- } eventfd;
- __u32 padding[4];
- } deliver;
- } evtchn;
- __u32 xen_version;
- __u64 pad[8];
- } u;
-};
-
-
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
-#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
-#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
-#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
-#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
-#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
-#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
-
/* Per-vCPU Xen attributes */
#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr)
#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2)
#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2)
-struct kvm_xen_vcpu_attr {
- __u16 type;
- __u16 pad[3];
- union {
- __u64 gpa;
-#define KVM_XEN_INVALID_GPA ((__u64)-1)
- __u64 pad[8];
- struct {
- __u64 state;
- __u64 state_entry_time;
- __u64 time_running;
- __u64 time_runnable;
- __u64 time_blocked;
- __u64 time_offline;
- } runstate;
- __u32 vcpu_id;
- struct {
- __u32 port;
- __u32 priority;
- __u64 expires_ns;
- } timer;
- __u8 vector;
- } u;
-};
-
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6
-#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7
-#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8
-
-/* Secure Encrypted Virtualization command */
-enum sev_cmd_id {
- /* Guest initialization commands */
- KVM_SEV_INIT = 0,
- KVM_SEV_ES_INIT,
- /* Guest launch commands */
- KVM_SEV_LAUNCH_START,
- KVM_SEV_LAUNCH_UPDATE_DATA,
- KVM_SEV_LAUNCH_UPDATE_VMSA,
- KVM_SEV_LAUNCH_SECRET,
- KVM_SEV_LAUNCH_MEASURE,
- KVM_SEV_LAUNCH_FINISH,
- /* Guest migration commands (outgoing) */
- KVM_SEV_SEND_START,
- KVM_SEV_SEND_UPDATE_DATA,
- KVM_SEV_SEND_UPDATE_VMSA,
- KVM_SEV_SEND_FINISH,
- /* Guest migration commands (incoming) */
- KVM_SEV_RECEIVE_START,
- KVM_SEV_RECEIVE_UPDATE_DATA,
- KVM_SEV_RECEIVE_UPDATE_VMSA,
- KVM_SEV_RECEIVE_FINISH,
- /* Guest status and debug commands */
- KVM_SEV_GUEST_STATUS,
- KVM_SEV_DBG_DECRYPT,
- KVM_SEV_DBG_ENCRYPT,
- /* Guest certificates commands */
- KVM_SEV_CERT_EXPORT,
- /* Attestation report */
- KVM_SEV_GET_ATTESTATION_REPORT,
- /* Guest Migration Extension */
- KVM_SEV_SEND_CANCEL,
-
- KVM_SEV_NR_MAX,
-};
-
-struct kvm_sev_cmd {
- __u32 id;
- __u64 data;
- __u32 error;
- __u32 sev_fd;
-};
-
-struct kvm_sev_launch_start {
- __u32 handle;
- __u32 policy;
- __u64 dh_uaddr;
- __u32 dh_len;
- __u64 session_uaddr;
- __u32 session_len;
-};
-
-struct kvm_sev_launch_update_data {
- __u64 uaddr;
- __u32 len;
-};
-
-
-struct kvm_sev_launch_secret {
- __u64 hdr_uaddr;
- __u32 hdr_len;
- __u64 guest_uaddr;
- __u32 guest_len;
- __u64 trans_uaddr;
- __u32 trans_len;
-};
-
-struct kvm_sev_launch_measure {
- __u64 uaddr;
- __u32 len;
-};
-
-struct kvm_sev_guest_status {
- __u32 handle;
- __u32 policy;
- __u32 state;
-};
-
-struct kvm_sev_dbg {
- __u64 src_uaddr;
- __u64 dst_uaddr;
- __u32 len;
-};
-
-struct kvm_sev_attestation_report {
- __u8 mnonce[16];
- __u64 uaddr;
- __u32 len;
-};
-
-struct kvm_sev_send_start {
- __u32 policy;
- __u64 pdh_cert_uaddr;
- __u32 pdh_cert_len;
- __u64 plat_certs_uaddr;
- __u32 plat_certs_len;
- __u64 amd_certs_uaddr;
- __u32 amd_certs_len;
- __u64 session_uaddr;
- __u32 session_len;
-};
-
-struct kvm_sev_send_update_data {
- __u64 hdr_uaddr;
- __u32 hdr_len;
- __u64 guest_uaddr;
- __u32 guest_len;
- __u64 trans_uaddr;
- __u32 trans_len;
-};
-
-struct kvm_sev_receive_start {
- __u32 handle;
- __u32 policy;
- __u64 pdh_uaddr;
- __u32 pdh_len;
- __u64 session_uaddr;
- __u32 session_len;
-};
-
-struct kvm_sev_receive_update_data {
- __u64 hdr_uaddr;
- __u32 hdr_len;
- __u64 guest_uaddr;
- __u32 guest_len;
- __u64 trans_uaddr;
- __u32 trans_len;
-};
-
-#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
-#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
-#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
-
-struct kvm_assigned_pci_dev {
- __u32 assigned_dev_id;
- __u32 busnr;
- __u32 devfn;
- __u32 flags;
- __u32 segnr;
- union {
- __u32 reserved[11];
- };
-};
-
-#define KVM_DEV_IRQ_HOST_INTX (1 << 0)
-#define KVM_DEV_IRQ_HOST_MSI (1 << 1)
-#define KVM_DEV_IRQ_HOST_MSIX (1 << 2)
-
-#define KVM_DEV_IRQ_GUEST_INTX (1 << 8)
-#define KVM_DEV_IRQ_GUEST_MSI (1 << 9)
-#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10)
-
-#define KVM_DEV_IRQ_HOST_MASK 0x00ff
-#define KVM_DEV_IRQ_GUEST_MASK 0xff00
-
-struct kvm_assigned_irq {
- __u32 assigned_dev_id;
- __u32 host_irq; /* ignored (legacy field) */
- __u32 guest_irq;
- __u32 flags;
- union {
- __u32 reserved[12];
- };
-};
-
-struct kvm_assigned_msix_nr {
- __u32 assigned_dev_id;
- __u16 entry_nr;
- __u16 padding;
-};
-
-#define KVM_MAX_MSIX_PER_DEV 256
-struct kvm_assigned_msix_entry {
- __u32 assigned_dev_id;
- __u32 gsi;
- __u16 entry; /* The index of entry in the MSI-X table */
- __u16 padding[3];
-};
-
-#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
-
-/* Available with KVM_CAP_ARM_USER_IRQ */
-
-/* Bits for run->s.regs.device_irq_level */
-#define KVM_ARM_DEV_EL1_VTIMER (1 << 0)
-#define KVM_ARM_DEV_EL1_PTIMER (1 << 1)
-#define KVM_ARM_DEV_PMU (1 << 2)
-
-struct kvm_hyperv_eventfd {
- __u32 conn_id;
- __s32 fd;
- __u32 flags;
- __u32 padding[3];
-};
-
-#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
-#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
-
#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
/* Available with KVM_CAP_S390_ZPCI_OP */
#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op)
-struct kvm_s390_zpci_op {
- /* in */
- __u32 fh; /* target device */
- __u8 op; /* operation to perform */
- __u8 pad[3];
- union {
- /* for KVM_S390_ZPCIOP_REG_AEN */
- struct {
- __u64 ibv; /* Guest addr of interrupt bit vector */
- __u64 sb; /* Guest addr of summary bit */
- __u32 flags;
- __u32 noi; /* Number of interrupts */
- __u8 isc; /* Guest interrupt subclass */
- __u8 sbo; /* Offset of guest summary bit vector */
- __u16 pad;
- } reg_aen;
- __u64 reserved[8];
- } u;
-};
-
-/* types for kvm_s390_zpci_op->op */
-#define KVM_S390_ZPCIOP_REG_AEN 0
-#define KVM_S390_ZPCIOP_DEREG_AEN 1
-
-/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
-#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
-
/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes)
* *
*****************************************************************************/
-#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 16)
+#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17)
typedef unsigned long snd_pcm_uframes_t;
typedef signed long snd_pcm_sframes_t;
unsigned int rmask; /* W: requested masks */
unsigned int cmask; /* R: changed masks */
unsigned int info; /* R: Info flags for returned setup */
- unsigned int msbits; /* R: used most significant bits */
+ unsigned int msbits; /* R: used most significant bits (in sample bit-width) */
unsigned int rate_num; /* R: rate numerator */
unsigned int rate_den; /* R: rate denominator */
snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
-#define ARENA_SEC ".arena.1"
+#define ARENA_SEC ".addr_space.1"
enum libbpf_map_type {
LIBBPF_MAP_UNSPEC,
return syscall(__NR_memfd_create, name, flags);
}
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif
+
static int create_placeholder_fd(void)
{
int fd;
goto err_out;
}
if (map->def.type == BPF_MAP_TYPE_ARENA) {
- map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
- PROT_READ | PROT_WRITE,
+ map->mmaped = mmap((void *)(long)map->map_extra,
+ bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
map->fd, 0);
if (map->mmaped == MAP_FAILED) {
self.done = 1
extack_off = 20
elif self.nl_type == Netlink.NLMSG_DONE:
+ self.error = struct.unpack("i", self.raw[0:4])[0]
self.done = 1
extack_off = 4
presence = ''
for i in range(0, len(ref)):
presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}"
- if self.presence_type() == 'bit':
- code.append(presence + ' = 1;')
+ # Every layer below last is a nest, so we know it uses bit presence
+ # last layer is "self" and may be a complex type
+ if i == len(ref) - 1 and self.presence_type() != 'bit':
+ continue
+ code.append(presence + ' = 1;')
code += self._setter_lines(ri, member, presence)
func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}"
struct section *rsec;
struct reloc *reloc;
struct instruction *insn;
- unsigned long offset;
+ uint64_t offset;
/*
* Check for manually annotated dead ends.
if (dso->annotate_warned)
return -1;
- if (not_annotated) {
+ if (not_annotated || !sym->annotate2) {
err = symbol__annotate2(ms, evsel, &browser.arch);
if (err) {
char msg[BUFSIZ];
if (parch)
*parch = arch;
+ if (!list_empty(¬es->src->source))
+ return 0;
+
args.arch = arch;
args.ms = *ms;
if (annotate_opts.full_addr)
struct task_struct *curr;
struct mm_struct___old *mm_old;
struct mm_struct___new *mm_new;
+ struct sighand_struct *sighand;
switch (flags) {
case LCB_F_READ: /* rwsem */
break;
case LCB_F_SPIN: /* spinlock */
curr = bpf_get_current_task_btf();
- if (&curr->sighand->siglock == (void *)lock)
+ sighand = curr->sighand;
+
+ if (sighand && &sighand->siglock == (void *)lock)
return LCD_F_SIGHAND_LOCK;
break;
default:
.PP
\fB--quiet\fP Do not decode and print the system configuration header information.
.PP
++\fB--no-msr\fP Disable all the uses of the MSR driver.
++.PP
++\fB--no-perf\fP Disable all the uses of the perf API.
++.PP
\fB--interval seconds\fP overrides the default 5.0 second measurement interval.
.PP
\fB--num_iterations num\fP number of the measurement iterations.
.PP
\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
.PP
-\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
.PP
-\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
+\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBGFXAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
+.PP
+\fBSAM%mc6\fP The percentage of time the SA Media is in the "module C6" state, mc6, during the measurement interval. From /sys/class/drm/card0/gt/gt1/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
+.PP
+\fBSAMMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBSAMAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
.PP
\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters.
.PP
Busy% = MPERF_delta/TSC_delta
-Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval
+Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval
Note that these calculations depend on TSC_delta, so they
are not reliable during intervals when TSC_MHz is not running at the base frequency.
* turbostat -- show CPU frequency and C-state residency
* on modern Intel and AMD processors.
*
- * Copyright (c) 2023 Intel Corporation.
+ * Copyright (c) 2024 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <stdbool.h>
+#include <assert.h>
+#include <linux/kernel.h>
#define UNUSED(x) (void)(x)
#define NAME_BYTES 20
#define PATH_BYTES 128
+#define MAX_NOFILE 0x8000
+
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
+enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR };
+enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR };
struct msr_counter {
unsigned int msr_num;
{ 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
{ 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAM%mc6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAMMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAMAMHz", "", 0, 0, 0, NULL, 0 },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
#define BIC_IPC (1ULL << 52)
#define BIC_CORE_THROT_CNT (1ULL << 53)
#define BIC_UNCORE_MHZ (1ULL << 54)
+#define BIC_SAM_mc6 (1ULL << 55)
+#define BIC_SAMMHz (1ULL << 56)
+#define BIC_SAMACTMHz (1ULL << 57)
#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
+#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
+#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6)
#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
+struct amperf_group_fd;
+
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
int *fd_instr_count_percpu;
+struct amperf_group_fd *fd_amperf_percpu; /* File descriptors for perf group with APERF and MPERF counters. */
struct timeval interval_tv = { 5, 0 };
struct timespec interval_ts = { 5, 0 };
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
-unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
-unsigned int gfx_cur_mhz;
-unsigned int gfx_act_mhz;
unsigned int tj_max;
unsigned int tj_max_override;
double rapl_power_units, rapl_time_units;
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;
int ignore_stdin;
+bool no_msr;
+bool no_perf;
+enum amperf_source amperf_source;
+
+enum gfx_sysfs_idx {
+ GFX_rc6,
+ GFX_MHz,
+ GFX_ACTMHz,
+ SAM_mc6,
+ SAM_MHz,
+ SAM_ACTMHz,
+ GFX_MAX
+};
+
+struct gfx_sysfs_info {
+ const char *path;
+ FILE *fp;
+ unsigned int val;
+ unsigned long long val_ull;
+};
+
+static struct gfx_sysfs_info gfx_info[GFX_MAX];
int get_msr(int cpu, off_t offset, unsigned long long *msr);
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | PC2 | PC6,
.cst_limit = CST_LIMIT_ICX,
+ .has_msr_core_c1_res = 1,
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
+/* Indexes used to map data read from perf and MSRs into global variables */
+enum rapl_rci_index {
+ RAPL_RCI_INDEX_ENERGY_PKG = 0,
+ RAPL_RCI_INDEX_ENERGY_CORES = 1,
+ RAPL_RCI_INDEX_DRAM = 2,
+ RAPL_RCI_INDEX_GFX = 3,
+ RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
+ RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
+ RAPL_RCI_INDEX_CORE_ENERGY = 6,
+ NUM_RAPL_COUNTERS,
+};
+
+enum rapl_unit {
+ RAPL_UNIT_INVALID,
+ RAPL_UNIT_JOULES,
+ RAPL_UNIT_WATTS,
+};
+
+struct rapl_counter_info_t {
+ unsigned long long data[NUM_RAPL_COUNTERS];
+ enum rapl_source source[NUM_RAPL_COUNTERS];
+ unsigned long long flags[NUM_RAPL_COUNTERS];
+ double scale[NUM_RAPL_COUNTERS];
+ enum rapl_unit unit[NUM_RAPL_COUNTERS];
+
+ union {
+ /* Active when source == RAPL_SOURCE_MSR */
+ struct {
+ unsigned long long msr[NUM_RAPL_COUNTERS];
+ unsigned long long msr_mask[NUM_RAPL_COUNTERS];
+ int msr_shift[NUM_RAPL_COUNTERS];
+ };
+ };
+
+ int fd_perf;
+};
+
+/* struct rapl_counter_info_t for each RAPL domain */
+struct rapl_counter_info_t *rapl_counter_info_perdomain;
+
+#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)
+
+struct rapl_counter_arch_info {
+ int feature_mask; /* Mask for testing if the counter is supported on host */
+ const char *perf_subsys;
+ const char *perf_name;
+ unsigned long long msr;
+ unsigned long long msr_mask;
+ int msr_shift; /* Positive mean shift right, negative mean shift left */
+ double *platform_rapl_msr_scale; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */
+ unsigned int rci_index; /* Maps data from perf counters to global variables */
+ unsigned long long bic;
+ double compat_scale; /* Some counters require constant scaling to be in the same range as other, similar ones */
+ unsigned long long flags;
+};
+
+static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
+ {
+ .feature_mask = RAPL_PKG,
+ .perf_subsys = "power",
+ .perf_name = "energy-pkg",
+ .msr = MSR_PKG_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+ .bic = BIC_PkgWatt | BIC_Pkg_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_AMD_F17H,
+ .perf_subsys = "power",
+ .perf_name = "energy-pkg",
+ .msr = MSR_PKG_ENERGY_STAT,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+ .bic = BIC_PkgWatt | BIC_Pkg_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_CORE_ENERGY_STATUS,
+ .perf_subsys = "power",
+ .perf_name = "energy-cores",
+ .msr = MSR_PP0_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
+ .bic = BIC_CorWatt | BIC_Cor_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_DRAM,
+ .perf_subsys = "power",
+ .perf_name = "energy-ram",
+ .msr = MSR_DRAM_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_dram_energy_units,
+ .rci_index = RAPL_RCI_INDEX_DRAM,
+ .bic = BIC_RAMWatt | BIC_RAM_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_GFX,
+ .perf_subsys = "power",
+ .perf_name = "energy-gpu",
+ .msr = MSR_PP1_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_GFX,
+ .bic = BIC_GFXWatt | BIC_GFX_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_PKG_PERF_STATUS,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_PKG_PERF_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_time_units,
+ .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
+ .bic = BIC_PKG__,
+ .compat_scale = 100.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_DRAM_PERF_STATUS,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_DRAM_PERF_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_time_units,
+ .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
+ .bic = BIC_RAM__,
+ .compat_scale = 100.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_AMD_F17H,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_CORE_ENERGY_STAT,
+ .msr_mask = 0xFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
+ .bic = BIC_CorWatt | BIC_Cor_J,
+ .compat_scale = 1.0,
+ .flags = 0,
+ },
+};
+
+struct rapl_counter {
+ unsigned long long raw_value;
+ enum rapl_unit unit;
+ double scale;
+};
+
struct thread_data {
struct timeval tv_begin;
struct timeval tv_end;
unsigned long long c7;
unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */
unsigned int core_temp_c;
- unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */
+ struct rapl_counter core_energy; /* MSR_CORE_ENERGY_STAT */
unsigned int core_id;
unsigned long long core_throt_cnt;
unsigned long long counter[MAX_ADDED_COUNTERS];
unsigned long long pc8;
unsigned long long pc9;
unsigned long long pc10;
- unsigned long long cpu_lpi;
- unsigned long long sys_lpi;
+ long long cpu_lpi;
+ long long sys_lpi;
unsigned long long pkg_wtd_core_c0;
unsigned long long pkg_any_core_c0;
unsigned long long pkg_any_gfxe_c0;
long long gfx_rc6_ms;
unsigned int gfx_mhz;
unsigned int gfx_act_mhz;
+ long long sam_mc6_ms;
+ unsigned int sam_mhz;
+ unsigned int sam_act_mhz;
unsigned int package_id;
- unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */
- unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
- unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */
- unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */
- unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
- unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
+ struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */
+ struct rapl_counter energy_dram; /* MSR_DRAM_ENERGY_STATUS */
+ struct rapl_counter energy_cores; /* MSR_PP0_ENERGY_STATUS */
+ struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */
+ struct rapl_counter rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
+ struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
unsigned int pkg_temp_c;
unsigned int uncore_mhz;
unsigned long long counter[MAX_ADDED_COUNTERS];
struct msr_counter *pp;
} sys;
+void free_sys_counters(void)
+{
+ struct msr_counter *p = sys.tp, *pnext = NULL;
+
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ p = sys.cp, pnext = NULL;
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ p = sys.pp, pnext = NULL;
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ sys.added_thread_counters = 0;
+ sys.added_core_counters = 0;
+ sys.added_package_counters = 0;
+ sys.tp = NULL;
+ sys.cp = NULL;
+ sys.pp = NULL;
+}
+
struct system_summary {
struct thread_data threads;
struct core_data cores;
sprintf(pathname, "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDONLY);
if (fd < 0)
- err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+ err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
+ "or run with --no-msr, or run as root", pathname);
fd_percpu[cpu] = fd;
return fd;
}
+static void bic_disable_msr_access(void)
+{
+ const unsigned long bic_msrs =
+ BIC_SMI |
+ BIC_CPU_c1 |
+ BIC_CPU_c3 |
+ BIC_CPU_c6 |
+ BIC_CPU_c7 |
+ BIC_Mod_c6 |
+ BIC_CoreTmp |
+ BIC_Totl_c0 |
+ BIC_Any_c0 |
+ BIC_GFX_c0 |
+ BIC_CPUGFX |
+ BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_PkgTmp;
+
+ bic_enabled &= ~bic_msrs;
+
+ free_sys_counters();
+}
+
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
+ assert(!no_perf);
+
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
}
-static int perf_instr_count_open(int cpu_num)
+static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
{
- struct perf_event_attr pea;
- int fd;
+ struct perf_event_attr attr;
+ const pid_t pid = -1;
+ const unsigned long flags = 0;
- memset(&pea, 0, sizeof(struct perf_event_attr));
- pea.type = PERF_TYPE_HARDWARE;
- pea.size = sizeof(struct perf_event_attr);
- pea.config = PERF_COUNT_HW_INSTRUCTIONS;
+ assert(!no_perf);
- /* counter for cpu_num, including user + kernel and all processes */
- fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
- if (fd == -1) {
- warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
- BIC_NOT_PRESENT(BIC_IPC);
- }
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+
+ attr.type = type;
+ attr.size = sizeof(struct perf_event_attr);
+ attr.config = config;
+ attr.disabled = 0;
+ attr.sample_type = PERF_SAMPLE_IDENTIFIER;
+ attr.read_format = read_format;
+
+ const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
return fd;
}
if (fd_instr_count_percpu[cpu])
return fd_instr_count_percpu[cpu];
- fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
+ fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
return fd_instr_count_percpu[cpu];
}
{
ssize_t retval;
+ assert(!no_msr);
+
retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
if (retval != sizeof *msr)
return 0;
}
+int probe_msr(int cpu, off_t offset)
+{
+ ssize_t retval;
+ unsigned long long dummy;
+
+ assert(!no_msr);
+
+ retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);
+
+ if (retval != sizeof(dummy))
+ return 1;
+
+ return 0;
+}
+
#define MAX_DEFERRED 16
char *deferred_add_names[MAX_DEFERRED];
char *deferred_skip_names[MAX_DEFERRED];
" Override default 5-second measurement interval\n"
" -J, --Joules displays energy in Joules instead of Watts\n"
" -l, --list list column headers only\n"
+ " -M, --no-msr Disable all uses of the MSR driver\n"
+ " -P, --no-perf Disable all uses of the perf API\n"
" -n, --num_iterations num\n"
" number of the measurement iterations\n"
" -N, --header_iterations num\n"
if (DO_BIC(BIC_GFXACTMHz))
outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_SAM_mc6))
+ outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_SAMMHz))
+ outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : ""));
+
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
if (DO_BIC(BIC_Any_c0))
outp += sprintf(outp, "SMI: %d\n", t->smi_count);
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
+ outp +=
+ sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ t->counter[i], mp->path);
}
}
- if (c) {
+ if (c && is_cpu_first_thread_in_core(t, c, p)) {
outp += sprintf(outp, "core: %d\n", c->core_id);
outp += sprintf(outp, "c3: %016llX\n", c->c3);
outp += sprintf(outp, "c6: %016llX\n", c->c6);
outp += sprintf(outp, "c7: %016llX\n", c->c7);
outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
- outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
+
+ const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale;
+ const double energy_scale = c->core_energy.scale;
+
+ if (c->core_energy.unit == RAPL_UNIT_JOULES)
+ outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
+ outp +=
+ sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ c->counter[i], mp->path);
}
outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
}
- if (p) {
+ if (p && is_cpu_first_core_in_package(t, c, p)) {
outp += sprintf(outp, "package: %d\n", p->package_id);
outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
- outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
- outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
- outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
- outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
- outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
- outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
+ outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value);
+ outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value);
+ outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value);
+ outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value);
+ outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value);
+ outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value);
outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
+ outp +=
+ sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ p->counter[i], mp->path);
}
}
return 0;
}
+double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval)
+{
+ assert(desired_unit != RAPL_UNIT_INVALID);
+
+ /*
+ * For now we don't expect anything other than joules,
+ * so just simplify the logic.
+ */
+ assert(c->unit == RAPL_UNIT_JOULES);
+
+ const double scaled = c->raw_value * c->scale;
+
+ if (desired_unit == RAPL_UNIT_WATTS)
+ return scaled / interval;
+ return scaled;
+}
+
/*
* column formatting convention & formats
*/
if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));
/* print per-package data only for 1st core in package */
if (!is_cpu_first_core_in_package(t, c, p))
if (DO_BIC(BIC_GFXACTMHz))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
+ /* SAMmc6 */
+ if (DO_BIC(BIC_SAM_mc6)) {
+ if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */
+ outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
+ } else {
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ p->sam_mc6_ms / 10.0 / interval_float);
+ }
+ }
+
+ /* SAMMHz */
+ if (DO_BIC(BIC_SAMMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz);
+
+ /* SAMACTMHz */
+ if (DO_BIC(BIC_SAMACTMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz);
+
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
if (DO_BIC(BIC_Pkgpc10))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
- if (DO_BIC(BIC_CPU_LPI))
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
- if (DO_BIC(BIC_SYS_LPI))
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+ if (DO_BIC(BIC_CPU_LPI)) {
+ if (p->cpu_lpi >= 0)
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+ else
+ outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+ }
+ if (DO_BIC(BIC_SYS_LPI)) {
+ if (p->sys_lpi >= 0)
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+ else
+ outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+ }
if (DO_BIC(BIC_PkgWatt))
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
-
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_GFXWatt))
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_RAMWatt))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- p->energy_dram * rapl_dram_energy_units / interval_float);
+ rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_Pkg_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_GFX_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_RAM_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_PKG__))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+ rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_RAM__))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+ rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
/* UncMHz */
if (DO_BIC(BIC_UNCORE_MHZ))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
old->gfx_mhz = new->gfx_mhz;
old->gfx_act_mhz = new->gfx_act_mhz;
- old->energy_pkg = new->energy_pkg - old->energy_pkg;
- old->energy_cores = new->energy_cores - old->energy_cores;
- old->energy_gfx = new->energy_gfx - old->energy_gfx;
- old->energy_dram = new->energy_dram - old->energy_dram;
- old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
- old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
+ /* flag an error when mc6 counter resets/wraps */
+ if (old->sam_mc6_ms > new->sam_mc6_ms)
+ old->sam_mc6_ms = -1;
+ else
+ old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms;
+
+ old->sam_mhz = new->sam_mhz;
+ old->sam_act_mhz = new->sam_act_mhz;
+
+ old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value;
+ old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value;
+ old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value;
+ old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value;
+ old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
+ old->rapl_dram_perf_status.raw_value =
+ new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
old->core_throt_cnt = new->core_throt_cnt;
old->mc6_us = new->mc6_us - old->mc6_us;
- DELTA_WRAP32(new->core_energy, old->core_energy);
+ DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
return retval;
}
+void rapl_counter_clear(struct rapl_counter *c)
+{
+ c->raw_value = 0;
+ c->scale = 0.0;
+ c->unit = RAPL_UNIT_INVALID;
+}
+
void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
c->c7 = 0;
c->mc6_us = 0;
c->core_temp_c = 0;
- c->core_energy = 0;
+ rapl_counter_clear(&c->core_energy);
c->core_throt_cnt = 0;
p->pkg_wtd_core_c0 = 0;
p->cpu_lpi = 0;
p->sys_lpi = 0;
- p->energy_pkg = 0;
- p->energy_dram = 0;
- p->energy_cores = 0;
- p->energy_gfx = 0;
- p->rapl_pkg_perf_status = 0;
- p->rapl_dram_perf_status = 0;
+ rapl_counter_clear(&p->energy_pkg);
+ rapl_counter_clear(&p->energy_dram);
+ rapl_counter_clear(&p->energy_cores);
+ rapl_counter_clear(&p->energy_gfx);
+ rapl_counter_clear(&p->rapl_pkg_perf_status);
+ rapl_counter_clear(&p->rapl_dram_perf_status);
p->pkg_temp_c = 0;
p->gfx_rc6_ms = 0;
p->uncore_mhz = 0;
p->gfx_mhz = 0;
p->gfx_act_mhz = 0;
+ p->sam_mc6_ms = 0;
+ p->sam_mhz = 0;
+ p->sam_act_mhz = 0;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
t->counter[i] = 0;
p->counter[i] = 0;
}
+void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
+{
+ /* Copy unit and scale from src if dst is not initialized */
+ if (dst->unit == RAPL_UNIT_INVALID) {
+ dst->unit = src->unit;
+ dst->scale = src->scale;
+ }
+
+ assert(dst->unit == src->unit);
+ assert(dst->scale == src->scale);
+
+ dst->raw_value += src->raw_value;
+}
+
int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
- average.cores.core_energy += c->core_energy;
+ rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
average.packages.cpu_lpi = p->cpu_lpi;
average.packages.sys_lpi = p->sys_lpi;
- average.packages.energy_pkg += p->energy_pkg;
- average.packages.energy_dram += p->energy_dram;
- average.packages.energy_cores += p->energy_cores;
- average.packages.energy_gfx += p->energy_gfx;
+ rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg);
+ rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram);
+ rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores);
+ rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx);
average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
average.packages.uncore_mhz = p->uncore_mhz;
average.packages.gfx_mhz = p->gfx_mhz;
average.packages.gfx_act_mhz = p->gfx_act_mhz;
+ average.packages.sam_mc6_ms = p->sam_mc6_ms;
+ average.packages.sam_mhz = p->sam_mhz;
+ average.packages.sam_act_mhz = p->sam_act_mhz;
average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
- average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
- average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
+ rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status);
+ rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- if (mp->format == FORMAT_RAW)
- continue;
- average.packages.counter[i] += p->counter[i];
+ if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
+ average.packages.counter[i] = p->counter[i];
+ else
+ average.packages.counter[i] += p->counter[i];
}
return 0;
}
int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
{
if (mp->msr_num != 0) {
+ assert(!no_msr);
if (get_msr(cpu, mp->msr_num, counterp))
return -1;
} else {
{
char path[128];
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package,
+ sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package,
die);
return (snapshot_sysfs_counter(path) / 1000);
return epb;
msr_fallback:
+ if (no_msr)
+ return -1;
+
get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
return msr & 0xf;
return 0;
}
-/*
- * get_counters(...)
- * migrate to cpu
- * acquire and record local counters for that cpu
- */
-int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+struct amperf_group_fd {
+ int aperf; /* Also the group descriptor */
+ int mperf;
+};
+
+static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
{
- int cpu = t->cpu_id;
- unsigned long long msr;
- int aperf_mperf_retry_count = 0;
- struct msr_counter *mp;
- int i;
+ int fdmt;
+ int bytes_read;
+ char buf[64];
+ int ret = -1;
- if (cpu_migrate(cpu)) {
- fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
- return -1;
+ fdmt = open(path, O_RDONLY, 0);
+ if (fdmt == -1) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
}
- gettimeofday(&t->tv_begin, (struct timezone *)NULL);
+ bytes_read = read(fdmt, buf, sizeof(buf) - 1);
+ if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
- if (first_counter_read)
- get_apic_id(t);
-retry:
- t->tsc = rdtsc(); /* we are running on local CPU of interest */
+ buf[bytes_read] = '\0';
- if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
- || soft_c1_residency_display(BIC_Avg_MHz)) {
- unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+ if (sscanf(buf, parse_format, value_ptr) != 1) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
- /*
- * The TSC, APERF and MPERF must be read together for
- * APERF/MPERF and MPERF/TSC to give accurate results.
- *
- * Unfortunately, APERF and MPERF are read by
- * individual system call, so delays may occur
- * between them. If the time to read them
- * varies by a large amount, we re-read them.
- */
+ ret = 0;
- /*
- * This initial dummy APERF read has been seen to
- * reduce jitter in the subsequent reads.
- */
+cleanup_and_exit:
+ close(fdmt);
+ return ret;
+}
- if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
- return -3;
+static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
+{
+ unsigned int v;
+ int status;
- t->tsc = rdtsc(); /* re-read close to APERF */
+ status = read_perf_counter_info(path, parse_format, &v);
+ if (status)
+ v = -1;
- tsc_before = t->tsc;
+ return v;
+}
- if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
- return -3;
+static unsigned int read_msr_type(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/type";
+ const char *const format = "%u";
- tsc_between = rdtsc();
+ return read_perf_counter_info_n(path, format);
+}
- if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
- return -4;
+static unsigned int read_aperf_config(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/events/aperf";
+ const char *const format = "event=%x";
- tsc_after = rdtsc();
+ return read_perf_counter_info_n(path, format);
+}
- aperf_time = tsc_between - tsc_before;
- mperf_time = tsc_after - tsc_between;
+static unsigned int read_mperf_config(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/events/mperf";
+ const char *const format = "event=%x";
- /*
- * If the system call latency to read APERF and MPERF
- * differ by more than 2x, then try again.
- */
- if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
- aperf_mperf_retry_count++;
- if (aperf_mperf_retry_count < 5)
- goto retry;
- else
- warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
- }
- aperf_mperf_retry_count = 0;
+ return read_perf_counter_info_n(path, format);
+}
- t->aperf = t->aperf * aperf_mperf_multiplier;
- t->mperf = t->mperf * aperf_mperf_multiplier;
- }
+static unsigned int read_perf_type(const char *subsys)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/type";
+ const char *const format = "%u";
+ char path[128];
- if (DO_BIC(BIC_IPC))
- if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
- return -4;
+ snprintf(path, sizeof(path), path_format, subsys);
- if (DO_BIC(BIC_IRQ))
- t->irq_count = irqs_per_cpu[cpu];
- if (DO_BIC(BIC_SMI)) {
- if (get_msr(cpu, MSR_SMI_COUNT, &msr))
- return -5;
- t->smi_count = msr & 0xFFFFFFFF;
- }
- if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) {
- if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
- return -6;
- }
+ return read_perf_counter_info_n(path, format);
+}
- for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
- if (get_mp(cpu, mp, &t->counter[i]))
- return -10;
- }
+static unsigned int read_rapl_config(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s";
+ const char *const format = "event=%x";
+ char path[128];
- /* collect core counters only for 1st thread in core */
- if (!is_cpu_first_thread_in_core(t, c, p))
- goto done;
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
- if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
- if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
- return -6;
- }
+ return read_perf_counter_info_n(path, format);
+}
- if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) {
- if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
- return -7;
- } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) {
- if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
- return -7;
- }
+static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit";
+ const char *const format = "%s";
+ char path[128];
+ char unit_buffer[16];
- if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
- if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
- return -8;
- else if (t->is_atom) {
- /*
- * For Atom CPUs that has core cstate deeper than c6,
- * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
- * Minus CC7 (and deeper cstates) residency to get
- * accturate cc6 residency.
- */
- c->c6 -= c->c7;
- }
- }
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
- if (DO_BIC(BIC_Mod_c6))
- if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
- return -8;
+ read_perf_counter_info(path, format, &unit_buffer);
+ if (strcmp("Joules", unit_buffer) == 0)
+ return RAPL_UNIT_JOULES;
- if (DO_BIC(BIC_CoreTmp)) {
- if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
- return -9;
- c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
- }
+ return RAPL_UNIT_INVALID;
+}
- if (DO_BIC(BIC_CORE_THROT_CNT))
- get_core_throt_cnt(cpu, &c->core_throt_cnt);
+static double read_perf_rapl_scale(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale";
+ const char *const format = "%lf";
+ char path[128];
+ double scale;
- if (platform->rapl_msrs & RAPL_AMD_F17H) {
- if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
- return -14;
- c->core_energy = msr & 0xFFFFFFFF;
- }
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
- for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
- if (get_mp(cpu, mp, &c->counter[i]))
- return -10;
- }
+ if (read_perf_counter_info(path, format, &scale))
+ return 0.0;
- /* collect package counters only for 1st core in package */
- if (!is_cpu_first_core_in_package(t, c, p))
- goto done;
+ return scale;
+}
- if (DO_BIC(BIC_Totl_c0)) {
- if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
- return -10;
- }
- if (DO_BIC(BIC_Any_c0)) {
- if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
- return -11;
- }
- if (DO_BIC(BIC_GFX_c0)) {
- if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
- return -12;
- }
- if (DO_BIC(BIC_CPUGFX)) {
- if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
- return -13;
- }
- if (DO_BIC(BIC_Pkgpc3))
- if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
- return -9;
- if (DO_BIC(BIC_Pkgpc6)) {
+static struct amperf_group_fd open_amperf_fd(int cpu)
+{
+ const unsigned int msr_type = read_msr_type();
+ const unsigned int aperf_config = read_aperf_config();
+ const unsigned int mperf_config = read_mperf_config();
+ struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 };
+
+ fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP);
+ fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP);
+
+ return fds;
+}
+
+static int get_amperf_fd(int cpu)
+{
+ assert(fd_amperf_percpu);
+
+ if (fd_amperf_percpu[cpu].aperf)
+ return fd_amperf_percpu[cpu].aperf;
+
+ fd_amperf_percpu[cpu] = open_amperf_fd(cpu);
+
+ return fd_amperf_percpu[cpu].aperf;
+}
+
+/* Read APERF, MPERF and TSC using the perf API. */
+static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu)
+{
+ union {
+ struct {
+ unsigned long nr_entries;
+ unsigned long aperf;
+ unsigned long mperf;
+ };
+
+ unsigned long as_array[3];
+ } cnt;
+
+ const int fd_amperf = get_amperf_fd(cpu);
+
+ /*
+ * Read the TSC with rdtsc, because we want the absolute value and not
+ * the offset from the start of the counter.
+ */
+ t->tsc = rdtsc();
+
+ const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array));
+
+ if (n != sizeof(cnt.as_array))
+ return -2;
+
+ t->aperf = cnt.aperf * aperf_mperf_multiplier;
+ t->mperf = cnt.mperf * aperf_mperf_multiplier;
+
+ return 0;
+}
+
+/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */
+static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu)
+{
+ unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+ int aperf_mperf_retry_count = 0;
+
+ /*
+ * The TSC, APERF and MPERF must be read together for
+ * APERF/MPERF and MPERF/TSC to give accurate results.
+ *
+ * Unfortunately, APERF and MPERF are read by
+ * individual system call, so delays may occur
+ * between them. If the time to read them
+ * varies by a large amount, we re-read them.
+ */
+
+ /*
+ * This initial dummy APERF read has been seen to
+ * reduce jitter in the subsequent reads.
+ */
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+retry:
+ t->tsc = rdtsc(); /* re-read close to APERF */
+
+ tsc_before = t->tsc;
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+ tsc_between = rdtsc();
+
+ if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
+ return -4;
+
+ tsc_after = rdtsc();
+
+ aperf_time = tsc_between - tsc_before;
+ mperf_time = tsc_after - tsc_between;
+
+ /*
+ * If the system call latency to read APERF and MPERF
+ * differ by more than 2x, then try again.
+ */
+ if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
+ aperf_mperf_retry_count++;
+ if (aperf_mperf_retry_count < 5)
+ goto retry;
+ else
+ warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
+ }
+ aperf_mperf_retry_count = 0;
+
+ t->aperf = t->aperf * aperf_mperf_multiplier;
+ t->mperf = t->mperf * aperf_mperf_multiplier;
+
+ return 0;
+}
+
+size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci)
+{
+ size_t ret = 0;
+
+ for (int i = 0; i < NUM_RAPL_COUNTERS; ++i)
+ if (rci->source[i] == RAPL_SOURCE_PERF)
+ ++ret;
+
+ return ret;
+}
+
+void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx)
+{
+ rc->raw_value = rci->data[idx];
+ rc->unit = rci->unit[idx];
+ rc->scale = rci->scale[idx];
+}
+
+int get_rapl_counters(int cpu, int domain, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long perf_data[NUM_RAPL_COUNTERS + 1];
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain];
+
+ if (debug)
+ fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain);
+
+ assert(rapl_counter_info_perdomain);
+
+ /*
+ * If we have any perf counters to read, read them all now, in bulk
+ */
+ if (rci->fd_perf != -1) {
+ size_t num_perf_counters = rapl_counter_info_count_perf(rci);
+ const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
+ const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data));
+
+ if (actual_read_size != expected_read_size)
+ err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size,
+ actual_read_size);
+ }
+
+ for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) {
+ switch (rci->source[i]) {
+ case RAPL_SOURCE_NONE:
+ break;
+
+ case RAPL_SOURCE_PERF:
+ assert(pi < ARRAY_SIZE(perf_data));
+ assert(rci->fd_perf != -1);
+
+ if (debug)
+ fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n",
+ i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]);
+
+ rci->data[i] = perf_data[pi];
+
+ ++pi;
+ break;
+
+ case RAPL_SOURCE_MSR:
+ if (debug)
+ fprintf(stderr, "Reading rapl counter via msr at %u\n", i);
+
+ assert(!no_msr);
+ if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) {
+ if (get_msr_sum(cpu, rci->msr[i], &rci->data[i]))
+ return -13 - i;
+ } else {
+ if (get_msr(cpu, rci->msr[i], &rci->data[i]))
+ return -13 - i;
+ }
+
+ rci->data[i] &= rci->msr_mask[i];
+ if (rci->msr_shift[i] >= 0)
+ rci->data[i] >>= abs(rci->msr_shift[i]);
+ else
+ rci->data[i] <<= abs(rci->msr_shift[i]);
+
+ break;
+ }
+ }
+
+ _Static_assert(NUM_RAPL_COUNTERS == 7);
+ write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG);
+ write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES);
+ write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM);
+ write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX);
+ write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS);
+ write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS);
+ write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY);
+
+ return 0;
+}
+
+/*
+ * get_counters(...)
+ * migrate to cpu
+ * acquire and record local counters for that cpu
+ */
+int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ int cpu = t->cpu_id;
+ unsigned long long msr;
+ struct msr_counter *mp;
+ int i;
+ int status;
+
+ if (cpu_migrate(cpu)) {
+ fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu);
+ return -1;
+ }
+
+ gettimeofday(&t->tv_begin, (struct timezone *)NULL);
+
+ if (first_counter_read)
+ get_apic_id(t);
+
+ t->tsc = rdtsc(); /* we are running on local CPU of interest */
+
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+ || soft_c1_residency_display(BIC_Avg_MHz)) {
+ int status = -1;
+
+ assert(!no_perf || !no_msr);
+
+ switch (amperf_source) {
+ case AMPERF_SOURCE_PERF:
+ status = read_aperf_mperf_tsc_perf(t, cpu);
+ break;
+ case AMPERF_SOURCE_MSR:
+ status = read_aperf_mperf_tsc_msr(t, cpu);
+ break;
+ }
+
+ if (status != 0)
+ return status;
+ }
+
+ if (DO_BIC(BIC_IPC))
+ if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
+ return -4;
+
+ if (DO_BIC(BIC_IRQ))
+ t->irq_count = irqs_per_cpu[cpu];
+ if (DO_BIC(BIC_SMI)) {
+ if (get_msr(cpu, MSR_SMI_COUNT, &msr))
+ return -5;
+ t->smi_count = msr & 0xFFFFFFFF;
+ }
+ if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) {
+ if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
+ return -6;
+ }
+
+ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+ if (get_mp(cpu, mp, &t->counter[i]))
+ return -10;
+ }
+
+ /* collect core counters only for 1st thread in core */
+ if (!is_cpu_first_thread_in_core(t, c, p))
+ goto done;
+
+ if (platform->has_per_core_rapl) {
+ status = get_rapl_counters(cpu, c->core_id, c, p);
+ if (status != 0)
+ return status;
+ }
+
+ if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
+ if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
+ return -6;
+ }
+
+ if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) {
+ if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
+ return -7;
+ } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) {
+ if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
+ return -7;
+ }
+
+ if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
+ if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
+ return -8;
+ else if (t->is_atom) {
+ /*
+ * For Atom CPUs that has core cstate deeper than c6,
+ * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
+ * Minus CC7 (and deeper cstates) residency to get
+ * accturate cc6 residency.
+ */
+ c->c6 -= c->c7;
+ }
+ }
+
+ if (DO_BIC(BIC_Mod_c6))
+ if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
+ return -8;
+
+ if (DO_BIC(BIC_CoreTmp)) {
+ if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
+ return -9;
+ c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
+ }
+
+ if (DO_BIC(BIC_CORE_THROT_CNT))
+ get_core_throt_cnt(cpu, &c->core_throt_cnt);
+
+ for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+ if (get_mp(cpu, mp, &c->counter[i]))
+ return -10;
+ }
+
+ /* collect package counters only for 1st core in package */
+ if (!is_cpu_first_core_in_package(t, c, p))
+ goto done;
+
+ if (DO_BIC(BIC_Totl_c0)) {
+ if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
+ return -10;
+ }
+ if (DO_BIC(BIC_Any_c0)) {
+ if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
+ return -11;
+ }
+ if (DO_BIC(BIC_GFX_c0)) {
+ if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
+ return -12;
+ }
+ if (DO_BIC(BIC_CPUGFX)) {
+ if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
+ return -13;
+ }
+ if (DO_BIC(BIC_Pkgpc3))
+ if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
+ return -9;
+ if (DO_BIC(BIC_Pkgpc6)) {
if (platform->has_msr_atom_pkg_c6_residency) {
if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
return -10;
if (DO_BIC(BIC_SYS_LPI))
p->sys_lpi = cpuidle_cur_sys_lpi_us;
- if (platform->rapl_msrs & RAPL_PKG) {
- if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
- return -13;
- p->energy_pkg = msr;
- }
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
- if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
- return -14;
- p->energy_cores = msr;
- }
- if (platform->rapl_msrs & RAPL_DRAM) {
- if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
- return -15;
- p->energy_dram = msr;
- }
- if (platform->rapl_msrs & RAPL_GFX) {
- if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
- return -16;
- p->energy_gfx = msr;
- }
- if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
- if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
- return -16;
- p->rapl_pkg_perf_status = msr;
- }
- if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
- if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
- return -16;
- p->rapl_dram_perf_status = msr;
- }
- if (platform->rapl_msrs & RAPL_AMD_F17H) {
- if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
- return -13;
- p->energy_pkg = msr;
+ if (!platform->has_per_core_rapl) {
+ status = get_rapl_counters(cpu, p->package_id, c, p);
+ if (status != 0)
+ return status;
}
+
if (DO_BIC(BIC_PkgTmp)) {
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
return -17;
p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
}
- if (DO_BIC(BIC_GFX_rc6))
- p->gfx_rc6_ms = gfx_cur_rc6_ms;
-
/* n.b. assume die0 uncore frequency applies to whole package */
if (DO_BIC(BIC_UNCORE_MHZ))
p->uncore_mhz = get_uncore_mhz(p->package_id, 0);
+ if (DO_BIC(BIC_GFX_rc6))
+ p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull;
+
if (DO_BIC(BIC_GFXMHz))
- p->gfx_mhz = gfx_cur_mhz;
+ p->gfx_mhz = gfx_info[GFX_MHz].val;
if (DO_BIC(BIC_GFXACTMHz))
- p->gfx_act_mhz = gfx_act_mhz;
+ p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val;
+
+ if (DO_BIC(BIC_SAM_mc6))
+ p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull;
+
+ if (DO_BIC(BIC_SAMMHz))
+ p->sam_mhz = gfx_info[SAM_MHz].val;
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ p->sam_act_mhz = gfx_info[SAM_ACTMHz].val;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (get_mp(cpu, mp, &p->counter[i]))
unsigned long long msr;
int *pkg_cstate_limits;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
switch (platform->cst_limit) {
unsigned long long msr;
unsigned int ratio;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
{
unsigned long long msr;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
{
unsigned long long msr;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
{
unsigned long long msr;
- if (!platform->has_irtl_msrs)
+ if (!platform->has_irtl_msrs || no_msr)
return;
if (platform->supported_cstates & PC3) {
{
int i;
+ if (!fd_percpu)
+ return;
+
for (i = 0; i < topo.max_cpu_num + 1; ++i) {
if (fd_percpu[i] != 0)
close(fd_percpu[i]);
}
free(fd_percpu);
+ fd_percpu = NULL;
+}
+
+void free_fd_amperf_percpu(void)
+{
+ int i;
+
+ if (!fd_amperf_percpu)
+ return;
+
+ for (i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (fd_amperf_percpu[i].mperf != 0)
+ close(fd_amperf_percpu[i].mperf);
+
+ if (fd_amperf_percpu[i].aperf != 0)
+ close(fd_amperf_percpu[i].aperf);
+ }
+
+ free(fd_amperf_percpu);
+ fd_amperf_percpu = NULL;
+}
+
+void free_fd_instr_count_percpu(void)
+{
+ if (!fd_instr_count_percpu)
+ return;
+
+ for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (fd_instr_count_percpu[i] != 0)
+ close(fd_instr_count_percpu[i]);
+ }
+
+ free(fd_instr_count_percpu);
+ fd_instr_count_percpu = NULL;
+}
+
+void free_fd_rapl_percpu(void)
+{
+ if (!rapl_counter_info_perdomain)
+ return;
+
+ const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+
+ for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+ if (rapl_counter_info_perdomain[domain_id].fd_perf != -1)
+ close(rapl_counter_info_perdomain[domain_id].fd_perf);
+ }
+
+ free(rapl_counter_info_perdomain);
}
void free_all_buffers(void)
outp = NULL;
free_fd_percpu();
+ free_fd_instr_count_percpu();
+ free_fd_amperf_percpu();
+ free_fd_rapl_percpu();
free(irq_column_2_cpu);
free(irqs_per_cpu);
err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
}
+void linux_perf_init(void);
+void rapl_perf_init(void);
+
void re_initialize(void)
{
free_all_buffers();
setup_all_buffers(false);
- fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
+ linux_perf_init();
+ rapl_perf_init();
+ fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
+ topo.allowed_cpus);
}
void set_max_cpu_num(void)
}
/*
- * snapshot_gfx_rc6_ms()
+ * snapshot_graphics()
*
- * record snapshot of
- * /sys/class/drm/card0/power/rc6_residency_ms
+ * record snapshot of specified graphics sysfs knob
*
* return 1 if config change requires a restart, else return 0
*/
-int snapshot_gfx_rc6_ms(void)
+int snapshot_graphics(int idx)
{
FILE *fp;
int retval;
- fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
-
- retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
- if (retval != 1)
- err(1, "GFX rc6");
-
- fclose(fp);
-
- return 0;
-}
-
-/*
- * snapshot_gfx_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
- * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_mhz(void)
-{
- static FILE *fp;
- int retval;
-
- if (fp == NULL) {
- fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
- if (!fp)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
- } else {
- rewind(fp);
- fflush(fp);
- }
-
- retval = fscanf(fp, "%d", &gfx_cur_mhz);
- if (retval != 1)
- err(1, "GFX MHz");
-
- return 0;
-}
-
-/*
- * snapshot_gfx_cur_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
- * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_act_mhz(void)
-{
- static FILE *fp;
- int retval;
-
- if (fp == NULL) {
- fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
- if (!fp)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
- } else {
- rewind(fp);
- fflush(fp);
+ switch (idx) {
+ case GFX_rc6:
+ case SAM_mc6:
+ fp = fopen_or_die(gfx_info[idx].path, "r");
+ retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull);
+ if (retval != 1)
+ err(1, "rc6");
+ fclose(fp);
+ return 0;
+ case GFX_MHz:
+ case GFX_ACTMHz:
+ case SAM_MHz:
+ case SAM_ACTMHz:
+ if (gfx_info[idx].fp == NULL) {
+ gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r");
+ } else {
+ rewind(gfx_info[idx].fp);
+ fflush(gfx_info[idx].fp);
+ }
+ retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val);
+ if (retval != 1)
+ err(1, "MHz");
+ return 0;
+ default:
+ return -EINVAL;
}
-
- retval = fscanf(fp, "%d", &gfx_act_mhz);
- if (retval != 1)
- err(1, "GFX ACT MHz");
-
- return 0;
}
/*
return 1;
if (DO_BIC(BIC_GFX_rc6))
- snapshot_gfx_rc6_ms();
+ snapshot_graphics(GFX_rc6);
if (DO_BIC(BIC_GFXMHz))
- snapshot_gfx_mhz();
+ snapshot_graphics(GFX_MHz);
if (DO_BIC(BIC_GFXACTMHz))
- snapshot_gfx_act_mhz();
+ snapshot_graphics(GFX_ACTMHz);
+
+ if (DO_BIC(BIC_SAM_mc6))
+ snapshot_graphics(SAM_mc6);
+
+ if (DO_BIC(BIC_SAMMHz))
+ snapshot_graphics(SAM_MHz);
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ snapshot_graphics(SAM_ACTMHz);
if (DO_BIC(BIC_CPU_LPI))
snapshot_cpu_lpi_us();
int ret, idx;
unsigned long long msr_cur, msr_last;
+ assert(!no_msr);
+
if (!per_cpu_msr_sum)
return 1;
UNUSED(c);
UNUSED(p);
+ assert(!no_msr);
+
for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
unsigned long long msr_cur, msr_last;
off_t offset;
/*
* set_my_sched_priority(pri)
- * return previous
+ * return previous priority on success
+ * return value < -20 on failure
*/
int set_my_sched_priority(int priority)
{
errno = 0;
original_priority = getpriority(PRIO_PROCESS, 0);
if (errno && (original_priority == -1))
- err(errno, "getpriority");
+ return -21;
retval = setpriority(PRIO_PROCESS, 0, priority);
if (retval)
- errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", progname);
+ return -21;
errno = 0;
retval = getpriority(PRIO_PROCESS, 0);
if (retval != priority)
- err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
+ return -21;
return original_priority;
}
/*
* elevate own priority for interval mode
+ *
+ * ignore on error - we probably don't have permission to set it, but
+ * it's not a big deal
*/
set_my_sched_priority(-20);
struct stat sb;
char pathname[32];
+ if (no_msr)
+ return;
+
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (stat(pathname, &sb))
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
- err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+ no_msr = 1;
}
/*
{
cap_t caps;
cap_flag_value_t cap_flag_value;
+ int ret = 0;
caps = cap_get_proc();
if (caps == NULL)
- err(-6, "cap_get_proc\n");
+ return 1;
- if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
- err(-6, "cap_get\n");
+ if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) {
+ ret = 1;
+ goto free_and_exit;
+ }
if (cap_flag_value != CAP_SET) {
- warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
- return 1;
+ ret = 1;
+ goto free_and_exit;
}
+free_and_exit:
if (cap_free(caps) == -1)
err(-6, "cap_free\n");
- return 0;
+ return ret;
}
-void check_permissions(void)
+void check_msr_permission(void)
{
- int do_exit = 0;
+ int failed = 0;
char pathname[32];
+ if (no_msr)
+ return;
+
/* check for CAP_SYS_RAWIO */
- do_exit += check_for_cap_sys_rawio();
+ failed += check_for_cap_sys_rawio();
/* test file permissions */
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (euidaccess(pathname, R_OK)) {
- do_exit++;
- warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
+ failed++;
}
- /* if all else fails, thell them to be root */
- if (do_exit)
- if (getuid() != 0)
- warnx("... or simply run as root");
-
- if (do_exit)
- exit(-6);
+ if (failed) {
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
+ no_msr = 1;
+ }
}
void probe_bclk(void)
unsigned long long msr;
unsigned int base_ratio;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
if (platform->bclk_freq == BCLK_100MHZ)
if (!has_turbo)
return;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
if (platform->trl_msrs & TRL_LIMIT2)
static void probe_intel_uncore_frequency(void)
{
int i, j;
- char path[128];
+ char path[256];
if (!genuine_intel)
return;
- if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
- return;
-
- /* Cluster level sysfs not supported yet. */
- if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
- return;
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+ goto probe_cluster;
- if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
- BIC_PRESENT(BIC_UNCORE_MHZ);
+ BIC_PRESENT(BIC_UNCORE_MHZ);
if (quiet)
return;
for (i = 0; i < topo.num_packages; ++i) {
for (j = 0; j < topo.num_die; ++j) {
int k, l;
+ char path_base[128];
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz",
- i, j);
+ sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
+ j);
+
+ sprintf(path, "%s/min_freq_khz", path_base);
k = read_sysfs_int(path);
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz",
- i, j);
+ sprintf(path, "%s/max_freq_khz", path_base);
l = read_sysfs_int(path);
- fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
+ fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
- sprintf(path,
- "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz",
- i, j);
+ sprintf(path, "%s/initial_min_freq_khz", path_base);
k = read_sysfs_int(path);
- sprintf(path,
- "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz",
- i, j);
+ sprintf(path, "%s/initial_max_freq_khz", path_base);
l = read_sysfs_int(path);
- fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000);
+ fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+ sprintf(path, "%s/current_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ fprintf(outf, " %d MHz\n", k / 1000);
}
}
+ return;
+
+probe_cluster:
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
+ return;
+
+ if (quiet)
+ return;
+
+ for (i = 0;; ++i) {
+ int k, l;
+ char path_base[128];
+ int package_id, domain_id, cluster_id;
+
+ sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);
+
+ if (access(path_base, R_OK))
+ break;
+
+ sprintf(path, "%s/package_id", path_base);
+ package_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/domain_id", path_base);
+ domain_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/fabric_cluster_id", path_base);
+ cluster_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/min_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ sprintf(path, "%s/max_freq_khz", path_base);
+ l = read_sysfs_int(path);
+ fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
+ cluster_id, k / 1000, l / 1000);
+
+ sprintf(path, "%s/initial_min_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ sprintf(path, "%s/initial_max_freq_khz", path_base);
+ l = read_sysfs_int(path);
+ fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+ sprintf(path, "%s/current_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ fprintf(outf, " %d MHz\n", k / 1000);
+ }
}
static void probe_graphics(void)
{
+ /* Xe graphics sysfs knobs */
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) {
+ FILE *fp;
+ char buf[8];
+ bool gt0_is_gt;
+ int idx;
+
+ fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
+ if (!fp)
+ goto next;
+
+ if (!fread(buf, sizeof(char), 7, fp)) {
+ fclose(fp);
+ goto next;
+ }
+ fclose(fp);
+
+ if (!strncmp(buf, "gt0-rc", strlen("gt0-rc")))
+ gt0_is_gt = true;
+ else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc")))
+ gt0_is_gt = false;
+ else
+ goto next;
+
+ idx = gt0_is_gt ? GFX_rc6 : SAM_mc6;
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms";
+
+ idx = gt0_is_gt ? GFX_MHz : SAM_MHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq";
+
+ idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq";
+
+ idx = gt0_is_gt ? SAM_mc6 : GFX_rc6;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms";
+
+ idx = gt0_is_gt ? SAM_MHz : GFX_MHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq";
+
+ idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq";
+
+ goto end;
+ }
+
+next:
+ /* New i915 graphics sysfs knobs */
+ if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) {
+ gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK))
+ gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK))
+ gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK))
+ gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz";
+
+ goto end;
+ }
+
+ /* Fall back to traditional i915 graphics sysfs knobs */
if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
- BIC_PRESENT(BIC_GFX_rc6);
+ gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz";
+ else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz";
- if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
- !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
- BIC_PRESENT(BIC_GFXMHz);
- if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
- !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz";
+ else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz";
+
+end:
+ if (gfx_info[GFX_rc6].path)
+ BIC_PRESENT(BIC_GFX_rc6);
+ if (gfx_info[GFX_MHz].path)
+ BIC_PRESENT(BIC_GFXMHz);
+ if (gfx_info[GFX_ACTMHz].path)
BIC_PRESENT(BIC_GFXACTMHz);
+ if (gfx_info[SAM_mc6].path)
+ BIC_PRESENT(BIC_SAM_mc6);
+ if (gfx_info[SAM_MHz].path)
+ BIC_PRESENT(BIC_SAMMHz);
+ if (gfx_info[SAM_ACTMHz].path)
+ BIC_PRESENT(BIC_SAMACTMHz);
}
static void dump_sysfs_cstate_config(void)
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
if (!has_hwp)
return 0;
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
cpu = t->cpu_id;
/* per-package */
unsigned long long msr;
unsigned int time_unit;
double tdp;
+ const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt;
+ const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J;
- if (rapl_joules) {
- if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
- BIC_PRESENT(BIC_Pkg_J);
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
- BIC_PRESENT(BIC_Cor_J);
- if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
- BIC_PRESENT(BIC_RAM_J);
- if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
- BIC_PRESENT(BIC_GFX_J);
- } else {
- if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
- BIC_PRESENT(BIC_PkgWatt);
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
- BIC_PRESENT(BIC_CorWatt);
- if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
- BIC_PRESENT(BIC_RAMWatt);
- if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
- BIC_PRESENT(BIC_GFXWatt);
- }
+ if (rapl_joules)
+ bic_enabled &= ~bic_watt_bits;
+ else
+ bic_enabled &= ~bic_joules_bits;
- if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS)
- BIC_PRESENT(BIC_PKG__);
- if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)
- BIC_PRESENT(BIC_RAM__);
+ if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
+ bic_enabled &= ~BIC_PKG__;
+ if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
+ bic_enabled &= ~BIC_RAM__;
/* units on package 0, verify later other packages match */
if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
{
unsigned long long msr;
double tdp;
+ const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt;
+ const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J;
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- }
+ if (rapl_joules)
+ bic_enabled &= ~bic_watt_bits;
+ else
+ bic_enabled &= ~bic_joules_bits;
if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
return;
*/
void probe_rapl(void)
{
- if (!platform->rapl_msrs)
+ if (!platform->rapl_msrs || no_msr)
return;
if (genuine_intel)
}
/* Temperature Target MSR is Nehalem and newer only */
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
goto guess;
if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
if (!(do_dts || do_ptm))
return 0;
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!genuine_intel)
return;
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_misc_feature_control)
return;
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_misc_pwr_mgmt)
return;
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_c6_demotion_policy_config)
return;
fd = open(path, O_RDONLY);
if (fd < 0) {
- warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
+ if (debug)
+ warnx("Read %s failed", path);
return;
}
close(fd);
}
+static int has_instr_count_access(void)
+{
+ int fd;
+ int has_access;
+
+ if (no_perf)
+ return 0;
+
+ fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
+ has_access = fd != -1;
+
+ if (fd != -1)
+ close(fd);
+
+ if (!has_access)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "instructions retired perf counter", "--no-perf");
+
+ return has_access;
+}
+
+bool is_aperf_access_required(void)
+{
+ return BIC_IS_ENABLED(BIC_Avg_MHz)
+ || BIC_IS_ENABLED(BIC_Busy)
+ || BIC_IS_ENABLED(BIC_Bzy_MHz)
+ || BIC_IS_ENABLED(BIC_IPC);
+}
+
+int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+ double *scale_, enum rapl_unit *unit_)
+{
+ if (no_perf)
+ return -1;
+
+ const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name);
+
+ if (scale == 0.0)
+ return -1;
+
+ const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);
+
+ if (unit == RAPL_UNIT_INVALID)
+ return -1;
+
+ const unsigned int rapl_type = read_perf_type(cai->perf_subsys);
+ const unsigned int rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name);
+
+ const int fd_counter =
+ open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
+ if (fd_counter == -1)
+ return -1;
+
+ /* If it's the first counter opened, make it a group descriptor */
+ if (rci->fd_perf == -1)
+ rci->fd_perf = fd_counter;
+
+ *scale_ = scale;
+ *unit_ = unit;
+ return fd_counter;
+}
+
+int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+ double *scale, enum rapl_unit *unit)
+{
+ int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit);
+
+ if (debug)
+ fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
+
+ return ret;
+}
+
/*
* Linux-perf manages the HW instructions-retired counter
* by enabling when requested, and hiding rollover
*/
void linux_perf_init(void)
{
- if (!BIC_IS_ENABLED(BIC_IPC))
- return;
-
if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
return;
- fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
- if (fd_instr_count_percpu == NULL)
- err(-1, "calloc fd_instr_count_percpu");
+ if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) {
+ fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+ if (fd_instr_count_percpu == NULL)
+ err(-1, "calloc fd_instr_count_percpu");
+ }
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) {
+ fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu));
+ if (fd_amperf_percpu == NULL)
+ err(-1, "calloc fd_amperf_percpu");
+ }
+}
+
+void rapl_perf_init(void)
+{
+ const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+ bool *domain_visited = calloc(num_domains, sizeof(bool));
+
+ rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
+ if (rapl_counter_info_perdomain == NULL)
+ err(-1, "calloc rapl_counter_info_percpu");
+
+ /*
+ * Initialize rapl_counter_info_percpu
+ */
+ for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id];
+
+ rci->fd_perf = -1;
+ for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) {
+ rci->data[i] = 0;
+ rci->source[i] = RAPL_SOURCE_NONE;
+ }
+ }
+
+ /*
+ * Open/probe the counters
+ * If can't get it via perf, fallback to MSR
+ */
+ for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) {
+
+ const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i];
+ bool has_counter = 0;
+ double scale;
+ enum rapl_unit unit;
+ int next_domain;
+
+ memset(domain_visited, 0, num_domains * sizeof(*domain_visited));
+
+ for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
+
+ if (cpu_is_not_allowed(cpu))
+ continue;
+
+ /* Skip already seen and handled RAPL domains */
+ next_domain =
+ platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id;
+
+ if (domain_visited[next_domain])
+ continue;
+
+ domain_visited[next_domain] = 1;
+
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];
+
+ /* Check if the counter is enabled and accessible */
+ if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) {
+
+ /* Use perf API for this counter */
+ if (!no_perf && cai->perf_name
+ && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
+ rci->source[cai->rci_index] = RAPL_SOURCE_PERF;
+ rci->scale[cai->rci_index] = scale * cai->compat_scale;
+ rci->unit[cai->rci_index] = unit;
+ rci->flags[cai->rci_index] = cai->flags;
+
+ /* Use MSR for this counter */
+ } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+ rci->source[cai->rci_index] = RAPL_SOURCE_MSR;
+ rci->msr[cai->rci_index] = cai->msr;
+ rci->msr_mask[cai->rci_index] = cai->msr_mask;
+ rci->msr_shift[cai->rci_index] = cai->msr_shift;
+ rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
+ rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
+ rci->flags[cai->rci_index] = cai->flags;
+ }
+ }
+
+ if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE)
+ has_counter = 1;
+ }
+
+ /* If any CPU has access to the counter, make it present */
+ if (has_counter)
+ BIC_PRESENT(cai->bic);
+ }
+
+ free(domain_visited);
+}
+
+static int has_amperf_access_via_msr(void)
+{
+ if (no_msr)
+ return 0;
+
+ if (probe_msr(base_cpu, MSR_IA32_APERF))
+ return 0;
+
+ if (probe_msr(base_cpu, MSR_IA32_MPERF))
+ return 0;
+
+ return 1;
+}
+
+static int has_amperf_access_via_perf(void)
+{
+ struct amperf_group_fd fds;
+
+ /*
+ * Cache the last result, so we don't warn the user multiple times
+ *
+ * Negative means cached, no access
+ * Zero means not cached
+ * Positive means cached, has access
+ */
+ static int has_access_cached;
+
+ if (no_perf)
+ return 0;
+
+ if (has_access_cached != 0)
+ return has_access_cached > 0;
+
+ fds = open_amperf_fd(base_cpu);
+ has_access_cached = (fds.aperf != -1) && (fds.mperf != -1);
+
+ if (fds.aperf == -1)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "APERF perf counter", "--no-perf");
+ else
+ close(fds.aperf);
+
+ if (fds.mperf == -1)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "MPERF perf counter", "--no-perf");
+ else
+ close(fds.mperf);
+
+ if (has_access_cached == 0)
+ has_access_cached = -1;
+
+ return has_access_cached > 0;
+}
+
+/* Check if we can access APERF and MPERF */
+static int has_amperf_access(void)
+{
+ if (!is_aperf_access_required())
+ return 0;
+
+ if (!no_msr && has_amperf_access_via_msr())
+ return 1;
+
+ if (!no_perf && has_amperf_access_via_perf())
+ return 1;
- BIC_PRESENT(BIC_IPC);
+ return 0;
}
void probe_cstates(void)
if (platform->has_msr_module_c6_res_ms)
BIC_PRESENT(BIC_Mod_c6);
- if (platform->has_ext_cst_msrs) {
+ if (platform->has_ext_cst_msrs && !no_msr) {
BIC_PRESENT(BIC_Totl_c0);
BIC_PRESENT(BIC_Any_c0);
BIC_PRESENT(BIC_GFX_c0);
unsigned int eax, ebx, ecx, edx;
unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
unsigned long long ucode_patch = 0;
+ bool ucode_patch_valid = false;
eax = ebx = ecx = edx = 0;
ecx_flags = ecx;
edx_flags = edx;
- if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
- warnx("get_msr(UCODE)");
+ if (!no_msr) {
+ if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
+ warnx("get_msr(UCODE)");
+ else
+ ucode_patch_valid = true;
+ }
/*
* check max extended function levels of CPUID.
__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
if (!quiet) {
- fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
- family, model, stepping, family, model, stepping,
- (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+ fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
+ family, model, stepping, family, model, stepping);
+ if (ucode_patch_valid)
+ fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+ fputc('\n', outf);
+
fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
ecx_flags & (1 << 0) ? "SSE3" : "-",
__cpuid(0x6, eax, ebx, ecx, edx);
has_aperf = ecx & (1 << 0);
- if (has_aperf) {
+ if (has_aperf && has_amperf_access()) {
BIC_PRESENT(BIC_Avg_MHz);
BIC_PRESENT(BIC_Busy);
BIC_PRESENT(BIC_Bzy_MHz);
+ BIC_PRESENT(BIC_IPC);
}
do_dts = eax & (1 << 0);
if (do_dts)
base_mhz = max_mhz = bus_mhz = edx = 0;
__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
+
+ bclk = bus_mhz;
+
+ base_hz = base_mhz * 1000000;
+ has_base_hz = 1;
+
+ if (platform->enable_tsc_tweak)
+ tsc_tweak = base_hz / tsc_hz;
+
if (!quiet)
fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
base_mhz, max_mhz, bus_mhz);
probe_thermal();
- if (platform->has_nhm_msrs)
+ if (platform->has_nhm_msrs && !no_msr)
BIC_PRESENT(BIC_SMI);
if (!quiet)
topo.allowed_packages = 0;
for_all_cpus(update_topo, ODD_COUNTERS);
}
+
void setup_all_buffers(bool startup)
{
topology_probe(startup);
err(-ENODEV, "No valid cpus found");
}
+static void set_amperf_source(void)
+{
+ amperf_source = AMPERF_SOURCE_PERF;
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (no_perf || !aperf_required || !has_amperf_access_via_perf())
+ amperf_source = AMPERF_SOURCE_MSR;
+
+ if (quiet || !debug)
+ return;
+
+ fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf");
+}
+
+bool has_added_counters(void)
+{
+ /*
+ * It only makes sense to call this after the command line is parsed,
+ * otherwise sys structure is not populated.
+ */
+
+ return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters;
+}
+
+bool is_msr_access_required(void)
+{
+ if (no_msr)
+ return false;
+
+ if (has_added_counters())
+ return true;
+
+ return BIC_IS_ENABLED(BIC_SMI)
+ || BIC_IS_ENABLED(BIC_CPU_c1)
+ || BIC_IS_ENABLED(BIC_CPU_c3)
+ || BIC_IS_ENABLED(BIC_CPU_c6)
+ || BIC_IS_ENABLED(BIC_CPU_c7)
+ || BIC_IS_ENABLED(BIC_Mod_c6)
+ || BIC_IS_ENABLED(BIC_CoreTmp)
+ || BIC_IS_ENABLED(BIC_Totl_c0)
+ || BIC_IS_ENABLED(BIC_Any_c0)
+ || BIC_IS_ENABLED(BIC_GFX_c0)
+ || BIC_IS_ENABLED(BIC_CPUGFX)
+ || BIC_IS_ENABLED(BIC_Pkgpc3)
+ || BIC_IS_ENABLED(BIC_Pkgpc6)
+ || BIC_IS_ENABLED(BIC_Pkgpc2)
+ || BIC_IS_ENABLED(BIC_Pkgpc7)
+ || BIC_IS_ENABLED(BIC_Pkgpc8)
+ || BIC_IS_ENABLED(BIC_Pkgpc9)
+ || BIC_IS_ENABLED(BIC_Pkgpc10)
+ /* TODO: Multiplex access with perf */
+ || BIC_IS_ENABLED(BIC_CorWatt)
+ || BIC_IS_ENABLED(BIC_Cor_J)
+ || BIC_IS_ENABLED(BIC_PkgWatt)
+ || BIC_IS_ENABLED(BIC_CorWatt)
+ || BIC_IS_ENABLED(BIC_GFXWatt)
+ || BIC_IS_ENABLED(BIC_RAMWatt)
+ || BIC_IS_ENABLED(BIC_Pkg_J)
+ || BIC_IS_ENABLED(BIC_Cor_J)
+ || BIC_IS_ENABLED(BIC_GFX_J)
+ || BIC_IS_ENABLED(BIC_RAM_J)
+ || BIC_IS_ENABLED(BIC_PKG__)
+ || BIC_IS_ENABLED(BIC_RAM__)
+ || BIC_IS_ENABLED(BIC_PkgTmp)
+ || (is_aperf_access_required() && !has_amperf_access_via_perf());
+}
+
+void check_msr_access(void)
+{
+ if (!is_msr_access_required())
+ no_msr = 1;
+
+ check_dev_msr();
+ check_msr_permission();
+
+ if (no_msr)
+ bic_disable_msr_access();
+}
+
+void check_perf_access(void)
+{
+ const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC);
+
+ if (no_perf || !intrcount_required || !has_instr_count_access())
+ bic_enabled &= ~BIC_IPC;
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (!aperf_required || !has_amperf_access()) {
+ bic_enabled &= ~BIC_Avg_MHz;
+ bic_enabled &= ~BIC_Busy;
+ bic_enabled &= ~BIC_Bzy_MHz;
+ bic_enabled &= ~BIC_IPC;
+ }
+}
+
void turbostat_init()
{
setup_all_buffers(true);
set_base_cpu();
- check_dev_msr();
- check_permissions();
+ check_msr_access();
+ check_perf_access();
process_cpuid();
probe_pm_features();
+ set_amperf_source();
linux_perf_init();
+ rapl_perf_init();
for_all_cpus(get_cpu_type, ODD_COUNTERS);
for_all_cpus(get_cpu_type, EVEN_COUNTERS);
if (DO_BIC(BIC_IPC))
(void)get_instr_count_fd(base_cpu);
+
+ /*
+ * If TSC tweak is needed, but couldn't get it,
+ * disable more BICs, since it can't be reported accurately.
+ */
+ if (platform->enable_tsc_tweak && !has_base_hz) {
+ bic_enabled &= ~BIC_Busy;
+ bic_enabled &= ~BIC_Bzy_MHz;
+ }
}
int fork_it(char **argv)
void print_version()
{
- fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2024.04.08 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
{
struct msr_counter *msrp;
+ if (no_msr && msr_num)
+ errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);
+
msrp = calloc(1, sizeof(struct msr_counter));
if (msrp == NULL) {
perror("calloc");
{ "list", no_argument, 0, 'l' },
{ "out", required_argument, 0, 'o' },
{ "quiet", no_argument, 0, 'q' },
+ { "no-msr", no_argument, 0, 'M' },
+ { "no-perf", no_argument, 0, 'P' },
{ "show", required_argument, 0, 's' },
{ "Summary", no_argument, 0, 'S' },
{ "TCC", required_argument, 0, 'T' },
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
+ /*
+ * Parse some options early, because they may make other options invalid,
+ * like adding the MSR counter with --add and at the same time using --no-msr.
+ */
+ while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 'M':
+ no_msr = 1;
+ break;
+ case 'P':
+ no_perf = 1;
+ break;
+ default:
+ break;
+ }
+ }
+ optind = 0;
+
+ while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
switch (opt) {
case 'a':
parse_add_command(optarg);
case 'q':
quiet = 1;
break;
+ case 'M':
+ case 'P':
+ /* Parsed earlier */
+ break;
case 'n':
num_iterations = strtod(optarg, NULL);
}
}
+void set_rlimit(void)
+{
+ struct rlimit limit;
+
+ if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
+ err(1, "Failed to get rlimit");
+
+ if (limit.rlim_max < MAX_NOFILE)
+ limit.rlim_max = MAX_NOFILE;
+ if (limit.rlim_cur < MAX_NOFILE)
+ limit.rlim_cur = MAX_NOFILE;
+
+ if (setrlimit(RLIMIT_NOFILE, &limit) < 0)
+ err(1, "Failed to set rlimit");
+}
+
int main(int argc, char **argv)
{
int fd, ret;
probe_sysfs();
+ if (!getuid())
+ set_rlimit();
+
turbostat_init();
- msr_sum_record();
+ if (!no_msr)
+ msr_sum_record();
/* dump counters and exit */
if (dump_only)
{
dpa_perf->qos_class = FAKE_QTG_ID;
dpa_perf->dpa_range = *range;
- dpa_perf->coord.read_latency = 500;
- dpa_perf->coord.write_latency = 500;
- dpa_perf->coord.read_bandwidth = 1000;
- dpa_perf->coord.write_bandwidth = 1000;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ dpa_perf->coord[i].read_latency = 500;
+ dpa_perf->coord[i].write_latency = 500;
+ dpa_perf->coord[i].read_bandwidth = 1000;
+ dpa_perf->coord[i].write_bandwidth = 1000;
+ }
}
static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
CONFIG_INET=y
CONFIG_MPTCP=y
+CONFIG_NETDEVICES=y
+CONFIG_WLAN=y
CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_WLAN_VENDOR_INTEL=y
CONFIG_DAMON_PADDR=y
CONFIG_DEBUG_FS=y
CONFIG_DAMON_DBGFS=y
+CONFIG_DAMON_DBGFS_DEPRECATED=y
CONFIG_REGMAP_BUILD=y
*/
#endif
-#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
#define __arena __attribute__((address_space(1)))
#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
#include <test_progs.h>
#include <sys/mman.h>
#include <network_helpers.h>
-
+#include <sys/user.h>
+#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */
+#include <unistd.h>
+#define PAGE_SIZE getpagesize()
+#endif
#include "arena_htab_asm.skel.h"
#include "arena_htab.skel.h"
-#define PAGE_SIZE 4096
-
#include "bpf_arena_htab.h"
static void test_arena_htab_common(struct htab *htab)
#include <test_progs.h>
#include <sys/mman.h>
#include <network_helpers.h>
-
-#define PAGE_SIZE 4096
+#include <sys/user.h>
+#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */
+#include <unistd.h>
+#define PAGE_SIZE getpagesize()
+#endif
#include "bpf_arena_list.h"
#include "arena_list.skel.h"
/* Copyright (c) 2021 Facebook */
#include <sys/syscall.h>
+#include <limits.h>
#include <test_progs.h>
#include "bloom_filter_map.skel.h"
if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value size 0"))
close(fd);
+ /* Invalid value size: too big */
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, INT32_MAX, 100, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value too large"))
+ close(fd);
+
/* Invalid max entries size */
fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 0, NULL);
if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid max entries size"))
#include "cap_helpers.h"
#include "verifier_and.skel.h"
#include "verifier_arena.skel.h"
+#include "verifier_arena_large.skel.h"
#include "verifier_array_access.skel.h"
#include "verifier_basic_stack.skel.h"
#include "verifier_bitfield_write.skel.h"
void test_verifier_and(void) { RUN(verifier_and); }
void test_verifier_arena(void) { RUN(verifier_arena); }
+void test_verifier_arena_large(void) { RUN(verifier_arena_large); }
void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); }
void test_verifier_bounds(void) { RUN(verifier_bounds); }
SEC("syscall")
int arena_htab_llvm(void *ctx)
{
-#if defined(__BPF_FEATURE_ARENA_CAST) || defined(BPF_ARENA_FORCE_ASM)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) || defined(BPF_ARENA_FORCE_ASM)
struct htab __arena *htab;
__u64 i;
int cnt;
bool skip = false;
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
long __arena arena_sum;
int __arena test_val = 1;
struct arena_list_head __arena global_head;
#else
-long arena_sum SEC(".arena.1");
-int test_val SEC(".arena.1");
+long arena_sum SEC(".addr_space.1");
+int test_val SEC(".addr_space.1");
#endif
int zero;
SEC("syscall")
int arena_list_add(void *ctx)
{
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
__u64 i;
list_head = &global_head;
SEC("syscall")
int arena_list_del(void *ctx)
{
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
struct elem __arena *n;
int sum = 0;
__uint(type, BPF_MAP_TYPE_ARENA);
__uint(map_flags, BPF_F_MMAPABLE);
__uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/
- __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, (1ull << 32) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#else
+ __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#endif
} arena SEC(".maps");
SEC("syscall")
__success __retval(0)
int basic_alloc1(void *ctx)
{
-#if defined(__BPF_FEATURE_ARENA_CAST)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
volatile int __arena *page1, *page2, *no_page, *page3;
page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
__success __retval(0)
int basic_alloc2(void *ctx)
{
-#if defined(__BPF_FEATURE_ARENA_CAST)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
volatile char __arena *page1, *page2, *page3, *page4;
page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+#define ARENA_SIZE (1ull << 32)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, ARENA_SIZE / PAGE_SIZE);
+} arena SEC(".maps");
+
+SEC("syscall")
+__success __retval(0)
+int big_alloc1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page1, *page2, *no_page, *page3;
+ void __arena *base;
+
+ page1 = base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ *page1 = 1;
+ page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE,
+ 1, NUMA_NO_NODE, 0);
+ if (!page2)
+ return 2;
+ *page2 = 2;
+ no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE,
+ 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 3;
+ if (*page1 != 1)
+ return 4;
+ if (*page2 != 2)
+ return 5;
+ bpf_arena_free_pages(&arena, (void __arena *)page1, 1);
+ if (*page2 != 2)
+ return 6;
+ if (*page1 != 0) /* use-after-free should return 0 */
+ return 7;
+ page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page3)
+ return 8;
+ *page3 = 3;
+ if (page1 != page3)
+ return 9;
+ if (*page2 != 2)
+ return 10;
+ if (*(page1 + PAGE_SIZE) != 0)
+ return 11;
+ if (*(page1 - PAGE_SIZE) != 0)
+ return 12;
+ if (*(page2 + PAGE_SIZE) != 0)
+ return 13;
+ if (*(page2 - PAGE_SIZE) != 0)
+ return 14;
+#endif
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
--- /dev/null
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
+CONFIG_DRM_VGEM=y
--- /dev/null
+timeout=600
$(OUTPUT)/subdir:
mkdir -p $@
-$(OUTPUT)/script:
- echo '#!/bin/sh' > $@
+$(OUTPUT)/script: Makefile
+ echo '#!/bin/bash' > $@
echo 'exit $$*' >> $@
chmod +x $@
$(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
NAME_MAX=int(subprocess.check_output(["getconf", "NAME_MAX", "."]))
test_num=0
+pass_num=0
+fail_num=0
code='''#!/usr/bin/perl
print "Executed interpreter! Args:\n";
# ...
def test(name, size, good=True, leading="", root="./", target="/perl",
fill="A", arg="", newline="\n", hashbang="#!"):
- global test_num, tests, NAME_MAX
+ global test_num, pass_num, fail_num, tests, NAME_MAX
test_num += 1
if test_num > tests:
raise ValueError("more binfmt_script tests than expected! (want %d, expected %d)"
if good:
print("ok %d - binfmt_script %s (successful good exec)"
% (test_num, name))
+ pass_num += 1
else:
print("not ok %d - binfmt_script %s succeeded when it should have failed"
% (test_num, name))
+ fail_num = 1
else:
if good:
print("not ok %d - binfmt_script %s failed when it should have succeeded (rc:%d)"
% (test_num, name, proc.returncode))
+ fail_num = 1
else:
print("ok %d - binfmt_script %s (correctly failed bad exec)"
% (test_num, name))
+ pass_num += 1
# Clean up crazy binaries
os.unlink(script)
test(name="two-under-leading", size=int(SIZE/2), leading=" ")
test(name="two-under-lead-trunc-arg", size=int(SIZE/2), leading=" ", arg=" ")
+print("# Totals: pass:%d fail:%d xfail:0 xpass:0 skip:0 error:0" % (pass_num, fail_num))
+
if test_num != tests:
raise ValueError("fewer binfmt_script tests than expected! (ran %d, expected %d"
% (test_num, tests))
if (child == 0) {
/* Child: do execveat(). */
rc = execveat_(fd, path, argv, envp, flags);
- ksft_print_msg("execveat() failed, rc=%d errno=%d (%s)\n",
+ ksft_print_msg("child execveat() failed, rc=%d errno=%d (%s)\n",
rc, errno, strerror(errno));
- ksft_test_result_fail("%s\n", test_name);
- exit(1); /* should not reach here */
+ exit(errno);
}
/* Parent: wait for & check child's exit status. */
rc = waitpid(child, &status, 0);
* "If the command name is found, but it is not an executable utility,
* the exit status shall be 126."), so allow either.
*/
- if (is_script)
+ if (is_script) {
+ ksft_print_msg("Invoke script via root_dfd and relative filename\n");
fail += check_execveat_invoked_rc(root_dfd, longpath + 1, 0,
127, 126);
- else
+ } else {
+ ksft_print_msg("Invoke exec via root_dfd and relative filename\n");
fail += check_execveat(root_dfd, longpath + 1, 0);
+ }
return fail;
}
#include <link.h>
#include <stdio.h>
#include <stdlib.h>
+#include "../kselftest.h"
struct Statistics {
unsigned long long load_address;
unsigned long long misalign;
int ret;
+ ksft_print_header();
+ ksft_set_plan(1);
+
ret = dl_iterate_phdr(ExtractStatistics, &extracted);
- if (ret != 1) {
- fprintf(stderr, "FAILED\n");
- return 1;
- }
+ if (ret != 1)
+ ksft_exit_fail_msg("FAILED: dl_iterate_phdr\n");
- if (extracted.alignment == 0) {
- fprintf(stderr, "No alignment found\n");
- return 1;
- } else if (extracted.alignment & (extracted.alignment - 1)) {
- fprintf(stderr, "Alignment is not a power of 2\n");
- return 1;
- }
+ if (extracted.alignment == 0)
+ ksft_exit_fail_msg("FAILED: No alignment found\n");
+ else if (extracted.alignment & (extracted.alignment - 1))
+ ksft_exit_fail_msg("FAILED: Alignment is not a power of 2\n");
misalign = extracted.load_address & (extracted.alignment - 1);
- if (misalign) {
- printf("alignment = %llu, load_address = %llu\n",
- extracted.alignment, extracted.load_address);
- fprintf(stderr, "FAILED\n");
- return 1;
- }
+ if (misalign)
+ ksft_exit_fail_msg("FAILED: alignment = %llu, load_address = %llu\n",
+ extracted.alignment, extracted.load_address);
- fprintf(stderr, "PASS\n");
- return 0;
+ ksft_test_result_pass("Completed\n");
+ ksft_finished();
}
#include <fcntl.h>
#include <sys/mount.h>
#include <unistd.h>
+#include "../kselftest.h"
int main(void)
{
+ int fd, rv;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
if (unshare(CLONE_NEWNS) == -1) {
if (errno == ENOSYS || errno == EPERM) {
- fprintf(stderr, "error: unshare, errno %d\n", errno);
- return 4;
+ ksft_test_result_skip("error: unshare, errno %d\n", errno);
+ ksft_finished();
}
- fprintf(stderr, "error: unshare, errno %d\n", errno);
- return 1;
- }
- if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
- fprintf(stderr, "error: mount '/', errno %d\n", errno);
- return 1;
+ ksft_exit_fail_msg("error: unshare, errno %d\n", errno);
}
+
+ if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) == -1)
+ ksft_exit_fail_msg("error: mount '/', errno %d\n", errno);
+
/* Require "exec" filesystem. */
- if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1) {
- fprintf(stderr, "error: mount ramfs, errno %d\n", errno);
- return 1;
- }
+ if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1)
+ ksft_exit_fail_msg("error: mount ramfs, errno %d\n", errno);
#define FILENAME "/tmp/1"
- int fd = creat(FILENAME, 0700);
- if (fd == -1) {
- fprintf(stderr, "error: creat, errno %d\n", errno);
- return 1;
- }
+ fd = creat(FILENAME, 0700);
+ if (fd == -1)
+ ksft_exit_fail_msg("error: creat, errno %d\n", errno);
+
#define S "#!" FILENAME "\n"
- if (write(fd, S, strlen(S)) != strlen(S)) {
- fprintf(stderr, "error: write, errno %d\n", errno);
- return 1;
- }
+ if (write(fd, S, strlen(S)) != strlen(S))
+ ksft_exit_fail_msg("error: write, errno %d\n", errno);
+
close(fd);
- int rv = execve(FILENAME, NULL, NULL);
- if (rv == -1 && errno == ELOOP) {
- return 0;
- }
- fprintf(stderr, "error: execve, rv %d, errno %d\n", rv, errno);
- return 1;
+ rv = execve(FILENAME, NULL, NULL);
+ ksft_test_result(rv == -1 && errno == ELOOP,
+ "execve failed as expected (ret %d, errno %d)\n", rv, errno);
+ ksft_finished();
}
yield
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
yield
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
yield
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -ne 0 ]; then
fail "any of scheduler events should not be recorded"
fi
echo "Get the most frequently calling function"
sample_events
-target_func=`cut -d: -f3 trace | sed 's/call_site=\([^+]*\)+0x.*/\1/' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'`
+target_func=`cat trace | grep -o 'call_site=\([^+]*\)' | sed 's/call_site=//' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'`
if [ -z "$target_func" ]; then
exit_fail
fi
CONFIG_IOMMUFD=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
CONFIG_FAULT_INJECTION=y
CONFIG_IOMMUFD_TEST=y
+CONFIG_FAILSLAB=y
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
+#include <sys/utsname.h>
#endif
#ifndef ARRAY_SIZE
#define KSFT_XPASS 3
#define KSFT_SKIP 4
+#ifndef __noreturn
+#define __noreturn __attribute__((__noreturn__))
+#endif
#define __printf(a, b) __attribute__((format(printf, a, b)))
/* counters */
}
/* Docs seem to call for double space if directive is absent */
- if (!directive[0] && msg[0])
+ if (!directive[0] && msg)
directive = " # ";
- va_start(args, msg);
printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive);
errno = saved_errno;
- vprintf(msg, args);
+ if (msg) {
+ va_start(args, msg);
+ vprintf(msg, args);
+ va_end(args);
+ }
printf("\n");
- va_end(args);
}
-static inline int ksft_exit_pass(void)
+static inline __noreturn int ksft_exit_pass(void)
{
ksft_print_cnts();
exit(KSFT_PASS);
}
-static inline int ksft_exit_fail(void)
+static inline __noreturn int ksft_exit_fail(void)
{
ksft_print_cnts();
exit(KSFT_FAIL);
ksft_cnt.ksft_xfail + \
ksft_cnt.ksft_xskip)
-static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
exit(KSFT_FAIL);
}
-static inline int ksft_exit_xfail(void)
+static inline __noreturn int ksft_exit_xfail(void)
{
ksft_print_cnts();
exit(KSFT_XFAIL);
}
-static inline int ksft_exit_xpass(void)
+static inline __noreturn int ksft_exit_xpass(void)
{
ksft_print_cnts();
exit(KSFT_XPASS);
}
-static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
exit(KSFT_SKIP);
}
+static inline int ksft_min_kernel_version(unsigned int min_major,
+ unsigned int min_minor)
+{
+#ifdef NOLIBC
+ ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n");
+ return 0;
+#else
+ unsigned int major, minor;
+ struct utsname info;
+
+ if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
+ ksft_exit_fail_msg("Can't parse kernel version\n");
+
+ return major > min_major || (major == min_major && minor >= min_minor);
+#endif
+}
+
#endif /* __KSELFTEST_H */
#include <asm/types.h>
#include <ctype.h>
#include <errno.h>
-#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
FIXTURE_DATA(fixture_name) self; \
pid_t child = 1; \
int status = 0; \
+ bool jmp = false; \
memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
if (setjmp(_metadata->env) == 0) { \
/* Use the same _metadata. */ \
_metadata->exit_code = KSFT_FAIL; \
} \
} \
+ else \
+ jmp = true; \
if (child == 0) { \
- if (_metadata->setup_completed && !_metadata->teardown_parent) \
+ if (_metadata->setup_completed && !_metadata->teardown_parent && !jmp) \
fixture_name##_teardown(_metadata, &self, variant->data); \
_exit(0); \
} \
struct __test_metadata *t)
{
struct __test_xfail *xfail;
- char test_name[LINE_MAX];
+ char *test_name;
const char *diagnostic;
/* reset test struct */
t->trigger = 0;
memset(t->results->reason, 0, sizeof(t->results->reason));
- snprintf(test_name, sizeof(test_name), "%s%s%s.%s",
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
+ if (asprintf(&test_name, "%s%s%s.%s", f->name,
+ variant->name[0] ? "." : "", variant->name, t->name) == -1) {
+ ksft_print_msg("ERROR ALLOCATING MEMORY\n");
+ t->exit_code = KSFT_FAIL;
+ _exit(t->exit_code);
+ }
ksft_print_msg(" RUN %s ...\n", test_name);
diagnostic = "unknown";
ksft_test_result_code(t->exit_code, test_name,
- diagnostic ? "%s" : "", diagnostic);
+ diagnostic ? "%s" : NULL, diagnostic);
+ free(test_name);
}
static int test_harness_run(int argc, char **argv)
irq_iter = READ_ONCE(shared_data->nr_iter);
__GUEST_ASSERT(config_iter + 1 == irq_iter,
- "config_iter + 1 = 0x%lx, irq_iter = 0x%lx.\n"
- " Guest timer interrupt was not trigged within the specified\n"
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not triggered within the specified\n"
" interval, try to increase the error margin by [-e] option.\n",
config_iter + 1, irq_iter);
}
void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
struct kvm_x86_cpu_property property,
uint32_t value);
+void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
+
+static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+ return *((&entry->eax) + feature.reg) & BIT(feature.bit);
+}
+
void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
struct kvm_x86_cpu_feature feature,
bool set);
{
uint64_t gpa;
- for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
- *((volatile uint64_t *)gpa) = gpa;
-
- GUEST_DONE();
+ for (;;) {
+ for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+ *((volatile uint64_t *)gpa) = gpa;
+ GUEST_SYNC(0);
+ }
}
struct vcpu_info {
static void run_vcpu(struct kvm_vcpu *vcpu)
{
vcpu_run(vcpu);
- TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
}
static void *vcpu_worker(void *data)
struct kvm_vcpu *vcpu = info->vcpu;
struct kvm_vm *vm = vcpu->vm;
struct kvm_sregs sregs;
- struct kvm_regs regs;
vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size);
- /* Snapshot regs before the first run. */
- vcpu_regs_get(vcpu, ®s);
rendezvous_with_boss();
run_vcpu(vcpu);
rendezvous_with_boss();
- vcpu_regs_set(vcpu, ®s);
vcpu_sregs_get(vcpu, &sregs);
#ifdef __x86_64__
/* Toggle CR0.WP to trigger a MMU context reset. */
irq_iter = READ_ONCE(shared_data->nr_iter);
__GUEST_ASSERT(config_iter + 1 == irq_iter,
"config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
- " Guest timer interrupt was not trigged within the specified\n"
+ " Guest timer interrupt was not triggered within the specified\n"
" interval, try to increase the error margin by [-e] option.\n",
config_iter + 1, irq_iter);
}
struct kvm_vm *vm;
int r, i;
-#if defined __aarch64__ || defined __x86_64__
+#if defined __aarch64__ || defined __riscv || defined __x86_64__
supported_flags |= KVM_MEM_READONLY;
#endif
}
}
+static void test_pv_unhalt(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_cpuid_entry2 *ent;
+ u32 kvm_sig_old;
+
+ pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+
+ TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS);
+
+ /* KVM_PV_UNHALT test */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+
+ TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
+
+ /* Make sure KVM clears vcpu->arch.kvm_cpuid */
+ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+ kvm_sig_old = ent->ebx;
+ ent->ebx = 0xdeadbeef;
+ vcpu_set_cpuid(vcpu);
+
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+ ent->ebx = kvm_sig_old;
+ vcpu_set_cpuid(vcpu);
+
+ TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
+
+ /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
+
+ kvm_vm_free(vm);
+}
+
int main(void)
{
struct kvm_vcpu *vcpu;
enter_guest(vcpu);
kvm_vm_free(vm);
+
+ test_pv_unhalt();
}
static void guest_test_gp_counters(void)
{
+ uint8_t pmu_version = guest_get_pmu_version();
uint8_t nr_gp_counters = 0;
uint32_t base_msr;
- if (guest_get_pmu_version())
+ if (pmu_version)
nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+ /*
+ * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
+ * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
+ * of GP counters. If there are no GP counters, require KVM to leave
+ * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but
+ * follow the spirit of the architecture and only globally enable GP
+ * counters, of which there are none.
+ */
+ if (pmu_version > 1) {
+ uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
+
+ if (nr_gp_counters)
+ GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
+ else
+ GUEST_ASSERT_EQ(global_ctrl, 0);
+ }
+
if (this_cpu_has(X86_FEATURE_PDCM) &&
rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
base_msr = MSR_IA32_PMC0;
#define NESTED_TEST_MEM1 0xc0001000
#define NESTED_TEST_MEM2 0xc0002000
-static void l2_guest_code(void)
+static void l2_guest_code(u64 *a, u64 *b)
{
- *(volatile uint64_t *)NESTED_TEST_MEM1;
- *(volatile uint64_t *)NESTED_TEST_MEM1 = 1;
+ READ_ONCE(*a);
+ WRITE_ONCE(*a, 1);
GUEST_SYNC(true);
GUEST_SYNC(false);
- *(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
+ WRITE_ONCE(*b, 1);
GUEST_SYNC(true);
- *(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
+ WRITE_ONCE(*b, 1);
GUEST_SYNC(true);
GUEST_SYNC(false);
vmcall();
}
+static void l2_guest_code_ept_enabled(void)
+{
+ l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
+}
+
+static void l2_guest_code_ept_disabled(void)
+{
+ /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
+ l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
+}
+
void l1_guest_code(struct vmx_pages *vmx)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ void *l2_rip;
GUEST_ASSERT(vmx->vmcs_gpa);
GUEST_ASSERT(prepare_for_vmx_operation(vmx));
GUEST_ASSERT(load_vmcs(vmx));
- prepare_vmcs(vmx, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ if (vmx->eptp_gpa)
+ l2_rip = l2_guest_code_ept_enabled;
+ else
+ l2_rip = l2_guest_code_ept_disabled;
+
+ prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
GUEST_SYNC(false);
GUEST_ASSERT(!vmlaunch());
GUEST_DONE();
}
-int main(int argc, char *argv[])
+static void test_vmx_dirty_log(bool enable_ept)
{
vm_vaddr_t vmx_pages_gva = 0;
struct vmx_pages *vmx;
struct ucall uc;
bool done = false;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- TEST_REQUIRE(kvm_cpu_has_ept());
+ pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
/* Create VM */
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
*
* Note that prepare_eptp should be called only L1's GPA map is done,
* meaning after the last call to virt_map.
+ *
+ * When EPT is disabled, the L2 guest code will still access the same L1
+ * GPAs as the EPT enabled case.
*/
- prepare_eptp(vmx, vm, 0);
- nested_map_memslot(vmx, vm, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
+ if (enable_ept) {
+ prepare_eptp(vmx, vm, 0);
+ nested_map_memslot(vmx, vm, 0);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
+ }
bmap = bitmap_zalloc(TEST_MEM_PAGES);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
}
}
}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ test_vmx_dirty_log(/*enable_ept=*/false);
+
+ if (kvm_cpu_has_ept())
+ test_vmx_dirty_log(/*enable_ept=*/true);
+
+ return 0;
+}
ksft_print_header();
ksft_set_plan(nthreads);
- filed = open(file, O_RDWR|O_CREAT);
+ filed = open(file, O_RDWR|O_CREAT, 0664);
if (filed < 0)
ksft_exit_fail_msg("Unable to open %s: %s\n", file, strerror(errno));
#include <linux/mman.h>
#include <linux/prctl.h>
+#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/auxv.h>
u64 shadow_pkey_reg;
int dprint_in_signal;
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-char buf[256];
void cat_into_file(char *str, char *file)
{
shadow_pkey_reg = __read_pkey_reg();
}
-void restore_settings_atexit(void)
-{
- cat_into_file(buf, "/proc/sys/vm/nr_hugepages");
-}
-
-void save_settings(void)
-{
- int fd;
- int err;
-
- if (geteuid())
- return;
-
- fd = open("/proc/sys/vm/nr_hugepages", O_RDONLY);
- if (fd < 0) {
- fprintf(stderr, "error opening\n");
- perror("error: ");
- exit(__LINE__);
- }
-
- /* -1 to guarantee leaving the trailing \0 */
- err = read(fd, buf, sizeof(buf)-1);
- if (err < 0) {
- fprintf(stderr, "error reading\n");
- perror("error: ");
- exit(__LINE__);
- }
-
- atexit(restore_settings_atexit);
- close(fd);
-}
-
int main(void)
{
int nr_iterations = 22;
srand((unsigned int)time(NULL));
- save_settings();
setup_handlers();
printf("has pkeys: %d\n", pkeys_supported);
CATEGORY="ksm" run_test ./ksm_functional_tests
# protection_keys tests
+nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
if [ -x ./protection_keys_32 ]
then
CATEGORY="pkey" run_test ./protection_keys_32
then
CATEGORY="pkey" run_test ./protection_keys_64
fi
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
if [ -x ./soft-dirty ]
then
if (!map)
ksft_exit_fail_msg("anon mmap failed\n");
} else {
- test_fd = open(fname, O_RDWR | O_CREAT);
+ test_fd = open(fname, O_RDWR | O_CREAT, 0664);
if (test_fd < 0) {
ksft_test_result_skip("Test %s open() file failed\n", __func__);
return;
ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n");
}
- fd = open(testfile, O_CREAT|O_WRONLY);
+ fd = open(testfile, O_CREAT|O_WRONLY, 0664);
if (fd == -1) {
ksft_perror("Cannot open testing file");
goto cleanup;
char **addr)
{
size_t i;
- int dummy;
+ int __attribute__((unused)) dummy = 0;
srand(time(NULL));
unsigned long long *count_verify;
uffd_test_ops_t *uffd_test_ops;
uffd_test_case_ops_t *uffd_test_case_ops;
+atomic_bool ready_for_fork;
static int uffd_mem_fd_create(off_t mem_size, bool hugetlb)
{
pollfd[1].fd = pipefd[cpu*2];
pollfd[1].events = POLLIN;
+ ready_for_fork = true;
+
for (;;) {
ret = poll(pollfd, 2, -1);
if (ret <= 0) {
#include <inttypes.h>
#include <stdint.h>
#include <sys/random.h>
+#include <stdatomic.h>
#include "../kselftest.h"
#include "vm_util.h"
extern bool test_uffdio_wp;
extern unsigned long long *count_verify;
extern volatile bool test_uffdio_copy_eexist;
+extern atomic_bool ready_for_fork;
extern uffd_test_ops_t anon_uffd_test_ops;
extern uffd_test_ops_t shmem_uffd_test_ops;
char c;
struct uffd_args args = { 0 };
+ ready_for_fork = false;
+
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
if (uffd_register(uffd, area_dst, nr_pages * page_size,
if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
err("uffd_poll_thread create");
+ while (!ready_for_fork)
+ ; /* Wait for the poll_thread to start executing before forking */
+
pid = fork();
if (pid < 0)
err("fork");
char c;
struct uffd_args args = { 0 };
+ ready_for_fork = false;
+
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
if (uffd_register(uffd, area_dst, nr_pages * page_size,
true, wp, false))
if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
err("uffd_poll_thread create");
+ while (!ready_for_fork)
+ ; /* Wait for the poll_thread to start executing before forking */
+
pid = fork();
if (pid < 0)
err("fork");
.uffd_fn = uffd_sigbus_wp_test,
.mem_targets = MEM_ALL,
.uffd_feature_required = UFFD_FEATURE_SIGBUS |
- UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP,
+ UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
},
{
.name = "events",
#include <stdbool.h>
#include <sys/mman.h>
#include <err.h>
-#include <string.h> /* ffsl() */
+#include <strings.h> /* ffsl() */
#include <unistd.h> /* _SC_PAGESIZE */
#define BIT_ULL(nr) (1ULL << (nr))
#include "../kselftest_harness.h"
-struct in6_addr in6addr_v4mapped_any = {
+static const __u32 in4addr_any = INADDR_ANY;
+static const __u32 in4addr_loopback = INADDR_LOOPBACK;
+static const struct in6_addr in6addr_v4mapped_any = {
.s6_addr = {
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0
}
};
-
-struct in6_addr in6addr_v4mapped_loopback = {
+static const struct in6_addr in6addr_v4mapped_loopback = {
.s6_addr = {
0, 0, 0, 0,
0, 0, 0, 0,
}
};
+#define NR_SOCKETS 8
+
FIXTURE(bind_wildcard)
{
- struct sockaddr_in addr4;
- struct sockaddr_in6 addr6;
+ int fd[NR_SOCKETS];
+ socklen_t addrlen[NR_SOCKETS];
+ union {
+ struct sockaddr addr;
+ struct sockaddr_in addr4;
+ struct sockaddr_in6 addr6;
+ } addr[NR_SOCKETS];
};
FIXTURE_VARIANT(bind_wildcard)
{
- const __u32 addr4_const;
- const struct in6_addr *addr6_const;
- int expected_errno;
+ sa_family_t family[2];
+ const void *addr[2];
+ bool ipv6_only[2];
+
+ /* 6 bind() calls below follow two bind() for the defined 2 addresses:
+ *
+ * 0.0.0.0
+ * 127.0.0.1
+ * ::
+ * ::1
+ * ::ffff:0.0.0.0
+ * ::ffff:127.0.0.1
+ */
+ int expected_errno[NR_SOCKETS];
+ int expected_reuse_errno[NR_SOCKETS];
+};
+
+/* (IPv4, IPv4) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v4_local)
+{
+ .family = {AF_INET, AF_INET},
+ .addr = {&in4addr_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v4_any)
+{
+ .family = {AF_INET, AF_INET},
+ .addr = {&in4addr_loopback, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
+/* (IPv4, IPv6) */
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any_only)
+{
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_loopback,
- .expected_errno = 0,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_v4mapped_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_v4mapped_loopback,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any_only)
+{
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_loopback,
- .expected_errno = 0,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_v4mapped_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_v4mapped_loopback,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+/* (IPv6, IPv4) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_loopback, &in4addr_any},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_loopback, &in4addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_any, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_loopback, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_loopback, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+/* (IPv6, IPv6) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {true, true},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+static void setup_addr(FIXTURE_DATA(bind_wildcard) *self, int i,
+ int family, const void *addr_const)
+{
+ if (family == AF_INET) {
+ struct sockaddr_in *addr4 = &self->addr[i].addr4;
+ const __u32 *addr4_const = addr_const;
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(0);
+ addr4->sin_addr.s_addr = htonl(*addr4_const);
+
+ self->addrlen[i] = sizeof(struct sockaddr_in);
+ } else {
+ struct sockaddr_in6 *addr6 = &self->addr[i].addr6;
+ const struct in6_addr *addr6_const = addr_const;
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(0);
+ addr6->sin6_addr = *addr6_const;
+
+ self->addrlen[i] = sizeof(struct sockaddr_in6);
+ }
+}
+
FIXTURE_SETUP(bind_wildcard)
{
- self->addr4.sin_family = AF_INET;
- self->addr4.sin_port = htons(0);
- self->addr4.sin_addr.s_addr = htonl(variant->addr4_const);
+ setup_addr(self, 0, variant->family[0], variant->addr[0]);
+ setup_addr(self, 1, variant->family[1], variant->addr[1]);
+
+ setup_addr(self, 2, AF_INET, &in4addr_any);
+ setup_addr(self, 3, AF_INET, &in4addr_loopback);
- self->addr6.sin6_family = AF_INET6;
- self->addr6.sin6_port = htons(0);
- self->addr6.sin6_addr = *variant->addr6_const;
+ setup_addr(self, 4, AF_INET6, &in6addr_any);
+ setup_addr(self, 5, AF_INET6, &in6addr_loopback);
+ setup_addr(self, 6, AF_INET6, &in6addr_v4mapped_any);
+ setup_addr(self, 7, AF_INET6, &in6addr_v4mapped_loopback);
}
FIXTURE_TEARDOWN(bind_wildcard)
{
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ close(self->fd[i]);
}
-void bind_sockets(struct __test_metadata *_metadata,
- FIXTURE_DATA(bind_wildcard) *self,
- int expected_errno,
- struct sockaddr *addr1, socklen_t addrlen1,
- struct sockaddr *addr2, socklen_t addrlen2)
+void bind_socket(struct __test_metadata *_metadata,
+ FIXTURE_DATA(bind_wildcard) *self,
+ const FIXTURE_VARIANT(bind_wildcard) *variant,
+ int i, int reuse)
{
- int fd[2];
int ret;
- fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0);
- ASSERT_GT(fd[0], 0);
+ self->fd[i] = socket(self->addr[i].addr.sa_family, SOCK_STREAM, 0);
+ ASSERT_GT(self->fd[i], 0);
- ret = bind(fd[0], addr1, addrlen1);
- ASSERT_EQ(ret, 0);
+ if (i < 2 && variant->ipv6_only[i]) {
+ ret = setsockopt(self->fd[i], SOL_IPV6, IPV6_V6ONLY, &(int){1}, sizeof(int));
+ ASSERT_EQ(ret, 0);
+ }
- ret = getsockname(fd[0], addr1, &addrlen1);
- ASSERT_EQ(ret, 0);
+ if (i < 2 && reuse) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, reuse, &(int){1}, sizeof(int));
+ ASSERT_EQ(ret, 0);
+ }
- ((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port;
+ self->addr[i].addr4.sin_port = self->addr[0].addr4.sin_port;
- fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0);
- ASSERT_GT(fd[1], 0);
+ ret = bind(self->fd[i], &self->addr[i].addr, self->addrlen[i]);
- ret = bind(fd[1], addr2, addrlen2);
- if (expected_errno) {
- ASSERT_EQ(ret, -1);
- ASSERT_EQ(errno, expected_errno);
+ if (reuse) {
+ if (variant->expected_reuse_errno[i]) {
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, variant->expected_reuse_errno[i]);
+ } else {
+ ASSERT_EQ(ret, 0);
+ }
} else {
+ if (variant->expected_errno[i]) {
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, variant->expected_errno[i]);
+ } else {
+ ASSERT_EQ(ret, 0);
+ }
+ }
+
+ if (i == 0) {
+ ret = getsockname(self->fd[0], &self->addr[0].addr, &self->addrlen[0]);
ASSERT_EQ(ret, 0);
}
+}
- close(fd[1]);
- close(fd[0]);
+TEST_F(bind_wildcard, plain)
+{
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, 0);
}
-TEST_F(bind_wildcard, v4_v6)
+TEST_F(bind_wildcard, reuseaddr)
{
- bind_sockets(_metadata, self, variant->expected_errno,
- (struct sockaddr *)&self->addr4, sizeof(self->addr4),
- (struct sockaddr *)&self->addr6, sizeof(self->addr6));
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, SO_REUSEADDR);
}
-TEST_F(bind_wildcard, v6_v4)
+TEST_F(bind_wildcard, reuseport)
{
- bind_sockets(_metadata, self, variant->expected_errno,
- (struct sockaddr *)&self->addr6, sizeof(self->addr6),
- (struct sockaddr *)&self->addr4, sizeof(self->addr4));
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, SO_REUSEPORT);
}
TEST_HARNESS_MAIN
local stat_cookierx_last
local stat_csum_err_s
local stat_csum_err_c
+ local stat_tcpfb_last_l
stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
local stat_cookietx_now
local stat_cookierx_now
local stat_ooo_now
+ local stat_tcpfb_now_l
stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l))
fi
fi
+ if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+ mptcp_lib_pr_fail "unexpected fallback to TCP"
+ rets=1
+ fi
+
if [ $cookies -eq 2 ];then
if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
extra+=" WARN: CookieSent: did not advance"
[ -n "$_flags" ]; flags="flags $_flags"
shift
elif [ $1 = "dev" ]; then
- [ -n "$2" ]; dev="dev $1"
+ [ -n "$2" ]; dev="dev $2"
shift
elif [ $1 = "id" ]; then
_id=$2
local tests_pid=$!
wait_mpj $ns2
+ pm_nl_check_endpoint "creation" \
+ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
chk_subflow_nr "before delete" 2
chk_mptcp_info subflows 1 subflows 1
fd1 = open_port(0, 1);
if (fd1 >= 0)
error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
- fprintf(stderr, "Success");
+ fprintf(stderr, "Success\n");
return 0;
}
pos = strchr(line, ' ') + 1;
- if (fscanf(fnetstat, type->header_name) == EOF)
+ if (fscanf(fnetstat, "%[^ :]", type->header_name) == EOF)
test_error("fscanf(%s)", type->header_name);
if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':')
test_error("Unexpected netstat format (%c)", tmp);
void __test_msg(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_print_msg(buf);
+ ksft_print_msg("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_ok(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_pass(buf);
+ ksft_test_result_pass("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_fail(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_fail(buf);
+ ksft_test_result_fail("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_xfail(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_xfail(buf);
+ ksft_test_result_xfail("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_error(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_error(buf);
+ ksft_test_result_error("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_skip(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_skip(buf);
+ ksft_test_result_skip("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
static void test_client_active_rst(unsigned int port)
{
- /* one in queue, another accept()ed */
- unsigned int wait_for = backlog + 2;
int i, sk[3], err;
bool is_writable[ARRAY_SIZE(sk)] = {false};
unsigned int last = ARRAY_SIZE(sk) - 1;
for (i = 0; i < last; i++) {
err = _test_connect_socket(sk[i], this_ip_dest, port,
(i == 0) ? TEST_TIMEOUT_SEC : -1);
-
if (err < 0)
test_error("failed to connect()");
}
- synchronize_threads(); /* 2: connection accept()ed, another queued */
- err = test_wait_fds(sk, last, is_writable, wait_for, TEST_TIMEOUT_SEC);
+ synchronize_threads(); /* 2: two connections: one accept()ed, another queued */
+ err = test_wait_fds(sk, last, is_writable, last, TEST_TIMEOUT_SEC);
if (err < 0)
test_error("test_wait_fds(): %d", err);
+ /* async connect() with third sk to get into request_sock_queue */
+ err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
+ if (err < 0)
+ test_error("failed to connect()");
+
synchronize_threads(); /* 3: close listen socket */
if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
test_fail("Failed to send data on connected socket");
test_ok("Verified established tcp connection");
synchronize_threads(); /* 4: finishing up */
- err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
- if (err < 0)
- test_error("failed to connect()");
synchronize_threads(); /* 5: closed active sk */
- err = test_wait_fds(sk, ARRAY_SIZE(sk), NULL,
- wait_for, TEST_TIMEOUT_SEC);
+ /*
+ * Wait for 2 connections: one accepted, another in the accept queue,
+ * the one in request_sock_queue won't get fully established, so
+ * doesn't receive an active RST, see inet_csk_listen_stop().
+ */
+ err = test_wait_fds(sk, last, NULL, last, TEST_TIMEOUT_SEC);
if (err < 0)
test_error("select(): %d", err);
static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info,
const char *tst)
{
- struct tcp_ao_info_opt tmp;
+ struct tcp_ao_info_opt tmp = {};
socklen_t len = sizeof(tmp);
if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len))
local plen=$1; shift
local enc_ethtype=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Destination IP - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Destination IP - no match"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Default destination port - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Default destination port - no match"
run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Non-default destination port - match"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Non-default destination port - no match"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Default destination VNI - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Default destination VNI - no match"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Non-default destination VNI - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Non-default destination VNI - no match"
local plen=32
local enc_ethtype="ip"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn"
+ $grp $grp_dmac $src "mausezahn"
}
encap_params_ipv6_ipv4()
local plen=32
local enc_ethtype="ip"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn -6"
+ $grp $grp_dmac $src "mausezahn -6"
}
encap_params_ipv4_ipv6()
local plen=128
local enc_ethtype="ipv6"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn"
+ $grp $grp_dmac $src "mausezahn"
}
encap_params_ipv6_ipv6()
local plen=128
local enc_ethtype="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn -6"
+ $grp $grp_dmac $src "mausezahn -6"
}
starg_exclude_ir_common()
local vtep2_ip=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
log_test $? 0 "Block excluded source - second VTEP"
# Check that valid source is forwarded to both VTEPs.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Block excluded source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
log_test $? 0 "Block excluded source after removal - second VTEP"
# Check that valid source is forwarded to the remaining VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Forward valid source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
local vtep2_ip=198.51.100.200
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_exclude_ir_ipv6_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_exclude_ir_ipv4_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_exclude_ir_ipv6_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_include_ir_common()
local vtep2_ip=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
log_test $? 0 "Block excluded source - second VTEP"
# Check that valid source is forwarded to both VTEPs.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Block excluded source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
log_test $? 0 "Block excluded source after removal - second VTEP"
# Check that valid source is forwarded to the remaining VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Forward valid source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
local vtep2_ip=198.51.100.200
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_include_ir_ipv6_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_include_ir_ipv4_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_include_ir_ipv6_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_exclude_p2mp_common()
local mcast_grp=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0"
# Check that invalid source is not forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source"
# Check that valid source is forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source"
run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
# Check that valid source is not received anymore.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Receive of valid source after removal from group"
}
local mcast_grp=238.1.1.1
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
local mcast_grp=238.1.1.1
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
local mcast_grp=ff0e::2
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
local mcast_grp=ff0e::2
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
local mcast_grp=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0"
# Check that invalid source is not forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source"
# Check that valid source is forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source"
run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
# Check that valid source is not received anymore.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Receive of valid source after removal from group"
}
local mcast_grp=238.1.1.1
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
local mcast_grp=238.1.1.1
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
local mcast_grp=ff0e::2
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
local mcast_grp=ff0e::2
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
local plen=$1; shift
local proto=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
# Make sure that packets sent from the first VTEP over VLAN 10 are
# received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on
# the second VTEP, since it is configured as PVID.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
log_test $? 0 "Egress VNI translation - PVID configured"
# Remove PVID flag from VLAN 4000 on the second VTEP and make sure
# packets are no longer received by the SVI interface.
run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
log_test $? 0 "Egress VNI translation - no PVID configured"
# Reconfigure the PVID and make sure packets are received again.
run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2
log_test $? 0 "Egress VNI translation - PVID reconfigured"
}
local plen=32
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn"
+ $grp_dmac $src "mausezahn"
}
egress_vni_translation_ipv6_ipv4()
local plen=32
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn -6"
+ $grp_dmac $src "mausezahn -6"
}
egress_vni_translation_ipv4_ipv6()
local plen=128
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn"
+ $grp_dmac $src "mausezahn"
}
egress_vni_translation_ipv6_ipv6()
local plen=128
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn -6"
+ $grp_dmac $src "mausezahn -6"
}
all_zeros_mdb_common()
local vtep4_ip=$1; shift
local plen=$1; shift
local ipv4_grp=239.1.1.1
+ local ipv4_grp_dmac=01:00:5e:01:01:01
local ipv4_unreg_grp=239.2.2.2
+ local ipv4_unreg_grp_dmac=01:00:5e:02:02:02
local ipv4_ll_grp=224.0.0.100
+ local ipv4_ll_grp_dmac=01:00:5e:00:00:64
local ipv4_src=192.0.2.129
local ipv6_grp=ff0e::1
+ local ipv6_grp_dmac=33:33:00:00:00:01
local ipv6_unreg_grp=ff0e::2
+ local ipv6_unreg_grp_dmac=33:33:00:00:00:02
local ipv6_ll_grp=ff02::1
+ local ipv6_ll_grp_dmac=33:33:00:00:00:01
local ipv6_src=2001:db8:100::1
# Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic
# Send registered IPv4 multicast and make sure it only arrives to the
# first VTEP.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Registered IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
# Send unregistered IPv4 multicast that is not link-local and make sure
# it arrives to the first and second VTEPs.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Unregistered IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
# Send IPv4 link-local multicast traffic and make sure it does not
# arrive to any VTEP.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Link-local IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
# Send registered IPv6 multicast and make sure it only arrives to the
# third VTEP.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 1
log_test $? 0 "Registered IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 0
# Send unregistered IPv6 multicast that is not link-local and make sure
# it arrives to the third and fourth VTEPs.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 2
log_test $? 0 "Unregistered IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 1
# Send IPv6 link-local multicast traffic and make sure it does not
# arrive to any VTEP.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 2
log_test $? 0 "Link-local IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 1
local plen=$1; shift
local proto=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
# Send IP multicast traffic and make sure it is forwarded by the MDB
# and only arrives to the first VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "IP multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
# Remove the MDB entry and make sure that IP multicast is now forwarded
# by the FDB to the second VTEP.
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "IP multicast after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 2
local plen=32
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn"
}
mdb_fdb_ipv6_ipv4()
local plen=32
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn -6"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn -6"
}
mdb_fdb_ipv4_ipv6()
local plen=128
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn"
}
mdb_fdb_ipv6_ipv6()
local plen=128
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn -6"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn -6"
}
mdb_grp1_loop()
local vtep1_ip=$1; shift
local vtep2_ip=$1; shift
local grp1=$1; shift
+ local grp1_dmac=$1; shift
local grp2=$1; shift
+ local grp2_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
local pid1
pid1=$!
mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 &
pid2=$!
- ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
pid3=$!
- ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
pid4=$!
sleep 30
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local grp1=239.1.1.1
+ local grp1_dmac=01:00:5e:01:01:01
local grp2=239.2.2.2
+ local grp2_dmac=01:00:5e:02:02:02
local src=192.0.2.129
echo
echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn"
}
mdb_torture_ipv6_ipv4()
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local grp1=ff0e::1
+ local grp1_dmac=33:33:00:00:00:01
local grp2=ff0e::2
+ local grp2_dmac=33:33:00:00:00:02
local src=2001:db8:100::1
echo
echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn -6"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn -6"
}
mdb_torture_ipv4_ipv6()
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local grp1=239.1.1.1
+ local grp1_dmac=01:00:5e:01:01:01
local grp2=239.2.2.2
+ local grp2_dmac=01:00:5e:02:02:02
local src=192.0.2.129
echo
echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn"
}
mdb_torture_ipv6_ipv6()
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local grp1=ff0e::1
+ local grp1_dmac=33:33:00:00:00:01
local grp2=ff0e::2
+ local grp2_dmac=33:33:00:00:00:02
local src=2001:db8:100::1
echo
echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn -6"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn -6"
}
################################################################################
EXPECT_EQ(errno, EINVAL);
}
+TEST_F(tls, recv_efault)
+{
+ char *rec1 = "1111111111";
+ char *rec2 = "2222222222";
+ struct msghdr hdr = {};
+ struct iovec iov[2];
+ char recv_mem[12];
+ int ret;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ EXPECT_EQ(send(self->fd, rec1, 10, 0), 10);
+ EXPECT_EQ(send(self->fd, rec2, 10, 0), 10);
+
+ iov[0].iov_base = recv_mem;
+ iov[0].iov_len = sizeof(recv_mem);
+ iov[1].iov_base = NULL; /* broken iov to make process_rx_list fail */
+ iov[1].iov_len = 1;
+
+ hdr.msg_iovlen = 2;
+ hdr.msg_iov = iov;
+
+ EXPECT_EQ(recv(self->cfd, recv_mem, 1, 0), 1);
+ EXPECT_EQ(recv_mem[0], rec1[0]);
+
+ ret = recvmsg(self->cfd, &hdr, 0);
+ EXPECT_LE(ret, sizeof(recv_mem));
+ EXPECT_GE(ret, 9);
+ EXPECT_EQ(memcmp(rec1, recv_mem, 9), 0);
+ if (ret > 9)
+ EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0);
+}
+
FIXTURE(tls_err)
{
int fd, cfd;
create_vxlan_pair
ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
- run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
+ run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10
cleanup
# use NAT to circumvent GRO FWD check
# load arp cache before running the test to reduce the amount of
# stray traffic on top of the UDP tunnel
ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
- run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
- cleanup
-
- create_vxlan_pair
- run_bench "UDP tunnel fwd perf" $OL_NET$DST
- ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
- run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
+ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST
cleanup
done
#endif
#ifndef UDP_MAX_SEGMENTS
-#define UDP_MAX_SEGMENTS (1 << 6UL)
+#define UDP_MAX_SEGMENTS (1 << 7UL)
#endif
#define CONST_MTU_TEST 1500
static int papr_vpd_close_handle_without_reading(void)
{
const int devfd = open(DEVPATH, O_RDONLY);
- struct papr_location_code lc;
+ struct papr_location_code lc = { .str = "", };
int fd;
SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
-timeout=120
+timeout=180
diff = end.tv_usec - start.tv_usec;
diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC;
- if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
+ if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
printf("Diff too high: %lld..", diff);
return -1;
}
return 0;
}
-int remain;
-__thread int got_signal;
+static pthread_t ctd_thread;
+static volatile int ctd_count, ctd_failed;
-static void *distribution_thread(void *arg)
+static void ctd_sighandler(int sig)
{
- while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
- return NULL;
+ if (pthread_self() != ctd_thread)
+ ctd_failed = 1;
+ ctd_count--;
}
-static void distribution_handler(int nr)
+static void *ctd_thread_func(void *arg)
{
- if (!__atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED))
- __atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED);
-}
-
-/*
- * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
- * timer signals. This primarily tests that the kernel does not favour any one.
- */
-static int check_timer_distribution(void)
-{
- int err, i;
- timer_t id;
- const int nthreads = 10;
- pthread_t threads[nthreads];
struct itimerspec val = {
.it_value.tv_sec = 0,
.it_value.tv_nsec = 1000 * 1000,
.it_interval.tv_sec = 0,
.it_interval.tv_nsec = 1000 * 1000,
};
+ timer_t id;
- remain = nthreads + 1; /* worker threads + this thread */
- signal(SIGALRM, distribution_handler);
- err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
- if (err < 0) {
- ksft_perror("Can't create timer");
- return -1;
- }
- err = timer_settime(id, 0, &val, NULL);
- if (err < 0) {
- ksft_perror("Can't set timer");
- return -1;
- }
+ /* 1/10 seconds to ensure the leader sleeps */
+ usleep(10000);
- for (i = 0; i < nthreads; i++) {
- err = pthread_create(&threads[i], NULL, distribution_thread,
- NULL);
- if (err) {
- ksft_print_msg("Can't create thread: %s (%d)\n",
- strerror(errno), errno);
- return -1;
- }
- }
+ ctd_count = 100;
+ if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
+ return "Can't create timer\n";
+ if (timer_settime(id, 0, &val, NULL))
+ return "Can't set timer\n";
- /* Wait for all threads to receive the signal. */
- while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
+ while (ctd_count > 0 && !ctd_failed)
+ ;
- for (i = 0; i < nthreads; i++) {
- err = pthread_join(threads[i], NULL);
- if (err) {
- ksft_print_msg("Can't join thread: %s (%d)\n",
- strerror(errno), errno);
- return -1;
- }
- }
+ if (timer_delete(id))
+ return "Can't delete timer\n";
- if (timer_delete(id)) {
- ksft_perror("Can't delete timer");
- return -1;
- }
+ return NULL;
+}
+
+/*
+ * Test that only the running thread receives the timer signal.
+ */
+static int check_timer_distribution(void)
+{
+ const char *errmsg;
- ksft_test_result_pass("check_timer_distribution\n");
+ signal(SIGALRM, ctd_sighandler);
+
+ errmsg = "Can't create thread\n";
+ if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
+ goto err;
+
+ errmsg = "Can't join thread\n";
+ if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
+ goto err;
+
+ if (!ctd_failed)
+ ksft_test_result_pass("check signal distribution\n");
+ else if (ksft_min_kernel_version(6, 3))
+ ksft_test_result_fail("check signal distribution\n");
+ else
+ ksft_test_result_skip("check signal distribution (old kernel)\n");
return 0;
+err:
+ ksft_print_msg("%s", errmsg);
+ return -1;
}
int main(int argc, char **argv)
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
-
-
-
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define NUM_FREQ_OUTOFRANGE 4
#define NUM_FREQ_INVALID 2
+#define SHIFTED_PPM (1 << 16)
+
long valid_freq[NUM_FREQ_VALID] = {
- -499<<16,
- -450<<16,
- -400<<16,
- -350<<16,
- -300<<16,
- -250<<16,
- -200<<16,
- -150<<16,
- -100<<16,
- -75<<16,
- -50<<16,
- -25<<16,
- -10<<16,
- -5<<16,
- -1<<16,
+ -499 * SHIFTED_PPM,
+ -450 * SHIFTED_PPM,
+ -400 * SHIFTED_PPM,
+ -350 * SHIFTED_PPM,
+ -300 * SHIFTED_PPM,
+ -250 * SHIFTED_PPM,
+ -200 * SHIFTED_PPM,
+ -150 * SHIFTED_PPM,
+ -100 * SHIFTED_PPM,
+ -75 * SHIFTED_PPM,
+ -50 * SHIFTED_PPM,
+ -25 * SHIFTED_PPM,
+ -10 * SHIFTED_PPM,
+ -5 * SHIFTED_PPM,
+ -1 * SHIFTED_PPM,
-1000,
- 1<<16,
- 5<<16,
- 10<<16,
- 25<<16,
- 50<<16,
- 75<<16,
- 100<<16,
- 150<<16,
- 200<<16,
- 250<<16,
- 300<<16,
- 350<<16,
- 400<<16,
- 450<<16,
- 499<<16,
+ 1 * SHIFTED_PPM,
+ 5 * SHIFTED_PPM,
+ 10 * SHIFTED_PPM,
+ 25 * SHIFTED_PPM,
+ 50 * SHIFTED_PPM,
+ 75 * SHIFTED_PPM,
+ 100 * SHIFTED_PPM,
+ 150 * SHIFTED_PPM,
+ 200 * SHIFTED_PPM,
+ 250 * SHIFTED_PPM,
+ 300 * SHIFTED_PPM,
+ 350 * SHIFTED_PPM,
+ 400 * SHIFTED_PPM,
+ 450 * SHIFTED_PPM,
+ 499 * SHIFTED_PPM,
};
long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
- -1000<<16,
- -550<<16,
- 550<<16,
- 1000<<16,
+ -1000 * SHIFTED_PPM,
+ -550 * SHIFTED_PPM,
+ 550 * SHIFTED_PPM,
+ 1000 * SHIFTED_PPM,
};
#define LONG_MAX (~0UL>>1)
--- /dev/null
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+from shutil import which
+
+turbostat = which('turbostat')
+if turbostat is None:
+ print('Could not find turbostat binary')
+ exit(1)
+
+timeout = which('timeout')
+if timeout is None:
+ print('Could not find timeout binary')
+ exit(1)
+
+proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+
+#
+# By default --list reports also "usec" and "Time_Of_Day_Seconds" columns
+# which are only visible when running with --debug.
+#
+expected_columns_debug = proc_turbostat.stdout.replace(b',', b'\t').strip()
+expected_columns = expected_columns_debug.replace(b'usec\t', b'').replace(b'Time_Of_Day_Seconds\t', b'').replace(b'X2APIC\t', b'').replace(b'APIC\t', b'')
+
+#
+# Run turbostat with no options for 10 seconds and send SIGINT
+#
+timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '1s']
+turbostat_argv = [turbostat, '-i', '0.250']
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns != actual_columns:
+ print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}')
+ exit(1)
+print('OK')
+
+#
+# Same, but with --debug
+#
+turbostat_argv.append('--debug')
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns_debug != actual_columns:
+ print(f'turbostat column check failed\n{expected_columns_debug=}\n{actual_columns=}')
+ exit(1)
+print('OK')
* mn_active_invalidate_count (see above) instead of
* mmu_invalidate_in_progress.
*/
- gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
- hva_range.may_block);
+ gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end);
/*
* If one or more memslots were found and thus zapped, notify arch code
#ifdef CONFIG_HAVE_KVM_PFNCACHE
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
unsigned long start,
- unsigned long end,
- bool may_block);
+ unsigned long end);
#else
static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
unsigned long start,
- unsigned long end,
- bool may_block)
+ unsigned long end)
{
}
#endif /* HAVE_KVM_PFNCACHE */
* MMU notifier 'invalidate_range_start' hook.
*/
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
- unsigned long end, bool may_block)
+ unsigned long end)
{
struct gfn_to_pfn_cache *gpc;
spin_unlock(&kvm->gpc_lock);
}
+static bool kvm_gpc_is_valid_len(gpa_t gpa, unsigned long uhva,
+ unsigned long len)
+{
+ unsigned long offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) :
+ offset_in_page(gpa);
+
+ /*
+ * The cached access must fit within a single page. The 'len' argument
+ * to activate() and refresh() exists only to enforce that.
+ */
+ return offset + len <= PAGE_SIZE;
+}
+
bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
{
struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
if (kvm_is_error_hva(gpc->uhva))
return false;
- if (offset_in_page(gpc->uhva) + len > PAGE_SIZE)
+ if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len))
return false;
if (!gpc->valid)
return -EFAULT;
}
-static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva,
- unsigned long len)
+static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva)
{
unsigned long page_offset;
bool unmap_old = false;
if (WARN_ON_ONCE(kvm_is_error_gpa(gpa) == kvm_is_error_hva(uhva)))
return -EINVAL;
- /*
- * The cached acces must fit within a single page. The 'len' argument
- * exists only to enforce that.
- */
- page_offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) :
- offset_in_page(gpa);
- if (page_offset + len > PAGE_SIZE)
- return -EINVAL;
-
lockdep_assert_held(&gpc->refresh_lock);
write_lock_irq(&gpc->lock);
old_uhva = PAGE_ALIGN_DOWN(gpc->uhva);
if (kvm_is_error_gpa(gpa)) {
+ page_offset = offset_in_page(uhva);
+
gpc->gpa = INVALID_GPA;
gpc->memslot = NULL;
gpc->uhva = PAGE_ALIGN_DOWN(uhva);
} else {
struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
+ page_offset = offset_in_page(gpa);
+
if (gpc->gpa != gpa || gpc->generation != slots->generation ||
kvm_is_error_hva(gpc->uhva)) {
gfn_t gfn = gpa_to_gfn(gpa);
guard(mutex)(&gpc->refresh_lock);
+ if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len))
+ return -EINVAL;
+
/*
* If the GPA is valid then ignore the HVA, as a cache can be GPA-based
* or HVA-based, not both. For GPA-based caches, the HVA will be
*/
uhva = kvm_is_error_gpa(gpc->gpa) ? gpc->uhva : KVM_HVA_ERR_BAD;
- return __kvm_gpc_refresh(gpc, gpc->gpa, uhva, len);
+ return __kvm_gpc_refresh(gpc, gpc->gpa, uhva);
}
void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm)
{
struct kvm *kvm = gpc->kvm;
+ if (!kvm_gpc_is_valid_len(gpa, uhva, len))
+ return -EINVAL;
+
guard(mutex)(&gpc->refresh_lock);
if (!gpc->active) {
gpc->active = true;
write_unlock_irq(&gpc->lock);
}
- return __kvm_gpc_refresh(gpc, gpa, uhva, len);
+ return __kvm_gpc_refresh(gpc, gpa, uhva);
}
int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
{
+ /*
+ * Explicitly disallow INVALID_GPA so that the magic value can be used
+ * by KVM to differentiate between GPA-based and HVA-based caches.
+ */
+ if (WARN_ON_ONCE(kvm_is_error_gpa(gpa)))
+ return -EINVAL;
+
return __kvm_gpc_activate(gpc, gpa, KVM_HVA_ERR_BAD, len);
}