Skip to content

Commit b1fc44e

Browse files
aikmpe
authored and committed
pseries/iommu/ddw: Fix kdump to work in absence of ibm,dma-window
The pseries platform uses a 32bit default DMA window (always 4K pages) and an optional 64bit DMA window available via DDW ("Dynamic DMA Windows"), with 64K or 2M pages. For ages the default one was not removed and a huge window was created in addition. Things changed with SRIOV-enabled PowerVM, which creates a default-and-bigger DMA window in 64bit space (still using 4K pages) for IOV VFs so certain OSes do not need to use the DDW API in order to utilize all available TCE budget. Linux, on the other hand, removes the default window and creates a bigger one (with more TCEs and/or a bigger page size - 64K/2M) in a bid to map the entire RAM, and if the new window size is smaller than that - it still uses this new bigger window. The result is that the default window is removed but the "ibm,dma-window" property is not. When kdump is invoked, the existing code tries reusing the existing 64bit DMA window, whose location and parameters are stored in the device tree, but this fails as the new property does not make it to the kdump device tree blob. So the code falls back to the default window, which does not exist anymore although the device tree says that it does. The result is that PCI devices become unusable and cannot be used for kdumping. This preserves the DMA64 and DIRECT64 properties in the device tree blob for the crash kernel. Since the crash kernel setup is done after device drivers are loaded and probed, the proper DMA config is stored at least for boot time devices. Because the DDW window is optional and the code configures the default window first, the existing code creates an IOMMU table descriptor for the non-existing default DMA window. It is harmless for kdump as it does not touch the actual window (only reads what is mapped and marks those IO pages as used) but it is bad for kexec, which clears it thinking it is a smaller default window rather than a bigger DDW window. 
This removes the "ibm,dma-window" property from the device tree after a bigger window is created and the crash kernel setup picks it up. Fixes: 381ceda ("powerpc/pseries/iommu: Make use of DDW for indirect mapping") Signed-off-by: Alexey Kardashevskiy <[email protected]> Acked-by: Hari Bathini <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent c725505 commit b1fc44e

File tree

2 files changed

+102
-41
lines changed

2 files changed

+102
-41
lines changed

arch/powerpc/kexec/file_load_64.c

+54
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,48 @@ static int update_cpus_node(void *fdt)
10381038
return ret;
10391039
}
10401040

1041+
/*
 * copy_property - Mirror one property of a live device-tree node into the FDT.
 * @fdt:          Flattened device tree being prepared for the new kernel.
 * @node_offset:  Offset of the target node inside @fdt.
 * @dn:           Live device-tree node to read the property from.
 * @propname:     Name of the property to synchronise.
 *
 * If @dn has the property, it is written (created or overwritten) in @fdt.
 * If only @fdt has it, it is deleted from @fdt.
 * If neither side has it there is nothing to do.
 *
 * Returns the libfdt result of the set/delete operation (negative on error),
 * or 0 when the property is absent on both sides.
 */
static int copy_property(void *fdt, int node_offset, const struct device_node *dn,
			 const char *propname)
{
	const void *prop, *fdtprop;
	int len = 0, fdtlen = 0;

	prop = of_get_property(dn, propname, &len);
	fdtprop = fdt_getprop(fdt, node_offset, propname, &fdtlen);

	if (prop)
		return fdt_setprop(fdt, node_offset, propname, prop, len);

	if (fdtprop)
		return fdt_delprop(fdt, node_offset, propname);

	/*
	 * Absent in both trees: report success explicitly instead of
	 * returning an uninitialized value the caller could see as an error.
	 */
	return 0;
}
1057+
1058+
static int update_pci_dma_nodes(void *fdt, const char *dmapropname)
1059+
{
1060+
struct device_node *dn;
1061+
int pci_offset, root_offset, ret = 0;
1062+
1063+
if (!firmware_has_feature(FW_FEATURE_LPAR))
1064+
return 0;
1065+
1066+
root_offset = fdt_path_offset(fdt, "/");
1067+
for_each_node_with_property(dn, dmapropname) {
1068+
pci_offset = fdt_subnode_offset(fdt, root_offset, of_node_full_name(dn));
1069+
if (pci_offset < 0)
1070+
continue;
1071+
1072+
ret = copy_property(fdt, pci_offset, dn, "ibm,dma-window");
1073+
if (ret < 0)
1074+
break;
1075+
ret = copy_property(fdt, pci_offset, dn, dmapropname);
1076+
if (ret < 0)
1077+
break;
1078+
}
1079+
1080+
return ret;
1081+
}
1082+
10411083
/**
10421084
* setup_new_fdt_ppc64 - Update the flattened device-tree of the kernel
10431085
* being loaded.
@@ -1099,6 +1141,18 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
10991141
if (ret < 0)
11001142
goto out;
11011143

1144+
#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
1145+
#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
1146+
ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME);
1147+
if (ret < 0)
1148+
goto out;
1149+
1150+
ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME);
1151+
if (ret < 0)
1152+
goto out;
1153+
#undef DMA64_PROPNAME
1154+
#undef DIRECT64_PROPNAME
1155+
11021156
/* Update memory reserve map */
11031157
ret = get_reserved_memory_ranges(&rmem);
11041158
if (ret)

arch/powerpc/platforms/pseries/iommu.c

+48-41
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,33 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = {
700700
.get = tce_get_pSeriesLP
701701
};
702702

703+
/*
704+
* Find nearest ibm,dma-window (default DMA window) or direct DMA window or
705+
* dynamic 64bit DMA window, walking up the device tree.
706+
*/
707+
static struct device_node *pci_dma_find(struct device_node *dn,
708+
const __be32 **dma_window)
709+
{
710+
const __be32 *dw = NULL;
711+
712+
for ( ; dn && PCI_DN(dn); dn = dn->parent) {
713+
dw = of_get_property(dn, "ibm,dma-window", NULL);
714+
if (dw) {
715+
if (dma_window)
716+
*dma_window = dw;
717+
return dn;
718+
}
719+
dw = of_get_property(dn, DIRECT64_PROPNAME, NULL);
720+
if (dw)
721+
return dn;
722+
dw = of_get_property(dn, DMA64_PROPNAME, NULL);
723+
if (dw)
724+
return dn;
725+
}
726+
727+
return NULL;
728+
}
729+
703730
static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
704731
{
705732
struct iommu_table *tbl;
@@ -712,20 +739,10 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
712739
pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
713740
dn);
714741

715-
/*
716-
* Find nearest ibm,dma-window (default DMA window), walking up the
717-
* device tree
718-
*/
719-
for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
720-
dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
721-
if (dma_window != NULL)
722-
break;
723-
}
742+
pdn = pci_dma_find(dn, &dma_window);
724743

725-
if (dma_window == NULL) {
744+
if (dma_window == NULL)
726745
pr_debug(" no ibm,dma-window property !\n");
727-
return;
728-
}
729746

730747
ppci = PCI_DN(pdn);
731748

@@ -735,11 +752,13 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
735752
if (!ppci->table_group) {
736753
ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
737754
tbl = ppci->table_group->tables[0];
738-
iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
739-
ppci->table_group, dma_window);
755+
if (dma_window) {
756+
iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
757+
ppci->table_group, dma_window);
740758

741-
if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
742-
panic("Failed to initialize iommu table");
759+
if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
760+
panic("Failed to initialize iommu table");
761+
}
743762
iommu_register_group(ppci->table_group,
744763
pci_domain_nr(bus), 0);
745764
pr_debug(" created table: %p\n", ppci->table_group);
@@ -1234,7 +1253,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
12341253
bool default_win_removed = false, direct_mapping = false;
12351254
bool pmem_present;
12361255
struct pci_dn *pci = PCI_DN(pdn);
1237-
struct iommu_table *tbl = pci->table_group->tables[0];
1256+
struct property *default_win = NULL;
12381257

12391258
dn = of_find_node_by_type(NULL, "ibm,pmemory");
12401259
pmem_present = dn != NULL;
@@ -1291,11 +1310,10 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
12911310
* for extensions presence.
12921311
*/
12931312
if (query.windows_available == 0) {
1294-
struct property *default_win;
12951313
int reset_win_ext;
12961314

12971315
/* DDW + IOMMU on single window may fail if there is any allocation */
1298-
if (iommu_table_in_use(tbl)) {
1316+
if (iommu_table_in_use(pci->table_group->tables[0])) {
12991317
dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n");
13001318
goto out_failed;
13011319
}
@@ -1431,16 +1449,18 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
14311449

14321450
pci->table_group->tables[1] = newtbl;
14331451

1434-
/* Keep default DMA window struct if removed */
1435-
if (default_win_removed) {
1436-
tbl->it_size = 0;
1437-
vfree(tbl->it_map);
1438-
tbl->it_map = NULL;
1439-
}
1440-
14411452
set_iommu_table_base(&dev->dev, newtbl);
14421453
}
14431454

1455+
if (default_win_removed) {
1456+
iommu_tce_table_put(pci->table_group->tables[0]);
1457+
pci->table_group->tables[0] = NULL;
1458+
1459+
/* default_win is valid here because default_win_removed == true */
1460+
of_remove_property(pdn, default_win);
1461+
dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn);
1462+
}
1463+
14441464
spin_lock(&dma_win_list_lock);
14451465
list_add(&window->list, &dma_win_list);
14461466
spin_unlock(&dma_win_list_lock);
@@ -1505,13 +1525,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
15051525
dn = pci_device_to_OF_node(dev);
15061526
pr_debug(" node is %pOF\n", dn);
15071527

1508-
for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
1509-
pdn = pdn->parent) {
1510-
dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
1511-
if (dma_window)
1512-
break;
1513-
}
1514-
1528+
pdn = pci_dma_find(dn, &dma_window);
15151529
if (!pdn || !PCI_DN(pdn)) {
15161530
printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
15171531
"no DMA window found for pci dev=%s dn=%pOF\n",
@@ -1542,7 +1556,6 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
15421556
static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
15431557
{
15441558
struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
1545-
const __be32 *dma_window = NULL;
15461559

15471560
/* only attempt to use a new window if 64-bit DMA is requested */
15481561
if (dma_mask < DMA_BIT_MASK(64))
@@ -1556,13 +1569,7 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
15561569
* search upwards in the tree until we either hit a dma-window
15571570
* property, OR find a parent with a table already allocated.
15581571
*/
1559-
for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
1560-
pdn = pdn->parent) {
1561-
dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
1562-
if (dma_window)
1563-
break;
1564-
}
1565-
1572+
pdn = pci_dma_find(dn, NULL);
15661573
if (pdn && PCI_DN(pdn))
15671574
return enable_ddw(pdev, pdn);
15681575

0 commit comments

Comments
 (0)