Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

join_ald_scenario() works with upper/lower sector/technology #172

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

User-facing

* `join_ald_scenario()` now convert `sector_ald` and `technology` to lower case
(#172). This results in the expected matches in the rare case when the values
of these columns are not exclusively lower case.
* `target_sda()` now aggregates input `ald` by `technology` and `plant_location`
prior to calculating targets (@QianFeng2020 #160).

Expand Down
10 changes: 6 additions & 4 deletions R/join_ald_scenario.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ join_ald_scenario <- function(data,
region_isos <- region_isos %>%
rename(scenario_source = .data$source)

ald <- modify_at_(ald, "sector", tolower)
ald <- modify_at_(ald, "technology", tolower)
data %>%
left_join(ald, by = ald_columns()) %>%
inner_join(scenario, by = scenario_columns()) %>%
Expand All @@ -59,26 +61,26 @@ join_ald_scenario <- function(data,
}

warn_if_has_zero_rows <- function(data, message) {
if (nrow(data) == 0L) warn(message)
if (nrow(data) == 0L) warn(message = message, class = "has_zero_rows")

invisible(data)
}

check_portfolio_ald_scenario <- function(valid_matches, ald, scenario) {
check_crucial_names(valid_matches, names(ald_columns()))
walk(names(ald_columns()), ~ check_no_value_is_missing(valid_matches, .x))
walk_(names(ald_columns()), ~ check_no_value_is_missing(valid_matches, .x))

check_crucial_names(
ald, c("name_company", "plant_location", unname(scenario_columns()))
)
walk(
walk_(
c("name_company", unname(scenario_columns())),
~ check_no_value_is_missing(ald, .x)
)


check_crucial_names(scenario, c(scenario_columns(), "scenario_source", "region"))
walk(
walk_(
c(scenario_columns(), "scenario_source", "region"),
~ check_no_value_is_missing(scenario, .x)
)
Expand Down
2 changes: 1 addition & 1 deletion R/summarize_weighted_production.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ add_weighted_loan_metric <- function(data, use_credit_limit, percent_change) {
}

check_crucial_names(data, crucial)
walk(crucial, ~ check_no_value_is_missing(data, .x))
walk_(crucial, ~ check_no_value_is_missing(data, .x))

if (percent_change) {
check_zero_initial_production(data)
Expand Down
2 changes: 1 addition & 1 deletion R/target_market_share.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ target_market_share <- function(data,
crucial_scenario <- c("scenario", "tmsr", "smsp")
check_crucial_names(scenario, crucial_scenario)
check_crucial_names(ald, "is_ultimate_owner")
walk(crucial_scenario, ~ check_no_value_is_missing(scenario, .x))
walk_(crucial_scenario, ~ check_no_value_is_missing(scenario, .x))

summary_groups <- maybe_add_name_ald(
c("scenario", "tmsr", "smsp", "region", "scenario_source"),
Expand Down
10 changes: 4 additions & 6 deletions R/target_sda.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,13 @@ target_sda <- function(data,

check_crucial_names(data, crucial_portfolio)
check_unique_id(data, "id_loan")
walk(crucial_portfolio, ~ check_no_value_is_missing(data, .x))
walk_(crucial_portfolio, ~ check_no_value_is_missing(data, .x))

check_crucial_names(ald, crucial_ald)
walk(crucial_ald, ~ check_no_value_is_missing(ald, .x))
walk_(crucial_ald, ~ check_no_value_is_missing(ald, .x))

check_crucial_names(co2_intensity_scenario, crucial_scenario)
walk(crucial_scenario, ~ check_no_value_is_missing(co2_intensity_scenario, .x))
walk_(crucial_scenario, ~ check_no_value_is_missing(co2_intensity_scenario, .x))

ald_aggregation_columns <- c("technology", "plant_location")
ald_by_sector <- aggregate_ald_by_columns(ald, ald_aggregation_columns)
Expand All @@ -133,7 +133,7 @@ target_sda <- function(data,
)

if (identical(nrow(loanbook_with_weighted_emission_factors), 0L)) {
rlang::warn("Found no match between loanbook and ald.")
warn("Found no match between loanbook and ald.", class = "no_match")
return(empty_target_sda_output())
}

Expand Down Expand Up @@ -191,7 +191,6 @@ calculate_weighted_emission_factor <- function(data,
...,
use_credit_limit = FALSE,
by_company = FALSE) {

data %>%
inner_join(ald, by = ald_columns()) %>%
add_loan_weighted_emission_factor(
Expand All @@ -204,7 +203,6 @@ calculate_weighted_emission_factor <- function(data,
) %>%
ungroup() %>%
rename(sector = .data$sector_ald)

}

add_loan_weighted_emission_factor <- function(data, use_credit_limit, by_company = FALSE) {
Expand Down
8 changes: 7 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,18 @@ warn_grouped <- function(data, message) {
}

# Avoid dependency on purrr
walk <- function(.x, .f, ...) {
walk_ <- function(.x, .f, ...) {
.f <- rlang::as_function(.f)
lapply(.x, .f, ...)
invisible(.x)
}

# Avoid dependency on purrr
modify_at_ <- function(.x, .at, .f) {
.x[[.at]] <- .f(.x[[.at]])
.x
}

# We can remove this once we depend on R >= 3.5. See ?backports::isTRUE
isTRUE <- function(x) {
is.logical(x) && length(x) == 1L && !is.na(x) && x
Expand Down
49 changes: 46 additions & 3 deletions tests/testthat/test-join_ald_scenario.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,13 @@ test_that("is case-insensitive to `plant_location` inputs", {

test_that("outputs a number of rows equal to matches by `scenario_source`", {
matching_0 <- expect_warning(
class = "has_zero_row",
join_ald_scenario(
fake_matched(),
ald = fake_ald(plant_location = "a"),
scenario = fake_scenario(region = "b", scenario_source = "c"),
region_isos = tibble(isos = "a", region = "b", source = "-")
)
),
"region_isos.*0 row",
)
expect_equal(nrow(matching_0), 0L)

Expand Down Expand Up @@ -190,20 +190,27 @@ test_that("warns 0-rows caused by scenario or region_isos", {
bad_scenario <- fake_scenario(
region = l$region, scenario_source = l$source, sector = "bad"
)
expect_warning(join_ald_scenario2(l, bad_scenario), "scenario")
# There are more than one warnings; this catches the first one, the rest
# buttle up so we need to suppress them.
suppressWarnings(
expect_warning(join_ald_scenario2(l, bad_scenario), class = "has_zero_rows")
)

bad_region1 <- tibble(region = "bad", isos = l$isos, source = l$source)
expect_warning(
class = "has_zero_rows",
join_ald_scenario2(l, region_isos = bad_region1), "region_isos"
)

bad_region2 <- tibble(region = l$region, isos = "bad", source = l$source)
expect_warning(
class = "has_zero_rows",
join_ald_scenario2(l, region_isos = bad_region2), "region_isos"
)

bad_region3 <- tibble(region = l$region, isos = l$isos, source = "bad")
expect_warning(
class = "has_zero_rows",
join_ald_scenario2(l, region_isos = bad_region3), "region_isos"
)
})
Expand Down Expand Up @@ -235,3 +242,39 @@ test_that("include/excludes `plant_location` inside/outside a region", {
# The output excludes locations outside matching regions
expect_false(any(unique(out$plant_location) %in% "us"))
})

test_that("outputs the same with upper/lower ald$sector or ald$technology", {
# From r2dii.match fake_lbk()
lbk <- tibble(
sector_classification_system = c("NACE"),
id_ultimate_parent = c("UP15"),
name_ultimate_parent = c("Alpine Knits India Pvt. Limited", NA),
id_direct_loantaker = c("C294"),
name_direct_loantaker = c("Yuamen Xinneng Thermal Power Co Ltd", NA),
sector_classification_direct_loantaker = c(3511),
id_loan = c(1)
)
# Based on r2dii.match fake_ald()
ald <- tibble(
name_company = "alpine knits india pvt. limited",
sector = "power",
alias_ald = "alpineknitsindiapvt ltd",
plant_location = "dm",
technology = "renewablescap",
year = 2020
)
matched <- prioritize(match_name(lbk, ald))

scenario <- r2dii.data::scenario_demo_2020
regions <- r2dii.data::region_isos_demo

out_lower <- join_ald_scenario(matched, ald, scenario, regions)

upper_sector <- modify_at_(ald, "sector", toupper)
out_upper <- join_ald_scenario(matched, upper_sector, scenario, regions)
expect_equal(out_upper, out_lower)

upper_technology <- modify_at_(ald, "technology", toupper)
out_upper <- join_ald_scenario(matched, upper_technology, scenario, regions)
expect_equal(out_upper, out_lower)
})
1 change: 1 addition & 0 deletions tests/testthat/test-target_sda.R
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ test_that("with known input outputs as expected, at company level (#155)", {
test_that("with no matching data warns", {
no_matches <- fake_matched(sector_ald = "bad")
expect_warning(
class = "no_match",
target_sda(no_matches, fake_ald(), fake_co2_scenario()), "no match"
)

Expand Down