Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix CRAN issues for gcc 13 #36

Merged
merged 12 commits into from
Jul 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
^_pkgdown\.yml$
^docs$
^pkgdown$
^Dockerfile$
11 changes: 7 additions & 4 deletions .github/workflows/build-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ jobs:
config:
- {os: macOS-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'release'}
#- {os: windows-latest, r: '3.6', rspm: "https://packagemanager.rstudio.com/cran/latest"}
#- {os: ubuntu-18.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest", http-user-agent: "R/4.0.0 (ubuntu-18.04) R (4.0.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
- {os: ubuntu-18.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"}
#- {os: ubuntu-18.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"}
#- {os: ubuntu-18.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"}
#- {os: ubuntu-18.04, r: '3.6', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"}
#- {os: ubuntu-18.04, r: '3.5', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"}
Expand All @@ -42,16 +43,18 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
submodules: recursive

- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-r@v2
with:
rtools-version: '42'
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true

- uses: r-lib/actions/setup-pandoc@v1
- uses: r-lib/actions/setup-pandoc@v2

- name: Query dependencies
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/lint-project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
submodules: recursive

- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-r@v2

- name: Install lintr
run: install.packages("lintr")
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
submodules: recursive

- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-r@v2

- uses: r-lib/actions/setup-pandoc@v1
- uses: r-lib/actions/setup-pandoc@v2

- name: Query dependencies
run: |
Expand Down
4 changes: 3 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@
"geodaweight.h": "c",
"filesystem": "cpp",
"numbers": "cpp",
"semaphore": "cpp"
"semaphore": "cpp",
"__bits": "cpp",
"__verbose_abort": "cpp"
}
}
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: rgeoda
Type: Package
Title: R Library for Spatial Data Analysis
Version: 0.0.9
Date: 2022-04-09
Version: 0.0.10-4
Date: 2023-07-01
Authors@R:
c(person(given = "Xun", family = "Li", email="lixun910@gmail.com", role=c("aut","cre")),
person(given = "Luc", family = "Anselin", email="anselin@uchicago.edu", role="aut"))
Expand Down Expand Up @@ -39,4 +39,4 @@ Encoding: UTF-8
Suggests:
wkb,
sp
SystemRequirements: C++14
SystemRequirements: C++17
7 changes: 7 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
FROM rocker/r-base

ENV DEBIAN_FRONTEND noninteractive

RUN apt-get update && apt-get install -y git libssl-dev libgeos-dev libgeos++-dev gdal-bin libproj-dev libgdal-dev libudunits2-dev

RUN install2.r --error proxy Rcpp wk sp digest sf BH wkb TinyTex
43 changes: 30 additions & 13 deletions R/clustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,26 @@
#' similar values for features of interest.
#' @param k The number of clusters
#' @param w An instance of Weight class
#' @param df A data frame with selected variables only. E.g. guerry[c("Crm_prs", "Crm_prp", "Litercy")]
#' @param df A data frame with selected variables only.
#' E.g. guerry[c("Crm_prs", "Crm_prp", "Litercy")]
#' @param bound_variable (optional) A data frame with selected bound variable
#' @param min_bound (optional) A minimum bound value that applies to all clusters
#' @param scale_method One of the scaling methods {'raw', 'standardize', 'demean', 'mad', 'range_standardize', 'range_adjust'} to apply on input data. Default is 'standardize' (Z-score normalization).
#' @param distance_method (optional) The distance method used to compute the distance betwen observation i and j. Defaults to "euclidean". Options are "euclidean" and "manhattan"
#' @param random_seed (int,optional) The seed for random number generator. Defaults to 123456789.
#' @param cpu_threads (optional) The number of cpu threads used for parallel computation
#' @param rdist (optional) The distance matrix (lower triangular matrix, column wise storage)
#' @return A names list with names "Clusters", "Total sum of squares", "Within-cluster sum of squares", "Total within-cluster sum of squares", and "The ratio of between to total sum of squares".
#' @param min_bound (optional) A minimum bound value that applies to all
#' clusters
#' @param scale_method One of the scaling methods {'raw', 'standardize',
#' 'demean', 'mad', 'range_standardize', 'range_adjust'} to apply on input data.
#' Default is 'standardize' (Z-score normalization).
#' @param distance_method (optional) The distance method used to compute the
#' distance betwen observation i and j. Defaults to "euclidean". Options are
#' "euclidean" and "manhattan"
#' @param random_seed (int,optional) The seed for random number generator.
#' Defaults to 123456789.
#' @param cpu_threads (optional) The number of cpu threads used for parallel
#' computation
#' @param rdist (optional) The distance matrix (lower triangular matrix,
#' column wise storage)
#' @return A names list with names "Clusters", "Total sum of squares",
#' "Within-cluster sum of squares", "Total within-cluster sum of squares",
#' and "The ratio of between to total sum of squares".
#' @examples
#' library(sf)
#' guerry_path <- system.file("extdata", "Guerry.shp", package = "rgeoda")
Expand All @@ -22,12 +33,15 @@
#' guerry_clusters <- skater(4, queen_w, data)
#' guerry_clusters
#' @export
skater <- function(k, w, df, bound_variable=data.frame(), min_bound=0, scale_method="standardize", distance_method="euclidean", random_seed=123456789, cpu_threads=6, rdist=numeric()) {
skater <- function(k, w, df, bound_variable=data.frame(), min_bound=0,
scale_method="standardize", distance_method="euclidean",
random_seed=123456789, cpu_threads=6, rdist=numeric()) {
if (w$num_obs < 1) {
stop("The weights is not valid.")
}
if (k <1 && k > w$num_obs) {
stop("The number of clusters should be a positive integer number, which is less than the number of observations.")
stop("The number of clusters should be a positive integer number, which is
less than the number of observations.")
}
if (inherits(df, "data.frame") == FALSE) {
stop("The input data needs to be a data.frame.")
Expand All @@ -43,9 +57,11 @@ skater <- function(k, w, df, bound_variable=data.frame(), min_bound=0, scale_met
stop("The data.frame is empty.")
}

scale_methods <- c('raw', 'standardize', 'demean', 'mad', 'range_standardize', 'range_adjust')
scale_methods <- c('raw', 'standardize', 'demean', 'mad', 'range_standardize',
'range_adjust')
if (!(scale_method %in% scale_methods)) {
stop("The scale_method has to be one of {'raw', 'standardize', 'demean', 'mad', 'range_standardize', 'range_adjust'}")
stop("The scale_method has to be one of {'raw', 'standardize', 'demean',
'mad', 'range_standardize', 'range_adjust'}")
}

if (distance_method != "euclidean" && distance_method != "manhattan") {
Expand All @@ -56,7 +72,8 @@ skater <- function(k, w, df, bound_variable=data.frame(), min_bound=0, scale_met
if (length(bound_variable) > 0) {
bound_values <- bound_variable[[1]]
}
return(p_skater(k, w$GetPointer(), df, n_vars, scale_method, distance_method, bound_values, min_bound, random_seed, cpu_threads, rdist))
return(p_skater(k, w$GetPointer(), df, n_vars, scale_method, distance_method,
bound_values, min_bound, random_seed, cpu_threads, rdist))
}


Expand Down
4 changes: 2 additions & 2 deletions R/sf_geoda.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Create a random string (internally used)
# The input is a positive number, indicating the number of items to choose from.
random_string <- function(n = 5000) {
a <- do.call(paste0, replicate(5, sample(LETTERS, n, TRUE), FALSE))
paste0(a, sprintf("%04d", sample(9999, n, TRUE)), sample(LETTERS, n, TRUE))
a <- do.call(paste0, replicate(10, sample(LETTERS, n, TRUE), FALSE))
return(a)
}


Expand Down
27 changes: 19 additions & 8 deletions man/skater.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/Makevars
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ PKG_CPPFLAGS=\
PKG_LIBS=\
-pthread

CXX_STD=CXX14
CXX_STD=CXX17

CPP_SRC_FILES = \
$(RGEODALIB)/libgeoda.cpp \
Expand Down
2 changes: 1 addition & 1 deletion src/Makevars.win
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ PKG_LIBS=\
-L$(RWINLIB)/lib$(R_ARCH) \
-pthread

CXX_STD = CXX14
CXX_STD = CXX17

CPP_SRC_FILES = \
$(RGEODALIB)/libgeoda.cpp \
Expand Down
2 changes: 0 additions & 2 deletions src/rcpp_clustering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,8 @@ Rcpp::List p_skater(int k, SEXP xp_w, Rcpp::List& data, int n_vars, std::string
int num_obs = w->GetNumObs();
double** dist_matrix = rdist_matrix(num_obs, rdist);

Rcout << "aaa" << dist_matrix;
std::vector<std::vector<int> > cluster_ids = gda_skater(k, w, raw_data, scale_method, distance_method, raw_bound, min_bound, seed, cpu_threads, dist_matrix);

Rcout << "after gda_skater";
if (dist_matrix) {
for (int i = 1; i < num_obs; i++) {
free(dist_matrix[i]);
Expand Down
34 changes: 19 additions & 15 deletions tests/testthat/test-clustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,14 @@ testthat::test_that("schc", {
testthat::expect_equal(clusters[[5]], 0.2147711255)
})

# NOTE!!!!!!!!!
# The results are computed using Boost library 1.58.0.
# To pass the following test cases
# , please install BH package version==1.58.0
# NOTE
# The previous results are computed using Boost library 1.58.0.
# The new results are computed using Boost library 1.81.0.1
# The differences are caused by the different implementation of
# boost::unordered_map: he keys in boost::unordered_map are not ordered and
# have different orders in the two Boost versions. This involves a different
# mechanism of randomness in max-p algorithm when picking which area or region
# to process.

testthat::test_that("azp_greedy", {
library(sf)
Expand All @@ -78,7 +82,7 @@ testthat::test_that("azp_greedy", {

azp_clusters <- azp_greedy(5, queen_w, data)

testthat::expect_equal(azp_clusters[[5]], 0.3598541)
testthat::expect_equal(azp_clusters[[5]], 0.36, tolerance = 1e-3)

bound_variable <- guerry["Pop1831"]
min_bound <- 3236.67 # 10% of Pop1831
Expand All @@ -87,7 +91,7 @@ testthat::test_that("azp_greedy", {
bound_variable = bound_variable,
min_bound = min_bound)

testthat::expect_equal(azp_clusters[[5]], 0.3980921835)
testthat::expect_equal(azp_clusters[[5]], 0.417, tolerance = 1e-3)

})

Expand All @@ -101,7 +105,7 @@ testthat::test_that("azp_sa", {

azp_clusters <- azp_sa(5, queen_w, data, cooling_rate = 0.85, sa_maxit = 1)

testthat::expect_equal(azp_clusters[[5]], 0.4211363)
testthat::expect_equal(azp_clusters[[5]], 0.359, tolerance = 1e-3)
})

testthat::test_that("azp_tabu", {
Expand Down Expand Up @@ -129,9 +133,9 @@ testthat::test_that("maxp_greedy", {
bound_vals <- guerry["Pop1831"]
min_bound <- 3236.67 # 10% of Pop1831

#clusters <- maxp_greedy(queen_w, data, bound_vals, min_bound)
clusters <- maxp_greedy(queen_w, data, bound_vals, min_bound)

#testthat::expect_equal(clusters[[5]], 0.4499671068)
testthat::expect_equal(clusters[[5]], 0.484, tolerance = 1e-3)
})

testthat::test_that("maxp_sa", {
Expand All @@ -145,10 +149,10 @@ testthat::test_that("maxp_sa", {
bound_vals <- guerry["Pop1831"]
min_bound <- 3236.67 # 10% of Pop1831

#clusters <- maxp_sa(queen_w, data, bound_vals, min_bound,
# cooling_rate = 0.85, sa_maxit = 1)
clusters <- maxp_sa(queen_w, data, bound_vals, min_bound,
cooling_rate = 0.85, sa_maxit = 1)

#testthat::expect_equal(clusters[[5]], 0.4585352223)
testthat::expect_equal(clusters[[5]], 0.496, tolerance = 1e-3)
})

testthat::test_that("maxp_tabu", {
Expand All @@ -163,9 +167,9 @@ testthat::test_that("maxp_tabu", {
min_bound <- 3236.67 # 10% of Pop1831


#clusters <- maxp_tabu(queen_w, data, bound_vals, min_bound,
# tabu_length = 10, conv_tabu = 10)
clusters <- maxp_tabu(queen_w, data, bound_vals, min_bound,
tabu_length = 10, conv_tabu = 10)

#testthat::expect_equal(clusters[[5]], 0.4893668149)
testthat::expect_equal(clusters[[5]], 0.478, tolerance = 1e-3)

})