diff --git a/DESCRIPTION b/DESCRIPTION index ed5486bf..24b3e4f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -99,6 +99,7 @@ Collate: 'models-mobilenetv3.R' 'models-mobilenetv3_large.R' 'models-resnet.R' + 'models-rfdetr_detection.R' 'models-vgg.R' 'models-vit.R' 'ops-box_convert.R' @@ -115,4 +116,4 @@ Depends: R (>= 3.5) LazyData: true VignetteBuilder: knitr -Config/roxygen2/version: 8.0.0 +RoxygenNote: 8.0.0 diff --git a/NAMESPACE b/NAMESPACE index 5c0fdcfb..ca911dae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -169,6 +169,13 @@ export(model_resnet34) export(model_resnet50) export(model_resnext101_32x8d) export(model_resnext50_32x4d) +export(model_rfdetr_base) +export(model_rfdetr_base_2) +export(model_rfdetr_base_o365) +export(model_rfdetr_large) +export(model_rfdetr_medium) +export(model_rfdetr_nano) +export(model_rfdetr_small) export(model_vgg11) export(model_vgg11_bn) export(model_vgg13) @@ -263,20 +270,28 @@ importFrom(torch,nn_batch_norm2d) importFrom(torch,nn_conv2d) importFrom(torch,nn_conv_transpose2d) importFrom(torch,nn_dropout) +importFrom(torch,nn_embedding) importFrom(torch,nn_gelu) importFrom(torch,nn_hardsigmoid) importFrom(torch,nn_hardswish) importFrom(torch,nn_identity) +importFrom(torch,nn_init_constant_) +importFrom(torch,nn_init_ones_) +importFrom(torch,nn_init_trunc_normal_) +importFrom(torch,nn_init_xavier_uniform_) +importFrom(torch,nn_init_zeros_) importFrom(torch,nn_layer_norm) importFrom(torch,nn_linear) importFrom(torch,nn_max_pool2d) importFrom(torch,nn_module) importFrom(torch,nn_module_dict) importFrom(torch,nn_module_list) +importFrom(torch,nn_multihead_attention) importFrom(torch,nn_parameter) importFrom(torch,nn_prelu) importFrom(torch,nn_relu) importFrom(torch,nn_sequential) +importFrom(torch,nn_silu) importFrom(torch,nn_softmax) importFrom(torch,nnf_gelu) importFrom(torch,nnf_grid_sample) @@ -284,17 +299,22 @@ importFrom(torch,nnf_interpolate) importFrom(torch,nnf_layer_norm) importFrom(torch,nnf_normalize) 
importFrom(torch,nnf_relu) +importFrom(torch,nnf_silu) importFrom(torch,nnf_softmax) importFrom(torch,torch_arange) +importFrom(torch,torch_bool) importFrom(torch,torch_cat) importFrom(torch,torch_chunk) importFrom(torch,torch_clamp) +importFrom(torch,torch_cumsum) importFrom(torch,torch_empty) importFrom(torch,torch_flatten) importFrom(torch,torch_float) importFrom(torch,torch_float32) +importFrom(torch,torch_int) importFrom(torch,torch_int32) importFrom(torch,torch_linspace) +importFrom(torch,torch_log) importFrom(torch,torch_log2) importFrom(torch,torch_long) importFrom(torch,torch_matmul) @@ -305,6 +325,8 @@ importFrom(torch,torch_ones) +importFrom(torch,torch_ones_like) importFrom(torch,torch_randn) importFrom(torch,torch_sigmoid) importFrom(torch,torch_stack) +importFrom(torch,torch_sum) importFrom(torch,torch_tensor) importFrom(torch,torch_zeros) importFrom(torch,torch_zeros_like) diff --git a/NEWS.md b/NEWS.md index f7c934a6..30cbfd1e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ ## Bug fixes and improvements * `nms()` now uses `torchvisionlib::ops_nms()` when torchvisionlib is installed, speeding up inference for `model_fasterrcnn_*()` and `model_maskrcnn_*()` (#321, #322). +* Added RF-DETR (Nano, Small, Medium, Base, Base-v2, Base-O365, Large) object detection models with DINOv2 backbone and deformable-attention decoder (@DerrickUnleashed, #327). # torchvision 0.9.0 diff --git a/R/models-rfdetr_detection.R b/R/models-rfdetr_detection.R new file mode 100644 index 00000000..fe665c13 --- /dev/null +++ b/R/models-rfdetr_detection.R @@ -0,0 +1,1617 @@ +#' RF-DETR Implementation +#' +#' RF-DETR: Neural Architecture Search for Real-Time Detection Transformers +#' ([https://arxiv.org/abs/2511.09554](https://arxiv.org/abs/2511.09554)) +#' +#' Object detection transformer models combining a DINOv2 backbone +#' (windowed attention, register tokens) with a deformable-attention +#' decoder and two-stage query proposal. Supports Nano, Small, Medium, +#' Base, Base with O365 pretraining, and Large variants. 
+#' +#' ## Model Variants +#' ``` +#' | Variant | Backbone | Decoder Layers | Resolution | # Queries | Group DETR | Weights | +#' |-----------|--------------------|----------------|------------|-----------|------------|-----------------------------| +#' | nano | DINOv2 Small (win) | 2 | 384 | 300 | 13 | COCO (91 classes) | +#' | small | DINOv2 Small (win) | 2 | 512 | 300 | 13 | COCO (91 classes) | +#' | medium | DINOv2 Small (win) | 2 | 640 | 300 | 13 | COCO (91 classes) | +#' | base | DINOv2 Small (win) | 3 | 640 | 300 | 13 | COCO (91 classes) | +#' | base_2 | DINOv2 Small (win) | 3 | 640 | 300 | 13 | COCO (91 classes, alt run) | +#' | base_o365 | DINOv2 Small (win) | 3 | 640 | 300 | 13 | Objects365 (366 classes) | +#' | large | DINOv2 Base (win) | 3 | 560 | 300 | 13 | COCO (91 classes) | +#' ``` +#' - All models use group DETR (group_detr=13) with two-stage query proposal, +#' Lite Refpoint Refine, and BBox reparameterisation. +#' - The `large` variant corresponds to the deprecated RF-DETR-Large config +#' (DINOv2 Base encoder, hidden_dim=384). 
+#' +#' @inheritParams model_mobilenet_v2 +#' +#' @family object_detection_model +#' @rdname model_rfdetr +#' @name model_rfdetr +NULL + +rfdetr_torchscript_urls <- list( + rfdetr_nano = c("https://torch-cdn.mlverse.org/models/vision/v2/models/rf_detr_nano.pth", "f995ded00af2036196c9d4148da1532d", "116 MB"), + rfdetr_small = c("https://torch-cdn.mlverse.org/models/vision/v2/models/rf_detr_small.pth", "b9ea5f60a04f07efb51097db3815e397", "116 MB"), + rfdetr_medium = c("https://torch-cdn.mlverse.org/models/vision/v2/models/rf_detr_medium.pth", "6d836fae193ca043f269b6c38b415791", "116 MB"), + rfdetr_base = c("https://torch-cdn.mlverse.org/models/vision/v2/models/rf_detr_base.pth", "7652e8b739b9e59479eaface8f48a285", "123 MB"), + rfdetr_base_2 = c("https://torch-cdn.mlverse.org/models/vision/v2/models/rf_detr_base_2.pth", "a281798e2562ebb4ec38140a767cde7d", "123 MB"), + rfdetr_base_o365 = c("https://torch-cdn.mlverse.org/models/vision/v2/models/rf_detr_base_o365.pth", "2d409686b30615418bf0849bcac11863", "127 MB"), + rfdetr_large = c("https://torch-cdn.mlverse.org/models/vision/v2/models/rf_detr_large.pth", "d77db0c7d6df39c93b0077a136ba07a3", "518 MB") +) + +get_clones <- function(module, n) { + nn_module_list(lapply(seq_len(n), function(i) module$clone(deep = TRUE))) +} + +dinov2_patch_embeddings <- nn_module( + "dinov2_patch_embeddings", + initialize = function(hidden_size, patch_size, num_channels = 3) { + self$projection <- nn_conv2d(num_channels, hidden_size, kernel_size = patch_size, stride = patch_size) + }, + forward = function(pixel_values) { + embeddings <- self$projection(pixel_values) + embeddings <- embeddings$flatten(start_dim = 3, end_dim = 4) + embeddings <- embeddings$transpose(2, 3) + embeddings + } +) + +windowed_dinov2_embeddings <- nn_module( + "windowed_dinov2_embeddings", + initialize = function(config) { + self$cls_token <- nn_parameter(torch_randn(1, 1, config$hidden_size)) + self$mask_token <- nn_parameter(torch_zeros(1, config$hidden_size)) + 
if (!is.null(config$num_register_tokens) && config$num_register_tokens > 0) { + self$register_tokens <- nn_parameter(torch_zeros(1, config$num_register_tokens, config$hidden_size)) + } + self$patch_embeddings <- dinov2_patch_embeddings( + config$hidden_size, config$patch_size, config$num_channels %||% 3 + ) + num_patches <- (config$image_size %/% config$patch_size)^2 + self$position_embeddings <- nn_parameter(torch_randn(1, num_patches + 1, config$hidden_size)) + self$dropout <- nn_dropout(p = config$hidden_dropout_prob %||% 0.0) + self$patch_size <- config$patch_size + self$num_windows <- config$num_windows %||% 1 + self$num_register_tokens <- config$num_register_tokens %||% 0 + self$config <- config + }, + interpolate_pos_encoding = function(embeddings, height, width) { + num_patches <- embeddings$size(2) - 1 + num_positions <- self$position_embeddings$size(2) - 1 + if (num_patches == num_positions && height == width) { + return(self$position_embeddings) + } + class_pos_embed <- self$position_embeddings[, 1, ] + patch_pos_embed <- self$position_embeddings[, 2:(num_positions + 1), , drop = FALSE] + dim <- embeddings$size(3) + h <- height %/% self$config$patch_size + w <- width %/% self$config$patch_size + sqrt_n <- as.integer(sqrt(num_positions)) + patch_pos_embed <- patch_pos_embed$reshape(c(1, sqrt_n, sqrt_n, dim)) + patch_pos_embed <- patch_pos_embed$permute(c(1, 4, 2, 3)) + target_dtype <- patch_pos_embed$dtype + patch_pos_embed <- nnf_interpolate( + patch_pos_embed$to(dtype = torch_float32()), + size = c(h, w), + mode = "bicubic", + align_corners = FALSE + )$to(dtype = target_dtype) + patch_pos_embed <- patch_pos_embed$permute(c(1, 3, 4, 2))$reshape(c(1, -1, dim)) + torch_cat(list(class_pos_embed$unsqueeze(1), patch_pos_embed), dim = 2) + }, + forward = function(pixel_values) { + batch_size <- pixel_values$size(1) + height <- pixel_values$size(3) + width <- pixel_values$size(4) + target_dtype <- self$patch_embeddings$projection$weight$dtype + embeddings <- 
self$patch_embeddings(pixel_values$to(dtype = target_dtype)) + cls_tokens <- self$cls_token$expand(c(batch_size, -1, -1)) + embeddings <- torch_cat(list(cls_tokens, embeddings), dim = 2) + embeddings <- embeddings + self$interpolate_pos_encoding(embeddings, height, width) + if (self$num_windows > 1) { + num_h_patches <- height %/% self$patch_size + num_w_patches <- width %/% self$patch_size + cls_token_with_pos <- embeddings[, 1, , drop = FALSE] + pixel_tokens <- embeddings[, 2:embeddings$size(2), , drop = FALSE] + pixel_tokens <- pixel_tokens$view(c(batch_size, num_h_patches, num_w_patches, -1)) + num_h_ppw <- num_h_patches %/% self$num_windows + num_w_ppw <- num_w_patches %/% self$num_windows + nw <- self$num_windows + windowed_pixel <- pixel_tokens$reshape(c( + batch_size * nw, num_h_ppw, nw, num_w_ppw, -1 + )) + windowed_pixel <- windowed_pixel$permute(c(1, 3, 2, 4, 5)) + windowed_pixel <- windowed_pixel$reshape(c( + batch_size * nw^2, num_h_ppw * num_w_ppw, -1 + )) + windowed_cls <- cls_token_with_pos$'repeat'(c(nw^2, 1, 1)) + embeddings <- torch_cat(list(windowed_cls, windowed_pixel), dim = 2) + } + if (self$num_register_tokens > 0) { + reg_tokens <- self$register_tokens$expand(c(embeddings$size(1), -1, -1)) + embeddings <- torch_cat(list( + embeddings[, 1, , drop = FALSE], reg_tokens, embeddings[, 2:embeddings$size(2), , drop = FALSE] + ), dim = 2) + } + embeddings <- self$dropout(embeddings) + embeddings + } +) + +dinov2_self_attention <- nn_module( + "dinov2_self_attention", + initialize = function(config) { + self$num_attention_heads <- config$num_attention_heads + self$attention_head_size <- config$hidden_size %/% config$num_attention_heads + self$all_head_size <- self$num_attention_heads * self$attention_head_size + self$query <- nn_linear(config$hidden_size, self$all_head_size, bias = config$qkv_bias %||% TRUE) + self$key <- nn_linear(config$hidden_size, self$all_head_size, bias = config$qkv_bias %||% TRUE) + self$value <- nn_linear(config$hidden_size, 
self$all_head_size, bias = config$qkv_bias %||% TRUE) + self$dropout <- nn_dropout(p = config$attention_probs_dropout_prob %||% 0.0) + }, + transpose_for_scores = function(x) { + new_shape <- c(x$size()[-length(x$size())], self$num_attention_heads, self$attention_head_size) + x <- x$reshape(new_shape) + x$permute(c(1, 3, 2, 4)) + }, + forward = function(hidden_states) { + mixed_query_layer <- self$query(hidden_states) + key_layer <- self$transpose_for_scores(self$key(hidden_states)) + value_layer <- self$transpose_for_scores(self$value(hidden_states)) + query_layer <- self$transpose_for_scores(mixed_query_layer) + attention_scores <- query_layer$matmul(key_layer$transpose(-2, -1)) + attention_scores <- attention_scores / sqrt(self$attention_head_size) + attention_probs <- nnf_softmax(attention_scores, dim = -1) + attention_probs <- self$dropout(attention_probs) + context_layer <- attention_probs$matmul(value_layer) + context_layer <- context_layer$permute(c(1, 3, 2, 4)) + context_layer <- context_layer$reshape(c( + context_layer$size()[1:2], self$all_head_size + )) + list(context_layer, attention_probs) + } +) + +dinov2_self_output <- nn_module( + "dinov2_self_output", + initialize = function(config) { + self$dense <- nn_linear(config$hidden_size, config$hidden_size) + self$dropout <- nn_dropout(p = config$hidden_dropout_prob %||% 0.0) + }, + forward = function(hidden_states) { + hidden_states <- self$dense(hidden_states) + hidden_states <- self$dropout(hidden_states) + hidden_states + } +) + +dinov2_attention <- nn_module( + "dinov2_attention", + initialize = function(config) { + self$attention <- dinov2_self_attention(config) + self$output <- dinov2_self_output(config) + }, + forward = function(hidden_states) { + self_outputs <- self$attention(hidden_states) + attention_output <- self$output(self_outputs[[1]]) + list(attention_output, self_outputs[[2]]) + } +) + +dinov2_layerscale <- nn_module( + "dinov2_layerscale", + initialize = function(config) { + 
self$lambda1 <- nn_parameter(config$layerscale_value %||% 1.0 * torch_ones(config$hidden_size)) + }, + forward = function(hidden_state) { + hidden_state * self$lambda1 + } +) + +dinov2_mlp <- nn_module( + "dinov2_mlp", + initialize = function(config) { + in_features <- out_features <- config$hidden_size + hidden_features <- as.integer(config$hidden_size * config$mlp_ratio %||% 4) + self$fc1 <- nn_linear(in_features, hidden_features, bias = TRUE) + self$activation <- nn_gelu() + self$fc2 <- nn_linear(hidden_features, out_features, bias = TRUE) + }, + forward = function(hidden_state) { + hidden_state <- self$fc1(hidden_state) + hidden_state <- self$activation(hidden_state) + hidden_state <- self$fc2(hidden_state) + hidden_state + } +) + +windowed_dinov2_layer <- nn_module( + "windowed_dinov2_layer", + initialize = function(config) { + self$num_windows <- config$num_windows %||% 1 + self$norm1 <- nn_layer_norm(config$hidden_size, eps = config$layer_norm_eps %||% 1e-6) + self$attention <- dinov2_attention(config) + self$layer_scale1 <- dinov2_layerscale(config) + self$norm2 <- nn_layer_norm(config$hidden_size, eps = config$layer_norm_eps %||% 1e-6) + self$mlp <- if (config$use_swiglu_ffn %||% FALSE) { + dinov2_swiglu_mlp(config) + } else { + dinov2_mlp(config) + } + self$layer_scale2 <- dinov2_layerscale(config) + }, + forward = function(hidden_states, run_full_attention = FALSE) { + shortcut <- hidden_states + if (run_full_attention) { + bw <- hidden_states$size(1) + tpw <- hidden_states$size(2) + c <- hidden_states$size(3) + nws <- self$num_windows^2 + hidden_states <- hidden_states$reshape(c(bw %/% nws, nws * tpw, c)) + } + norm1_out <- self$norm1(hidden_states) + attn_outputs <- self$attention(norm1_out) + attention_output <- attn_outputs[[1]] + if (run_full_attention) { + bw <- hidden_states$size(1) + tpw <- hidden_states$size(2) + c <- hidden_states$size(3) + nws <- self$num_windows^2 + attention_output <- attention_output$reshape(c(bw * nws, tpw %/% nws, c)) + } 
+ attention_output <- self$layer_scale1(attention_output) + hidden_states <- attention_output + shortcut + layer_output <- self$norm2(hidden_states) + layer_output <- self$mlp(layer_output) + layer_output <- self$layer_scale2(layer_output) + layer_output <- layer_output + hidden_states + list(layer_output) + } +) + +dinov2_swiglu_mlp <- nn_module( + "dinov2_swiglu_mlp", + initialize = function(config) { + in_features <- out_features <- config$hidden_size + hidden_features <- as.integer(config$hidden_size * config$mlp_ratio %||% 4) + hidden_features <- as.integer((hidden_features * 2 / 3 + 7) %/% 8 * 8) + self$weights_in <- nn_linear(in_features, 2 * hidden_features, bias = TRUE) + self$weights_out <- nn_linear(hidden_features, out_features, bias = TRUE) + }, + forward = function(hidden_state) { + hidden_state <- self$weights_in(hidden_state) + x1 <- hidden_state$chunk(2, dim = -1)[[1]] + x2 <- hidden_state$chunk(2, dim = -1)[[2]] + hidden <- nnf_silu(x1) * x2 + self$weights_out(hidden) + } +) + +windowed_dinov2_encoder <- nn_module( + "windowed_dinov2_encoder", + initialize = function(config) { + self$config <- config + self$layer <- nn_module_list(lapply(seq_len(config$num_hidden_layers), function(i) { + windowed_dinov2_layer(config) + })) + }, + forward = function(hidden_states, output_hidden_states = FALSE) { + all_hidden_states <- list() + out_feat_idx <- as.integer(gsub("stage", "", self$config$out_features)) + for (i in seq_along(self$layer)) { + if (output_hidden_states) { + all_hidden_states[[length(all_hidden_states) + 1]] <- hidden_states + } + if (i > max(out_feat_idx)) break + run_full_attention <- !((i - 1) %in% (self$config$window_block_indexes %||% list())) + layer_outputs <- self$layer[[i]](hidden_states, run_full_attention) + hidden_states <- layer_outputs[[1]] + } + if (output_hidden_states) { + all_hidden_states[[length(all_hidden_states) + 1]] <- hidden_states + } + list(hidden_states, all_hidden_states) + } +) + +windowed_dinov2_backbone_hf <- 
nn_module( + "windowed_dinov2_backbone_hf", + initialize = function(config) { + self$embeddings <- windowed_dinov2_embeddings(config) + self$encoder <- windowed_dinov2_encoder(config) + self$layernorm <- nn_layer_norm(config$hidden_size, eps = config$layer_norm_eps %||% 1e-6) + self$config <- config + self$num_register_tokens <- config$num_register_tokens %||% 0 + self$stage_names <- c("stem", paste0("stage", 1:config$num_hidden_layers)) + self$out_features <- config$out_features + self$apply_layernorm <- config$apply_layernorm %||% TRUE + self$reshape_hidden_states <- config$reshape_hidden_states %||% TRUE + }, + forward = function(pixel_values) { + embedding_output <- self$embeddings(pixel_values) + encoder_outputs <- self$encoder(embedding_output, output_hidden_states = TRUE) + hidden_states <- encoder_outputs[[2]] + feature_maps <- list() + for (i in seq_along(self$stage_names)) { + stage <- self$stage_names[i] + if (stage %in% self$out_features && i <= length(hidden_states)) { + hs <- hidden_states[[i]] + if (self$apply_layernorm) { + hs <- self$layernorm(hs) + } + if (self$reshape_hidden_states) { + if (self$num_register_tokens > 0) { + hs <- hs[, (2 + self$num_register_tokens):hs$size(2), , drop = FALSE] + } else { + hs <- hs[, 2:hs$size(2), , drop = FALSE] + } + batch_size <- pixel_values$size(1) + height <- pixel_values$size(3) + width <- pixel_values$size(4) + patch_size <- self$config$patch_size + num_h_patches <- height %/% patch_size + num_w_patches <- width %/% patch_size + num_windows <- self$config$num_windows %||% 1 + if (num_windows > 1) { + nws <- num_windows^2 + bw <- hs$size(1) + tpw <- hs$size(2) + c <- hs$size(3) + nh_ppw <- num_h_patches %/% num_windows + nw_ppw <- num_w_patches %/% num_windows + hs <- hs$reshape(c(bw %/% nws, nws * tpw, c)) + hs <- hs$reshape(c( + (bw %/% nws) * num_windows, num_windows, nh_ppw, nw_ppw, c + )) + hs <- hs$permute(c(1, 3, 2, 4, 5)) + } + hs <- hs$reshape(c(batch_size, num_h_patches, num_w_patches, -1)) + hs 
<- hs$permute(c(1, 4, 2, 3))$contiguous() + } + feature_maps[[length(feature_maps) + 1]] <- hs + } + } + list(feature_maps) + } +) + +projector_layernorm <- nn_module( + "projector_layernorm", + initialize = function(normalized_shape, eps = 1e-6) { + self$weight <- nn_parameter(torch_ones(normalized_shape)) + self$bias <- nn_parameter(torch_zeros(normalized_shape)) + self$eps <- eps + self$normalized_shape <- c(normalized_shape) + }, + forward = function(x) { + x <- x$permute(c(1, 3, 4, 2)) + x <- nnf_layer_norm(x, self$normalized_shape, self$weight, self$bias, self$eps) + x <- x$permute(c(1, 4, 2, 3)) + x + } +) + +convx <- nn_module( + "convx", + initialize = function(in_planes, out_planes, kernel = 3, stride = 1, groups = 1, + act = "silu", layer_norm = FALSE) { + if (length(kernel) == 1) kernel <- c(kernel, kernel) + padding <- c(kernel[1] %/% 2, kernel[2] %/% 2) + self$conv <- nn_conv2d(in_planes, out_planes, kernel_size = kernel, stride = stride, + padding = padding, groups = groups, bias = FALSE) + if (layer_norm) { + self$bn <- projector_layernorm(out_planes) + } else { + self$bn <- nn_batch_norm2d(out_planes) + } + self$act <- if (act == "silu") nn_silu(inplace = TRUE) else nn_relu(inplace = TRUE) + }, + forward = function(x) { + self$act(self$bn(self$conv(x$contiguous()))) + } +) + +rfdetr_bottleneck <- nn_module( + "rfdetr_bottleneck", + initialize = function(c1, c2, shortcut = TRUE, g = 1, k = c(3, 3), e = 0.5, act = "silu", layer_norm = FALSE) { + c_ <- as.integer(c2 * e) + self$cv1 <- convx(c1, c_, kernel = k[1], stride = 1, act = act, layer_norm = layer_norm) + self$cv2 <- convx(c_, c2, kernel = k[2], stride = 1, groups = g, act = act, layer_norm = layer_norm) + self$add <- shortcut && c1 == c2 + }, + forward = function(x) { + out <- self$cv2(self$cv1(x)) + if (self$add) x + out else out + } +) + +c2f <- nn_module( + "c2f", + initialize = function(c1, c2, n = 1, shortcut = FALSE, g = 1, e = 0.5, act = "silu", layer_norm = FALSE) { + c <- 
as.integer(c2 * e) + self$c <- c + self$cv1 <- convx(c1, 2 * c, kernel = 1, stride = 1, act = act, layer_norm = layer_norm) + self$cv2 <- convx((2 + n) * c, c2, kernel = 1, act = act, layer_norm = layer_norm) + self$m <- nn_module_list(lapply(seq_len(n), function(i) { + rfdetr_bottleneck(c, c, shortcut, g, k = c(3, 3), e = 1.0, act = act, layer_norm = layer_norm) + })) + }, + forward = function(x) { + y <- self$cv1(x)$split(self$c, dim = 2) + y <- as.list(y) + for (i in seq_along(self$m)) { + y[[length(y) + 1]] <- self$m[[i]](y[[length(y)]]) + } + self$cv2(torch_cat(y, dim = 2)) + } +) + +multiscale_projector <- nn_module( + "multiscale_projector", + initialize = function(in_channels, out_channels, scale_factors, num_blocks = 3, + layer_norm = FALSE) { + self$scale_factors <- scale_factors + stages_sampling <- list() + for (i in seq_along(scale_factors)) { + scale <- scale_factors[i] + level_modules <- list() + for (j in seq_along(in_channels)) { + in_dim <- in_channels[j] + if (scale == 1.0) { + level_modules[[j]] <- nn_sequential(nn_identity()) + } else if (scale == 2.0) { + level_modules[[j]] <- nn_sequential( + nn_conv_transpose2d(in_dim, in_dim %/% 2, kernel_size = 2, stride = 2) + ) + } else if (scale == 0.5) { + level_modules[[j]] <- nn_sequential( + convx(in_dim, in_dim, kernel = 3, stride = 2, act = "silu", layer_norm = layer_norm) + ) + } else { + level_modules[[j]] <- nn_sequential(nn_identity()) + } + } + stages_sampling[[i]] <- nn_module_list(level_modules) + } + self$stages_sampling <- nn_module_list(stages_sampling) + self$stages <- nn_module_list() + for (scale in scale_factors) { + in_dim <- as.integer(sum(in_channels / max(1, scale))) + layers <- nn_sequential( + c2f(in_dim, out_channels, num_blocks, layer_norm = layer_norm), + projector_layernorm(out_channels) + ) + self$stages$append(layers) + } + }, + forward = function(x) { + results <- list() + for (i in seq_along(self$stages)) { + feat_fuse <- list() + for (j in seq_along(x)) { + 
feat_fuse[[length(feat_fuse) + 1]] <- self$stages_sampling[[i]][[j]](x[[j]]) + } + if (length(feat_fuse) > 1) { + feat_fuse <- torch_cat(feat_fuse, dim = 2) + } else { + feat_fuse <- feat_fuse[[1]] + } + results[[length(results) + 1]] <- self$stages[[i]](feat_fuse) + } + results + } +) + +position_embedding_sine <- nn_module( + "position_embedding_sine", + initialize = function(num_pos_feats = 128, temperature = 10000, normalize = TRUE, scale = NULL) { + self$num_pos_feats <- num_pos_feats + self$temperature <- temperature + self$normalize <- normalize + if (is.null(scale)) scale <- 2 * pi + self$scale <- scale + }, + forward = function(x, align_dim_orders = FALSE) { + if (is.list(x) && !is.null(x$mask)) { + mask <- x$mask + } else { + mask <- NULL + } + if (!is.null(mask)) { + not_mask <- !mask + } else { + not_mask <- torch_ones(c(x$size(1), x$size(3), x$size(4)), device = x$device, dtype = torch_bool()) + } + y_embed <- torch_cumsum(not_mask$to(dtype = torch_float32()), dim = 2) + x_embed <- torch_cumsum(not_mask$to(dtype = torch_float32()), dim = 3) + if (self$normalize) { + eps <- 1e-6 + y_embed <- y_embed / (y_embed[, -1, , drop = FALSE] + eps) * self$scale + x_embed <- x_embed / (x_embed[, , -1, drop = FALSE] + eps) * self$scale + } + dim_t <- torch_arange(0, self$num_pos_feats - 1, dtype = torch_float32(), device = x$device) + dim_t <- self$temperature^(2 * (dim_t %/% 2) / self$num_pos_feats) + pos_x <- x_embed$unsqueeze(4) / dim_t + pos_y <- y_embed$unsqueeze(4) / dim_t + pos_x <- torch_stack(list(pos_x[, , , seq(1, self$num_pos_feats, 2)]$sin(), + pos_x[, , , seq(2, self$num_pos_feats, 2)]$cos()), dim = 5)$flatten(start_dim = 4) + pos_y <- torch_stack(list(pos_y[, , , seq(1, self$num_pos_feats, 2)]$sin(), + pos_y[, , , seq(2, self$num_pos_feats, 2)]$cos()), dim = 5)$flatten(start_dim = 4) + if (align_dim_orders) { + pos <- torch_cat(list(pos_y, pos_x), dim = 4)$permute(c(3, 4, 1, 2)) + } else { + pos <- torch_cat(list(pos_y, pos_x), dim = 4)$permute(c(1, 
4, 2, 3)) + } + pos + } +) + +rfdetr_dinov2 <- nn_module( + "rfdetr_dinov2", + initialize = function(size = "small", out_feature_indexes = c(3, 6, 9, 12), + image_size = 518, patch_size = 14, num_windows = 2, + hidden_size = 384, num_attention_heads = 6, + num_hidden_layers = 12, num_register_tokens = 4, + drop_path_rate = 0.0) { + config <- list( + hidden_size = hidden_size, + num_hidden_layers = num_hidden_layers, + num_attention_heads = num_attention_heads, + mlp_ratio = 4, + hidden_act = "gelu", + hidden_dropout_prob = 0.0, + attention_probs_dropout_prob = 0.0, + layer_norm_eps = 1e-6, + image_size = image_size, + patch_size = patch_size, + num_channels = 3, + qkv_bias = TRUE, + layerscale_value = 1.0, + drop_path_rate = drop_path_rate, + use_swiglu_ffn = FALSE, + num_register_tokens = num_register_tokens, + num_windows = num_windows, + window_block_indexes = setdiff(0:max(out_feature_indexes), out_feature_indexes), + out_features = paste0("stage", out_feature_indexes), + apply_layernorm = TRUE, + reshape_hidden_states = TRUE + ) + self$config <- config + self$encoder <- windowed_dinov2_backbone_hf(config) + self$out_feature_channels <- rep(hidden_size, length(out_feature_indexes)) + }, + forward = function(x) { + out <- self$encoder(x) + out[[1]] + } +) + +rfdetr_backbone <- nn_module( + "rfdetr_backbone", + initialize = function(encoder_name = "dinov2_windowed_small", + out_feature_indexes = c(3, 6, 9, 12), + hidden_dim = 256, layer_norm = TRUE, + target_shape = c(640, 640), patch_size = 16, + num_windows = 2, num_register_tokens = 0, + drop_path = 0.0, projector_scale = c(1.0)) { + size_configs <- list( + tiny = list(hidden_size = 192, num_heads = 3, num_layers = 12), + small = list(hidden_size = 384, num_heads = 6, num_layers = 12), + base = list(hidden_size = 768, num_heads = 12, num_layers = 12), + large = list(hidden_size = 1024, num_heads = 16, num_layers = 24) + ) + name_parts <- strsplit(encoder_name, "_")[[1]] + size <- name_parts[length(name_parts)] 
+ sc <- size_configs[[size]] + self$encoder <- rfdetr_dinov2( + size = size, + out_feature_indexes = out_feature_indexes, + image_size = target_shape[1] %/% patch_size * patch_size, + patch_size = patch_size, + num_windows = num_windows, + hidden_size = sc$hidden_size, + num_attention_heads = sc$num_heads, + num_hidden_layers = sc$num_layers, + num_register_tokens = num_register_tokens, + drop_path_rate = drop_path + ) + self$patch_size <- patch_size + self$num_windows <- num_windows + self$projector <- multiscale_projector( + in_channels = rep(sc$hidden_size, length(out_feature_indexes)), + out_channels = hidden_dim, + scale_factors = projector_scale, + num_blocks = 3, + layer_norm = layer_norm + ) + self$out_channels <- hidden_dim + self$hidden_dim <- hidden_dim + }, + forward = function(x, mask = NULL) { + divisor <- self$patch_size * self$num_windows + h <- x$size(3) %/% divisor * divisor + w <- x$size(4) %/% divisor * divisor + if (h != x$size(3) || w != x$size(4)) { + x <- nnf_interpolate(x, size = c(h, w), mode = "bilinear", align_corners = FALSE) + } + feats <- self$encoder(x) + feats <- self$projector(feats) + if (!is.null(mask)) { + out <- list() + for (feat in feats) { + m <- nnf_interpolate(mask$unsqueeze(1)$float(), size = feat$shape[3:4])$to(dtype = torch_bool())[1, , ] + out[[length(out) + 1]] <- list(tensors = feat, mask = m) + } + out + } else { + feats + } + } +) + +rfdetr_joiner <- nn_module( + "rfdetr_joiner", + initialize = function(backbone, position_embedding) { + self[["0"]] <- backbone + self[["1"]] <- position_embedding + }, + forward = function(x, mask = NULL) { + if (is.list(x) && !is.null(x$tensors)) { + mask <- x$mask + x <- x$tensors + } + feats <- self[["0"]](x, mask) + if (length(feats) > 0 && is.list(feats[[1]]) && !is.null(feats[[1]]$tensors)) { + pos <- list() + for (i in seq_along(feats)) { + pos[[i]] <- self[["1"]](feats[[i]]$tensors, align_dim_orders = FALSE) + } + list(feats, pos) + } else { + pos <- list() + for (feat in 
feats) { + pos[[length(pos) + 1]] <- self[["1"]](feat, align_dim_orders = FALSE) + } + list(feats, pos) + } + } +) + +ms_deform_attn_core_pytorch <- function(value, spatial_shapes, sampling_locations, attention_weights, + spatial_shapes_hw = NULL) { + batch_size <- value$size(1) + n_heads <- value$size(2) + head_dim <- value$size(3) + len_query <- sampling_locations$size(2) + n_levels <- sampling_locations$size(4) + n_points <- sampling_locations$size(5) + shapes <- if (!is.null(spatial_shapes_hw)) spatial_shapes_hw else spatial_shapes + n_shapes <- if (is.list(shapes)) length(shapes) else shapes$size(1) + sizes <- lapply(seq_len(n_shapes), function(i) { + if (is.list(shapes)) as.integer(shapes[[i]][1] * shapes[[i]][2]) + else as.integer(shapes[i, 1]$item() * shapes[i, 2]$item()) + }) + value_list <- value$split(sizes, dim = 4) + sampling_grids <- 2 * sampling_locations - 1 + sampling_value_list <- list() + for (lvl in seq_len(n_levels)) { + h <- if (is.list(shapes)) shapes[[lvl]][1] else as.integer(shapes[lvl, 1]$item()) + w <- if (is.list(shapes)) shapes[[lvl]][2] else as.integer(shapes[lvl, 2]$item()) + value_l <- value_list[[lvl]]$view(c(batch_size * n_heads, head_dim, h, w)) + sampling_grid_l <- sampling_grids[, , , lvl, , , drop = FALSE] + sampling_grid_l <- sampling_grid_l$squeeze(4) + sampling_grid_l <- sampling_grid_l$transpose(2, 3) + sampling_grid_l <- sampling_grid_l$flatten(start_dim = 1, end_dim = 2) + sampling_value_l <- nnf_grid_sample( + value_l, sampling_grid_l, mode = "bilinear", + padding_mode = "zeros", align_corners = FALSE + ) + sampling_value_list[[lvl]] <- sampling_value_l + } + attention_weights <- attention_weights$transpose(2, 3) + attention_weights <- attention_weights$reshape(c(batch_size * n_heads, 1, len_query, n_levels * n_points)) + sampling_values <- torch_stack(sampling_value_list, dim = -2)$flatten(start_dim = -2) + output <- (sampling_values * attention_weights)$sum(dim = -1) + output <- output$view(c(batch_size, n_heads * 
head_dim, len_query))
  output$transpose(2, 3)$contiguous()
}

# Multi-scale deformable attention, used as the decoder's cross-attention.
#
# For every query the module predicts, per head and per feature level,
# `n_points` 2-d sampling offsets and one attention weight per sample.
# Sampling and weighting are delegated to `ms_deform_attn_core_pytorch()`;
# the result goes through `output_proj`.
#
# Arguments of `forward()`:
#   query               tensor whose first two sizes are read as
#                       (batch, n_queries).
#   reference_points    last dimension is 2 (points) or 4 (boxes; columns
#                       3:4 scale the offsets).
#   input_flatten       flattened features fed to `value_proj`.
#   input_spatial_shapes tensor with one row per level; column 1 is used as
#                       height and column 2 as width when normalising offsets.
#   input_level_start_index  accepted for interface parity; not read here.
#   input_padding_mask  optional mask; masked positions are zeroed in `value`.
#   input_spatial_shapes_hw  forwarded untouched to the core sampling routine.
ms_deform_attn <- nn_module(
  "ms_deform_attn",
  initialize = function(d_model = 256, n_levels = 4, n_heads = 8, n_points = 4) {
    # forward() reshapes the value tensor using d_model %/% n_heads; a
    # remainder would silently produce a wrong per-head layout.
    if (d_model %% n_heads != 0) {
      runtime_error("d_model must be divisible by n_heads")
    }
    self$d_model <- d_model
    self$n_levels <- n_levels
    self$n_heads <- n_heads
    self$n_points <- n_points
    self$sampling_offsets <- nn_linear(d_model, n_heads * n_levels * n_points * 2)
    self$attention_weights <- nn_linear(d_model, n_heads * n_levels * n_points)
    self$value_proj <- nn_linear(d_model, d_model)
    self$output_proj <- nn_linear(d_model, d_model)
    self$reset_parameters()
  },
  reset_parameters = function() {
    nn_init_constant_(self$sampling_offsets$weight, 0)
    # One direction per head, evenly spread over the circle and normalised so
    # the larger of |cos|, |sin| equals 1.
    thetas <- torch_arange(0, self$n_heads - 1, dtype = torch_float32()) * (2 * pi / self$n_heads)
    grid_init <- torch_stack(list(thetas$cos(), thetas$sin()), dim = -1)
    grid_init <- grid_init / grid_init$abs()$max(dim = -1, keepdim = TRUE)[[1]]
    grid_init <- grid_init$view(c(self$n_heads, 1, 1, 2))$'repeat'(c(1, self$n_levels, self$n_points, 1))
    # The i-th sampling point starts i steps away along its head's direction.
    for (i in seq_len(self$n_points)) {
      grid_init[, , i, ] <- grid_init[, , i, ] * i
    }
    self$sampling_offsets$bias <- nn_parameter(grid_init$view(-1))
    nn_init_constant_(self$attention_weights$weight, 0)
    nn_init_constant_(self$attention_weights$bias, 0)
    nn_init_xavier_uniform_(self$value_proj$weight)
    nn_init_constant_(self$value_proj$bias, 0)
    nn_init_xavier_uniform_(self$output_proj$weight)
    nn_init_constant_(self$output_proj$bias, 0)
  },
  forward = function(query, reference_points, input_flatten, input_spatial_shapes,
                     input_level_start_index, input_padding_mask = NULL,
                     input_spatial_shapes_hw = NULL) {
    batch_size <- query$size(1)
    len_query <- query$size(2)

    value <- self$value_proj(input_flatten)
    if (!is.null(input_padding_mask)) {
      value <- value$masked_fill(input_padding_mask$unsqueeze(3), 0)
    }

    sampling_offsets <- self$sampling_offsets(query)$view(c(
      batch_size, len_query, self$n_heads, self$n_levels, self$n_points, 2
    ))
    attention_weights <- self$attention_weights(query)$view(c(
      batch_size, len_query, self$n_heads, self$n_levels * self$n_points
    ))

    if (reference_points$size(-1) == 2) {
      # Point references: offsets are expressed in cells, so divide by (w, h).
      offset_normalizer <- torch_stack(list(
        input_spatial_shapes[, 2], input_spatial_shapes[, 1]
      ), dim = -1)
      sampling_locations <- reference_points$unsqueeze(3)$unsqueeze(5) +
        sampling_offsets / offset_normalizer$unsqueeze(1)$unsqueeze(1)$unsqueeze(4)
    } else {
      # Box references: offsets are scaled by half the box extent.
      sampling_locations <- reference_points[, , NULL, , NULL, 1:2] +
        sampling_offsets / self$n_points * reference_points[, , NULL, , NULL, 3:4] * 0.5
    }

    attention_weights <- nnf_softmax(attention_weights, dim = -1)
    value <- value$transpose(2, 3)$contiguous()$view(c(
      batch_size, self$n_heads, self$d_model %/% self$n_heads, -1
    ))
    output <- ms_deform_attn_core_pytorch(
      value, input_spatial_shapes, sampling_locations, attention_weights,
      input_spatial_shapes_hw
    )
    self$output_proj(output)
  }
)

gen_sineembed_for_position <- function(pos_tensor, dim = 128) {
  scale <- 2 * pi
  dim_t <- torch_arange(0, dim - 1, dtype = pos_tensor$dtype, device = pos_tensor$device)
  dim_t <- 10000^(2 * (dim_t %/% 2) / dim)
  x_embed <- pos_tensor[, , 1] * scale
  y_embed <- pos_tensor[, , 2] * scale
  pos_x <- x_embed$unsqueeze(3) / dim_t
  pos_y <- y_embed$unsqueeze(3) / dim_t
  pos_x <- torch_stack(list(pos_x[, , seq(1, dim, 2)]$sin(), pos_x[, , seq(2, dim, 2)]$cos()), dim = 4)$flatten(start_dim = 3)
  pos_y <- torch_stack(list(pos_y[, , seq(1, dim, 2)]$sin(), pos_y[, , seq(2, dim, 2)]$cos()), dim = 4)$flatten(start_dim = 3)
  if (pos_tensor$size(-1) == 4) {
    w_embed <- pos_tensor[, , 3] * scale
    h_embed <- pos_tensor[, , 4] * scale
    pos_w <- w_embed$unsqueeze(3) / dim_t
    pos_h <- h_embed$unsqueeze(3) / dim_t
    pos_w <- torch_stack(list(pos_w[, , seq(1, dim, 2)]$sin(), pos_w[, , seq(2, dim, 2)]$cos()), dim = 4)$flatten(start_dim = 3)
    pos_h <- torch_stack(list(pos_h[, , seq(1, dim, 2)]$sin(), pos_h[, , seq(2, dim,
2)]$cos()), dim = 4)$flatten(start_dim = 3)
    pos <- torch_cat(list(pos_y, pos_x, pos_w, pos_h), dim = 3)
  } else {
    pos <- torch_cat(list(pos_y, pos_x), dim = 3)
  }
  pos
}

# Build one anchor box proposal per encoder token (two-stage query init).
#
# For every feature level a regular grid of cell centres is generated and
# paired with a level-dependent box size (0.05 * 2^(level - 1)). Proposals
# are concatenated over levels in the same order as the tokens in `memory`.
#
# memory               encoder tokens; masked/invalid tokens are zeroed in the
#                      returned copy.
# memory_padding_mask  optional boolean mask over tokens (TRUE = padding), or
#                      NULL.
# spatial_shapes       one row per level, read as (height, width).
# unsigmoid            if TRUE proposals are returned in logit space and
#                      invalid entries are set to Inf; otherwise they stay in
#                      [0, 1] and invalid entries are set to 0.
#
# Returns list(output_memory, output_proposals). Without a mask the proposals
# keep a leading batch size of 1 and rely on broadcasting; with a mask they
# carry the mask's batch size.
gen_encoder_output_proposals <- function(memory, memory_padding_mask, spatial_shapes, unsigmoid = TRUE) {
  has_mask <- !is.null(memory_padding_mask)
  proposals <- list()
  cur <- 1
  for (lvl in seq_len(nrow(spatial_shapes))) {
    h <- as.integer(spatial_shapes[lvl, 1])
    w <- as.integer(spatial_shapes[lvl, 2])
    if (has_mask) {
      # Normalise by the un-padded extent of each sample rather than by the
      # padded grid size.
      level_mask <- memory_padding_mask[, cur:(cur + h * w - 1)]$view(c(-1, h, w, 1))
      valid_height <- torch_sum((!level_mask[, , 1, 1])$to(dtype = torch_int()), dim = 2)
      valid_width <- torch_sum((!level_mask[, 1, , 1])$to(dtype = torch_int()), dim = 2)
      scale <- torch_stack(list(valid_width, valid_height), dim = 2)$
        to(dtype = torch_float32())$
        view(c(-1, 1, 1, 2))
    } else {
      scale <- torch_tensor(c(w, h), dtype = torch_float32(), device = memory$device)$
        view(c(1, 1, 1, 2))
    }
    grid_y <- torch_linspace(0, h - 1, h, dtype = torch_float32(), device = memory$device)
    grid_x <- torch_linspace(0, w - 1, w, dtype = torch_float32(), device = memory$device)
    # Rows must vary over y and columns over x so that flattening the (h, w)
    # grid visits cells in the same row-major order as the flattened feature
    # map; the last axis is ordered (x, y).
    mesh <- torch_meshgrid(list(grid_y, grid_x), indexing = "ij")
    grid <- torch_stack(list(mesh[[2]], mesh[[1]]), dim = -1)
    grid <- (grid$unsqueeze(1) + 0.5) / scale
    wh <- torch_ones_like(grid) * 0.05 * (2^(lvl - 1))
    proposals[[lvl]] <- torch_cat(list(grid, wh), dim = -1)$reshape(c(-1, h * w, 4))
    cur <- cur + h * w
  }
  output_proposals <- torch_cat(proposals, dim = 2)
  # Proposals touching the image border are treated as invalid.
  output_proposals_valid <- (output_proposals > 0.01 & output_proposals < 0.99)$all(dim = -1, keepdim = TRUE)

  invalid_fill <- if (unsigmoid) Inf else 0
  if (unsigmoid) {
    output_proposals <- torch_log(output_proposals / (1 - output_proposals))
  }
  if (has_mask) {
    output_proposals <- output_proposals$masked_fill(memory_padding_mask$unsqueeze(3), invalid_fill)
  }
  output_proposals <- output_proposals$masked_fill(!output_proposals_valid, invalid_fill)

  output_memory <- memory
  if (has_mask) {
    output_memory <- output_memory$masked_fill(memory_padding_mask$unsqueeze(3), 0)
  }
  output_memory <- output_memory$masked_fill(!output_proposals_valid, 0)
  list(output_memory, output_proposals)
}

# Plain feed-forward stack: `num_layers` linear layers with ReLU between them
# (no activation after the last layer). Hidden layers all have `hidden_dim`
# units.
mlp_module <- nn_module(
  "mlp_module",
  initialize = function(input_dim, hidden_dim, output_dim, num_layers) {
    self$num_layers <- num_layers
    h <- rep(hidden_dim, num_layers - 1)
    dims <- c(input_dim, h, output_dim)
    self$layers <- nn_module_list(lapply(seq_len(num_layers), function(i) {
      nn_linear(dims[i], dims[i + 1])
    }))
  },
  forward = function(x) {
    for (i in seq_len(self$num_layers)) {
      x <- self$layers[[i]](x)
      if (i < self$num_layers) x <- nnf_relu(x)
    }
    x
  }
)

rfdetr_decoder_layer <- nn_module(
  "rfdetr_decoder_layer",
  initialize = function(d_model = 256, sa_nhead = 8, ca_nhead = 16,
                        dim_feedforward = 2048, dropout = 0.0,
                        group_detr = 1, num_feature_levels = 1,
                        dec_n_points = 2) {
    self$self_attn <- nn_multihead_attention(d_model, sa_nhead, dropout = dropout, batch_first = TRUE)
    self$dropout1 <- nn_dropout(dropout)
    self$norm1 <- nn_layer_norm(d_model)
    self$cross_attn <- ms_deform_attn(d_model, n_levels = num_feature_levels, n_heads = ca_nhead, n_points = dec_n_points)
    self$linear1 <- nn_linear(d_model, dim_feedforward)
    self$dropout <- nn_dropout(dropout)
    self$linear2 <- nn_linear(dim_feedforward, d_model)
    self$norm2 <- nn_layer_norm(d_model)
    self$norm3 <- nn_layer_norm(d_model)
    self$dropout2 <- nn_dropout(dropout)
    self$dropout3 <- nn_dropout(dropout)
    self$activation <- nn_relu()
    self$group_detr <- group_detr
  },
  with_pos_embed = function(tensor, pos) {
    if (is.null(pos)) tensor else tensor + pos
  },
  forward = function(tgt, memory, tgt_mask = NULL, memory_key_padding_mask = NULL,
                     pos = NULL, query_pos = NULL, query_sine_embed = NULL,
                     is_first =
FALSE, reference_points = NULL, + spatial_shapes = NULL, level_start_index = NULL, + spatial_shapes_hw = NULL) { + bs <- tgt$size(1) + num_queries <- tgt$size(2) + q <- k <- tgt + query_pos + v <- tgt + if (self$training) { + q <- torch_cat(q$split(num_queries %/% self$group_detr, dim = 2), dim = 1) + k <- torch_cat(k$split(num_queries %/% self$group_detr, dim = 2), dim = 1) + v <- torch_cat(v$split(num_queries %/% self$group_detr, dim = 2), dim = 1) + } + tgt2 <- self$self_attn(q, k, v, attn_mask = tgt_mask, need_weights = FALSE)[[1]] + if (self$training) { + tgt2 <- torch_cat(tgt2$split(bs, dim = 1), dim = 2) + } + tgt <- tgt + self$dropout1(tgt2) + tgt <- self$norm1(tgt) + tgt2 <- self$cross_attn( + self$with_pos_embed(tgt, query_pos), + reference_points, + memory, + spatial_shapes, + level_start_index, + memory_key_padding_mask, + spatial_shapes_hw + ) + tgt <- tgt + self$dropout2(tgt2) + tgt <- self$norm2(tgt) + tgt2 <- self$linear2(self$dropout(self$activation(self$linear1(tgt)))) + tgt <- tgt + self$dropout3(tgt2) + tgt <- self$norm3(tgt) + tgt + } +) + +rfdetr_decoder <- nn_module( + "rfdetr_decoder", + initialize = function(decoder_layer, num_layers, norm = NULL, + return_intermediate = FALSE, d_model = 256, + lite_refpoint_refine = FALSE, bbox_reparam = FALSE) { + self$layers <- get_clones(decoder_layer, num_layers) + self$num_layers <- num_layers + self$norm <- norm + self$return_intermediate <- return_intermediate + self$lite_refpoint_refine <- lite_refpoint_refine + self$bbox_reparam <- bbox_reparam + self$ref_point_head <- mlp_module(2 * d_model, d_model, d_model, 2) + }, + refpoints_refine = function(refpoints_unsigmoid, new_refpoints_delta) { + if (self$bbox_reparam) { + new_cxcy <- new_refpoints_delta[, , 1:2] * refpoints_unsigmoid[, , 3:4] + refpoints_unsigmoid[, , 1:2] + new_wh <- new_refpoints_delta[, , 3:4]$exp() * refpoints_unsigmoid[, , 3:4] + torch_cat(list(new_cxcy, new_wh), dim = -1) + } else { + refpoints_unsigmoid + new_refpoints_delta + 
} + }, + get_reference = function(refpoints, valid_ratios, d_model_half) { + obj_center <- refpoints[, , 1:4, drop = FALSE] + refpoints_input <- obj_center$unsqueeze(3) * torch_cat(list(valid_ratios, valid_ratios), dim = -1)$unsqueeze(2) + query_sine_embed <- gen_sineembed_for_position(refpoints_input[, , 1, ], d_model_half) + query_pos <- self$ref_point_head(query_sine_embed) + list(obj_center, refpoints_input, query_pos, query_sine_embed) + }, + forward = function(tgt, memory, memory_key_padding_mask = NULL, pos = NULL, + refpoints_unsigmoid = NULL, level_start_index = NULL, + spatial_shapes = NULL, valid_ratios = NULL, + spatial_shapes_hw = NULL) { + output <- tgt + intermediate <- list() + hs_refpoints <- list(refpoints_unsigmoid) + d_model_half <- as.integer(memory$size(3) / 2) + if (self$lite_refpoint_refine) { + if (self$bbox_reparam) { + ref_info <- self$get_reference(refpoints_unsigmoid, valid_ratios, d_model_half) + } else { + ref_info <- self$get_reference(refpoints_unsigmoid$sigmoid(), valid_ratios, d_model_half) + } + } + for (layer_id in seq_len(self$num_layers)) { + if (!self$lite_refpoint_refine) { + if (self$bbox_reparam) { + ref_info <- self$get_reference(refpoints_unsigmoid, valid_ratios, d_model_half) + } else { + ref_info <- self$get_reference(refpoints_unsigmoid$sigmoid(), valid_ratios, d_model_half) + } + } + query_pos <- ref_info[[3]] + output <- self$layers[[layer_id]]( + output, memory, + tgt_mask = NULL, + memory_key_padding_mask = memory_key_padding_mask, + pos = pos, + query_pos = query_pos, + query_sine_embed = ref_info[[4]], + is_first = (layer_id == 1), + reference_points = ref_info[[2]], + spatial_shapes = spatial_shapes, + level_start_index = level_start_index, + spatial_shapes_hw = spatial_shapes_hw + ) + if (self$return_intermediate) { + intermediate[[layer_id]] <- if (!is.null(self$norm)) self$norm(output) else output + } + } + if (!is.null(self$norm)) { + output <- self$norm(output) + } + if (self$return_intermediate) { + 
intermediate[[self$num_layers]] <- output
    }
    if (self$return_intermediate) {
      list(torch_stack(intermediate, dim = 1), refpoints_unsigmoid$unsqueeze(1))
    } else {
      list(output$unsqueeze(1))
    }
  }
)

# Decoder-only transformer of RF-DETR with optional two-stage query
# initialisation.
#
# NOTE: with `two_stage = TRUE`, forward() reads `self$enc_out_class_embed`
# and `self$enc_out_bbox_embed`. They are not created in initialize(); in
# this file `rfdetr_model` attaches them to the transformer after
# construction.
#
# forward() returns list(hs, references, memory_ts, boxes_ts):
#   hs, references   decoder outputs and the reference boxes they are relative
#                    to (both NULL when there are no decoder layers).
#   memory_ts        encoder tokens selected by the two-stage top-k (NULL when
#                    `two_stage` is FALSE).
#   boxes_ts         boxes of those tokens (NULL when `two_stage` is FALSE).
rfdetr_transformer <- nn_module(
  "rfdetr_transformer",
  initialize = function(d_model = 256, sa_nhead = 8, ca_nhead = 16,
                        num_queries = 300, num_decoder_layers = 6,
                        dim_feedforward = 2048, dropout = 0.0,
                        return_intermediate_dec = TRUE,
                        group_detr = 1, two_stage = FALSE,
                        num_feature_levels = 1, dec_n_points = 2,
                        lite_refpoint_refine = FALSE,
                        bbox_reparam = FALSE) {
    # `num_queries` is accepted for interface parity; the number of queries is
    # taken from the embeddings passed to forward().
    self$d_model <- d_model
    self$dec_layers <- num_decoder_layers
    self$group_detr <- group_detr
    self$num_feature_levels <- num_feature_levels
    self$bbox_reparam <- bbox_reparam
    decoder_layer <- rfdetr_decoder_layer(
      d_model, sa_nhead, ca_nhead, dim_feedforward, dropout,
      group_detr, num_feature_levels, dec_n_points
    )
    self$decoder <- rfdetr_decoder(
      decoder_layer, num_decoder_layers,
      norm = nn_layer_norm(d_model),
      return_intermediate = return_intermediate_dec,
      d_model = d_model,
      lite_refpoint_refine = lite_refpoint_refine,
      bbox_reparam = bbox_reparam
    )
    if (two_stage) {
      # One projection + norm per query group.
      self$enc_output <- nn_module_list(lapply(seq_len(group_detr), function(i) nn_linear(d_model, d_model)))
      self$enc_output_norm <- nn_module_list(lapply(seq_len(group_detr), function(i) nn_layer_norm(d_model)))
    }
    self$two_stage <- two_stage
  },
  # Fraction of each sample that is not padding, returned as (width, height)
  # ratios. `mask` is indexed as (batch, height, width) with TRUE = padding.
  get_valid_ratio = function(mask) {
    mask <- mask$to(dtype = torch_float32())
    valid_height <- torch_sum((1 - mask[, , 1]), dim = 2)
    valid_width <- torch_sum((1 - mask[, 1, ]), dim = 2)
    valid_ratio_h <- valid_height$float() / mask$size(2)
    valid_ratio_w <- valid_width$float() / mask$size(3)
    torch_stack(list(valid_ratio_w, valid_ratio_h), dim = -1)
  },
  forward = function(srcs, masks, pos_embeds, refpoint_embed, query_feat) {
    has_masks <- !is.null(masks)
    src_flatten <- list()
    mask_flatten <- list()
    lvl_pos_embed_flatten <- list()
    spatial_shapes <- list()
    for (lvl in seq_along(srcs)) {
      src <- srcs[[lvl]]
      spatial_shapes[[lvl]] <- c(src$size(3), src$size(4))
      # (batch, channels, h, w) -> (batch, h * w, channels)
      src_flatten[[lvl]] <- src$flatten(start_dim = 3)$transpose(2, 3)
      lvl_pos_embed_flatten[[lvl]] <- pos_embeds[[lvl]]$flatten(start_dim = 3)$transpose(2, 3)
      if (has_masks) {
        mask_flatten[[lvl]] <- masks[[lvl]]$flatten(start_dim = 2)
      }
    }
    memory <- torch_cat(src_flatten, dim = 2)
    spatial_shapes_t <- torch_tensor(
      matrix(unlist(spatial_shapes), ncol = 2, byrow = TRUE),
      dtype = torch_int64(), device = memory$device
    )
    n_spatial <- length(spatial_shapes)

    # Zero-based offset of each level inside the flattened token axis,
    # computed on plain R integers (exclusive cumulative sum of h * w).
    level_sizes <- vapply(spatial_shapes, prod, numeric(1))
    level_start_index <- torch_tensor(
      c(0, cumsum(level_sizes)[-n_spatial]),
      dtype = torch_int64(), device = memory$device
    )

    if (has_masks) {
      mask_flatten <- torch_cat(mask_flatten, dim = 2)
      valid_ratios <- torch_stack(lapply(masks, function(m) self$get_valid_ratio(m)), dim = 2)
    } else {
      mask_flatten <- NULL
      valid_ratios <- torch_ones(
        c(memory$size(1), n_spatial, 2),
        dtype = torch_float32(), device = memory$device
      )
    }
    lvl_pos_embed_flatten <- torch_cat(lvl_pos_embed_flatten, dim = 2)

    hs <- NULL
    references <- NULL

    if (self$two_stage) {
      proposals <- gen_encoder_output_proposals(
        memory, mask_flatten, spatial_shapes_t,
        unsigmoid = !self$bbox_reparam
      )
      output_memory <- proposals[[1]]
      output_proposals <- proposals[[2]]
      # All query groups are used while training, only the first at inference.
      gd <- if (self$training) self$group_detr else 1
      # Never ask top-k for more candidates than there are encoder tokens.
      topk <- min(refpoint_embed$size(1) %/% gd, output_memory$size(2))
      refpoint_embed_ts <- vector("list", gd)
      memory_ts <- vector("list", gd)
      boxes_ts <- vector("list", gd)
      for (g_idx in seq_len(gd)) {
        om <- self$enc_output_norm[[g_idx]](self$enc_output[[g_idx]](output_memory))
        cls_enc_g <- self$enc_out_class_embed[[g_idx]](om)
        delta_g <- self$enc_out_bbox_embed[[g_idx]](om)
        if (self$bbox_reparam) {
          # Deltas are relative to the proposal box: centre shift in units of
          # the proposal size, log-scale change of width / height.
          cxcy_g <- delta_g[, , 1:2] * output_proposals[, , 3:4] + output_proposals[, , 1:2]
          wh_g <- delta_g[, , 3:4]$exp() * output_proposals[, , 3:4]
          coord_g <- torch_cat(list(cxcy_g, wh_g), dim = -1)
        } else {
          # Proposals are in logit space here; deltas are simply added.
          coord_g <- delta_g + output_proposals
        }
        topk_idx <- cls_enc_g$max(dim = -1, keepdim = FALSE)[[1]]$topk(topk, dim = 2)[[2]]
        ref_g <- torch_gather(coord_g, 2, topk_idx$unsqueeze(3)$'repeat'(c(1, 1, 4)))
        tgt_g <- torch_gather(om, 2, topk_idx$unsqueeze(3)$'repeat'(c(1, 1, memory$size(3))))
        refpoint_embed_ts[[g_idx]] <- ref_g$detach()
        memory_ts[[g_idx]] <- tgt_g
        boxes_ts[[g_idx]] <- ref_g
      }
      refpoint_embed_ts <- torch_cat(refpoint_embed_ts, dim = 2)
      memory_ts <- torch_cat(memory_ts, dim = 2)
      boxes_ts <- torch_cat(boxes_ts, dim = 2)
    }

    if (self$dec_layers > 0) {
      batch <- memory$size(1)
      tgt <- query_feat$unsqueeze(1)$expand(c(batch, -1, -1))$contiguous()
      refpoint_embed_exp <- refpoint_embed$unsqueeze(1)$expand(c(batch, -1, -1))$contiguous()
      if (self$two_stage) {
        ts_len <- refpoint_embed_ts$size(2)
        n_query <- refpoint_embed_exp$size(2)
        ref_subset <- refpoint_embed_exp[, 1:ts_len, , drop = FALSE]
        if (self$bbox_reparam) {
          cxcy <- ref_subset[, , 1:2] * refpoint_embed_ts[, , 3:4] + refpoint_embed_ts[, , 1:2]
          wh <- ref_subset[, , 3:4]$exp() * refpoint_embed_ts[, , 3:4]
          ref_subset <- torch_cat(list(cxcy, wh), dim = -1)
        } else {
          ref_subset <- ref_subset + refpoint_embed_ts
        }
        if (ts_len < n_query) {
          # Queries without a two-stage proposal keep their learned reference.
          ref_remain <- refpoint_embed_exp[, (ts_len + 1):n_query, , drop = FALSE]
          refpoint_embed_exp <- torch_cat(list(ref_subset, ref_remain), dim = 2)
        } else {
          refpoint_embed_exp <- ref_subset
        }
      }
      dec_out <- self$decoder(
        tgt, memory,
        memory_key_padding_mask = mask_flatten,
        pos = lvl_pos_embed_flatten,
        refpoints_unsigmoid = refpoint_embed_exp,
        level_start_index = level_start_index,
        spatial_shapes = spatial_shapes_t,
        valid_ratios = valid_ratios,
        spatial_shapes_hw = spatial_shapes
      )
      hs <- dec_out[[1]]
      references <- dec_out[[2]]
    }

    if (self$two_stage) {
      if (!self$bbox_reparam) {
        # Logit-space boxes are mapped back to [0, 1] for the caller.
        boxes_ts <- boxes_ts$sigmoid()
      }
      list(hs, references, memory_ts, boxes_ts)
    } else {
      list(hs, references, NULL, NULL)
    }
  }
)

rfdetr_model <- nn_module(
  "rfdetr_model",
  initialize = function(backbone, transformer, num_classes = 91,
                        num_queries = 300, aux_loss = TRUE,
                        group_detr = 13, two_stage = TRUE,
                        lite_refpoint_refine = TRUE,
                        bbox_reparam = TRUE) {
    self$num_queries <- num_queries
    self$transformer <- transformer
    hidden_dim <- transformer$d_model
    self$class_embed <- nn_linear(hidden_dim, num_classes)
    self$bbox_embed <- mlp_module(hidden_dim, hidden_dim, 4, 3)
    query_dim <- 4
    self$refpoint_embed <- nn_embedding(num_queries * group_detr, query_dim)
    self$query_feat <- nn_embedding(num_queries * group_detr, hidden_dim)
    nn_init_constant_(self$refpoint_embed$weight, 0)
    self$backbone <- backbone
    self$aux_loss <- aux_loss
    self$group_detr <- group_detr
    self$lite_refpoint_refine <- lite_refpoint_refine
    if (!self$lite_refpoint_refine) {
      self$transformer$decoder$bbox_embed <- self$bbox_embed
    } else {
      self$transformer$decoder$bbox_embed <- NULL
    }
    self$bbox_reparam <- bbox_reparam
    prior_prob <- 0.01
    bias_value <- -log((1 - prior_prob) / prior_prob)
    self$class_embed$bias <- nn_parameter(torch_ones(num_classes) * bias_value)
    nn_init_constant_(self$bbox_embed$layers[[3]]$weight, 0)
    nn_init_constant_(self$bbox_embed$layers[[3]]$bias, 0)
    if (two_stage) {
      self$transformer$enc_out_bbox_embed <- nn_module_list(
        lapply(seq_len(group_detr), function(i) self$bbox_embed$clone(deep = TRUE))
      )
      self$transformer$enc_out_class_embed <- nn_module_list(
        lapply(seq_len(group_detr), function(i) self$class_embed$clone(deep = TRUE))
      )
    }
    self$two_stage <- two_stage
  },
  forward = function(x, mask = NULL) {
    if (is.list(x) && !is.null(x$tensors)) {
      mask <- x$mask
      x <- x$tensors
    }
    backbone_out <- self$backbone(x, mask)
    features <- backbone_out[[1]]
    poss <- backbone_out[[2]]
    srcs <- list()
    masks <- list()
    for (feat in features) {
      srcs[[length(srcs) + 1]] <- feat
      if
(!is.null(mask)) { + m <- nnf_interpolate(mask$unsqueeze(1)$float(), size = feat$shape[3:4])$squeeze(1)$to(dtype = torch_bool()) + masks[[length(masks) + 1]] <- m + } + } + refpoint_embed_weight <- self$refpoint_embed$weight + query_feat_weight <- self$query_feat$weight + if (!self$training) { + refpoint_embed_weight <- refpoint_embed_weight[1:self$num_queries, ] + query_feat_weight <- query_feat_weight[1:self$num_queries, ] + } + trans_out <- self$transformer( + srcs, if (length(masks) > 0) masks else NULL, poss, + refpoint_embed_weight, query_feat_weight + ) + hs <- trans_out[[1]] + ref_unsigmoid <- trans_out[[2]] + hs_enc <- trans_out[[3]] + ref_enc <- trans_out[[4]] + out <- list() + if (!is.null(hs)) { + if (self$bbox_reparam) { + outputs_coord_delta <- self$bbox_embed(hs) + outputs_coord_cxcy <- outputs_coord_delta[, , , 1:2] * ref_unsigmoid[, , , 3:4] + ref_unsigmoid[, , , 1:2] + outputs_coord_wh <- outputs_coord_delta[, , , 3:4]$exp() * ref_unsigmoid[, , , 3:4] + outputs_coord <- torch_cat(list(outputs_coord_cxcy, outputs_coord_wh), dim = -1) + } else { + outputs_coord <- (self$bbox_embed(hs) + ref_unsigmoid)$sigmoid() + } + outputs_class <- self$class_embed(hs) + out$pred_logits <- outputs_class[hs$size(1), , , ] + out$pred_boxes <- outputs_coord[hs$size(1), , , ] + if (self$aux_loss) { + aux <- list() + for (i in seq_len(hs$size(1) - 1)) { + aux[[i]] <- list( + pred_logits = outputs_class[i, , , ], + pred_boxes = outputs_coord[i, , , ] + ) + } + out$aux_outputs <- aux + } + } + if (self$two_stage) { + gd <- if (self$training) self$group_detr else 1 + hs_enc_list <- hs_enc$split(hs_enc$size(2) %/% gd, dim = 2) + cls_enc_list <- list() + for (g_idx in seq_len(gd)) { + cls_enc_list[[g_idx]] <- self$transformer$enc_out_class_embed[[g_idx]](hs_enc_list[[g_idx]]) + } + cls_enc <- torch_cat(cls_enc_list, dim = 2) + if (!is.null(hs)) { + out$enc_outputs <- list(pred_logits = cls_enc, pred_boxes = ref_enc) + } else { + out$pred_logits <- cls_enc + out$pred_boxes 
<- ref_enc
      }
    }
    out
  }
)

# Turn raw model outputs into per-image detections.
#
# The `num_select` highest sigmoid scores are taken over all (query, class)
# pairs of an image. Boxes are converted from (cx, cy, w, h) to
# (x1, y1, x2, y2) and multiplied by the image size taken from
# `target_sizes` (column 1 is used as height, column 2 as width).
#
# Returns one list per image with `scores`, `labels` (zero-based remainder of
# the flat index by the number of classes) and `boxes`.
rfdetr_postprocess <- nn_module(
  "rfdetr_postprocess",
  initialize = function(num_select = 300) {
    self$num_select <- num_select
  },
  forward = function(outputs, target_sizes) {
    logits <- outputs$pred_logits
    cxcywh <- outputs$pred_boxes
    n_classes <- logits$size(3)

    # Top-k over the flattened (query, class) axis.
    top <- logits$sigmoid()$view(c(logits$size(1), -1))$topk(self$num_select, dim = 2)
    scores <- top[[1]]
    # Shift to zero-based so integer division / modulo split the flat index
    # into a query index and a class index.
    flat_zero_based <- top[[2]] - 1L
    query_idx <- flat_zero_based %/% n_classes
    labels <- flat_zero_based %% n_classes

    center_x <- cxcywh[, , 1]
    center_y <- cxcywh[, , 2]
    box_w <- cxcywh[, , 3]
    box_h <- cxcywh[, , 4]
    corners <- torch_stack(list(
      center_x - box_w / 2,
      center_y - box_h / 2,
      center_x + box_w / 2,
      center_y + box_h / 2
    ), dim = -1)

    # Back to one-based query indices for gather, repeated over the 4 coords.
    picked <- torch_gather(corners, 2, (query_idx + 1L)$unsqueeze(3)$'repeat'(c(1, 1, 4)))

    img_h <- target_sizes[, 1]
    img_w <- target_sizes[, 2]
    image_scale <- torch_stack(list(img_w, img_h, img_w, img_h), dim = 2)
    picked <- picked * image_scale$unsqueeze(2)

    lapply(seq_len(picked$size(1)), function(i) {
      list(
        scores = scores[i, ],
        labels = labels[i, ],
        boxes = picked[i, , ]
      )
    })
  }
)

rfdetr_configs <- list(
  nano = list(
    encoder = "dinov2_windowed_small",
    hidden_dim = 256,
    num_queries = 300,
    dec_layers = 2,
    sa_nheads = 8,
    ca_nheads = 16,
    dim_feedforward = 2048,
    dec_n_points = 2,
    group_detr = 13,
    out_feature_indexes = c(3, 6, 9, 12),
    patch_size = 16,
    num_windows = 2,
    num_classes = 91,
    num_register_tokens = 0,
    resolution = 384
  ),
  small = list(
    encoder = "dinov2_windowed_small",
    hidden_dim = 256,
    num_queries = 300,
    dec_layers = 2,
    sa_nheads = 8,
    ca_nheads = 16,
    dim_feedforward = 2048,
    dec_n_points = 2,
group_detr = 13,
    out_feature_indexes = c(3, 6, 9, 12),
    patch_size = 16,
    num_windows = 2,
    num_classes = 91,
    num_register_tokens = 0,
    resolution = 512
  ),
  medium = list(
    encoder = "dinov2_windowed_small",
    hidden_dim = 256,
    num_queries = 300,
    dec_layers = 2,
    sa_nheads = 8,
    ca_nheads = 16,
    dim_feedforward = 2048,
    dec_n_points = 2,
    group_detr = 13,
    out_feature_indexes = c(3, 6, 9, 12),
    patch_size = 16,
    num_windows = 2,
    num_classes = 91,
    resolution = 640
  ),
  base = list(
    encoder = "dinov2_windowed_small",
    hidden_dim = 256,
    num_queries = 300,
    dec_layers = 3,
    sa_nheads = 8,
    ca_nheads = 16,
    dim_feedforward = 2048,
    dec_n_points = 2,
    group_detr = 13,
    out_feature_indexes = c(2, 5, 8, 11),
    patch_size = 14,
    num_windows = 4,
    num_classes = 91,
    resolution = 640
  ),
  large = list(
    encoder = "dinov2_windowed_base",
    hidden_dim = 384,
    num_queries = 300,
    dec_layers = 3,
    sa_nheads = 12,
    ca_nheads = 24,
    dim_feedforward = 2048,
    dec_n_points = 4,
    group_detr = 13,
    out_feature_indexes = c(2, 5, 8, 11),
    patch_size = 14,
    num_windows = 4,
    num_classes = 91,
    resolution = 560,
    projector_scale = c(2.0, 0.5)
  ),
  base_2 = list(
    encoder = "dinov2_windowed_small",
    hidden_dim = 256,
    num_queries = 300,
    dec_layers = 3,
    sa_nheads = 8,
    ca_nheads = 16,
    dim_feedforward = 2048,
    dec_n_points = 2,
    group_detr = 13,
    out_feature_indexes = c(2, 5, 8, 11),
    patch_size = 14,
    num_windows = 4,
    num_classes = 91,
    resolution = 640
  ),
  base_o365 = list(
    encoder = "dinov2_windowed_small",
    hidden_dim = 256,
    num_queries = 300,
    dec_layers = 3,
    sa_nheads = 8,
    ca_nheads = 16,
    dim_feedforward = 2048,
    dec_n_points = 2,
    group_detr = 13,
    out_feature_indexes = c(2, 5, 8, 11),
    patch_size = 14,
    num_windows = 4,
    num_classes = 366,
    resolution = 640
  )
)

# Resize a checkpoint position-embedding table to the token count of the
# model being built.
#
# `source` and `target` are indexed as (1, 1 + n_patches, embed_dim): the
# first token is kept as is, the remaining tokens are treated as a square
# grid and resampled bicubically to the square grid implied by `target`.
# Assumes both patch counts are perfect squares (side length is taken as
# as.integer(sqrt(n))).
rfdetr_resize_position_embeddings <- function(source, target) {
  cls_pos <- source[, 1, , drop = FALSE]
  patch_pos <- source[, 2:source$size(2), , drop = FALSE]
  embed_dim <- patch_pos$size(3)
  source_side <- as.integer(sqrt(patch_pos$size(2)))
  target_side <- as.integer(sqrt(target$size(2) - 1))
  patch_pos <- patch_pos$reshape(c(1, source_side, source_side, embed_dim))
  patch_pos <- patch_pos$permute(c(1, 4, 2, 3))
  # Interpolate in float32 and cast back to the checkpoint dtype.
  patch_pos <- nnf_interpolate(
    patch_pos$to(dtype = torch_float32()),
    size = c(target_side, target_side),
    mode = "bicubic",
    align_corners = FALSE
  )$to(dtype = source$dtype)
  patch_pos <- patch_pos$permute(c(1, 3, 4, 2))$reshape(c(1, -1, embed_dim))
  torch_cat(list(cls_pos, patch_pos), dim = 2)
}

#' @importFrom torch nn_parameter nn_linear nn_embedding nn_layer_norm nn_dropout
#' @importFrom torch nn_multihead_attention nn_conv2d nn_conv_transpose2d nn_batch_norm2d
#' @importFrom torch nn_relu nn_gelu nn_silu nnf_relu nnf_gelu nnf_silu nnf_softmax
#' @importFrom torch nnf_interpolate nnf_grid_sample nnf_layer_norm
#' @importFrom torch nn_init_trunc_normal_ nn_init_constant_ nn_init_xavier_uniform_ nn_init_zeros_ nn_init_ones_
#' @importFrom torch torch_randn torch_zeros torch_ones torch_arange torch_linspace torch_cat
#' @importFrom torch torch_stack torch_sum torch_cumsum torch_log
# Assemble an RF-DETR model from a configuration list and optionally load
# pretrained weights.
#
# cfg         one entry of `rfdetr_configs` (possibly with user overrides).
# pretrained  if TRUE, download the checkpoint registered under `name`,
#             verify its md5 and copy matching tensors into the model.
# progress    accepted for interface parity with other model constructors;
#             not used in this function.
# name        key into `rfdetr_torchscript_urls`; required when
#             `pretrained = TRUE`.
#
# Checkpoint tensors whose name is unknown to the model, or whose shape
# differs, are skipped; position embeddings with a different token count are
# resized instead of skipped.
build_rfdetr <- function(cfg, pretrained = FALSE, progress = TRUE, name = NULL) {
  projector_scale <- cfg$projector_scale %||% c(1.0)
  backbone <- rfdetr_backbone(
    encoder_name = cfg$encoder,
    out_feature_indexes = cfg$out_feature_indexes,
    hidden_dim = cfg$hidden_dim,
    layer_norm = TRUE,
    target_shape = c(cfg$resolution, cfg$resolution),
    patch_size = cfg$patch_size,
    num_windows = cfg$num_windows,
    num_register_tokens = cfg$num_register_tokens %||% 0,
    projector_scale = projector_scale
  )
  pos_embed <- position_embedding_sine(
    num_pos_feats = as.integer(cfg$hidden_dim / 2),
    normalize = TRUE
  )
  joiner <- rfdetr_joiner(backbone, pos_embed)
  transformer <- rfdetr_transformer(
    d_model = cfg$hidden_dim,
    sa_nhead = cfg$sa_nheads,
    ca_nhead = cfg$ca_nheads,
    num_queries = cfg$num_queries,
    num_decoder_layers = cfg$dec_layers,
    dim_feedforward = cfg$dim_feedforward,
    dropout = 0.0,
    return_intermediate_dec = TRUE,
    group_detr = cfg$group_detr,
    two_stage = TRUE,
    num_feature_levels = length(projector_scale),
    dec_n_points = cfg$dec_n_points,
    lite_refpoint_refine = TRUE,
    bbox_reparam = TRUE
  )
  model <- rfdetr_model(
    backbone = joiner,
    transformer = transformer,
    num_classes = cfg$num_classes,
    num_queries = cfg$num_queries,
    aux_loss = TRUE,
    group_detr = cfg$group_detr,
    two_stage = TRUE,
    lite_refpoint_refine = TRUE,
    bbox_reparam = TRUE
  )
  if (pretrained) {
    if (is.null(name)) {
      runtime_error("Internal error: variant name required for pretrained weights")
    }
    r <- rfdetr_torchscript_urls[[name]]
    if (is.null(r)) {
      runtime_error("Pretrained weights not available for this variant")
    }
    cli_inform("Model weights for {.cls {name}} (~{.emph {r[3]}}) will be downloaded and processed if not already available.")
    archive <- download_and_cache(r[1], prefix = name)
    if (tools::md5sum(archive) != r[2]) {
      runtime_error("Corrupt file! Delete the file in {archive} and try again.")
    }
    state_dict <- torch::load_state_dict(archive)
    model_sd <- model$state_dict()
    loaded <- 0
    skipped <- 0
    # Copy in place without recording gradients; the counters are ordinary
    # locals of this function.
    with_no_grad({
      for (k in names(state_dict)) {
        target <- model_sd[[k]]
        if (is.null(target)) {
          skipped <- skipped + 1
          next
        }
        if (identical(dim(state_dict[[k]]), dim(target))) {
          target$copy_(state_dict[[k]])
          loaded <- loaded + 1
        } else if (grepl("position_embeddings", k)) {
          target$copy_(rfdetr_resize_position_embeddings(state_dict[[k]], target))
          loaded <- loaded + 1
        } else {
          skipped <- skipped + 1
        }
      }
    })
    model$load_state_dict(model_sd, strict = FALSE)
    total <- length(model_sd)
    cli_inform("Loaded pretrained weights for {.cls {name}} ({loaded}/{total} keys, {skipped} skipped).")
  }
  model
}

#' @describeIn model_rfdetr RF-DETR Nano
(fastest, COCO, 384px)
#' @export
model_rfdetr_nano <- function(pretrained = FALSE, progress = TRUE, ...) {
  # Named arguments in `...` replace (or add) entries of the variant's config.
  overrides <- list(...)
  cfg <- rfdetr_configs[["nano"]]
  cfg[names(overrides)] <- overrides
  build_rfdetr(cfg, pretrained, progress, name = "rfdetr_nano")
}

#' @describeIn model_rfdetr RF-DETR Small (lightweight, COCO, 512px)
#' @export
model_rfdetr_small <- function(pretrained = FALSE, progress = TRUE, ...) {
  overrides <- list(...)
  cfg <- rfdetr_configs[["small"]]
  cfg[names(overrides)] <- overrides
  build_rfdetr(cfg, pretrained, progress, name = "rfdetr_small")
}

#' @describeIn model_rfdetr RF-DETR Medium (balanced speed/accuracy, COCO, 640px)
#' @export
model_rfdetr_medium <- function(pretrained = FALSE, progress = TRUE, ...) {
  overrides <- list(...)
  cfg <- rfdetr_configs[["medium"]]
  cfg[names(overrides)] <- overrides
  build_rfdetr(cfg, pretrained, progress, name = "rfdetr_medium")
}

#' @describeIn model_rfdetr RF-DETR Base (COCO pretrained, 640px)
#' @export
model_rfdetr_base <- function(pretrained = FALSE, progress = TRUE, ...) {
  overrides <- list(...)
  cfg <- rfdetr_configs[["base"]]
  cfg[names(overrides)] <- overrides
  build_rfdetr(cfg, pretrained, progress, name = "rfdetr_base")
}

#' @describeIn model_rfdetr RF-DETR Base variant 2 (alternative COCO training run)
#' @export
model_rfdetr_base_2 <- function(pretrained = FALSE, progress = TRUE, ...) {
  overrides <- list(...)
  cfg <- rfdetr_configs[["base_2"]]
  cfg[names(overrides)] <- overrides
  build_rfdetr(cfg, pretrained, progress, name = "rfdetr_base_2")
}

#' @describeIn model_rfdetr RF-DETR Base O365 (Objects365, 366 classes)
#' @export
model_rfdetr_base_o365 <- function(pretrained = FALSE, progress = TRUE, ...) {
  overrides <- list(...)
  cfg <- rfdetr_configs[["base_o365"]]
  cfg[names(overrides)] <- overrides
  build_rfdetr(cfg, pretrained, progress, name = "rfdetr_base_o365")
}

#' @describeIn model_rfdetr RF-DETR Large (DINOv2 Base backbone, COCO, 560px)
#' @export
model_rfdetr_large <- function(pretrained = FALSE, progress = TRUE, ...) {
  cfg <- rfdetr_configs$large
  cfg[names(list(...))] <- list(...)
+ build_rfdetr(cfg, pretrained, progress, name = "rfdetr_large") +} diff --git a/man/caltech_classes.Rd b/man/caltech_classes.Rd index 0192c561..58d3920d 100644 --- a/man/caltech_classes.Rd +++ b/man/caltech_classes.Rd @@ -18,9 +18,9 @@ Utilities for resolving Caltech class identifiers to their corresponding human readable labels. } \seealso{ -Other class_resolution: -\code{\link[=coco_classes]{coco_classes()}}, -\code{\link[=imagenet_classes]{imagenet_classes()}}, -\code{\link[=pascal_voc_classes]{pascal_voc_classes()}} +Other class_resolution: +\code{\link{coco_classes}()}, +\code{\link{imagenet_classes}()}, +\code{\link{pascal_voc_classes}()} } \concept{class_resolution} diff --git a/man/caltech_dataset.Rd b/man/caltech_dataset.Rd index f72df0b7..365b565d 100644 --- a/man/caltech_dataset.Rd +++ b/man/caltech_dataset.Rd @@ -74,20 +74,20 @@ first_item$y # Integer label } \seealso{ -Other classification_dataset: -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +Other classification_dataset: +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{mnist_dataset}()}, 
+\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_plankton_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/cifar_datasets.Rd b/man/cifar_datasets.Rd index 9436f611..be28853a 100644 --- a/man/cifar_datasets.Rd +++ b/man/cifar_datasets.Rd @@ -62,20 +62,20 @@ item$y } } \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{mnist_dataset}()}, +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_plankton_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/coco_caption_dataset.Rd b/man/coco_caption_dataset.Rd index 979d32f9..00279070 100644 --- a/man/coco_caption_dataset.Rd +++ b/man/coco_caption_dataset.Rd @@ -57,7 +57,7 @@ title(main = y, col.main = "black") } } 
\seealso{ -Other caption_dataset: +Other caption_dataset: \code{\link{flickr_caption_dataset}} } \concept{caption_dataset} diff --git a/man/coco_classes.Rd b/man/coco_classes.Rd index 1b6db700..3b8a3098 100644 --- a/man/coco_classes.Rd +++ b/man/coco_classes.Rd @@ -18,9 +18,9 @@ human readable labels. The labels are retrieved from pytorch/vision source to be with torchvision pretrained models. } \seealso{ -Other class_resolution: -\code{\link[=caltech_classes]{caltech_classes()}}, -\code{\link[=imagenet_classes]{imagenet_classes()}}, -\code{\link[=pascal_voc_classes]{pascal_voc_classes()}} +Other class_resolution: +\code{\link{caltech_classes}()}, +\code{\link{imagenet_classes}()}, +\code{\link{pascal_voc_classes}()} } \concept{class_resolution} diff --git a/man/coco_detection_dataset.Rd b/man/coco_detection_dataset.Rd index 0731da33..bb7b14b9 100644 --- a/man/coco_detection_dataset.Rd +++ b/man/coco_detection_dataset.Rd @@ -70,13 +70,13 @@ tensor_image_browse(boxed) \seealso{ \code{\link{coco_segmentation_dataset}} for instance segmentation tasks -Other detection_dataset: +Other detection_dataset: \code{\link{pascal_voc_datasets}}, -\code{\link[=rf100_biology_collection]{rf100_biology_collection()}}, -\code{\link[=rf100_damage_collection]{rf100_damage_collection()}}, -\code{\link[=rf100_document_collection]{rf100_document_collection()}}, -\code{\link[=rf100_infrared_collection]{rf100_infrared_collection()}}, -\code{\link[=rf100_medical_collection]{rf100_medical_collection()}}, -\code{\link[=rf100_underwater_collection]{rf100_underwater_collection()}} +\code{\link{rf100_biology_collection}()}, +\code{\link{rf100_damage_collection}()}, +\code{\link{rf100_document_collection}()}, +\code{\link{rf100_infrared_collection}()}, +\code{\link{rf100_medical_collection}()}, +\code{\link{rf100_underwater_collection}()} } \concept{detection_dataset} diff --git a/man/coco_segmentation_dataset.Rd b/man/coco_segmentation_dataset.Rd index 2f93d78b..9a92c435 100644 --- 
a/man/coco_segmentation_dataset.Rd +++ b/man/coco_segmentation_dataset.Rd @@ -72,9 +72,9 @@ tensor_image_browse(masked) \seealso{ \code{\link{coco_detection_dataset}} for object detection tasks -Other segmentation_dataset: -\code{\link[=oxfordiiitpet_segmentation_dataset]{oxfordiiitpet_segmentation_dataset()}}, +Other segmentation_dataset: +\code{\link{oxfordiiitpet_segmentation_dataset}()}, \code{\link{pascal_voc_datasets}}, -\code{\link[=rf100_peixos_segmentation_dataset]{rf100_peixos_segmentation_dataset()}} +\code{\link{rf100_peixos_segmentation_dataset}()} } \concept{segmentation_dataset} diff --git a/man/draw_bounding_boxes.Rd b/man/draw_bounding_boxes.Rd index 6bfbb1f9..7fa667fa 100644 --- a/man/draw_bounding_boxes.Rd +++ b/man/draw_bounding_boxes.Rd @@ -74,11 +74,11 @@ tensor_image_browse(bboxed) } } \seealso{ -Other image display: -\code{\link[=draw_keypoints]{draw_keypoints()}}, -\code{\link[=draw_segmentation_masks]{draw_segmentation_masks()}}, -\code{\link[=tensor_image_browse]{tensor_image_browse()}}, -\code{\link[=tensor_image_display]{tensor_image_display()}}, -\code{\link[=vision_make_grid]{vision_make_grid()}} +Other image display: +\code{\link{draw_keypoints}()}, +\code{\link{draw_segmentation_masks}()}, +\code{\link{tensor_image_browse}()}, +\code{\link{tensor_image_display}()}, +\code{\link{vision_make_grid}()} } \concept{image display} diff --git a/man/draw_keypoints.Rd b/man/draw_keypoints.Rd index 03658131..d01727c1 100644 --- a/man/draw_keypoints.Rd +++ b/man/draw_keypoints.Rd @@ -48,11 +48,11 @@ tensor_image_browse(keypoint_image) } } \seealso{ -Other image display: -\code{\link[=draw_bounding_boxes]{draw_bounding_boxes()}}, -\code{\link[=draw_segmentation_masks]{draw_segmentation_masks()}}, -\code{\link[=tensor_image_browse]{tensor_image_browse()}}, -\code{\link[=tensor_image_display]{tensor_image_display()}}, -\code{\link[=vision_make_grid]{vision_make_grid()}} +Other image display: +\code{\link{draw_bounding_boxes}()}, 
+\code{\link{draw_segmentation_masks}()}, +\code{\link{tensor_image_browse}()}, +\code{\link{tensor_image_display}()}, +\code{\link{vision_make_grid}()} } \concept{image display} diff --git a/man/draw_segmentation_masks.Rd b/man/draw_segmentation_masks.Rd index 0b6ccb12..ebdf4d76 100644 --- a/man/draw_segmentation_masks.Rd +++ b/man/draw_segmentation_masks.Rd @@ -45,11 +45,11 @@ tensor_image_browse(masked_image) \dontshow{\}) # examplesIf} } \seealso{ -Other image display: -\code{\link[=draw_bounding_boxes]{draw_bounding_boxes()}}, -\code{\link[=draw_keypoints]{draw_keypoints()}}, -\code{\link[=tensor_image_browse]{tensor_image_browse()}}, -\code{\link[=tensor_image_display]{tensor_image_display()}}, -\code{\link[=vision_make_grid]{vision_make_grid()}} +Other image display: +\code{\link{draw_bounding_boxes}()}, +\code{\link{draw_keypoints}()}, +\code{\link{tensor_image_browse}()}, +\code{\link{tensor_image_display}()}, +\code{\link{vision_make_grid}()} } \concept{image display} diff --git a/man/eurosat_dataset.Rd b/man/eurosat_dataset.Rd index 552f5d83..4192ef8f 100644 --- a/man/eurosat_dataset.Rd +++ b/man/eurosat_dataset.Rd @@ -78,20 +78,20 @@ print(head$y) # Label } } \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +\code{\link{cifar10_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, 
-\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{mnist_dataset}()}, +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_plankton_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/fer_dataset.Rd b/man/fer_dataset.Rd index 807e5a7c..ce8d8d1d 100644 --- a/man/fer_dataset.Rd +++ b/man/fer_dataset.Rd @@ -55,20 +55,20 @@ fer$classes[first_item$y] # "Happy" } \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{mnist_dataset}()}, +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, 
+\code{\link{whoi_plankton_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/fgvc_aircraft_dataset.Rd b/man/fgvc_aircraft_dataset.Rd index 6dd424c1..d2bc4b2c 100644 --- a/man/fgvc_aircraft_dataset.Rd +++ b/man/fgvc_aircraft_dataset.Rd @@ -95,20 +95,20 @@ fgvc$classes$variant[item$y[3]] # e.g., "707-320" } \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{flowers102_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{mnist_dataset}()}, +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_plankton_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/flickr_caption_dataset.Rd b/man/flickr_caption_dataset.Rd index d7574972..074b89b8 100644 --- a/man/flickr_caption_dataset.Rd +++ b/man/flickr_caption_dataset.Rd @@ -79,7 +79,7 @@ first_item$y # character vector containing five captions. 
} \seealso{ -Other caption_dataset: -\code{\link[=coco_caption_dataset]{coco_caption_dataset()}} +Other caption_dataset: +\code{\link{coco_caption_dataset}()} } \concept{caption_dataset} diff --git a/man/flowers102_dataset.Rd b/man/flowers102_dataset.Rd index 31960838..dd2f9faa 100644 --- a/man/flowers102_dataset.Rd +++ b/man/flowers102_dataset.Rd @@ -68,20 +68,20 @@ batch$y # Tensor of shape (4,) with numeric class labels } \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{mnist_dataset}()}, +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_plankton_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/image_folder_dataset.Rd b/man/image_folder_dataset.Rd index 90d3a746..18585829 100644 --- a/man/image_folder_dataset.Rd +++ b/man/image_folder_dataset.Rd @@ -47,20 +47,20 @@ root/cat/asd932_.png }\if{html}{\out{}} } \seealso{ -Other 
classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{mnist_dataset}()}, +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_plankton_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/imagenet_classes.Rd b/man/imagenet_classes.Rd index b39e8e7b..4a5fe928 100644 --- a/man/imagenet_classes.Rd +++ b/man/imagenet_classes.Rd @@ -33,9 +33,24 @@ human readable labels. The labels are retrieved from the same source used by PyTorch's reference implementation. 
} \seealso{ -Other class_resolution: -\code{\link[=caltech_classes]{caltech_classes()}}, -\code{\link[=coco_classes]{coco_classes()}}, -\code{\link[=pascal_voc_classes]{pascal_voc_classes()}} +Other class_resolution: +\code{\link{caltech_classes}()}, +\code{\link{coco_classes}()}, +\code{\link{pascal_voc_classes}()} + +Other class_resolution: +\code{\link{caltech_classes}()}, +\code{\link{coco_classes}()}, +\code{\link{pascal_voc_classes}()} + +Other class_resolution: +\code{\link{caltech_classes}()}, +\code{\link{coco_classes}()}, +\code{\link{pascal_voc_classes}()} + +Other class_resolution: +\code{\link{caltech_classes}()}, +\code{\link{coco_classes}()}, +\code{\link{pascal_voc_classes}()} } \concept{class_resolution} diff --git a/man/lfw_dataset.Rd b/man/lfw_dataset.Rd index 67a80c97..ddb03ed6 100644 --- a/man/lfw_dataset.Rd +++ b/man/lfw_dataset.Rd @@ -111,20 +111,20 @@ lfw$classes[first_item$y] # Class Name (e.g., "Same" or "Different") } \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, 
+\code{\link{image_folder_dataset}()}, +\code{\link{mnist_dataset}()}, +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_plankton_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/mnist_dataset.Rd b/man/mnist_dataset.Rd index 723bbcd5..6c87e21f 100644 --- a/man/mnist_dataset.Rd +++ b/man/mnist_dataset.Rd @@ -146,20 +146,20 @@ fmnist <- fashion_mnist_dataset(download = TRUE, train = TRUE) } \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_plankton_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/model_alexnet.Rd b/man/model_alexnet.Rd index a9a672b3..522f3eb4 100644 --- 
a/man/model_alexnet.Rd +++ b/man/model_alexnet.Rd @@ -20,14 +20,14 @@ AlexNet model architecture from the \href{https://arxiv.org/abs/1404.5997}{One weird trick...} paper. } \seealso{ -Other classification_model: +Other classification_model: \code{\link{model_convnext}}, \code{\link{model_efficientnet}}, \code{\link{model_efficientnet_v2}}, \code{\link{model_facenet}}, -\code{\link[=model_inception_v3]{model_inception_v3()}}, -\code{\link[=model_maxvit]{model_maxvit()}}, -\code{\link[=model_mobilenet_v2]{model_mobilenet_v2()}}, +\code{\link{model_inception_v3}()}, +\code{\link{model_maxvit}()}, +\code{\link{model_mobilenet_v2}()}, \code{\link{model_mobilenet_v3}}, \code{\link{model_resnet}}, \code{\link{model_vgg}}, diff --git a/man/model_convnext.Rd b/man/model_convnext.Rd index 9e34e55c..a8e26668 100644 --- a/man/model_convnext.Rd +++ b/man/model_convnext.Rd @@ -161,14 +161,14 @@ glue::glue("{seq_along(indices)}. {imagenet_classes(indices)} ({round(scores, 2) } \seealso{ -Other classification_model: -\code{\link[=model_alexnet]{model_alexnet()}}, +Other classification_model: +\code{\link{model_alexnet}()}, \code{\link{model_efficientnet}}, \code{\link{model_efficientnet_v2}}, \code{\link{model_facenet}}, -\code{\link[=model_inception_v3]{model_inception_v3()}}, -\code{\link[=model_maxvit]{model_maxvit()}}, -\code{\link[=model_mobilenet_v2]{model_mobilenet_v2()}}, +\code{\link{model_inception_v3}()}, +\code{\link{model_maxvit}()}, +\code{\link{model_mobilenet_v2}()}, \code{\link{model_mobilenet_v3}}, \code{\link{model_resnet}}, \code{\link{model_vgg}}, diff --git a/man/model_convnext_detection.Rd b/man/model_convnext_detection.Rd index ba64a308..0de17686 100644 --- a/man/model_convnext_detection.Rd +++ b/man/model_convnext_detection.Rd @@ -133,9 +133,10 @@ if (num_boxes > 0) { } \seealso{ -Other object_detection_model: +Other object_detection_model: \code{\link{model_facenet}}, \code{\link{model_fasterrcnn}}, -\code{\link{model_maskrcnn}} 
+\code{\link{model_maskrcnn}}, +\code{\link{model_rfdetr}} } \concept{object_detection_model} diff --git a/man/model_convnext_segmentation.Rd b/man/model_convnext_segmentation.Rd index 0a24dd66..f0d3277c 100644 --- a/man/model_convnext_segmentation.Rd +++ b/man/model_convnext_segmentation.Rd @@ -155,7 +155,7 @@ tensor_image_display(segmented) } \seealso{ -Other semantic_segmentation_model: +Other semantic_segmentation_model: \code{\link{model_deeplabv3}}, \code{\link{model_fcn_resnet}} } diff --git a/man/model_deeplabv3.Rd b/man/model_deeplabv3.Rd index da7a97c9..4047642f 100644 --- a/man/model_deeplabv3.Rd +++ b/man/model_deeplabv3.Rd @@ -153,7 +153,7 @@ cli::cli_inform("Majority class {.pkg ResNet-50}: {.emph {pascal_voc_classes(top } \seealso{ -Other semantic_segmentation_model: +Other semantic_segmentation_model: \code{\link{model_convnext_segmentation}}, \code{\link{model_fcn_resnet}} } diff --git a/man/model_efficientnet.Rd b/man/model_efficientnet.Rd index 9b60e54e..30b43246 100644 --- a/man/model_efficientnet.Rd +++ b/man/model_efficientnet.Rd @@ -108,14 +108,14 @@ imagenet_classes(which.max(as.numeric(output))) } \seealso{ -Other classification_model: -\code{\link[=model_alexnet]{model_alexnet()}}, +Other classification_model: +\code{\link{model_alexnet}()}, \code{\link{model_convnext}}, \code{\link{model_efficientnet_v2}}, \code{\link{model_facenet}}, -\code{\link[=model_inception_v3]{model_inception_v3()}}, -\code{\link[=model_maxvit]{model_maxvit()}}, -\code{\link[=model_mobilenet_v2]{model_mobilenet_v2()}}, +\code{\link{model_inception_v3}()}, +\code{\link{model_maxvit}()}, +\code{\link{model_mobilenet_v2}()}, \code{\link{model_mobilenet_v3}}, \code{\link{model_resnet}}, \code{\link{model_vgg}}, diff --git a/man/model_efficientnet_v2.Rd b/man/model_efficientnet_v2.Rd index ddf90efc..eaf1b6ca 100644 --- a/man/model_efficientnet_v2.Rd +++ b/man/model_efficientnet_v2.Rd @@ -74,14 +74,14 @@ glue::glue("{seq_along(indices)}. 
{imagenet_classes(indices)} ({round(scores, 2) \seealso{ \code{\link{model_efficientnet}} -Other classification_model: -\code{\link[=model_alexnet]{model_alexnet()}}, +Other classification_model: +\code{\link{model_alexnet}()}, \code{\link{model_convnext}}, \code{\link{model_efficientnet}}, \code{\link{model_facenet}}, -\code{\link[=model_inception_v3]{model_inception_v3()}}, -\code{\link[=model_maxvit]{model_maxvit()}}, -\code{\link[=model_mobilenet_v2]{model_mobilenet_v2()}}, +\code{\link{model_inception_v3}()}, +\code{\link{model_maxvit}()}, +\code{\link{model_mobilenet_v2}()}, \code{\link{model_mobilenet_v3}}, \code{\link{model_resnet}}, \code{\link{model_vgg}}, diff --git a/man/model_facenet.Rd b/man/model_facenet.Rd index e1c86695..33ad720e 100644 --- a/man/model_facenet.Rd +++ b/man/model_facenet.Rd @@ -174,19 +174,26 @@ output } \seealso{ -Other object_detection_model: +Other object_detection_model: \code{\link{model_convnext_detection}}, \code{\link{model_fasterrcnn}}, -\code{\link{model_maskrcnn}} +\code{\link{model_maskrcnn}}, +\code{\link{model_rfdetr}} -Other classification_model: -\code{\link[=model_alexnet]{model_alexnet()}}, +Other object_detection_model: +\code{\link{model_convnext_detection}}, +\code{\link{model_fasterrcnn}}, +\code{\link{model_maskrcnn}}, +\code{\link{model_rfdetr}} + +Other classification_model: +\code{\link{model_alexnet}()}, \code{\link{model_convnext}}, \code{\link{model_efficientnet}}, \code{\link{model_efficientnet_v2}}, -\code{\link[=model_inception_v3]{model_inception_v3()}}, -\code{\link[=model_maxvit]{model_maxvit()}}, -\code{\link[=model_mobilenet_v2]{model_mobilenet_v2()}}, +\code{\link{model_inception_v3}()}, +\code{\link{model_maxvit}()}, +\code{\link{model_mobilenet_v2}()}, \code{\link{model_mobilenet_v3}}, \code{\link{model_resnet}}, \code{\link{model_vgg}}, diff --git a/man/model_fasterrcnn.Rd b/man/model_fasterrcnn.Rd index 646404de..16c482f4 100644 --- a/man/model_fasterrcnn.Rd +++ b/man/model_fasterrcnn.Rd @@ 
-145,9 +145,10 @@ tensor_image_browse(boxed) } \seealso{ -Other object_detection_model: +Other object_detection_model: \code{\link{model_convnext_detection}}, \code{\link{model_facenet}}, -\code{\link{model_maskrcnn}} +\code{\link{model_maskrcnn}}, +\code{\link{model_rfdetr}} } \concept{object_detection_model} diff --git a/man/model_fcn_resnet.Rd b/man/model_fcn_resnet.Rd index 57801d3a..1eb6d9b4 100644 --- a/man/model_fcn_resnet.Rd +++ b/man/model_fcn_resnet.Rd @@ -91,7 +91,7 @@ tensor_image_display(segmented) } } \seealso{ -Other semantic_segmentation_model: +Other semantic_segmentation_model: \code{\link{model_convnext_segmentation}}, \code{\link{model_deeplabv3}} } diff --git a/man/model_inception_v3.Rd b/man/model_inception_v3.Rd index 4e2735d2..b8e97ffd 100644 --- a/man/model_inception_v3.Rd +++ b/man/model_inception_v3.Rd @@ -28,14 +28,14 @@ The required minimum input size of the model is 75x75. N x 3 x 299 x 299, so ensure your images are sized accordingly. } \seealso{ -Other classification_model: -\code{\link[=model_alexnet]{model_alexnet()}}, +Other classification_model: +\code{\link{model_alexnet}()}, \code{\link{model_convnext}}, \code{\link{model_efficientnet}}, \code{\link{model_efficientnet_v2}}, \code{\link{model_facenet}}, -\code{\link[=model_maxvit]{model_maxvit()}}, -\code{\link[=model_mobilenet_v2]{model_mobilenet_v2()}}, +\code{\link{model_maxvit}()}, +\code{\link{model_mobilenet_v2}()}, \code{\link{model_mobilenet_v3}}, \code{\link{model_resnet}}, \code{\link{model_vgg}}, diff --git a/man/model_maskrcnn.Rd b/man/model_maskrcnn.Rd index 59c80ee9..1caae1da 100644 --- a/man/model_maskrcnn.Rd +++ b/man/model_maskrcnn.Rd @@ -122,9 +122,10 @@ tensor_image_browse(boxed) } \seealso{ -Other object_detection_model: +Other object_detection_model: \code{\link{model_convnext_detection}}, \code{\link{model_facenet}}, -\code{\link{model_fasterrcnn}} +\code{\link{model_fasterrcnn}}, +\code{\link{model_rfdetr}} } \concept{object_detection_model} diff --git 
a/man/model_maxvit.Rd b/man/model_maxvit.Rd index 25520983..ad76eb5c 100644 --- a/man/model_maxvit.Rd +++ b/man/model_maxvit.Rd @@ -59,14 +59,14 @@ glue::glue("{seq_along(indices)}. {imagenet_classes(indices)} ({round(scores, 2) } \seealso{ -Other classification_model: -\code{\link[=model_alexnet]{model_alexnet()}}, +Other classification_model: +\code{\link{model_alexnet}()}, \code{\link{model_convnext}}, \code{\link{model_efficientnet}}, \code{\link{model_efficientnet_v2}}, \code{\link{model_facenet}}, -\code{\link[=model_inception_v3]{model_inception_v3()}}, -\code{\link[=model_mobilenet_v2]{model_mobilenet_v2()}}, +\code{\link{model_inception_v3}()}, +\code{\link{model_mobilenet_v2}()}, \code{\link{model_mobilenet_v3}}, \code{\link{model_resnet}}, \code{\link{model_vgg}}, diff --git a/man/model_mobilenet_v2.Rd b/man/model_mobilenet_v2.Rd index 771b95c0..3a114720 100644 --- a/man/model_mobilenet_v2.Rd +++ b/man/model_mobilenet_v2.Rd @@ -19,14 +19,14 @@ Constructs a MobileNetV2 architecture from \href{https://arxiv.org/abs/1801.04381}{MobileNetV2: Inverted Residuals and Linear Bottlenecks}. } \seealso{ -Other classification_model: -\code{\link[=model_alexnet]{model_alexnet()}}, +Other classification_model: +\code{\link{model_alexnet}()}, \code{\link{model_convnext}}, \code{\link{model_efficientnet}}, \code{\link{model_efficientnet_v2}}, \code{\link{model_facenet}}, -\code{\link[=model_inception_v3]{model_inception_v3()}}, -\code{\link[=model_maxvit]{model_maxvit()}}, +\code{\link{model_inception_v3}()}, +\code{\link{model_maxvit}()}, \code{\link{model_mobilenet_v3}}, \code{\link{model_resnet}}, \code{\link{model_vgg}}, diff --git a/man/model_mobilenet_v3.Rd b/man/model_mobilenet_v3.Rd index 0eb66455..051665a9 100644 --- a/man/model_mobilenet_v3.Rd +++ b/man/model_mobilenet_v3.Rd @@ -112,15 +112,28 @@ glue::glue("{seq_along(indices)}. 
{imagenet_classes(indices)} ({round(scores, 2) } \seealso{ -Other classification_model: -\code{\link[=model_alexnet]{model_alexnet()}}, +Other classification_model: +\code{\link{model_alexnet}()}, \code{\link{model_convnext}}, \code{\link{model_efficientnet}}, \code{\link{model_efficientnet_v2}}, \code{\link{model_facenet}}, -\code{\link[=model_inception_v3]{model_inception_v3()}}, -\code{\link[=model_maxvit]{model_maxvit()}}, -\code{\link[=model_mobilenet_v2]{model_mobilenet_v2()}}, +\code{\link{model_inception_v3}()}, +\code{\link{model_maxvit}()}, +\code{\link{model_mobilenet_v2}()}, +\code{\link{model_resnet}}, +\code{\link{model_vgg}}, +\code{\link{model_vit}} + +Other classification_model: +\code{\link{model_alexnet}()}, +\code{\link{model_convnext}}, +\code{\link{model_efficientnet}}, +\code{\link{model_efficientnet_v2}}, +\code{\link{model_facenet}}, +\code{\link{model_inception_v3}()}, +\code{\link{model_maxvit}()}, +\code{\link{model_mobilenet_v2}()}, \code{\link{model_resnet}}, \code{\link{model_vgg}}, \code{\link{model_vit}} diff --git a/man/model_resnet.Rd b/man/model_resnet.Rd index 68e55a4b..ba1d7f25 100644 --- a/man/model_resnet.Rd +++ b/man/model_resnet.Rd @@ -70,15 +70,15 @@ with width per group of 128. 
}} \seealso{ -Other classification_model: -\code{\link[=model_alexnet]{model_alexnet()}}, +Other classification_model: +\code{\link{model_alexnet}()}, \code{\link{model_convnext}}, \code{\link{model_efficientnet}}, \code{\link{model_efficientnet_v2}}, \code{\link{model_facenet}}, -\code{\link[=model_inception_v3]{model_inception_v3()}}, -\code{\link[=model_maxvit]{model_maxvit()}}, -\code{\link[=model_mobilenet_v2]{model_mobilenet_v2()}}, +\code{\link{model_inception_v3}()}, +\code{\link{model_maxvit}()}, +\code{\link{model_mobilenet_v2}()}, \code{\link{model_mobilenet_v3}}, \code{\link{model_vgg}}, \code{\link{model_vit}} diff --git a/man/model_rfdetr.Rd b/man/model_rfdetr.Rd new file mode 100644 index 00000000..a9c7d0c0 --- /dev/null +++ b/man/model_rfdetr.Rd @@ -0,0 +1,89 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/models-rfdetr_detection.R +\name{model_rfdetr} +\alias{model_rfdetr} +\alias{model_rfdetr_nano} +\alias{model_rfdetr_small} +\alias{model_rfdetr_medium} +\alias{model_rfdetr_base} +\alias{model_rfdetr_base_2} +\alias{model_rfdetr_base_o365} +\alias{model_rfdetr_large} +\title{RF-DETR Implementation} +\usage{ +model_rfdetr_nano(pretrained = FALSE, progress = TRUE, ...) + +model_rfdetr_small(pretrained = FALSE, progress = TRUE, ...) + +model_rfdetr_medium(pretrained = FALSE, progress = TRUE, ...) + +model_rfdetr_base(pretrained = FALSE, progress = TRUE, ...) + +model_rfdetr_base_2(pretrained = FALSE, progress = TRUE, ...) + +model_rfdetr_base_o365(pretrained = FALSE, progress = TRUE, ...) + +model_rfdetr_large(pretrained = FALSE, progress = TRUE, ...) 
+} +\arguments{ +\item{pretrained}{(bool): If TRUE, returns a model pre-trained on COCO or Objects365, depending on the variant.} + +\item{progress}{(bool): If TRUE, displays a progress bar of the download to +stderr.} + +\item{...}{Other parameters passed to the model implementation.} +} +\description{ +RF-DETR: Neural Architecture Search for Real-Time Detection Transformers +(\url{https://arxiv.org/abs/2511.09554}) +} +\details{ +Object detection transformer models combining a DINOv2 backbone +(windowed attention, register tokens) with a deformable-attention +decoder and two-stage query proposal. Supports Nano, Small, Medium, +Base, Base v2, Base with O365 pretraining, and Large variants. +\subsection{Model Variants}{ + +\if{html}{\out{
}}\preformatted{| Variant | Backbone | Decoder Layers | Resolution | # Queries | Group DETR | Weights | +|-----------|--------------------|----------------|------------|-----------|------------|-----------------------------| +| nano | DINOv2 Small (win) | 2 | 384 | 300 | 13 | COCO (91 classes) | +| small | DINOv2 Small (win) | 2 | 512 | 300 | 13 | COCO (91 classes) | +| medium | DINOv2 Small (win) | 2 | 640 | 300 | 13 | COCO (91 classes) | +| base | DINOv2 Small (win) | 3 | 640 | 300 | 13 | COCO (91 classes) | +| base_2 | DINOv2 Small (win) | 3 | 640 | 300 | 13 | COCO (91 classes, alt run) | +| base_o365 | DINOv2 Small (win) | 3 | 640 | 300 | 13 | Objects365 (366 classes) | +| large | DINOv2 Base (win) | 3 | 560 | 300 | 13 | COCO (91 classes) | +}\if{html}{\out{
}} +\itemize{ +\item All models use group DETR (group_detr=13) with two-stage query proposal, +Lite Refpoint Refine, and BBox reparameterisation. +\item The \code{large} variant corresponds to the deprecated RF-DETR-Large config +(DINOv2 Base encoder, hidden_dim=384). +} +} +} +\section{Functions}{ +\itemize{ +\item \code{model_rfdetr_nano()}: RF-DETR Nano (fastest, COCO, 384px) + +\item \code{model_rfdetr_small()}: RF-DETR Small (lightweight, COCO, 512px) + +\item \code{model_rfdetr_medium()}: RF-DETR Medium (balanced speed/accuracy, COCO, 640px) + +\item \code{model_rfdetr_base()}: RF-DETR Base (COCO pretrained, 640px) + +\item \code{model_rfdetr_base_2()}: RF-DETR Base variant 2 (alternative COCO training run) + +\item \code{model_rfdetr_base_o365()}: RF-DETR Base O365 (Objects365, 366 classes) + +\item \code{model_rfdetr_large()}: RF-DETR Large (DINOv2 Base backbone, COCO, 560px) + +}} +\seealso{ +Other object_detection_model: +\code{\link{model_convnext_detection}}, +\code{\link{model_facenet}}, +\code{\link{model_fasterrcnn}}, +\code{\link{model_maskrcnn}} +} +\concept{object_detection_model} diff --git a/man/model_vgg.Rd b/man/model_vgg.Rd index 08756118..dd36a07c 100644 --- a/man/model_vgg.Rd +++ b/man/model_vgg.Rd @@ -60,15 +60,15 @@ VGG models implementations based on }} \seealso{ -Other classification_model: -\code{\link[=model_alexnet]{model_alexnet()}}, +Other classification_model: +\code{\link{model_alexnet}()}, \code{\link{model_convnext}}, \code{\link{model_efficientnet}}, \code{\link{model_efficientnet_v2}}, \code{\link{model_facenet}}, -\code{\link[=model_inception_v3]{model_inception_v3()}}, -\code{\link[=model_maxvit]{model_maxvit()}}, -\code{\link[=model_mobilenet_v2]{model_mobilenet_v2()}}, +\code{\link{model_inception_v3}()}, +\code{\link{model_maxvit}()}, +\code{\link{model_mobilenet_v2}()}, \code{\link{model_mobilenet_v3}}, \code{\link{model_resnet}}, \code{\link{model_vit}} diff --git a/man/model_vit.Rd b/man/model_vit.Rd index 
dccfd82b..03865e33 100644 --- a/man/model_vit.Rd +++ b/man/model_vit.Rd @@ -71,15 +71,15 @@ image patches as tokens in a Transformer model. }} \seealso{ -Other classification_model: -\code{\link[=model_alexnet]{model_alexnet()}}, +Other classification_model: +\code{\link{model_alexnet}()}, \code{\link{model_convnext}}, \code{\link{model_efficientnet}}, \code{\link{model_efficientnet_v2}}, \code{\link{model_facenet}}, -\code{\link[=model_inception_v3]{model_inception_v3()}}, -\code{\link[=model_maxvit]{model_maxvit()}}, -\code{\link[=model_mobilenet_v2]{model_mobilenet_v2()}}, +\code{\link{model_inception_v3}()}, +\code{\link{model_maxvit}()}, +\code{\link{model_mobilenet_v2}()}, \code{\link{model_mobilenet_v3}}, \code{\link{model_resnet}}, \code{\link{model_vgg}} diff --git a/man/nms.Rd b/man/nms.Rd index ec24cb5c..5d25b3a9 100644 --- a/man/nms.Rd +++ b/man/nms.Rd @@ -34,7 +34,7 @@ criterion with respect to a reference box, the selected box is not guaranteed to be the same between CPU and GPU. This is similar to the behavior of argsort in torch when repeated values are present. -When the optional \pkg{torchvisionlib} package and its native library are +When the optional \code{torchvisionlib} package and its native library are installed, \code{nms()} uses \code{torchvisionlib::ops_nms()} for faster inference. -Otherwise, it falls back to the pure R algorithm. +Otherwise, it falls back to the pure R algorithm below. 
} diff --git a/man/oxfordiiitpet_dataset.Rd b/man/oxfordiiitpet_dataset.Rd index c82dc3b4..6f992168 100644 --- a/man/oxfordiiitpet_dataset.Rd +++ b/man/oxfordiiitpet_dataset.Rd @@ -77,20 +77,20 @@ oxford_bin$classes[first_item$y] # "Cat" or "Dog" } \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{mnist_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_plankton_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/oxfordiiitpet_segmentation_dataset.Rd b/man/oxfordiiitpet_segmentation_dataset.Rd index abfdc01f..7919dfb0 100644 --- a/man/oxfordiiitpet_segmentation_dataset.Rd +++ b/man/oxfordiiitpet_segmentation_dataset.Rd @@ -76,9 +76,9 @@ tensor_image_browse(overlay) } \seealso{ -Other segmentation_dataset: -\code{\link[=coco_segmentation_dataset]{coco_segmentation_dataset()}}, +Other segmentation_dataset: 
+\code{\link{coco_segmentation_dataset}()}, \code{\link{pascal_voc_datasets}}, -\code{\link[=rf100_peixos_segmentation_dataset]{rf100_peixos_segmentation_dataset()}} +\code{\link{rf100_peixos_segmentation_dataset}()} } \concept{segmentation_dataset} diff --git a/man/pascal_voc_classes.Rd b/man/pascal_voc_classes.Rd index bdddcc2f..1840275b 100644 --- a/man/pascal_voc_classes.Rd +++ b/man/pascal_voc_classes.Rd @@ -24,9 +24,9 @@ The dataset defines 21 semantic classes: \code{"background"}, \code{"aeroplane"} They are available through the \code{classes} variable of the dataset object. } \seealso{ -Other class_resolution: -\code{\link[=caltech_classes]{caltech_classes()}}, -\code{\link[=coco_classes]{coco_classes()}}, -\code{\link[=imagenet_classes]{imagenet_classes()}} +Other class_resolution: +\code{\link{caltech_classes}()}, +\code{\link{coco_classes}()}, +\code{\link{imagenet_classes}()} } \concept{class_resolution} diff --git a/man/pascal_voc_datasets.Rd b/man/pascal_voc_datasets.Rd index 12041ffa..3e654cd1 100644 --- a/man/pascal_voc_datasets.Rd +++ b/man/pascal_voc_datasets.Rd @@ -112,19 +112,19 @@ tensor_image_browse(boxed_img) } \seealso{ -Other segmentation_dataset: -\code{\link[=coco_segmentation_dataset]{coco_segmentation_dataset()}}, -\code{\link[=oxfordiiitpet_segmentation_dataset]{oxfordiiitpet_segmentation_dataset()}}, -\code{\link[=rf100_peixos_segmentation_dataset]{rf100_peixos_segmentation_dataset()}} - -Other detection_dataset: -\code{\link[=coco_detection_dataset]{coco_detection_dataset()}}, -\code{\link[=rf100_biology_collection]{rf100_biology_collection()}}, -\code{\link[=rf100_damage_collection]{rf100_damage_collection()}}, -\code{\link[=rf100_document_collection]{rf100_document_collection()}}, -\code{\link[=rf100_infrared_collection]{rf100_infrared_collection()}}, -\code{\link[=rf100_medical_collection]{rf100_medical_collection()}}, -\code{\link[=rf100_underwater_collection]{rf100_underwater_collection()}} +Other segmentation_dataset: 
+\code{\link{coco_segmentation_dataset}()}, +\code{\link{oxfordiiitpet_segmentation_dataset}()}, +\code{\link{rf100_peixos_segmentation_dataset}()} + +Other detection_dataset: +\code{\link{coco_detection_dataset}()}, +\code{\link{rf100_biology_collection}()}, +\code{\link{rf100_damage_collection}()}, +\code{\link{rf100_document_collection}()}, +\code{\link{rf100_infrared_collection}()}, +\code{\link{rf100_medical_collection}()}, +\code{\link{rf100_underwater_collection}()} } \concept{detection_dataset} \concept{segmentation_dataset} diff --git a/man/places365_dataset.Rd b/man/places365_dataset.Rd index 9c107496..dc2255ac 100644 --- a/man/places365_dataset.Rd +++ b/man/places365_dataset.Rd @@ -99,20 +99,20 @@ batch$x } \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{mnist_dataset}()}, +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_plankton_dataset}()}, 
+\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/rf100_biology_collection.Rd b/man/rf100_biology_collection.Rd index 322fbb2d..368df536 100644 --- a/man/rf100_biology_collection.Rd +++ b/man/rf100_biology_collection.Rd @@ -58,13 +58,13 @@ tensor_image_browse(boxed) } \seealso{ -Other detection_dataset: -\code{\link[=coco_detection_dataset]{coco_detection_dataset()}}, +Other detection_dataset: +\code{\link{coco_detection_dataset}()}, \code{\link{pascal_voc_datasets}}, -\code{\link[=rf100_damage_collection]{rf100_damage_collection()}}, -\code{\link[=rf100_document_collection]{rf100_document_collection()}}, -\code{\link[=rf100_infrared_collection]{rf100_infrared_collection()}}, -\code{\link[=rf100_medical_collection]{rf100_medical_collection()}}, -\code{\link[=rf100_underwater_collection]{rf100_underwater_collection()}} +\code{\link{rf100_damage_collection}()}, +\code{\link{rf100_document_collection}()}, +\code{\link{rf100_infrared_collection}()}, +\code{\link{rf100_medical_collection}()}, +\code{\link{rf100_underwater_collection}()} } \concept{detection_dataset} diff --git a/man/rf100_damage_collection.Rd b/man/rf100_damage_collection.Rd index 6590110b..3ff4e815 100644 --- a/man/rf100_damage_collection.Rd +++ b/man/rf100_damage_collection.Rd @@ -57,13 +57,13 @@ tensor_image_browse(boxed) } \seealso{ -Other detection_dataset: -\code{\link[=coco_detection_dataset]{coco_detection_dataset()}}, +Other detection_dataset: +\code{\link{coco_detection_dataset}()}, \code{\link{pascal_voc_datasets}}, -\code{\link[=rf100_biology_collection]{rf100_biology_collection()}}, -\code{\link[=rf100_document_collection]{rf100_document_collection()}}, -\code{\link[=rf100_infrared_collection]{rf100_infrared_collection()}}, -\code{\link[=rf100_medical_collection]{rf100_medical_collection()}}, -\code{\link[=rf100_underwater_collection]{rf100_underwater_collection()}} +\code{\link{rf100_biology_collection}()}, 
+\code{\link{rf100_document_collection}()}, +\code{\link{rf100_infrared_collection}()}, +\code{\link{rf100_medical_collection}()}, +\code{\link{rf100_underwater_collection}()} } \concept{detection_dataset} diff --git a/man/rf100_document_collection.Rd b/man/rf100_document_collection.Rd index daddf675..08b0373c 100644 --- a/man/rf100_document_collection.Rd +++ b/man/rf100_document_collection.Rd @@ -67,13 +67,13 @@ tensor_image_browse(boxed_img) } \seealso{ -Other detection_dataset: -\code{\link[=coco_detection_dataset]{coco_detection_dataset()}}, +Other detection_dataset: +\code{\link{coco_detection_dataset}()}, \code{\link{pascal_voc_datasets}}, -\code{\link[=rf100_biology_collection]{rf100_biology_collection()}}, -\code{\link[=rf100_damage_collection]{rf100_damage_collection()}}, -\code{\link[=rf100_infrared_collection]{rf100_infrared_collection()}}, -\code{\link[=rf100_medical_collection]{rf100_medical_collection()}}, -\code{\link[=rf100_underwater_collection]{rf100_underwater_collection()}} +\code{\link{rf100_biology_collection}()}, +\code{\link{rf100_damage_collection}()}, +\code{\link{rf100_infrared_collection}()}, +\code{\link{rf100_medical_collection}()}, +\code{\link{rf100_underwater_collection}()} } \concept{detection_dataset} diff --git a/man/rf100_infrared_collection.Rd b/man/rf100_infrared_collection.Rd index 54fdc03f..a3979ab6 100644 --- a/man/rf100_infrared_collection.Rd +++ b/man/rf100_infrared_collection.Rd @@ -57,13 +57,13 @@ tensor_image_browse(boxed) } \seealso{ -Other detection_dataset: -\code{\link[=coco_detection_dataset]{coco_detection_dataset()}}, +Other detection_dataset: +\code{\link{coco_detection_dataset}()}, \code{\link{pascal_voc_datasets}}, -\code{\link[=rf100_biology_collection]{rf100_biology_collection()}}, -\code{\link[=rf100_damage_collection]{rf100_damage_collection()}}, -\code{\link[=rf100_document_collection]{rf100_document_collection()}}, -\code{\link[=rf100_medical_collection]{rf100_medical_collection()}}, 
-\code{\link[=rf100_underwater_collection]{rf100_underwater_collection()}} +\code{\link{rf100_biology_collection}()}, +\code{\link{rf100_damage_collection}()}, +\code{\link{rf100_document_collection}()}, +\code{\link{rf100_medical_collection}()}, +\code{\link{rf100_underwater_collection}()} } \concept{detection_dataset} diff --git a/man/rf100_medical_collection.Rd b/man/rf100_medical_collection.Rd index 347a168d..aaed3db9 100644 --- a/man/rf100_medical_collection.Rd +++ b/man/rf100_medical_collection.Rd @@ -59,13 +59,13 @@ tensor_image_browse(boxed) } \seealso{ -Other detection_dataset: -\code{\link[=coco_detection_dataset]{coco_detection_dataset()}}, +Other detection_dataset: +\code{\link{coco_detection_dataset}()}, \code{\link{pascal_voc_datasets}}, -\code{\link[=rf100_biology_collection]{rf100_biology_collection()}}, -\code{\link[=rf100_damage_collection]{rf100_damage_collection()}}, -\code{\link[=rf100_document_collection]{rf100_document_collection()}}, -\code{\link[=rf100_infrared_collection]{rf100_infrared_collection()}}, -\code{\link[=rf100_underwater_collection]{rf100_underwater_collection()}} +\code{\link{rf100_biology_collection}()}, +\code{\link{rf100_damage_collection}()}, +\code{\link{rf100_document_collection}()}, +\code{\link{rf100_infrared_collection}()}, +\code{\link{rf100_underwater_collection}()} } \concept{detection_dataset} diff --git a/man/rf100_peixos_segmentation_dataset.Rd b/man/rf100_peixos_segmentation_dataset.Rd index ac444fe5..49de3751 100644 --- a/man/rf100_peixos_segmentation_dataset.Rd +++ b/man/rf100_peixos_segmentation_dataset.Rd @@ -58,9 +58,9 @@ tensor_image_browse(overlay) } \seealso{ -Other segmentation_dataset: -\code{\link[=coco_segmentation_dataset]{coco_segmentation_dataset()}}, -\code{\link[=oxfordiiitpet_segmentation_dataset]{oxfordiiitpet_segmentation_dataset()}}, +Other segmentation_dataset: +\code{\link{coco_segmentation_dataset}()}, +\code{\link{oxfordiiitpet_segmentation_dataset}()}, \code{\link{pascal_voc_datasets}} 
} \concept{segmentation_dataset} diff --git a/man/rf100_underwater_collection.Rd b/man/rf100_underwater_collection.Rd index 78cca678..ce95c628 100644 --- a/man/rf100_underwater_collection.Rd +++ b/man/rf100_underwater_collection.Rd @@ -61,13 +61,13 @@ tensor_image_browse(boxed) } \seealso{ -Other detection_dataset: -\code{\link[=coco_detection_dataset]{coco_detection_dataset()}}, +Other detection_dataset: +\code{\link{coco_detection_dataset}()}, \code{\link{pascal_voc_datasets}}, -\code{\link[=rf100_biology_collection]{rf100_biology_collection()}}, -\code{\link[=rf100_damage_collection]{rf100_damage_collection()}}, -\code{\link[=rf100_document_collection]{rf100_document_collection()}}, -\code{\link[=rf100_infrared_collection]{rf100_infrared_collection()}}, -\code{\link[=rf100_medical_collection]{rf100_medical_collection()}} +\code{\link{rf100_biology_collection}()}, +\code{\link{rf100_damage_collection}()}, +\code{\link{rf100_document_collection}()}, +\code{\link{rf100_infrared_collection}()}, +\code{\link{rf100_medical_collection}()} } \concept{detection_dataset} diff --git a/man/target_transform_coco_masks.Rd b/man/target_transform_coco_masks.Rd index 5470d61b..d79adf9c 100644 --- a/man/target_transform_coco_masks.Rd +++ b/man/target_transform_coco_masks.Rd @@ -31,7 +31,7 @@ draw_segmentation_masks(item) } \seealso{ -Other target_transforms: -\code{\link[=target_transform_trimap_masks]{target_transform_trimap_masks()}} +Other target_transforms: +\code{\link{target_transform_trimap_masks}()} } \concept{target_transforms} diff --git a/man/target_transform_trimap_masks.Rd b/man/target_transform_trimap_masks.Rd index f415b385..e0d3a177 100644 --- a/man/target_transform_trimap_masks.Rd +++ b/man/target_transform_trimap_masks.Rd @@ -38,7 +38,7 @@ draw_segmentation_masks(item) } \seealso{ -Other target_transforms: -\code{\link[=target_transform_coco_masks]{target_transform_coco_masks()}} +Other target_transforms: +\code{\link{target_transform_coco_masks}()} } 
\concept{target_transforms} diff --git a/man/tensor_image_browse.Rd b/man/tensor_image_browse.Rd index 3f226ea8..09cc101a 100644 --- a/man/tensor_image_browse.Rd +++ b/man/tensor_image_browse.Rd @@ -16,11 +16,11 @@ color image to display} Display image tensor into browser } \seealso{ -Other image display: -\code{\link[=draw_bounding_boxes]{draw_bounding_boxes()}}, -\code{\link[=draw_keypoints]{draw_keypoints()}}, -\code{\link[=draw_segmentation_masks]{draw_segmentation_masks()}}, -\code{\link[=tensor_image_display]{tensor_image_display()}}, -\code{\link[=vision_make_grid]{vision_make_grid()}} +Other image display: +\code{\link{draw_bounding_boxes}()}, +\code{\link{draw_keypoints}()}, +\code{\link{draw_segmentation_masks}()}, +\code{\link{tensor_image_display}()}, +\code{\link{vision_make_grid}()} } \concept{image display} diff --git a/man/tensor_image_display.Rd b/man/tensor_image_display.Rd index 856cc3d0..cb921827 100644 --- a/man/tensor_image_display.Rd +++ b/man/tensor_image_display.Rd @@ -16,11 +16,11 @@ color image to display} Display image tensor onto the X11 device } \seealso{ -Other image display: -\code{\link[=draw_bounding_boxes]{draw_bounding_boxes()}}, -\code{\link[=draw_keypoints]{draw_keypoints()}}, -\code{\link[=draw_segmentation_masks]{draw_segmentation_masks()}}, -\code{\link[=tensor_image_browse]{tensor_image_browse()}}, -\code{\link[=vision_make_grid]{vision_make_grid()}} +Other image display: +\code{\link{draw_bounding_boxes}()}, +\code{\link{draw_keypoints}()}, +\code{\link{draw_segmentation_masks}()}, +\code{\link{tensor_image_browse}()}, +\code{\link{vision_make_grid}()} } \concept{image display} diff --git a/man/tiny_imagenet_dataset.Rd b/man/tiny_imagenet_dataset.Rd index f1fbaea5..77af8c3a 100644 --- a/man/tiny_imagenet_dataset.Rd +++ b/man/tiny_imagenet_dataset.Rd @@ -19,20 +19,20 @@ tiny_imagenet_dataset(root, split = "train", download = FALSE, ...) Prepares the Tiny ImageNet dataset and optionally downloads it. 
} \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{mnist_dataset}()}, +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_plankton_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/transform_adjust_brightness.Rd b/man/transform_adjust_brightness.Rd index ad0f6781..b6f89657 100644 --- a/man/transform_adjust_brightness.Rd +++ b/man/transform_adjust_brightness.Rd @@ -17,25 +17,25 @@ original image while 2 increases the brightness by a factor of 2.} Adjust the brightness of an image } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, 
-\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_adjust_contrast.Rd b/man/transform_adjust_contrast.Rd index d402f27a..fa415d1e 100644 --- a/man/transform_adjust_contrast.Rd +++ b/man/transform_adjust_contrast.Rd @@ -17,25 +17,25 @@ original image while 2 increases the contrast by a factor of 2.} Adjust the contrast of an image } \seealso{ -Other 
unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } 
\concept{unitary_transforms} diff --git a/man/transform_adjust_gamma.Rd b/man/transform_adjust_gamma.Rd index f84bd845..b85cecb1 100644 --- a/man/transform_adjust_gamma.Rd +++ b/man/transform_adjust_gamma.Rd @@ -27,25 +27,25 @@ based on the following equation: Search for Gamma Correction for more details. } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, 
+\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_adjust_hue.Rd b/man/transform_adjust_hue.Rd index df9d09d1..aea5f6f6 100644 --- a/man/transform_adjust_hue.Rd +++ b/man/transform_adjust_hue.Rd @@ -27,25 +27,25 @@ interval \verb{[-0.5, 0.5]}. Search for Hue for more details. } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, 
+\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_adjust_saturation.Rd b/man/transform_adjust_saturation.Rd index 12ea817e..ca5d1ddc 100644 --- a/man/transform_adjust_saturation.Rd +++ b/man/transform_adjust_saturation.Rd @@ -17,25 +17,25 @@ give a black and white image, 1 will give the original image while Adjust the color saturation of an image } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, 
-\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_affine.Rd b/man/transform_affine.Rd index a733db3f..3283bf19 100644 --- a/man/transform_affine.Rd +++ b/man/transform_affine.Rd @@ -48,25 +48,25 @@ Supported values are 0 / "nearest" and 2 / "bilinear". 
Default is 0.} Apply affine transformation on an image keeping image center invariant } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, 
+\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_center_crop.Rd b/man/transform_center_crop.Rd index e5c57cd9..14607247 100644 --- a/man/transform_center_crop.Rd +++ b/man/transform_center_crop.Rd @@ -20,25 +20,25 @@ expected to have \verb{[..., H, W]} shape, where ... means an arbitrary number of leading dimensions. } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, 
+\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_color_jitter.Rd b/man/transform_color_jitter.Rd index d8f8bb3e..bb5cf0ab 100644 --- a/man/transform_color_jitter.Rd +++ b/man/transform_color_jitter.Rd @@ -38,15 +38,15 @@ Should be non negative numbers.} Randomly change the brightness, contrast and saturation of an image } \seealso{ -Other random_transforms: -\code{\link[=transform_random_affine]{transform_random_affine()}}, -\code{\link[=transform_random_crop]{transform_random_crop()}}, -\code{\link[=transform_random_erasing]{transform_random_erasing()}}, -\code{\link[=transform_random_grayscale]{transform_random_grayscale()}}, -\code{\link[=transform_random_horizontal_flip]{transform_random_horizontal_flip()}}, -\code{\link[=transform_random_perspective]{transform_random_perspective()}}, -\code{\link[=transform_random_resized_crop]{transform_random_resized_crop()}}, -\code{\link[=transform_random_rotation]{transform_random_rotation()}}, -\code{\link[=transform_random_vertical_flip]{transform_random_vertical_flip()}} +Other random_transforms: +\code{\link{transform_random_affine}()}, +\code{\link{transform_random_crop}()}, +\code{\link{transform_random_erasing}()}, +\code{\link{transform_random_grayscale}()}, +\code{\link{transform_random_horizontal_flip}()}, +\code{\link{transform_random_perspective}()}, +\code{\link{transform_random_resized_crop}()}, +\code{\link{transform_random_rotation}()}, +\code{\link{transform_random_vertical_flip}()} } 
\concept{random_transforms} diff --git a/man/transform_convert_image_dtype.Rd b/man/transform_convert_image_dtype.Rd index eb94642f..becb1637 100644 --- a/man/transform_convert_image_dtype.Rd +++ b/man/transform_convert_image_dtype.Rd @@ -20,25 +20,25 @@ values are \strong{not} mapped exactly. If converted back and forth, this mismatch has no effect. } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, 
+\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_crop.Rd b/man/transform_crop.Rd index d8dfd22f..11c21f1e 100644 --- a/man/transform_crop.Rd +++ b/man/transform_crop.Rd @@ -22,25 +22,25 @@ box.} Crop the given image at specified location and output size } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, 
+\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_five_crop.Rd b/man/transform_five_crop.Rd index ff53abec..51cb51b9 100644 --- a/man/transform_five_crop.Rd +++ b/man/transform_five_crop.Rd @@ -21,11 +21,11 @@ returns a tuple of images and there may be a mismatch in the number of inputs and targets your Dataset returns. } \seealso{ -Other combining_transforms: -\code{\link[=transform_random_apply]{transform_random_apply()}}, -\code{\link[=transform_random_choice]{transform_random_choice()}}, -\code{\link[=transform_random_order]{transform_random_order()}}, -\code{\link[=transform_resized_crop]{transform_resized_crop()}}, -\code{\link[=transform_ten_crop]{transform_ten_crop()}} +Other combining_transforms: +\code{\link{transform_random_apply}()}, +\code{\link{transform_random_choice}()}, +\code{\link{transform_random_order}()}, +\code{\link{transform_resized_crop}()}, +\code{\link{transform_ten_crop}()} } \concept{combining_transforms} diff --git a/man/transform_grayscale.Rd b/man/transform_grayscale.Rd index a51bf550..10eaac2b 100644 --- a/man/transform_grayscale.Rd +++ b/man/transform_grayscale.Rd @@ -16,25 +16,25 @@ output image} Convert image to grayscale } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, 
-\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_hflip.Rd b/man/transform_hflip.Rd 
index bfa7799d..2e0c54f7 100644 --- a/man/transform_hflip.Rd +++ b/man/transform_hflip.Rd @@ -13,25 +13,25 @@ transform_hflip(img) Horizontally flip a PIL Image or Tensor } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, 
+\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_linear_transformation.Rd b/man/transform_linear_transformation.Rd index 464af05b..f7b3fc78 100644 --- a/man/transform_linear_transformation.Rd +++ b/man/transform_linear_transformation.Rd @@ -28,25 +28,25 @@ perform SVD on this matrix and pass it as \code{transformation_matrix}. } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, 
+\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_normalize.Rd b/man/transform_normalize.Rd index 1bc14285..8df4a2a9 100644 --- a/man/transform_normalize.Rd +++ b/man/transform_normalize.Rd @@ -25,25 +25,25 @@ channels, this transform will normalize each channel of the input This transform acts out of place, i.e., it does not mutate the input tensor. } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, 
-\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_pad.Rd b/man/transform_pad.Rd index d6a9ef8d..785ee367 100644 --- a/man/transform_pad.Rd +++ b/man/transform_pad.Rd @@ -40,25 +40,25 @@ expected to have \verb{[..., H, W]} shape, where ... means an arbitrary number of leading dimensions. 
} \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, 
+\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_perspective.Rd b/man/transform_perspective.Rd index ccc1ac6f..dc60b2cc 100644 --- a/man/transform_perspective.Rd +++ b/man/transform_perspective.Rd @@ -26,7 +26,7 @@ image.} \item{interpolation}{(int, optional) Desired interpolation. An integer \code{0 = nearest}, \code{2 = bilinear}, and \code{3 = bicubic} or a name from -\code{\link[magick:filter_types]{magick::filter_types()}}.} +\code{\link[magick:options]{magick::filter_types()}}.} \item{fill}{(int or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of length 3, it is used to fill R, G, B channels @@ -37,25 +37,25 @@ Only int value is supported for Tensors.} Perspective transformation of an image } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, 
-\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_random_affine.Rd b/man/transform_random_affine.Rd index 502378f2..62424fb2 100644 --- a/man/transform_random_affine.Rd +++ b/man/transform_random_affine.Rd @@ -56,15 +56,15 @@ Default is 0. 
This option is not supported for Tensor input.} Random affine transformation of the image keeping center invariant } \seealso{ -Other random_transforms: -\code{\link[=transform_color_jitter]{transform_color_jitter()}}, -\code{\link[=transform_random_crop]{transform_random_crop()}}, -\code{\link[=transform_random_erasing]{transform_random_erasing()}}, -\code{\link[=transform_random_grayscale]{transform_random_grayscale()}}, -\code{\link[=transform_random_horizontal_flip]{transform_random_horizontal_flip()}}, -\code{\link[=transform_random_perspective]{transform_random_perspective()}}, -\code{\link[=transform_random_resized_crop]{transform_random_resized_crop()}}, -\code{\link[=transform_random_rotation]{transform_random_rotation()}}, -\code{\link[=transform_random_vertical_flip]{transform_random_vertical_flip()}} +Other random_transforms: +\code{\link{transform_color_jitter}()}, +\code{\link{transform_random_crop}()}, +\code{\link{transform_random_erasing}()}, +\code{\link{transform_random_grayscale}()}, +\code{\link{transform_random_horizontal_flip}()}, +\code{\link{transform_random_perspective}()}, +\code{\link{transform_random_resized_crop}()}, +\code{\link{transform_random_rotation}()}, +\code{\link{transform_random_vertical_flip}()} } \concept{random_transforms} diff --git a/man/transform_random_apply.Rd b/man/transform_random_apply.Rd index 9ded32d4..0778779c 100644 --- a/man/transform_random_apply.Rd +++ b/man/transform_random_apply.Rd @@ -17,11 +17,11 @@ transform_random_apply(img, transforms, p = 0.5) Apply a list of transformations randomly with a given probability } \seealso{ -Other combining_transforms: -\code{\link[=transform_five_crop]{transform_five_crop()}}, -\code{\link[=transform_random_choice]{transform_random_choice()}}, -\code{\link[=transform_random_order]{transform_random_order()}}, -\code{\link[=transform_resized_crop]{transform_resized_crop()}}, -\code{\link[=transform_ten_crop]{transform_ten_crop()}} +Other combining_transforms: 
+\code{\link{transform_five_crop}()}, +\code{\link{transform_random_choice}()}, +\code{\link{transform_random_order}()}, +\code{\link{transform_resized_crop}()}, +\code{\link{transform_ten_crop}()} } \concept{combining_transforms} diff --git a/man/transform_random_choice.Rd b/man/transform_random_choice.Rd index 0c10260c..d7e58ae8 100644 --- a/man/transform_random_choice.Rd +++ b/man/transform_random_choice.Rd @@ -15,11 +15,11 @@ transform_random_choice(img, transforms) Apply single transformation randomly picked from a list } \seealso{ -Other combining_transforms: -\code{\link[=transform_five_crop]{transform_five_crop()}}, -\code{\link[=transform_random_apply]{transform_random_apply()}}, -\code{\link[=transform_random_order]{transform_random_order()}}, -\code{\link[=transform_resized_crop]{transform_resized_crop()}}, -\code{\link[=transform_ten_crop]{transform_ten_crop()}} +Other combining_transforms: +\code{\link{transform_five_crop}()}, +\code{\link{transform_random_apply}()}, +\code{\link{transform_random_order}()}, +\code{\link{transform_resized_crop}()}, +\code{\link{transform_ten_crop}()} } \concept{combining_transforms} diff --git a/man/transform_random_crop.Rd b/man/transform_random_crop.Rd index 04cce526..1683691a 100644 --- a/man/transform_random_crop.Rd +++ b/man/transform_random_crop.Rd @@ -57,15 +57,15 @@ to have \verb{[..., H, W]} shape, where ... means an arbitrary number of leading dimensions. 
} \seealso{ -Other random_transforms: -\code{\link[=transform_color_jitter]{transform_color_jitter()}}, -\code{\link[=transform_random_affine]{transform_random_affine()}}, -\code{\link[=transform_random_erasing]{transform_random_erasing()}}, -\code{\link[=transform_random_grayscale]{transform_random_grayscale()}}, -\code{\link[=transform_random_horizontal_flip]{transform_random_horizontal_flip()}}, -\code{\link[=transform_random_perspective]{transform_random_perspective()}}, -\code{\link[=transform_random_resized_crop]{transform_random_resized_crop()}}, -\code{\link[=transform_random_rotation]{transform_random_rotation()}}, -\code{\link[=transform_random_vertical_flip]{transform_random_vertical_flip()}} +Other random_transforms: +\code{\link{transform_color_jitter}()}, +\code{\link{transform_random_affine}()}, +\code{\link{transform_random_erasing}()}, +\code{\link{transform_random_grayscale}()}, +\code{\link{transform_random_horizontal_flip}()}, +\code{\link{transform_random_perspective}()}, +\code{\link{transform_random_resized_crop}()}, +\code{\link{transform_random_rotation}()}, +\code{\link{transform_random_vertical_flip}()} } \concept{random_transforms} diff --git a/man/transform_random_erasing.Rd b/man/transform_random_erasing.Rd index 56767b21..75994b89 100644 --- a/man/transform_random_erasing.Rd +++ b/man/transform_random_erasing.Rd @@ -34,15 +34,15 @@ If a str of 'random', erasing each pixel with random values.} See \url{https://arxiv.org/pdf/1708.04896} } \seealso{ -Other random_transforms: -\code{\link[=transform_color_jitter]{transform_color_jitter()}}, -\code{\link[=transform_random_affine]{transform_random_affine()}}, -\code{\link[=transform_random_crop]{transform_random_crop()}}, -\code{\link[=transform_random_grayscale]{transform_random_grayscale()}}, -\code{\link[=transform_random_horizontal_flip]{transform_random_horizontal_flip()}}, -\code{\link[=transform_random_perspective]{transform_random_perspective()}}, 
-\code{\link[=transform_random_resized_crop]{transform_random_resized_crop()}}, -\code{\link[=transform_random_rotation]{transform_random_rotation()}}, -\code{\link[=transform_random_vertical_flip]{transform_random_vertical_flip()}} +Other random_transforms: +\code{\link{transform_color_jitter}()}, +\code{\link{transform_random_affine}()}, +\code{\link{transform_random_crop}()}, +\code{\link{transform_random_grayscale}()}, +\code{\link{transform_random_horizontal_flip}()}, +\code{\link{transform_random_perspective}()}, +\code{\link{transform_random_resized_crop}()}, +\code{\link{transform_random_rotation}()}, +\code{\link{transform_random_vertical_flip}()} } \concept{random_transforms} diff --git a/man/transform_random_grayscale.Rd b/man/transform_random_grayscale.Rd index d9283816..96f2f5ee 100644 --- a/man/transform_random_grayscale.Rd +++ b/man/transform_random_grayscale.Rd @@ -16,15 +16,15 @@ transform_random_grayscale(img, p = 0.1) Convert image to grayscale with a probability of \code{p}. 
} \seealso{ -Other random_transforms: -\code{\link[=transform_color_jitter]{transform_color_jitter()}}, -\code{\link[=transform_random_affine]{transform_random_affine()}}, -\code{\link[=transform_random_crop]{transform_random_crop()}}, -\code{\link[=transform_random_erasing]{transform_random_erasing()}}, -\code{\link[=transform_random_horizontal_flip]{transform_random_horizontal_flip()}}, -\code{\link[=transform_random_perspective]{transform_random_perspective()}}, -\code{\link[=transform_random_resized_crop]{transform_random_resized_crop()}}, -\code{\link[=transform_random_rotation]{transform_random_rotation()}}, -\code{\link[=transform_random_vertical_flip]{transform_random_vertical_flip()}} +Other random_transforms: +\code{\link{transform_color_jitter}()}, +\code{\link{transform_random_affine}()}, +\code{\link{transform_random_crop}()}, +\code{\link{transform_random_erasing}()}, +\code{\link{transform_random_horizontal_flip}()}, +\code{\link{transform_random_perspective}()}, +\code{\link{transform_random_resized_crop}()}, +\code{\link{transform_random_rotation}()}, +\code{\link{transform_random_vertical_flip}()} } \concept{random_transforms} diff --git a/man/transform_random_horizontal_flip.Rd b/man/transform_random_horizontal_flip.Rd index 86a41331..a75075d3 100644 --- a/man/transform_random_horizontal_flip.Rd +++ b/man/transform_random_horizontal_flip.Rd @@ -19,15 +19,15 @@ be a Magick Image or a torch Tensor, in which case it is expected to have dimensions } \seealso{ -Other random_transforms: -\code{\link[=transform_color_jitter]{transform_color_jitter()}}, -\code{\link[=transform_random_affine]{transform_random_affine()}}, -\code{\link[=transform_random_crop]{transform_random_crop()}}, -\code{\link[=transform_random_erasing]{transform_random_erasing()}}, -\code{\link[=transform_random_grayscale]{transform_random_grayscale()}}, -\code{\link[=transform_random_perspective]{transform_random_perspective()}}, 
-\code{\link[=transform_random_resized_crop]{transform_random_resized_crop()}}, -\code{\link[=transform_random_rotation]{transform_random_rotation()}}, -\code{\link[=transform_random_vertical_flip]{transform_random_vertical_flip()}} +Other random_transforms: +\code{\link{transform_color_jitter}()}, +\code{\link{transform_random_affine}()}, +\code{\link{transform_random_crop}()}, +\code{\link{transform_random_erasing}()}, +\code{\link{transform_random_grayscale}()}, +\code{\link{transform_random_perspective}()}, +\code{\link{transform_random_resized_crop}()}, +\code{\link{transform_random_rotation}()}, +\code{\link{transform_random_vertical_flip}()} } \concept{random_transforms} diff --git a/man/transform_random_order.Rd b/man/transform_random_order.Rd index fff2abd2..1fa09b73 100644 --- a/man/transform_random_order.Rd +++ b/man/transform_random_order.Rd @@ -15,11 +15,11 @@ transform_random_order(img, transforms) Apply a list of transformations in a random order } \seealso{ -Other combining_transforms: -\code{\link[=transform_five_crop]{transform_five_crop()}}, -\code{\link[=transform_random_apply]{transform_random_apply()}}, -\code{\link[=transform_random_choice]{transform_random_choice()}}, -\code{\link[=transform_resized_crop]{transform_resized_crop()}}, -\code{\link[=transform_ten_crop]{transform_ten_crop()}} +Other combining_transforms: +\code{\link{transform_five_crop}()}, +\code{\link{transform_random_apply}()}, +\code{\link{transform_random_choice}()}, +\code{\link{transform_resized_crop}()}, +\code{\link{transform_ten_crop}()} } \concept{combining_transforms} diff --git a/man/transform_random_perspective.Rd b/man/transform_random_perspective.Rd index 1d95b5c2..34dbd4d9 100644 --- a/man/transform_random_perspective.Rd +++ b/man/transform_random_perspective.Rd @@ -22,7 +22,7 @@ and ranges from 0 to 1. Default is 0.5.} \item{interpolation}{(int, optional) Desired interpolation. 
An integer \code{0 = nearest}, \code{2 = bilinear}, and \code{3 = bicubic} or a name from -\code{\link[magick:filter_types]{magick::filter_types()}}.} +\code{\link[magick:options]{magick::filter_types()}}.} \item{fill}{(int or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of length 3, it is used to fill R, G, B channels @@ -34,15 +34,15 @@ Performs a random perspective transformation of the given image with a given probability } \seealso{ -Other random_transforms: -\code{\link[=transform_color_jitter]{transform_color_jitter()}}, -\code{\link[=transform_random_affine]{transform_random_affine()}}, -\code{\link[=transform_random_crop]{transform_random_crop()}}, -\code{\link[=transform_random_erasing]{transform_random_erasing()}}, -\code{\link[=transform_random_grayscale]{transform_random_grayscale()}}, -\code{\link[=transform_random_horizontal_flip]{transform_random_horizontal_flip()}}, -\code{\link[=transform_random_resized_crop]{transform_random_resized_crop()}}, -\code{\link[=transform_random_rotation]{transform_random_rotation()}}, -\code{\link[=transform_random_vertical_flip]{transform_random_vertical_flip()}} +Other random_transforms: +\code{\link{transform_color_jitter}()}, +\code{\link{transform_random_affine}()}, +\code{\link{transform_random_crop}()}, +\code{\link{transform_random_erasing}()}, +\code{\link{transform_random_grayscale}()}, +\code{\link{transform_random_horizontal_flip}()}, +\code{\link{transform_random_resized_crop}()}, +\code{\link{transform_random_rotation}()}, +\code{\link{transform_random_vertical_flip}()} } \concept{random_transforms} diff --git a/man/transform_random_resized_crop.Rd b/man/transform_random_resized_crop.Rd index 994f2d9b..0e521def 100644 --- a/man/transform_random_resized_crop.Rd +++ b/man/transform_random_resized_crop.Rd @@ -28,7 +28,7 @@ ratio cropped.} \item{interpolation}{(int, optional) Desired interpolation. 
An integer \code{0 = nearest}, \code{2 = bilinear}, and \code{3 = bicubic} or a name from -\code{\link[magick:filter_types]{magick::filter_types()}}.} +\code{\link[magick:options]{magick::filter_types()}}.} } \description{ Crop the given image to a random size and aspect ratio. The image can be a @@ -43,15 +43,15 @@ ratio is made. This crop is finally resized to given size. This is popularly used to train the Inception networks. } \seealso{ -Other random_transforms: -\code{\link[=transform_color_jitter]{transform_color_jitter()}}, -\code{\link[=transform_random_affine]{transform_random_affine()}}, -\code{\link[=transform_random_crop]{transform_random_crop()}}, -\code{\link[=transform_random_erasing]{transform_random_erasing()}}, -\code{\link[=transform_random_grayscale]{transform_random_grayscale()}}, -\code{\link[=transform_random_horizontal_flip]{transform_random_horizontal_flip()}}, -\code{\link[=transform_random_perspective]{transform_random_perspective()}}, -\code{\link[=transform_random_rotation]{transform_random_rotation()}}, -\code{\link[=transform_random_vertical_flip]{transform_random_vertical_flip()}} +Other random_transforms: +\code{\link{transform_color_jitter}()}, +\code{\link{transform_random_affine}()}, +\code{\link{transform_random_crop}()}, +\code{\link{transform_random_erasing}()}, +\code{\link{transform_random_grayscale}()}, +\code{\link{transform_random_horizontal_flip}()}, +\code{\link{transform_random_perspective}()}, +\code{\link{transform_random_rotation}()}, +\code{\link{transform_random_vertical_flip}()} } \concept{random_transforms} diff --git a/man/transform_random_rotation.Rd b/man/transform_random_rotation.Rd index 47d78b54..b01c6269 100644 --- a/man/transform_random_rotation.Rd +++ b/man/transform_random_rotation.Rd @@ -44,15 +44,15 @@ value for the area outside the transform in the output image is always 0.} Rotate the image by angle } \seealso{ -Other random_transforms: 
-\code{\link[=transform_color_jitter]{transform_color_jitter()}}, -\code{\link[=transform_random_affine]{transform_random_affine()}}, -\code{\link[=transform_random_crop]{transform_random_crop()}}, -\code{\link[=transform_random_erasing]{transform_random_erasing()}}, -\code{\link[=transform_random_grayscale]{transform_random_grayscale()}}, -\code{\link[=transform_random_horizontal_flip]{transform_random_horizontal_flip()}}, -\code{\link[=transform_random_perspective]{transform_random_perspective()}}, -\code{\link[=transform_random_resized_crop]{transform_random_resized_crop()}}, -\code{\link[=transform_random_vertical_flip]{transform_random_vertical_flip()}} +Other random_transforms: +\code{\link{transform_color_jitter}()}, +\code{\link{transform_random_affine}()}, +\code{\link{transform_random_crop}()}, +\code{\link{transform_random_erasing}()}, +\code{\link{transform_random_grayscale}()}, +\code{\link{transform_random_horizontal_flip}()}, +\code{\link{transform_random_perspective}()}, +\code{\link{transform_random_resized_crop}()}, +\code{\link{transform_random_vertical_flip}()} } \concept{random_transforms} diff --git a/man/transform_random_vertical_flip.Rd b/man/transform_random_vertical_flip.Rd index 6de76b18..d66a712e 100644 --- a/man/transform_random_vertical_flip.Rd +++ b/man/transform_random_vertical_flip.Rd @@ -18,15 +18,15 @@ to have \verb{[..., H, W]} shape, where \code{...} means an arbitrary number of leading dimensions } \seealso{ -Other random_transforms: -\code{\link[=transform_color_jitter]{transform_color_jitter()}}, -\code{\link[=transform_random_affine]{transform_random_affine()}}, -\code{\link[=transform_random_crop]{transform_random_crop()}}, -\code{\link[=transform_random_erasing]{transform_random_erasing()}}, -\code{\link[=transform_random_grayscale]{transform_random_grayscale()}}, -\code{\link[=transform_random_horizontal_flip]{transform_random_horizontal_flip()}}, 
-\code{\link[=transform_random_perspective]{transform_random_perspective()}}, -\code{\link[=transform_random_resized_crop]{transform_random_resized_crop()}}, -\code{\link[=transform_random_rotation]{transform_random_rotation()}} +Other random_transforms: +\code{\link{transform_color_jitter}()}, +\code{\link{transform_random_affine}()}, +\code{\link{transform_random_crop}()}, +\code{\link{transform_random_erasing}()}, +\code{\link{transform_random_grayscale}()}, +\code{\link{transform_random_horizontal_flip}()}, +\code{\link{transform_random_perspective}()}, +\code{\link{transform_random_resized_crop}()}, +\code{\link{transform_random_rotation}()} } \concept{random_transforms} diff --git a/man/transform_resize.Rd b/man/transform_resize.Rd index 4e9762a9..8dbbe3cf 100644 --- a/man/transform_resize.Rd +++ b/man/transform_resize.Rd @@ -17,7 +17,7 @@ i.e, if height > width, then image will be rescaled to \item{interpolation}{(int, optional) Desired interpolation. An integer \code{0 = nearest}, \code{2 = bilinear}, and \code{3 = bicubic} or a name from -\code{\link[magick:filter_types]{magick::filter_types()}}.} +\code{\link[magick:options]{magick::filter_types()}}.} } \description{ The image can be a Magic Image or a torch Tensor, in which case it is @@ -25,25 +25,25 @@ expected to have \verb{[..., H, W]} shape, where ... 
means an arbitrary number of leading dimensions } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_rgb_to_grayscale}()}, 
+\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_resized_crop.Rd b/man/transform_resized_crop.Rd index 243a698d..15c07371 100644 --- a/man/transform_resized_crop.Rd +++ b/man/transform_resized_crop.Rd @@ -26,17 +26,17 @@ i.e, if height > width, then image will be rescaled to \item{interpolation}{(int, optional) Desired interpolation. An integer \code{0 = nearest}, \code{2 = bilinear}, and \code{3 = bicubic} or a name from -\code{\link[magick:filter_types]{magick::filter_types()}}.} +\code{\link[magick:options]{magick::filter_types()}}.} } \description{ Crop an image and resize it to a desired size } \seealso{ -Other combining_transforms: -\code{\link[=transform_five_crop]{transform_five_crop()}}, -\code{\link[=transform_random_apply]{transform_random_apply()}}, -\code{\link[=transform_random_choice]{transform_random_choice()}}, -\code{\link[=transform_random_order]{transform_random_order()}}, -\code{\link[=transform_ten_crop]{transform_ten_crop()}} +Other combining_transforms: +\code{\link{transform_five_crop}()}, +\code{\link{transform_random_apply}()}, +\code{\link{transform_random_choice}()}, +\code{\link{transform_random_order}()}, +\code{\link{transform_ten_crop}()} } \concept{combining_transforms} diff --git a/man/transform_rgb_to_grayscale.Rd b/man/transform_rgb_to_grayscale.Rd index 44f71ca2..ecadf725 100644 --- a/man/transform_rgb_to_grayscale.Rd +++ b/man/transform_rgb_to_grayscale.Rd @@ -14,25 +14,25 @@ For RGB to Grayscale conversion, ITU-R 601-2 luma transform is performed which is L = R * 0.2989 + G * 0.5870 + B * 0.1140 } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, 
-\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()}, +\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_rotate.Rd b/man/transform_rotate.Rd index efe95fc3..96498c4d 100644 --- a/man/transform_rotate.Rd +++ b/man/transform_rotate.Rd @@ -43,25 +43,25 @@ value for the area outside the transform in the output image is always 0.} Angular rotation of an image } 
\seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_to_tensor}()}, 
+\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_ten_crop.Rd b/man/transform_ten_crop.Rd index b7c88253..acd35f76 100644 --- a/man/transform_ten_crop.Rd +++ b/man/transform_ten_crop.Rd @@ -24,11 +24,11 @@ returns a tuple of images and there may be a mismatch in the number of inputs and targets your Dataset returns. } \seealso{ -Other combining_transforms: -\code{\link[=transform_five_crop]{transform_five_crop()}}, -\code{\link[=transform_random_apply]{transform_random_apply()}}, -\code{\link[=transform_random_choice]{transform_random_choice()}}, -\code{\link[=transform_random_order]{transform_random_order()}}, -\code{\link[=transform_resized_crop]{transform_resized_crop()}} +Other combining_transforms: +\code{\link{transform_five_crop}()}, +\code{\link{transform_random_apply}()}, +\code{\link{transform_random_choice}()}, +\code{\link{transform_random_order}()}, +\code{\link{transform_resized_crop}()} } \concept{combining_transforms} diff --git a/man/transform_to_tensor.Rd b/man/transform_to_tensor.Rd index dcf169d7..7fd3a4da 100644 --- a/man/transform_to_tensor.Rd +++ b/man/transform_to_tensor.Rd @@ -19,25 +19,25 @@ Because the input image is scaled to \verb{[0.0, 1.0]}, this transformation should not be used when transforming target image masks. 
} \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_vflip]{transform_vflip()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, +\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, 
+\code{\link{transform_vflip}()} } \concept{unitary_transforms} diff --git a/man/transform_vflip.Rd b/man/transform_vflip.Rd index 2b28faad..416df8ed 100644 --- a/man/transform_vflip.Rd +++ b/man/transform_vflip.Rd @@ -13,25 +13,25 @@ transform_vflip(img) Vertically flip a PIL Image or Tensor } \seealso{ -Other unitary_transforms: -\code{\link[=transform_adjust_brightness]{transform_adjust_brightness()}}, -\code{\link[=transform_adjust_contrast]{transform_adjust_contrast()}}, -\code{\link[=transform_adjust_gamma]{transform_adjust_gamma()}}, -\code{\link[=transform_adjust_hue]{transform_adjust_hue()}}, -\code{\link[=transform_adjust_saturation]{transform_adjust_saturation()}}, -\code{\link[=transform_affine]{transform_affine()}}, -\code{\link[=transform_center_crop]{transform_center_crop()}}, -\code{\link[=transform_convert_image_dtype]{transform_convert_image_dtype()}}, -\code{\link[=transform_crop]{transform_crop()}}, -\code{\link[=transform_grayscale]{transform_grayscale()}}, -\code{\link[=transform_hflip]{transform_hflip()}}, -\code{\link[=transform_linear_transformation]{transform_linear_transformation()}}, -\code{\link[=transform_normalize]{transform_normalize()}}, -\code{\link[=transform_pad]{transform_pad()}}, -\code{\link[=transform_perspective]{transform_perspective()}}, -\code{\link[=transform_resize]{transform_resize()}}, -\code{\link[=transform_rgb_to_grayscale]{transform_rgb_to_grayscale()}}, -\code{\link[=transform_rotate]{transform_rotate()}}, -\code{\link[=transform_to_tensor]{transform_to_tensor()}} +Other unitary_transforms: +\code{\link{transform_adjust_brightness}()}, +\code{\link{transform_adjust_contrast}()}, +\code{\link{transform_adjust_gamma}()}, +\code{\link{transform_adjust_hue}()}, +\code{\link{transform_adjust_saturation}()}, +\code{\link{transform_affine}()}, +\code{\link{transform_center_crop}()}, +\code{\link{transform_convert_image_dtype}()}, +\code{\link{transform_crop}()}, +\code{\link{transform_grayscale}()}, 
+\code{\link{transform_hflip}()}, +\code{\link{transform_linear_transformation}()}, +\code{\link{transform_normalize}()}, +\code{\link{transform_pad}()}, +\code{\link{transform_perspective}()}, +\code{\link{transform_resize}()}, +\code{\link{transform_rgb_to_grayscale}()}, +\code{\link{transform_rotate}()}, +\code{\link{transform_to_tensor}()} } \concept{unitary_transforms} diff --git a/man/vggface2_dataset.Rd b/man/vggface2_dataset.Rd index 272a511b..d84d77cc 100644 --- a/man/vggface2_dataset.Rd +++ b/man/vggface2_dataset.Rd @@ -60,20 +60,20 @@ ds$classes[item$y] # list(name=..., gender=...) } \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{mnist_dataset}()}, +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{whoi_plankton_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/vision_make_grid.Rd b/man/vision_make_grid.Rd index 
e69d7ba7..cfd3889f 100644 --- a/man/vision_make_grid.Rd +++ b/man/vision_make_grid.Rd @@ -31,11 +31,11 @@ Arranges a batch B of (image) tensors in a grid, with optional padding between images. Expects a 4d mini-batch tensor of shape (B x C x H x W). } \seealso{ -Other image display: -\code{\link[=draw_bounding_boxes]{draw_bounding_boxes()}}, -\code{\link[=draw_keypoints]{draw_keypoints()}}, -\code{\link[=draw_segmentation_masks]{draw_segmentation_masks()}}, -\code{\link[=tensor_image_browse]{tensor_image_browse()}}, -\code{\link[=tensor_image_display]{tensor_image_display()}} +Other image display: +\code{\link{draw_bounding_boxes}()}, +\code{\link{draw_keypoints}()}, +\code{\link{draw_segmentation_masks}()}, +\code{\link{tensor_image_browse}()}, +\code{\link{tensor_image_display}()} } \concept{image display} diff --git a/man/whoi_plankton_dataset.Rd b/man/whoi_plankton_dataset.Rd index e932b6fc..30ced205 100644 --- a/man/whoi_plankton_dataset.Rd +++ b/man/whoi_plankton_dataset.Rd @@ -76,20 +76,20 @@ first_item$y # id of the plankton class. 
} \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_small_coralnet_dataset]{whoi_small_coralnet_dataset()}} +\code{\link{mnist_dataset}()}, +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_small_coralnet_dataset}()} } \concept{classification_dataset} diff --git a/man/whoi_small_coralnet_dataset.Rd b/man/whoi_small_coralnet_dataset.Rd index 7efd8ba7..4a7adc0b 100644 --- a/man/whoi_small_coralnet_dataset.Rd +++ b/man/whoi_small_coralnet_dataset.Rd @@ -27,20 +27,20 @@ and produced by \href{https://coralnet.ucsd.edu}{CoralNet}, a resource for benthic images classification. 
} \seealso{ -Other classification_dataset: +Other classification_dataset: \code{\link{caltech_dataset}}, -\code{\link[=cifar10_dataset]{cifar10_dataset()}}, -\code{\link[=eurosat_dataset]{eurosat_dataset()}}, -\code{\link[=fer_dataset]{fer_dataset()}}, -\code{\link[=fgvc_aircraft_dataset]{fgvc_aircraft_dataset()}}, -\code{\link[=flowers102_dataset]{flowers102_dataset()}}, -\code{\link[=image_folder_dataset]{image_folder_dataset()}}, +\code{\link{cifar10_dataset}()}, +\code{\link{eurosat_dataset}()}, +\code{\link{fer_dataset}()}, +\code{\link{fgvc_aircraft_dataset}()}, +\code{\link{flowers102_dataset}()}, +\code{\link{image_folder_dataset}()}, \code{\link{lfw_dataset}}, -\code{\link[=mnist_dataset]{mnist_dataset()}}, -\code{\link[=oxfordiiitpet_dataset]{oxfordiiitpet_dataset()}}, -\code{\link[=places365_dataset]{places365_dataset()}}, -\code{\link[=tiny_imagenet_dataset]{tiny_imagenet_dataset()}}, -\code{\link[=vggface2_dataset]{vggface2_dataset()}}, -\code{\link[=whoi_plankton_dataset]{whoi_plankton_dataset()}} +\code{\link{mnist_dataset}()}, +\code{\link{oxfordiiitpet_dataset}()}, +\code{\link{places365_dataset}()}, +\code{\link{tiny_imagenet_dataset}()}, +\code{\link{vggface2_dataset}()}, +\code{\link{whoi_plankton_dataset}()} } \concept{classification_dataset} diff --git a/tests/testthat/test-models-rfdetr.R b/tests/testthat/test-models-rfdetr.R new file mode 100644 index 00000000..00ae8ef6 --- /dev/null +++ b/tests/testthat/test-models-rfdetr.R @@ -0,0 +1,195 @@ +test_that("tests for non-pretrained model_rfdetr_nano", { + model <- model_rfdetr_nano() + input <- torch::torch_randn(1, 3, 384, 384) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 91)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for pretrained model_rfdetr_nano", { + + skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") 
+ + model <- model_rfdetr_nano(pretrained = TRUE) + input <- torch::torch_randn(1, 3, 384, 384) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 91)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for non-pretrained model_rfdetr_small", { + model <- model_rfdetr_small() + input <- torch::torch_randn(1, 3, 512, 512) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 91)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for pretrained model_rfdetr_small", { + + skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") + + model <- model_rfdetr_small(pretrained = TRUE) + input <- torch::torch_randn(1, 3, 512, 512) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 91)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for non-pretrained model_rfdetr_medium", { + model <- model_rfdetr_medium() + input <- torch::torch_randn(1, 3, 640, 640) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 91)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for pretrained model_rfdetr_medium", { + + skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") + + model <- model_rfdetr_medium(pretrained = TRUE) + input <- torch::torch_randn(1, 3, 640, 640) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 91)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for non-pretrained model_rfdetr_base", { + model <- model_rfdetr_base() + input <- torch::torch_randn(1, 3, 640, 640) + model$eval() + out <- 
model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 91)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for pretrained model_rfdetr_base", { + + skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") + + model <- model_rfdetr_base(pretrained = TRUE) + input <- torch::torch_randn(1, 3, 640, 640) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 91)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for non-pretrained model_rfdetr_base_2", { + model <- model_rfdetr_base_2() + input <- torch::torch_randn(1, 3, 640, 640) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 91)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for pretrained model_rfdetr_base_2", { + + skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") + + model <- model_rfdetr_base_2(pretrained = TRUE) + input <- torch::torch_randn(1, 3, 640, 640) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 91)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for non-pretrained model_rfdetr_base_o365", { + model <- model_rfdetr_base_o365() + input <- torch::torch_randn(1, 3, 640, 640) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 366)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for pretrained model_rfdetr_base_o365", { + + skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") + + model <- model_rfdetr_base_o365(pretrained = TRUE) + input <- 
torch::torch_randn(1, 3, 640, 640) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 366)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for non-pretrained model_rfdetr_large", { + model <- model_rfdetr_large() + input <- torch::torch_randn(1, 3, 560, 560) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 91)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +}) + +test_that("tests for pretrained model_rfdetr_large", { + + skip_if(Sys.getenv("TEST_LARGE_MODELS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_MODELS=1 to enable tests requiring large downloads.") + + model <- model_rfdetr_large(pretrained = TRUE) + input <- torch::torch_randn(1, 3, 560, 560) + model$eval() + out <- model(input) + expect_tensor_shape(out$pred_logits, c(1, 300, 91)) + expect_tensor_shape(out$pred_boxes, c(1, 300, 4)) + + rm(model) + gc() +})