Quantcast
Channel: Count and summation of positive and negative number sequences - Stack Overflow
Viewing all articles
Browse latest Browse all 15

Answer by GoGonzo for Count and summation of positive and negative number sequences

$
0
0

I recommend the R package runner for this kind of operation. streak_run counts consecutive occurrences of the same value, and sum_run calculates the sum within a window whose length is defined by the k argument.

Here is the solution:

# Worked example: length and running sum of each positive / non-positive streak.
set.seed(100)
x <- round(rnorm(20, sd = 0.02), 3)

# Position of each element inside its current run of equal `x > 0` values.
n_of_sequence <- runner::streak_run(x > 0)

# Running sum over a window whose length is the current streak length, i.e.
# the cumulative sum restarted at every sign change. Stored as `run_sum` so the
# base function `sum()` is not shadowed; the printed column is still `sum`.
run_sum <- runner::sum_run(x, k = n_of_sequence)

data.frame(x, n_of_sequence, sum = run_sum)
#         x n_of_sequence    sum
# 1  -0.010             1 -0.010
# 2   0.003             1  0.003
# 3  -0.002             1 -0.002
# 4   0.018             1  0.018
# 5   0.002             2  0.020
# 6   0.006             3  0.026
# 7  -0.012             1 -0.012
# 8   0.014             1  0.014
# 9  -0.017             1 -0.017
# 10 -0.007             2 -0.024
# 11  0.002             1  0.002
# 12  0.002             2  0.004
# 13 -0.004             1 -0.004
# 14  0.015             1  0.015
# 15  0.002             2  0.017
# 16 -0.001             1 -0.001
# 17 -0.008             2 -0.009
# 18  0.010             1  0.010
# 19 -0.018             1 -0.018
# 20  0.046             1  0.046

Below is a benchmark comparing the current solutions:

# Benchmark setup ----
# Shared input data plus one function per candidate solution. The algorithms
# are kept as posted so that the timings reported with the benchmark remain
# comparable; only formatting and local hygiene were changed.

library(runner)
library(data.table)
library(tidyverse)

set.seed(0)
x <- round(rnorm(10000, sd = 0.02), 3)
dt <- data.table(x)
df <- tibble(x = x)

# NOTE: runner_streak() and count_and_sum() split runs on `x > 0`, so zeros
# join the non-positive run; the other three split on `sign(x)`, so zeros
# form runs of their own.

#' runner solution: streak length, then a running sum over that window.
#'
#' @param x Numeric vector.
#' @return Numeric vector of running sums, restarted at each change of `x > 0`.
runner_streak <- function(x) {
  n_of_sequence <- streak_run(x > 0)
  sum_run(x, k = n_of_sequence)
}

#' data.table solution: adds both columns by reference, grouped by run id.
#'
#' @param dt A data.table with a numeric column `x`; modified in place.
#' @return The result of the `:=` call on `dt`, which now carries the columns
#'   `n_of_sequence` and `sum`.
dt_streak <- function(dt) {
  dt[
    ,
    c("n_of_sequence", "sum") := list(seq_len(.N), cumsum(x)),
    by = rleid(sign(x))
  ]
}

#' rle + purrr solution.
#'
#' @param x Numeric vector.
#' @return Numeric vector of running sums, restarted at each sign change.
rle_streak <- function(x) {
  run_lengths <- rle(sign(x))$lengths
  # Computed (although not returned) so this function does the same amount of
  # work as the other candidates.
  n_of_sequence <- run_lengths %>% map(seq_len) %>% unlist()
  # First index of every run: cumulative lengths shifted by one, last dropped.
  start <- cumsum(c(1, run_lengths))
  start <- start[-length(start)]
  map2(start, run_lengths, ~ cumsum(x[.x:(.x + .y - 1)])) %>% unlist()
}

#' tidyverse solution.
#'
#' @param x A data frame / tibble with a numeric column `x`. The argument is
#'   now actually used instead of the global `df`.
#' @return `x` with the added columns `n_of_sequence` and `sum`, ungrouped.
tv_streak <- function(x) {
  x %>%
    # `.data$x` makes explicit that the column, not the argument, is meant.
    mutate(seqno = cumsum(c(1, diff(sign(.data$x)) != 0))) %>%
    group_by(seqno) %>%
    mutate(
      n_of_sequence = row_number(),
      sum = cumsum(.data$x)
    ) %>%
    ungroup() %>%
    select(-seqno)
}

#' Base R solution: split into runs, build one data frame per run, rbind them.
#'
#' @param x Numeric vector.
#' @return data.frame with columns `x`, `n` (position within the run) and
#'   `sum` (running sum within the run), with row names 1..nrow.
count_and_sum <- function(x) {
  runs <- rle((x > 0) * 1)$lengths
  # seq_along() / seq_len() instead of 1:n avoid the 1:0 trap on empty input.
  groups <- split(x, rep(seq_along(runs), runs))
  output <- function(group) {
    data.frame(x = group, n = seq_along(group), sum = cumsum(group))
  }
  result <- as.data.frame(do.call(rbind, lapply(groups, output)))
  # rbind() on a named list produces row names like "1.1"; reset them.
  `rownames<-`(result, seq_len(nrow(result)))
}
# Time every candidate on the same input, 100 evaluations each.
microbenchmark::microbenchmark(
  runner_streak(x),
  dt_streak(dt),
  rle_streak(x),
  tv_streak(df),
  count_and_sum(x),
  times = 100L
)
# Unit: milliseconds
#             expr         min          lq        mean      median          uq        max neval
# runner_streak(x)    4.240192    4.833563    6.321697    5.300817    6.543926   14.80221   100
#    dt_streak(dt)    7.648100    8.587887   10.862806    9.650483   11.295488   34.66027   100
#    rle_streak(x)   42.321506   55.397586   64.195692   63.404403   67.813738  167.71444   100
#    tv_streak(df)   31.398885   36.333751   45.141452   40.800077   45.756279  163.19535   100
# count_and_sum(x) 1691.438977 1919.518282 2306.036783 2149.543281 2499.951020 6158.43384   100

Viewing all articles
Browse latest Browse all 15

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>