Переглянути джерело

track match count

tags/2.0.1
Jonathan Cobb 5 роки тому
джерело
коміт
7f944cf93d
5 змінених файлів з 77 додано та 44 видалено
  1. +63
    -37
      src/main/java/org/cobbzilla/util/io/regex/RegexFilterReader.java
  2. +1
    -0
      src/main/java/org/cobbzilla/util/io/regex/RegexFilterResult.java
  3. +6
    -3
      src/main/java/org/cobbzilla/util/io/regex/RegexInsertionFilter.java
  4. +5
    -2
      src/main/java/org/cobbzilla/util/io/regex/RegexReplacementFilter.java
  5. +2
    -2
      src/main/java/org/cobbzilla/util/string/StringUtil.java

+ 63
- 37
src/main/java/org/cobbzilla/util/io/regex/RegexFilterReader.java Переглянути файл

@@ -12,6 +12,7 @@ import java.util.concurrent.atomic.AtomicReference;

import static org.apache.commons.lang3.ArrayUtils.addAll;
import static org.cobbzilla.util.daemon.ZillaRuntime.die;
import static org.cobbzilla.util.string.StringUtil.EMPTY_CHAR_ARRAY;

@Slf4j @Accessors(chain=true)
public class RegexFilterReader extends BufferedReader {
@@ -20,6 +21,7 @@ public class RegexFilterReader extends BufferedReader {

private final int bufsiz;
private RegexStreamFilter filter;
@Getter @Setter private Integer maxMatches;
@Getter @Setter private String name; // for debugging/identifying which reader

public RegexFilterReader(Reader in, RegexStreamFilter filter) {
@@ -43,7 +45,7 @@ public class RegexFilterReader extends BufferedReader {
}

@Getter(lazy=true) private final FilterResponse filterResponse = initFilterResponse();
private FilterResponse initFilterResponse() { return new FilterResponse(bufsiz, filter); }
private FilterResponse initFilterResponse() { return new FilterResponse(bufsiz, filter, maxMatches); }

@Override public int read() throws IOException {
final char[] c = new char[1];
@@ -105,10 +107,13 @@ public class RegexFilterReader extends BufferedReader {
private int readPos = 0;
private boolean eof = false;
private RegexStreamFilter filter;
private int matchCount = 0;
private Integer maxMatches = null;

public FilterResponse(int bufsiz, RegexStreamFilter filter) {
public FilterResponse(int bufsiz, RegexStreamFilter filter, Integer maxMatches) {
this.bufsiz = bufsiz;
this.filter = filter;
this.maxMatches = maxMatches;
}

private final AtomicReference<char[]> unprocessed = new AtomicReference<>();
@@ -118,46 +123,67 @@ public class RegexFilterReader extends BufferedReader {

this.eof = eof;

synchronized (unprocessed) {
char[] input = unprocessed.get();
if (input == null) {
input = new char[len];
System.arraycopy(b, 0, input, 0, len);
} else {
char[] new_input = new char[input.length + len];
System.arraycopy(input, 0, new_input, 0, input.length);
System.arraycopy(b, 0, new_input, input.length, len);
input = new_input;
}
unprocessed.set(input);

// process buffer with filter, it may leave a remainder
final RegexFilterResult result = filter.apply(new StringBuilder().append(input), eof);

// put unprocessed remainder chars back onto unprocessed array
if (result.remainder > 0) {
final char[] subarray = ArrayUtils.subarray(input, input.length - result.remainder, input.length);
unprocessed.set(subarray);
} else {
unprocessed.set(null);
if (maxMatches != null && matchCount >= maxMatches) {
// no more processing to do. add all unprocessed bytes to processed, and add new bytes to processed
synchronized (unprocessed) {
synchronized (processed) {
char[] u = unprocessed.get();
if (u == null) u = EMPTY_CHAR_ARRAY;
char[] p = processed.get();
if (p == null) p = EMPTY_CHAR_ARRAY;
final int newLen = p.length + u.length + len;
final char[] update = new char[newLen];
System.arraycopy(p, 0, update, 0, p.length);
System.arraycopy(u, 0, update, p.length, u.length);
System.arraycopy(b, 0, update, p.length + u.length, len);
processed.set(update);
unprocessed.set(EMPTY_CHAR_ARRAY);
return u.length + len;
}
}
} else {

synchronized (unprocessed) {
char[] input = unprocessed.get();
if (input == null) {
input = new char[len];
System.arraycopy(b, 0, input, 0, len);
} else {
char[] new_input = new char[input.length + len];
System.arraycopy(input, 0, new_input, 0, input.length);
System.arraycopy(b, 0, new_input, input.length, len);
input = new_input;
}
unprocessed.set(input);

// process buffer with filter, it may leave a remainder
final RegexFilterResult result = filter.apply(new StringBuilder().append(input), eof);
matchCount += result.matchCount;

// put unprocessed remainder chars back onto unprocessed array
if (result.remainder > 0) {
final char[] subarray = ArrayUtils.subarray(input, input.length - result.remainder, input.length);
unprocessed.set(subarray);
} else {
unprocessed.set(null);
}

// if it produced nothing, but gave us a remainder, return zero now, read more data
if (result.buffer.length() == 0 && !eof) return 0;

// remove processed chars from unprocessed array
synchronized (processed) {
final char[] newChars = result.buffer.toString().toCharArray();
if (newChars.length > 0) {
if (processed.get() == null) {
processed.set(newChars);
} else {
processed.set(addAll(processed.get(), newChars));
// if it produced nothing, but gave us a remainder, return zero now, read more data
if (result.buffer.length() == 0 && !eof) return 0;

// remove processed chars from unprocessed array
synchronized (processed) {
final char[] newChars = result.buffer.toString().toCharArray();
if (newChars.length > 0) {
if (processed.get() == null) {
processed.set(newChars);
} else {
processed.set(addAll(processed.get(), newChars));
}
}
}
return result.buffer.length();
}

return result.buffer.length();
}
}



+ 1
- 0
src/main/java/org/cobbzilla/util/io/regex/RegexFilterResult.java Переглянути файл

@@ -7,5 +7,6 @@ public class RegexFilterResult {

public StringBuilder buffer;
public int remainder;
public int matchCount;

}

+ 6
- 3
src/main/java/org/cobbzilla/util/io/regex/RegexInsertionFilter.java Переглянути файл

@@ -44,6 +44,7 @@ public class RegexInsertionFilter implements RegexStreamFilter {
final StringBuilder result = new StringBuilder(buffer.length());
int start = 0;
final Matcher matcher = get_pattern().matcher(buffer.toString());
int matchCount = 0;
while (matcher.find(start)) {

// add everything before the first match
@@ -63,12 +64,14 @@ public class RegexInsertionFilter implements RegexStreamFilter {

// advance start pointer and track last match end
start = matcher.end();

matchCount++;
}

if (eof) {
// we are at the end, include everything else, no remainder
result.append(buffer.subSequence(start, buffer.length()));
return new RegexFilterResult(result, 0);
return new RegexFilterResult(result, 0, matchCount);
}

// nothing matched
@@ -76,10 +79,10 @@ public class RegexInsertionFilter implements RegexStreamFilter {
final int totalRemainder = buffer.length() - start;
if (totalRemainder > 1024) {
result.append(buffer.subSequence(start, buffer.length()-1024));
return new RegexFilterResult(result, 1024);
return new RegexFilterResult(result, 1024, matchCount);
} else {
// leave the entire remainder, we can't be sure
return new RegexFilterResult(result, totalRemainder);
return new RegexFilterResult(result, totalRemainder, matchCount);
}
}


+ 5
- 2
src/main/java/org/cobbzilla/util/io/regex/RegexReplacementFilter.java Переглянути файл

@@ -35,6 +35,7 @@ public class RegexReplacementFilter implements RegexStreamFilter {
final StringBuilder result = new StringBuilder(buffer.length());
int start = 0;
final Matcher matcher = pattern.matcher(buffer.toString());
int matchCount = 0;
while (matcher.find(start)) {
// add everything before the first match
result.append(buffer.subSequence(start, matcher.start()));
@@ -50,12 +51,14 @@ public class RegexReplacementFilter implements RegexStreamFilter {

// advance start pointer and track last match end
start = matcher.end();

matchCount++;
}
if (eof) {
result.append(buffer.subSequence(start, buffer.length()));
return new RegexFilterResult(result, 0);
return new RegexFilterResult(result, 0, matchCount);
}
return new RegexFilterResult(result, buffer.length() - start);
return new RegexFilterResult(result, buffer.length() - start, matchCount);
}

}

+ 2
- 2
src/main/java/org/cobbzilla/util/string/StringUtil.java Переглянути файл

@@ -43,13 +43,13 @@ public class StringUtil {

public static final String EMPTY = "";
public static final String[] EMPTY_ARRAY = {};
public static final String DEFAULT_LOCALE = "en_US";
public static final String BYTES_PATTERN = "(\\d+)(\\p{javaSpaceChar}+)?([MgGgTtPpEe][Bb])";
public static final char[] EMPTY_CHAR_ARRAY = {};
public static final String CRLF = "\r\n";

public static final Transformer XFORM_TO_STRING = o -> String.valueOf(o);

public static final String[] VOWELS = {"e", "a", "o", "i", "u"};

public static boolean isVowel(String symbol) { return ArrayUtils.indexOf(VOWELS, symbol) != -1; }

public static List<String> toStringCollection (Collection c) {


Завантаження…
Відмінити
Зберегти