From dfafe62c7eb3413cf1210e40e551094458f4d9d0 Mon Sep 17 00:00:00 2001 From: Jonathan Cobb Date: Wed, 16 Sep 2020 12:25:08 -0400 Subject: [PATCH] allow StringUtil.findAllMatches to return multiple groups --- .../java/org/cobbzilla/util/http/HttpUtil.java | 8 ++++---- .../org/cobbzilla/util/string/StringUtil.java | 16 +++++++++++++--- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/cobbzilla/util/http/HttpUtil.java b/src/main/java/org/cobbzilla/util/http/HttpUtil.java index d576abe..af2a323 100644 --- a/src/main/java/org/cobbzilla/util/http/HttpUtil.java +++ b/src/main/java/org/cobbzilla/util/http/HttpUtil.java @@ -32,6 +32,7 @@ import java.net.HttpURLConnection; import java.net.URL; import java.net.URLDecoder; import java.nio.charset.Charset; +import java.util.Collection; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -49,8 +50,7 @@ import static org.cobbzilla.util.io.FileUtil.getDefaultTempDir; import static org.cobbzilla.util.json.JsonUtil.COMPACT_MAPPER; import static org.cobbzilla.util.json.JsonUtil.json; import static org.cobbzilla.util.security.CryptStream.BUFFER_SIZE; -import static org.cobbzilla.util.string.StringUtil.CRLF; -import static org.cobbzilla.util.string.StringUtil.urlEncode; +import static org.cobbzilla.util.string.StringUtil.*; import static org.cobbzilla.util.system.Sleep.sleep; import static org.cobbzilla.util.time.TimeUtil.DATE_FORMAT_LAST_MODIFIED; @@ -564,13 +564,13 @@ public class HttpUtil { System.out.println("dest = "+dest); } - public static List<String> applyRegexToUrl(String url, List headers, String regex, Integer group) { + public static List<Map<Integer, String>> applyRegexToUrl(String url, List headers, String regex, Collection<Integer> groups) { final HttpRequestBean requestBean = new HttpRequestBean(GET, url).setHeaders(headers); final HttpClientBuilder clientBuilder = requestBean.initClientBuilder(HttpClients.custom().disableRedirectHandling()); try { @Cleanup final CloseableHttpClient client = 
clientBuilder.build(); final HttpResponseBean responseBean = HttpUtil.getResponse(requestBean, client); - return StringUtil.findAllMatches(responseBean.getEntityString(), regex, group); + return findAllMatches(responseBean.getEntityString(), regex, groups); } catch (Exception e) { log.error("applyRegexToUrl: error: "+shortError(e)); } diff --git a/src/main/java/org/cobbzilla/util/string/StringUtil.java b/src/main/java/org/cobbzilla/util/string/StringUtil.java index e3b8fb4..09e21ad 100644 --- a/src/main/java/org/cobbzilla/util/string/StringUtil.java +++ b/src/main/java/org/cobbzilla/util/string/StringUtil.java @@ -558,11 +558,21 @@ public class StringUtil { public static String sqlEscapeAndQuote(String val) { return "'" + escapeSql(val) + "'"; } - public static List<String> findAllMatches(String val, String regex, Integer group) { + public static List<Map<Integer, String>> findAllMatches(String val, String regex, Collection<Integer> groups) { final Pattern pattern = Pattern.compile(regex); final Matcher matcher = pattern.matcher(val); - final List<String> matches = new ArrayList<>(); - while (matcher.find()) matches.add(group == null ? matcher.group() : matcher.group(group)); + final List<Map<Integer, String>> matches = new ArrayList<>(); + while (matcher.find()) { + final Map<Integer, String> match = new HashMap<>(); + if (groups == null) { + match.put(0, matcher.group()); + } else { + for (Integer group : groups) { + match.put(group, group == 0 ? matcher.group() : group > matcher.groupCount() ? null : matcher.group(group)); + } + } + matches.add(match); + } return matches; }