diff --git a/src/main/java/org/cobbzilla/util/http/HttpUtil.java b/src/main/java/org/cobbzilla/util/http/HttpUtil.java
index 6194349..d576abe 100644
--- a/src/main/java/org/cobbzilla/util/http/HttpUtil.java
+++ b/src/main/java/org/cobbzilla/util/http/HttpUtil.java
@@ -33,6 +33,7 @@ import java.net.URL;
 import java.net.URLDecoder;
 import java.nio.charset.Charset;
 import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
 
 import static com.google.common.net.HttpHeaders.CONTENT_DISPOSITION;
@@ -562,4 +563,29 @@ public class HttpUtil {
         String dest = chaseRedirects("http://example.com/?", 10);
         System.out.println("dest = "+dest);
     }
+
+    /**
+     * Sends a GET request to a URL and collects every regex match found in the response entity string.
+     * Redirects are not followed: redirect handling is disabled on the client used for the request.
+     *
+     * @param url     the URL to request
+     * @param headers headers for the request; passed unchanged to HttpRequestBean.setHeaders
+     *                (NOTE(review): the element type is not visible in this patch -- confirm and parameterize)
+     * @param regex   the regular expression to search for
+     * @param group   capture group to collect from each match, or null to collect the whole match
+     * @return the matches in the order found, or null if building the client, fetching the response,
+     *         or matching throws (the error is logged)
+     */
+    public static List<String> applyRegexToUrl(String url, List headers, String regex, Integer group) {
+        final HttpRequestBean requestBean = new HttpRequestBean(GET, url).setHeaders(headers);
+        final HttpClientBuilder clientBuilder = requestBean.initClientBuilder(HttpClients.custom().disableRedirectHandling());
+        try {
+            @Cleanup final CloseableHttpClient client = clientBuilder.build();
+            final HttpResponseBean responseBean = HttpUtil.getResponse(requestBean, client);
+            return StringUtil.findAllMatches(responseBean.getEntityString(), regex, group);
+        } catch (Exception e) {
+            log.error("applyRegexToUrl: error: "+shortError(e));
+        }
+        return null;
+    }
+
 }
diff --git a/src/main/java/org/cobbzilla/util/string/StringUtil.java b/src/main/java/org/cobbzilla/util/string/StringUtil.java
index 4f1d009..e3b8fb4 100644
--- a/src/main/java/org/cobbzilla/util/string/StringUtil.java
+++ b/src/main/java/org/cobbzilla/util/string/StringUtil.java
@@ -558,4 +558,24 @@ public class StringUtil {
 
     public static String sqlEscapeAndQuote(String val) { return "'" + escapeSql(val) + "'"; }
 
+    /**
+     * Finds every match of a regular expression within a string.
+     *
+     * @param val   the string to search; if null, an empty list is returned
+     * @param regex the regular expression to compile and search for
+     * @param group capture group to collect from each match, or null to collect the whole match
+     * @return a new mutable list of the matches, in the order found; an entry is null when the
+     *         requested group did not participate in that match
+     */
+    public static List<String> findAllMatches(String val, String regex, Integer group) {
+        final Pattern pattern = Pattern.compile(regex); // compiled first so an invalid regex is reported even for null input
+        final List<String> matches = new ArrayList<>();
+        if (val == null) return matches; // nothing to search; pattern.matcher(null) would throw NullPointerException
+        final Matcher matcher = pattern.matcher(val);
+        while (matcher.find()) {
+            matches.add(group == null ? matcher.group() : matcher.group(group));
+        }
+        return matches;
+    }
+
 }