diff --git a/bubble-server/src/main/java/bubble/ApiConstants.java b/bubble-server/src/main/java/bubble/ApiConstants.java index 70b56700..379f2d14 100644 --- a/bubble-server/src/main/java/bubble/ApiConstants.java +++ b/bubble-server/src/main/java/bubble/ApiConstants.java @@ -253,6 +253,7 @@ public class ApiConstants { public static final String EP_UPGRADE = "/upgrade"; public static final String EP_LOGS = "/logs"; public static final String EP_FOLLOW = "/follow"; + public static final String EP_FOLLOW_AND_APPLY_REGEX = "/followAndApplyRegex"; public static final String DETECT_ENDPOINT = "/detect"; public static final String EP_LOCALE = "/locale"; diff --git a/bubble-server/src/main/java/bubble/resources/stream/FilterHttpResource.java b/bubble-server/src/main/java/bubble/resources/stream/FilterHttpResource.java index 3525f5ea..d68c43a0 100644 --- a/bubble-server/src/main/java/bubble/resources/stream/FilterHttpResource.java +++ b/bubble-server/src/main/java/bubble/resources/stream/FilterHttpResource.java @@ -34,13 +34,12 @@ import bubble.service.stream.StandardRuleEngineService; import com.fasterxml.jackson.databind.JsonNode; import lombok.Getter; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.ArrayUtils; import org.cobbzilla.util.collection.ExpirationEvictionPolicy; import org.cobbzilla.util.collection.ExpirationMap; import org.cobbzilla.util.collection.NameAndValue; import org.cobbzilla.util.http.HttpContentEncodingType; -import org.cobbzilla.util.http.HttpUtil; import org.cobbzilla.util.network.NetworkUtil; -import org.cobbzilla.util.string.StringUtil; import org.cobbzilla.wizard.cache.redis.RedisService; import org.glassfish.grizzly.http.server.Request; import org.glassfish.jersey.server.ContainerRequest; @@ -64,15 +63,16 @@ import static bubble.service.stream.StandardRuleEngineService.MATCHERS_CACHE_TIM import static com.google.common.net.HttpHeaders.CONTENT_SECURITY_POLICY; import static java.util.Collections.emptyMap; import static 
java.util.concurrent.TimeUnit.*; -import static javax.ws.rs.core.HttpHeaders.CONTENT_LENGTH; +import static org.apache.http.HttpHeaders.*; import static org.cobbzilla.util.collection.ArrayUtil.arrayToString; import static org.cobbzilla.util.daemon.ZillaRuntime.*; import static org.cobbzilla.util.http.HttpContentTypes.APPLICATION_JSON; import static org.cobbzilla.util.http.HttpContentTypes.TEXT_PLAIN; import static org.cobbzilla.util.http.HttpUtil.applyRegexToUrl; -import static org.cobbzilla.util.json.JsonUtil.COMPACT_MAPPER; -import static org.cobbzilla.util.json.JsonUtil.json; +import static org.cobbzilla.util.http.HttpUtil.chaseRedirects; +import static org.cobbzilla.util.json.JsonUtil.*; import static org.cobbzilla.util.network.NetworkUtil.isLocalIpv4; +import static org.cobbzilla.util.security.ShaUtil.sha256_hex; import static org.cobbzilla.util.string.StringUtil.trimQuotes; import static org.cobbzilla.wizard.cache.redis.RedisService.EX; import static org.cobbzilla.wizard.model.NamedEntity.names; @@ -446,8 +446,17 @@ public class FilterHttpResource { public Response flushCaches(@Context ContainerRequest request) { final Account caller = userPrincipal(request); if (!caller.admin()) return forbidden(); + + final int connCheckMatcherCacheSize = connCheckMatcherCache.size(); connCheckMatcherCache.clear(); - return ok(ruleEngine.flushCaches()); + + // disable redirect flushing for now -- it works well and it's a lot of work + // final Long redirectCacheSize = getRedirectCache().del_matching("*"); + + final Map flushes = ruleEngine.flushCaches(); + flushes.put("connCheckMatchersCache", connCheckMatcherCacheSize); + // flushes.put("redirectCache", redirectCacheSize == null ? 
0 : redirectCacheSize); + return ok(flushes); } @DELETE @Path(EP_MATCHERS) @@ -690,40 +699,69 @@ public class FilterHttpResource { return ok_empty(); } - private final Map redirectCache - = new ExpirationMap<>(1000, DAYS.toMillis(3), ExpirationEvictionPolicy.atime); + public static final String REDIS_PREFIX_REDIRECT_CACHE = "followLink_"; + @Getter(lazy=true) private final RedisService redirectCache = redis.prefixNamespace(REDIS_PREFIX_REDIRECT_CACHE); @POST @Path(EP_FOLLOW+"/{requestId}") + @Consumes(APPLICATION_JSON) @Produces(TEXT_PLAIN) public Response followLink(@Context Request req, @Context ContainerRequest ctx, @PathParam("requestId") String requestId, - JsonNode followSpec) { + JsonNode urlNode) { final FilterSubContext filterCtx = new FilterSubContext(req, requestId); + final RedisService cache = getRedirectCache(); + final String url = urlNode != null && urlNode.isTextual() ? urlNode.textValue() : null; /* textValue() is null for non-string JSON */ + if (url == null) return notFound(String.valueOf(urlNode)); /* body must be a JSON string; do not hash or chase a null URL */ + final String cacheKey = sha256_hex(url); + final String cachedValue = cache.get(cacheKey); + if (cachedValue != null) return ok(cachedValue); + + final String result = chaseRedirects(url); + cache.set(cacheKey, result, EX, DAYS.toSeconds(365)); /* EX expiry is expressed in seconds, not millis */ + return ok(result); + } - // is this a request to parse regexes from a URL? - if (followSpec.has("regex")) { - return ok(redirectCache.computeIfAbsent(json(followSpec), k -> { - final String url = followSpec.get("url").textValue(); - final String regex = followSpec.get("regex").textValue(); - final Integer group = followSpec.has("group") ? followSpec.get("group").asInt() : null; - final List headers = new ArrayList<>(); - for (String name : req.getHeaderNames()) { - final String value = req.getHeader(name); - headers.add(new NameAndValue(name, value)); - } - final List matches = applyRegexToUrl(url, headers, regex, group); - return matches == null ? 
null : StringUtil.toString(matches, "\n"); - })); + public static final String CLIENT_HEADER_PREFIX = "X-Bubble-Client-Header-"; - } else if (followSpec.isTextual()) { - // just a regular follow -- chase redirects - return ok(redirectCache.computeIfAbsent(followSpec.textValue(), HttpUtil::chaseRedirects)); - } else { - final String json = json(followSpec); - log.error("followLink: invalid json (expected String or {regex, url}): "+json); - return notFound(json); + public static final String[] EXCLUDED_CLIENT_HEADERS = { /* lower-cased names of prefixed client headers that are not passed on to applyRegexToUrl */ + ACCEPT.toLowerCase(), + CONTENT_TYPE.toLowerCase(), CONTENT_LENGTH.toLowerCase(), + CONTENT_ENCODING.toLowerCase(), TRANSFER_ENCODING.toLowerCase() + }; + + @POST @Path(EP_FOLLOW_AND_APPLY_REGEX+"/{requestId}") + @Consumes(APPLICATION_JSON) + @Produces(APPLICATION_JSON) + public Response followLinkThenApplyRegex(@Context Request req, + @Context ContainerRequest ctx, + @PathParam("requestId") String requestId, + FollowThenApplyRegex follow) { + final FilterSubContext filterCtx = new FilterSubContext(req, requestId); + final RedisService cache = getRedirectCache(); + final String followJson = json(follow); + final String cacheKey = sha256_hex(followJson); + final String cachedValue = cache.get(cacheKey); + if (cachedValue != null) return ok(cachedValue); + + // collect client headers + final List headers = new ArrayList<>(); + for (String name : req.getHeaderNames()) { + if (name.toLowerCase().startsWith(CLIENT_HEADER_PREFIX.toLowerCase())) { + final String value = req.getHeader(name); + final String realName = name.substring(CLIENT_HEADER_PREFIX.length()); + if (ArrayUtils.indexOf(EXCLUDED_CLIENT_HEADERS, realName.toLowerCase()) == -1) { + headers.add(new NameAndValue(realName, value)); + } + } } + headers.add(new NameAndValue(ACCEPT, "*/*")); + final List> matches + = applyRegexToUrl(follow.getUrl(), headers, follow.getRegex(), follow.getGroups() == null ? Arrays.<Integer>asList() : Arrays.asList(follow.getGroups())); /* Arrays.asList on a null array throws NPE and the JS client sends "groups": null when none are given -- NOTE(review): confirm applyRegexToUrl treats an empty list as "no specific groups" */ + if (log.isDebugEnabled()) log.debug("followLink(" + follow.getUrl() + ") returning: " 
+ json(matches)); + final String result = matches == null ? EMPTY_JSON_ARRAY : json(matches); + cache.set(cacheKey, result, EX, DAYS.toSeconds(365)); /* EX expiry is expressed in seconds, not millis */ + return ok(result); } @Path(EP_ASSETS+"/{requestId}/{appId}") diff --git a/bubble-server/src/main/java/bubble/resources/stream/FollowThenApplyRegex.java b/bubble-server/src/main/java/bubble/resources/stream/FollowThenApplyRegex.java new file mode 100644 index 00000000..3fed138b --- /dev/null +++ b/bubble-server/src/main/java/bubble/resources/stream/FollowThenApplyRegex.java @@ -0,0 +1,16 @@ +/** + * Copyright (c) 2020 Bubble, Inc. All rights reserved. + * For personal (non-commercial) use, see license: https://getbubblenow.com/bubble-license/ + */ +package bubble.resources.stream; + +import lombok.Getter; +import lombok.Setter; + +public class FollowThenApplyRegex { + + @Getter @Setter private String url; + @Getter @Setter private String regex; + @Getter @Setter private Integer[] groups; + +} diff --git a/bubble-server/src/main/resources/bubble/rule/RequestModifierRule_icon.js.hbs b/bubble-server/src/main/resources/bubble/rule/RequestModifierRule_icon.js.hbs index 52706e65..7c5c3ed7 100644 --- a/bubble-server/src/main/resources/bubble/rule/RequestModifierRule_icon.js.hbs +++ b/bubble-server/src/main/resources/bubble/rule/RequestModifierRule_icon.js.hbs @@ -87,9 +87,12 @@ function {{JS_PREFIX}}_create_button(labelKey, labelDefault, onclick, labelForma return btn; } -if (typeof {{PAGE_PREFIX}}_icon_status === 'undefined') { +{{JS_PREFIX}}_follow_url = '/__bubble/api/filter/follow/{{BUBBLE_REQUEST_ID}}'; +{{JS_PREFIX}}_follow_and_apply_regex_url = '/__bubble/api/filter/followAndApplyRegex/{{BUBBLE_REQUEST_ID}}'; + +{{JS_PREFIX}}_url_chasers = {}; - let {{PAGE_PREFIX}}_url_chasers = {}; +if (typeof {{PAGE_PREFIX}}_icon_status === 'undefined') { {{PAGE_PREFIX}}_screenWidth = function () { return window.innerWidth || document.documentElement.clientWidth || document.body.clientWidth }; @@ -232,10 +235,10 @@ if (typeof 
{{PAGE_PREFIX}}_icon_status === 'undefined') { }); } -function {{JS_PREFIX}}_chase_redirects (a, removeParams) { +function {{JS_PREFIX}}_chase_redirects (a, removeParams, regex, groups, callback) { const initial_href = a.href; - if (initial_href in {{PAGE_PREFIX}}_url_chasers) { - a.href = {{PAGE_PREFIX}}_url_chasers[initial_href]; + if (initial_href in {{JS_PREFIX}}_url_chasers) { + a.href = {{JS_PREFIX}}_url_chasers[initial_href]; return; } if (a.className && a.className.indexOf('{{JS_PREFIX}}_followed') !== -1) return; @@ -247,20 +250,38 @@ function {{JS_PREFIX}}_chase_redirects (a, removeParams) { a.rel = 'noopener noreferrer nofollow'; - fetch('/__bubble/api/filter/follow/{{BUBBLE_REQUEST_ID}}', {method: 'POST', body: JSON.stringify(initial_href)}) - .then(response => response.text()) + let is_regex = (typeof regex !== 'undefined'); + const follow_body = !is_regex ? initial_href : + { + 'url': initial_href, + 'regex': regex, + 'groups': (typeof groups === 'undefined' || groups === null ? null : groups) + }; + const request_opts = { + method: 'POST', + headers: {'Content-Type': 'application/json'}, + body: JSON.stringify(follow_body) + } + const follow_url = is_regex ? {{JS_PREFIX}}_follow_and_apply_regex_url : {{JS_PREFIX}}_follow_url; + console.log('>>>>>>>>>>>> requesting follow_url ('+follow_url+') with request_opts: '+JSON.stringify(request_opts)); + fetch(follow_url, request_opts) + .then(response => is_regex ? response.json() : response.text()) .then(data => { - if (data && (data.startsWith('http://') || data.startsWith('https://'))) { - if (typeof removeParams === 'undefined' || removeParams === null || removeParams) { - const qPos = data.indexOf('?'); - a.href = qPos === -1 ? 
data : data.substring(0, qPos); + if (is_regex) { + callback(data); + } else { + if (data && (data.startsWith('http://') || data.startsWith('https://'))) { + if (typeof removeParams === 'undefined' || removeParams === null || removeParams) { + const qPos = data.indexOf('?'); + a.href = qPos === -1 ? data : data.substring(0, qPos); + } else { + a.href = data; + } + {{JS_PREFIX}}_url_chasers[initial_href] = a.href; + // console.log('chase_redirect: rewrote '+initial_href+' -> '+a.href); } else { - a.href = data; + console.warn('chase_redirects: ' + a.href + ' returned non-URL response: ' + data); } - {{PAGE_PREFIX}}_url_chasers[initial_href] = a.href; - // console.log('chase_redirect: rewrote '+initial_href+' -> '+a.href); - } else { - console.warn('chase_redirects: '+a.href+' returned non-URL response: '+data); } }) .catch((error) => { diff --git a/bubble-server/src/main/resources/bubble/rule/social/block/site/FB.js.hbs b/bubble-server/src/main/resources/bubble/rule/social/block/site/FB.js.hbs index cd2938b7..058c6513 100644 --- a/bubble-server/src/main/resources/bubble/rule/social/block/site/FB.js.hbs +++ b/bubble-server/src/main/resources/bubble/rule/social/block/site/FB.js.hbs @@ -32,7 +32,7 @@ Element.prototype.appendChild = function() { const block = {{JS_PREFIX}}_should_block({{JS_PREFIX}}_blocked_users, node) if (block) { // log('>>> BLOCKING via appendChild: '+block); - {{JS_PREFIX}}_appendChild.apply({{JS_PREFIX}}_jail, arguments); + return {{JS_PREFIX}}_appendChild.apply({{JS_PREFIX}}_jail, arguments); } } } @@ -40,7 +40,7 @@ Element.prototype.appendChild = function() { console.log('>>> error inspecting: e='+e); } try { - {{JS_PREFIX}}_appendChild.apply(this, arguments); + return {{JS_PREFIX}}_appendChild.apply(this, arguments); } catch (e) { console.log('>>> error calling document.appendChild: arg[0].tagName = '+node.tagName+' e='+e); } diff --git a/bubble-server/src/main/resources/bubble/rule/social/block/site/LI.js.hbs 
b/bubble-server/src/main/resources/bubble/rule/social/block/site/LI.js.hbs index d38f958a..c4922e96 100644 --- a/bubble-server/src/main/resources/bubble/rule/social/block/site/LI.js.hbs +++ b/bubble-server/src/main/resources/bubble/rule/social/block/site/LI.js.hbs @@ -1,8 +1,21 @@ {{JS_PREFIX}}_supports_keywords = true; +{{JS_PREFIX}}_idle_interval = 5000; const {{JS_PREFIX}}_site_host = location.protocol + '//' + window.location.hostname + '/'; +const {{JS_PREFIX}}_jail = document.createElement('div'); +{{JS_PREFIX}}_jail.style.display = 'none'; + function {{JS_PREFIX}}_apply_blocks(blocked_users) { + const adBanner = Array.from(document.getElementsByTagName('iframe')).find(i => i.className && i.className === 'ad-banner'); + if (typeof adBanner !== 'undefined') { + let adParent = adBanner.parentNode; + if (adParent != null) { + adParent.innerHTML = ''; + adParent.style.display = 'none'; + } else { + } + } const articles = Array.from(document.getElementsByClassName('feed-shared-update-v2')); if (articles === null || articles.length === 0) { console.warn('No articles found, not filtering'); @@ -11,7 +24,21 @@ function {{JS_PREFIX}}_apply_blocks(blocked_users) { {{JS_PREFIX}}_consider_block(articles, blocked_users); } -function {{JS_PREFIX}}_author_from_href(href) { +function {{JS_PREFIX}}_is_valid_author_name(name) { + return !(name.startsWith('ACoAA') || name.length >= 38); +} + +function {{JS_PREFIX}}_author_from_href(linkId, callback) { + if (typeof linkId === 'undefined' || linkId === null || linkId.length === 0) { + console.log('author_from_href: invalid link ID: '+linkId); + return; + } + const link = document.getElementById(linkId); + if (link === null) { + // console.log('author_from_href: link with ID '+linkId+' not found'); + return; + } + const href = link.href; if (typeof href === 'undefined' || href === null) return null; let h = href.startsWith({{JS_PREFIX}}_site_host) ? 
href.substring({{JS_PREFIX}}_site_host.length) : href; const qPos = h.indexOf('?'); @@ -19,14 +46,51 @@ function {{JS_PREFIX}}_author_from_href(href) { h = h.substring(0, qPos); } if (h.endsWith('/')) h = h.substring(0, h.length - 1); - if (!h.startsWith('in/') && !h.startsWith('company/')) { - return null; + let profile_type = null; + if (h.startsWith('in/')) { + profile_type = 'in/'; + } else if (h.startsWith('company/')) { + profile_type = 'company/'; + } else { + // console.log("author_from_href: skipping (not in/ or company/) href: "+href); + return; } const slashPos = h.indexOf('/'); - const name = h.substring(slashPos); - if (name.length > 35 && name.indexOf('-') === -1 && name.indexOf('_') === -1) return null; - console.log("author_from_href: found "+name+' from '+href); - return name; + const name = h.substring(slashPos+1); + if ({{JS_PREFIX}}_is_valid_author_name(name)) { + // console.log("author_from_href: found " + name + ' from ' + href); + callback(linkId, name); + } else { + // only chase a link once + let linkClass = link.className; + const chaseClass = '{{JS_PREFIX}}_link_chased'; + if (linkClass && linkClass.indexOf(chaseClass) !== -1) { + return; + } else { + link.className = link.className ? 
link.className + ' '+chaseClass : chaseClass; + } + + {{JS_PREFIX}}_chase_redirects(link, true, '/voyager/api/identity/profiles/([^/]+)/privacySettings', [1], function (matches) { + if (typeof matches.length !== 'undefined') { + for (let i=0; i 0) { for (let i=0; i 0) { + appendToSpan = authorSpans[0]; + } else { + appendToSpan = Array.from(realLink.getElementsByTagName('span')) + .find(s => s.getAttribute('dir') === 'ltr' || s.getAttribute('data-entity-type')); + if (typeof appendToSpan === 'undefined') { + console.log('consider_block: found no span to attach block control for author: '+author); + return; + } + } + + let b = {{JS_PREFIX}}_create_block_control(article, author, realLink); + if (b !== null) { + console.log('consider_block: inserting span='+b.id+' for article by '+author); + appendToSpan.parentNode.appendChild(b); + {{JS_PREFIX}}_tally_allow(); + } else { + console.log('consider_block: create_block_control returned null for author '+author) + } } - } + }); } } } diff --git a/bubble-server/src/main/resources/packer/roles/mitmproxy/files/bubble_api.py b/bubble-server/src/main/resources/packer/roles/mitmproxy/files/bubble_api.py index e16d3ac1..7ffaea77 100644 --- a/bubble-server/src/main/resources/packer/roles/mitmproxy/files/bubble_api.py +++ b/bubble-server/src/main/resources/packer/roles/mitmproxy/files/bubble_api.py @@ -469,6 +469,10 @@ def health_check_response(flow): flow.response.stream = lambda chunks: [b'OK\n'] +def include_request_headers(path): + return '/followAndApplyRegex' in path + + def special_bubble_response(flow): name = 'special_bubble_response' path = flow.request.path @@ -478,7 +482,7 @@ def special_bubble_response(flow): uri = make_bubble_special_path(path) if bubble_log.isEnabledFor(DEBUG): - bubble_log.debug('special_bubble_response: sending special bubble request to '+uri) + bubble_log.debug('special_bubble_response: sending special bubble '+flow.request.method+' to '+uri) headers = { 'Accept': 'application/json', 
'Content-Type': 'application/json' @@ -489,12 +493,22 @@ def special_bubble_response(flow): response = async_stream(client, name, uri, headers=headers, loop=loop) elif flow.request.method == 'POST': - loop = asyncio.new_event_loop() - client = async_client(timeout=30) + if include_request_headers(flow.request.path): + if bubble_log.isEnabledFor(DEBUG): + bubble_log.debug('special_bubble_request: including client headers: '+repr(flow.request.headers)) + # add client request headers (loop names must not rebind 'name', which is passed to async_stream below) + for header_name, header_value in flow.request.headers.items(): + headers['X-Bubble-Client-Header-'+header_name] = header_value + if bubble_log.isEnabledFor(DEBUG): + bubble_log.debug('special_bubble_request: NOW headers='+repr(headers)) + data = None if flow.request.content and flow.request.content: headers[HEADER_CONTENT_LENGTH] = str(len(flow.request.content)) data = flow.request.content + + loop = asyncio.new_event_loop() + client = async_client(timeout=30) response = async_stream(client, name, uri, headers=headers, method='POST', data=data, loop=loop) else: diff --git a/utils/cobbzilla-utils b/utils/cobbzilla-utils index ea72ac4a..dfafe62c 160000 --- a/utils/cobbzilla-utils +++ b/utils/cobbzilla-utils @@ -1 +1 @@ -Subproject commit ea72ac4a1619c4f5915047650cdd18b8a6202681 +Subproject commit dfafe62c7eb3413cf1210e40e551094458f4d9d0