package org.archive.wayback.accesscontrol.robotstxt;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.Resource;
import org.archive.wayback.exception.LiveDocumentNotAvailableException;
import org.archive.wayback.exception.LiveWebCacheUnavailableException;
import org.archive.wayback.exception.LiveWebTimeoutException;
import org.archive.wayback.liveweb.LiveWebCache;
import org.archive.wayback.resourceindex.filters.ExclusionFilter;
import org.archive.wayback.util.url.UrlOperations;
import org.archive.wayback.webapp.PerformanceLogger;

/* loaded from: input_file:WEB-INF/lib/openwayback-core-2.0.0.BETA.1.jar:org/archive/wayback/accesscontrol/robotstxt/HRobotExclusionFilter.class */
/**
 * Exclusion filter that decides, per capture, whether the capture's URL path
 * is blocked by the robots.txt directives of the capture's host.
 *
 * <p>Robots documents are fetched through a {@link LiveWebCache} and their
 * directives are accumulated in a {@link RobotsDirectiveAggregation}, so each
 * robots URL reported as missing by the aggregation is fetched once per filter
 * instance.
 *
 * <p>Instances keep mutable per-instance state without any synchronization.
 */
public class HRobotExclusionFilter extends ExclusionFilter {
    private static final String ROBOT_SUFFIX = "/robots.txt";
    private static final Logger LOGGER = Logger.getLogger(HRobotExclusionFilter.class.getName());
    /** Directives used whenever a robots.txt document could not be obtained with a 200 status. */
    private static final FixedRobotsDirectives ALLOW_ROBOT_DIRECTIVE = new FixedRobotsDirectives(true);

    private final RobotsDirectiveAggregation aggregation;
    private final LiveWebCache webCache;
    private final String userAgent;
    /** Charset used to decode robots.txt documents. */
    private final Charset cs = Charset.forName("UTF-8");
    /** True once the filter group has been told that a robots filter saw a result. */
    private boolean notifiedSeen = false;
    /** True once the filter group has been told that a result passed the robots filter. */
    private boolean notifiedPassed = false;

    /**
     * Creates a filter backed by the given live web cache.
     *
     * @param liveWebCache cache used to fetch robots.txt documents
     * @param str user agent whose robots.txt directives are applied
     * @param j currently ignored; robots documents are always requested with a
     *        maximum cache age argument of {@code 0}
     */
    public HRobotExclusionFilter(LiveWebCache liveWebCache, String str, long j) {
        this.aggregation = new RobotsDirectiveAggregation();
        this.webCache = liveWebCache;
        this.userAgent = str;
    }

    /**
     * Fetches every robots.txt URL the aggregation reports as missing for the
     * given host and adds the resulting directives to the aggregation.
     *
     * <p>A non-200 response or an unavailable live document is recorded as
     * {@link #ALLOW_ROBOT_DIRECTIVE}.
     *
     * @param str host whose robots directives are needed
     * @throws LiveWebCacheUnavailableException propagated from the live web cache
     * @throws LiveWebTimeoutException propagated from the live web cache
     * @throws MalformedURLException if a robots URL cannot be parsed
     * @throws IOException if reading a robots document fails
     */
    private void updateAggregation(String str) throws LiveWebCacheUnavailableException, LiveWebTimeoutException, MalformedURLException, IOException {
        for (String robotUrl : this.aggregation.getMissingRobotUrls(str)) {
            long start = System.currentTimeMillis();
            Resource resource = null;
            try {
                resource = this.webCache.getCachedResource(new URL(robotUrl), 0L, true);
                if (resource.getStatusCode() != 200) {
                    LOGGER.info("ROBOT: Non200(" + robotUrl + ")");
                    this.aggregation.addDirectives(robotUrl, ALLOW_ROBOT_DIRECTIVE);
                } else {
                    BufferedReader reader = new BufferedReader(new InputStreamReader(resource, this.cs));
                    this.aggregation.addDirectives(robotUrl, new Robotstxt(reader).getDirectivesFor(this.userAgent));
                }
            } catch (LiveDocumentNotAvailableException e) {
                if (LOGGER.isLoggable(Level.INFO)) {
                    LOGGER.info("ROBOT: LiveDocumentNotAvailableException(" + robotUrl + ")");
                }
                this.aggregation.addDirectives(robotUrl, ALLOW_ROBOT_DIRECTIVE);
            } finally {
                // The resource is a stream; release it on every path.
                closeQuietly(resource, robotUrl);
            }
            PerformanceLogger.noteElapsed("RobotRequest", System.currentTimeMillis() - start, robotUrl);
        }
    }

    /**
     * Closes a fetched resource, logging rather than propagating a failure so
     * that a close error neither masks an in-flight exception nor changes the
     * outcome of an already-processed robots document.
     */
    private static void closeQuietly(Resource resource, String robotUrl) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            LOGGER.log(Level.FINE, "ROBOT: failed to close resource for " + robotUrl, e);
        }
    }

    /** Tells the filter group, once per filter instance, that this filter saw a result. */
    private void noteSeen() {
        if (this.notifiedSeen) {
            return;
        }
        if (this.filterGroup != null) {
            this.filterGroup.setSawRobots();
        }
        this.notifiedSeen = true;
    }

    /** Tells the filter group, once per filter instance, that a result passed this filter. */
    private void notePassed() {
        if (this.notifiedPassed) {
            return;
        }
        if (this.filterGroup != null) {
            this.filterGroup.setPassedRobots();
        }
        this.notifiedPassed = true;
    }

    /**
     * Checks a capture against the robots directives of its host.
     *
     * @param captureSearchResult capture to test
     * @return {@code 0} when the capture is a robots.txt document or its path
     *         is not blocked; {@code 2} when the live web cache was unavailable
     *         or timed out (now, or as already recorded on the filter group);
     *         {@code 1} otherwise (blocked, malformed URL, or I/O failure).
     *         NOTE(review): these presumably map to the ObjectFilter
     *         include/abort/exclude constants - confirm.
     */
    @Override // org.archive.wayback.util.ObjectFilter
    public int filterObject(CaptureSearchResult captureSearchResult) {
        noteSeen();

        String originalUrl = captureSearchResult.getOriginalUrl();
        String urlPath = UrlOperations.getURLPath(originalUrl);

        // Captures of robots.txt itself are always let through.
        if (urlPath.equals(ROBOT_SUFFIX)) {
            notePassed();
            return 0;
        }

        // Tracks a cache failure locally so the abort decision does not depend
        // on a filter group being attached.
        boolean liveWebFailed = false;
        try {
            updateAggregation(UrlOperations.urlToHost(originalUrl));
            if (!this.aggregation.isBlocked(urlPath)) {
                if (LOGGER.isLoggable(Level.FINER)) {
                    LOGGER.finer("ROBOT: ALLOWED(" + originalUrl + ")");
                }
                notePassed();
                return 0;
            }
        } catch (MalformedURLException e) {
            LOGGER.warning("ROBOT: MalformedURLException(" + originalUrl + ")");
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "ROBOT: IOException(" + originalUrl + ")", e);
            return 1;
        } catch (LiveWebCacheUnavailableException e) {
            LOGGER.severe("ROBOT: LiveWebCacheUnavailableException(" + originalUrl + ")");
            liveWebFailed = true;
            if (this.filterGroup != null) {
                this.filterGroup.setLiveWebGone();
            }
        } catch (LiveWebTimeoutException e) {
            LOGGER.severe("ROBOT: LiveDocumentTimedOutException(" + originalUrl + ")");
            liveWebFailed = true;
            if (this.filterGroup != null) {
                this.filterGroup.setRobotTimedOut();
            }
        }

        boolean groupFailed = this.filterGroup != null
                && (this.filterGroup.getRobotTimedOut() || this.filterGroup.getLiveWebGone());
        if (liveWebFailed || groupFailed) {
            return 2;
        }

        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("ROBOT: BLOCKED(" + originalUrl + ")");
        }
        return 1;
    }
}
