package org.allenai.pdffigures2;

import org.allenai.common.Logging;
import org.allenai.common.Logging$logger$;
import org.allenai.common.Logging$loggerConfig$;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Function1;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.IterableLike;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqView$;
import scala.collection.TraversableLike;
import scala.collection.generic.GenericTraversableTemplate;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.math.Ordering$Int$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.Tuple3Zipped$;
import scala.runtime.Tuple3Zipped$Ops$;
import scala.runtime.VolatileObjectRef;
import scala.util.matching.Regex;

/* compiled from: FormattingTextExtractor.scala */
/* loaded from: input_file:org/allenai/pdffigures2/FormattingTextExtractor$.class */
/**
 * Decompiled (JADX) form of the Scala singleton object {@code FormattingTextExtractor}.
 *
 * <p>The public entry point visible here is {@link #extractFormattingText(List)}, which combines the
 * results of {@link #findHeaders(Seq, int)}, {@link #findPageNumber(Seq, int)} and an abstract
 * search over the first two pages into one {@code PageWithClassifiedText} per input page.
 *
 * <p>NOTE(review): most of the actual predicates and mappers live in the synthetic
 * {@code FormattingTextExtractor$$anonfun$N} classes, which are not part of this file; comments
 * below describe only what this class itself does with them.
 *
 * <p>NOTE(review): this is decompiler output, not hand-written Java. Constructs such as
 * {@code ?? r0}, writes to {@code final} fields outside an initializer, and {@code elem == 0} are
 * decompiler artifacts and are not valid Java source.
 */
public final class FormattingTextExtractor$ implements Logging {
    /** Singleton instance; assigned from the private constructor, which the static initializer invokes. */
    public static final FormattingTextExtractor$ MODULE$ = null;
    /** Regex compiled in the constructor from {@code ^(Abstract|ABSTRACT)(((—|-)[a-zA-Z]*)|.)?$}. */
    private final Regex org$allenai$pdffigures2$FormattingTextExtractor$$AbstractRegex;
    /** Regex compiled in the constructor from {@code [1-9][0-9]*}. */
    private final Regex org$allenai$pdffigures2$FormattingTextExtractor$$PageNumberRegex;
    /** SLF4J logger, installed through the {@code Logging} trait setter during construction. */
    private final Logger internalLogger;
    /** Lazily created {@code Logging.logger} helper; {@code null} until first use. */
    private volatile Logging$logger$ logger$module;
    /** Lazily created {@code Logging.loggerConfig} helper; {@code null} until first use. */
    private volatile Logging$loggerConfig$ loggerConfig$module;

    // Instantiating the class once is what populates MODULE$ (see the constructor).
    static {
        new FormattingTextExtractor$();
    }

    /**
     * Slow path for {@link #org$allenai$pdffigures2$FormattingTextExtractor$$Interval$2(VolatileObjectRef)}:
     * while holding this instance's monitor, creates the {@code FormattingTextExtractor$Interval$4$}
     * object and stores it in the holder if the holder is still empty.
     *
     * @param volatileObjectRef holder for the lazily created object
     * @return the object stored in {@code volatileObjectRef.elem} after initialization
     */
    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v5 */
    /* JADX WARN: Type inference failed for: r1v2, types: [org.allenai.pdffigures2.FormattingTextExtractor$Interval$4$, T] */
    private FormattingTextExtractor$Interval$4$ org$allenai$pdffigures2$FormattingTextExtractor$$Interval$2$lzycompute(VolatileObjectRef volatileObjectRef) {
        ?? r0 = this;
        synchronized (r0) {
            // Re-check under the lock so the object is only created once.
            if (volatileObjectRef.elem == null) {
                volatileObjectRef.elem = new FormattingTextExtractor$Interval$4$(volatileObjectRef);
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return (FormattingTextExtractor$Interval$4$) volatileObjectRef.elem;
        }
    }

    /**
     * Returns the SLF4J logger backing the {@code Logging} trait.
     *
     * @return the logger stored in {@code internalLogger}
     */
    @Override // org.allenai.common.Logging
    public Logger internalLogger() {
        return this.internalLogger;
    }

    /**
     * Slow path for {@link #logger()}: while holding this instance's monitor, creates the
     * {@code Logging$logger$} helper if it has not been created yet.
     *
     * @return the (now initialized) {@code logger$module}
     */
    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v5 */
    private Logging$logger$ logger$lzycompute() {
        ?? r0 = this;
        synchronized (r0) {
            if (this.logger$module == null) {
                this.logger$module = new Logging$logger$(this);
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return this.logger$module;
        }
    }

    /**
     * Returns the {@code Logging.logger} helper, creating it on first access.
     *
     * @return the cached helper, or a freshly created one if the field was still {@code null}
     */
    @Override // org.allenai.common.Logging
    public Logging$logger$ logger() {
        return this.logger$module == null ? logger$lzycompute() : this.logger$module;
    }

    /**
     * Slow path for {@link #loggerConfig()}: while holding this instance's monitor, creates the
     * {@code Logging$loggerConfig$} helper if it has not been created yet.
     *
     * @return the (now initialized) {@code loggerConfig$module}
     */
    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v5 */
    private Logging$loggerConfig$ loggerConfig$lzycompute() {
        ?? r0 = this;
        synchronized (r0) {
            if (this.loggerConfig$module == null) {
                this.loggerConfig$module = new Logging$loggerConfig$(this);
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return this.loggerConfig$module;
        }
    }

    /**
     * Returns the {@code Logging.loggerConfig} helper, creating it on first access.
     *
     * @return the cached helper, or a freshly created one if the field was still {@code null}
     */
    @Override // org.allenai.common.Logging
    public Logging$loggerConfig$ loggerConfig() {
        return this.loggerConfig$module == null ? loggerConfig$lzycompute() : this.loggerConfig$module;
    }

    /**
     * Trait-generated setter that stores the logger for the {@code Logging} trait. In this file it is
     * called only from the constructor.
     *
     * @param logger the SLF4J logger to store
     */
    @Override // org.allenai.common.Logging
    public void org$allenai$common$Logging$_setter_$internalLogger_$eq(Logger logger) {
        this.internalLogger = logger;
    }

    /**
     * Accessor for the abstract-heading regex (name-mangled so closure classes can reach it).
     *
     * @return the regex compiled in the constructor
     */
    public Regex org$allenai$pdffigures2$FormattingTextExtractor$$AbstractRegex() {
        return this.org$allenai$pdffigures2$FormattingTextExtractor$$AbstractRegex;
    }

    /**
     * Selects the paragraph(s) on {@code page} that make up the abstract.
     *
     * <p>Paragraphs are first filtered with {@code $$anonfun$1} (presumably a match against the
     * abstract regex - the predicate is not visible here; verify). The outcome is:
     * <ul>
     *   <li>not exactly one match: an empty sequence;</li>
     *   <li>one match that is not a single line holding a single word: that paragraph alone;</li>
     *   <li>one single-word match: a second filter ({@code $$anonfun$2}, given the matched paragraph
     *       and the x-center of its boundary) is applied to the page's paragraphs. If it yields
     *       exactly one paragraph, the result is the match followed by that paragraph; otherwise
     *       the match alone.</li>
     * </ul>
     *
     * @param page the page whose paragraphs are searched
     * @return zero, one or two paragraphs as described above
     */
    /* JADX WARN: Multi-variable type inference failed */
    public Seq<Paragraph> selectAbstract(Page page) {
        Seq filter = page.paragraphs().filter(new FormattingTextExtractor$$anonfun$1());
        if (filter.size() != 1) {
            return (Seq) Seq$.MODULE$.apply(Nil$.MODULE$);
        }
        logger().debug(new FormattingTextExtractor$$anonfun$selectAbstract$1());
        Paragraph paragraph = (Paragraph) filter.mo455head();
        // Only a one-line, one-word match triggers the search for a second paragraph.
        if (!(paragraph.lines().size() == 1 && paragraph.lines().mo455head().words().size() == 1)) {
            return (Seq) Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Paragraph[]{paragraph}));
        }
        Seq filter2 = page.paragraphs().filter(new FormattingTextExtractor$$anonfun$2(paragraph, paragraph.boundary().xCenter()));
        if (filter2.size() != 1) {
            return (Seq) Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Paragraph[]{paragraph}));
        }
        logger().debug(new FormattingTextExtractor$$anonfun$selectAbstract$2());
        return (Seq) ((TraversableLike) Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Paragraph[]{paragraph}))).$plus$plus(filter2, Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Decides, per page, which of the supplied candidate paragraphs to keep as a header.
     *
     * <p>Steps visible in this method:
     * <ol>
     *   <li>Flatten the optional candidates; if fewer than {@code i} are present, return
     *       {@code seq.size()} filler elements (produced by {@code $$anonfun$selectHeaderCandidates$4};
     *       presumably {@code None} - verify).</li>
     *   <li>Map each candidate to a key, group by it, reduce each group to an {@code int} and pick
     *       the entry with the largest value. If that value is at least {@code i}, map the original
     *       candidates through a closure parameterized by the winning string key.</li>
     *   <li>Otherwise map the candidates with {@code $$anonfun$8} (which uses a lazily created local
     *       {@code Interval} object held in a {@code VolatileObjectRef}), look for an element
     *       satisfying {@code $$anonfun$9(i, ...)}, and if one exists map the original candidates
     *       through a closure parameterized by it; if none exists, return filler elements.</li>
     * </ol>
     *
     * @param seq  the pages; only its size is used here
     * @param seq2 one optional candidate paragraph per page
     * @param i    minimum count required for candidates to be accepted
     * @return one optional paragraph per page
     * @throws MatchError if the max-by result is {@code null}
     */
    private Seq<Option<Paragraph>> selectHeaderCandidates(Seq<Page> seq, Seq<Option<Paragraph>> seq2, int i) {
        Seq seq3 = (Seq) seq2.flatten2(new FormattingTextExtractor$$anonfun$3());
        if (seq3.size() < i) {
            return (Seq) Seq$.MODULE$.fill(seq.size(), new FormattingTextExtractor$$anonfun$selectHeaderCandidates$4());
        }
        Tuple2 tuple2 = (Tuple2) ((TraversableLike) seq3.map(new FormattingTextExtractor$$anonfun$4(), Seq$.MODULE$.canBuildFrom())).groupBy((Function1) new FormattingTextExtractor$$anonfun$5()).mapValues((Function1) new FormattingTextExtractor$$anonfun$6()).maxBy(new FormattingTextExtractor$$anonfun$7(), Ordering$Int$.MODULE$);
        if (tuple2 == null) {
            throw new MatchError(tuple2);
        }
        // Re-pack as (String key, int value); only the key and the value are used below.
        Tuple2 tuple22 = new Tuple2((String) tuple2.mo2061_1(), BoxesRunTime.boxToInteger(tuple2._2$mcI$sp()));
        String str = (String) tuple22.mo2061_1();
        if (tuple22._2$mcI$sp() >= i) {
            return (Seq) seq2.map(new FormattingTextExtractor$$anonfun$selectHeaderCandidates$1(str), Seq$.MODULE$.canBuildFrom());
        }
        // Holder for the method-local Interval companion object, created on demand by the closures.
        VolatileObjectRef<Object> zero = VolatileObjectRef.zero();
        Seq seq4 = (Seq) seq3.map(new FormattingTextExtractor$$anonfun$8(zero), Seq$.MODULE$.canBuildFrom());
        Option<A> find = seq4.find(new FormattingTextExtractor$$anonfun$9(i, seq4));
        return find.isDefined() ? (Seq) seq2.map(new FormattingTextExtractor$$anonfun$selectHeaderCandidates$2(find, zero), Seq$.MODULE$.canBuildFrom()) : (Seq) Seq$.MODULE$.fill(seq.size(), new FormattingTextExtractor$$anonfun$selectHeaderCandidates$3());
    }

    /**
     * Finds header paragraphs for every page.
     *
     * <p>Each page is mapped by {@code $$anonfun$10} to a pair, and the pairs are unzipped into two
     * parallel sequences. {@link #selectHeaderCandidates(Seq, Seq, int)} is run on the first
     * sequence. The second sequence is then zipped with that result and mapped through
     * {@code $$anonfun$15} before a second {@code selectHeaderCandidates} pass. Finally the two
     * per-page results are zipped and combined by {@code $$anonfun$findHeaders$1}.
     *
     * @param seq the pages to inspect
     * @param i   minimum count passed through to {@code selectHeaderCandidates}
     * @return one sequence of header paragraphs per page
     * @throws MatchError if the unzip result is {@code null}
     */
    public Seq<Seq<Paragraph>> findHeaders(Seq<Page> seq, int i) {
        Tuple2 unzip = ((GenericTraversableTemplate) seq.map(new FormattingTextExtractor$$anonfun$10(), Seq$.MODULE$.canBuildFrom())).unzip(Predef$.MODULE$.$conforms());
        if (unzip == null) {
            throw new MatchError(unzip);
        }
        Tuple2 tuple2 = new Tuple2((Seq) unzip.mo2061_1(), (Seq) unzip.mo2060_2());
        Seq<Option<Paragraph>> seq2 = (Seq) tuple2.mo2061_1();
        Seq seq3 = (Seq) tuple2.mo2060_2();
        Seq<Option<Paragraph>> selectHeaderCandidates = selectHeaderCandidates(seq, seq2, i);
        return (Seq) ((TraversableLike) selectHeaderCandidates.zip(selectHeaderCandidates(seq, (Seq) ((TraversableLike) seq3.zip(selectHeaderCandidates, Seq$.MODULE$.canBuildFrom())).map(new FormattingTextExtractor$$anonfun$15(), Seq$.MODULE$.canBuildFrom()), i), Seq$.MODULE$.canBuildFrom())).map(new FormattingTextExtractor$$anonfun$findHeaders$1(), Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Accessor for the page-number regex (name-mangled so closure classes can reach it).
     *
     * @return the regex compiled in the constructor
     */
    public Regex org$allenai$pdffigures2$FormattingTextExtractor$$PageNumberRegex() {
        return this.org$allenai$pdffigures2$FormattingTextExtractor$$PageNumberRegex;
    }

    /**
     * Finds an optional page-number line for every page.
     *
     * <p>Each page is mapped by {@code $$anonfun$16} to an optional line. The per-page results are
     * returned only if at least {@code i} of them satisfy {@code $$anonfun$18} (presumably "is
     * defined" - verify); otherwise {@code seq.size()} filler elements are returned (presumably
     * {@code None} - verify).
     *
     * @param seq the pages to inspect
     * @param i   minimum number of pages that must satisfy the predicate
     * @return one optional line per page
     */
    public Seq<Option<Line>> findPageNumber(Seq<Page> seq, int i) {
        Seq<Option<Line>> seq2 = (Seq) seq.map(new FormattingTextExtractor$$anonfun$16(), Seq$.MODULE$.canBuildFrom());
        if (!(i <= seq2.count(new FormattingTextExtractor$$anonfun$18()))) {
            return (Seq) Seq$.MODULE$.fill(seq.size(), new FormattingTextExtractor$$anonfun$findPageNumber$2());
        }
        logger().debug(new FormattingTextExtractor$$anonfun$findPageNumber$1());
        return seq2;
    }

    /**
     * Classifies formatting text (headers, page numbers, abstract) for a list of pages.
     *
     * <p>The threshold handed to {@link #findHeaders(Seq, int)} and {@link #findPageNumber(Seq, int)}
     * is the page count minus a tolerance: 0 for fewer than 3 pages, 1 for fewer than 5 pages,
     * 2 otherwise. The first two pages are additionally mapped lazily through {@code $$anonfun$19}
     * and searched with {@code $$anonfun$20}; the first hit, if any, and the {@code int} in its
     * first tuple slot are handed to {@code $$anonfun$21}, which builds the result for each
     * (page, headers, page number) triple.
     *
     * @param list the pages of the document
     * @return one {@code PageWithClassifiedText} per zipped (page, headers, page number) triple
     */
    public List<PageWithClassifiedText> extractFormattingText(List<Page> list) {
        int size = list.size() - (list.size() < 3 ? 0 : list.size() < 5 ? 1 : 2);
        Seq<Seq<Paragraph>> findHeaders = findHeaders(list, size);
        Seq<Option<Line>> findPageNumber = findPageNumber(list, size);
        // view() keeps the map lazy, so find() stops at the first page that satisfies the predicate.
        Option find = ((IterableLike) list.take(2).view().map(new FormattingTextExtractor$$anonfun$19(), SeqView$.MODULE$.canBuildFrom())).find(new FormattingTextExtractor$$anonfun$20());
        return (List) Tuple3Zipped$.MODULE$.map$extension(Tuple3Zipped$Ops$.MODULE$.zipped$extension(Predef$.MODULE$.tuple3ToZippedOps(new Tuple3(list, findHeaders, findPageNumber)), Predef$.MODULE$.$conforms(), Predef$.MODULE$.$conforms(), Predef$.MODULE$.$conforms()), new FormattingTextExtractor$$anonfun$21(find, find.isDefined() ? new Some(BoxesRunTime.boxToInteger(((Tuple2) find.get())._1$mcI$sp())) : None$.MODULE$), List$.MODULE$.canBuildFrom());
    }

    /**
     * Returns the lazily created {@code FormattingTextExtractor$Interval$4$} object held in
     * {@code volatileObjectRef}, creating it through the synchronized slow path when the holder is
     * still empty.
     *
     * <p>NOTE(review): {@code elem == 0} is presumably the decompiler's rendering of a null check on
     * the untyped holder - confirm against the bytecode.
     *
     * @param volatileObjectRef holder for the lazily created object
     * @return the object stored in the holder
     */
    /* JADX WARN: Multi-variable type inference failed */
    public final FormattingTextExtractor$Interval$4$ org$allenai$pdffigures2$FormattingTextExtractor$$Interval$2(VolatileObjectRef volatileObjectRef) {
        return volatileObjectRef.elem == 0 ? org$allenai$pdffigures2$FormattingTextExtractor$$Interval$2$lzycompute(volatileObjectRef) : (FormattingTextExtractor$Interval$4$) volatileObjectRef.elem;
    }

    /**
     * Singleton constructor: publishes the instance through {@code MODULE$}, installs an SLF4J
     * logger named after this class, and compiles the two regexes.
     */
    private FormattingTextExtractor$() {
        MODULE$ = this;
        org$allenai$common$Logging$_setter_$internalLogger_$eq(LoggerFactory.getLogger(getClass()));
        this.org$allenai$pdffigures2$FormattingTextExtractor$$AbstractRegex = new StringOps(Predef$.MODULE$.augmentString("^(Abstract|ABSTRACT)(((—|-)[a-zA-Z]*)|.)?$")).r();
        this.org$allenai$pdffigures2$FormattingTextExtractor$$PageNumberRegex = new StringOps(Predef$.MODULE$.augmentString("[1-9][0-9]*")).r();
    }
}
