/*
 * Decompiled with CFR 0.152.
 */
package org.apache.pig.piggybank.storage;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.pig.LoadFunc;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextInputFormat;
import org.apache.pig.bzip2r.Bzip2TextInputFormat;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

/**
 * Pig {@code LoadFunc} that extracts XML elements with a given tag name from
 * text input and returns each element as one record.
 *
 * <p>Every tuple produced by {@link #getNext()} has a single field: a
 * {@code DataByteArray} built from the text of one matched element (either a
 * self-closed tag or everything from an opening tag through its matching
 * closing tag).
 */
public class XMLLoader
extends LoadFunc {
    /** Location passed to {@link #setLocation}; used to choose the input format. */
    private String loadLocation;
    /** Reader handed over by {@link #prepareToRead}; source of the records. */
    protected RecordReader in = null;
    /** Name of the XML tag whose elements are extracted. */
    private String identifier;

    /**
     * Creates a loader for elements with the given tag name.
     *
     * @param identifier name of the XML tag to extract; it is validated when a
     *                   record reader is created, not here
     */
    public XMLLoader(String identifier) {
        this.identifier = identifier;
    }

    /**
     * Stores the record reader the following {@link #getNext()} calls read from.
     *
     * @param reader reader to pull records from
     * @param split  split being processed (not used by this loader)
     */
    public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
        this.in = reader;
    }

    /**
     * Reads the next XML element from the underlying reader.
     *
     * @return a one-field tuple holding the element text, or {@code null} when
     *         the reader has no more records
     * @throws IOException if reading fails or the thread is interrupted while
     *                     reading
     */
    public Tuple getNext() throws IOException {
        try {
            if (!this.in.nextKeyValue()) {
                return null;
            }
            return this.createTuple(this.in.getCurrentValue().toString());
        }
        catch (InterruptedException e) {
            // Returning null here would look like a normal end of input and
            // silently drop the remaining records, so surface the failure and
            // keep the interrupt flag set for the caller.
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while reading the next XML record", e);
        }
    }

    /**
     * Wraps a string into a single-field tuple.
     *
     * @param str text to wrap
     * @return a tuple whose only field is a {@code DataByteArray} built from {@code str}
     */
    public Tuple createTuple(String str) {
        return TupleFactory.getInstance().newTuple((Object)new DataByteArray(str));
    }

    /**
     * Returns an input format whose record readers are wrapped in
     * {@link XMLRecordReader}. Locations ending in {@code .bz2} or {@code .bz}
     * get a {@code Bzip2TextInputFormat}; everything else gets a
     * {@code PigTextInputFormat}.
     *
     * <p>Reads the location stored by {@link #setLocation}, so that method must
     * have been called first.
     *
     * @return the input format to use for the current location
     */
    public InputFormat getInputFormat() throws IOException {
        if (this.loadLocation.endsWith(".bz2") || this.loadLocation.endsWith(".bz")) {
            return new Bzip2TextInputFormat(){

                public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
                    try {
                        return XMLLoader.this.newXMLRecordReader(super.createRecordReader(split, context));
                    }
                    catch (IOException e) {
                        throw new RuntimeException("Cannot create input split", e);
                    }
                    catch (InterruptedException e) {
                        // Keep the interrupt visible to the caller.
                        Thread.currentThread().interrupt();
                        throw new RuntimeException("Cannot create input split", e);
                    }
                }
            };
        }
        return new PigTextInputFormat(){

            public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
                return XMLLoader.this.newXMLRecordReader(super.createRecordReader(split, context));
            }
        };
    }

    /**
     * Remembers the load location and registers it as the job's input path.
     *
     * @param location input location
     * @param job      job whose input paths are set
     */
    public void setLocation(String location, Job job) throws IOException {
        this.loadLocation = location;
        FileInputFormat.setInputPaths(job, location);
    }

    /** Wraps a line-oriented reader so that it yields elements of the configured tag. */
    private XMLRecordReader newXMLRecordReader(RecordReader<LongWritable, Text> originalReader) {
        XMLRecordReader reader = new XMLRecordReader(originalReader);
        reader.setXMLIdentifier(this.identifier);
        return reader;
    }

    /**
     * Record reader that consumes the text values of a wrapped reader and emits
     * one record per XML element with the configured tag name.
     *
     * <p>The key is the position of the element's first tag, computed from the
     * wrapped reader's key plus the offset inside the current value; the value
     * is the element text. {@link #setXMLIdentifier} must be called before
     * {@link #nextKeyValue()}.
     */
    public static class XMLRecordReader
    extends RecordReader<LongWritable, Text> {
        protected final RecordReader<LongWritable, Text> wrapped;
        // A name is one letter or underscore followed by any number of letters,
        // digits, hyphens or underscores, so one-character names are accepted.
        private static final String XMLTagNameRegExp = "[a-zA-Z\\_][0-9a-zA-Z\\-_]*";
        /** Alternation of self-closed tag (group 1), open tag (group 2), close tag (group 3). */
        private Pattern identifiersPattern;
        private LongWritable key;
        private Text value;
        /** Position corresponding to the start of {@link #buffer}. */
        private long bufferPos;
        /** Unconsumed remainder of the current value of the wrapped reader. */
        private String buffer;
        /** End position (start + length) of the split originally assigned. */
        private long originalEnd;
        /** Set once the reader has passed the end of the original split. */
        private boolean terminated;

        /**
         * @param wrapped reader that supplies the raw text to scan
         */
        public XMLRecordReader(RecordReader<LongWritable, Text> wrapped) {
            this.wrapped = wrapped;
        }

        /**
         * Initializes the wrapped reader on an extended version of the split:
         * same start, up to ten times the original length, capped at the end of
         * the file. The original end is remembered and used by
         * {@link #nextKeyValue()} to decide when to stop starting new elements.
         *
         * @param split   split to read; must be a {@code FileSplit}
         * @param context task context used to look up the file length
         * @throws RuntimeException if {@code split} is not a {@code FileSplit}
         */
        public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            this.key = new LongWritable();
            this.value = new Text();
            if (!(split instanceof FileSplit)) {
                throw new RuntimeException("Cannot override a split of type'" + split.getClass() + "'");
            }
            FileSplit fsplit = (FileSplit)split;
            this.originalEnd = fsplit.getStart() + fsplit.getLength();
            Path path = fsplit.getPath();
            long fileEnd = path.getFileSystem(context.getConfiguration()).getFileStatus(path).getLen();
            long extendedLength = Math.min(fsplit.getLength() * 10L, fileEnd - fsplit.getStart());
            FileSplit extendedSplit = new FileSplit(path, fsplit.getStart(), extendedLength, fsplit.getLocations());
            this.wrapped.initialize(extendedSplit, context);
        }

        /**
         * Sets the tag name to extract and compiles the matching pattern.
         *
         * @param identifier tag name; must start with a letter or underscore and
         *                   continue with letters, digits, hyphens or underscores
         * @throws RuntimeException if {@code identifier} is not a valid tag name
         */
        public void setXMLIdentifier(String identifier) {
            if (!identifier.matches(XMLTagNameRegExp)) {
                throw new RuntimeException("XML tag identifier '" + identifier + "' does not match the regular expression /" + XMLTagNameRegExp + "/");
            }
            // Attributes must be separated from the name by whitespace; without
            // that requirement a longer name sharing the prefix (identifier
            // "item" vs. a self-closed "items" tag) would be matched as well.
            String inlineClosedTagRegExp = "<\\s*" + identifier + "(?:\\s[^>]*)?/>";
            String openTagRegExp = "<\\s*" + identifier + "(?:\\s*|\\s+(?:[^/>]*|[^>]*[^>/]))>";
            String closeTagRegExp = "</\\s*" + identifier + "\\s*>";
            this.identifiersPattern = Pattern.compile("(" + inlineClosedTagRegExp + ")|(" + openTagRegExp + ")|(" + closeTagRegExp + ")");
        }

        /** Closes the wrapped reader. */
        public void close() throws IOException {
            this.wrapped.close();
        }

        /**
         * Delegates to the wrapped reader's {@code equals}.
         */
        // NOTE(review): because the comparison is delegated, reader.equals(reader)
        // is only true if the wrapped reader considers itself equal to this
        // wrapper - confirm whether any caller relies on this.
        public boolean equals(Object obj) {
            return this.wrapped.equals(obj);
        }

        /** @return the position key of the most recently returned element */
        public LongWritable getCurrentKey() throws IOException, InterruptedException {
            return this.key;
        }

        /** @return the text of the most recently returned element */
        public Text getCurrentValue() throws IOException, InterruptedException {
            return this.value;
        }

        /**
         * Reports progress as the wrapped reader's progress scaled by ten (the
         * wrapped reader works on a split up to ten times longer than the
         * original one), clamped so it never exceeds {@code 1.0}.
         *
         * @return progress, at most {@code 1.0}
         */
        public float getProgress() throws IOException, InterruptedException {
            return Math.min(1.0f, this.wrapped.getProgress() * 10.0f);
        }

        /** Delegates to the wrapped reader's {@code hashCode}. */
        public int hashCode() {
            return this.wrapped.hashCode();
        }

        /**
         * Advances to the next element with the configured tag name.
         *
         * <p>Values of the wrapped reader are scanned one at a time. A
         * self-closed tag is returned immediately. For open/close tags a nesting
         * depth is tracked, and the element is returned when the close tag that
         * brings the depth back to zero is found; text from earlier values that
         * belongs to the still-open element is accumulated meanwhile. Reading
         * stops once the position has reached the end of the original split and
         * no element is open, or when the wrapped reader is exhausted.
         *
         * @return {@code true} if a new key/value pair is available
         * @throws IOException if the wrapped reader fails or the thread is
         *                     interrupted while reading
         */
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (this.terminated) {
                return false;
            }
            int depth = 0;
            // Text of the currently open element collected from previous values.
            StringBuilder currentMatch = new StringBuilder();
            try {
                while (true) {
                    // Where the part of the current buffer that belongs to the
                    // open element starts; 0 for continuation values.
                    int offsetOfFirstMatchedOpenTag = 0;
                    while (this.buffer == null || this.buffer.isEmpty()) {
                        if (!this.wrapped.nextKeyValue()) {
                            return false;
                        }
                        // Only stop at the split boundary between elements; an
                        // element that is still open is read to its end.
                        if (this.bufferPos >= this.originalEnd && depth == 0) {
                            this.terminated = true;
                            return false;
                        }
                        this.bufferPos = this.wrapped.getCurrentKey().get();
                        this.buffer = this.wrapped.getCurrentValue().toString();
                    }
                    Matcher matcher = this.identifiersPattern.matcher(this.buffer);
                    while (matcher.find()) {
                        int startOfCurrentMatch = matcher.start();
                        int endOfCurrentMatch = matcher.end();
                        String group = matcher.group(1);
                        if (group != null) {
                            // Self-closed tag: it is a complete element by itself.
                            this.value = new Text(group);
                            this.key.set(this.bufferPos + (long)matcher.start(1));
                            this.bufferPos += (long)matcher.end(1);
                            this.buffer = this.buffer.substring(endOfCurrentMatch);
                            return true;
                        }
                        group = matcher.group(2);
                        if (group != null) {
                            // Open tag: the outermost one marks the element start.
                            if (depth == 0) {
                                offsetOfFirstMatchedOpenTag = startOfCurrentMatch;
                                this.key.set(this.bufferPos + (long)startOfCurrentMatch);
                            }
                            ++depth;
                            continue;
                        }
                        group = matcher.group(3);
                        if (group != null) {
                            if (depth <= 0) {
                                // Close tag without a matching open tag: ignore.
                                continue;
                            }
                            --depth;
                            if (depth != 0) {
                                // Closed a nested element; the outer one is still open.
                                continue;
                            }
                            if (currentMatch.length() == 0) {
                                this.value = new Text(this.buffer.substring(offsetOfFirstMatchedOpenTag, endOfCurrentMatch));
                            } else {
                                currentMatch.append(this.buffer, offsetOfFirstMatchedOpenTag, endOfCurrentMatch);
                                this.value = new Text(currentMatch.toString());
                            }
                            this.buffer = this.buffer.substring(endOfCurrentMatch);
                            this.bufferPos += (long)endOfCurrentMatch;
                            return true;
                        }
                        throw new RuntimeException("Invalid match '" + matcher.group() + "' in string '" + this.buffer + "'");
                    }
                    if (depth > 0) {
                        // Element continues in the next value: keep what we have.
                        currentMatch.append(this.buffer, offsetOfFirstMatchedOpenTag, this.buffer.length());
                    }
                    this.buffer = null;
                }
            }
            catch (InterruptedException e) {
                // Preserve the cause and the interrupt flag when converting.
                Thread.currentThread().interrupt();
                throw new IOException("Error getting input", e);
            }
        }

        /** Delegates to the wrapped reader's {@code toString}. */
        public String toString() {
            return this.wrapped.toString();
        }
    }
}

