/*
 * Decompiled with CFR 0.152.
 */
package edu.harvard.hul.ois.jhove.module;

import edu.harvard.hul.ois.jhove.Agent;
import edu.harvard.hul.ois.jhove.AgentType;
import edu.harvard.hul.ois.jhove.Checksum;
import edu.harvard.hul.ois.jhove.ChecksumInputStream;
import edu.harvard.hul.ois.jhove.ChecksumType;
import edu.harvard.hul.ois.jhove.Checksummer;
import edu.harvard.hul.ois.jhove.Document;
import edu.harvard.hul.ois.jhove.DocumentType;
import edu.harvard.hul.ois.jhove.ErrorMessage;
import edu.harvard.hul.ois.jhove.Identifier;
import edu.harvard.hul.ois.jhove.IdentifierType;
import edu.harvard.hul.ois.jhove.InfoMessage;
import edu.harvard.hul.ois.jhove.JhoveBase;
import edu.harvard.hul.ois.jhove.Message;
import edu.harvard.hul.ois.jhove.Module;
import edu.harvard.hul.ois.jhove.ModuleBase;
import edu.harvard.hul.ois.jhove.Property;
import edu.harvard.hul.ois.jhove.PropertyArity;
import edu.harvard.hul.ois.jhove.PropertyType;
import edu.harvard.hul.ois.jhove.RepInfo;
import edu.harvard.hul.ois.jhove.Utf8BlockMarker;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;

/**
 * JHOVE module that checks a byte stream for well-formed UTF-8 structure and
 * collects descriptive metadata about the text.
 *
 * <p>The check is structural: every lead byte must announce a one- to four-byte
 * sequence and every following byte must lie in the continuation range
 * 0x80-0xBF. Overlong forms, surrogate code points and values above U+10FFFF
 * are <em>not</em> rejected by this implementation.
 *
 * <p>On a successful parse the module reports a {@code UTF8Metadata} property
 * list holding the character count, the Unicode blocks used, the line-ending
 * styles seen (CR, LF, CRLF) and the control characters encountered.
 *
 * <p>Instances keep per-parse state in fields, so one instance must not be
 * used for two parses at the same time.
 */
public class Utf8Module
extends ModuleBase {
    /* Module identification and descriptive text handed to the superclass constructor. */
    private static final String NAME = "UTF8-hul";
    private static final String RELEASE = "1.3";
    private static final int[] DATE = new int[]{2007, 8, 30};
    private static final String[] FORMAT = new String[]{"UTF-8"};
    private static final String COVERAGE = "Unicode 4.0.0";
    private static final String[] MIMETYPE = new String[]{"text/plain; charset=UTF-8"};
    private static final String WELLFORMED = "An UTF-8 object is well-formed if each character is correctly encoded as a one-to-four byte sequence, as defined in the specifications";
    private static final String VALIDITY = null;
    private static final String REPINFO = "Additional representation information includes: number of characters and Unicode 4.0.0 code blocks";
    private static final String NOTE = null;
    private static final String RIGHTS = "Copyright 2003-2007 by JSTOR and the President and Fellows of Harvard College. Released under the GNU Lesser General Public License.";

    /** Ordinal words used in error messages for the 2nd, 3rd and 4th byte of a sequence. */
    private static final String[] POSITION = new String[]{"second", "third", "fourth"};

    /** Display names of the C0 control characters, indexed by code point (0x00-0x1F). */
    private static final String[] controlCharMnemonics = new String[]{"NUL (0x00)", "SOH (0x01)", "STX (0x02)", "ETX (0x03)", "EOT (0x04)", "ENQ (0x05)", "ACK (0x06)", "BEL (0x07)", "BS (0x08)", "TAB (0x09)", "LF (0x0A)", "VT (0x0B)", "FF (0x0C)", "CR (0x0D)", "SO (0x0E)", "SI (0x0F)", "DLE (0x10)", "DC1 (0x11)", "DC2 (0x12)", "DC3 (0x13)", "DC4 (0x14)", "NAK (0x15)", "SYN (0x16)", "ETB (0x17)", "CAN (0x18)", "EM (0x19)", "SUB (0x1A)", "ESC (0x1B)", "FS (0x1C)", "GS (0x1D)", "RS (0x1E)", "US (0x1F)"};

    /* Code points that receive special treatment. */
    private static final int LF = 0x0A;
    private static final int CR = 0x0D;
    private static final int FIRST_PRINTABLE = 0x20;
    private static final int DEL = 0x7F;

    /** Value of {@code RepInfo.getWellFormed()} that this module treats as "not well-formed". */
    private static final int NOT_WELL_FORMED = 0;

    /** Checksumming wrapper around the input; only assigned when checksums are requested. */
    protected ChecksumInputStream _cstream;
    /** Buffered stream that {@link #parse} reads from. */
    protected DataInputStream _dstream;
    /** Set when a CR that is not followed by LF has been seen. */
    protected boolean _lineEndCR;
    /** Set when an LF that is not preceded by CR has been seen. */
    protected boolean _lineEndLF;
    /** Set when a CR immediately followed by LF has been seen. */
    protected boolean _lineEndCRLF;
    /** Code point of the previously decoded character (0 before the first one). */
    protected int _prevChar;
    /** Control characters seen so far: code point to display name. */
    protected Map<Integer, String> _controlCharMap;
    /** The first bytes of the stream, recorded by {@link #checkMark} for byte-order-mark detection. */
    protected int[] initialBytes;
    /** Tracks which Unicode blocks have been used. */
    protected Utf8BlockMarker blockMarker;

    /**
     * Creates the module and registers its vendor and the three specification
     * documents it refers to (The Unicode Standard 4.0, ISO/IEC 10646-1
     * Amendment 2 and RFC 2279).
     */
    public Utf8Module() {
        super(NAME, RELEASE, DATE, FORMAT, COVERAGE, MIMETYPE, WELLFORMED, VALIDITY, REPINFO, NOTE, RIGHTS, false);

        Agent agent = new Agent("Harvard University Library", AgentType.EDUCATIONAL);
        agent.setAddress("Office for Information Systems, 90 Mt. Auburn St., Cambridge, MA 02138");
        agent.setTelephone("+1 (617) 495-3724");
        agent.setEmail("jhove-support@hulmail.harvard.edu");
        this._vendor = agent;

        Document document = new Document("The Unicode Standard, Version 4.0", DocumentType.BOOK);
        agent = new Agent("The Unicode Consortium", AgentType.NONPROFIT);
        agent.setWeb("http://www.unicode.org/versions/Unicode4.0.0/");
        document.setAuthor(agent);
        agent = new Agent("Addison-Wesley", AgentType.COMMERCIAL);
        agent.setAddress("Boston, Massachusetts");
        document.setPublisher(agent);
        document.setDate("2003");
        document.setIdentifier(new Identifier("0-321-18578-1", IdentifierType.ISBN));
        this._specification.add(document);

        document = new Document("Information technology -- Universal Multiple-Octet Coded Character Set (UCS) -- Part 1: Architecture and Basic Multilingual Plane. Appendix R, Amendment 2", DocumentType.STANDARD);
        agent = new Agent("ISO", AgentType.STANDARD);
        agent.setAddress("1, rue de Varembe, Casa postale 56, CH-1211, Geneva 20, Switzerland");
        agent.setTelephone("+41 22 749 01 11");
        agent.setFax("+41 22 733 34 30");
        agent.setEmail("iso@iso.ch");
        agent.setWeb("http://www.iso.org/");
        document.setPublisher(agent);
        document.setDate("1991");
        document.setIdentifier(new Identifier("ISO/IEC 10646-1 Amendment 2", IdentifierType.ISO));
        this._specification.add(document);

        document = new Document("UTF-8, a transformation format of ISO 10646", DocumentType.RFC);
        agent = new Agent("F. Yergeau", AgentType.OTHER);
        document.setAuthor(agent);
        agent = new Agent("IETF", AgentType.NONPROFIT);
        agent.setWeb("http://www.ietf.org/");
        document.setPublisher(agent);
        document.setDate("1998-01");
        document.setIdentifier(new Identifier("RFC 2279", IdentifierType.RFC));
        document.setIdentifier(new Identifier("http://www.ietf.org/rfc/rfc2279.txt", IdentifierType.URL));
        this._specification.add(document);
    }

    /**
     * Reads the whole stream, checks the UTF-8 byte structure and fills
     * {@code repInfo} with the result.
     *
     * <p>Parsing stops at the first structural error: an error message is
     * attached, the object is flagged as not well-formed and no properties or
     * checksums are reported. A stream that ends in the middle of a multi-byte
     * sequence and a zero-length stream are also reported as not well-formed.
     *
     * @param inputStream the content to examine
     * @param repInfo     receives format, MIME type, messages, checksums and properties
     * @param n           not used by this implementation
     * @return always 0
     * @throws IOException if reading fails for a reason other than end of stream
     */
    public final int parse(InputStream inputStream, RepInfo repInfo, int n) throws IOException {
        this.initParse();
        repInfo.setFormat(this._format[0]);
        repInfo.setMimeType(this._mimeType[0]);
        repInfo.setModule((Module)this);

        // Reset all per-parse state.
        this.initialBytes = new int[4];
        this._lineEndCR = false;
        this._lineEndLF = false;
        this._lineEndCRLF = false;
        this._prevChar = 0;
        this._controlCharMap = new HashMap<Integer, String>();
        boolean printableSeen = false;
        repInfo.setNote("Additional representation information includes the line endings: CR, LF, or CRLF");
        this._nByte = 0L;
        long nChar = 0L;

        // Wrap the input in a checksumming stream only when checksums are
        // requested and the caller has not already supplied any.
        Checksummer checksummer = null;
        int bufferSize = this._je != null ? this._je.getBufferSize() : 0;
        if (this._je != null && this._je.getChecksumFlag() && repInfo.getChecksum().size() == 0) {
            checksummer = new Checksummer();
            this._cstream = new ChecksumInputStream(inputStream, checksummer);
            this._dstream = Utf8Module.getBufferedDataStream((InputStream)this._cstream, bufferSize);
        } else {
            this._dstream = Utf8Module.getBufferedDataStream(inputStream, bufferSize);
        }

        this.blockMarker = new Utf8BlockMarker();
        boolean eof = false;
        while (!eof) {
            // True while continuation bytes of the current sequence are still
            // outstanding; lets the EOF handler tell a clean end from a truncated one.
            boolean midSequence = false;
            int[] b = new int[4];
            try {
                boolean isMark = false;
                b[0] = Utf8Module.readUnsignedByte(this._dstream, (ModuleBase)this);
                if (this._nByte < 4L) {
                    isMark = this.checkMark(b[0], repInfo);
                    if (repInfo.getWellFormed() == NOT_WELL_FORMED) {
                        return 0;
                    }
                    if (isMark) {
                        nChar = 0L;
                    }
                }

                int nBytes = sequenceLength(b[0]);
                if (nBytes == 0) {
                    ErrorMessage errorMessage = new ErrorMessage("Not valid first byte of UTF-8 encoding", "Value = " + (char)b[0] + " (0x" + Integer.toHexString(b[0]) + ")", this._nByte);
                    repInfo.setMessage((Message)errorMessage);
                    repInfo.setWellFormed(false);
                    return 0;
                }

                midSequence = nBytes > 1;
                for (int i = 1; i < nBytes; ++i) {
                    b[i] = Utf8Module.readUnsignedByte(this._dstream, (ModuleBase)this);
                    if (this._nByte < 4L) {
                        // The three-byte BOM is recognised here, on its last byte.
                        isMark = this.checkMark(b[i], repInfo);
                    }
                    if (repInfo.getWellFormed() == NOT_WELL_FORMED) {
                        return 0;
                    }
                    if (!isContinuationByte(b[i])) {
                        ErrorMessage errorMessage = new ErrorMessage("Not valid " + POSITION[i - 1] + " byte of UTF-8 encoding", "Value = " + (char)b[i] + " (0x" + Integer.toHexString(b[i]) + ")", this._nByte);
                        repInfo.setMessage((Message)errorMessage);
                        repInfo.setWellFormed(false);
                        return 0;
                    }
                }
                midSequence = false;

                int codePoint = decode(b, nBytes);

                // A byte order mark is not text, so it does not contribute a block.
                // NOTE(review): it is still counted in "Characters", as before.
                if (!isMark) {
                    this.blockMarker.markBlock(codePoint);
                }

                // CR and LF are reported as line endings, not as control characters.
                if (codePoint < FIRST_PRINTABLE && codePoint != CR && codePoint != LF) {
                    this._controlCharMap.put(Integer.valueOf(codePoint), controlCharMnemonics[codePoint]);
                } else if (codePoint == DEL) {
                    this._controlCharMap.put(Integer.valueOf(codePoint), "DEL (0x7F)");
                }
                if (codePoint >= FIRST_PRINTABLE && codePoint != DEL) {
                    printableSeen = true;
                }

                this.checkLineEnd(codePoint);
                this._prevChar = codePoint;
                ++nChar;
            }
            catch (EOFException endOfStream) {
                if (midSequence) {
                    // The stream ended before the announced continuation bytes arrived.
                    ErrorMessage errorMessage = new ErrorMessage("Unexpected end of file within a multi-byte UTF-8 sequence", "Lead byte = 0x" + Integer.toHexString(b[0]), this._nByte);
                    repInfo.setMessage((Message)errorMessage);
                    repInfo.setWellFormed(false);
                    return 0;
                }
                eof = true;
            }
        }

        // checkLineEnd() only classifies a CR once the next character is known,
        // so a CR that is the very last character has to be recorded here.
        if (this._prevChar == CR) {
            this._lineEndCR = true;
        }

        if (checksummer != null) {
            repInfo.setSize(this._cstream.getNBytes());
            repInfo.setChecksum(new Checksum(checksummer.getCRC32(), ChecksumType.CRC32));
            String digest = checksummer.getMD5();
            if (digest != null) {
                repInfo.setChecksum(new Checksum(digest, ChecksumType.MD5));
            }
            digest = checksummer.getSHA1();
            if (digest != null) {
                repInfo.setChecksum(new Checksum(digest, ChecksumType.SHA1));
            }
        }

        if (this._nByte == 0L) {
            repInfo.setMessage((Message)new ErrorMessage("Zero-length file"));
            repInfo.setWellFormed(NOT_WELL_FORMED);
            return 0;
        }

        // The list is registered first and filled afterwards; the property holds a reference to it.
        ArrayList<Property> metadata = new ArrayList<Property>(4);
        repInfo.setProperty(new Property("UTF8Metadata", PropertyType.PROPERTY, PropertyArity.LIST, metadata));
        metadata.add(new Property("Characters", PropertyType.LONG, (Object)Long.valueOf(nChar)));

        Property blocksUsed = this.blockMarker.getBlocksUsedProperty("UnicodeBlocks");
        // Defensive: never put a null entry into the property list.
        if (blocksUsed != null) {
            metadata.add(blocksUsed);
        }

        if (this._lineEndCR || this._lineEndLF || this._lineEndCRLF) {
            ArrayList<String> lineEndings = new ArrayList<String>(3);
            if (this._lineEndCR) {
                lineEndings.add("CR");
            }
            if (this._lineEndLF) {
                lineEndings.add("LF");
            }
            if (this._lineEndCRLF) {
                lineEndings.add("CRLF");
            }
            metadata.add(new Property("LineEndings", PropertyType.STRING, PropertyArity.LIST, lineEndings));
        }

        if (!this._controlCharMap.isEmpty()) {
            // Report in code-point order: the C0 controls first, then DEL.
            ArrayList<String> controlChars = new ArrayList<String>();
            for (int code = 0; code < FIRST_PRINTABLE; ++code) {
                String mnemonic = this._controlCharMap.get(Integer.valueOf(code));
                if (mnemonic != null) {
                    controlChars.add(mnemonic);
                }
            }
            String del = this._controlCharMap.get(Integer.valueOf(DEL));
            if (del != null) {
                controlChars.add(del);
            }
            metadata.add(new Property("ControlCharacters", PropertyType.STRING, PropertyArity.LIST, controlChars));
        }

        if (!printableSeen) {
            repInfo.setMessage((Message)new InfoMessage("No printable characters"));
        }
        return 0;
    }

    /**
     * Checks whether the start of the stream looks like UTF-8.
     *
     * <p>Up to {@code getSigBytes()} characters (counted by lead byte) are
     * examined with the same structural rules as {@link #parse}. On the first
     * violation the object is flagged as not well-formed and the method
     * returns without attaching a message. If at least one byte was read and
     * no violation was found, this module's name is recorded as a signature
     * match; an empty stream is flagged as not well-formed.
     *
     * @param file        the file being examined; not used by this implementation
     * @param inputStream the content to examine
     * @param repInfo     receives format, MIME type and the outcome
     * @throws IOException if reading fails for a reason other than end of stream
     */
    public void checkSignatures(File file, InputStream inputStream, RepInfo repInfo) throws IOException {
        repInfo.setFormat(this._format[0]);
        repInfo.setMimeType(this._mimeType[0]);
        repInfo.setModule((Module)this);
        this.initialBytes = new int[4];
        JhoveBase jhoveBase = this.getBase();
        int limit = jhoveBase.getSigBytes();
        int nChecked = 0;
        this.blockMarker = new Utf8BlockMarker();
        this._nByte = 0L;
        DataInputStream dataInputStream = new DataInputStream(inputStream);

        boolean eof = false;
        while (!eof && nChecked < limit) {
            int[] b = new int[4];
            try {
                b[0] = Utf8Module.readUnsignedByte(dataInputStream, (ModuleBase)this);
                ++nChecked;
                if (this._nByte < 4L) {
                    this.checkMark(b[0], repInfo);
                    if (repInfo.getWellFormed() == NOT_WELL_FORMED) {
                        return;
                    }
                }

                int nBytes = sequenceLength(b[0]);
                if (nBytes == 0) {
                    repInfo.setWellFormed(false);
                    return;
                }

                for (int i = 1; i < nBytes; ++i) {
                    b[i] = Utf8Module.readUnsignedByte(dataInputStream, (ModuleBase)this);
                    if (this._nByte < 4L) {
                        this.checkMark(b[i], repInfo);
                    }
                    if (repInfo.getWellFormed() == NOT_WELL_FORMED) {
                        return;
                    }
                    if (!isContinuationByte(b[i])) {
                        repInfo.setWellFormed(false);
                        return;
                    }
                }
            }
            catch (EOFException endOfStream) {
                // Running out of data while sniffing is not an error here.
                eof = true;
            }
        }

        if (nChecked > 0) {
            repInfo.setSigMatch(this._name);
        } else {
            repInfo.setWellFormed(false);
        }
    }

    /**
     * Updates the line-ending flags for the character just decoded, based on
     * it and on {@link #_prevChar}.
     *
     * <p>LF after CR records CRLF, any other LF records LF, and any non-LF
     * character after CR records a bare CR. The caller is responsible for
     * updating {@code _prevChar} afterwards.
     *
     * @param n code point of the character just decoded
     */
    protected void checkLineEnd(int n) {
        boolean afterCr = this._prevChar == CR;
        if (n == LF) {
            if (afterCr) {
                this._lineEndCRLF = true;
            } else {
                this._lineEndLF = true;
            }
            return;
        }
        if (afterCr) {
            this._lineEndCR = true;
        }
    }

    /**
     * Records one of the first bytes of the stream and, once three bytes are
     * known, looks for a byte order mark.
     *
     * <p>The byte is stored at index {@code _nByte - 1}, so this must be
     * called right after the byte has been counted. When the first three bytes
     * are EF BB BF an informational message is attached, the block marker is
     * reset and {@code true} is returned. When they start with FF FE or FE FF
     * an error message is attached and the object is flagged as not
     * well-formed.
     *
     * <p>NOTE(review): assuming each read advances {@code _nByte} by one, the
     * FF FE / FE FF branches cannot be reached from {@code parse} or
     * {@code checkSignatures}, because 0xFE and 0xFF are rejected as lead
     * bytes before a third byte is read. Also, only three bytes are known at
     * this point, so the fourth byte consulted by the UCS-4 test is still its
     * initial zero.
     *
     * @param n       the byte just read, as an unsigned value
     * @param repInfo receives any message and the well-formedness flag
     * @return {@code true} only when a UTF-8 byte order mark has just been completed
     */
    protected boolean checkMark(int n, RepInfo repInfo) {
        int index = (int)this._nByte - 1;
        if (this.initialBytes == null || index < 0 || index >= this.initialBytes.length) {
            // Nothing to record outside the initial window.
            return false;
        }
        this.initialBytes[index] = n;

        if (this._nByte != 3L) {
            return false;
        }

        int b0 = this.initialBytes[0];
        int b1 = this.initialBytes[1];
        int b2 = this.initialBytes[2];

        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
            InfoMessage infoMessage = new InfoMessage("UTF-8 Byte Order Mark signature is present", 0L);
            repInfo.setMessage((Message)infoMessage);
            this.blockMarker.reset();
            return true;
        }
        if (b0 == 0xFF && b1 == 0xFE) {
            ErrorMessage errorMessage;
            if (b2 == 0 && this.initialBytes[3] == 0) {
                errorMessage = new ErrorMessage("UCS-4 little-endian encoding, not UTF-8");
            } else {
                errorMessage = new ErrorMessage("UTF-16 little-endian encoding, not UTF-8");
            }
            repInfo.setMessage((Message)errorMessage);
            repInfo.setWellFormed(false);
            return false;
        }
        if (b0 == 0xFE && b1 == 0xFF) {
            ErrorMessage errorMessage = new ErrorMessage("UTF-16 big-endian encoding, not UTF-8");
            repInfo.setMessage((Message)errorMessage);
            repInfo.setWellFormed(false);
            return false;
        }
        return false;
    }

    /**
     * Returns the sequence length announced by a lead byte: 2 for 0xC0-0xDF,
     * 3 for 0xE0-0xEF, 4 for 0xF0-0xF7, 0 for bytes that cannot start a
     * sequence (0x80-0xBF and 0xF8-0xFF) and 1 otherwise.
     */
    private static int sequenceLength(int leadByte) {
        if (0xC0 <= leadByte && leadByte <= 0xDF) {
            return 2;
        }
        if (0xE0 <= leadByte && leadByte <= 0xEF) {
            return 3;
        }
        if (0xF0 <= leadByte && leadByte <= 0xF7) {
            return 4;
        }
        if ((0x80 <= leadByte && leadByte <= 0xBF) || (0xF8 <= leadByte && leadByte <= 0xFF)) {
            return 0;
        }
        return 1;
    }

    /** Tells whether the byte lies in the continuation range 0x80-0xBF. */
    private static boolean isContinuationByte(int value) {
        return 0x80 <= value && value <= 0xBF;
    }

    /**
     * Combines the payload bits of a sequence of the given length into a code
     * point; returns -1 for a length outside 1-4.
     */
    private static int decode(int[] b, int nBytes) {
        switch (nBytes) {
            case 1:
                return b[0];
            case 2:
                return ((b[0] & 0x1F) << 6) | (b[1] & 0x3F);
            case 3:
                return ((b[0] & 0x0F) << 12) | ((b[1] & 0x3F) << 6) | (b[2] & 0x3F);
            case 4:
                return ((b[0] & 0x07) << 18) | ((b[1] & 0x3F) << 12) | ((b[2] & 0x3F) << 6) | (b[3] & 0x3F);
            default:
                return -1;
        }
    }
}

