/* Copyright (C) 2018 TeselaGen Biotechnology, Inc. */
import { SHA256, enc } from "crypto-js";
import {
  pick,
  range,
  cloneDeep,
  get,
  mapKeys,
  map,
  assign,
  lowerCase
} from "lodash";
import {
  anyToJson,
  jsonToGenbank,
  jsonToFasta,
  jsonToJsonString
} from "@teselagen/bio-parsers";
import { basename } from "path";
import Promise from "bluebird";
import {
  getFeatureTypes,
  getReverseComplementSequenceString,
  tidyUpSequenceData,
  deleteSequenceDataAtRange,
  filterSequenceString
} from "@teselagen/sequence-utils";
import sequenceStringToFragments from "./sequenceStringToFragments";
import {
  allowedCsvFileTypes,
  extractZipFiles,
  getExt,
  isCsvOrExcelFile,
  isExcelFile,
  parseCsvOrExcelFile
} from "../utils/fileUtils";
import getExternalReferenceFieldsAndTags from "../utils/getExternalReferenceFieldsAndTags";
import { isBrowser } from "browser-or-node";
import { showAminoAcidStrippedCharacterWarningDialog } from "./showAminoAcidStrippedCharacterWarningDialog";
import { removeExt } from "../utils/fileUtils";
import { sequenceToVeInput } from "./sequenceToVeInput";
import { parseExcelToCsv } from "../utils/fileUtils";
import processSequenceUploadCSVContent from "./processSequenceUploadCSVContent";
import { isoContext } from "@teselagen/utils";
import addSequenceTypeOrDefault from "./addSequenceTypeOrDefault";
import {
  getDigestPartFields,
  tidyUpDigestPartFields
} from "../../utils/digestPartUtils";
import { flatMap } from "lodash";
import { filterFilesInZip } from "@teselagen/file-utils";
import getEnvIso from "../utils/getEnvIso";
export { sequenceStringToFragments, sequenceToVeInput };
// Per-residue weights keyed by upper-case one-letter amino acid code
// (the 20 standard residues plus U and O). Each character of a protein
// sequence is looked up here by calculateMolecularWeightAndExtinctionCoefficient;
// characters without an entry contribute 0.
// NOTE(review): the values look like average residue masses in Daltons —
// confirm against the table they were taken from.
const molecularWeightDictionary = {
  A: 71.0788,
  R: 156.1875,
  N: 114.1038,
  D: 115.0886,
  C: 103.1388,
  E: 129.1155,
  Q: 128.1307,
  G: 57.0519,
  H: 137.1411,
  I: 113.1594,
  L: 113.1594,
  K: 128.1741,
  M: 131.1926,
  F: 147.1766,
  P: 97.1167,
  S: 87.0782,
  T: 101.1051,
  W: 186.2132,
  Y: 163.176,
  V: 99.1326,
  U: 150.0388,
  O: 237.3018
};

/**
 * Builds the list of accepted upload file types for one kind of sequence.
 *
 * Every entry has a `type` (a single extension or an array of extensions)
 * and a `description`; most also name an `exampleFile`. The CSV/Excel entry
 * additionally carries a `validateAgainstSchema` object describing its columns.
 *
 * @param {object} [flags]
 * @param {boolean} [flags.isProtein] - selects the protein example files and
 *   protein-only CSV columns, and swaps the nucleotide-only formats for the
 *   GenPept entry
 * @param {boolean} [flags.isRNA] - selects the RNA example files and omits the
 *   Geneious entry
 * @param {boolean} [flags.isOligo] - omits the type-specific CSV columns
 * @param {boolean} [flags.isDNA] - adds the required "Circular/Linear" CSV column
 * @param {boolean} [flags.isMaterial] - switches the CSV schema name and some
 *   of the wording from "sequence" to "material"
 * @returns {Array<object>} the file type descriptors
 */
const sharedFileTypesAndDescriptions = ({
  isProtein,
  isRNA,
  isOligo,
  isDNA,
  isMaterial
} = {}) => [
  {
    type: "zip",
    description: "Any of the following types, just compressed",
    exampleFile: isProtein
      ? "exampleProtein.zip"
      : isRNA
        ? "exampleRna.zip"
        : "exampleGenbank.zip"
  },
  {
    type: "json",
    description: "TeselaGen JSON Format",
    exampleFile: isProtein
      ? "exampleTgJson_protein.json"
      : isRNA
        ? "exampleTgJson_rna.json"
        : "exampleTgJson.json"
  },
  {
    type: ["fasta", "fas", "fa", "fna", "ffn", "txt"],
    description: "Fasta Format",
    exampleFile: isProtein ? "proteinFasta.fasta" : "pj5_0001.fasta"
  },
  {
    // allowedCsvFileTypes entries have their first "." removed so they match
    // the bare-extension style used by the other entries.
    type: allowedCsvFileTypes.map(t => t.replace(".", "")),
    description: "TeselaGen CSV Format",
    extendedPropTypes: [isProtein ? "amino acid sequence" : "sequence"],
    validateAgainstSchema: {
      name: isMaterial ? "materialUploadExample" : "sequenceUploadExample",
      requireAllOrNone: [["Feature", "FeatureType"]],
      // description: `This upload accepts custom fields for the DNA sequences. Add a custom field name to this header (ex. 'ext-sequence-Date of Purchase') and values to the rows. To add additional sequence custom fields just duplicate the column header.`,
      fields: [
        {
          isRequired: true,
          path: "Name",
          description: "The name of the sequence",
          example: "pj5_0001",
          isUnique: true
        },
        {
          isRequired: true,
          path: "Sequence",
          isUnique: true,
          description: `The ${
            isProtein ? "AA" : isRNA ? "rna" : "dna"
          } sequence of the ${isMaterial ? "material" : "sequence"}`,
          example: isProtein
            ? "MGLVDFPIALFHAAPSLVQETTPEEKARGVCVVLGAHNITGRIQVSDRDWLTVGSTLPGVYVARVEKGVVTGDVVIADPNRGTIKVKGAVEEYL"
            : "ATGCATTGAGGACCTAACCATATCTAA",
          // Runs the cell value through filterSequenceString and returns the
          // first element of its result; empty values are left undefined.
          format: (s, { name } = {}) => {
            if (!s) return;
            const [f] = filterSequenceString(s, {
              name,
              isRNA,
              isProtein
            });
            // if (f !== s) {
            //   window.toastr.warning(
            //     `Stripped invalid characters from sequence ${name || ""}`,
            //     { timeout: window.Cypress ? 3000 : 60000 }
            //   );
            // }
            return f;
          }
        },
        {
          path: "Aliases",
          description: `A comma-separated list of alternative names or identifiers for the ${
            isMaterial ? "material" : "sequence"
          }`,
          example: "alias 1, alias 2"
        },
        {
          path: "Tags",
          description:
            "A comma-separated list of keywords or labels that describe the sequence. These should match existing tags",
          example: "tag1, tag2"
        },
        {
          path: "Description",
          description: "A short description or summary of the sequence",
          example: "Short example sequence to show off CSV import"
        },
        // Type-specific columns: none for oligos, the topology dropdown for
        // DNA, the protein metadata columns for proteins, none otherwise.
        ...(isOligo
          ? []
          : isDNA
            ? [
                {
                  displayName: "Circular/Linear",
                  path: "Circular/Linear",
                  isRequired: true,
                  type: "dropdown",
                  values: ["Circular", "Linear"],
                  example: "Circular"
                }
              ]
            : isProtein
              ? [
                  {
                    path: "designId",
                    description:
                      "An identifier for the design of the protein sequence",
                    example: "1234"
                  },
                  {
                    path: "pdbId",
                    description:
                      "The Protein Data Bank (PDB) ID for the protein structure, if available",
                    example: "6GCT"
                  },
                  {
                    path: "uniprotId",
                    description:
                      "The UniProt ID for the protein sequence, if available",
                    example: "P12345"
                  },
                  {
                    path: "hisTagLoc",
                    displayName: "His Tag Location",
                    description:
                      "The location of the histidine tag, if present in the protein sequence",
                    example: "N-terminal"
                  },
                  {
                    path: "isoPoint",
                    displayName: "AA PI (Isoelectric Point)",
                    description:
                      "The isoelectric point of the amino acid sequence",
                    example: "6.8",
                    type: "number"
                  },
                  {
                    path: "cofactor",
                    description:
                      "Any cofactors or prosthetic groups bound to the protein, if applicable",
                    example: "heme"
                  }
                ]
              : []),
        {
          path: "Part",
          description:
            "A name for a part that spans the entire sequence (optional)",
          example: "Example Part"
        },
        {
          path: "PartTags",
          example: "tag1, tag2",
          description:
            "A comma-separated list of keywords or labels that describe the part of the sequence specified in the Part field (optional)"
        },
        {
          path: "Feature",
          description:
            "A name for a feature that spans the entire sequence (optional)",
          example: "Example Feature"
        },
        {
          path: "FeatureType",
          description: "The type or category of the feature (optional)",
          type: "dropdown",
          values: getFeatureTypes(),
          example: "CDS"
        }
      ]
    }
  },

  // Proteins get only the GenPept entry; nucleotide sequences get the
  // trace/SnapGene/GenBank/SBOL entries (plus Geneious unless RNA).
  ...(isProtein
    ? [
        {
          type: ["gp", "genpep", "txt"],
          description: "Genbank Format",
          exampleFile: "protein_example.gp"
        }
      ]
    : [
        ...(isRNA
          ? []
          : [
              {
                type: "geneious",
                description: "Geneious Format",
                exampleFile: "pAN7-1.geneious"
              }
            ]),
        {
          type: "ab1",
          description: "Sequence Trace Format",
          exampleFile: "ab1_trace_example.ab1"
        },

        {
          type: "dna",
          description: "SnapGene DNA Format",
          exampleFile: isRNA
            ? "snapgene_example_linear.dna"
            : "snapgene_example.dna"
        },
        {
          type: ["gb", "gbk", "txt"],
          description: "Genbank Format",
          exampleFile: "pj5_0001.gb"
        },
        {
          type: ["xml", "rdf"],
          description: "SBOL XML Format",
          exampleFile: "sbolExample.xml"
        }
      ])
];

// Sequence type codes exported under the name "non DNA sequence types".
// NOTE(review): no consumer is visible in this part of the file — confirm
// how callers use this list before adding or removing codes.
export const nonDNASeqTypes = [
  "GENOME",
  "GENOMIC_REGION",
  "ALIGNMENT_SEQ",
  "RNA"
];

// Pre-built file type descriptor lists, one per upload flavour. Each is the
// result of sharedFileTypesAndDescriptions called with the flags shown, and
// is computed once when this module is loaded.
export const dnaFileTypesAndDescriptions = sharedFileTypesAndDescriptions({
  isDNA: true
});
export const dnaMaterialFileTypesAndDescriptions =
  sharedFileTypesAndDescriptions({
    isDNA: true,
    isMaterial: true
  });
export const rnaFileTypesAndDescriptions = sharedFileTypesAndDescriptions({
  isRNA: true
});
export const oligoFileTypesAndDescriptions = sharedFileTypesAndDescriptions({
  isOligo: true
});
export const rnaMaterialFileTypesAndDescriptions =
  sharedFileTypesAndDescriptions({
    isRNA: true,
    isMaterial: true
  });
export const aminoAcidFileTypesAndDescriptions = sharedFileTypesAndDescriptions(
  { isProtein: true }
);

// File type descriptors for guide RNA uploads: a single CSV/XLSX entry whose
// schema requires a unique Name plus Spacer and Scaffold sequences, with
// optional Aliases, Tags and Description columns.
export const guideRNAFileTypesAndDescriptions = [
  {
    type: ["csv", "xlsx"],
    description: "Teselagen gRNA CSV Format",
    exampleFile: "gRNA_example.csv",
    extendedPropTypes: ["sequence"],
    validateAgainstSchema: {
      fields: [
        {
          path: "Name",
          isUnique: true,
          description: "Name of the sequence",
          example: "example sequence 1",
          isRequired: true
        },
        {
          path: "Spacer Sequence",
          description: "Spacer sequence",
          example: "acgcaugcuagcaugcuagc",
          isRequired: true
        },
        {
          path: "Scaffold Sequence",
          description: "Scaffold sequence",
          example: "ugaucgaucgaugcuagc",
          isRequired: true
        },
        {
          path: "Aliases",
          description: "Aliases for the sequence",
          example: "alias1, alias2"
        },
        {
          path: "Tags",
          description: "Tags for the sequence",
          example: "tag1, tag2"
        },
        {
          path: "Description",
          description: "Description of the sequence",
          example: "Short example to show off CSV import"
        }
      ]
    }
  }
];

// Flat extension lists derived from the descriptor lists: every `type`
// (string or array of strings) is flattened into one array. Extensions that
// appear in several descriptors (e.g. "txt") are repeated, not de-duplicated.
export const sequenceFileTypes = flatMap(
  dnaFileTypesAndDescriptions,
  t => t.type
);
export const rnaFileTypes = flatMap(rnaFileTypesAndDescriptions, t => t.type);

export const aminoAcidSequenceFileTypes = flatMap(
  aminoAcidFileTypesAndDescriptions,
  t => t.type
);

// Describes one forward-strand segment that covers an entire sequence string
// (the string itself is stored separately as fragments).
// `start` and `end` are both 0-based and inclusive, so an empty string
// yields `end: -1`.
export const sequenceStringToSegment = (sequenceStr /* fragLength=4000 */) => {
  const lastIndex = sequenceStr.length - 1;
  return { strand: 1, start: 0, end: lastIndex };
};

/**
 * Computes the prefixed SHA-256 hash used to identify a sequence.
 *
 * The sequence is lower-cased first. When the second character of the type
 * prefix is "C" (circular), the string is replaced by its lexicographically
 * smallest rotation so that every rotation of the same circle hashes alike.
 *
 * @param {string|string[]} sequence - the sequence, or pieces to be joined
 * @param {string} sequenceTypeCode - required; selects the hash prefix
 * @param {string} [joinChar=""] - separator used when `sequence` is an array
 * @returns {string|undefined} prefix + hex digest, or undefined when
 *   `sequence` is falsy
 * @throws {Error} when `sequenceTypeCode` is missing
 */
export function computeSequenceHash(
  sequence,
  sequenceTypeCode = "",
  joinChar = ""
) {
  if (!sequenceTypeCode) {
    throw new Error("sequenceTypeCode is required to compute sequence hash.");
  }
  if (!sequence) return;

  const joined = Array.isArray(sequence) ? sequence.join(joinChar) : sequence;
  const lowered = joined.toLowerCase();
  const typePrefix = hashPrefix(sequenceTypeCode);
  const isCircular = typePrefix.charAt(1) === "C";
  const canonical = isCircular ? leastRotation(lowered) : lowered;
  const digest = SHA256(canonical);
  return typePrefix + enc.Hex.stringify(digest);
}

/**
 * Returns the three-character hash prefix for a sequence type code.
 *
 * Please refer to the 'hash_sequence.sql.js' file for the true prefix map.
 *
 *   sequence type code | hashPrefix
 *   -------------------+-----------
 *   AA                 | ALS
 *   CDS                | NLD
 *   CIRCULAR_DNA       | NCD
 *   LINEAR_DNA         | NLD
 *   RNA                | NLS
 *   OLIGO              | NLS
 *   GENOME             | NLD
 *   GENOMIC_REGION     | NLD
 *   ALIGNMENT_SEQ      | NLD
 *   REGISTERED_ANN     | H2P
 *
 * @param {string} sequenceTypeCode
 * @returns {string} the mapped prefix, or "H2P" for any unlisted code
 */
function hashPrefix(sequenceTypeCode) {
  const prefix = {
    AA: "ALS",
    CDS: "NLD",
    CIRCULAR_DNA: "NCD",
    LINEAR_DNA: "NLD",
    RNA: "NLS",
    OLIGO: "NLS",
    GENOME: "NLD",
    GENOMIC_REGION: "NLD",
    ALIGNMENT_SEQ: "NLD",
    REGISTERED_ANN: "H2P"
  };
  // Only own keys count: a bare `prefix[code]` would return inherited members
  // such as `constructor` or `toString` (functions), which callers would then
  // try to use as a string.
  const isKnownCode = Object.prototype.hasOwnProperty.call(
    prefix,
    sequenceTypeCode
  );
  return isKnownCode ? prefix[sequenceTypeCode] : "H2P";
}

// Returns the lexicographically smallest rotation of `sequence`, so that all
// rotations of one circular sequence map to the same string.
// Only meaningful for circular sequences!
// Implementation is Booth's algorithm:
// https://en.wikipedia.org/wiki/Lexicographically_minimal_string_rotation#Booth's_Algorithm
function leastRotation(sequence) {
  const n = sequence.length;
  // Character at a position in the (conceptually) doubled string.
  const charAt = position => sequence[position % n];
  const failure = new Array(2 * n).fill(-1); // failure function
  let best = 0; // start index of the smallest rotation found so far

  for (let j = 1; j < 2 * n; j++) {
    const current = charAt(j);
    let i = failure[j - best - 1];

    while (i !== -1 && current !== charAt(best + i + 1)) {
      if (current < charAt(best + i + 1)) {
        best = j - i - 1;
      }
      i = failure[i];
    }

    // With i === -1 the comparison position best + i + 1 is simply `best`.
    if (i === -1 && current !== charAt(best)) {
      if (current < charAt(best)) {
        best = j;
      }
      failure[j - best] = -1;
    } else {
      failure[j - best] = i + 1;
    }
  }

  const tail = sequence.substring(best);
  const head = sequence.substring(0, best);
  return tail + head;
}

// Drops a single trailing "*" (stop codon marker) from an amino acid string;
// any other string is returned unchanged.
export const stripLastStopCodon = aaSequenceString =>
  aaSequenceString.slice(-1) === "*"
    ? aaSequenceString.slice(0, -1)
    : aaSequenceString;

/**
 * Converts nucleotide sequence JSON into the GraphQL input shape.
 * May be called with partial sequence data when updating.
 *
 * The sequence type code is taken, in order of preference, from the JSON
 * itself, from `options.polynucleotideMaterialTypeCode`, or from the JSON's
 * `circular` flag. Whitespace is stripped from the sequence and, for RNA,
 * every t/T is replaced with "u".
 *
 * @param {object} sequenceJSON
 * @param {object} [options]
 * @param {string} [options.polynucleotideMaterialTypeCode]
 * @param {string} [options.name] - overrides the name found in the JSON
 * @param {boolean} [options.keepIds] - keep `id` on the sequence, parts and features
 * @returns {object} the GraphQL input
 * @throws {Error} when a non-GENOME sequence is longer than getMaxSeqLen()
 */
export const sequenceJSONtoGraphQLInput = (sequenceJSON, options = {}) => {
  const { polynucleotideMaterialTypeCode, name, keepIds } = options;
  const materialTypeToSequenceType = {
    LINEAR_DNA: "LINEAR_DNA",
    OLIGO: "OLIGO",
    PLASMID: "CIRCULAR_DNA",
    RNA: "RNA"
  };

  // Resolved from the raw input, before it is tidied up below.
  const resolveSequenceTypeCode = () => {
    if (sequenceJSON.sequenceTypeCode) return sequenceJSON.sequenceTypeCode;
    if (polynucleotideMaterialTypeCode) {
      return materialTypeToSequenceType[polynucleotideMaterialTypeCode];
    }
    return sequenceJSON.circular ? "CIRCULAR_DNA" : "LINEAR_DNA";
  };
  const sequenceTypeCode = resolveSequenceTypeCode();

  const tidied = tidyUpSequenceData(sequenceJSON, {
    doNotProvideIdsForAnnotations: true
  });
  const sequenceFeatures = tidied.features || [];

  tidied.sequence = (tidied.sequence || "").replace(/\s+/g, "");
  if (sequenceTypeCode === "RNA") {
    tidied.sequence = tidied.sequence.replace(/t/gi, "u");
  }

  const size = get(tidied, "sequence.length") || tidied.size;
  const seqName = name || tidied.name;

  // Everything except genomes is subject to the system-wide length limit.
  if (sequenceTypeCode !== "GENOME") {
    const maxLen = getMaxSeqLen();
    if (size > maxLen) {
      throw new Error(
        `Sequence ${seqName} is greater than ${maxLen} base pairs and can not be uploaded to this system.`
      );
    }
  }

  const hash = computeSequenceHash(tidied.sequence, sequenceTypeCode);
  const featureKeys = [
    ...(keepIds ? ["id"] : []),
    "start",
    "end",
    "name",
    "strand",
    "arrowheadType",
    "type",
    "locations",
    "notes"
  ];

  // Normalises the part's tag notes in place, then builds the part input.
  const toPartInput = part => {
    part.notes = part.notes || {};
    part.notes["tag"] = part.notes["tag"] || [];
    const taggedItems = part.notes["tag"].map(tn => ({
      tag: { name: tn }
    }));
    return {
      ...(keepIds && { id: part.id }),
      taggedItems,
      start: part.start,
      end: part.end,
      name: part.name,
      strand: part.strand,
      ...(part.isDigestPart && { ...getDigestPartFields(part) })
    };
  };

  // An explicit `circular: false` wins; otherwise fall back to the type code.
  let circular = false;
  if (tidied.circular !== false) {
    circular = tidied.circular || sequenceTypeCode === "CIRCULAR_DNA";
  }

  return {
    ...(keepIds && { id: tidied.id }),
    name: seqName,
    isCds: tidied.isCds,
    ...getExternalReferenceFieldsAndTags(tidied),
    chromatogramData: tidied.chromatogramData,
    circular,
    sequenceTypeCode,
    parts: tidied.parts && map(tidied.parts, toPartInput),
    // externalRecordIdentifiers: sequenceJSON.externalRecordIdentifiers,
    ...(tidied.sequence && {
      size,
      hash,
      sequenceSegments: [sequenceStringToSegment(tidied.sequence)],
      sequenceFragments: sequenceStringToFragments(tidied.sequence)
    }),
    tags: tidied.tags,
    sequenceFeatures: sequenceFeatures.map(feature =>
      pick(feature, featureKeys)
    ),
    description: tidied.description || ""
  };
};

/**
 * Converts amino acid sequence JSON into the GraphQL input shape.
 *
 * The raw sequence (`proteinSequence`, else `sequence`) is passed through
 * filterSequenceString. When the filtered result is shorter than the input
 * and a tracker array was supplied, the finished input is appended to it so
 * the caller can report which sequences had characters removed.
 *
 * @param {object} [aaSequenceJSON]
 * @param {object} [options]
 * @param {Array} [options.strippedAATracker] - collects inputs whose sequence was shortened
 * @param {boolean} [options.keepIds] - keep `id` on the sequence and its annotations
 * @returns {object} the GraphQL input
 */
export const aaSequenceJSONtoGraphQLInput = (
  aaSequenceJSON = {},
  options = {}
) => {
  const { strippedAATracker, keepIds } = options;
  const rawSequence =
    aaSequenceJSON.proteinSequence || aaSequenceJSON.sequence || "";
  const [aminoAcidSequence] = filterSequenceString(rawSequence, {
    name: aaSequenceJSON.name,
    isProtein: true
  });

  const idKey = keepIds ? ["id"] : [];
  const featureKeys = [...idKey, "start", "end", "name", "type", "locations"];
  const partKeys = [...idKey, "start", "end", "name", "type"];

  const regionAnnotations =
    aaSequenceJSON.features &&
    aaSequenceJSON.features.map(ann => pick(ann, featureKeys));

  const gqlInput = {
    ...(keepIds && { id: aaSequenceJSON.id }),
    ...getExternalReferenceFieldsAndTags(aaSequenceJSON),
    name: aaSequenceJSON.name,
    size: aminoAcidSequence.length,
    hash: computeSequenceHash(aminoAcidSequence, "AA"),
    description: aaSequenceJSON.description || "",
    // Weight and extinction coefficient are only added for non-empty sequences.
    ...(aminoAcidSequence.length > 0
      ? calculateMolecularWeightAndExtinctionCoefficient(aminoAcidSequence)
      : {}),
    regionAnnotations,
    proteinSequence: aminoAcidSequence,
    aminoAcidParts:
      aaSequenceJSON.parts &&
      aaSequenceJSON.parts.map(ann => pick(ann, partKeys))
  };

  const wasShortened = rawSequence.length > aminoAcidSequence.length;
  if (strippedAATracker && wasShortened) {
    strippedAATracker.push(gqlInput);
  }
  return gqlInput;
};

/**
 * Sums per-residue weights for a protein sequence and derives an extinction
 * coefficient from its tyrosine (Y) and tryptophan (W) counts.
 *
 * The weight starts from a fixed offset of 18.1524 and adds the
 * molecularWeightDictionary entry for each upper-cased character; characters
 * without an entry add 0.
 *
 * @param {string} [sequence=""]
 * @returns {{molecularWeight: number, extinctionCoefficient: number}}
 */
export const calculateMolecularWeightAndExtinctionCoefficient = (
  sequence = ""
) => {
  let molecularWeight = 18.1524;
  let tyrosineCount = 0;
  let tryptophanCount = 0;

  for (const residue of sequence.toUpperCase()) {
    molecularWeight += molecularWeightDictionary[residue] || 0;
    switch (residue) {
      case "Y":
        tyrosineCount += 1;
        break;
      case "W":
        tryptophanCount += 1;
        break;
      default:
        break;
    }
  }

  const extinctionCoefficient = tyrosineCount * 1490 + tryptophanCount * 5500;
  return { molecularWeight, extinctionCoefficient };
};

/**
 * Estimates the molecular weight of a nucleotide sequence from its base
 * counts. This function is used on both the server and the client.
 *
 * Only A, T, G, C and U (case-insensitive) are counted. For the
 * double-stranded type codes (CIRCULAR_DNA, LINEAR_DNA, CDS) the weight of
 * the complementary strand is added; for OLIGO a fixed 61.96 is subtracted.
 *
 * @param {string} [sequence=""]
 * @param {string} [sequenceTypeCode]
 * @returns {number} the computed molecular weight
 */
export const calculateDNAMolecularWeight = (
  sequence = "",
  sequenceTypeCode
) => {
  const counts = { A: 0, T: 0, G: 0, C: 0, U: 0 };
  for (const base of sequence.toUpperCase()) {
    if (counts[base] !== undefined) {
      counts[base] += 1;
    }
  }
  const { A, T, G, C, U } = counts;

  const isDoubleStranded = ["CIRCULAR_DNA", "LINEAR_DNA", "CDS"].includes(
    sequenceTypeCode
  );

  let molecularWeight =
    A * 313.21 + U * 306.2 + T * 304.2 + G * 329.21 + C * 289.18;
  if (isDoubleStranded) {
    // Complementary strand: each base contributes its partner's weight.
    molecularWeight += A * 304.2 + T * 313.21 + G * 289.18 + C * 329.21;
  } else if (sequenceTypeCode === "OLIGO") {
    molecularWeight -= 61.96;
  }
  return molecularWeight;
};

// Converts editor sequence data to GraphQL input, dispatching on
// `seqData.isProtein`. Returns undefined when no data is given.
export const oveSeqDataToGraphQL = (seqData, opts) => {
  if (!seqData) return;
  if (seqData.isProtein) {
    // aaSequenceJSONtoGraphQLInput() tnw - here for ease of finding
    return aaSequenceJSONtoGraphQLInput(seqData, opts);
  }
  // sequenceJSONtoGraphQLInput() tnw - here for ease of finding
  return sequenceJSONtoGraphQLInput(seqData, opts);
};

/**
 * Parses sequence files and creates graphql input sequences.
 *
 * The returned array holds one GraphQL input per parsed sequence and carries
 * extra properties: `filenames`, `messages`, and (once at least one sequence
 * was parsed) `sequenceNameMap` / `sequenceFileMap`, which map lower-cased
 * sequence names and file names to arrays of inputs.
 *
 * @param {array} sequenceFiles - entries with `name` and/or `originFileObj`
 * @param {object} [options]
 * @param {boolean} options.isProtein - whether they are amino acid sequences
 * @param {boolean} [options.isOligo] - force type OLIGO; circular files are rejected
 * @param {boolean} [options.isRNA] - force type RNA; circular files are rejected
 * @param {string} [options.sequenceTypeCode] - force this type code; circular
 *   files are rejected unless it is CIRCULAR_DNA
 * @param {boolean} [options.useFilenameAsSequenceName]
 * @param {boolean} [options.isGenbankFile] - passed to tidyUpDigestPartFields
 *   as `digestInfoInNotes`
 * @param {object} [ctx=isoContext] - passed through to tidyUpDigestPartFields
 * @returns {Promise<array>} the parsed sequences (see above)
 * @throws {Error} when parsing fails, on a type/topology mismatch, or when the
 *   user cancels the stripped-character warning dialog
 */
export const parseSequenceFiles = async (
  sequenceFiles,
  options = {},
  ctx = isoContext
) => {
  const {
    isProtein,
    isOligo,
    useFilenameAsSequenceName,
    isRNA,
    sequenceTypeCode,
    isGenbankFile
  } = options;
  try {
    const jsonToGraphql = isProtein
      ? aaSequenceJSONtoGraphQLInput
      : sequenceJSONtoGraphQLInput;
    const parsedSequences = [];
    parsedSequences.filenames = [];
    parsedSequences.messages = [];
    // Filled by aaSequenceJSONtoGraphQLInput with every protein input whose
    // sequence was shortened by filtering.
    const strippedAAs = [];
    await Promise.map(sequenceFiles, async ({ originFileObj, ...rest }) => {
      let filename = rest.name || originFileObj.name;
      filename = filename && basename(filename);
      const sequenceResults = await anyToJson(originFileObj || rest, {
        acceptParts: true,
        isProtein: isProtein,
        isRNA,
        isOligo,
        primersAsFeatures: true,
        fileName: filename
      });
      let fileNameNoExt, lowerFilename;
      if (filename) {
        fileNameNoExt = removeExt(filename);
        lowerFilename = filename.toLowerCase();
      }

      /**
       * Sequence data results from files may come with digest parts.
       * In case of GenBank files, these will be in the part annotation notes,
       * For TG JSON files, these will be in the part annotation fields.
       *
       * In either case, the 5' and 3' restriction enzyme will come in the form of
       * the enzyme name and/or recognition sequence pattern. We need to find the TG ID based
       * on that and add it to the part annotation.
       */
      // NOTE(review): this loop reads `parsedSequence.parts` before the
      // `result.success` check further down — confirm failed results always
      // carry a `parsedSequence`.
      for (const sequenceResult of sequenceResults) {
        if (sequenceResult.parsedSequence.parts) {
          for (const part of sequenceResult.parsedSequence.parts) {
            await tidyUpDigestPartFields(
              part,
              { digestInfoInNotes: isGenbankFile },
              ctx
            );
          }
        }
      }

      sequenceResults.forEach(result => {
        // Strips the last extension from the filename when it is used as the name.
        // NOTE(review): this sets `result.name`, but the input below is built
        // from `result.parsedSequence` — confirm the override takes effect.
        result.name =
          useFilenameAsSequenceName && filename
            ? filename.replace(/\.[^/.]+$/, "")
            : result.name;

        if (!result.success)
          throw new Error(
            `Sequence parsing failed for ${filename} - ${(
              result.messages || []
            ).join(" ")}`
          );

        // Mismatches are thrown as plain objects and converted to Errors in
        // the catch block at the bottom of this function.
        if (isOligo) {
          result.parsedSequence.sequenceTypeCode = "OLIGO";
          if (result.parsedSequence.circular) {
            // eslint-disable-next-line no-throw-literal
            throw {
              mismatchError: `Sequence ${result.parsedSequence.name} cannot be an oligo because the sequence file is circular.`
            };
          }
        } else if (isRNA) {
          result.parsedSequence.sequenceTypeCode = "RNA";
          if (result.parsedSequence.circular) {
            // eslint-disable-next-line no-throw-literal
            throw {
              mismatchError: `Sequence ${result.parsedSequence.name} cannot be RNA because the sequence file is circular.`
            };
          }
        } else if (sequenceTypeCode) {
          result.parsedSequence.sequenceTypeCode = sequenceTypeCode;
          if (
            result.parsedSequence.circular &&
            sequenceTypeCode !== "CIRCULAR_DNA"
          ) {
            // eslint-disable-next-line no-throw-literal
            throw {
              mismatchError: `Sequence ${result.parsedSequence.name} cannot be ${sequenceTypeCode} because the sequence file is circular.`
            };
          }
        } else {
          addSequenceTypeOrDefault(result.parsedSequence);
        }
        const sequenceInput = jsonToGraphql(result.parsedSequence, {
          strippedAATracker: strippedAAs
        });
        if (result.messages && result.messages.length) {
          //tnr: this is confusing.. It's unclear whether sequences.messages is an
          // array of arrays or just an array of strings.. I'm opting for an array of strings
          //OLD CODE:
          // if (!parsedSequences.messages[i]) parsedSequences.messages[i] = [];
          // parsedSequences.messages[i].push(...result.messages);
          //NEW CODE:
          // NOTE(review): Array.prototype.concat returns a new array and the
          // result is discarded here, so `parsedSequences.messages` always
          // stays empty. Before assigning the result back, check consumers:
          // filterSequenceUploads' addError calls `.some` on each entry and
          // would throw on plain strings.
          parsedSequences.messages.concat(result.messages);
        }
        // the filename is needed for some plate uploads which have the sequence filename as a column in the csv
        parsedSequences.filenames.push(filename);
        // helper maps for going from sequence name or filename to sequence
        parsedSequences.sequenceNameMap = parsedSequences.sequenceNameMap || {};
        parsedSequences.sequenceFileMap = parsedSequences.sequenceFileMap || {};
        const sm = parsedSequences.sequenceNameMap;
        const fm = parsedSequences.sequenceFileMap;
        const lowerSeqName = (sequenceInput.name || "").toLowerCase();
        if (lowerSeqName) {
          sm[lowerSeqName] = sm[lowerSeqName] || [];
          sm[lowerSeqName].push(sequenceInput);
        }
        if (lowerFilename) {
          fm[lowerFilename] = fm[lowerFilename] || [];
          fm[lowerFilename].push(sequenceInput);
        }
        // add an alias for filename if it isn't the same as the name
        // (the locus only supports 16 characters so this might be a common occurrence)
        // (only do this if a single sequence is in the file)
        if (
          sequenceResults.length === 1 &&
          fileNameNoExt &&
          fileNameNoExt !== sequenceInput.name
        ) {
          sequenceInput.aliases = sequenceInput.aliases || [];
          sequenceInput.aliases.push({
            name: fileNameNoExt
          });
        }
        parsedSequences.push(sequenceInput);
      });
    });
    // In the browser, let the user decide whether to continue when characters
    // were stripped from any amino acid sequence.
    if (strippedAAs.length && isBrowser) {
      const continueUpload =
        await showAminoAcidStrippedCharacterWarningDialog(strippedAAs);
      if (!continueUpload) {
        throw new Error("Import Cancelled");
      }
    }
    if (parsedSequences.sequenceFileMap) {
      // store with and without extension so that we can match on both
      Object.keys(parsedSequences.sequenceFileMap).forEach(key => {
        const noExt = removeExt(key);
        if (noExt !== key) {
          parsedSequences.sequenceFileMap[noExt] =
            parsedSequences.sequenceFileMap[key];
        }
      });
    }
    return parsedSequences;
  } catch (error) {
    console.error(`2135134513456 error:`, error);
    // Plain `{ mismatchError }` objects become real Errors; anything else is
    // rethrown unchanged.
    throw error.mismatchError ? new Error(error.mismatchError) : error;
  }
};

/**
 * Looks up a parsed sequence by name, falling back to file name.
 *
 * Uses the `sequenceNameMap` / `sequenceFileMap` properties attached to the
 * array returned by parseSequenceFiles; the lookup key is lower-cased and the
 * first entry of the matching list is returned.
 *
 * @param {Array} [parsedSequences]
 * @param {string} [sequenceName=""]
 * @returns {object|undefined} the first match, name map first
 */
export function getParsedSequenceMatch(
  parsedSequences = [],
  sequenceName = ""
) {
  const key = sequenceName.toLowerCase();
  const firstMatchIn = lookup => lookup && lookup[key] && lookup[key][0];
  const { sequenceNameMap, sequenceFileMap } = parsedSequences;
  const fromName = firstMatchIn(sequenceNameMap);
  const fromFile = firstMatchIn(sequenceFileMap);
  return fromName || fromFile;
}

/**
 * Parses sequence text into graphql input. From a single string input it can
 * return multiple sequences (when passing fasta or genbank).
 *
 * The returned array carries a `messages` property holding the parser
 * messages of every result, flattened into one list.
 *
 * @param {object | string} sequenceText - The text of the sequence (or many
 *   sequences in case of genbank or fasta), or a TeselaGen-JSON-like object
 *   with a `sequence` property, which is tidied up and used directly
 * @param {string=} sequenceName - acts as an override for all sequence names when passed
 * @param {object=} options
 * @param {boolean=} options.isProtein - whether it is an amino acid
 * @param {boolean=} options.defaultToCircular
 * @param {string=} options.sequenceTypeCode
 * @returns {Promise<Array>} the GraphQL inputs
 * @throws {Error} on a parse failure or when an OLIGO/RNA type code is
 *   requested for a circular sequence
 */
export const parseSequenceText = async (
  sequenceText,
  sequenceName = undefined,
  options = {}
) => {
  const { isProtein, defaultToCircular, sequenceTypeCode } = options;
  const toReturn = [];
  const isOligo = sequenceTypeCode === "OLIGO";
  const isRNA = sequenceTypeCode === "RNA";
  toReturn.messages = [];
  const jsonToGraphql = isProtein
    ? aaSequenceJSONtoGraphQLInput
    : sequenceJSONtoGraphQLInput;

  // aaSequenceJSONtoGraphQLInput() tnw - here for ease of finding
  // sequenceJSONtoGraphQLInput() tnw - here for ease of finding
  const addResult = result => {
    let sequenceResult = result.parsedSequence;

    // If a sequenceName is provided, it will override any existing previous sequence names
    if (sequenceName) {
      sequenceResult = { ...sequenceResult, name: sequenceName };
    }
    // Mismatches are thrown as plain objects and turned into Errors by the
    // catch block below.
    if (isOligo && sequenceResult.circular) {
      // eslint-disable-next-line no-throw-literal
      throw {
        mismatchError: `Sequence ${result.parsedSequence.name} cannot be an oligo because the pasted sequence file is circular.`
      };
    }
    if (isRNA && sequenceResult.circular) {
      // eslint-disable-next-line no-throw-literal
      throw {
        mismatchError: `Sequence ${result.parsedSequence.name} cannot be a RNA because the pasted sequence file is circular.`
      };
    }
    if (!isProtein) {
      addSequenceTypeOrDefault(sequenceResult, sequenceTypeCode);
    }
    const sequenceInput = jsonToGraphql(sequenceResult);
    // Results built from TeselaGen JSON have no `messages`; concat(undefined)
    // would append a literal `undefined` entry, so default to an empty list.
    toReturn.messages = toReturn.messages.concat(result.messages || []);
    toReturn.push(sequenceInput);
  };

  // sequenceText can either be a string (genbank, fasta, single sequence string) or a teselagen json like object
  try {
    const results =
      sequenceText && sequenceText.sequence //just return the teselagen json
        ? [{ parsedSequence: tidyUpSequenceData(sequenceText) }]
        : await anyToJson(sequenceText, {
            parseFastaAsCircular: defaultToCircular,
            acceptParts: true,
            isProtein: isProtein,
            isOligo,
            isRNA,
            primersAsFeatures: true
          });

    results.forEach(addResult);
  } catch (error) {
    console.error("err:", error);
    if (error && error.mismatchError) {
      throw new Error(error.mismatchError);
    }
    // Rethrow real Errors untouched: wrapping one in `new Error(error)` would
    // turn its message into "Error: ..." and discard the original stack.
    if (error instanceof Error) {
      throw error;
    }
    throw new Error(error || "Error parsing sequence");
  }
  return toReturn;
};

/**
 * Turns a mixed set of uploaded files into GraphQL sequence inputs.
 *
 * Files are first filtered (including zip contents) by the extensions
 * accepted for the sequence kind, then split into CSV/Excel files, GenBank
 * files (.gb/.gbk) and all other sequence files, and each group is parsed.
 * The parsed inputs are appended to `sequences` and returned.
 *
 * @param {object} args
 * @param {Array} [args.allSequenceFiles] - the uploaded files
 * @param {boolean} [args.isGenomicRegionUpload] - only changes the wording of
 *   the GenBank type mismatch error
 * @param {boolean} [args.isGuideRNA] - forwarded to the CSV processor
 * @param {boolean} [args.isMaterial] - the CSV carries both material and
 *   sequence fields; forwarded to the CSV processor
 * @param {Array} [args.sequences] - already-parsed sequences to append to
 * @param {boolean} [args.isProtein]
 * @param {boolean} [args.isRNA]
 * @param {string} [args.sequenceTypeCode] - when set, every GenBank sequence
 *   must have this type code
 * @param {Array} [args.warnings] - forwarded to the CSV processor
 * @param {object} [ctx=isoContext] - forwarded to the file and CSV parsers
 * @returns {Promise<Array>} all sequences
 * @throws {Error} when files were given but none is usable, when a GenBank
 *   sequence has the wrong type, or when a file has illegal characters
 */
export async function filterSequenceUploads(
  {
    allSequenceFiles: _allSequenceFiles = [],
    isGenomicRegionUpload,
    isGuideRNA,
    isMaterial, //tnw: if true we've got a CSV file coming in with both material and sequence fields and will need to parse both
    sequences = [],
    isProtein,
    isRNA: _isRNA,
    sequenceTypeCode,
    warnings = []
  },
  ctx = isoContext
) {
  const isRNA = _isRNA || sequenceTypeCode === "RNA";
  const allSequenceFiles = await filterFilesInZip(
    _allSequenceFiles,
    isProtein
      ? aminoAcidSequenceFileTypes
      : isRNA
        ? rnaFileTypes
        : sequenceFileTypes
  );
  const sequenceFiles = [];
  const genbankFiles = [];
  const csvAndExcelFiles = [];

  allSequenceFiles.forEach(file => {
    // Compare case-insensitively so "FILE.GB" is still treated as GenBank.
    const ext = file.name.split(".").pop().toLowerCase();
    if (isCsvOrExcelFile(file)) {
      csvAndExcelFiles.push(file);
    } else if (ext === "gb" || ext === "gbk") {
      genbankFiles.push(file);
    } else {
      sequenceFiles.push(file);
    }
  });

  if (
    _allSequenceFiles.length &&
    !csvAndExcelFiles.length &&
    !sequenceFiles.length &&
    !genbankFiles.length
  ) {
    throw new Error("No sequence files found.");
  }

  const invalidFiles = [];
  // Records the file behind any "Illegal character" parser message. An entry
  // of `messages` may be a single string or an array of strings; both are
  // accepted so this cannot throw on either shape.
  const addError = (parsedSequences, files) => {
    if (!parsedSequences.messages) return;
    parsedSequences.messages.forEach((entry, i) => {
      const entryMessages = Array.isArray(entry) ? entry : [entry];
      const hasIllegalCharacter = entryMessages.some(
        message =>
          typeof message === "string" && message.includes("Illegal character")
      );
      if (hasIllegalCharacter) {
        // Still report the problem when no file can be matched to the index.
        invalidFiles.push(files[i] ? files[i].name : "unknown file");
      }
    });
  };
  const isOligo = sequenceTypeCode === "OLIGO";
  if (sequenceFiles.length) {
    // ctx is forwarded here just as for GenBank files below, so digest part
    // lookups use the caller's context rather than the default one.
    const parsedSequences = await parseSequenceFiles(
      sequenceFiles,
      {
        isProtein,
        isOligo,
        isRNA
      },
      ctx
    );

    addError(parsedSequences, sequenceFiles);
    sequences = sequences.concat(parsedSequences);
  }

  if (genbankFiles.length) {
    const parsedGenbankSequences = await parseSequenceFiles(
      genbankFiles,
      {
        isOligo,
        isRNA,
        isProtein,
        isGenbankFile: true
      },
      ctx
    );
    addError(parsedGenbankSequences, genbankFiles);
    const invalidGenbanks = [];
    parsedGenbankSequences.forEach(gbSeq => {
      if (sequenceTypeCode && gbSeq.sequenceTypeCode !== sequenceTypeCode) {
        invalidGenbanks.push(gbSeq.name);
      }
    });
    if (invalidGenbanks.length) {
      if (isGenomicRegionUpload) {
        throw new Error(
          `Genomic region GenBanks must be linear. The following have invalid sequences: ${invalidGenbanks.join(
            ", "
          )}`
        );
      } else {
        throw new Error(
          `The sequences in the following GenBanks are not ${readableSequenceType(
            sequenceTypeCode
          )}: ${invalidGenbanks.join(", ")}`
        );
      }
    }
    sequences = sequences.concat(parsedGenbankSequences);
  }

  if (csvAndExcelFiles.length) {
    // Processed one file at a time; the `warnings` array is shared across files.
    for (const file of csvAndExcelFiles) {
      let csv;
      if (isExcelFile(file)) {
        csv = await parseExcelToCsv(file.originFileObj);
      } else {
        csv = file;
      }
      const invalidLines = [];
      const csvSeqs = await processSequenceUploadCSVContent({
        apolloMethods: ctx,
        isMaterial,
        file: csv,
        filename: file.name,
        invalidLines,
        warnings,
        isProtein,
        sequenceTypeCode,
        isGuideRNA
      });

      sequences = sequences.concat(csvSeqs);
    }
  }
  if (invalidFiles.length) {
    throw new Error(
      `These files have invalid sequence characters: ${invalidFiles.join(
        ", "
      )}.`
    );
  }

  return sequences;
}

/**
 * Turn a sequence type code into a human-readable label.
 *
 * The code is run through lodash `lowerCase`, after which the first
 * occurrence of each of "dna", "cds" and "rna" (in that order) is put back
 * in upper case.
 *
 * @param {string} typeCode - sequence type code to format
 * @returns {string} the readable label
 */
export function readableSequenceType(typeCode) {
  const acronymReplacements = [
    ["dna", "DNA"],
    ["cds", "CDS"],
    ["rna", "RNA"]
  ];
  let readable = lowerCase(typeCode);
  for (const [lower, upper] of acronymReplacements) {
    // string patterns only replace the first match
    readable = readable.replace(lower, upper);
  }
  return readable;
}

/**
 * Split a sequence string into consecutive chunks of at most `fragSize`
 * characters. Every chunk except possibly the last has exactly `fragSize`
 * characters; an empty sequence yields an empty array.
 *
 * @param {string} [sequence=""] - the string to split
 * @param {number} [fragSize=4000] - chunk length; expected to be a positive
 *   integer (other values are not meaningful chunk sizes)
 * @returns {string[]} the chunks, in order
 */
export const chunkSequence = (sequence = "", fragSize = 4000) => {
  const chunkCount = Math.ceil(sequence.length / fragSize);
  // `slice` is used instead of the deprecated `String.prototype.substr`.
  return Array.from({ length: chunkCount }, (_, chunkIndex) =>
    sequence.slice(fragSize * chunkIndex, fragSize * (chunkIndex + 1))
  );
};

/**
 * Chunk a sequence with `chunkSequence` and wrap every chunk in a
 * `{ fragment, index }` record, where `index` is the chunk's position.
 *
 * @param {string} sequence - the string to split
 * @param {number} fragSize - chunk length, forwarded to `chunkSequence`
 * @returns {{fragment: string, index: number}[]} one record per chunk
 */
export const chunkSequenceToFragments = (sequence, fragSize) => {
  const fragmentRecords = [];
  const chunks = chunkSequence(sequence, fragSize);
  for (const [index, fragment] of chunks.entries()) {
    fragmentRecords.push({ fragment, index });
  }
  return fragmentRecords;
};

/**
 * Extract the stretch of `seqStr` that a feature covers.
 *
 * `start` and `end` are used as 0-based inclusive positions. When `start` is
 * not `<= end`, the result is the tail from `start` followed by the head up
 * to `end`. A `strand` of -1 returns the reverse complement of that stretch.
 *
 * @param {{start: number, end: number, strand?: number}} feature
 * @param {string} seqStr - the full sequence string
 * @returns {string} the feature's sequence
 */
export const getSequenceOfFeature = (feature, seqStr) => {
  const { start, end, strand } = feature;
  let bases;
  if (start <= end) {
    bases = seqStr.slice(start, end + 1);
  } else {
    // the range runs past the end of the string and continues at index 0
    bases = seqStr.slice(start) + seqStr.slice(0, end + 1);
  }
  return strand === -1 ? getReverseComplementSequenceString(bases) : bases;
};

/**
 * Extract the sequence covered by a part by delegating to
 * `getSequenceOfFeature` with only the part's `start`, `end` and `strand`.
 *
 * @param {{start: number, end: number, strand?: number}} part
 * @param {string} seqStr - the full sequence string
 * @returns {string} whatever `getSequenceOfFeature` returns for that range
 */
export const getSequenceOfPart = (part, seqStr) => {
  const { start, end, strand } = part;
  return getSequenceOfFeature({ start, end, strand }, seqStr);
};

/**
 * Resolve the sequence string for an element.
 *
 * - Without a `part`, the element's `regex` is used as the sequence.
 * - With a `part`, the bases between `part.start` and `part.end` (inclusive,
 *   continuing at index 0 when start is not `<= end`) are cut out of the
 *   string returned by `getSequenceSequence(part.sequence)`.
 *
 * The result is reverse-complemented when `element.direction` is "reverse"
 * or the part's strand is -1.
 *
 * @param {object} element - reads `part`, `regex`, `direction`, `name`, `id`
 * @returns {{sequence: string, name: *, id: *} | null} null (after logging)
 *   when the part's sequence cannot be resolved
 */
export const getSequenceOfElement = element => {
  const { part } = element;
  let elementBases;

  if (part) {
    const fullSequence = getSequenceSequence(part.sequence);
    if (!fullSequence) {
      console.info("Sequence not found for element", element);
      return null;
    }
    const isContiguous = part.start <= part.end;
    elementBases = isContiguous
      ? fullSequence.slice(part.start, part.end + 1)
      : fullSequence.slice(part.start) + fullSequence.slice(0, part.end + 1);
  } else {
    elementBases = element.regex;
  }

  const isReversed =
    element.direction === "reverse" || get(element, "part.strand") === -1;
  if (isReversed) {
    elementBases = getReverseComplementSequenceString(elementBases);
  }

  const { name, id } = element;
  return { sequence: elementBases, name, id };
};

/**
 * Return the full sequence string of a sequence record, with commas removed.
 *
 * A non-empty `sequence.sequence` string wins. Otherwise the
 * `sequenceFragments` are ordered by their `index` and their `fragment`
 * strings are concatenated. The input is never mutated.
 *
 * @param {{sequence?: string, sequenceFragments?: {index: number, fragment: string}[]}} sequence
 * @returns {string | null} the sequence string, or null when `sequence` is
 *   falsy or carries neither a sequence string nor fragments
 */
export const getSequenceSequence = sequence => {
  if (!sequence) return null;
  const { sequence: sequenceString, sequenceFragments } = sequence;
  if (sequenceString) return sequenceString.replace(/,/g, "");
  if (!sequenceFragments) return null;
  // `sort` works in place, so sort a shallow copy of the array instead of
  // deep-cloning the entire sequence record just to protect the caller's data
  // (this also keeps frozen/read-only inputs working).
  return [...sequenceFragments]
    .sort((a, b) => a.index - b.index)
    .map(({ fragment }) => fragment)
    .join("")
    .replace(/,/g, "");
};

/**
 * Test whether a value, once coerced to a string by `RegExp.prototype.test`,
 * is one or more decimal digits with an optional single leading minus sign.
 *
 * @param {*} value - value to check
 * @returns {boolean} true for e.g. "42" and "-7", false for "", "1.5", "4a"
 */
export const stringHasOnlyDigits = value => {
  const signedDigitsPattern = /^-?\d+$/;
  return signedDigitsPattern.test(value);
};

/**
 * Run `addHashPropToSeq` on every sequence of an iterable, in order.
 *
 * @param {Iterable<object>} sequences - sequences to process (mutated)
 * @returns {Iterable<object>} the same `sequences` reference that was passed in
 */
export const addHashPropToSeqs = sequences => {
  [...sequences].forEach(sequence => {
    addHashPropToSeq(sequence);
  });
  return sequences;
};

/**
 * Set `seq.hash` when it is not already truthy.
 *
 * The raw sequence handed to `computeSequenceHash` is, in order of
 * preference: `seq.fullSequence` if it is a string, `seq.sequence` if it is
 * a string, otherwise the `fragment` values of `seq.sequenceFragments`
 * concatenated after keying them by `index`.
 *
 * @param {object} seq - sequence record; mutated in place
 * @returns {void}
 */
export const addHashPropToSeq = seq => {
  if (seq.hash) return;

  const directString = [seq.fullSequence, seq.sequence].find(
    candidate => typeof candidate === "string"
  );

  let rawSequence;
  if (directString !== undefined) {
    rawSequence = directString;
  } else {
    // Keying by index makes the object enumerate integer-like keys in
    // ascending order, which puts the fragments back in sequence order.
    const fragmentsByIndex = mapKeys(
      seq.sequenceFragments,
      ({ index }) => index
    );
    rawSequence = Object.values(fragmentsByIndex).reduce(
      (joined, { fragment }) => joined + fragment,
      ""
    );
  }

  seq.hash = computeSequenceHash(rawSequence, seq.sequenceTypeCode);
};

/**
 * Unzip an upload, parse its CSV/Excel file(s) and parse any accompanying
 * sequence files.
 *
 * @param {Array} [allFilesMaybeZipped=[]] - uploaded files, passed to
 *   `extractZipFiles` first
 * @param {object} [options]
 * @param {boolean} [options.allowMultipleCsvFiles] - when truthy every
 *   CSV/Excel file is parsed into `parsedCsvs`; otherwise only the first one
 *   is parsed into `parsedCsv`
 * @returns {Promise<{parsedCsv: *, parsedCsvs: *, allParsedSequences: *, allParsedSequencesFilenames: *}>}
 *   `parsedCsv`/`parsedCsvs` stay undefined for the mode that was not used;
 *   the sequence fields default to empty arrays when there are no sequence
 *   files
 * @throws {Error} "No CSV file found." when no CSV/Excel file is present,
 *   "Error parsing CSV file." when CSV/Excel parsing fails
 */
export async function parseCsvAndSequenceFiles(
  allFilesMaybeZipped = [],
  { allowMultipleCsvFiles } = {}
) {
  const allFiles = await extractZipFiles(allFilesMaybeZipped);
  const csvFile = allFiles.find(isCsvOrExcelFile);
  const csvFiles = allFiles.filter(isCsvOrExcelFile);
  if (!csvFile) {
    throw new Error("No CSV file found.");
  }

  const result = {
    parsedCsv: undefined,
    parsedCsvs: undefined,
    allParsedSequences: [],
    allParsedSequencesFilenames: []
  };

  try {
    if (allowMultipleCsvFiles) {
      result.parsedCsvs = await Promise.all(csvFiles.map(parseCsvOrExcelFile));
    } else {
      result.parsedCsv = await parseCsvOrExcelFile(csvFile);
    }
  } catch (error) {
    // the underlying failure is only logged; callers get a generic message
    console.error("error:", error);
    throw new Error("Error parsing CSV file.");
  }

  // everything that is not a CSV/Excel file and has a known sequence extension
  const sequenceFiles = allFiles.filter(
    file =>
      !isCsvOrExcelFile(file) && sequenceFileTypes.includes(getExt(file))
  );
  if (sequenceFiles.length) {
    const parsedSequences = await parseSequenceFiles(sequenceFiles);
    result.allParsedSequences = parsedSequences;
    // the filenames are read off the value returned by parseSequenceFiles
    result.allParsedSequencesFilenames = parsedSequences.filenames;
  }

  return result;
}

/**
 * Recursively removes every feature of type "assembly_gap" from a sequence:
 * the first such feature found has its start..end range deleted with
 * `deleteSequenceDataAtRange`, and the search restarts on the result until
 * no assembly_gap feature is left.
 *
 * @param {object} seq - sequence whose `features` are searched
 * @returns {object} the sequence with all assembly_gap ranges deleted; the
 *   input itself when it has none
 */
function deleteAssemblyGapFeature(seq) {
  const isAssemblyGap = ({ type }) => type === "assembly_gap";
  const gapFeature = Object.values(seq.features).find(isAssemblyGap);
  if (!gapFeature) {
    return seq;
  }
  const { start, end } = gapFeature;
  const withoutGap = deleteSequenceDataAtRange(seq, { start, end });
  return deleteAssemblyGapFeature(withoutGap);
}

/**
 * Offer to strip assembly_gap features from uploaded sequences.
 *
 * Only acts in the browser. When at least one sequence has a
 * `sequenceFeatures` entry of type "assembly_gap", the user is asked via
 * `window.showConfirmationDialog`; on confirmation each affected sequence is
 * converted with `sequenceToVeInput`, cleaned with
 * `deleteAssemblyGapFeature`, converted back with
 * `sequenceJSONtoGraphQLInput` and the result is assigned onto the original
 * sequence object.
 *
 * @param {object[]} sequences - sequences to inspect; affected entries are
 *   mutated in place
 * @returns {Promise<void>}
 */
export const stripAssemblyGaps = async sequences => {
  // by default, when not in the browser, nothing is stripped
  if (!isBrowser) return;

  const isAssemblyGap = feature => feature.type === "assembly_gap";
  // positions are recorded (rather than building a new list) so the order of
  // `sequences` is maintained
  const gapSequenceIndices = sequences.reduce((indices, candidate, position) => {
    if (
      candidate.sequenceFeatures &&
      candidate.sequenceFeatures.some(isAssemblyGap)
    ) {
      indices.push(position);
    }
    return indices;
  }, []);
  if (!gapSequenceIndices.length) return;

  const userWantsStrip = await window.showConfirmationDialog({
    text: "One or more assembly gap features were detected in sequence files, would you like to strip them from the sequences?",
    confirmButtonText: "Yes",
    cancelButtonText: "No"
  });
  if (!userWantsStrip) return;

  for (const position of gapSequenceIndices) {
    const target = sequences[position];
    const strippedSequence = deleteAssemblyGapFeature(sequenceToVeInput(target));
    strippedSequence.sequenceTypeCode = target.sequenceTypeCode;
    // THIS NEEDS TO BE MUTATIVE. The sequence object is used in a map in
    // parseSequenceFiles, which must see the new hash and features.
    assign(target, sequenceJSONtoGraphQLInput(strippedSequence));
  }
};

/**
 * Map a sequence file type to its file extension.
 *
 * "genbank" becomes "gp" for proteins and "gb" otherwise; every other file
 * type is returned unchanged.
 *
 * @param {string} sequenceFileType - e.g. "genbank", "fasta", "json"
 * @param {boolean} [isProtein=false] - whether the sequence is a protein
 * @returns {string} the extension (without a leading dot)
 */
export const sequenceFileTypeToExt = (sequenceFileType, isProtein = false) => {
  if (sequenceFileType !== "genbank") {
    return sequenceFileType;
  }
  if (isProtein) {
    return "gp";
  }
  return "gb";
};

/**
 * Serialize a sequence into the requested file type.
 *
 * - "genbank" -> `getSequenceGenbank`
 * - "fasta"   -> `getSequenceFasta`
 * - "json"    -> `sequenceToVeInput` when `options.keepJsonObject` is truthy,
 *                otherwise `getSequenceJsonString`
 *
 * The chosen function is called with `(sequence, options)`.
 *
 * @param {object} sequence - the sequence to export
 * @param {string} type - "genbank", "fasta" or "json"
 * @param {object} [options={}] - forwarded to the converter
 * @returns {*} the converter's result, or undefined for any other `type`
 */
export const getSequenceInFileType = (sequence, type, options = {}) => {
  switch (type) {
    case "genbank":
      return getSequenceGenbank(sequence, options);
    case "fasta":
      return getSequenceFasta(sequence, options);
    case "json":
      return options.keepJsonObject
        ? sequenceToVeInput(sequence, options)
        : getSequenceJsonString(sequence, options);
    default:
      return undefined;
  }
};

/**
 * Convert a sequence with `sequenceToVeInput` and pass the result to
 * `jsonToGenbank`.
 *
 * @param {object} sequence - the sequence to export
 * @param {object} [options] - forwarded to `sequenceToVeInput`
 * @returns {*} whatever `jsonToGenbank` returns
 */
export const getSequenceGenbank = (sequence, options) => {
  const veSequence = sequenceToVeInput(sequence, options);
  return jsonToGenbank(veSequence);
};

/**
 * Convert a sequence with `sequenceToVeInput` and pass the result to
 * `jsonToFasta`.
 *
 * @param {object} sequence - the sequence to export
 * @param {object} [options] - forwarded to `sequenceToVeInput`
 * @returns {*} whatever `jsonToFasta` returns
 */
export const getSequenceFasta = (sequence, options) => {
  const veSequence = sequenceToVeInput(sequence, options);
  return jsonToFasta(veSequence);
};

/**
 * Convert a sequence with `sequenceToVeInput` and pass the result to
 * `jsonToJsonString`.
 *
 * @param {object} sequence - the sequence to export
 * @param {object} [options] - forwarded to `sequenceToVeInput`
 * @returns {*} whatever `jsonToJsonString` returns
 */
export const getSequenceJsonString = (sequence, options) => {
  const veSequence = sequenceToVeInput(sequence, options);
  return jsonToJsonString(veSequence);
};

/**
 * Resolve the maximum sequence length allowed on import.
 *
 * The first truthy value wins, checked in this order:
 * 1. `window.Cypress.__maxBpImportLength` (only when `window` exists)
 * 2. `getEnvIso("maxBpImportLength")`
 * 3. the built-in default of 100000
 *
 * @returns {*} the configured limit as stored in its source, or 100000
 */
export const getMaxSeqLen = () => {
  const cypressOverride =
    typeof window !== "undefined" && window.Cypress?.__maxBpImportLength;
  if (cypressOverride) {
    return cypressOverride;
  }
  const envLimit = getEnvIso("maxBpImportLength");
  if (envLimit) {
    return envLimit;
  }
  return 100000;
};
