using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using Newtonsoft.Json; //Do not remove - Required for JObject
using Shinydocs.CognitiveToolkit.Scripting;
using CsvHelper;
using Shinydocs.CognitiveToolkit.Core.Models.Index;
using Shinydocs.CognitiveToolkit.Core.Tools.ScrollTools.RunScript;

/// <summary>
/// Script that reads a CSV file and inserts one index document per CSV row.
/// Each document id is the SHA-256 hex digest of the row's id-field values, and every
/// document additionally receives a fixed field (--fieldName / --fieldValue) and an
/// import timestamp ("cogToolKitImportDate").
/// </summary>
public class ImportCsv : IScript
{
    private const string Version = "2.6.0";

    // Printed when SetUp fails; the flag names match the ones SetUp actually reads.
    private const string UsageMessage =
        "ImportCsv tool requires the following parameters -u for Index server Url, -i for IndexName, --fieldName for the field name," +
        " --fieldValue for the field value, --idFields for the comma separated unique identifier fields and --csv for the CSV file path ";

    // Title-casing always uses en-US so header conversion does not depend on the machine culture.
    private static readonly TextInfo TitleCaseTextInfo = new CultureInfo("en-US", false).TextInfo;

    private readonly ScriptLogger _log = new ScriptLogger();
    private string _indexName;
    private string _serverUrl;
    private string _fieldName;
    private string _fieldValue;
    private string[] _idFields; // comma separated list of fields to create the id from
    private int _threads;
    private int _nodesPerRequest;
    private RunScriptDocumentUpdater _documentUpdater;
    private string _csvFile;

    /// <summary>
    /// Parses the command-line style arguments and creates the document updater.
    /// </summary>
    /// <param name="arguments">Raw arguments; must contain the standard options plus
    /// --csv, --fieldName, --fieldValue and --idFields.</param>
    /// <exception cref="ArgumentException">The arguments could not be parsed, or an id field name is empty.</exception>
    /// <exception cref="KeyNotFoundException">A required option is missing.</exception>
    public void SetUp(string[] arguments)
    {
        _log.Information(string.Format("Using version {0}", Version));
        Console.WriteLine(string.Format("Using version {0}", Version));

        try
        {
            // OptionsParser will parse an array of arguments into a dictionary of flags and values.
            // NOTE(review): the dictionary is assumed to be string -> string because its values are
            // assigned to string fields below - confirm against OptionsParser's signature.
            Dictionary<string, string> options;
            if (!OptionsParser.TryParse(arguments, out options))
            {
                throw new ArgumentException("Failed to parse arguments");
            }

            OptionsParser.ParseStandardOptions(options, out _serverUrl, out _indexName, out _threads, out _nodesPerRequest);

            if (!options.TryGetValue("--csv", out _csvFile))
            {
                // NOTE(review): presumably InputError aborts the script - confirm.
                OptionsParser.InputError("The path to csv (--csv) is a required parameter");
            }

            _fieldName = GetRequiredOption(options, "--fieldName");
            _fieldValue = GetRequiredOption(options, "--fieldValue");

            // Duplicates are dropped: a document has at most one entry per key, so a repeated
            // id field could never satisfy the "all id fields present" check during import.
            _idFields = GetRequiredOption(options, "--idFields")
                .Split(',')
                .Select(s => s.Trim())
                .Distinct()
                .ToArray();

            foreach (var id in _idFields)
            {
                if (string.IsNullOrEmpty(id))
                {
                    throw new ArgumentException("Field should not be empty");
                }
            }

            _documentUpdater = new RunScriptDocumentUpdater(_serverUrl, _indexName, null, _nodesPerRequest, _threads);

            // The id fields are joined explicitly; formatting the array itself would print "System.String[]".
            _log.Information(string.Format(
                "ImportCsv tool setup method -u {0}, -i {1}, --csv {2}, --fieldName {3}, --fieldValue {4}, --idFields {5}",
                _serverUrl, _indexName, _csvFile, _fieldName, _fieldValue, string.Join(",", _idFields)));
        }
        catch (Exception)
        {
            // Show the usage hint, then let the original exception propagate unchanged.
            Console.WriteLine(UsageMessage);
            throw;
        }
    }

    /// <summary>
    /// Imports the CSV file configured in <see cref="SetUp"/>.
    /// </summary>
    public void Run()
    {
        Console.WriteLine("ImportCsv: Running ...");
        UpdateCsv(_csvFile);
    }

    /// <summary>
    /// Returns the value of a mandatory option.
    /// </summary>
    /// <exception cref="KeyNotFoundException">The flag is not present in <paramref name="options"/>.</exception>
    private static string GetRequiredOption(IDictionary<string, string> options, string flag)
    {
        string value;
        if (!options.TryGetValue(flag, out value))
        {
            throw new KeyNotFoundException(string.Format("The required parameter {0} was not supplied", flag));
        }

        return value;
    }

    /// <summary>
    /// Converts a CSV header such as "File Name" into a camelCase field name such as "fileName".
    /// </summary>
    /// <param name="text">The raw header text.</param>
    /// <param name="replacements">Substrings to replace (key -> value) before the case conversion.</param>
    /// <returns>The camelCase name; a header that collapses to a single character is returned trimmed but otherwise unchanged.</returns>
    private static string CamelCase(string text, IDictionary<string, string> replacements)
    {
        foreach (var replacement in replacements) //will replace the key with appropriate value provided in the function
        {
            text = text.Replace(replacement.Key, replacement.Value);
        }

        // Trim before splitting: with leading whitespace the first token would be empty (leaving the
        // result in PascalCase), and trailing non-space whitespace would make Substring below throw.
        var trimmed = text.Trim();
        var first = trimmed.Split(' ').First().ToLowerInvariant();
        var titleCase = TitleCaseTextInfo.ToTitleCase(trimmed);
        var noSpaces = titleCase.Replace(" ", string.Empty);

        if (noSpaces.Length == 1)
        {
            return trimmed;
        }

        // Keep the lower-cased first word and append the title-cased remainder.
        return first + noSpaces.Substring(first.Length);
    }

    /// <summary>
    /// Reads every record of the CSV file and inserts the resulting documents into the index.
    /// </summary>
    /// <param name="CsvFilePath">Path of the CSV file to import.</param>
    /// <exception cref="KeyNotFoundException">A record does not contain all configured id fields.</exception>
    public void UpdateCsv(string CsvFilePath) //will update the document in index
    {
        var replacements = new Dictionary<string, string>
        {
            {"#", "Number"}
        };

        using (var reader = new StreamReader(CsvFilePath))
        using (var csv = new CsvReader(reader, false))
        using (HashAlgorithm hash = SHA256.Create()) // disposed together with the readers
        {
            csv.Configuration.PrepareHeaderForMatch = (string header, int index) => CamelCase(header, replacements);

            var records = csv.GetRecords<dynamic>();
            if (records == null)
            {
                return;
            }

            var bulkUpdateDocuments = new List<BulkUpdateDocument>();
            foreach (var record in records)
            {
                // Records that cannot be viewed as a string-keyed dictionary are skipped.
                var doc = record as IDictionary<string, object>;
                if (doc == null)
                {
                    continue;
                }

                // The id is computed before the extra fields are added, so it depends on CSV data only.
                var documentId = ComputeDocumentId(doc, hash);

                var bulkUpdateDocument = new BulkUpdateDocument(documentId, _indexName)
                {
                    Document = doc
                };
                bulkUpdateDocument.Document.Add(_fieldName, _fieldValue);
                bulkUpdateDocument.Document.Add("cogToolKitImportDate", DateTime.Now);
                bulkUpdateDocuments.Add(bulkUpdateDocument);
            }

            _documentUpdater.Insert(bulkUpdateDocuments);
        }
    }

    /// <summary>
    /// Builds the document id: the lower-case hex SHA-256 digest of the id-field values,
    /// concatenated in key order.
    /// </summary>
    /// <exception cref="KeyNotFoundException">One or more configured id fields are missing from <paramref name="doc"/>.</exception>
    private string ComputeDocumentId(IDictionary<string, object> doc, HashAlgorithm hash)
    {
        // NOTE(review): the key ordering (default comparer) and the separator-less join are kept
        // exactly as before so that ids of previously imported rows stay the same.
        var idFieldValues = doc
            .Where(pair => _idFields.Contains(pair.Key))
            .OrderBy(pair => pair.Key)
            .Select(pair => Convert.ToString(pair.Value)) // a null value contributes an empty string
            .ToList();

        if (idFieldValues.Count != _idFields.Length)
        {
            var missing = _idFields.Where(field => !doc.ContainsKey(field));
            throw new KeyNotFoundException(string.Format("Fields don't exist: {0}", string.Join(", ", missing)));
        }

        var joinValues = string.Join("", idFieldValues);
        var byteArray = hash.ComputeHash(Encoding.UTF8.GetBytes(joinValues));

        var builder = new StringBuilder(byteArray.Length * 2);
        foreach (var b in byteArray)
        {
            builder.Append(b.ToString("x2"));
        }

        return builder.ToString();
    }

    /// <summary>
    /// Nothing to clean up.
    /// </summary>
    public void TearDown()
    {
    }
}