using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading;
using LuhnNet;
using Newtonsoft.Json; //Do not remove - Required for JObject
using Newtonsoft.Json.Linq;
using Shinydocs.CognitiveToolkit.Scripting;
using Shinydocs.CognitiveToolkit.Core.Tools.ScrollTools.RunScript;
using Shinydocs.CognitiveToolkit.UI.CommandLine;
using Shinydocs.CognitiveToolkit.UI.CommandLine.Progress;
using Shinydocs.CognitiveToolkit.Utilities; //Do not remove - Required for JObject
#pragma warning disable CS8603
#pragma warning disable CS8619
#pragma warning disable CS8604
#pragma warning disable CS8600
#pragma warning disable CS8601
#pragma warning disable CS8618
/// <summary>
/// RunScript script that tests one field of each document returned by a query against a regular
/// expression and, when the pattern matches, adds a tag value to another field of that document.
/// When Luhn validation is enabled, a document is only flagged if at least one regex match
/// passes <c>Luhn.IsValid</c>.
/// </summary>
public class FlagFieldBasedOnRegexScript : IScript
{
    // ScriptLogger allows you to log to the same index as the rest of the cognitive toolkit.
    // See https://github.com/serilog/serilog/wiki/Writing-Log-Events for message formatting information
    private readonly ScriptLogger _log = new ScriptLogger();

    private static string ScriptName = "FlagFieldBasedOnRegexScript";
    private static string Version = "2.6.0.1";

    // Fields requested for every document; SetUp appends the search field and the target field.
    private string[] _fieldsToReturn = { "id", "path", "length" };

    private string _indexName;
    private string _query;
    private string _serverUrl;

    // Raw pattern from --regex-pattern and the Regex built from it in SetUp.
    public string _regexPattern;
    // Value from --value that is added to the target field of matching documents.
    public string _tagValue;
    public Regex _regex;
    // Field whose text is tested against the pattern (--search-field, default "fullText").
    public string _searchField;
    // Field that receives the tag value (--field-name).
    public string _fieldName;
    // When true (--valid-luhn), a regex hit must also contain a Luhn-valid match.
    public bool _validateWithLuhn;

    private RunScriptDocumentUpdater _documentUpdater;
    private int _threads;
    private int _nodesPerRequest;

    /// <summary>
    /// SetUp Called before executing run
    /// </summary>
    /// <param name="arguments">Array of command line arguments passed in, excluding the Script File Path ("-p") and Script Class Name ("-c") parameters used by the RunScript tool</param>
    /// <exception cref="ArgumentException">Thrown when the arguments cannot be parsed.</exception>
    public void SetUp(string[] arguments)
    {
        _log.Information(string.Format("{0} version {1}", ScriptName, Version));
        Console.WriteLine(string.Format("{0} version {1}", ScriptName, Version));
        try
        {
            Dictionary<string, string> options;
            // OptionsParser will parse an array of arguments into a dictionary of flags and values
            if (!OptionsParser.TryParse(arguments, out options))
            {
                throw new ArgumentException("Failed to parse arguments. Tool requires exactly the following parameters -q (Query or Path to QueryFile) -u (Index Server Url) -i (IndexName).");
            }

            OptionsParser.ParseStandardOptions(options, out _serverUrl, out _indexName, out _threads, out _nodesPerRequest);
            OptionsParser.ParseQueryOption(options, out _query);

            // Required script-specific options.
            if (!options.TryGetValue("--regex-pattern", out _regexPattern))
            {
                OptionsParser.InputError("The regex (--regex-pattern) is a required parameter.");
            }
            if (!options.TryGetValue("--value", out _tagValue))
            {
                OptionsParser.InputError("The value (--value) is a required parameter.");
            }
            if (!options.TryGetValue("--field-name", out _fieldName))
            {
                OptionsParser.InputError("The field name (--field-name) is a required parameter.");
            }

            // Optional options.
            if (!options.TryGetValue("--search-field", out _searchField))
            {
                _searchField = "fullText";
            }
            string validateLuhnString;
            if (options.TryGetValue("--valid-luhn", out validateLuhnString))
            {
                // A value that is not "true"/"false" leaves Luhn validation disabled.
                bool.TryParse(validateLuhnString, out _validateWithLuhn);
            }

            // Distinct() keeps the list free of duplicates when the search/target field is
            // already one of the default fields or SetUp runs more than once.
            _fieldsToReturn = _fieldsToReturn.Append(_searchField).Append(_fieldName).Distinct().ToArray();

            // The same pattern is applied to every document, so compile it once up front.
            _regex = new Regex(_regexPattern, RegexOptions.Compiled);
            _documentUpdater = new RunScriptDocumentUpdater(_serverUrl, _indexName, _fieldsToReturn, _nodesPerRequest, _threads);
        }
        catch (Exception ex)
        {
            var message = ex.Message + "\n\nPlease review the following parameters:"
                + "\n" + "-q \tquery or path to queryfile"
                + "\n" + "-u \tindex server URL"
                + "\n" + "-i \tindex name"
                + "\n" + "--regex-pattern \tRegex pattern"
                + "\n" + "--value \tfield value"
                + "\n" + "--field-name \tfield name"
                + "\n" + "--search-field \tField to search (Default: fullText)"
                + "\n" + "--valid-luhn \tValidate using Luhn algorithm (Optional)"
                + "\n" + "--threads (Default: 1)"
                + "\n" + "--nodes-per-request (Default: 100)";
            _log.Error(ex, message);
            Console.WriteLine(message);
            throw;
        }
    }

    /// <summary>
    /// Runs the configured query through the document updater and returns, for each document,
    /// either the tagged document (when it passes the checks) or an empty dictionary.
    /// </summary>
    public void Run()
    {
        var processedTotal = 0;
        var updatedTotal = 0;
        using (var progress = new CommandLineProgress())
        {
            _documentUpdater.Progress = progress;
            _documentUpdater.Update(_query, document =>
            {
                var id = document["id"];
                try
                {
                    // Interlocked because the updater is configured with a thread count.
                    Interlocked.Increment(ref processedTotal);
                    if (PassesChecks(document))
                    {
                        Interlocked.Increment(ref updatedTotal);
                        // NOTE(review): Dictionary<string, object> is the assumed callback
                        // return type - confirm against RunScriptDocumentUpdater.Update.
                        return document.ToObject<Dictionary<string, object>>();
                    }
                }
                catch (Exception ex)
                {
                    // Exception goes first, matching SetUp, so it is logged as the exception
                    // rather than as a message argument.
                    _log.Error(ex, string.Format("Error processing {0}.{1}", id, ex.Message));
                }
                // Presumably an empty dictionary means "no changes for this document" - TODO confirm.
                return new Dictionary<string, object>();
            });
        }

        // NOTE(review): assumes Update has finished all callbacks by the time it returns.
        _log.Information(string.Format("Run complete. Items processed: {0}; items flagged: {1}", processedTotal, updatedTotal));
    }

    /// <summary>
    /// Decides whether a document should be flagged and, if so, tags it in place.
    /// </summary>
    /// <param name="document">Document to inspect; modified via <see cref="UpdateDocument"/> when it qualifies.</param>
    /// <returns>
    /// True when the search field exists, its text matches the pattern and (if Luhn validation
    /// is enabled) at least one match is Luhn-valid; otherwise false.
    /// </returns>
    public bool PassesChecks(JObject document)
    {
        var searchField = document[_searchField];
        if (searchField == null)
        {
            return false;
        }

        var text = searchField.ToString();
        if (!MatchRegex(text))
        {
            return false;
        }
        if (_validateWithLuhn && !AllMatchesValid(text))
        {
            return false;
        }

        UpdateDocument(document);
        return true;
    }

    /// <summary>
    /// Called after Run; logs the updater's total processed count.
    /// </summary>
    public void TearDown()
    {
        _log.Information(string.Format("Complete. Total items processed : {0}", RunScriptDocumentUpdater.TotalProcessed));
    }

    /// <summary>
    /// Removes the search field from the document and makes sure the target field is an array
    /// containing the tag value (compared case-insensitively).
    /// </summary>
    /// <param name="document">Document to modify in place.</param>
    /// <returns>The same document instance.</returns>
    public JObject UpdateDocument(JObject document)
    {
        var existing = document[_fieldName];
        var piiField = existing as JArray;
        if (piiField == null)
        {
            piiField = new JArray();
            // Keep a pre-existing single (non-array) value instead of silently discarding it.
            if (existing != null && existing.Type != JTokenType.Null)
            {
                piiField.Add(existing);
            }
        }

        // Drop the searched text from the document that is handed back to the updater.
        document.Remove(_searchField);

        if (!piiField.Any(d => string.Equals(d.ToString(), _tagValue, StringComparison.CurrentCultureIgnoreCase)))
        {
            piiField.Add(new JValue(_tagValue));
        }
        document[_fieldName] = piiField;
        return document;
    }

    /// <summary>
    /// Tests text against the configured pattern.
    /// </summary>
    /// <param name="text">Text to test.</param>
    /// <returns>True when the text is not null/blank and the pattern matches it.</returns>
    public bool MatchRegex(string text)
    {
        return !string.IsNullOrWhiteSpace(text) && _regex.IsMatch(text);
    }

    /// <summary>
    /// Checks the regex matches in the text against the Luhn algorithm.
    /// Despite the method name, this returns true as soon as ONE match is Luhn-valid;
    /// it does not require every match to be valid.
    /// </summary>
    /// <param name="text">Text to search for matches.</param>
    /// <returns>True when at least one match passes <c>Luhn.IsValid</c>; false for null/blank text or no valid match.</returns>
    public bool AllMatchesValid(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return false;
        }
        // Any(predicate) is already false for an empty match collection.
        return _regex.Matches(text).Cast<Match>().Any(m => Luhn.IsValid(m.Value));
    }
}