using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using excel = Microsoft.Office.Interop.Excel;
using System.Data;
using System.IO.Compression;
namespace PV.Custom.Utilities.Extraction
{
public class CaseExtraction
{
private bool getDuplicate(string original, string terms, string delimiters)
{
var scount = System.Text.RegularExpressions.Regex.Split(terms, delimiters).ToList().Where(p => original.ToLower().Contains(p.ToLower().ToString())).ToList();
if (scount.Count() > 0)
return true;
else
return false;
}
private DataRow getReplaceTerms(DataRow original, string terms, string delimiters,string liketerm)
{
System.Text.RegularExpressions.Regex.Split(terms, ",").ToList().Select(p =>
p).ToList().ForEach(p => {
var sname = System.Text.RegularExpressions.Regex.Split(p, "_").FirstOrDefault();
var lname = System.Text.RegularExpressions.Regex.Split(p, "_").LastOrDefault();
int h = 0;
original.ItemArray.ToList().ForEach(pop => {
if(getDuplicate(pop.ToString(),sname,"_"))
{
original[h] = lname;
}
h++;
});
});
//ToList().ForEach(p => {
// });
//p.Where(pp=>pp.ToString().ToLower().Contains(System.Text.RegularExpressions.Regex.Split(p,delimiters).FirstOrDefault())).ToString().Replace("",p));
// original.ToList().ForEach(p=>)
return original;
}
//dname, "1,2,6,10,12,13,14,15,16", "$", "*", "Spontaneous Report|Literature Case", "1!F/U_Followup,Initial_Initial", "5,6!H_Hospilation,LT_Life Threating,No_Non-Serious,F_Factal");
public string ReadCsvFile(string filepath,string sCols, string coldelimiter, string rowdelimiter, string AcceptedType,string likeTerms,string delimiters,string totalcols)
{
DataTable dtCsv = new DataTable();
string Fulltext;
if (File.Exists(filepath))
{
string FileSaveWithPath = Path.GetTempPath() + "Import"+ System.DateTime.Now.ToString("ddMMyyyy_hhmmss") + ".csv";//Server.MapPath("\\Files\\Import" + System.DateTime.Now.ToString("ddMMyyyy_hhmmss") + ".csv");
//FileUpload1.SaveAs(FileSaveWithPath);
try
{
StreamReader sr = new StreamReader(filepath);
FileSaveWithPath = filepath;
}
catch
{
File.Copy(filepath, FileSaveWithPath);
}
using (StreamReader sr = new StreamReader(FileSaveWithPath))
{
while (!sr.EndOfStream)
{
Fulltext = sr.ReadToEnd().ToString(); //read full file text
string[] rowws = System.Text.RegularExpressions.Regex.Split(Fulltext, "\"\r\n\"");
List<int> numbers = Enumerable.Range(1, 25).ToList();// (System.Text.RegularExpressions.Regex.Split(rowws[0], ",").Count()+ System.Text.RegularExpressions.Regex.Split(rowws[0], "\r\n").Count())).ToList();
numbers.ForEach(e => dtCsv.Columns.Add("Columns" + e));
for (int i = 0; i < rowws.Count() - 1; i++)
{
if (getDuplicate(rowws, AcceptedType, delimiters))
{
DataRow dr = dtCsv.NewRow();
DataRow dr1 = dtCsv.NewRow();
int j = 0;
System.Text.RegularExpressions.Regex.Split(rowws, "\",\"").ToList().ForEach(op =>
{
if (!op.Contains("\r\n"))
{
dr[j] = op.Replace("\"", "");
dr1[j] = op.Replace("\"", "");
j++;
}
else
{
System.Text.RegularExpressions.Regex.Split(op, "\r\n").ToList().ForEach(ll =>
{
dr[j] = ll.Replace("\"", "");
dr1[j] = ll.Replace("\"", "");
j++;
});
}
});
if (dr[11].ToString().Contains("/"))
{
dr = getReplaceTerms(dr, likeTerms, delimiters, "Like");
dtCsv.Rows.Add(dr);
}
else
{
List<int> numbecounts = Enumerable.Range(Convert.ToInt32(11) - 2, 25).ToList();
bool bb = true;
numbecounts.ForEach(e =>
{
if (bb)
{
if (dr[e].ToString().Contains("/"))
{
// DataRow dr1 = dr;
int tt = e;
List<int> numbecounts1 = Enumerable.Range(Convert.ToInt32(11), (24- Convert.ToInt32(11))).ToList();
numbecounts1.ForEach(k =>
{
dr[k] = dr1[tt]==null ? "": dr1[tt];
tt++;
});
bb = false;
}
}
});
}
}
}
}
}
}
string s = "";
dtCsv.AsEnumerable().Where(pl => pl.Field<string>(Convert.ToInt32(totalcols)) == null).ToList().ForEach(pp => {
List<int> numbers = numbers = Enumerable.Range(1, Convert.ToInt32(totalcols)).ToList();
numbers.ForEach(e =>
{
if(sCols.Contains("'"+e.ToString()+"'"))
{
s += pp.Field<string>(e - 1).ToString() + coldelimiter;
}
});
s += rowdelimiter;
});
s = s.Replace(coldelimiter + rowdelimiter, rowdelimiter);
//dtCsv.AsEnumerable().Where(pl => pl.Field<string>(Convert.ToInt32(totalcols)) != null).ToList().ForEach(pp =>
//{
// List<int> numbers = numbers = Enumerable.Range(1, Convert.ToInt32(totalcols)).ToList();
// numbers.ForEach(e =>
// {
// if (sCols.Contains("'" + e.ToString() + "'"))
// {
// s += pp.Field<string>(e - 1).ToString() + coldelimiter;
// }
// });
// s += rowdelimiter;
//});
return s;
}
public string GetExtractionString(string filepath,string scols,string coldelimiter,string rowdelimiter,string position,string replacewords,string Delimitercontent,string delimiterreplace, string acceptedeportType,string SeriousnessTypes)
{
if (!System.IO.File.Exists(filepath))
return "Exception - File does not exist";
//var sfiledata = System.IO.File.ReadAllText(filepath);
//string[] strlist = new string[] { "F/U", "Initial" };
//System.Text.RegularExpressions.Regex.Split(sfiledata, strlist).ToList().ForEach(pp =>
//{
// var pop = pp.ToString();
//});
//ReadFromFile(filepath);
//string scols = "'1,'2,'6,'10,'14,'15,'16";
// var tt = ReadCsvFile(filepath,scols);
var tt = ReadExcel(filepath);
DataTable dtTab = new DataTable();
DataTable dttabb = new DataTable();
List<int> numbers = Enumerable.Range(1, System.Text.RegularExpressions.Regex.Split(scols,",").Count()).ToList();
numbers.ForEach(e => dtTab.Columns.Add("Columns"+e));
numbers.ForEach(e => dttabb.Columns.Add("Columns" + e));
int i, j,z;
i = 1;
tt.Tables[0].AsEnumerable().ToList().ForEach(pp => {
dtTab.Rows.Add();
numbers = Enumerable.Range(1, tt.Tables[0].Columns.Count-1).ToList();
i = 1;
j = z = 1;
numbers.ForEach(e => {
var hh = System.Text.RegularExpressions.Regex.Split(pp.Field<string>(z - 1).ToString(),"###");
hh.ToList().ForEach(pop => {
if (scols.Contains("'" + i))
{
if(Convert.ToInt32(position)-1==j-1)
{
var gg = System.Text.RegularExpressions.Regex.Split(replacewords, Delimitercontent);
gg.ToList().ForEach(ppp => {
if(pop.ToString().Contains(System.Text.RegularExpressions.Regex.Split(ppp, delimiterreplace).FirstOrDefault()))
{
dtTab.Rows[dtTab.Rows.Count - 1][j - 1] = System.Text.RegularExpressions.Regex.Split(ppp,delimiterreplace).LastOrDefault();
}
});
}else
dtTab.Rows[dtTab.Rows.Count - 1][j - 1] = pop.ToString();
j++;
}
i++;
})
;
z++;
}
);
});
if (SeriousnessTypes != "")
{
var gg = System.Text.RegularExpressions.Regex.Split(SeriousnessTypes, "!");
var firstcols = System.Text.RegularExpressions.Regex.Split(gg.FirstOrDefault(),Delimitercontent);
firstcols.ToList().ForEach(k => {
var secondtime=gg.LastOrDefault();
var sdt= System.Text.RegularExpressions.Regex.Split(secondtime, Delimitercontent);
sdt.ToList().ForEach(jok => {
var fgd=System.Text.RegularExpressions.Regex.Split(jok, delimiterreplace);
dtTab.AsEnumerable().Where(plop => plop.Field<string>(Convert.ToInt32(k)).ToString() == fgd.FirstOrDefault()).ToList().ForEach(oo => { oo.SetField<string>(Convert.ToInt32(k),fgd.LastOrDefault()); });
});
});
//gg.ToList().ForEach(ppp =>
//{
// var ggg= System.Text.RegularExpressions.Regex.Split(ppp, "|");
// ggg.ToList().ForEach(pppp =>
// {
// //dtTab.AsEnumerable().Where(plop=>plop.Field<string>(pppp).ToString()==)
// });
// });
}
dtTab.AsEnumerable().ToList().ForEach(plo => {
numbers = Enumerable.Range(1, dtTab.Columns.Count).ToList();
bool b = true;
numbers.ForEach(e => {
if (acceptedeportType.Contains(plo.Field<string>(e-1)))
b = false;
});
if(!b)
{
dttabb.Rows.Add();
numbers.ForEach(e => {
dttabb.Rows[dttabb.Rows.Count - 1][e-1] = plo[e-1];
});
dttabb.AcceptChanges();
}
});
dtTab = dttabb;
string s = "";
dtTab.AsEnumerable().ToList().ForEach(pp => {
numbers = Enumerable.Range(1, dtTab.Columns.Count).ToList();
numbers.ForEach(e =>
{
s += pp.Field<string>(e-1).ToString()+coldelimiter;
});
s += rowdelimiter;
});
//var dd=CsvParser(System.IO.File.ReadAllText(filepath));
//var ddf = dd(filepath);
return s;
}
public DataSet ReadExcel(string excelfilename)
{
excel.Application application = new excel.Application();
excel.Workbook workbook = application.Workbooks.Open(excelfilename);
try
{
DataTable excelData = new DataTable();
DataSet ds = new DataSet();
string ExcelWorkbookname = workbook.Name;
int worksheetcount = workbook.Worksheets.Count;
for (int ij = 1; ij <= worksheetcount; ij++)
{
excelData = new DataTable();
List<int> numbers = Enumerable.Range(1, 10).ToList();
numbers.ForEach(e => excelData.Columns.Add("Columns"+e));
excelData.Columns.Add();
excel.Worksheet worksheet = (excel.Worksheet)workbook.Worksheets[ij];
excel.Range xlRange = worksheet.UsedRange;
int rowCount = xlRange.Rows.Count;
int colCount = xlRange.Columns.Count;
//for (int i = 1; i <= colCount; i++)
//{
// excelData.Columns.Add((string)(xlRange.Cells[1, i] as excel.Range).Value2);
//}
List<object> rowValues;
for (int i = 1; i <= rowCount; i++)
{
rowValues = new List<object>();
var rowValues1 = new List<object>();
for (int j = 1; j <= colCount; j++)
{
var tt = Convert.ToString((xlRange.Cells[i, j] as excel.Range).Value2);
rowValues.Add(tt);
}
rowValues.ToList().ForEach(pp => {
rowValues1.Add(pp.ToString().Replace("\r\n", "###"));
});
excelData.Rows.Add(rowValues1.ToArray());
}
excelData.TableName = worksheet.Name;
ds.Tables.Add(excelData.Copy());
}
return ds;
}
catch
{
return null;
}
finally
{
workbook.Close();
application.Quit();
}
}
//private void getconnect(string filename)
//{
// //Get Excel File
// excel.Workbook workbook = new excel.Workbook();
// workbook.LoadFromFile(filename);
// //Initailize worksheet
// excel.Worksheet sheet = workbook.Worksheets[0];
// //Protect cells
// sheet.Protect("LockCells");
// //Set Unlocked Cell
// sheet.Range["A1"].Style.Locked = false;
// //Protect Workbook
// workbook.Protect("123password");
// //Save and Launch
// workbook.SaveToFile("Protected.xlsx", ExcelVersion.Version2010);
// System.Diagnostics.Process.Start(workbook.FileName);
//}
public void getload()
{
//System.IO.Compression. xp = new System.IO.Compression.GZipStream(Stream;
//using (ZipFile archive = new ZipFile(@"c:\path\to\your\password\protected\archive.zip",))
//{
// //archive.Password = "your-pass-word-here";
// //archive.Encryption = EncryptionAlgorithm.PkzipWeak; // the default: you might need to select the proper value here
// //archive.StatusMessageTextWriter = Console.Out;
// //archive.ExtractAll(@"c:\path\to\unzip\directory\", ExtractExistingFileAction.Throw);
//}
//ICSharpCode.SharpZipLib.Zip.ZipFile
}
}
}