MVC 5 Epplus Excel Found Unreadable Content - excel

I am trying to write some information to an excel file. I can write data in it with using EPPlus. And download the excel file. But when I run and download this file and open, I see this error.
"Excel found unreadable content in filename.xls. Do you want to recover the contents of this workbook? If you trust the source of this workbook, click Yes."
Even if I did not write anything to file, I got this error.
My code in Excel class is:
using MaasRaporlari.Models;
using OfficeOpenXml;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Web;
namespace MaasRaporlari.Controllers
{
class AyrintiliHarcamaRapor
{
public List<AyrintiliHarcamaProgramiVM2> Ayrintili;
private static int RaporaEklenebilecekIcerikSayısı = 33;
public string ExcelTemplatePath { get; set; } // = HostingEnvironment.MapPath(Url.Content("~/Content/Xsl/")); //#"C:\test\OdemeEmriBelgesi.xlsx";
public string ExcelResultPath { get; set; } // = #"C:\test\OdemeEmriSonuc.xlsx";
public FileStream Save()
{
try
{
ExcelTemplatePath = HttpContext.Current.Server.MapPath("~/Images/AyrintiliHarcamaProgrami.xlsx");
ExcelResultPath = HttpContext.Current.Server.MapPath("~/Images/Harcama.xls");
var File = new FileInfo(ExcelTemplatePath);
using (ExcelPackage package = new ExcelPackage(File))
{
package.Load(new FileStream(ExcelTemplatePath, FileMode.Open));
int sheetCount = (int)Math.Ceiling((double)Ayrintili.Count / RaporaEklenebilecekIcerikSayısı);
ExcelWorksheet workSheet = package.Workbook.Worksheets["Sheet1"];
List<ExcelWorksheet> workSheets = new List<ExcelWorksheet>();
workSheets.Add(workSheet);
package.Stream.Position = 0;
if (sheetCount >= 2)
{
for (int i = 0; i < sheetCount - 1; i++)
{
ExcelWorksheet tempSheet = package.Workbook.Worksheets.Add(string.Format("Sheet{0}", i + 2), workSheet);
workSheets.Add(tempSheet);
}
}
int icerikSayac = 0;
foreach (ExcelWorksheet workSheetItem in workSheets)
{
package.Stream.Position = 0;
decimal MiktarToplam1 = 0;
decimal MiktarToplam2 = 0;
decimal MiktarToplam3 = 0;
decimal MiktarToplam4 = 0;
decimal MiktarToplam5 = 0;
decimal OranToplam1 = 0;
decimal OranToplam2 = 0;
decimal OranToplam3 = 0;
decimal OranToplam4 = 0;
decimal OranToplam5 = 0;
MiktarToplam Toplam = new MiktarToplam();
int rowPointer = 10;
for (int i = 0; i < RaporaEklenebilecekIcerikSayısı; i++)
{
if (icerikSayac < Ayrintili.Count)
{
var item = Ayrintili[icerikSayac];
package.Stream.Position = 0;
MiktarToplam5 = item.Miktar1 + item.Miktar2 + item.Miktar3 + item.Miktar4;
OranToplam5 = item.Oran1 + item.Oran2 + item.Oran3 + item.Oran4;
MiktarToplam1 += item.Miktar1;
MiktarToplam2 += item.Miktar2;
MiktarToplam3 += item.Miktar3;
MiktarToplam4 += item.Miktar4;
OranToplam1 += item.Oran1;
OranToplam2 += item.Oran2;
OranToplam3 += item.Oran3;
OranToplam4 += item.Oran4;
workSheetItem.Cells["A" + rowPointer.ToString()].Value = item.One;
workSheetItem.Cells["B" + rowPointer.ToString()].Value = item.Two;
workSheetItem.Cells["C" + rowPointer.ToString()].Value = item.Aciklama;
workSheetItem.Cells["D" + rowPointer.ToString()].Value = item.Miktar1;
workSheetItem.Cells["F" + rowPointer.ToString()].Value = item.Miktar2;
workSheetItem.Cells["H" + rowPointer.ToString()].Value = item.Miktar3;
workSheetItem.Cells["J" + rowPointer.ToString()].Value = item.Miktar4;
workSheetItem.Cells["E" + rowPointer.ToString()].Value = item.Oran1;
workSheetItem.Cells["G" + rowPointer.ToString()].Value = item.Oran2;
workSheetItem.Cells["I" + rowPointer.ToString()].Value = item.Oran3;
workSheetItem.Cells["K" + rowPointer.ToString()].Value = item.Oran4;
workSheetItem.Cells["AC" + rowPointer.ToString()].Value = MiktarToplam5;
workSheetItem.Cells["AD" + rowPointer.ToString()].Value = OranToplam5;
rowPointer++;
icerikSayac++;
}
}
workSheetItem.Cells["D33"].Value = MiktarToplam1;
workSheetItem.Cells["E33"].Value = OranToplam1;
workSheetItem.Cells["F33"].Value = MiktarToplam2;
workSheetItem.Cells["G33"].Value = OranToplam2;
workSheetItem.Cells["H33"].Value = MiktarToplam3;
workSheetItem.Cells["I33"].Value = OranToplam3;
workSheetItem.Cells["J33"].Value = MiktarToplam4;
workSheetItem.Cells["K33"].Value = OranToplam4;
}
using (FileStream outStream = new FileStream(ExcelResultPath, FileMode.OpenOrCreate, FileAccess.ReadWrite))
{
package.Stream.Position = 0;
package.SaveAs(outStream);
outStream.Position = 0;
package.Stream.Dispose();
return (outStream);
}
}
}
catch (Exception)
{
throw;
}
}
public class MiktarToplam
{
public decimal Toplam { get; set; }
public MiktarToplam()
{
Toplam = 0;
}
}
}
}

Just a few quick questions:
Is it necessary to put the file in the ExcelPackage constructor and then load it(package.Load)?
Why do you set package.Stream.Postision = 0 on line 34 and again line 69?
I am doing this in my solution(wb is the workbook):
var ms = new MemoryStream();
wb.SaveAs(ms);
ms.Seek(0, SeekOrigin.Begin);
Have you tried saving to file instead (Just to make sure that everything is working).

Epplus is an old tool for Excel. If you look a little, you can see lots of better tools. And you should research carefully. Additionally, you should look to these tool's websites. When was the last commit or update?
NPOI and GemBox one of the best tools. Good luck.

Related

How to speed up Excel file upload EPPlus

I have an ASP.NET Core app, with a model, the aim is to allow user to upload an excel file and then save the file to the model/table. I have the below method
[HttpPost]
[ValidateAntiForgeryToken]
public async Task<IActionResult> Upload(IFormFile file)
{
string webRootPath = _hostEnvironment.WebRootPath;
var uploads = Path.Combine(webRootPath, "Upload");
var files = HttpContext.Request.Form.Files;
var extension = Path.GetExtension(files[0].FileName);
using (var filesStream = new FileStream(Path.Combine(uploads, file.FileName), FileMode.Create))
{
files[0].CopyTo(filesStream);
}
var list = new List<User>();
using (var stream = new MemoryStream())
{
await file.CopyToAsync(stream);
using (var package = new ExcelPackage(stream))
{
ExcelWorksheet worksheet = package.Workbook.Worksheets[0];
var rowcount = worksheet.Dimension.Rows;
for (int row = 2; row <= rowcount; row++)
{
list.Add(new User
{
Name = worksheet.Cells[row, 1]?.Value?.ToString().Trim(),
Address1 = worksheet.Cells[row, 2]?.Value?.ToString().Trim(),
PostCode = worksheet.Cells[row, 3]?.Value?.ToString().Trim(),
Mobile = worksheet.Cells[row, 4]?.Value?.ToString().Trim(),
});
}
}
}
foreach (var user in list)
{
_db.User.AddAsyncy(user);
}
_db.SaveChangesAsyncy();
return View();
}
This code works fine by processing an excel file uploaded by a user but the problem I'm having is that when the file is large say above 3 mb, it takes well over 8 minutes to upload.
Any idea how to speed this up please? Thanks.
There are two things you can do to increase speed.
1)Instead of reading excel file with ExcelWorksheet class go with a library called ExcelDataReader which can read around 600k records under a minute.
sample code
Model
class Person
{
public int id,
public string name
}
//and excel file has both columns in model ,the we can read with below code
using ExcelDataReader;
System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
var fileName = "./Person.xlsx";
var timer = new Stopwatch();
timer.Start();
int counter=0;
List<Person> persons = new List<Person>();
using (var stream = System.IO.File.Open(fileName, FileMode.Open, FileAccess.Read))
{
using (var reader = ExcelReaderFactory.CreateReader(stream))
{
while (reader.Read()) //Each row of the file
{
var person = new Person
{
id = reader.GetValue(0).ToString(),
name = reader.GetValue(1).ToString()
}
persons.Add(person)
counter++;
}
timer.Stop();
duration = timer.ElapsedMilliseconds / 1000;
//to check performace print duration and persons list
}
}
https://github.com/ExcelDataReader/ExcelDataReader
2)Once you read and store data in a list, you can store that data in DataTable class and insert into database using Oracle.ManagedDataAccess.Client Nuget package instead of EFcore. This method is fast. Please go through below link for doing this with Oracle database.
https://www.c-sharpcorner.com/article/two-ways-to-insert-bulk-data-into-oracle-database-using-c-sharp/
var db_timer = new Stopwatch();
db_timer.Start();
DataTable dt = new DataTable();
dt.Columns.Add("id");
dt.Columns.Add("name");
for (int i = 0; i < counter; i++)
{
DataRow dr = dt.NewRow();
dr["id"] = persons[i].id;
dr["name"] = persons[i].name;
dt.Rows.Add(dr);
}
using (var connection = new OracleConnection(oracleConString))
{
connection.Open();
int[] ids = new int[dt.Rows.Count];
string[] names = new string[dt.Rows.Count];
for (int j = 0; j < dt.Rows.Count; j++)
{
ids[j] = Convert.ToString(dt.Rows[j]["id"]);
names[j] = Convert.ToString(dt.Rows[j]["name"]);
}
OracleParameter id = new OracleParameter();
id.OracleDbType = OracleDbType.Int32;
id.Value = ids;
OracleParameter name = new OracleParameter();
name.OracleDbType = OracleDbType.Varchar2;
name.Value = names;
OracleCommand cmd = connection.CreateCommand();
cmd.CommandText = "INSERT INTO TEST(id,name) VALUES (:1,:2)";
cmd.ArrayBindCount = ids.Length;
cmd.Parameters.Add(id);
cmd.Parameters.Add(name);
cmd.ExecuteNonQuery();
}
just sample code you can user timer to check how much time it is taking to execute.

MVC Linq throws intermittent null reference exception during export to Excel

I'm using the same code in 6 Areas, each a separate database. Been working fine for a long time, now fails in 3 of the 6 with a null reference exception. Classes all the same, all database tables hold data. First method is the view which displays the data, second called by a link in the view to export the data to Excel. 3 cases display and export correctly. In 2 out of the other 3 cases the data is displayed correctly in the view, and the null exception is thrown at the point the data should be being retrieved in the export method. In the last case the null exception is thrown when trying to return the view. Simply can't understand how it works in some instances but not in others.
public ActionResult PVS(string studySite, string currentFilter, int? page)
{
ViewBag.SelectedID = studySite;
if (studySite != null)
page = 1;
else
studySite = currentFilter;
ViewBag.CurrentFilter = studySite;
IQueryable<PVS> schedule = db.PVS.OrderBy(v => v.SiteNumber).ThenBy(v => v.Participant);
if (studySite != null)
{
string selectedID = studySite;
images = schedule.Where(v => v.SiteNumber == selectedID);
}
int pageSize = 10;
int pageNumber = (page ?? 1);
return View(schedule.ToPagedList(pageNumber, pageSize));
}
public void ExportPVSToExcel()
{
var grid = new System.Web.UI.WebControls.GridView();
var pvs = db.PVS.OrderBy(p => p.SiteNumber).ThenBy(p => p.Participant).ToList();
grid.DataSource = pvs;
grid.DataBind();
// create an Excel worksheet for the data export
ExcelPackage excel = new ExcelPackage();
var workSheet = excel.Workbook.Worksheets.Add("PVS");
var totalCols = grid.Rows[0].Cells.Count;
var totalRows = grid.Rows.Count;
var headerRow = grid.HeaderRow;
for (var i = 1; i <= totalCols; i++)
{
workSheet.Cells[1, i].Value = headerRow.Cells[i - 1].Text;
}
for (var j = 1; j <= totalRows; j++)
{
for (var i = 1; i <= totalCols; i++)
{
var data = pvs.ElementAt(j - 1);
workSheet.Cells[j + 1, i].Value = data.GetType().GetProperty(headerRow.Cells[i - 1].Text).GetValue(data, null);
}
}
using (var memoryStream = new MemoryStream())
{
Response.ContentType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
Response.AddHeader("content-disposition", "attachment; filename=PVS.xlsx");
excel.SaveAs(memoryStream);
memoryStream.WriteTo(Response.OutputStream);
Response.Flush();
Response.End();
}
}

Generating a HTML table from an Excel file using EPPlus?

I would like to generate a HTML table from an excel file. The EPPlus package provides a .net API for manipulating excel file. I would be happy to know whether it is possible to generate a HTML table code from an Excel file using EPPlus? I couldn't find anything on the documentation, but intuition tells me that there should be a way to do it
Thank you!
If you are looking for something built into EPPlus, I havent seen anything that will export directly HTML.
Best thing would be to bring in the Excel file to EPPlus and extract the data to a collection or DataTable. That should be pretty straight forward.
From there, there is plenty of documentation on how to get that to an html table. Quick search turned up this as the first hit:
Datatable to html Table
I wrote some code, you can try this.
static void Main(string[] args)
{
ExcelPackage p = new ExcelPackage(new System.IO.FileInfo("X.XLSX"));
var sheet = p.Workbook.Worksheets["HMTD"];
var noOfCol = sheet.Dimension.End.Column;
var noOfRow = sheet.Dimension.End.Row;
StringBuilder s = new StringBuilder();
s.Append("<table>");
for (int i = 1; i < noOfRow; i++)
{
s.Append("<tr>");
for (int j = 1; j < noOfCol; j++)
{
int colspan = 1;
int rowspan = 1;
if (!sheet.Cells[i, j].Merge || (sheet.Cells[i, j].Merge && isFirstMergeRange(sheet, sheet.Cells[i, j].Address, ref colspan, ref rowspan)))
{
s.Append("<td rowspan='" + rowspan + "' colspan='" + colspan + "'>");
if(sheet.Cells[i,j] != null && sheet.Cells[i,j].Value != null)
s.Append(sheet.Cells[i,j].Value.ToString());
s.Append("</td>");
}
}
s.Append("</tr>");
}
s.Append("</table>");
System.IO.File.WriteAllText("duc.html",s.ToString());
Console.ReadKey();
}
private static bool isFirstMergeRange(ExcelWorksheet sheet, string address, ref int colspan, ref int rowspan)
{
colspan = 1;
rowspan = 1;
foreach (var item in sheet.MergedCells)
{
var s = item.Split(':');
if (s.Length > 0 && s[0].Equals(address)){
ExcelRange range = sheet.Cells[item];
colspan = range.End.Column - range.Start.Column;
rowspan = range.End.Row - range.Start.Row;
if(colspan == 0) colspan = 1;
if(rowspan == 0) rowspan = 1;
return true;
}
}
return false;
}
Based on the answer from Duc Tran Minh, it works fine for me after I modified the code like this:
static void Main(string[] args)
{
ExcelPackage p = new ExcelPackage(new System.IO.FileInfo("X.XLSX"));
var sheet = p.Workbook.Worksheets["HMTD"];
var noOfCol = sheet.Dimension.End.Column;
var noOfRow = sheet.Dimension.End.Row;
StringBuilder s = new StringBuilder();
s.Append("<table>");
for (int i = 1; i <= noOfRow; i++)
{
s.Append("<tr>");
for (int j = 1; j <= noOfCol; j++)
{
int colspan = 1;
int rowspan = 1;
if (!sheet.Cells[i, j].Merge || (sheet.Cells[i, j].Merge && isFirstMergeRange(sheet, sheet.Cells[i, j].Address, ref colspan, ref rowspan)))
{
s.Append("<td rowspan='" + rowspan + "' colspan='" + colspan + "'>");
if(sheet.Cells[i,j] != null && sheet.Cells[i,j].Value != null)
s.Append(sheet.Cells[i,j].Value.ToString());
s.Append("</td>");
}
}
s.Append("</tr>");
}
s.Append("</table>");
System.IO.File.WriteAllText("duc.html",s.ToString());
Console.ReadKey();
}
bool isFirstMergeRange(ExcelWorksheet sheet, string address, ref int colspan, ref int rowspan)
{
colspan = 1;
rowspan = 1;
foreach (var item in sheet.MergedCells)
{
var s = item.Split(':');
if (s.Length > 0 && s[0].Equals(address))
{
ExcelRange range = sheet.Cells[item];
colspan = range.End.Column - range.Start.Column + 1;
rowspan = range.End.Row - range.Start.Row + 1;
return true;
}
}
return false;
}

Convert a number in scientific notation to numeric format in Excel using a macro or VBA

MS Excel has eaten my head. It is randomly converting numbers into scientific notation format. This causes a problem when I load the file saved in tab delimited format into SQL Server. I know I can provide a format file and do lot of fancy stuff. But let's say I can't.
Is there a macro which loops over all the cells and if the number in a cell is in scientific notation format then it converts it to numeric format?
Say:
Input: spaces signify different cells.
1.00E13 egalitarian
Output after macro:
10000000000000 egalitarian
I am trying this in Excel 2007.
I wrote a simple C# program to resolve this issue. Hope it's of use.
Input:
Input directory where files reside (assuming files are in .txt format).
Output:
Output directory where converted files will be spit out.
Delimiter:
Column delimiter.
The code
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Text;
using System.Threading;
namespace ConvertToNumber
{
class Program
{
private static string ToLongString(double input)
{
string str = input.ToString().ToUpper();
// If string representation was collapsed from scientific notation, just return it:
if (!str.Contains("E"))
return str;
var positive = true;
if (input < 0)
{
positive = false;
}
string sep = Thread.CurrentThread.CurrentCulture.NumberFormat.NumberDecimalSeparator;
char decSeparator = sep.ToCharArray()[0];
string[] exponentParts = str.Split('E');
string[] decimalParts = exponentParts[0].Split(decSeparator);
// Fix missing decimal point:
if (decimalParts.Length == 1)
decimalParts = new string[] { exponentParts[0], "0" };
int exponentValue = int.Parse(exponentParts[1]);
string newNumber = decimalParts[0].Replace("-","").
Replace("+","") + decimalParts[1];
string result;
if (exponentValue > 0)
{
if(positive)
result =
newNumber +
GetZeros(exponentValue - decimalParts[1].Length);
else
result = "-"+
newNumber +
GetZeros(exponentValue - decimalParts[1].Length);
}
else // Negative exponent
{
if(positive)
result =
"0" +
decSeparator +
GetZeros(exponentValue + decimalParts[0].Replace("-", "").
Replace("+", "").Length) + newNumber;
else
result =
"-0" +
decSeparator +
GetZeros(exponentValue + decimalParts[0].Replace("-", "").
Replace("+", "").Length) + newNumber;
result = result.TrimEnd('0');
}
float temp = 0.00F;
if (float.TryParse(result, out temp))
{
return result;
}
throw new Exception();
}
private static string GetZeros(int zeroCount)
{
if (zeroCount < 0)
zeroCount = Math.Abs(zeroCount);
StringBuilder sb = new StringBuilder();
for (int i = 0; i < zeroCount; i++) sb.Append("0");
return sb.ToString();
}
static void Main(string[] args)
{
//Get Input Directory.
Console.WriteLine(#"Enter the Input Directory");
var readLine = Console.ReadLine();
if (readLine == null)
{
Console.WriteLine(#"Enter the input path properly.");
return;
}
var pathToInputDirectory = readLine.Trim();
//Get Output Directory.
Console.WriteLine(#"Enter the Output Directory");
readLine = Console.ReadLine();
if (readLine == null)
{
Console.WriteLine(#"Enter the output path properly.");
return;
}
var pathToOutputDirectory = readLine.Trim();
//Get Delimiter.
Console.WriteLine("Enter the delimiter;");
var columnDelimiter = (char) Console.Read();
//Loop over all files in the directory.
foreach (var inputFileName in Directory.GetFiles(pathToInputDirectory))
{
var outputFileWithouthNumbersInScientificNotation = string.Empty;
Console.WriteLine("Started operation on File : " + inputFileName);
if (File.Exists(inputFileName))
{
// Read the file
using (var file = new StreamReader(inputFileName))
{
string line;
while ((line = file.ReadLine()) != null)
{
String[] columns = line.Split(columnDelimiter);
var duplicateLine = string.Empty;
int lengthOfColumns = columns.Length;
int counter = 1;
foreach (var column in columns)
{
var columnDuplicate = column;
try
{
if (Regex.IsMatch(columnDuplicate.Trim(),
#"^[+-]?[0-9]+(\.[0-9]+)?[E]([+-]?[0-9]+)$",
RegexOptions.IgnoreCase))
{
Console.WriteLine("Regular expression matched for this :" + column);
columnDuplicate = ToLongString(Double.Parse
(column,
System.Globalization.NumberStyles.Float));
Console.WriteLine("Converted this no in scientific notation " +
"" + column + " to this number " +
columnDuplicate);
}
}
catch (Exception)
{
}
duplicateLine = duplicateLine + columnDuplicate;
if (counter != lengthOfColumns)
{
duplicateLine = duplicateLine + columnDelimiter.ToString();
}
counter++;
}
duplicateLine = duplicateLine + Environment.NewLine;
outputFileWithouthNumbersInScientificNotation = outputFileWithouthNumbersInScientificNotation + duplicateLine;
}
file.Close();
}
var outputFilePathWithoutNumbersInScientificNotation
= Path.Combine(pathToOutputDirectory, Path.GetFileName(inputFileName));
//Create the directory if it does not exist.
if (!Directory.Exists(pathToOutputDirectory))
Directory.CreateDirectory(pathToOutputDirectory);
using (var outputFile =
new StreamWriter(outputFilePathWithoutNumbersInScientificNotation))
{
outputFile.Write(outputFileWithouthNumbersInScientificNotation);
outputFile.Close();
}
Console.WriteLine("The transformed file is here :" +
outputFilePathWithoutNumbersInScientificNotation);
}
}
}
}
}
This works fairly well in case of huge files which we are unable to open in MS Excel.
Thanks Peter.
I updated your original work to:
1) take in an input file or path
2) only write out a processing statement after every 1000 lines read
3) write the transformed lines to the output file as they are processed so a potentially large string is not kept hanging around
4) added a readkey at the end so that console does not exit automatically while debuggging
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Text;
using System.Threading;
namespace ConvertScientificToLong
{
class Program
{
private static string ToLongString(double input)
{
string str = input.ToString().ToUpper();
// If string representation was collapsed from scientific notation, just return it:
if (!str.Contains("E"))
return str;
var positive = true;
if (input < 0)
{
positive = false;
}
string sep = Thread.CurrentThread.CurrentCulture.NumberFormat.NumberDecimalSeparator;
char decSeparator = sep.ToCharArray()[0];
string[] exponentParts = str.Split('E');
string[] decimalParts = exponentParts[0].Split(decSeparator);
// Fix missing decimal point:
if (decimalParts.Length == 1)
decimalParts = new string[] { exponentParts[0], "0" };
int exponentValue = int.Parse(exponentParts[1]);
string newNumber = decimalParts[0].Replace("-", "").
Replace("+", "") + decimalParts[1];
string result;
if (exponentValue > 0)
{
if (positive)
result =
newNumber +
GetZeros(exponentValue - decimalParts[1].Length);
else
result = "-" +
newNumber +
GetZeros(exponentValue - decimalParts[1].Length);
}
else // Negative exponent
{
if (positive)
result =
"0" +
decSeparator +
GetZeros(exponentValue + decimalParts[0].Replace("-", "").
Replace("+", "").Length) + newNumber;
else
result =
"-0" +
decSeparator +
GetZeros(exponentValue + decimalParts[0].Replace("-", "").
Replace("+", "").Length) + newNumber;
result = result.TrimEnd('0');
}
float temp = 0.00F;
if (float.TryParse(result, out temp))
{
return result;
}
throw new Exception();
}
private static string GetZeros(int zeroCount)
{
if (zeroCount < 0)
zeroCount = Math.Abs(zeroCount);
StringBuilder sb = new StringBuilder();
for (int i = 0; i < zeroCount; i++) sb.Append("0");
return sb.ToString();
}
static void Main(string[] args)
{
//Get Input Directory.
Console.WriteLine(#"Enter the Input Directory or File Path");
var readLine = Console.ReadLine();
if (readLine == null)
{
Console.WriteLine(#"Enter the input path properly.");
return;
}
var pathToInputDirectory = readLine.Trim();
//Get Output Directory.
Console.WriteLine(#"Enter the Output Directory");
readLine = Console.ReadLine();
if (readLine == null)
{
Console.WriteLine(#"Enter the output path properly.");
return;
}
var pathToOutputDirectory = readLine.Trim();
//Get Delimiter.
Console.WriteLine("Enter the delimiter;");
var columnDelimiter = (char)Console.Read();
string[] inputFiles = null;
if (File.Exists(pathToInputDirectory))
{
inputFiles = new String[]{pathToInputDirectory};
}
else
{
inputFiles = Directory.GetFiles(pathToInputDirectory);
}
//Loop over all files in the directory.
foreach (var inputFileName in inputFiles)
{
var outputFileWithouthNumbersInScientificNotation = string.Empty;
Console.WriteLine("Started operation on File : " + inputFileName);
if (File.Exists(inputFileName))
{
string outputFilePathWithoutNumbersInScientificNotation
= Path.Combine(pathToOutputDirectory, Path.GetFileName(inputFileName));
//Create the directory if it does not exist.
if (!Directory.Exists(pathToOutputDirectory))
Directory.CreateDirectory(pathToOutputDirectory);
using (var outputFile =
new StreamWriter(outputFilePathWithoutNumbersInScientificNotation))
{
// Read the file
using (StreamReader file = new StreamReader(inputFileName))
{
string line;
int lineCount = 0;
while ((line = file.ReadLine()) != null)
{
String[] columns = line.Split(columnDelimiter);
var duplicateLine = string.Empty;
int lengthOfColumns = columns.Length;
int counter = 1;
foreach (var column in columns)
{
var columnDuplicate = column;
try
{
if (Regex.IsMatch(columnDuplicate.Trim(),
#"^[+-]?[0-9]+(\.[0-9]+)?[E]([+-]?[0-9]+)$",
RegexOptions.IgnoreCase))
{
//Console.WriteLine("Regular expression matched for this :" + column);
columnDuplicate = ToLongString(Double.Parse
(column,
System.Globalization.NumberStyles.Float));
//Console.WriteLine("Converted this no in scientific notation " +
// "" + column + " to this number " +
// columnDuplicate);
if (lineCount % 1000 == 0)
{
Console.WriteLine(string.Format("processed {0} lines. still going....", lineCount));
}
}
}
catch (Exception)
{
}
duplicateLine = duplicateLine + columnDuplicate;
if (counter != lengthOfColumns)
{
duplicateLine = duplicateLine + columnDelimiter.ToString();
}
counter++;
}
outputFile.WriteLine(duplicateLine);
lineCount++;
}
}
}
Console.WriteLine("The transformed file is here :" +
outputFilePathWithoutNumbersInScientificNotation);
Console.WriteLine(#"Hit any key to exit");
Console.ReadKey();
}
}
}
}
}
An easier way would be to save it as a .csv file. Then, instead of just opening the file - you go to the Data tab and select "from text" so that you can get the dialogue box. This way you can identify that column with the scientific notation as text to wipe out that format. Import the file and then you can reformat it to number or whatever you want.

how to extract Paragraph text color from ms word using apache poi

i am using apache POI , is it possible to read text background and foreground color from ms word paragraph
I got the solution
HWPFDocument doc = new HWPFDocument(fs);
WordExtractor we = new WordExtractor(doc);
Range range = doc.getRange();
String[] paragraphs = we.getParagraphText();
for (int i = 0; i < paragraphs.length; i++) {
org.apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i);
System.out.println(pr.getEndOffset());
int j=0;
while (true) {
CharacterRun run = pr.getCharacterRun(j++);
System.out.println("-------------------------------");
System.out.println("Color---"+ run.getColor());
System.out.println("getFontName---"+ run.getFontName());
System.out.println("getFontSize---"+ run.getFontSize());
if( run.getEndOffset()==pr.getEndOffset()){
break;
}
}
}
I found it in :
CharacterRun run = para.getCharacterRun(i)
i should be integer and should be incremented so the code will be as follow :
int c=0;
while (true) {
CharacterRun run = para.getCharacterRun(c++);
int x = run.getPicOffset();
System.out.println("pic offset" + x);
if (run.getEndOffset() == para.getEndOffset()) {
break;
}
}
if (paragraph != null)
{
int numberOfRuns = paragraph.NumCharacterRuns;
for (int runIndex = 0; runIndex < numberOfRuns; runIndex++)
{
CharacterRun run = paragraph.GetCharacterRun(runIndex);
string color = getColor24(run.GetIco24());
}
}
GetColor24 Function to Convert Color in Hex Format for C#
public static String getColor24(int argbValue)
{
if (argbValue == -1)
return "";
int bgrValue = argbValue & 0x00FFFFFF;
int rgbValue = (bgrValue & 0x0000FF) << 16 | (bgrValue & 0x00FF00)
| (bgrValue & 0xFF0000) >> 16;
StringBuilder result = new StringBuilder("#");
String hex = rgbValue.ToString("X");
for (int i = hex.Length; i < 6; i++)
{
result.Append('0');
}
result.Append(hex);
return result.ToString();
}
if you are working on docx(OOXML), you may want to take a look on this:
import java.io.*
import org.apache.poi.xwpf.usermodel.XWPFDocument
fun test(){
try {
val file = File("file.docx")
val fis = FileInputStream(file.absolutePath)
val document = XWPFDocument(fis)
val paragraphs = document.paragraphs
for (para in paragraphs) {
println("-- ("+para.alignment+") " + para.text)
para.runs.forEach { it ->
println(
"text:" + it.text() + " "
+ "(color:" + it.color
+ ",fontFamily:" + it.fontFamily
+ ")"
)
}
}
fis.close()
} catch (e: Exception) {
e.printStackTrace()
}
}

Resources