Reading images from all excel worksheets using Apache POI

Reading images from all excel worksheets using Apache POI - excel

I am trying to copy all the images from all the worksheets from an existing excel file to a new excel file. For this, I read all the sheets from the existing excel file and copy the images to the new excel file. The following code accesses all the sheets and tries to copy all the images to the new sheet.
/**
 * Builds a workbook from the "test.xlsx" template with one sheet per sheet of
 * {@code xssfWorkbook} (same names, same positions), copies the images of each
 * source sheet onto its counterpart and writes the result to
 * "outputexcelfile.xlsx".
 *
 * @param xssfWorkbook workbook whose sheet names and images are copied
 */
public static void modifyExcelFile(XSSFWorkbook xssfWorkbook) throws InvalidFormatException, IOException, XmlException {
    final XSSFWorkbook target = new XSSFWorkbook("test.xlsx");
    final int sheetCount = xssfWorkbook.getNumberOfSheets();

    // Sheet 0 of the template is cloned until the target has as many sheets as the source.
    int clonesNeeded = sheetCount - 1;
    while (clonesNeeded > 0) {
        target.cloneSheet(0);
        clonesNeeded--;
    }

    int index = 0;
    while (index < sheetCount) {
        XSSFSheet targetSheet = target.getSheetAt(index);
        target.setSheetName(index, xssfWorkbook.getSheetName(index));
        XSSFSheet sourceSheet = xssfWorkbook.getSheetAt(index);

        System.out.println("Copying images ...");
        copyImages(sourceSheet, targetSheet);
        index++;
    }

    writeFile(target, "outputexcelfile.xlsx");
}
test.xlsx is the template file with only one sheet. The following copyImages function tries to copy all the images from one sheet to the other.
/**
 * Copies the pictures held by two-cell anchors on {@code from} onto {@code to},
 * reusing each anchor's from/to column and row.
 *
 * <p>Anchors whose top-left cell is column 0 / row 0 are skipped (treated as
 * the logo). Anchors that do not carry an embedded picture are skipped as
 * well. A picture whose bytes cannot be read is reported on standard output
 * and the copy continues with the next one.
 *
 * @param from sheet whose drawing parts are scanned for pictures
 * @param to   sheet that receives the copied pictures
 * @throws IOException            if a drawing part cannot be read
 * @throws XmlException           if a drawing part is not valid drawing XML
 * @throws InvalidFormatException if a related image part cannot be resolved
 */
public static void copyImages(XSSFSheet from, XSSFSheet to) throws IOException, XmlException, InvalidFormatException {
    Drawing drawingPatriarch = to.createDrawingPatriarch();
    XSSFWorkbook destWorkbook = to.getWorkbook();
    CreationHelper helper = destWorkbook.getCreationHelper();

    for (POIXMLDocumentPart pdp : from.getRelations()) {
        if (!XSSFRelation.DRAWINGS.getRelation().equals(pdp.getPackageRelationship().getRelationshipType())) {
            continue;
        }
        PackagePart drawPP = pdp.getPackagePart();
        WsDrDocument draw;
        try (InputStream drawingXml = drawPP.getInputStream()) {
            draw = WsDrDocument.Factory.parse(drawingXml);
        }

        for (CTTwoCellAnchor twoAnc : draw.getWsDr().getTwoCellAnchorList()) {
            // Two-cell anchors can also hold shapes, connectors or charts; only pictures are copied.
            if (twoAnc.getPic() == null
                    || twoAnc.getPic().getBlipFill() == null
                    || twoAnc.getPic().getBlipFill().getBlip() == null) {
                continue;
            }
            String picId = twoAnc.getPic().getBlipFill().getBlip().getEmbed();
            if (picId == null || picId.isEmpty()) {
                // No embedded relationship id, so there is no image part to copy.
                continue;
            }
            PackageRelationship pr = drawPP.getRelationship(picId);
            if (pr == null) {
                continue;
            }
            PackagePart imgPP = drawPP.getRelatedPart(pr);
            System.out.println(imgPP.getPartName() + ": contentType: " + imgPP.getContentType() + " size: " + imgPP.getSize()
                    + ": picId: " + picId
                    + " - Col1: " + twoAnc.getFrom().getCol()
                    + " - Row1: " + twoAnc.getFrom().getRow()
                    + " - Col2: " + twoAnc.getTo().getCol()
                    + " - Row2: " + twoAnc.getTo().getRow()
            );
            // skip the logo
            if (twoAnc.getFrom().getCol() == 0 && twoAnc.getFrom().getRow() == 0) {
                continue;
            }
            try (InputStream is = imgPP.getInputStream()) {
                byte[] bytes = IOUtils.toByteArray(is);
                // Register the image under its real format instead of always claiming PNG.
                int pictureIdx = destWorkbook.addPicture(bytes, pictureTypeFor(imgPP.getContentType()));
                ClientAnchor anchor = helper.createClientAnchor();
                // Same top-left and bottom-right cells as the source anchor.
                anchor.setCol1(twoAnc.getFrom().getCol());
                anchor.setRow1(twoAnc.getFrom().getRow());
                anchor.setRow2(twoAnc.getTo().getRow());
                anchor.setCol2(twoAnc.getTo().getCol());
                drawingPatriarch.createPicture(anchor, pictureIdx);
            } catch (IOException ioEx) {
                // Best effort: one unreadable image must not abort the remaining copies.
                System.out.println("Failed to add icons. Details: " + ioEx.getMessage());
            }
        }
    }
}

/** Maps an image part's content type to a {@code Workbook.PICTURE_TYPE_*} constant; unknown types fall back to PNG. */
private static int pictureTypeFor(String contentType) {
    if ("image/jpeg".equalsIgnoreCase(contentType)) {
        return Workbook.PICTURE_TYPE_JPEG;
    }
    if ("image/x-emf".equalsIgnoreCase(contentType)) {
        return Workbook.PICTURE_TYPE_EMF;
    }
    if ("image/x-wmf".equalsIgnoreCase(contentType)) {
        return Workbook.PICTURE_TYPE_WMF;
    }
    if ("image/pict".equalsIgnoreCase(contentType)) {
        return Workbook.PICTURE_TYPE_PICT;
    }
    if ("image/dib".equalsIgnoreCase(contentType)) {
        return Workbook.PICTURE_TYPE_DIB;
    }
    return Workbook.PICTURE_TYPE_PNG;
}
But if I run the code, it only copies images from the first sheet successfully. For all other sheets, images are not there. The code runs successfully without any errors. Any help is greatly appreciated. Thanks!

Just adding pict.resize(); function call after the Picture pict=... fixes the issue. So the modified code looks like the following:
// Reported workaround: calling resize() right after createPicture makes the copied pictures show up on every sheet.
Picture pict = drawingPatriarch.createPicture(anchor, pictureIdx);
// NOTE(review): resize() presumably resets the picture to the embedded image's native size, which would override the anchor's bottom-right cell — confirm against the POI Picture documentation.
pict.resize();
Not sure why it doesn't work without the resize() call.

Related

Apache-poi how to unhide column upon creation of Excel file

I am trying to generate an Excel workbook which will be a template.
For now I am trying to generate a workbook with 1 sheet that holds only header cells with values, with certain Height and Width values. The problem is not that I cannot do it, but when I generate/create the .xlsx file the cells are hidden in a certain way and I have to click N(if there are 13 cells) times to display them all.
[Example of how Cells are hidden]
[1]: https://i.stack.imgur.com/xYh1P.png
And the way I want them to be displayed upon creation of the file is like this.
[Example of how I wish them to be displayed upon creation]
[2]: https://i.stack.imgur.com/FWILw.png
The code is as follows
//creating workbook
private static void createWorkBookFile() throws IOException {
String filePath = "C:\\UltimateMapper\\UltimateMapperProject\\";
filePath+="\\WriteTestFiles";
System.out.print("Enter name of file: ");
String fileName = scan.nextLine();
filePath+="\\"+fileName+".xlsx";
XSSFWorkbook workbook = new XSSFWorkbook();
XSSFSheet currentSheet = workbook.createSheet("File to STG");
XSSFRow row;
XSSFCell cell;
//TO DO START FORM HERE FINISH AND THEN INSIDE LOOP
String[] fileToSTGCellValues_Names = new String[] {"Source Location_Schema","Source File_Table Name",
"Source_Field_Column Name","Start Pos","End Pos",
"Source Field Length","Source Field_Column Data Type",
"Transformation","Target Location_Schema","Target File_Table Name",
"Target Field_Column Name","Target Field_Column Data Type","Comments"};
HashMap<String,Integer> fileToSTGCellValues_Widths = new HashMap<String,Integer>();
fileToSTGCellValues_Widths.put("Source Location_Schema",26);
fileToSTGCellValues_Widths.put("Source File_Table Name",26);
fileToSTGCellValues_Widths.put("Source_Field_Column Name",35);
fileToSTGCellValues_Widths.put("Start Pos",10);
fileToSTGCellValues_Widths.put("End Pos",10);
fileToSTGCellValues_Widths.put("Source Field Length",18);
fileToSTGCellValues_Widths.put("Source Field_Column Data Type",21);
fileToSTGCellValues_Widths.put("Transformation",43);
fileToSTGCellValues_Widths.put("Target Location_Schema",18);
fileToSTGCellValues_Widths.put("Target File_Table Name",36);
fileToSTGCellValues_Widths.put("Target Field_Column Name",36);
fileToSTGCellValues_Widths.put("Target Field_Column Data Type",20);
fileToSTGCellValues_Widths.put("Comments",47);
int headeRowNumber = 0;
row = currentSheet.createRow(headeRowNumber);
row.setHeightInPoints(28.50f);
for(int i =0;i<13;i++) {
currentSheet.setColumnWidth(i, fileToSTGCellValues_Widths.get(fileToSTGCellValues_Names[i]));
cell = row.createCell(i);
cell.setCellValue(fileToSTGCellValues_Names[i]);
}
FileOutputStream fout = new FileOutputStream(filePath);
workbook.write(fout);
fout.close();
System.out.println("File created");
}```

Get Excel sheet names unsorted via delphi ADO [duplicate]

I'm using OleDb to read from an excel workbook with many sheets.
I need to read the sheet names, but I need them in the order they are defined in the spreadsheet; so If I have a file that looks like this;
|_____|_____|____|____|____|____|____|____|____|
|_____|_____|____|____|____|____|____|____|____|
|_____|_____|____|____|____|____|____|____|____|
\__GERMANY__/\__UK__/\__IRELAND__/
Then I need to get the dictionary
1="GERMANY",
2="UK",
3="IRELAND"
I've tried using OleDbConnection.GetOleDbSchemaTable(), and that gives me the list of names, but it alphabetically sorts them. The alpha-sort means I don't know which sheet number a particular name corresponds to. So I get;
GERMANY, IRELAND, UK
which has changed the order of UK and IRELAND.
The reason I need it to be sorted is that I have to let the user choose a range of data by name or index; they can ask for 'all the data from GERMANY to IRELAND' or 'data from sheet 1 to sheet 3'.
Any ideas would be greatly appreciated.
if I could use the office interop classes, this would be straightforward. Unfortunately, I can't because the interop classes don't work reliably in non-interactive environments such as windows services and ASP.NET sites, so I needed to use OLEDB.

Can you not just loop through the sheets from 0 to Count of names -1? that way you should get them in the correct order.
Edit
I noticed through the comments that there are a lot of concerns about using the Interop classes to retrieve the sheet names. Therefore here is an example using OLEDB to retrieve them:
/// <summary>
/// Reads the TABLE_NAME of every entry in the OLEDB "Tables" schema of an
/// Excel workbook opened through the Jet 4.0 provider.
/// </summary>
/// <param name="excelFile">Path of the workbook to inspect.</param>
/// <returns>
/// The table names in the order the schema table lists them, or null when
/// the schema table is unavailable or any exception occurs.
/// </returns>
private String[] GetExcelSheetNames(string excelFile)
{
    OleDbConnection connection = null;
    System.Data.DataTable schema = null;

    try
    {
        // Jet provider with the Excel 8.0 driver; the workbook path is the data source.
        connection = new OleDbConnection(
            "Provider=Microsoft.Jet.OLEDB.4.0;" +
            "Data Source=" + excelFile + ";Extended Properties=Excel 8.0;");
        connection.Open();

        schema = connection.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
        if (schema == null)
        {
            return null;
        }

        // One array slot per schema row, filled in row order.
        String[] names = new String[schema.Rows.Count];
        for (int k = 0; k < names.Length; k++)
        {
            names[k] = schema.Rows[k]["TABLE_NAME"].ToString();
        }

        return names;
    }
    catch (Exception)
    {
        // Any failure is reported to the caller as null.
        return null;
    }
    finally
    {
        if (connection != null)
        {
            connection.Close();
            connection.Dispose();
        }

        if (schema != null)
        {
            schema.Dispose();
        }
    }
}
Extracted from Article on the CodeProject.

Since the code above does not cover extracting the list of sheet names for Excel 2007, the following code works for both Excel 97-2003 and Excel 2007:
/// <summary>
/// Lists the schema table names that contain a '$' (worksheet entries) for
/// an .xls workbook (Jet 4.0 provider) or an .xlsx workbook (ACE 12.0 provider).
/// </summary>
/// <param name="filePath">Path of the workbook; the extension selects the provider.</param>
/// <returns>The matching TABLE_NAME values in the order the provider returns them.</returns>
public List<string> ListSheetInExcel(string filePath)
{
    OleDbConnectionStringBuilder sbConnection = new OleDbConnectionStringBuilder();
    String strExtendedProperties = String.Empty;
    sbConnection.DataSource = filePath;

    // Compare case-insensitively so that "BOOK.XLS" / "Book.XLSX" pick a provider too.
    string extension = Path.GetExtension(filePath);
    if (string.Equals(extension, ".xls", StringComparison.OrdinalIgnoreCase)) // for 97-03 Excel file
    {
        sbConnection.Provider = "Microsoft.Jet.OLEDB.4.0";
        strExtendedProperties = "Excel 8.0;HDR=Yes;IMEX=1"; // HDR=ColumnHeader, IMEX=InterMixed
    }
    else if (string.Equals(extension, ".xlsx", StringComparison.OrdinalIgnoreCase)) // for 2007 Excel file
    {
        sbConnection.Provider = "Microsoft.ACE.OLEDB.12.0";
        strExtendedProperties = "Excel 12.0;HDR=Yes;IMEX=1";
    }
    sbConnection.Add("Extended Properties", strExtendedProperties);

    List<string> listSheet = new List<string>();
    using (OleDbConnection conn = new OleDbConnection(sbConnection.ToString()))
    {
        conn.Open();
        using (DataTable dtSheet = conn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null))
        {
            // The schema table can be null; treat that as "no sheets".
            if (dtSheet != null)
            {
                foreach (DataRow drSheet in dtSheet.Rows)
                {
                    string tableName = drSheet["TABLE_NAME"].ToString();
                    // Keep only entries carrying the '$' marker used for sheet names.
                    if (tableName.Contains("$"))
                    {
                        listSheet.Add(tableName);
                    }
                }
            }
        }
    }
    return listSheet;
}
Above function returns list of sheet in particular excel file for both excel type(97,2003,2007).

Can't find this in actual MSDN documentation, but a moderator in the forums said
I am afraid that OLEDB does not preserve the sheet order as they were in Excel
Excel Sheet Names in Sheet Order
Seems like this would be a common enough requirement that there would be a decent workaround.

This is short, fast, safe, and usable...
/// <summary>
/// Returns the worksheet names reported by the OLEDB schema of a workbook,
/// with the trailing '$' (and surrounding single quotes, when present) removed.
/// </summary>
/// <param name="excelFilePath">
/// Path of the workbook; paths ending in 'x' use the ACE 12.0 provider, all others Jet 4.0.
/// </param>
/// <returns>The cleaned sheet names in the order the provider returns them.</returns>
public static List<string> ToExcelsSheetList(string excelFilePath)
{
    string connectionString;
    if (excelFilePath.TrimEnd().ToLower().EndsWith("x"))
    {
        connectionString = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source='" + excelFilePath + "';" + "Extended Properties='Excel 12.0 Xml;HDR=YES;'";
    }
    else
    {
        connectionString = "provider=Microsoft.Jet.OLEDB.4.0;Data Source='" + excelFilePath + "';Extended Properties=Excel 8.0;";
    }

    List<string> result = new List<string>();
    using (OleDbConnection oleDb = new OleDbConnection(connectionString))
    {
        oleDb.Open();
        DataTable schema = oleDb.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
        foreach (DataRow entry in schema.Rows)
        {
            string rawName = entry["TABLE_NAME"].ToString();
            if (!rawName.Contains("$"))
            {
                continue;
            }

            string cleaned;
            if (rawName.StartsWith("'"))
            {
                // Quoted form: drop the leading quote plus the last two characters.
                cleaned = rawName.Substring(1, rawName.Length - 3);
            }
            else
            {
                // Plain form: drop only the last character.
                cleaned = rawName.Substring(0, rawName.Length - 1);
            }
            result.Add(cleaned);
        }
        oleDb.Close();
    }
    return result;
}

Another way:
a xls(x) file is just a collection of *.xml files stored in a *.zip container.
unzip the file "app.xml" in the folder docProps.
<?xml version="1.0" encoding="UTF-8" standalone="true"?>
-<Properties xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties">
<TotalTime>0</TotalTime>
<Application>Microsoft Excel</Application>
<DocSecurity>0</DocSecurity>
<ScaleCrop>false</ScaleCrop>
-<HeadingPairs>
-<vt:vector baseType="variant" size="2">
-<vt:variant>
<vt:lpstr>Arbeitsblätter</vt:lpstr>
</vt:variant>
-<vt:variant>
<vt:i4>4</vt:i4>
</vt:variant>
</vt:vector>
</HeadingPairs>
-<TitlesOfParts>
-<vt:vector baseType="lpstr" size="4">
<vt:lpstr>Tabelle3</vt:lpstr>
<vt:lpstr>Tabelle4</vt:lpstr>
<vt:lpstr>Tabelle1</vt:lpstr>
<vt:lpstr>Tabelle2</vt:lpstr>
</vt:vector>
</TitlesOfParts>
<Company/>
<LinksUpToDate>false</LinksUpToDate>
<SharedDoc>false</SharedDoc>
<HyperlinksChanged>false</HyperlinksChanged>
<AppVersion>14.0300</AppVersion>
</Properties>
The file is a german file (Arbeitsblätter = worksheets).
The table names (Tabelle3 etc) are in the correct order. You just need to read these tags;)
regards

I have created the function below using the information provided in the answer from @kraeppy (https://stackoverflow.com/a/19930386/2617732). It requires .NET Framework v4.5 and a reference to System.IO.Compression. It only works for xlsx files, not for the older xls files.
using System.IO.Compression;
using System.Xml;
using System.Xml.Linq;
/// <summary>
/// Reads the entries of the TitlesOfParts vector in docProps/app.xml of an
/// .xlsx package and returns them in document order.
/// </summary>
/// <param name="fileName">Path of the .xlsx file.</param>
/// <returns>The lpstr values of the TitlesOfParts vector, fully materialized.</returns>
/// <exception cref="InvalidDataException">The package has no docProps/app.xml entry.</exception>
static IEnumerable<string> GetWorksheetNamesOrdered(string fileName)
{
    // Open read-only: the default FileStream access is read/write, which fails on read-only files.
    using (FileStream data = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (ZipArchive archive = new ZipArchive(data, ZipArchiveMode.Read))
    {
        // Select the properties part from the archive.
        ZipArchiveEntry appxmlFile = archive.GetEntry("docProps/app.xml");
        if (appxmlFile == null)
        {
            throw new InvalidDataException("'" + fileName + "' does not contain docProps/app.xml.");
        }

        XDocument xdoc;
        using (Stream appxml = appxmlFile.Open())
        {
            xdoc = XDocument.Load(appxml);
        }

        // Find the titles element.
        XElement titlesElement = xdoc.Descendants().Where(e => e.Name.LocalName == "TitlesOfParts").Single();

        // Materialize before the streams are disposed so callers get a stable result.
        return titlesElement
            .Elements().Where(e => e.Name.LocalName == "vector").Single()
            .Elements().Where(e => e.Name.LocalName == "lpstr")
            .Select(e => e.Value)
            .ToList();
    }
}

I like the idea of @deathApril to name the sheets as 1_Germany, 2_UK, 3_IRELAND. I also understand your concern about having to do this rename for hundreds of sheets. If renaming the sheets is not a problem for you, you can use this macro to do it for you. It takes only a few seconds to rename all the sheets. Unfortunately, ODBC and OLEDB return the sheet names in ascending order, and there is no way around that. You have to either use COM or rename your sheets so that their names sort in the desired order.
Sub Macro1()
'
' Macro1 Macro
'
' Prefixes every sheet name with its 1-based position, zero-padded to four
' digits and followed by "-" (e.g. "0001-GERMANY"), so that an alphabetical
' listing of the names matches the sheet order. Sheets whose name already
' starts with a numeric part followed by "-" are left untouched.
'
    Dim i As Integer
    Dim prefix As String
    Dim names As Variant

    For i = 1 To Sheets.Count
        ' Format$ pads to a fixed width; the previous If/ElseIf chain always took
        ' its first branch and produced names such as "00010-..." for sheet 10.
        prefix = Format$(i, "0000")

        names = Split(Sheets(i).Name, "-")
        If (UBound(names) > 0) And IsNumeric(names(0)) Then
            'do nothing: already prefixed
        Else
            Sheets(i).Name = prefix & "-" & Sheets(i).Name
        End If
    Next
End Sub
UPDATE:
After reading @SidHoland's comment regarding BIFF, an idea flashed. The following steps can be done through code. I don't know if you really want to do that to get the sheet names in the same order. Let me know if you need help to do this through code.
1. Consider XLSX as a zip file. Rename *.xlsx into *.zip
2. Unzip
3. Go to unzipped folder root and open /docprops/app.xml
4. This xml contains the sheet name in the same order of what you see.
5. Parse the xml and get the sheet names
UPDATE:
Another solution - NPOI might be helpful here
http://npoi.codeplex.com/
// Prints the sheet names of an .xls workbook in workbook order.
// The using block closes the stream even if reading the workbook throws.
using (FileStream file = new FileStream(@"yourexcelfilename", FileMode.Open, FileAccess.Read))
{
    HSSFWorkbook hssfworkbook = new HSSFWorkbook(file);
    for (int i = 0; i < hssfworkbook.NumberOfSheets; i++)
    {
        Console.WriteLine(hssfworkbook.GetSheetName(i));
    }
}
This solution works for xls. I didn't try xlsx.
Thanks,
Esen

This worked for me. Stolen from here: How do you get the name of the first page of an excel workbook?
// Reads the name of the first worksheet (Worksheets is 1-based) through Excel
// interop, then closes the workbook and quits the Excel instance it started
// so that it does not keep running after the call.
object opt = System.Reflection.Missing.Value;
string firstSheetName;
Excel.Application app = new Microsoft.Office.Interop.Excel.Application();
Excel.Workbook workbook = null;
try
{
    workbook = app.Workbooks.Open(WorkBookToOpen,
        opt, opt, opt, opt, opt, opt, opt,
        opt, opt, opt, opt, opt, opt, opt);
    Excel.Worksheet worksheet = workbook.Worksheets[1] as Microsoft.Office.Interop.Excel.Worksheet;
    firstSheetName = worksheet.Name;
}
finally
{
    if (workbook != null)
    {
        // false: do not save changes.
        workbook.Close(false, opt, opt);
    }
    app.Quit();
}

Try this. Here is the code to get the sheet names in order.
/// <summary>
/// Opens a workbook read-only through Excel interop and returns its sheet
/// names keyed by their 1-based position in the workbook.
/// </summary>
/// <param name="fileName">Path of the workbook to open.</param>
/// <returns>
/// Position-to-name dictionary in workbook order, or null if any exception occurs.
/// </returns>
private Dictionary<int, string> GetExcelSheetNames(string fileName)
{
    Excel.Application _excel = null;
    Excel.Workbook _workBook = null;
    Dictionary<int, string> excelSheets = new Dictionary<int, string>();
    try
    {
        object missing = Type.Missing;
        object readOnly = true;
        // (A stray "Excel.XlFileFormat.xlWorkbookNormal" expression without a
        // semicolon stood here and prevented compilation; it has been removed.)
        _excel = new Excel.ApplicationClass();
        _excel.Visible = false;
        _workBook = _excel.Workbooks.Open(fileName, 0, readOnly, 5, missing,
            missing, true, Excel.XlPlatform.xlWindows, "\\t", false, false, 0, true, true, missing);
        if (_workBook != null)
        {
            int index = 0;
            foreach (Excel.Worksheet sheet in _workBook.Sheets)
            {
                // Sheets enumerates in the order the sheets appear in the workbook.
                excelSheets.Add(++index, sheet.Name);
            }
        }
    }
    catch (Exception)
    {
        // Any failure is reported to the caller as null.
        return null;
    }
    finally
    {
        if (_excel != null)
        {
            if (_workBook != null)
                _workBook.Close(false, Type.Missing, Type.Missing);
            _excel.Application.Quit();
        }
        _excel = null;
        _workBook = null;
    }
    return excelSheets;
}

As per MSDN, In a case of spreadsheets inside of Excel it might not work because Excel files are not real databases. So you will be not able to get the sheets name in order of their visualization in workbook.
Code to get sheets name as per their visual appearance using interop:
Add reference to Microsoft Excel 12.0 Object Library.
Following code will give the sheets name in the actual order stored in workbook, not the sorted name.
Sample Code:
using Microsoft.Office.Interop.Excel;
// Collects the workbook's sheet names into `sheetname` via Excel interop.
string filename = "C:\\romil.xlsx";
// Placeholder passed for every optional argument of Workbooks.Open.
object missing = System.Reflection.Missing.Value;
Microsoft.Office.Interop.Excel.Application excel = new Microsoft.Office.Interop.Excel.Application();
Microsoft.Office.Interop.Excel.Workbook wb =excel.Workbooks.Open(filename, missing, missing, missing, missing,missing, missing, missing, missing, missing, missing, missing, missing, missing, missing);
ArrayList sheetname = new ArrayList();
// Names are appended in the order wb.Sheets enumerates them.
// NOTE(review): the workbook and the Excel application are never closed in this snippet — confirm the caller does so.
foreach (Microsoft.Office.Interop.Excel.Worksheet sheet in wb.Sheets)
{
sheetname.Add(sheet.Name);
}

I don't see any documentation that says the order in app.xml is guaranteed to be the order of the sheets. It PROBABLY is, but not according to the OOXML specification.
The workbook.xml file, on the other hand, includes the sheetId attribute, which does determine the sequence - from 1 to the number of sheets. This is according to the OOXML specification. workbook.xml is described as the place where the sequence of the sheets is kept.
So reading workbook.xml after it is extracted from the XLSX would be my recommendation, NOT app.xml. Instead of docProps/app.xml, use xl/workbook.xml and look at the <sheets> element, as shown here -
`
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<fileVersion appName="xl" lastEdited="5" lowestEdited="5" rupBuild="9303" />
<workbookPr defaultThemeVersion="124226" />
- <bookViews>
<workbookView xWindow="120" yWindow="135" windowWidth="19035" windowHeight="8445" />
</bookViews>
- <sheets>
<sheet name="By song" sheetId="1" r:id="rId1" />
<sheet name="By actors" sheetId="2" r:id="rId2" />
<sheet name="By pit" sheetId="3" r:id="rId3" />
</sheets>
- <definedNames>
<definedName name="_xlnm._FilterDatabase" localSheetId="0" hidden="1">'By song'!$A$1:$O$59</definedName>
</definedNames>
<calcPr calcId="145621" />
</workbook>
`

EPPlus corrupt Excel file when having more than 65,530 rows

I'm running into an issue with EPPlus when there are more than 65,530 rows that have a column with a hyperlink. The example below is configured to create 65,530 rows. With this number it will create the Excel file correctly (not corrupt). Once you run it with anything over 65,530, the Excel file will be created but when you open it, Excel will report that is corrupt. Any ideas how to solve this issue?
// Repro: writes a one-column "Catalog" sheet in which every row carries a
// file:// hyperlink, then opens the generated workbook.
try
{
    int maxRowsToCreate = 65530; //-- no errors will be generated
    //int maxRowsToCreate = 65531; //-- error will be generated. The Excel file will be created but will give an error when trying to open it.
    string report = string.Format("D:\\temp\\hypelinkIssue-{0}.xlsx", maxRowsToCreate.ToString());
    if (File.Exists(report))
    {
        File.Delete(report);
    }
    using (ExcelPackage pck = new ExcelPackage(new System.IO.FileInfo(report)))
    {
        //Add the Content sheet
        var ws = pck.Workbook.Worksheets.Add("Catalog");
        ws.View.ShowGridLines = true;
        var namedStyle = pck.Workbook.Styles.CreateNamedStyle("HyperLink"); //This one is language dependent
        namedStyle.Style.Font.UnderLine = true;
        namedStyle.Style.Font.Color.SetColor(Color.Blue);
        ws.Column(1).Width = 100;
        int rowIndex = 0;
        for (int i = 0; i < maxRowsToCreate; i++)
        {
            rowIndex += 1;
            string fullFilePath = string.Format("D:\\temp\\{0}", Path.GetRandomFileName());
            ws.Cells[rowIndex, 1].StyleName = "HyperLink";
            // Verbatim string literal (@"..."); the '#' prefix found here before is not valid C#.
            ws.Cells[rowIndex, 1].Hyperlink = new Uri(string.Format(@"file:///{0}", fullFilePath));
            ws.Cells[rowIndex, 1].Value = fullFilePath;
        }
        pck.Save();
    }
    System.Diagnostics.Process.Start(report);
}
catch (Exception)
{
    // Bare rethrow keeps the original stack trace ("throw ex;" would reset it).
    throw;
}

The issue occurs when using ".Hyperlink". If instead I use ".Formula" and populate it with the "=HYPERLINK" Excel formula, it works fine. I was able to create 250k records with unique hyperlink using this approach. I did not try more than 250k but hopefully it will work fine.
Thanks for pointing me in the right direction.
/*
Alternative 1 - this only works with LESS than 65,530 hyperlinks
*/
ws.Cells[rowIndex, 1].StyleName = "HyperLink";
ws.Cells[rowIndex, 1].Hyperlink = new OfficeOpenXml.ExcelHyperLink(fullFilePath, ExcelHyperLink.UriSchemeFile);
ws.Cells[rowIndex, 1].Value = fullFilePath;
/*
Alternative 2 - this works with more than 65,530 hyperlinks.
Use it INSTEAD of alternative 1, not in addition to it.
*/
// fullFilePath is the per-row path variable used above; "filePath" was undefined here.
string cellFormula = string.Format("=HYPERLINK(\"{0}\")", fullFilePath);
ws.Cells[rowIndex, 1].Formula = cellFormula;

This is because Excel limits the amount of unique URLs in a file to 65,530. You should try to insert them as text, instead of a url.
For a possible solution, take a look at this answer.

How to fix "The supplied data appears to be in the Office 2007+ XML."

This is my code that extracts value from an xlsx file and print it on Eclipse console
/** Prints the cell values of one sheet of an .xls or .xlsx workbook to the console. */
public class testcode {

    /**
     * Opens {@code filename} inside {@code filepath} and prints every cell of
     * the sheet named {@code sheetname}, one row per line, each cell followed
     * by "||".
     *
     * @param filepath  directory that contains the workbook
     * @param filename  workbook file name; an "xlsx" extension selects XSSF, anything else HSSF
     * @param sheetname name of the sheet to print
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if the workbook has no sheet named {@code sheetname}
     */
    public void readexcel(String filepath, String filename, String sheetname) throws IOException {
        File file = new File(filepath, filename);

        // Extension WITHOUT the dot. substring(indexOf(".")) yields ".xlsx", which
        // never equals "xlsx" and sent .xlsx files to HSSFWorkbook.
        String fileExtensionName = filename.substring(filename.lastIndexOf(".") + 1);

        Workbook workbook;
        // The stream is closed as soon as the workbook has been loaded.
        try (FileInputStream inputstream = new FileInputStream(file)) {
            if (fileExtensionName.equalsIgnoreCase("xlsx")) {
                workbook = new XSSFWorkbook(inputstream);
            } else {
                workbook = new HSSFWorkbook(inputstream);
            }
        }

        Sheet sheet = workbook.getSheet(sheetname);
        if (sheet == null) {
            throw new IllegalArgumentException("No sheet named '" + sheetname + "' in " + file);
        }

        // Walk the populated row range; rows inside it may be missing (null).
        for (int i = sheet.getFirstRowNum(); i <= sheet.getLastRowNum(); i++) {
            Row row = sheet.getRow(i);
            if (row == null) {
                continue;
            }
            for (int j = 0; j < row.getLastCellNum(); j++) {
                // toString() handles every cell type; getStringCellValue() throws on
                // numeric cells. Missing cells print as empty.
                String text = row.getCell(j) == null ? "" : row.getCell(j).toString();
                System.out.print(text + "||");
            }
            System.out.println();
        }
    }

    public static void main(String[] args) throws IOException {
        testcode objExcelFile = new testcode();
        //Prepare path of excel file
        String filepath = "C:\\Users\\malfoy\\Desktop";
        objExcelFile.readexcel(filepath, "testfile.xlsx", "read");
    }
}
I am using office 2007 edition and I am getting an exception which says
"The supplied data appears to be in the Office 2007+ XML. You are calling the part of POI that deals with OLE2 Office Documents. You need to call a different part of POI to process this data (eg XSSF instead of HSSF)"
How to fix it?

The line
String FileExtensionName = filename.substring(filename.indexOf("."));
returns a value with the dot (in your case ".xlsx")
So the following if statement returns a HSSFWorkbook instance instead of XSSFWorkbook.
To correct it use
String FileExtensionName = filename.substring(filename.lastIndexOf(".")+1);

dropdown Validation not working if it exceeds 50 rows in the Export To Excel

I am generating Excel File(.xlsx) using apache poi jar (poi-ooxml-3.9.jar), I added dropdown validation for 10 columns in my excel file, If I generate the Excel File with 50 rows, drop down validation is working. If it exceeds more than 50 rows, drop down validation is not coming in the Excel File, When I open the excel File I get the message as "We found a problem with some content in fileName.xlsx. Do you want us to try to recover as much as we can ? If you trust the source of this workbook, click Yes ". when click on Yes, all the dropdown validation it is removing. Kindly need solution to fix this issue.

Do not create DataValidationConstraint for each single cell but only for each varying list you need. Then create DataValidation using those DataValidationConstraint for continuous CellRangeAddressList which are as big as possible and also are not all single cells.
Example creates ten different list validations for column 1 to 10 in rows 1 to 10000.
import java.io.*;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.*;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.ss.util.CellRangeAddressList;
/**
 * Writes a workbook whose first ten columns each carry their own explicit-list
 * data validation over rows 1 to 10000. One constraint and one cell range are
 * created per column rather than per cell.
 */
class DataValidationList {

    public static void main(String[] args) throws Exception {
        Workbook workbook = new XSSFWorkbook(); // or new HSSFWorkbook
        Sheet sheet = workbook.createSheet("Data Validation");
        DataValidationHelper dvHelper = sheet.getDataValidationHelper();

        for (int col = 0; col < 10; col++) {
            DataValidationConstraint dvConstraint = dvHelper.createExplicitListConstraint(
                    new String[]{"Col " + (col + 1) + " one", "Col " + (col + 1) + " two", "Col " + (col + 1) + " three"});
            // Arguments are (firstRow, lastRow, firstCol, lastCol): restrict the range to
            // this single column. Using 0 as firstCol made every range start at column A,
            // so the ten validations overlapped.
            CellRangeAddressList addressList = new CellRangeAddressList(0, 9999, col, col);
            DataValidation validation = dvHelper.createValidation(dvConstraint, addressList);
            if (validation instanceof XSSFDataValidation) {
                validation.setSuppressDropDownArrow(true);
                validation.setShowErrorBox(true);
            } else {
                validation.setSuppressDropDownArrow(false);
            }
            sheet.addValidationData(validation);
        }

        String filename;
        if (workbook instanceof XSSFWorkbook) {
            filename = "DataValidationList.xlsx";
        } else {
            filename = "DataValidationList.xls";
        }

        // try-with-resources closes the stream even when write() fails.
        try (FileOutputStream out = new FileOutputStream(filename)) {
            workbook.write(out);
        }
        workbook.close();
    }
}

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Reading images from all excel worksheets using Apache POI - excel

Just adding pict.resize(); function call after the Picture pict=... fixes the issue. So the modified code looks like the following: Picture pict = drawingPatriarch.createPicture(anchor, pictureIdx); pict.resize(); Not sure why it doesn't work without the resize() call.

Related

Apache-poi how to unhide column upon creation of Excel file

Get Excel sheet names unsorted via delphi ADO [duplicate]

EPPlus corrupt Excel file when having more than 65,530 rows

How to fix "The supplied data appears to be in the Office 2007+ XML."

dropdown Validation not working if it exceeds 50 rows in the Export To Excel

Categories

Resources