Convert xls to xlsx and xml in C #

In this article I will try to talk about one of the possible ways to convert xls to xml documents .


Directly, my task looked something like this:


Given:


  1. 1-2 days of time to find and implement a solution
  2. The solution should be as free as possible.
  3. The approximate structure, size and content of the file that must be processed is known, which eliminates the need to read graphs, charts, and other medians from the document.
  4. Use of Interop , as well as other libraries, which require the installation of Office can not, because it creates an extra load on the server and requires no additional installation of the Office suite.

I must say that this task is by no means an easy one, and to solve it, just as it was done with the new office formats (docx and xlsx), will fail for the following reasons:


  • First, Microsoft does not offer any solutions for reading these formats ( we don’t take into account Interopt , since its use requires installing an office suite on the server, and reading data from one file will consume unreasonably much RAM).
  • Secondly, all information in xls documents (as well as in doc) is stored in binary form and is not available for free reading.
  • Thirdly, there is practically no information on the Internet about libraries that would solve the problem of reading information from old excel and word formats, as well as the task of converting old formats to new ones.

stackoverflow, , , .Net , xls xlsx, xlsx .


, Microsoft, , , , . , , ( ), , , . , , , , , !


Tony Qu , . , .Net, ( .Net doc).
, , , : NPOI - , . , , .


, , , xls xlsx, , .




  1. nuget NPOI ( nuget : Tony Qu, netcore β€” . , , ):
  2. XlsToXlsx public MemoryStream Convert(Stream sourceStream):
    public MemoryStream Convert(Stream sourceStream)
    {
    //  xls
    var source = new HSSFWorkbook(sourceStream);
    //     xlsx
    var destination = new XSSFWorkbook();
    //    xls    xlsx
    for (int i = 0; i < source.NumberOfSheets; i++)
    {
        var xssfSheet = (XSSFSheet)destination.CreateSheet(source.GetSheetAt(i).SheetName);
        var hssfSheet = (HSSFSheet)source.GetSheetAt(i);
        CopyStyles(hssfSheet, xssfSheet);
        CopySheet(hssfSheet, xssfSheet);
    }
    //   
    using (var ms = new MemoryStream())
    {
        destination.Write(ms);
        return ms;
    }
    }

, . , , (, , ).
xls NPOI HSSFWorkbook, , .
xlsx XSSFWorkbook.
, xls xlsx. , . , xssfSheet HSSFSheet.
, , .
MemoryStream


,


private void CopyStyles(HSSFSheet from, XSSFSheet to)
{
    for (short i = 0; i <= from.Workbook.NumberOfFonts; i++)
    {
        CopyFont(to.Workbook.CreateFont(), from.Workbook.GetFontAt(i));
    }

    for (short i = 0; i < from.Workbook.NumCellStyles; i++)
    {
        CopyStyle(to.Workbook.CreateCellStyle(), from.Workbook.GetCellStyleAt(i), to.Workbook, from.Workbook);
    }
}

, , , , .


void CopyFont(IFont toFront, IFont fontFrom)


private void CopyFont(IFont toFront, IFont fontFrom)
{
    toFront.Boldweight = fontFrom.Boldweight;
    toFront.Charset = fontFrom.Charset;
    toFront.Color = fontFrom.Color;
    toFront.FontHeightInPoints = fontFrom.FontHeightInPoints;
    toFront.FontName = fontFrom.FontName;
    toFront.IsBold = fontFrom.IsBold;
    toFront.IsItalic = fontFrom.IsItalic;
    toFront.IsStrikeout = fontFrom.IsStrikeout;
}

, , β€” .
:


private void CopyStyle(ICellStyle toCellStyle, ICellStyle fromCellStyle, IWorkbook toWorkbook, IWorkbook fromWorkbook)
{
    toCellStyle.Alignment = fromCellStyle.Alignment;
    toCellStyle.BorderBottom = fromCellStyle.BorderBottom;
    toCellStyle.BorderDiagonal = fromCellStyle.BorderDiagonal;
    toCellStyle.BorderDiagonalColor = fromCellStyle.BorderDiagonalColor;
    toCellStyle.BorderDiagonalLineStyle = fromCellStyle.BorderDiagonalLineStyle;
    toCellStyle.BorderLeft = fromCellStyle.BorderLeft;
    toCellStyle.BorderRight = fromCellStyle.BorderRight;
    toCellStyle.BorderTop = fromCellStyle.BorderTop;
    toCellStyle.BottomBorderColor = fromCellStyle.BottomBorderColor;
    toCellStyle.DataFormat = fromCellStyle.DataFormat;
    toCellStyle.FillBackgroundColor = fromCellStyle.FillBackgroundColor;
    toCellStyle.FillForegroundColor = fromCellStyle.FillForegroundColor;
    toCellStyle.FillPattern = fromCellStyle.FillPattern;
    toCellStyle.Indention = fromCellStyle.Indention;
    toCellStyle.IsHidden = fromCellStyle.IsHidden;
    toCellStyle.IsLocked = fromCellStyle.IsLocked;
    toCellStyle.LeftBorderColor = fromCellStyle.LeftBorderColor;
    toCellStyle.RightBorderColor = fromCellStyle.RightBorderColor;
    toCellStyle.Rotation = fromCellStyle.Rotation;
    toCellStyle.ShrinkToFit = fromCellStyle.ShrinkToFit;
    toCellStyle.TopBorderColor = fromCellStyle.TopBorderColor;
    toCellStyle.VerticalAlignment = fromCellStyle.VerticalAlignment;
    toCellStyle.WrapText = fromCellStyle.WrapText;
    toCellStyle.SetFont(toWorkbook.GetFontAt((short)(fromCellStyle.GetFont(fromWorkbook).Index + 1)));
}

, , . , , SetFont, .


xls xlsx


xls xlsx.


private void CopySheet(HSSFSheet source, XSSFSheet destination)
{
    var maxColumnNum = 0;
    var mergedRegions = new List<CellRangeAddress>();
    for (int i = source.FirstRowNum; i <= source.LastRowNum; i++)
    {
        var srcRow = (HSSFRow)source.GetRow(i);
        var destRow = (XSSFRow)destination.CreateRow(i);
        if (srcRow != null)
        {
            CopyRow(source, destination, srcRow, destRow, mergedRegions);
            //      
            if (srcRow.LastCellNum > maxColumnNum)
            {
                maxColumnNum = srcRow.LastCellNum;
            }
        }
    }
    //     
    for (int i = 0; i <= maxColumnNum; i++)
    {
        destination.SetColumnWidth(i, source.GetColumnWidth(i));
    }
}

CopyRow(source, destination, srcRow, destRow, mergedRegions) β€” . , CopySheet . , (, xls xlsx ).


void CopyRow(HSSFSheet srcSheet, XSSFSheet destSheet, HSSFRow srcRow, XSSFRow destRow, List<CellRangeAddress> mergedRegions):


private void CopyRow(HSSFSheet srcSheet, XSSFSheet destSheet, HSSFRow srcRow, XSSFRow destRow, List<CellRangeAddress> mergedRegions)
{
    //   
    destRow.Height = srcRow.Height;
    for (int j = srcRow.FirstCellNum; srcRow.LastCellNum >= 0 && j <= srcRow.LastCellNum; j++)
    {
        var oldCell = (HSSFCell)srcRow.GetCell(j);
        var newCell = (XSSFCell)destRow.GetCell(j);
        if (oldCell != null)
        {
            //      
            if (newCell == null)
            {
                newCell = (XSSFCell)destRow.CreateCell(j);
            }
            CopyCell(oldCell, newCell);
            //     
            //        
            var mergedRegion = GetMergedRegion(srcSheet, srcRow.RowNum,
                    (short)oldCell.ColumnIndex);
            //    
            if (mergedRegion != null)
            {
                //          
                var newMergedRegion = new CellRangeAddress(mergedRegion.FirstRow,
                        mergedRegion.LastRow, mergedRegion.FirstColumn, mergedRegion.LastColumn);
                //   ,         
                if (IsNewMergedRegion(newMergedRegion, mergedRegions))
                {
                    mergedRegions.Add(newMergedRegion);
                    destSheet.AddMergedRegion(newMergedRegion);
                }
            }
        }
    }
}

:


  • -, "" xls "" xlsx
  • -, , . , , , .

:


private void CopyCell(HSSFCell oldCell, XSSFCell newCell)
{
    CopyCellStyle(oldCell, newCell);
    CopyCellValue(oldCell, newCell);
}

:


  • -, :
    private void CopyCellStyle(HSSFCell oldCell, XSSFCell newCell)
    {
    if (oldCell.CellStyle == null)
        return;
    newCell.CellStyle = newCell.Sheet.Workbook.GetCellStyleAt((short)(oldCell.CellStyle.Index + 1));
    }
  • -, :
    void CopyCellValue(HSSFCell oldCell, XSSFCell newCell)
    {
    switch (oldCell.CellType)
    {
        case CellType.String:
            newCell.SetCellValue(oldCell.StringCellValue);
            break;
        case CellType.Numeric:
            newCell.SetCellValue(oldCell.NumericCellValue);
            break;
        case CellType.Blank:
            newCell.SetCellType(CellType.Blank);
            break;
        case CellType.Boolean:
            newCell.SetCellValue(oldCell.BooleanCellValue);
            break;
        case CellType.Error:
            newCell.SetCellErrorValue(oldCell.ErrorCellValue);
            break;
        case CellType.Formula:
            newCell.SetCellFormula(oldCell.CellFormula);
            break;
        default:
            break;
    }
    }

, , (, ).



, .
, CellRangeAddress GetMergedRegion(HSSFSheet sheet, int rowNum, short cellNum) bool IsNewMergedRegion(CellRangeAddress newMergedRegion, List<CellRangeAddress> mergedRegions):


CellRangeAddress GetMergedRegion(HSSFSheet sheet, int rowNum, short cellNum)
{
for (var i = 0; i < sheet.NumMergedRegions; i++)
{
    var merged = sheet.GetMergedRegion(i);
    if (merged.IsInRange(rowNum, cellNum))
    {
        return merged;
    }
}
return null;
}

, , , - . , , . , null.


bool IsNewMergedRegion(CellRangeAddress newMergedRegion,List<CellRangeAddress> mergedRegions)
{
    return !mergedRegions.Any(r =>
    r.FirstColumn == newMergedRegion.FirstColumn &&
    r.LastColumn == newMergedRegion.LastColumn &&
    r.FirstRow == newMergedRegion.FirstRow &&
    r.LastRow == newMergedRegion.LastRow);
}

, , . , 4- ( , ).


xls xlsx .


, stackoverflow, xlsx xls, stackoverflow.



:



  • public void ConvertToXlsxFile(MemoryStream stream, string path)
    {
    var result = Convert(stream);
    using (FileStream fs = new FileStream(path, FileMode.OpenOrCreate))
    {
        fs.Write(result.ToArray());
    }
    }
  • :
    public void ConvertToXlsxFile(string xlsPath, string destPath)
    {
    MemoryStream result;
    using (FileStream fs = new FileStream(xlsPath, FileMode.Open))
    {
        result = Convert(fs);
    }
    using (FileStream fs = new FileStream(destPath, FileMode.OpenOrCreate))
    {
        fs.Write(result.ToArray());
    }
    }

, , xml:


public class XlsToXml : IConvertable
{
public string Convert(Stream stream)
{
    XlsToXlsx excelConvert = new XlsToXlsx();
    Stream str = excelConvert.Convert(stream);
    XlsxToXml converter = new XlsToXml();
    return converter.Convert(str);
}
public string ConvertByFile(string path)
{
    using (FileStream fs = File.OpenRead(path))
    {
        return Convert(fs);
    }
}
}

, XlsxToXml, xlsx xml, .



All Articles