/* AbiWord * Copyright (C) 1998 AbiSource, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #include #include #include "ut_types.h" #include "ut_assert.h" #include "ut_debugmsg.h" #include "ut_string.h" #include "ie_imp_UCS2.h" #include "pd_Document.h" #include "ut_growbuf.h" /*****************************************************************/ /*****************************************************************/ #ifdef ENABLE_PLUGINS // completely generic code to allow this to be a plugin #include "xap_Module.h" #define SUPPORTS_ABI_VERSION(a,b,c) (((a==0)&&(b==7)&&(c==15)) ? 1 : 0) // we use a reference-counted sniffer static IE_Imp_UCS2_Sniffer * m_sniffer = 0; static UT_sint32 m_refs = 0; ABI_FAR extern "C" int abi_plugin_register (XAP_ModuleInfo * mi) { if (!m_refs && !m_sniffer) { m_sniffer = new IE_Imp_UCS2_Sniffer (); m_refs++; } else if (m_refs && m_sniffer) { m_refs++; } else { UT_ASSERT(UT_SHOULD_NOT_HAPPEN); } mi->name = "UCS2 Importer"; mi->desc = "Import UCS2 Documents"; mi->version = "0.7.15"; mi->author = "Andrew Dunbar"; mi->usage = "No Usage"; IE_Imp::registerImporter (m_sniffer); return 1; } ABI_FAR extern "C" int abi_plugin_unregister (XAP_ModuleInfo * mi) { mi->name = 0; mi->desc = 0; mi->version = 0; mi->author = 0; mi->usage = 0; UT_ASSERT (m_refs && m_sniffer); m_refs--; IE_Imp::unregisterImporter (m_sniffer); if (!m_refs) { delete m_sniffer; m_sniffer = 0; } return 1; } ABI_FAR extern "C" int abi_plugin_supports_version (UT_uint32 major, UT_uint32 minor, UT_uint32 release) { return SUPPORTS_ABI_VERSION(major, minor, release); } #endif /*****************************************************************/ /*****************************************************************/ bool IE_Imp_UCS2_Sniffer::recognizeContents(const char * szBuf, UT_uint32 iNumbytes) { bool bSuccess = false; if (iNumbytes >= 2) { const unsigned char *p = reinterpret_cast(szBuf); if (p[0] == 0xfe && p[1] == 0xff) { // Big endian UT_DEBUGMSG(("BOM big endian\n")); bSuccess = true; } else if (p[0] == 0xff && p[1] == 0xfe) { // Little endian UT_DEBUGMSG(("BOM little endian\n")); bSuccess = true; } } return bSuccess; } bool IE_Imp_UCS2_Sniffer::recognizeSuffix(const char * szSuffix) { return (UT_stricmp(szSuffix,".ucs2") == 0); } UT_Error IE_Imp_UCS2_Sniffer::constructImporter(PD_Document * pDocument, IE_Imp ** ppie) { IE_Imp_UCS2 * p = new IE_Imp_UCS2(pDocument); *ppie = p; return UT_OK; } bool IE_Imp_UCS2_Sniffer::getDlgLabels(const char ** pszDesc, const char ** pszSuffixList, IEFileType * ft) { *pszDesc = "UCS2 (.ucs2)"; *pszSuffixList = "*.ucs2"; *ft = getFileType(); return true; } /*****************************************************************/ /*****************************************************************/ /* Import a plain text file formatted in UCS2. We allow either LF or CR or CRLF line termination. Each line terminator is taken to be a paragraph break. */ /*****************************************************************/ /*****************************************************************/ #define X_CleanupIfError(error,exp) do { if (((error)=(exp)) != UT_OK) goto Cleanup; } while (0) UT_Error IE_Imp_UCS2::importFile(const char * szFilename) { FILE *fp = fopen(szFilename, "r"); if (!fp) { UT_DEBUGMSG(("Could not open file %s\n",szFilename)); return UT_IE_FILENOTFOUND; } UT_Error error; X_CleanupIfError(error,_writeHeader(fp)); X_CleanupIfError(error,_parseFile(fp)); error = UT_OK; Cleanup: fclose(fp); return error; } #undef X_CleanupIfError /*****************************************************************/ /*****************************************************************/ IE_Imp_UCS2::~IE_Imp_UCS2() { } IE_Imp_UCS2::IE_Imp_UCS2(PD_Document * pDocument) : IE_Imp(pDocument) { } /*****************************************************************/ /*****************************************************************/ #define X_ReturnIfFail(exp,error) do { bool b = (exp); if (!b) return (error); } while (0) #define X_ReturnNoMemIfError(exp) X_ReturnIfFail(exp,UT_IE_NOMEMORY) UT_Error IE_Imp_UCS2::_writeHeader(FILE * /* fp */) { X_ReturnNoMemIfError(m_pDocument->appendStrux(PTX_Section, NULL)); return UT_OK; } UT_Error IE_Imp_UCS2::_parseFile(FILE * fp) { UT_GrowBuf gbBlock(1024); bool bEatLF = false; bool bEmptyFile = true; unsigned char b1, b2; UT_UCSChar c; bool bBigEndian = false; // TODO Without BOMs should this be platform-dependant? while (1) { if (fread(&b1, 1, sizeof(b1), fp) > 0) { if (fread(&b2, 1, sizeof(b2), fp) > 0) { // Handle BOM if (b1 == 0xfe && b2 == 0xff) { bBigEndian = true; continue; } else if (b1 == 0xff && b2 == 0xfe) { bBigEndian = false; continue; } else { if (bBigEndian) { c = b1; c <<= 8; c |= b2; } else { c = b2; c <<= 8; c |= b1; } } } else { // Handle half character missing c = (UT_UCSChar)0xfffd; } } else { // TODO // Handle no more bytes break; } switch (c) { case '\r': case '\n': if ((c == '\n') && bEatLF) { bEatLF = false; break; } if (c == '\r') { bEatLF = true; } // we interprete either CRLF, CR, or LF as a paragraph break. // start a paragraph and emit any text that we // have accumulated. X_ReturnNoMemIfError(m_pDocument->appendStrux(PTX_Block, NULL)); bEmptyFile = false; if (gbBlock.getLength() > 0) { X_ReturnNoMemIfError(m_pDocument->appendSpan(gbBlock.getPointer(0), gbBlock.getLength())); gbBlock.truncate(0); } break; default: bEatLF = false; X_ReturnNoMemIfError(gbBlock.ins(gbBlock.getLength(),&c,1)); break; } } if (gbBlock.getLength() > 0 || bEmptyFile) { // if we have text left over (without final CR/LF), // or if we read an empty file, // create a paragraph and emit the text now. X_ReturnNoMemIfError(m_pDocument->appendStrux(PTX_Block, NULL)); if (gbBlock.getLength() > 0) X_ReturnNoMemIfError(m_pDocument->appendSpan(gbBlock.getPointer(0), gbBlock.getLength())); } return UT_OK; } #undef X_ReturnNoMemIfError #undef X_ReturnIfFail /*****************************************************************/ /*****************************************************************/ void IE_Imp_UCS2::pasteFromBuffer(PD_DocumentRange * pDocRange, unsigned char * pData, UT_uint32 lenData) { }