M
mickeydisn
Sub: C++ Word automation Extract text
Hello.
I want to extract text from a Word document using a Visual C++ program.
I have read a lot of documentation, and my conclusion is that I must use
"Word automation".
I have found a lot of examples of how to use it, but I need your help to
use it properly.
I initialize the automation:
...
CLSID clsid;
CLSIDFromProgID(L"Word.Application", &clsid);
IUnknown* pUnk;
HRESULT hr = ::CoCreateInstance( clsid, NULL, CLSCTX_LOCAL_SERVER,
IID_IUnknown, (void**) &pUnk);
if (FAILED(hr))
{
OutputDebugString("Error in creating Word application instance\n");
Destroy();
return -1;
}
hr = pUnk->QueryInterface(IID_IDispatch, (void**)&m_pDispApp);
DISPPARAMS dp = { NULL, NULL, 0, 0 };
DISPID dispID;
LPOLESTR szDoc = L"Documents";
IDispatch* pDocuments = NULL;
VARIANT varRetVal;
hr = m_pDispApp->GetIDsOfNames(IID_NULL, &szDoc, 1,
LOCALE_SYSTEM_DEFAULT, &dispID);
hr = m_pDispApp->Invoke(dispID, IID_NULL, LOCALE_SYSTEM_DEFAULT,
DISPATCH_PROPERTYGET, &dp, &varRetVal, NULL, NULL);
if (!SUCCEEDED(hr))
{
Destroy();
return -1;
}
m_pDocuments = varRetVal.pdispVal;
...
I open a Word document:
...
VARIANT varRetVal;
EXCEPINFO excepInfo; // this variable contains exception info if any
Invoke call fails
VARIANTARG varg;
varg.vt = VT_BSTR;
varg.bstrVal = _bstr_t(strFilePath); // this is the MS-word document
filename, must be changed to a valid filename that
exists on disk
DISPPARAMS dpOpen = { &varg, NULL, 1, 0 };
DISPID dispOpenID;
LPOLESTR szOpenDoc = L"Open";
HRESULT hr = m_pDocuments->GetIDsOfNames(IID_NULL, &szOpenDoc, 1,
LOCALE_SYSTEM_DEFAULT, &dispOpenID);
hr = m_pDocuments->Invoke(dispOpenID, IID_NULL, LOCALE_SYSTEM_DEFAULT,
DISPATCH_METHOD, &dpOpen, &varRetVal, &excepInfo,
NULL);
if (FAILED(hr))
{
OutputDebugString("Error opening the document\n");
Destroy();
return -1;
}
...
1. The MS Word program opens, but <b>I don't want to see the MS Word application
open</b>.
How can I read the Word file without the MS Word interface popping up?
2. If the document is already in use, I see a message box that asks me to
choose between read-only, notify, etc.
I always need to open it as read-only (how can I do this?).
3. I have found how to save a .doc document as a .txt file:
...
VARIANT vNeuerName, vSpeichernFormat;
vNeuerName.vt = VT_BSTR;
vNeuerName.bstrVal = SysAllocString(L"C:\\delete\\docneu.txt");
vSpeichernFormat.vt = VT_I4;
vSpeichernFormat.lVal = 2;
printf("SAVE");
getchar();
IDispatch* pDocument = varRetVal.pdispVal;
DISPPARAMS dpSave = { NULL, NULL, 0, 0 };
DISPID dispSaveID;
LPOLESTR szSaveDoc = L"SaveAs";
hr = pDocument->GetIDsOfNames(IID_NULL, &szSaveDoc, 1,
LOCALE_SYSTEM_DEFAULT, &dispSaveID);
hr = pDocument->Invoke(dispSaveID, IID_NULL, LOCALE_SYSTEM_DEFAULT,
DISPATCH_METHOD, &dpSave, &varRetVal, NULL, NULL);
...
But this code is invalid. Why?
4. I only need to read the text from the .doc file, and I would like to
avoid saving it to another file
(how can I do this?).
Many thanks for your help.
Hello.
I want to extract text from a Word document using a Visual C++ program.
I have read a lot of documentation, and my conclusion is that I must use
"Word automation".
I have found a lot of examples of how to use it, but I need your help to
use it properly.
I initialize the automation:
...
CLSID clsid;
CLSIDFromProgID(L"Word.Application", &clsid);
IUnknown* pUnk;
HRESULT hr = ::CoCreateInstance( clsid, NULL, CLSCTX_LOCAL_SERVER,
IID_IUnknown, (void**) &pUnk);
if (FAILED(hr))
{
OutputDebugString("Error in creating Word application instance\n");
Destroy();
return -1;
}
hr = pUnk->QueryInterface(IID_IDispatch, (void**)&m_pDispApp);
DISPPARAMS dp = { NULL, NULL, 0, 0 };
DISPID dispID;
LPOLESTR szDoc = L"Documents";
IDispatch* pDocuments = NULL;
VARIANT varRetVal;
hr = m_pDispApp->GetIDsOfNames(IID_NULL, &szDoc, 1,
LOCALE_SYSTEM_DEFAULT, &dispID);
hr = m_pDispApp->Invoke(dispID, IID_NULL, LOCALE_SYSTEM_DEFAULT,
DISPATCH_PROPERTYGET, &dp, &varRetVal, NULL, NULL);
if (!SUCCEEDED(hr))
{
Destroy();
return -1;
}
m_pDocuments = varRetVal.pdispVal;
...
I open a Word document:
...
VARIANT varRetVal;
EXCEPINFO excepInfo; // this variable contains exception info if any
Invoke call fails
VARIANTARG varg;
varg.vt = VT_BSTR;
varg.bstrVal = _bstr_t(strFilePath); // this is the MS-word document
filename, must be changed to a valid filename that
exists on disk
DISPPARAMS dpOpen = { &varg, NULL, 1, 0 };
DISPID dispOpenID;
LPOLESTR szOpenDoc = L"Open";
HRESULT hr = m_pDocuments->GetIDsOfNames(IID_NULL, &szOpenDoc, 1,
LOCALE_SYSTEM_DEFAULT, &dispOpenID);
hr = m_pDocuments->Invoke(dispOpenID, IID_NULL, LOCALE_SYSTEM_DEFAULT,
DISPATCH_METHOD, &dpOpen, &varRetVal, &excepInfo,
NULL);
if (FAILED(hr))
{
OutputDebugString("Error opening the document\n");
Destroy();
return -1;
}
...
1. The MS Word program opens, but <b>I don't want to see the MS Word application
open</b>.
How can I read the Word file without the MS Word interface popping up?
2. If the document is already in use, I see a message box that asks me to
choose between read-only, notify, etc.
I always need to open it as read-only (how can I do this?).
3. I have found how to save a .doc document as a .txt file:
...
VARIANT vNeuerName, vSpeichernFormat;
vNeuerName.vt = VT_BSTR;
vNeuerName.bstrVal = SysAllocString(L"C:\\delete\\docneu.txt");
vSpeichernFormat.vt = VT_I4;
vSpeichernFormat.lVal = 2;
printf("SAVE");
getchar();
IDispatch* pDocument = varRetVal.pdispVal;
DISPPARAMS dpSave = { NULL, NULL, 0, 0 };
DISPID dispSaveID;
LPOLESTR szSaveDoc = L"SaveAs";
hr = pDocument->GetIDsOfNames(IID_NULL, &szSaveDoc, 1,
LOCALE_SYSTEM_DEFAULT, &dispSaveID);
hr = pDocument->Invoke(dispSaveID, IID_NULL, LOCALE_SYSTEM_DEFAULT,
DISPATCH_METHOD, &dpSave, &varRetVal, NULL, NULL);
...
But this code is invalid. Why?
4. I only need to read the text from the .doc file, and I would like to
avoid saving it to another file
(how can I do this?).
Many thanks for your help.