VTK
Main Page
Related Pages
Namespaces
Classes
Files
File List
File Members
dox
TextAnalysis
vtkTokenizer.h
Go to the documentation of this file.
/*=========================================================================

  Program: Visualization Toolkit
  Module: vtkTokenizer.h

  Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
  All rights reserved.
  See Copyright.txt or http://www.kitware.com/Copyright.htm for details.

  This software is distributed WITHOUT ANY WARRANTY; without even
  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  PURPOSE. See the above copyright notice for more information.

=========================================================================*/
/*-------------------------------------------------------------------------
  Copyright 2008 Sandia Corporation.
  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
  the U.S. Government retains certain rights in this software.
-------------------------------------------------------------------------*/
20
81
#ifndef __vtkTokenizer_h
82
#define __vtkTokenizer_h
83
84
#include <
vtkTableAlgorithm.h
>
85
#include <
vtkUnicodeString.h
>
//Needed for delimiter specification
86
87
// Description:
// Table algorithm (derives from vtkTableAlgorithm) that is configured with
// two sets of Unicode delimiter ranges: a "dropped" set and a "kept" set.
// NOTE(review): this header only declares the configuration API; how the two
// sets affect the tokens produced by RequestData() is defined in the
// implementation file - confirm there before relying on it.
class VTK_TEXT_ANALYSIS_EXPORT vtkTokenizer :
  public vtkTableAlgorithm
{
public:
  static vtkTokenizer* New();
  vtkTypeMacro(vtkTokenizer, vtkTableAlgorithm);
  void PrintSelf(ostream& os, vtkIndent indent);

//BTX
  // Description:
  // Defines storage for a single range of Unicode characters, held as a
  // (first, second) pair of code points.
  // NOTE(review): presumably half-open, [first, second), to agree with the
  // DelimiterRanges description below - confirm against the implementation.
  typedef vtkstd::pair<vtkUnicodeString::value_type, vtkUnicodeString::value_type> DelimiterRange;
  // Description:
  // Defines storage for a collection of half-open ranges of Unicode characters.
  typedef vtkstd::vector<DelimiterRange> DelimiterRanges;

  // Description:
  // NOTE(review): presumably returns a set of delimiter ranges that match
  // Unicode punctuation codepoints, by analogy with Whitespace() - confirm.
  static const DelimiterRanges Punctuation();
  // Description:
  // Returns a set of delimiter ranges that match Unicode whitespace codepoints.
  static const DelimiterRanges Whitespace();
  // Description:
  // Returns a set of delimiter ranges that match logosyllabic languages where characters represent
  // words instead of sounds, such as Chinese, Japanese, and Korean.
  static const DelimiterRanges Logosyllabic();

  // Description:
  // NOTE(review): presumably adds the single range delimited by begin and end
  // to the set of "dropped" delimiters (single-range counterpart of the
  // overload below) - confirm whether end is exclusive.
  void AddDroppedDelimiters(vtkUnicodeString::value_type begin, vtkUnicodeString::value_type end);
  // Description:
  // Adds a collection of delimiter ranges to the set of "dropped" delimiters.
  void AddDroppedDelimiters(const DelimiterRanges& ranges);

  // Description:
  // NOTE(review): presumably adds the single range delimited by begin and end
  // to the set of "kept" delimiters (single-range counterpart of the
  // overload below) - confirm whether end is exclusive.
  void AddKeptDelimiters(vtkUnicodeString::value_type begin, vtkUnicodeString::value_type end);
  // Description:
  // Adds a collection of delimiter ranges to the set of "kept" delimiters.
  void AddKeptDelimiters(const DelimiterRanges& ranges);
//ETX

  // Description:
  // Argument-free configuration calls, declared outside the BTX/ETX section
  // above.
  // NOTE(review): presumably each one forwards the matching predefined range
  // set (Punctuation(), Whitespace(), Logosyllabic()) to
  // AddDroppedDelimiters() or AddKeptDelimiters() - confirm in the
  // implementation.
  void DropPunctuation();
  void DropWhitespace();
  void KeepPunctuation();
  void KeepWhitespace();
  void KeepLogosyllabic();

  // Description:
  // NOTE(review): presumably clears the set of "dropped" delimiters, by
  // analogy with ClearKeptDelimiters() - confirm.
  void ClearDroppedDelimiters();
  // Description:
  // Clears the set of "kept" delimiters.
  void ClearKeptDelimiters();

//BTX
protected:
  vtkTokenizer();
  ~vtkTokenizer();

  int FillInputPortInformation(int port, vtkInformation* info);

  virtual int RequestData(
    vtkInformation* request,
    vtkInformationVector** inputVector,
    vtkInformationVector* outputVector);

private:
  vtkTokenizer(const vtkTokenizer&); // Not implemented.
  void operator=(const vtkTokenizer&); // Not implemented.

  // Private state lives behind an opaque, forward-declared class; only the
  // pointer itself is const, so it cannot be reseated after construction.
  class Internals;
  Internals* const Implementation;
//ETX
};
177
178
#endif // __vtkTokenizer_h
179
Generated on Wed May 30 2012 13:22:24 for VTK by Doxygen 1.8.1