Lucene++ - a full-featured, c++ search engine
API Documentation


PorterStemmer.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef PORTERSTEMMER_H
8 #define PORTERSTEMMER_H
9 
10 #include "LuceneObject.h"
11 
12 namespace Lucene {
13 
26 class PorterStemmer : public LuceneObject { // Implements the Porter stemming algorithm
27 public: // NOTE(review): the member index lists a PorterStemmer() constructor, but listing number 28 is skipped here - confirm against the header
29  virtual ~PorterStemmer();
30 
32 
33 protected:
34  wchar_t* b; // buffer for word to be stemmed
35  int32_t k; // offset to the end of the string
36  int32_t j; // a general offset into the string
37  int32_t i; // initial length of word
38  bool dirty;
39 
40 public:
41  bool stem(CharArray word);
42 
46  bool stem(wchar_t* b, int32_t k); // b is a char pointer; the string to be stemmed runs from b[0] to b[k] inclusive
47 
48  wchar_t* getResultBuffer();
49  int32_t getResultLength();
50 
51 protected:
53  bool cons(int32_t i); // returns true if b[i] is a consonant
54 
63  int32_t m(); // measures the number of consonant sequences between 0 and j
64 
66  bool vowelinstem(); // returns true if 0,...j contains a vowel
67 
69  bool doublec(int32_t j); // returns true if j,(j-1) contain a double consonant
70 
76  bool cvc(int32_t i); // returns true if i-2,i-1,i has the form consonant - vowel - consonant (with a further restriction on the second consonant)
77 
79  bool ends(const wchar_t* s); // returns true if 0,...k ends with the string s
80 
82  void setto(const wchar_t* s); // sets (j+1),...k to the characters in the string s, readjusting k
83 
84  void r(const wchar_t* s);
85 
105  void step1ab(); // gets rid of plurals and -ed or -ing
106 
108  void step1c(); // turns terminal y to i when there is another vowel in the stem
109 
112  void step2(); // maps double suffixes to single ones, e.g. -ization maps to -ize
113 
115  void step3(); // deals with -ic-, -full, -ness etc.
116 
118  void step4(); // takes off -ant, -ence etc.
119 
121  void step5(); // removes a final -e if m() > 1, and changes -ll to -l if m() > 1
122 };
123 
124 }
125 
126 #endif
Lucene::PorterStemmer::step5
void step5()
Removes a final -e if m() > 1, and changes -ll to -l if m() > 1.
Lucene::PorterStemmer::cons
bool cons(int32_t i)
Returns true if b[i] is a consonant. ('b' means 'z->b', but here and below we drop 'z->' in comments.)
LUCENE_CLASS
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Lucene::PorterStemmer::getResultBuffer
wchar_t * getResultBuffer()
Lucene::PorterStemmer::i
int32_t i
Definition: PorterStemmer.h:37
Lucene::PorterStemmer
This is the Porter stemming algorithm, coded up as thread-safe ANSI C by the author.
Definition: PorterStemmer.h:26
Lucene::PorterStemmer::j
int32_t j
Definition: PorterStemmer.h:36
Lucene::PorterStemmer::m
int32_t m()
Measures the number of consonant sequences between 0 and j. If c is a consonant sequence and v a vowe...
Lucene::PorterStemmer::~PorterStemmer
virtual ~PorterStemmer()
Lucene::PorterStemmer::step4
void step4()
Takes off -ant, -ence etc., in context <c>vcvc<v>.
Lucene::PorterStemmer::b
wchar_t * b
Definition: PorterStemmer.h:34
Lucene::PorterStemmer::k
int32_t k
Definition: PorterStemmer.h:35
Lucene::PorterStemmer::step3
void step3()
Deals with -ic-, -full, -ness etc., using a strategy similar to step2.
Lucene::PorterStemmer::step2
void step2()
Maps double suffixes to single ones, so -ization (= -ize plus -ation) maps to -ize etc....
Lucene
Definition: AbstractAllTermDocs.h:12
Lucene::PorterStemmer::stem
bool stem(CharArray word)
Lucene::LuceneObject
Base class for all Lucene classes.
Definition: LuceneObject.h:31
Lucene::PorterStemmer::r
void r(const wchar_t *s)
Lucene::PorterStemmer::ends
bool ends(const wchar_t *s)
Returns true if 0,...k ends with the string s.
Lucene::PorterStemmer::vowelinstem
bool vowelinstem()
Returns true if 0,...j contains a vowel.
Lucene::PorterStemmer::stem
bool stem(wchar_t *b, int32_t k)
In stem(b, k), b is a char pointer, and the string to be stemmed is from b[0] to b[k] inclusive....
Lucene::PorterStemmer::setto
void setto(const wchar_t *s)
Sets (j+1),...k to the characters in the string s, readjusting k.
Lucene::PorterStemmer::cvc
bool cvc(int32_t i)
Returns true if i-2,i-1,i has the form consonant - vowel - consonant and also if the second c is not w...
Lucene::PorterStemmer::dirty
bool dirty
Definition: PorterStemmer.h:38
Lucene::PorterStemmer::getResultLength
int32_t getResultLength()
Lucene::PorterStemmer::doublec
bool doublec(int32_t j)
Returns true if j,(j-1) contain a double consonant.
Lucene::PorterStemmer::PorterStemmer
PorterStemmer()
Lucene::PorterStemmer::step1c
void step1c()
Turns terminal y to i when there is another vowel in the stem.
Lucene::PorterStemmer::step1ab
void step1ab()
step1ab() gets rid of plurals and -ed or -ing, e.g. caresses -> caress, ponies -> poni, cats -> cat, agreed -> agree, meeting -> meet.
LuceneObject.h

clucene.sourceforge.net