|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
edu.umd.cloud9.collection.Indexable
edu.umd.cloud9.collection.clue.ClueWarcRecord
public class ClueWarcRecord
| Nested Class Summary | |
|---|---|
class |
ClueWarcRecord.WarcHeader
Warc header class |
| Field Summary | |
|---|---|
static String |
WARC_VERSION
|
static String |
WARC_VERSION_LINE
|
| Constructor Summary | |
|---|---|
ClueWarcRecord()
Default Constructor |
|
ClueWarcRecord(ClueWarcRecord o)
Copy Constructor |
|
| Method Summary | |
|---|---|
void |
addHeaderMetadata(String key,
String value)
Adds a key/value pair to a WARC header. |
void |
clearHeaderMetadata()
Clears all metadata items from a header |
byte[] |
getByteContent()
Retrieves the byte content for this record |
String |
getContent()
Returns the content of the document. |
String |
getContentUTF8()
Retrieves the bytes content as a UTF-8 string |
String |
getDisplayContentType()
|
String |
getDocid()
Returns the globally-unique String identifier of the document within the collection. |
Set<Map.Entry<String,String>> |
getHeaderMetadata()
Gets the set of metadata items from the header |
String |
getHeaderMetadataItem(String key)
Gets a value for a specific header metadata key |
String |
getHeaderRecordType()
Gets the header record type string |
String |
getHeaderString()
Gets the WARC header as a string |
int |
getTotalRecordLength()
Retrieves the total record length (header and content) |
String |
getWarcFilePath()
Gets the file path from this WARC file (if set) |
void |
readFields(DataInput in)
Serialization input |
static ClueWarcRecord |
readNextWarcRecord(DataInputStream in)
Reads in a WARC record from a data input stream |
void |
set(ClueWarcRecord o)
Sets the record content (copy) |
void |
setContent(byte[] content)
Sets the byte content for this record |
void |
setContent(String content)
Sets the byte content for this record |
void |
setWarcContentType(String contentType)
Sets the content type string |
void |
setWarcDate(String dateString)
Sets the WARC header date string |
void |
setWarcFilePath(String path)
Sets the warc file path (optional - for use with getWarcFilePath) |
void |
setWarcRecordType(String recordType)
Sets the record type string |
void |
setWarcUUID(String UUID)
Sets the WARC uuid string |
String |
toString()
|
void |
write(DataOutput out)
Serialization output |
| Methods inherited from class edu.umd.cloud9.collection.Indexable |
|---|
getDisplayContent |
| Methods inherited from class java.lang.Object |
|---|
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait |
| Field Detail |
|---|
public static String WARC_VERSION
public static String WARC_VERSION_LINE
| Constructor Detail |
|---|
public ClueWarcRecord()
public ClueWarcRecord(ClueWarcRecord o)
o - the record to copy
| Method Detail |
|---|
public static ClueWarcRecord readNextWarcRecord(DataInputStream in)
throws IOException
in - the input stream
IOException
public int getTotalRecordLength()
public void set(ClueWarcRecord o)
o - record to copy from
public String getWarcFilePath()
public void setWarcFilePath(String path)
path - the WARC file path
public void setWarcRecordType(String recordType)
recordType - the record type string
public void setWarcContentType(String contentType)
contentType - the content type string
public void setWarcDate(String dateString)
dateString - the WARC header date string
public void setWarcUUID(String UUID)
UUID - the WARC uuid string
public void addHeaderMetadata(String key,
String value)
key - the metadata key
value - the metadata value
public void clearHeaderMetadata()
public Set<Map.Entry<String,String>> getHeaderMetadata()
public String getHeaderMetadataItem(String key)
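key - the header metadata key to look up
public void setContent(byte[] content)
content - the byte content for this record
public void setContent(String content)
content - the content for this record, given as a string
public byte[] getByteContent()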
public String getContentUTF8()
public String getHeaderRecordType()
public String toString()
toString in class Object
public String getHeaderString()
public void write(DataOutput out)
throws IOException
out -
IOException
public void readFields(DataInput in)
throws IOException
in -
IOException
public String getDocid()
Indexable
getDocid in class Indexable
public String getContent()
Indexable
getContent in class Indexable
public String getDisplayContentType()
getDisplayContentType in class Indexable
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||