/* prosite.c - Prosite databanks functions */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#ifdef STDC_HEADERS
#include <stdlib.h>
#include <string.h>
#endif

#include "error.h"
#include "prosite.h"


static char *text_add(char *, char *, size_t);


/*
 * Allocate an empty PROSITE entry.
 *
 * Every text field (nam, acc, dsc, pat, doc) starts out NULL and the flags
 * are set to PROSITE_DEFAULTS.  An allocation failure is reported through
 * error_fatal("memory", NULL).  Release the entry with prosite_free().
 */
prosite_t *prosite_new(void) {
  prosite_t *entry = (prosite_t *)malloc(sizeof(*entry));

  if (entry == NULL) {
    error_fatal("memory", NULL); }

  entry->flags = PROSITE_DEFAULTS;
  entry->nam = NULL;
  entry->acc = NULL;
  entry->dsc = NULL;
  entry->pat = NULL;
  entry->doc = NULL;

  return entry; }


/*
 * Release a PROSITE entry together with all of its text fields.
 * Passing NULL is allowed and does nothing.
 */
void prosite_free(prosite_t *pro) {

  if (pro != NULL) {
    /* free() accepts NULL, so unset fields need no special casing */
    free(pro->nam);
    free(pro->acc);
    free(pro->dsc);
    free(pro->pat);
    free(pro->doc);

    free(pro); }
}


/*
 * Parse the next PROSITE databank entry from F.
 *
 * Lines are read up to and including the "//" entry terminator.  The ID,
 * AC, DE, PA and DO fields are collected (repeated lines of one kind are
 * concatenated) and a "CC   /SKIP-FLAG=TRUE;" comment sets PROSITE_ABUNDANT
 * in the entry flags.  When parsing starts at file offset 0, the leading
 * "CC" lines and a "//" line directly following them are skipped as the
 * file header.
 *
 * Returns a newly allocated entry (release it with prosite_free()), or NULL
 * when the stream is already at end of file or no field could be read.
 * Memory and seek failures are reported through error_fatal().
 */
prosite_t *prosite_parse(FILE *f) {
  char *p, *q, *buf;
  int len;
  long hdr, off;
  size_t s, n;
  prosite_t *pro;

  /* No more entries */
  if (feof(f) != 0) {
    return NULL; }

  pro = prosite_new();

  len = 100;
  if ((buf = (char *)malloc((size_t)len+1)) == NULL) {
    error_fatal("memory", NULL); }

  /* Offset 0 means the file header has not been consumed yet */
  hdr = ftell(f);

  while (fgets(buf, len, f) != NULL) {

    /* A line is truncated only when fgets() filled the whole buffer
       without reaching a newline.  A shorter read without a newline is
       the unterminated last line of the file: re-reading it with a larger
       buffer could never produce a newline and would loop until memory
       runs out, so such a line is processed as it stands. */
    n = strlen(buf);
    if (n + 1 == (size_t)len && buf[n-1] != '\n') {
      /* Rewind to the start of the line and retry with more room */
      off = ftell(f) - (long)n;
      len += 100;
      if ((buf = (char *)realloc(buf, (size_t)len+1)) == NULL) {
        error_fatal("memory", NULL); }
      if (fseek(f, off, SEEK_SET) != 0) {
        error_fatal("file", NULL); }
      continue; }

    p = buf;

    /* Skip file header */
    if (hdr == 0) {
      if (*p == 'C' && *(p+1) == 'C') {
        continue; }
      hdr = 1;
      if (*p == '/' && *(p+1) == '/') {
        continue; }
    }

    /* Entry end */
    if (*p == '/' && *(p+1) == '/') {
      break; }

    /* Name: text up to the first ';' */
    if (*p == 'I' && strncmp(p, "ID   ", 5) == 0) {
      p += 5;
      q = p;
      while (*q && *q != ';') q++;
      s = q - p;
      pro->nam = text_add(pro->nam, p, s);
      continue; }

    /* Accession: text up to the first ';' */
    if (*p == 'A' && strncmp(p, "AC   ", 5) == 0) {
      p += 5;
      q = p;
      while (*q && *q != ';') q++;
      s = q - p;
      pro->acc = text_add(pro->acc, p, s);
      continue; }

    /* Description */
    /* NOTE(review): the scan runs to the end of the buffer, so a trailing
       newline is kept and the '.' test only fires on a line without one.
       The pattern field stops at the newline instead - confirm whether the
       description is meant to do the same. */
    if (*p == 'D' && strncmp(p, "DE   ", 5) == 0) {
      p += 5;
      q = p;
      while (*q) q++;
      if (q > p && *(q-1) == '.') q--;
      s = q - p;
      pro->dsc = text_add(pro->dsc, p, s);
      continue; }

    /* Pattern: text up to the newline, without a final '.' */
    if (*p == 'P' && strncmp(p, "PA   ", 5) == 0) {
      p += 5;
      q = p;
      while (*q && *q != '\n') q++;
      if (q > p && *(q-1) == '.') q--;
      s = q - p;
      pro->pat = text_add(pro->pat, p, s);
      continue; }

    /* Comments */
    if (*p == 'C' && strncmp(p, "CC   ", 5) == 0) {
      /* Check for abundant flag */
      if (strncmp(p+5, "/SKIP-FLAG=TRUE;", 16) == 0) {
        pro->flags |= PROSITE_ABUNDANT; }
      continue; }

    /* Documentation: text up to the first ';' */
    if (*p == 'D' && strncmp(p, "DO   ", 5) == 0) {
      p += 5;
      q = p;
      while (*q && *q != ';') q++;
      s = q - p;
      pro->doc = text_add(pro->doc, p, s);
      continue; }

  }

  free(buf);

  /* Nothing was collected: hand back NULL instead of an empty entry */
  if (pro->nam == NULL && pro->acc == NULL &&
      pro->dsc == NULL && pro->doc == NULL &&
      pro->pat == NULL) {
    prosite_free(pro);
    return NULL; }

  return pro; }


/* Translate one PROSITE pattern character into its Basic Regular Expression
   spelling.  The spelling is stored at OUT when OUT is not NULL; the return
   value is the number of characters the spelling takes (0, 1 or 2). */
static size_t pat2bre_char(char c, char *out) {
  char first = c, second = '\0';
  size_t n = 1;

  switch (c) {
  case '-': n = 0; break;                              /* separator */
  case '{': first = '['; second = '^'; n = 2; break;
  case '}': first = ']'; break;
  case '(': first = '\\'; second = '{'; n = 2; break;
  case ')': first = '\\'; second = '}'; n = 2; break;
  case '<': first = '^'; break;
  case '>': first = '$'; break;
  case 'x': first = '.'; break;
  default: break;                                      /* copied as is */
  }

  if (out != NULL) {
    if (n > 0) { out[0] = first; }
    if (n > 1) { out[1] = second; }
  }

  return n; }


/*
 * Translate PROSITE Pattern into Basic Regular Expression.
 *
 * '-' separators are dropped, '{' and '}' become "[^" and "]", '(' and ')'
 * become "\{" and "\}", '<' becomes '^', '>' becomes '$' and 'x' becomes
 * '.'; any other character is copied unchanged.
 *
 * Returns a newly malloc()ed string.  An allocation failure is reported
 * through error_fatal("memory", NULL).
 */
char *prosite_pat2bre(char *pat) {
  char *bre, *out;
  size_t i, need;

  /* First pass: size of the translated expression */
  need = 0;
  for (i = 0; pat[i] != '\0'; i++) {
    need += pat2bre_char(pat[i], NULL); }

  bre = (char *)malloc(need + 1);
  if (bre == NULL) {
    error_fatal("memory", NULL); }

  /* Second pass: write the translation */
  out = bre;
  for (i = 0; pat[i] != '\0'; i++) {
    out += pat2bre_char(pat[i], out); }
  *out = '\0';

  return bre; }


/*
 * Append the first LEN characters of SRC to the heap string DST.
 *
 * DST may be NULL, in which case a new string is started.  When SRC is
 * NULL or LEN is 0 nothing is done and DST is returned as is.  A SRC
 * shorter than LEN is reported through error_fatal(src, "string too short")
 * and an allocation failure through error_fatal("memory", NULL).
 *
 * Returns the (possibly moved) string; the DST pointer passed in must not
 * be used afterwards.
 */
static char *text_add(char *dst, char *src, size_t len) {
  size_t have, avail, n;
  char *out;

  if (src == NULL || len == 0) {
    return dst; }

  avail = strlen(src);
  if (avail < len) {
    error_fatal(src, "string too short"); }

  have = (dst == NULL) ? 0 : strlen(dst);

  out = (char *)realloc(dst, have + len + 1);
  if (out == NULL) {
    error_fatal("memory", NULL); }

  /* Copy exactly what strncat() would: never read past the end of SRC */
  n = (avail < len) ? avail : len;
  memcpy(out + have, src, n);
  out[have + n] = '\0';

  return out; }
